author     Dimitry Andric <dim@FreeBSD.org>    2019-08-20 20:50:49 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2019-08-20 20:50:49 +0000
commit     2298981669bf3bd63335a4be179bc0f96823a8f4 (patch)
tree       1cbe2eb27f030d2d70b80ee5ca3c86bee7326a9f /lib/CodeGen
parent     9a83721404652cea39e9f02ae3e3b5c964602a5c (diff)
Diffstat (limited to 'lib/CodeGen')
87 files changed, 10377 insertions, 5285 deletions
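Most of the churn below is mechanical: every file header swaps the old University of Illinois banner for the new Apache-2.0 WITH LLVM-exception text. The substantive hunks adapt lib/CodeGen to LLVM API changes, the most pervasive being that runtime-function helpers now return llvm::FunctionCallee instead of llvm::Constant *. A minimal standalone sketch of that pattern, not taken from this commit (the function name and attribute below are hypothetical):

    // Sketch of the llvm::Constant* -> llvm::FunctionCallee migration; the
    // runtime function and attribute here are illustrative only.
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    static CallInst *emitDisposeCall(Module &M, IRBuilder<> &B, Value *Obj) {
      FunctionType *FTy = FunctionType::get(B.getVoidTy(), {B.getInt8PtrTy()},
                                            /*isVarArg=*/false);

      // getOrInsertFunction now returns a FunctionCallee, which carries the
      // callee pointer together with its function type.
      FunctionCallee Fn = M.getOrInsertFunction("_hypothetical_dispose", FTy);

      // Callers that still need the underlying global (compare the
      // cast<llvm::Constant>(...getCallee()) calls added in CGBlocks.cpp
      // below) unwrap the callee explicitly.
      if (auto *F = dyn_cast<Function>(Fn.getCallee()))
        F->setDoesNotThrow();

      return B.CreateCall(Fn, {Obj});
    }

The same unwrap-on-demand shape recurs in CGAtomic.cpp and CGBuiltin.cpp wherever a call site previously consumed the raw llvm::Constant *.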
diff --git a/lib/CodeGen/ABIInfo.h b/lib/CodeGen/ABIInfo.h index feed3833f24a..0c3a076da0b5 100644 --- a/lib/CodeGen/ABIInfo.h +++ b/lib/CodeGen/ABIInfo.h @@ -1,9 +1,8 @@ //===----- ABIInfo.h - ABI information access & encapsulation ---*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/Address.h b/lib/CodeGen/Address.h index 334308081ff3..6a8e57f8db33 100644 --- a/lib/CodeGen/Address.h +++ b/lib/CodeGen/Address.h @@ -1,9 +1,8 @@ //===-- Address.h - An aligned address -------------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp index b927acabac59..497652e85b47 100644 --- a/lib/CodeGen/BackendUtil.cpp +++ b/lib/CodeGen/BackendUtil.cpp @@ -1,9 +1,8 @@ //===--- BackendUtil.cpp - LLVM Backend Utilities -------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -37,11 +36,13 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Passes/PassBuilder.h" +#include "llvm/Passes/PassPlugin.h" #include "llvm/Support/BuryPointer.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" @@ -53,8 +54,11 @@ #include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h" #include "llvm/Transforms/InstCombine/InstCombine.h" #include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/Instrumentation/AddressSanitizer.h" #include "llvm/Transforms/Instrumentation/BoundsChecking.h" #include "llvm/Transforms/Instrumentation/GCOVProfiler.h" +#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h" +#include "llvm/Transforms/Instrumentation/InstrProfiling.h" #include "llvm/Transforms/Instrumentation/MemorySanitizer.h" #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" #include "llvm/Transforms/ObjCARC.h" @@ -62,6 +66,7 @@ #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/CanonicalizeAliases.h" +#include "llvm/Transforms/Utils/EntryExitInstrumenter.h" #include "llvm/Transforms/Utils/NameAnonGlobals.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" #include <memory> @@ -243,15 +248,15 @@ static void addAddressSanitizerPasses(const PassManagerBuilder &Builder, bool UseGlobalsGC = asanUseGlobalsGC(T, CGOpts); PM.add(createAddressSanitizerFunctionPass(/*CompileKernel*/ false, Recover, UseAfterScope)); - PM.add(createAddressSanitizerModulePass(/*CompileKernel*/ false, Recover, - UseGlobalsGC, UseOdrIndicator)); + PM.add(createModuleAddressSanitizerLegacyPassPass( + /*CompileKernel*/ false, Recover, UseGlobalsGC, UseOdrIndicator)); } static void addKernelAddressSanitizerPasses(const PassManagerBuilder &Builder, legacy::PassManagerBase &PM) { PM.add(createAddressSanitizerFunctionPass( /*CompileKernel*/ true, /*Recover*/ true, /*UseAfterScope*/ false)); - PM.add(createAddressSanitizerModulePass( + PM.add(createModuleAddressSanitizerLegacyPassPass( /*CompileKernel*/ true, /*Recover*/ true, /*UseGlobalsGC*/ true, /*UseOdrIndicator*/ false)); } @@ -262,12 +267,13 @@ static void addHWAddressSanitizerPasses(const PassManagerBuilder &Builder, static_cast<const PassManagerBuilderWrapper &>(Builder); const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts(); bool Recover = CGOpts.SanitizeRecover.has(SanitizerKind::HWAddress); - PM.add(createHWAddressSanitizerPass(/*CompileKernel*/ false, Recover)); + PM.add( + createHWAddressSanitizerLegacyPassPass(/*CompileKernel*/ false, Recover)); } static void addKernelHWAddressSanitizerPasses(const PassManagerBuilder &Builder, legacy::PassManagerBase &PM) { - PM.add(createHWAddressSanitizerPass( + PM.add(createHWAddressSanitizerLegacyPassPass( /*CompileKernel*/ true, /*Recover*/ true)); } @@ -279,7 +285,8 @@ static void addGeneralOptsForMemorySanitizer(const PassManagerBuilder &Builder, const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts(); int TrackOrigins = CGOpts.SanitizeMemoryTrackOrigins; bool Recover = CGOpts.SanitizeRecover.has(SanitizerKind::Memory); - PM.add(createMemorySanitizerLegacyPassPass(TrackOrigins, Recover, 
CompileKernel)); + PM.add(createMemorySanitizerLegacyPassPass( + MemorySanitizerOptions{TrackOrigins, Recover, CompileKernel})); // MemorySanitizer inserts complex instrumentation that mostly follows // the logic of the original code, but operates on "shadow" values. @@ -317,19 +324,6 @@ static void addDataFlowSanitizerPass(const PassManagerBuilder &Builder, PM.add(createDataFlowSanitizerPass(LangOpts.SanitizerBlacklistFiles)); } -static void addEfficiencySanitizerPass(const PassManagerBuilder &Builder, - legacy::PassManagerBase &PM) { - const PassManagerBuilderWrapper &BuilderWrapper = - static_cast<const PassManagerBuilderWrapper&>(Builder); - const LangOptions &LangOpts = BuilderWrapper.getLangOpts(); - EfficiencySanitizerOptions Opts; - if (LangOpts.Sanitize.has(SanitizerKind::EfficiencyCacheFrag)) - Opts.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag; - else if (LangOpts.Sanitize.has(SanitizerKind::EfficiencyWorkingSet)) - Opts.ToolType = EfficiencySanitizerOptions::ESAN_WorkingSet; - PM.add(createEfficiencySanitizerPass(Opts)); -} - static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple, const CodeGenOptions &CodeGenOpts) { TargetLibraryInfoImpl *TLII = new TargetLibraryInfoImpl(TargetTriple); @@ -347,6 +341,9 @@ static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple, case CodeGenOptions::Accelerate: TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::Accelerate); break; + case CodeGenOptions::MASSV: + TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::MASSV); + break; case CodeGenOptions::SVML: TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SVML); break; @@ -473,9 +470,9 @@ static void initTargetOptions(llvm::TargetOptions &Options, Options.DebuggerTuning = CodeGenOpts.getDebuggerTuning(); Options.EmitStackSizeSection = CodeGenOpts.StackSizeSection; Options.EmitAddrsig = CodeGenOpts.Addrsig; + Options.EnableDebugEntryValues = CodeGenOpts.EnableDebugEntryValues; - if (CodeGenOpts.getSplitDwarfMode() != CodeGenOptions::NoFission) - Options.MCOptions.SplitDwarfFile = CodeGenOpts.SplitDwarfFile; + Options.MCOptions.SplitDwarfFile = CodeGenOpts.SplitDwarfFile; Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll; Options.MCOptions.MCSaveTempLabels = CodeGenOpts.SaveTempLabels; Options.MCOptions.MCUseDwarfDirectory = !CodeGenOpts.NoDwarfDirectoryAsm; @@ -515,6 +512,21 @@ static Optional<GCOVOptions> getGCOVOptions(const CodeGenOptions &CodeGenOpts) { return Options; } +static Optional<InstrProfOptions> +getInstrProfOptions(const CodeGenOptions &CodeGenOpts, + const LangOptions &LangOpts) { + if (!CodeGenOpts.hasProfileClangInstr()) + return None; + InstrProfOptions Options; + Options.NoRedZone = CodeGenOpts.DisableRedZone; + Options.InstrProfileOutput = CodeGenOpts.InstrProfileOutput; + + // TODO: Surface the option to emit atomic profile counter increments at + // the driver level. + Options.Atomic = LangOpts.Sanitize.has(SanitizerKind::Thread); + return Options; +} + void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, legacy::FunctionPassManager &FPM) { // Handle disabling of all LLVM passes, where we want to preserve the @@ -554,6 +566,9 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, PMBuilder.LoopVectorize = CodeGenOpts.VectorizeLoop; PMBuilder.DisableUnrollLoops = !CodeGenOpts.UnrollLoops; + // Loop interleaving in the loop vectorizer has historically been set to be + // enabled when loop unrolling is enabled. 
+ PMBuilder.LoopsInterleaved = CodeGenOpts.UnrollLoops; PMBuilder.MergeFunctions = CodeGenOpts.MergeFunctions; PMBuilder.PrepareForThinLTO = CodeGenOpts.PrepareForThinLTO; PMBuilder.PrepareForLTO = CodeGenOpts.PrepareForLTO; @@ -579,7 +594,7 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, addObjCARCOptPass); } - if (LangOpts.CoroutinesTS) + if (LangOpts.Coroutines) addCoroutinePassesToExtensionPoints(PMBuilder); if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds)) { @@ -654,13 +669,6 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, addDataFlowSanitizerPass); } - if (LangOpts.Sanitize.hasOneOf(SanitizerKind::Efficiency)) { - PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast, - addEfficiencySanitizerPass); - PMBuilder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0, - addEfficiencySanitizerPass); - } - // Set up the per-function pass manager. FPM.add(new TargetLibraryInfoWrapperPass(*TLII)); if (CodeGenOpts.VerifyModule) @@ -676,26 +684,35 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, MPM.add(createStripSymbolsPass(true)); } - if (CodeGenOpts.hasProfileClangInstr()) { - InstrProfOptions Options; - Options.NoRedZone = CodeGenOpts.DisableRedZone; - Options.InstrProfileOutput = CodeGenOpts.InstrProfileOutput; - - // TODO: Surface the option to emit atomic profile counter increments at - // the driver level. - Options.Atomic = LangOpts.Sanitize.has(SanitizerKind::Thread); + if (Optional<InstrProfOptions> Options = + getInstrProfOptions(CodeGenOpts, LangOpts)) + MPM.add(createInstrProfilingLegacyPass(*Options, false)); - MPM.add(createInstrProfilingLegacyPass(Options)); - } + bool hasIRInstr = false; if (CodeGenOpts.hasProfileIRInstr()) { PMBuilder.EnablePGOInstrGen = true; + hasIRInstr = true; + } + if (CodeGenOpts.hasProfileCSIRInstr()) { + assert(!CodeGenOpts.hasProfileCSIRUse() && + "Cannot have both CSProfileUse pass and CSProfileGen pass at the " + "same time"); + assert(!hasIRInstr && + "Cannot have both ProfileGen pass and CSProfileGen pass at the " + "same time"); + PMBuilder.EnablePGOCSInstrGen = true; + hasIRInstr = true; + } + if (hasIRInstr) { if (!CodeGenOpts.InstrProfileOutput.empty()) PMBuilder.PGOInstrGen = CodeGenOpts.InstrProfileOutput; else PMBuilder.PGOInstrGen = DefaultProfileGenName; } - if (CodeGenOpts.hasProfileIRUse()) + if (CodeGenOpts.hasProfileIRUse()) { PMBuilder.PGOInstrUse = CodeGenOpts.ProfileInstrumentUsePath; + PMBuilder.EnablePGOCSInstrUse = CodeGenOpts.hasProfileCSIRUse(); + } if (!CodeGenOpts.SampleProfileFile.empty()) PMBuilder.PGOSampleUse = CodeGenOpts.SampleProfileFile; @@ -845,9 +862,8 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, break; default: - if (!CodeGenOpts.SplitDwarfFile.empty() && - (CodeGenOpts.getSplitDwarfMode() == CodeGenOptions::SplitFileFission)) { - DwoOS = openOutputFile(CodeGenOpts.SplitDwarfFile); + if (!CodeGenOpts.SplitDwarfOutput.empty()) { + DwoOS = openOutputFile(CodeGenOpts.SplitDwarfOutput); if (!DwoOS) return; } @@ -916,6 +932,43 @@ static PassBuilder::OptimizationLevel mapToLevel(const CodeGenOptions &Opts) { } } +static void addSanitizersAtO0(ModulePassManager &MPM, + const Triple &TargetTriple, + const LangOptions &LangOpts, + const CodeGenOptions &CodeGenOpts) { + auto ASanPass = [&](SanitizerMask Mask, bool CompileKernel) { + MPM.addPass(RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>()); + bool Recover = CodeGenOpts.SanitizeRecover.has(Mask); + MPM.addPass(createModuleToFunctionPassAdaptor(AddressSanitizerPass( + 
CompileKernel, Recover, CodeGenOpts.SanitizeAddressUseAfterScope))); + bool ModuleUseAfterScope = asanUseGlobalsGC(TargetTriple, CodeGenOpts); + MPM.addPass( + ModuleAddressSanitizerPass(CompileKernel, Recover, ModuleUseAfterScope, + CodeGenOpts.SanitizeAddressUseOdrIndicator)); + }; + + if (LangOpts.Sanitize.has(SanitizerKind::Address)) { + ASanPass(SanitizerKind::Address, /*CompileKernel=*/false); + } + + if (LangOpts.Sanitize.has(SanitizerKind::KernelAddress)) { + ASanPass(SanitizerKind::KernelAddress, /*CompileKernel=*/true); + } + + if (LangOpts.Sanitize.has(SanitizerKind::Memory)) { + MPM.addPass(createModuleToFunctionPassAdaptor(MemorySanitizerPass({}))); + } + + if (LangOpts.Sanitize.has(SanitizerKind::KernelMemory)) { + MPM.addPass(createModuleToFunctionPassAdaptor( + MemorySanitizerPass({0, false, /*Kernel=*/true}))); + } + + if (LangOpts.Sanitize.has(SanitizerKind::Thread)) { + MPM.addPass(createModuleToFunctionPassAdaptor(ThreadSanitizerPass())); + } +} + /// A clean version of `EmitAssembly` that uses the new pass manager. /// /// Not all features are currently supported in this system, but where @@ -929,13 +982,15 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( TimeRegion Region(FrontendTimesIsEnabled ? &CodeGenerationTime : nullptr); setCommandLineOpts(CodeGenOpts); - // The new pass manager always makes a target machine available to passes - // during construction. - CreateTargetMachine(/*MustCreateTM*/ true); - if (!TM) - // This will already be diagnosed, just bail. + bool RequiresCodeGen = (Action != Backend_EmitNothing && + Action != Backend_EmitBC && + Action != Backend_EmitLL); + CreateTargetMachine(RequiresCodeGen); + + if (RequiresCodeGen && !TM) return; - TheModule->setDataLayout(TM->createDataLayout()); + if (TM) + TheModule->setDataLayout(TM->createDataLayout()); Optional<PGOOptions> PGOOpt; @@ -944,23 +999,69 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( PGOOpt = PGOOptions(CodeGenOpts.InstrProfileOutput.empty() ? DefaultProfileGenName : CodeGenOpts.InstrProfileOutput, - "", "", "", true, + "", "", PGOOptions::IRInstr, PGOOptions::NoCSAction, CodeGenOpts.DebugInfoForProfiling); - else if (CodeGenOpts.hasProfileIRUse()) + else if (CodeGenOpts.hasProfileIRUse()) { // -fprofile-use. - PGOOpt = PGOOptions("", CodeGenOpts.ProfileInstrumentUsePath, "", - CodeGenOpts.ProfileRemappingFile, false, - CodeGenOpts.DebugInfoForProfiling); - else if (!CodeGenOpts.SampleProfileFile.empty()) + auto CSAction = CodeGenOpts.hasProfileCSIRUse() ? PGOOptions::CSIRUse + : PGOOptions::NoCSAction; + PGOOpt = PGOOptions(CodeGenOpts.ProfileInstrumentUsePath, "", + CodeGenOpts.ProfileRemappingFile, PGOOptions::IRUse, + CSAction, CodeGenOpts.DebugInfoForProfiling); + } else if (!CodeGenOpts.SampleProfileFile.empty()) // -fprofile-sample-use - PGOOpt = PGOOptions("", "", CodeGenOpts.SampleProfileFile, - CodeGenOpts.ProfileRemappingFile, false, - CodeGenOpts.DebugInfoForProfiling); + PGOOpt = + PGOOptions(CodeGenOpts.SampleProfileFile, "", + CodeGenOpts.ProfileRemappingFile, PGOOptions::SampleUse, + PGOOptions::NoCSAction, CodeGenOpts.DebugInfoForProfiling); else if (CodeGenOpts.DebugInfoForProfiling) // -fdebug-info-for-profiling - PGOOpt = PGOOptions("", "", "", "", false, true); + PGOOpt = PGOOptions("", "", "", PGOOptions::NoAction, + PGOOptions::NoCSAction, true); + + // Check to see if we want to generate a CS profile. 
+ if (CodeGenOpts.hasProfileCSIRInstr()) { + assert(!CodeGenOpts.hasProfileCSIRUse() && + "Cannot have both CSProfileUse pass and CSProfileGen pass at " + "the same time"); + if (PGOOpt.hasValue()) { + assert(PGOOpt->Action != PGOOptions::IRInstr && + PGOOpt->Action != PGOOptions::SampleUse && + "Cannot run CSProfileGen pass with ProfileGen or SampleUse " + " pass"); + PGOOpt->CSProfileGenFile = CodeGenOpts.InstrProfileOutput.empty() + ? DefaultProfileGenName + : CodeGenOpts.InstrProfileOutput; + PGOOpt->CSAction = PGOOptions::CSIRInstr; + } else + PGOOpt = PGOOptions("", + CodeGenOpts.InstrProfileOutput.empty() + ? DefaultProfileGenName + : CodeGenOpts.InstrProfileOutput, + "", PGOOptions::NoAction, PGOOptions::CSIRInstr, + CodeGenOpts.DebugInfoForProfiling); + } - PassBuilder PB(TM.get(), PGOOpt); + PipelineTuningOptions PTO; + PTO.LoopUnrolling = CodeGenOpts.UnrollLoops; + // For historical reasons, loop interleaving is set to mirror setting for loop + // unrolling. + PTO.LoopInterleaving = CodeGenOpts.UnrollLoops; + PTO.LoopVectorization = CodeGenOpts.VectorizeLoop; + PTO.SLPVectorization = CodeGenOpts.VectorizeSLP; + + PassBuilder PB(TM.get(), PTO, PGOOpt); + + // Attempt to load pass plugins and register their callbacks with PB. + for (auto &PluginFN : CodeGenOpts.PassPlugins) { + auto PassPlugin = PassPlugin::Load(PluginFN); + if (PassPlugin) { + PassPlugin->registerPassBuilderCallbacks(PB); + } else { + Diags.Report(diag::err_fe_unable_to_load_plugin) + << PluginFN << toString(PassPlugin.takeError()); + } + } LoopAnalysisManager LAM(CodeGenOpts.DebugPassManager); FunctionAnalysisManager FAM(CodeGenOpts.DebugPassManager); @@ -994,10 +1095,15 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( if (CodeGenOpts.OptimizationLevel == 0) { if (Optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts)) MPM.addPass(GCOVProfilerPass(*Options)); + if (Optional<InstrProfOptions> Options = + getInstrProfOptions(CodeGenOpts, LangOpts)) + MPM.addPass(InstrProfiling(*Options, false)); // Build a minimal pipeline based on the semantics required by Clang, - // which is just that always inlining occurs. - MPM.addPass(AlwaysInlinerPass()); + // which is just that always inlining occurs. Further, disable generating + // lifetime intrinsics to avoid enabling further optimizations during + // code generation. + MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/false)); // At -O0 we directly run necessary sanitizer passes. if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds)) @@ -1013,17 +1119,61 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( // configure the pipeline. PassBuilder::OptimizationLevel Level = mapToLevel(CodeGenOpts); + PB.registerPipelineStartEPCallback([](ModulePassManager &MPM) { + MPM.addPass(createModuleToFunctionPassAdaptor( + EntryExitInstrumenterPass(/*PostInlining=*/false))); + }); + // Register callbacks to schedule sanitizer passes at the appropriate part of // the pipeline. 
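For readers unfamiliar with the new pass manager's extension points, here is a compilable sketch of the registration pattern the following hunk relies on, under LLVM-9-era APIs; the function name and option values are placeholders, and the analysis managers a real pipeline must cross-register are omitted:

    // Standalone sketch of the PassBuilder extension-point pattern; all
    // names and option values here are placeholders, not part of this commit.
    #include "llvm/Passes/PassBuilder.h"
    #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"

    using namespace llvm;

    ModulePassManager buildSketchPipeline(bool SanitizeThread) {
      PipelineTuningOptions PTO;
      PTO.LoopUnrolling = true;
      // Interleaving mirrors the unrolling setting, as the comment added in
      // CreatePasses() above notes.
      PTO.LoopInterleaving = PTO.LoopUnrolling;

      PassBuilder PB(/*TM=*/nullptr, PTO);

      // Callbacks fire while the default pipeline is assembled, splicing the
      // pass into the chosen extension point rather than appending it.
      if (SanitizeThread)
        PB.registerOptimizerLastEPCallback(
            [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
              FPM.addPass(ThreadSanitizerPass());
            });

      return PB.buildPerModuleDefaultPipeline(PassBuilder::OptimizationLevel::O2);
    }

The hunk that follows registers BoundsChecking, MemorySanitizer, ThreadSanitizer, and AddressSanitizer through exactly these hooks, capturing the relevant CodeGenOptions in the lambdas.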
+ // FIXME: either handle asan/the remaining sanitizers or error out if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds)) PB.registerScalarOptimizerLateEPCallback( [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) { FPM.addPass(BoundsCheckingPass()); }); + if (LangOpts.Sanitize.has(SanitizerKind::Memory)) + PB.registerOptimizerLastEPCallback( + [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) { + FPM.addPass(MemorySanitizerPass({})); + }); + if (LangOpts.Sanitize.has(SanitizerKind::Thread)) + PB.registerOptimizerLastEPCallback( + [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) { + FPM.addPass(ThreadSanitizerPass()); + }); + if (LangOpts.Sanitize.has(SanitizerKind::Address)) { + PB.registerPipelineStartEPCallback([&](ModulePassManager &MPM) { + MPM.addPass( + RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>()); + }); + bool Recover = CodeGenOpts.SanitizeRecover.has(SanitizerKind::Address); + bool UseAfterScope = CodeGenOpts.SanitizeAddressUseAfterScope; + PB.registerOptimizerLastEPCallback( + [Recover, UseAfterScope](FunctionPassManager &FPM, + PassBuilder::OptimizationLevel Level) { + FPM.addPass(AddressSanitizerPass( + /*CompileKernel=*/false, Recover, UseAfterScope)); + }); + bool ModuleUseAfterScope = asanUseGlobalsGC(TargetTriple, CodeGenOpts); + bool UseOdrIndicator = CodeGenOpts.SanitizeAddressUseOdrIndicator; + PB.registerPipelineStartEPCallback( + [Recover, ModuleUseAfterScope, + UseOdrIndicator](ModulePassManager &MPM) { + MPM.addPass(ModuleAddressSanitizerPass( + /*CompileKernel=*/false, Recover, ModuleUseAfterScope, + UseOdrIndicator)); + }); + } if (Optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts)) PB.registerPipelineStartEPCallback([Options](ModulePassManager &MPM) { MPM.addPass(GCOVProfilerPass(*Options)); }); + if (Optional<InstrProfOptions> Options = + getInstrProfOptions(CodeGenOpts, LangOpts)) + PB.registerPipelineStartEPCallback([Options](ModulePassManager &MPM) { + MPM.addPass(InstrProfiling(*Options, false)); + }); if (IsThinLTO) { MPM = PB.buildThinLTOPreLinkDefaultPipeline( @@ -1040,6 +1190,19 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( CodeGenOpts.DebugPassManager); } } + + if (LangOpts.Sanitize.has(SanitizerKind::HWAddress)) { + bool Recover = CodeGenOpts.SanitizeRecover.has(SanitizerKind::HWAddress); + MPM.addPass(HWAddressSanitizerPass( + /*CompileKernel=*/false, Recover)); + } + if (LangOpts.Sanitize.has(SanitizerKind::KernelHWAddress)) { + MPM.addPass(HWAddressSanitizerPass( + /*CompileKernel=*/true, /*Recover=*/true)); + } + + if (CodeGenOpts.OptimizationLevel == 0) + addSanitizersAtO0(MPM, TargetTriple, LangOpts, CodeGenOpts); } // FIXME: We still use the legacy pass manager to do code generation. 
We @@ -1093,8 +1256,8 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( NeedCodeGen = true; CodeGenPasses.add( createTargetTransformInfoWrapperPass(getTargetIRAnalysis())); - if (!CodeGenOpts.SplitDwarfFile.empty()) { - DwoOS = openOutputFile(CodeGenOpts.SplitDwarfFile); + if (!CodeGenOpts.SplitDwarfOutput.empty()) { + DwoOS = openOutputFile(CodeGenOpts.SplitDwarfOutput); if (!DwoOS) return; } @@ -1226,14 +1389,28 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M, Conf.MAttrs = TOpts.Features; Conf.RelocModel = CGOpts.RelocationModel; Conf.CGOptLevel = getCGOptLevel(CGOpts); + Conf.OptLevel = CGOpts.OptimizationLevel; initTargetOptions(Conf.Options, CGOpts, TOpts, LOpts, HeaderOpts); Conf.SampleProfile = std::move(SampleProfile); + + // Context sensitive profile. + if (CGOpts.hasProfileCSIRInstr()) { + Conf.RunCSIRInstr = true; + Conf.CSIRProfile = std::move(CGOpts.InstrProfileOutput); + } else if (CGOpts.hasProfileCSIRUse()) { + Conf.RunCSIRInstr = false; + Conf.CSIRProfile = std::move(CGOpts.ProfileInstrumentUsePath); + } + Conf.ProfileRemapping = std::move(ProfileRemapping); Conf.UseNewPM = CGOpts.ExperimentalNewPassManager; Conf.DebugPassManager = CGOpts.DebugPassManager; Conf.RemarksWithHotness = CGOpts.DiagnosticsWithHotness; Conf.RemarksFilename = CGOpts.OptRecordFile; - Conf.DwoPath = CGOpts.SplitDwarfFile; + Conf.RemarksPasses = CGOpts.OptRecordPasses; + Conf.RemarksFormat = CGOpts.OptRecordFormat; + Conf.SplitDwarfFile = CGOpts.SplitDwarfFile; + Conf.SplitDwarfOutput = CGOpts.SplitDwarfOutput; switch (Action) { case Backend_EmitNothing: Conf.PreCodeGenModuleHook = [](size_t Task, const Module &Mod) { @@ -1273,6 +1450,9 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags, const llvm::DataLayout &TDesc, Module *M, BackendAction Action, std::unique_ptr<raw_pwrite_stream> OS) { + + llvm::TimeTraceScope TimeScope("Backend", StringRef("")); + std::unique_ptr<llvm::Module> EmptyModule; if (!CGOpts.ThinLTOIndexFile.empty()) { // If we are performing a ThinLTO importing compile, load the function index @@ -1339,6 +1519,9 @@ static const char* getSectionNameForBitcode(const Triple &T) { case Triple::Wasm: case Triple::UnknownObjectFormat: return ".llvmbc"; + case Triple::XCOFF: + llvm_unreachable("XCOFF is not yet implemented"); + break; } llvm_unreachable("Unimplemented ObjectFormatType"); } @@ -1352,6 +1535,9 @@ static const char* getSectionNameForCommandline(const Triple &T) { case Triple::Wasm: case Triple::UnknownObjectFormat: return ".llvmcmd"; + case Triple::XCOFF: + llvm_unreachable("XCOFF is not yet implemented"); + break; } llvm_unreachable("Unimplemented ObjectFormatType"); } diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp index 24056a449def..a95cd12c2d64 100644 --- a/lib/CodeGen/CGAtomic.cpp +++ b/lib/CodeGen/CGAtomic.cpp @@ -1,9 +1,8 @@ //===--- CGAtomic.cpp - Emit LLVM IR for atomic operations ----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -36,7 +35,6 @@ namespace { uint64_t ValueSizeInBits; CharUnits AtomicAlign; CharUnits ValueAlign; - CharUnits LValueAlign; TypeEvaluationKind EvaluationKind; bool UseLibcall; LValue LVal; @@ -133,7 +131,6 @@ namespace { QualType getAtomicType() const { return AtomicTy; } QualType getValueType() const { return ValueTy; } CharUnits getAtomicAlignment() const { return AtomicAlign; } - CharUnits getValueAlignment() const { return ValueAlign; } uint64_t getAtomicSizeInBits() const { return AtomicSizeInBits; } uint64_t getValueSizeInBits() const { return ValueSizeInBits; } TypeEvaluationKind getEvaluationKind() const { return EvaluationKind; } @@ -202,7 +199,7 @@ namespace { assert(LVal.isSimple()); Address addr = getAtomicAddress(); if (hasPadding()) - addr = CGF.Builder.CreateStructGEP(addr, 0, CharUnits()); + addr = CGF.Builder.CreateStructGEP(addr, 0); return LValue::MakeAddr(addr, getValueType(), CGF.getContext(), LVal.getBaseInfo(), LVal.getTBAAInfo()); @@ -308,7 +305,7 @@ static RValue emitAtomicLibcall(CodeGenFunction &CGF, const CGFunctionInfo &fnInfo = CGF.CGM.getTypes().arrangeBuiltinFunctionCall(resultType, args); llvm::FunctionType *fnTy = CGF.CGM.getTypes().GetFunctionType(fnInfo); - llvm::Constant *fn = CGF.CGM.CreateRuntimeFunction(fnTy, fnName); + llvm::FunctionCallee fn = CGF.CGM.CreateRuntimeFunction(fnTy, fnName); auto callee = CGCallee::forDirect(fn); return CGF.EmitCall(fnInfo, callee, ReturnValueSlot(), args); } @@ -680,7 +677,8 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest, // Handle constant scope. if (auto SC = dyn_cast<llvm::ConstantInt>(Scope)) { auto SCID = CGF.getTargetHooks().getLLVMSyncScopeID( - ScopeModel->map(SC->getZExtValue()), CGF.CGM.getLLVMContext()); + CGF.CGM.getLangOpts(), ScopeModel->map(SC->getZExtValue()), + Order, CGF.CGM.getLLVMContext()); EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size, Order, SCID); return; @@ -709,7 +707,9 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest, Builder.SetInsertPoint(B); EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size, Order, - CGF.getTargetHooks().getLLVMSyncScopeID(ScopeModel->map(S), + CGF.getTargetHooks().getLLVMSyncScopeID(CGF.CGM.getLangOpts(), + ScopeModel->map(S), + Order, CGF.getLLVMContext())); Builder.CreateBr(ContBB); } @@ -1357,7 +1357,7 @@ RValue AtomicInfo::convertAtomicTempToRValue(Address addr, // Drill into the padding structure if we have one. if (hasPadding()) - addr = CGF.Builder.CreateStructGEP(addr, 0, CharUnits()); + addr = CGF.Builder.CreateStructGEP(addr, 0); // Otherwise, just convert the temporary to an r-value using the // normal conversion routine. @@ -1688,7 +1688,7 @@ EmitAtomicUpdateValue(CodeGenFunction &CGF, AtomicInfo &Atomics, RValue OldRVal, UpRVal = OldRVal; DesiredLVal = CGF.MakeAddrLValue(DesiredAddr, AtomicLVal.getType()); } else { - // Build new lvalue for temp address + // Build new lvalue for temp address. Address Ptr = Atomics.materializeRValue(OldRVal); LValue UpdateLVal; if (AtomicLVal.isBitField()) { @@ -1721,7 +1721,7 @@ EmitAtomicUpdateValue(CodeGenFunction &CGF, AtomicInfo &Atomics, RValue OldRVal, } UpRVal = CGF.EmitLoadOfLValue(UpdateLVal, SourceLocation()); } - // Store new value in the corresponding memory area + // Store new value in the corresponding memory area. 
RValue NewRVal = UpdateOp(UpRVal); if (NewRVal.isScalar()) { CGF.EmitStoreThroughLValue(NewRVal, DesiredLVal); @@ -1786,7 +1786,7 @@ void AtomicInfo::EmitAtomicUpdateOp( SourceLocation(), /*AsValue=*/false); EmitAtomicUpdateValue(CGF, *this, OldRVal, UpdateOp, NewAtomicAddr); auto *DesiredVal = CGF.Builder.CreateLoad(NewAtomicIntAddr); - // Try to write new value using cmpxchg operation + // Try to write new value using cmpxchg operation. auto Res = EmitAtomicCompareExchangeOp(PHI, DesiredVal, AO, Failure); PHI->addIncoming(Res.first, CGF.Builder.GetInsertBlock()); CGF.Builder.CreateCondBr(Res.second, ExitBB, ContBB); @@ -1797,7 +1797,7 @@ static void EmitAtomicUpdateValue(CodeGenFunction &CGF, AtomicInfo &Atomics, RValue UpdateRVal, Address DesiredAddr) { LValue AtomicLVal = Atomics.getAtomicLValue(); LValue DesiredLVal; - // Build new lvalue for temp address + // Build new lvalue for temp address. if (AtomicLVal.isBitField()) { DesiredLVal = LValue::MakeBitfield(DesiredAddr, AtomicLVal.getBitFieldInfo(), @@ -1814,7 +1814,7 @@ static void EmitAtomicUpdateValue(CodeGenFunction &CGF, AtomicInfo &Atomics, DesiredAddr, AtomicLVal.getExtVectorElts(), AtomicLVal.getType(), AtomicLVal.getBaseInfo(), AtomicLVal.getTBAAInfo()); } - // Store new value in the corresponding memory area + // Store new value in the corresponding memory area. assert(UpdateRVal.isScalar()); CGF.EmitStoreThroughLValue(UpdateRVal, DesiredLVal); } @@ -1866,7 +1866,7 @@ void AtomicInfo::EmitAtomicUpdateOp(llvm::AtomicOrdering AO, RValue UpdateRVal, } EmitAtomicUpdateValue(CGF, *this, UpdateRVal, NewAtomicAddr); auto *DesiredVal = CGF.Builder.CreateLoad(NewAtomicIntAddr); - // Try to write new value using cmpxchg operation + // Try to write new value using cmpxchg operation. auto Res = EmitAtomicCompareExchangeOp(PHI, DesiredVal, AO, Failure); PHI->addIncoming(Res.first, CGF.Builder.GetInsertBlock()); CGF.Builder.CreateCondBr(Res.second, ExitBB, ContBB); diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp index fa3c3ee8610c..c3ee7129d9d7 100644 --- a/lib/CodeGen/CGBlocks.cpp +++ b/lib/CodeGen/CGBlocks.cpp @@ -1,9 +1,8 @@ //===--- CGBlocks.cpp - Emit LLVM Code for declarations ---------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -23,7 +22,6 @@ #include "clang/AST/DeclObjC.h" #include "clang/CodeGen/ConstantInitBuilder.h" #include "llvm/ADT/SmallSet.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Module.h" #include "llvm/Support/ScopedPrinter.h" @@ -276,6 +274,8 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM, /*constant*/ true, linkage, AddrSpace); if (linkage == llvm::GlobalValue::LinkOnceODRLinkage) { + if (CGM.supportsCOMDAT()) + global->setComdat(CGM.getModule().getOrInsertComdat(descName)); global->setVisibility(llvm::GlobalValue::HiddenVisibility); global->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); } @@ -671,7 +671,7 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, // Sort the layout by alignment. We have to use a stable sort here // to get reproducible results. 
There should probably be an // llvm::array_pod_stable_sort. - std::stable_sort(layout.begin(), layout.end()); + llvm::stable_sort(layout); // Needed for blocks layout info. info.BlockHeaderForcedGapOffset = info.BlockSize; @@ -838,9 +838,8 @@ static void enterBlockScope(CodeGenFunction &CGF, BlockDecl *block) { } // GEP down to the address. - Address addr = CGF.Builder.CreateStructGEP(blockInfo.LocalAddress, - capture.getIndex(), - capture.getOffset()); + Address addr = + CGF.Builder.CreateStructGEP(blockInfo.LocalAddress, capture.getIndex()); // We can use that GEP as the dominating IP. if (!blockInfo.DominatingIP) @@ -977,27 +976,24 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { flags |= BLOCK_IS_NOESCAPE | BLOCK_IS_GLOBAL; } - auto projectField = - [&](unsigned index, CharUnits offset, const Twine &name) -> Address { - return Builder.CreateStructGEP(blockAddr, index, offset, name); - }; - auto storeField = - [&](llvm::Value *value, unsigned index, CharUnits offset, - const Twine &name) { - Builder.CreateStore(value, projectField(index, offset, name)); - }; + auto projectField = [&](unsigned index, const Twine &name) -> Address { + return Builder.CreateStructGEP(blockAddr, index, name); + }; + auto storeField = [&](llvm::Value *value, unsigned index, const Twine &name) { + Builder.CreateStore(value, projectField(index, name)); + }; // Initialize the block header. { // We assume all the header fields are densely packed. unsigned index = 0; CharUnits offset; - auto addHeaderField = - [&](llvm::Value *value, CharUnits size, const Twine &name) { - storeField(value, index, offset, name); - offset += size; - index++; - }; + auto addHeaderField = [&](llvm::Value *value, CharUnits size, + const Twine &name) { + storeField(value, index, name); + offset += size; + index++; + }; if (!IsOpenCL) { addHeaderField(isa, getPointerSize(), "block.isa"); @@ -1033,8 +1029,8 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { // First, 'this'. if (blockDecl->capturesCXXThis()) { - Address addr = projectField(blockInfo.CXXThisIndex, blockInfo.CXXThisOffset, - "block.captured-this.addr"); + Address addr = + projectField(blockInfo.CXXThisIndex, "block.captured-this.addr"); Builder.CreateStore(LoadCXXThis(), addr); } @@ -1050,8 +1046,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { // This will be a [[type]]*, except that a byref entry will just be // an i8**. - Address blockField = - projectField(capture.getIndex(), capture.getOffset(), "block.captured"); + Address blockField = projectField(capture.getIndex(), "block.captured"); // Compute the address of the thing we're going to move into the // block literal. @@ -1070,7 +1065,6 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { // This is a [[type]]*, except that a byref entry will just be an i8**. src = Builder.CreateStructGEP(LoadBlockStruct(), enclosingCapture.getIndex(), - enclosingCapture.getOffset(), "block.capture.addr"); } else { auto I = LocalDeclMap.find(variable); @@ -1261,52 +1255,49 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E, ReturnValueSlot ReturnValue) { const BlockPointerType *BPT = E->getCallee()->getType()->getAs<BlockPointerType>(); - llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee()); - - // Get a pointer to the generic block literal. 
- // For OpenCL we generate generic AS void ptr to be able to reuse the same - // block definition for blocks with captures generated as private AS local - // variables and without captures generated as global AS program scope - // variables. - unsigned AddrSpace = 0; - if (getLangOpts().OpenCL) - AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic); - - llvm::Type *BlockLiteralTy = - llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace); - - // Bitcast the callee to a block literal. - BlockPtr = - Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal"); - - // Get the function pointer from the literal. - llvm::Value *FuncPtr = - Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, - CGM.getLangOpts().OpenCL ? 2 : 3); - - // Add the block literal. + llvm::Type *GenBlockTy = CGM.getGenericBlockLiteralType(); + llvm::Value *Func = nullptr; + QualType FnType = BPT->getPointeeType(); + ASTContext &Ctx = getContext(); CallArgList Args; - QualType VoidPtrQualTy = getContext().VoidPtrTy; - llvm::Type *GenericVoidPtrTy = VoidPtrTy; if (getLangOpts().OpenCL) { - GenericVoidPtrTy = CGM.getOpenCLRuntime().getGenericVoidPointerType(); - VoidPtrQualTy = - getContext().getPointerType(getContext().getAddrSpaceQualType( - getContext().VoidTy, LangAS::opencl_generic)); - } - - BlockPtr = Builder.CreatePointerCast(BlockPtr, GenericVoidPtrTy); - Args.add(RValue::get(BlockPtr), VoidPtrQualTy); - - QualType FnType = BPT->getPointeeType(); + // For OpenCL, BlockPtr is already casted to generic block literal. + + // First argument of a block call is a generic block literal casted to + // generic void pointer, i.e. i8 addrspace(4)* + llvm::Value *BlockDescriptor = Builder.CreatePointerCast( + BlockPtr, CGM.getOpenCLRuntime().getGenericVoidPointerType()); + QualType VoidPtrQualTy = Ctx.getPointerType( + Ctx.getAddrSpaceQualType(Ctx.VoidTy, LangAS::opencl_generic)); + Args.add(RValue::get(BlockDescriptor), VoidPtrQualTy); + // And the rest of the arguments. + EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments()); + + // We *can* call the block directly unless it is a function argument. + if (!isa<ParmVarDecl>(E->getCalleeDecl())) + Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee()); + else { + llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 2); + Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); + } + } else { + // Bitcast the block literal to a generic block literal. + BlockPtr = Builder.CreatePointerCast( + BlockPtr, llvm::PointerType::get(GenBlockTy, 0), "block.literal"); + // Get pointer to the block invoke function + llvm::Value *FuncPtr = Builder.CreateStructGEP(GenBlockTy, BlockPtr, 3); - // And the rest of the arguments. - EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments()); + // First argument is a block literal casted to a void pointer + BlockPtr = Builder.CreatePointerCast(BlockPtr, VoidPtrTy); + Args.add(RValue::get(BlockPtr), Ctx.VoidPtrTy); + // And the rest of the arguments. + EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments()); - // Load the function. - llvm::Value *Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); + // Load the function. + Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); + } const FunctionType *FuncTy = FnType->castAs<FunctionType>(); const CGFunctionInfo &FnInfo = @@ -1332,9 +1323,8 @@ Address CodeGenFunction::GetAddrOfBlockDecl(const VarDecl *variable) { // Handle constant captures. 
if (capture.isConstant()) return LocalDeclMap.find(variable)->second; - Address addr = - Builder.CreateStructGEP(LoadBlockStruct(), capture.getIndex(), - capture.getOffset(), "block.capture.addr"); + Address addr = Builder.CreateStructGEP(LoadBlockStruct(), capture.getIndex(), + "block.capture.addr"); if (variable->isEscapingByref()) { // addr should be a void** right now. Load, then cast the result @@ -1444,10 +1434,12 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM, if (CGM.getContext().getLangOpts().OpenCL) AddrSpace = CGM.getContext().getTargetAddressSpace(LangAS::opencl_global); - llvm::Constant *literal = fields.finishAndCreateGlobal( + llvm::GlobalVariable *literal = fields.finishAndCreateGlobal( "__block_literal_global", blockInfo.BlockAlign, /*constant*/ !IsWindows, llvm::GlobalVariable::InternalLinkage, AddrSpace); + literal->addAttribute("objc_arc_inert"); + // Windows does not allow globals to be initialised to point to globals in // different DLLs. Any such variables must run code to initialise them. if (IsWindows) { @@ -1617,9 +1609,8 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, // If we have a C++ 'this' reference, go ahead and force it into // existence now. if (blockDecl->capturesCXXThis()) { - Address addr = - Builder.CreateStructGEP(LoadBlockStruct(), blockInfo.CXXThisIndex, - blockInfo.CXXThisOffset, "block.captured-this"); + Address addr = Builder.CreateStructGEP( + LoadBlockStruct(), blockInfo.CXXThisIndex, "block.captured-this"); CXXThisValue = Builder.CreateLoad(addr, "this"); } @@ -2029,6 +2020,8 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { llvm::Function *Fn = llvm::Function::Create(LTy, llvm::GlobalValue::LinkOnceODRLinkage, FuncName, &CGM.getModule()); + if (CGM.supportsCOMDAT()) + Fn->setComdat(CGM.getModule().getOrInsertComdat(FuncName)); IdentifierInfo *II = &C.Idents.get(FuncName); @@ -2062,8 +2055,8 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { BlockFieldFlags flags = CopiedCapture.CopyFlags; unsigned index = capture.getIndex(); - Address srcField = Builder.CreateStructGEP(src, index, capture.getOffset()); - Address dstField = Builder.CreateStructGEP(dst, index, capture.getOffset()); + Address srcField = Builder.CreateStructGEP(src, index); + Address dstField = Builder.CreateStructGEP(dst, index); switch (CopiedCapture.CopyKind) { case BlockCaptureEntityKind::CXXRecord: @@ -2220,6 +2213,8 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { llvm::Function *Fn = llvm::Function::Create(LTy, llvm::GlobalValue::LinkOnceODRLinkage, FuncName, &CGM.getModule()); + if (CGM.supportsCOMDAT()) + Fn->setComdat(CGM.getModule().getOrInsertComdat(FuncName)); IdentifierInfo *II = &C.Idents.get(FuncName); @@ -2251,8 +2246,7 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { const CGBlockInfo::Capture &capture = *DestroyedCapture.Capture; BlockFieldFlags flags = DestroyedCapture.DisposeFlags; - Address srcField = - Builder.CreateStructGEP(src, capture.getIndex(), capture.getOffset()); + Address srcField = Builder.CreateStructGEP(src, capture.getIndex()); pushCaptureCleanup(DestroyedCapture.DisposeKind, srcField, CI.getVariable()->getType(), flags, @@ -2286,7 +2280,7 @@ public: unsigned flags = (Flags | BLOCK_BYREF_CALLER).getBitMask(); llvm::Value *flagsVal = llvm::ConstantInt::get(CGF.Int32Ty, flags); - llvm::Value *fn = CGF.CGM.getBlockObjectAssign(); + llvm::FunctionCallee fn = CGF.CGM.getBlockObjectAssign(); llvm::Value 
*args[] = { destField.getPointer(), srcValue, flagsVal }; CGF.EmitNounwindRuntimeCall(fn, args); @@ -2712,13 +2706,11 @@ Address CodeGenFunction::emitBlockByrefAddress(Address baseAddr, const llvm::Twine &name) { // Chase the forwarding address if requested. if (followForward) { - Address forwardingAddr = - Builder.CreateStructGEP(baseAddr, 1, getPointerSize(), "forwarding"); + Address forwardingAddr = Builder.CreateStructGEP(baseAddr, 1, "forwarding"); baseAddr = Address(Builder.CreateLoad(forwardingAddr), info.ByrefAlignment); } - return Builder.CreateStructGEP(baseAddr, info.FieldIndex, - info.FieldOffset, name); + return Builder.CreateStructGEP(baseAddr, info.FieldIndex, name); } /// BuildByrefInfo - This routine changes a __block variable declared as T x @@ -2836,8 +2828,7 @@ void CodeGenFunction::emitByrefStructureInit(const AutoVarEmission &emission) { CharUnits nextHeaderOffset; auto storeHeaderField = [&](llvm::Value *value, CharUnits fieldSize, const Twine &name) { - auto fieldAddr = Builder.CreateStructGEP(addr, nextHeaderIndex, - nextHeaderOffset, name); + auto fieldAddr = Builder.CreateStructGEP(addr, nextHeaderIndex, name); Builder.CreateStore(value, fieldAddr); nextHeaderIndex++; @@ -2933,7 +2924,7 @@ void CodeGenFunction::emitByrefStructureInit(const AutoVarEmission &emission) { void CodeGenFunction::BuildBlockRelease(llvm::Value *V, BlockFieldFlags flags, bool CanThrow) { - llvm::Value *F = CGM.getBlockObjectDispose(); + llvm::FunctionCallee F = CGM.getBlockObjectDispose(); llvm::Value *args[] = { Builder.CreateBitCast(V, Int8PtrTy), llvm::ConstantInt::get(Int32Ty, flags.getBitMask()) @@ -2989,7 +2980,7 @@ static void configureBlocksRuntimeObject(CodeGenModule &CGM, CGM.setDSOLocal(GV); } -llvm::Constant *CodeGenModule::getBlockObjectDispose() { +llvm::FunctionCallee CodeGenModule::getBlockObjectDispose() { if (BlockObjectDispose) return BlockObjectDispose; @@ -2997,11 +2988,12 @@ llvm::Constant *CodeGenModule::getBlockObjectDispose() { llvm::FunctionType *fty = llvm::FunctionType::get(VoidTy, args, false); BlockObjectDispose = CreateRuntimeFunction(fty, "_Block_object_dispose"); - configureBlocksRuntimeObject(*this, BlockObjectDispose); + configureBlocksRuntimeObject( + *this, cast<llvm::Constant>(BlockObjectDispose.getCallee())); return BlockObjectDispose; } -llvm::Constant *CodeGenModule::getBlockObjectAssign() { +llvm::FunctionCallee CodeGenModule::getBlockObjectAssign() { if (BlockObjectAssign) return BlockObjectAssign; @@ -3009,7 +3001,8 @@ llvm::Constant *CodeGenModule::getBlockObjectAssign() { llvm::FunctionType *fty = llvm::FunctionType::get(VoidTy, args, false); BlockObjectAssign = CreateRuntimeFunction(fty, "_Block_object_assign"); - configureBlocksRuntimeObject(*this, BlockObjectAssign); + configureBlocksRuntimeObject( + *this, cast<llvm::Constant>(BlockObjectAssign.getCallee())); return BlockObjectAssign; } diff --git a/lib/CodeGen/CGBlocks.h b/lib/CodeGen/CGBlocks.h index 3f9fc16d9b10..c4bfde666154 100644 --- a/lib/CodeGen/CGBlocks.h +++ b/lib/CodeGen/CGBlocks.h @@ -1,9 +1,8 @@ //===-- CGBlocks.h - state for LLVM CodeGen for blocks ----------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/CGBuilder.h b/lib/CodeGen/CGBuilder.h index 654ef72060b7..68c8c641139f 100644 --- a/lib/CodeGen/CGBuilder.h +++ b/lib/CodeGen/CGBuilder.h @@ -1,9 +1,8 @@ //===-- CGBuilder.h - Choose IRBuilder implementation ----------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -168,19 +167,25 @@ public: return Address(Ptr, Addr.getAlignment()); } + /// Given + /// %addr = {T1, T2...}* ... + /// produce + /// %name = getelementptr inbounds %addr, i32 0, i32 index + /// + /// This API assumes that drilling into a struct like this is always an + /// inbounds operation. using CGBuilderBaseTy::CreateStructGEP; - Address CreateStructGEP(Address Addr, unsigned Index, CharUnits Offset, + Address CreateStructGEP(Address Addr, unsigned Index, const llvm::Twine &Name = "") { + llvm::StructType *ElTy = cast<llvm::StructType>(Addr.getElementType()); + const llvm::DataLayout &DL = BB->getParent()->getParent()->getDataLayout(); + const llvm::StructLayout *Layout = DL.getStructLayout(ElTy); + auto Offset = CharUnits::fromQuantity(Layout->getElementOffset(Index)); + return Address(CreateStructGEP(Addr.getElementType(), Addr.getPointer(), Index, Name), Addr.getAlignment().alignmentAtOffset(Offset)); } - Address CreateStructGEP(Address Addr, unsigned Index, - const llvm::StructLayout *Layout, - const llvm::Twine &Name = "") { - auto Offset = CharUnits::fromQuantity(Layout->getElementOffset(Index)); - return CreateStructGEP(Addr, Index, Offset, Name); - } /// Given /// %addr = [n x T]* ... @@ -190,15 +195,17 @@ public: /// /// This API assumes that drilling into an array like this is always /// an inbounds operation. - /// - /// \param EltSize - the size of the type T in bytes - Address CreateConstArrayGEP(Address Addr, uint64_t Index, CharUnits EltSize, + Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name = "") { - return Address(CreateInBoundsGEP(Addr.getPointer(), - {getSize(CharUnits::Zero()), - getSize(Index)}, - Name), - Addr.getAlignment().alignmentAtOffset(Index * EltSize)); + llvm::ArrayType *ElTy = cast<llvm::ArrayType>(Addr.getElementType()); + const llvm::DataLayout &DL = BB->getParent()->getParent()->getDataLayout(); + CharUnits EltSize = + CharUnits::fromQuantity(DL.getTypeAllocSize(ElTy->getElementType())); + + return Address( + CreateInBoundsGEP(Addr.getPointer(), + {getSize(CharUnits::Zero()), getSize(Index)}, Name), + Addr.getAlignment().alignmentAtOffset(Index * EltSize)); } /// Given @@ -206,11 +213,12 @@ public: /// produce /// %name = getelementptr inbounds %addr, i64 index /// where i64 is actually the target word size. 
- /// - /// \param EltSize - the size of the type T in bytes Address CreateConstInBoundsGEP(Address Addr, uint64_t Index, - CharUnits EltSize, const llvm::Twine &Name = "") { + llvm::Type *ElTy = Addr.getElementType(); + const llvm::DataLayout &DL = BB->getParent()->getParent()->getDataLayout(); + CharUnits EltSize = CharUnits::fromQuantity(DL.getTypeAllocSize(ElTy)); + return Address(CreateInBoundsGEP(Addr.getElementType(), Addr.getPointer(), getSize(Index), Name), Addr.getAlignment().alignmentAtOffset(Index * EltSize)); @@ -221,10 +229,12 @@ public: /// produce /// %name = getelementptr inbounds %addr, i64 index /// where i64 is actually the target word size. - /// - /// \param EltSize - the size of the type T in bytes - Address CreateConstGEP(Address Addr, uint64_t Index, CharUnits EltSize, + Address CreateConstGEP(Address Addr, uint64_t Index, const llvm::Twine &Name = "") { + const llvm::DataLayout &DL = BB->getParent()->getParent()->getDataLayout(); + CharUnits EltSize = + CharUnits::fromQuantity(DL.getTypeAllocSize(Addr.getElementType())); + return Address(CreateGEP(Addr.getElementType(), Addr.getPointer(), getSize(Index), Name), Addr.getAlignment().alignmentAtOffset(Index * EltSize)); @@ -245,31 +255,21 @@ public: } using CGBuilderBaseTy::CreateConstInBoundsGEP2_32; - Address CreateConstInBoundsGEP2_32(Address Addr, unsigned Idx0, - unsigned Idx1, const llvm::DataLayout &DL, - const llvm::Twine &Name = "") { + Address CreateConstInBoundsGEP2_32(Address Addr, unsigned Idx0, unsigned Idx1, + const llvm::Twine &Name = "") { + const llvm::DataLayout &DL = BB->getParent()->getParent()->getDataLayout(); + auto *GEP = cast<llvm::GetElementPtrInst>(CreateConstInBoundsGEP2_32( Addr.getElementType(), Addr.getPointer(), Idx0, Idx1, Name)); llvm::APInt Offset( DL.getIndexSizeInBits(Addr.getType()->getPointerAddressSpace()), 0, - /*IsSigned=*/true); + /*isSigned=*/true); if (!GEP->accumulateConstantOffset(DL, Offset)) llvm_unreachable("offset of GEP with constants is always computable"); return Address(GEP, Addr.getAlignment().alignmentAtOffset( CharUnits::fromQuantity(Offset.getSExtValue()))); } - llvm::Value *CreateConstInBoundsByteGEP(llvm::Value *Ptr, CharUnits Offset, - const llvm::Twine &Name = "") { - assert(Ptr->getType()->getPointerElementType() == TypeCache.Int8Ty); - return CreateInBoundsGEP(Ptr, getSize(Offset), Name); - } - llvm::Value *CreateConstByteGEP(llvm::Value *Ptr, CharUnits Offset, - const llvm::Twine &Name = "") { - assert(Ptr->getType()->getPointerElementType() == TypeCache.Int8Ty); - return CreateGEP(Ptr, getSize(Offset), Name); - } - using CGBuilderBaseTy::CreateMemCpy; llvm::CallInst *CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile = false) { @@ -298,6 +298,21 @@ public: return CreateMemSet(Dest.getPointer(), Value, Size, Dest.getAlignment().getQuantity(), IsVolatile); } + + using CGBuilderBaseTy::CreatePreserveStructAccessIndex; + Address CreatePreserveStructAccessIndex(Address Addr, + unsigned Index, + unsigned FieldIndex, + llvm::MDNode *DbgInfo) { + llvm::StructType *ElTy = cast<llvm::StructType>(Addr.getElementType()); + const llvm::DataLayout &DL = BB->getParent()->getParent()->getDataLayout(); + const llvm::StructLayout *Layout = DL.getStructLayout(ElTy); + auto Offset = CharUnits::fromQuantity(Layout->getElementOffset(Index)); + + return Address(CreatePreserveStructAccessIndex(Addr.getPointer(), + Index, FieldIndex, DbgInfo), + Addr.getAlignment().alignmentAtOffset(Offset)); + } }; } // end namespace CodeGen diff --git 
a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index a718f2f19aa6..a300bab49f9c 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -1,9 +1,8 @@ //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -18,6 +17,7 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" +#include "PatternInit.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" @@ -27,7 +27,6 @@ #include "clang/CodeGen/CGFunctionInfo.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Intrinsics.h" @@ -46,6 +45,25 @@ int64_t clamp(int64_t Value, int64_t Low, int64_t High) { return std::min(High, std::max(Low, Value)); } +static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, unsigned AlignmentInBytes) { + ConstantInt *Byte; + switch (CGF.getLangOpts().getTrivialAutoVarInit()) { + case LangOptions::TrivialAutoVarInitKind::Uninitialized: + // Nothing to initialize. + return; + case LangOptions::TrivialAutoVarInitKind::Zero: + Byte = CGF.Builder.getInt8(0x00); + break; + case LangOptions::TrivialAutoVarInitKind::Pattern: { + llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext()); + Byte = llvm::dyn_cast<llvm::ConstantInt>( + initializationPatternFor(CGF.CGM, Int8)); + break; + } + } + CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes); +} + /// getBuiltinLibFunction - Given a builtin id for a function like /// "__builtin_fabsf", return a Function* for "fabsf". llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, @@ -300,6 +318,34 @@ static Value *EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E, return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1)); } +// Build a plain volatile load. +static Value *EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E) { + Value *Ptr = CGF.EmitScalarExpr(E->getArg(0)); + QualType ElTy = E->getArg(0)->getType()->getPointeeType(); + CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy); + llvm::Type *ITy = + llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8); + Ptr = CGF.Builder.CreateBitCast(Ptr, ITy->getPointerTo()); + llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(Ptr, LoadSize); + Load->setVolatile(true); + return Load; +} + +// Build a plain volatile store. 
+static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) { + Value *Ptr = CGF.EmitScalarExpr(E->getArg(0)); + Value *Value = CGF.EmitScalarExpr(E->getArg(1)); + QualType ElTy = E->getArg(0)->getType()->getPointeeType(); + CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy); + llvm::Type *ITy = + llvm::IntegerType::get(CGF.getLLVMContext(), StoreSize.getQuantity() * 8); + Ptr = CGF.Builder.CreateBitCast(Ptr, ITy->getPointerTo()); + llvm::StoreInst *Store = + CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize); + Store->setVolatile(true); + return Store; +} + // Emit a simple mangled intrinsic that has 1 argument and a return type // matching the argument type. static Value *emitUnaryBuiltin(CodeGenFunction &CGF, @@ -307,7 +353,7 @@ static Value *emitUnaryBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID) { llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); - Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); return CGF.Builder.CreateCall(F, Src0); } @@ -318,7 +364,7 @@ static Value *emitBinaryBuiltin(CodeGenFunction &CGF, llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); - Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); return CGF.Builder.CreateCall(F, { Src0, Src1 }); } @@ -330,7 +376,7 @@ static Value *emitTernaryBuiltin(CodeGenFunction &CGF, llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); - Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 }); } @@ -341,13 +387,25 @@ static Value *emitFPIntBuiltin(CodeGenFunction &CGF, llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); - Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); return CGF.Builder.CreateCall(F, {Src0, Src1}); } +// Emit an intrinsic that has overloaded integer result and fp operand. +static Value *emitFPToIntRoundBuiltin(CodeGenFunction &CGF, + const CallExpr *E, + unsigned IntrinsicID) { + llvm::Type *ResultType = CGF.ConvertType(E->getType()); + llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); + + Function *F = CGF.CGM.getIntrinsic(IntrinsicID, + {ResultType, Src0->getType()}); + return CGF.Builder.CreateCall(F, Src0); +} + /// EmitFAbs - Emit a call to @llvm.fabs(). static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) { - Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); + Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); llvm::CallInst *Call = CGF.Builder.CreateCall(F, V); Call->setDoesNotAccessMemory(); return Call; @@ -408,7 +466,7 @@ static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF, "Arguments must be the same type. 
(Did you forget to make sure both " "arguments have the same integer width?)"); - llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType()); + Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType()); llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y}); Carry = CGF.Builder.CreateExtractValue(Tmp, 1); return CGF.Builder.CreateExtractValue(Tmp, 0); @@ -419,7 +477,7 @@ static Value *emitRangedBuiltin(CodeGenFunction &CGF, int low, int high) { llvm::MDBuilder MDHelper(CGF.getLLVMContext()); llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high)); - Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {}); + Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {}); llvm::Instruction *Call = CGF.Builder.CreateCall(F); Call->setMetadata(llvm::LLVMContext::MD_range, RNode); return Call; @@ -496,10 +554,11 @@ getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) { llvm::Value * CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, llvm::IntegerType *ResType, - llvm::Value *EmittedE) { + llvm::Value *EmittedE, + bool IsDynamic) { uint64_t ObjectSize; if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type)) - return emitBuiltinObjectSize(E, Type, ResType, EmittedE); + return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic); return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true); } @@ -515,7 +574,7 @@ CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, llvm::Value * CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, llvm::IntegerType *ResType, - llvm::Value *EmittedE) { + llvm::Value *EmittedE, bool IsDynamic) { // We need to reference an argument if the pointer is a parameter with the // pass_object_size attribute. if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) { @@ -530,7 +589,7 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, auto DIter = LocalDeclMap.find(D); assert(DIter != LocalDeclMap.end()); - return EmitLoadOfScalar(DIter->second, /*volatile=*/false, + return EmitLoadOfScalar(DIter->second, /*Volatile=*/false, getContext().getSizeType(), E->getBeginLoc()); } } @@ -545,13 +604,15 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, assert(Ptr->getType()->isPointerTy() && "Non-pointer passed to __builtin_object_size?"); - Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()}); + Function *F = + CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()}); // LLVM only supports 0 and 2, make sure that we pass along that as a boolean. Value *Min = Builder.getInt1((Type & 2) != 0); // For GCC compatibility, __builtin_object_size treat NULL as unknown size. 
Value *NullIsUnknown = Builder.getTrue(); - return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown}); + Value *Dynamic = Builder.getInt1(IsDynamic); + return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic}); } namespace { @@ -658,7 +719,7 @@ static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF, llvm::FunctionType::get(CGF.Int8Ty, {IntPtrType, IntType}, false); llvm::InlineAsm *IA = - llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true); + llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true); return CGF.Builder.CreateCall(IA, {BitBase, BitPos}); } @@ -793,16 +854,16 @@ static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind, llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get( CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::ReturnsTwice); - llvm::Constant *SetJmpFn = CGF.CGM.CreateRuntimeFunction( + llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction( llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name, ReturnsTwiceAttr, /*Local=*/true); llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast( CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy); llvm::Value *Args[] = {Buf, Arg1}; - llvm::CallSite CS = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args); - CS.setAttributes(ReturnsTwiceAttr); - return RValue::get(CS.getInstruction()); + llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args); + CB->setAttributes(ReturnsTwiceAttr); + return RValue::get(CB); } // Many of MSVC builtins are on x64, ARM and AArch64; to avoid repeating code, @@ -876,7 +937,7 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, Address IndexAddress = EmitPointerWithAlignment(E->getArg(0)); if (BuiltinID == MSVCIntrin::_BitScanForward) { - Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()}); ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false); Builder.CreateStore(ZeroCount, IndexAddress, false); @@ -884,7 +945,7 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth(); Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1); - Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()}); ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false); Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount); @@ -996,16 +1057,19 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, Asm = "udf #251"; Constraints = "{r0}"; break; + case llvm::Triple::aarch64: + Asm = "brk #0xF003"; + Constraints = "{w0}"; } llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false); llvm::InlineAsm *IA = - llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true); + llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true); llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::NoReturn); - CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0))); - CS.setAttributes(NoReturnAttr); - return CS.getInstruction(); + llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0))); + CI->setAttributes(NoReturnAttr); + return CI; } } llvm_unreachable("Incorrect MSVC intrinsic!"); @@ -1070,9 
+1134,10 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( return F; llvm::SmallVector<QualType, 4> ArgTys; - llvm::SmallVector<ImplicitParamDecl, 4> Params; - Params.emplace_back(Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), - Ctx.VoidPtrTy, ImplicitParamDecl::Other); + FunctionArgList Args; + Args.push_back(ImplicitParamDecl::Create( + Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy, + ImplicitParamDecl::Other)); ArgTys.emplace_back(Ctx.VoidPtrTy); for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) { @@ -1081,17 +1146,13 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( continue; QualType ArgTy = getOSLogArgType(Ctx, Size); - Params.emplace_back( + Args.push_back(ImplicitParamDecl::Create( Ctx, nullptr, SourceLocation(), &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy, - ImplicitParamDecl::Other); + ImplicitParamDecl::Other)); ArgTys.emplace_back(ArgTy); } - FunctionArgList Args; - for (auto &P : Params) - Args.push_back(&P); - QualType ReturnTy = Ctx.VoidTy; QualType FuncionTy = Ctx.getFunctionType(ReturnTy, ArgTys, {}); @@ -1106,6 +1167,7 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( Fn->setVisibility(llvm::GlobalValue::HiddenVisibility); CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn); CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn); + Fn->setDoesNotThrow(); // Attach 'noinline' at -Oz. if (CGM.getCodeGenOpts().OptimizeSize == 2) @@ -1123,7 +1185,7 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( auto AL = ApplyDebugLocation::CreateArtificial(*this); CharUnits Offset; - Address BufAddr(Builder.CreateLoad(GetAddrOfLocalVar(&Params[0]), "buf"), + Address BufAddr(Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), BufferAlignment); Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()), Builder.CreateConstByteGEP(BufAddr, Offset++, "summary")); @@ -1143,7 +1205,7 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( if (!Size.getQuantity()) continue; - Address Arg = GetAddrOfLocalVar(&Params[I]); + Address Arg = GetAddrOfLocalVar(Args[I]); Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData"); Addr = Builder.CreateBitCast(Addr, Arg.getPointer()->getType(), "argDataCast"); @@ -1330,13 +1392,11 @@ EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, } static llvm::Value *dumpRecord(CodeGenFunction &CGF, QualType RType, - Value *&RecordPtr, CharUnits Align, Value *Func, - int Lvl) { + Value *&RecordPtr, CharUnits Align, + llvm::FunctionCallee Func, int Lvl) { const auto *RT = RType->getAs<RecordType>(); ASTContext &Context = CGF.getContext(); RecordDecl *RD = RT->getDecl()->getDefinition(); - ASTContext &Ctx = RD->getASTContext(); - const ASTRecordLayout &RL = Ctx.getASTRecordLayout(RD); std::string Pad = std::string(Lvl * 4, ' '); Value *GString = @@ -1366,9 +1426,6 @@ static llvm::Value *dumpRecord(CodeGenFunction &CGF, QualType RType, } for (const auto *FD : RD->fields()) { - uint64_t Off = RL.getFieldOffset(FD->getFieldIndex()); - Off = Ctx.toCharUnitsFromBits(Off).getQuantity(); - Value *FieldPtr = RecordPtr; if (RD->isUnion()) FieldPtr = CGF.Builder.CreatePointerCast( @@ -1466,7 +1523,7 @@ RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) { // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same. unsigned IID = IsRotateRight ? 
Intrinsic::fshr : Intrinsic::fshl;
-  Value *F = CGM.getIntrinsic(IID, Ty);
+  Function *F = CGM.getIntrinsic(IID, Ty);
   return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
 }
@@ -1668,6 +1725,38 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BI__builtin_truncl:
     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
+  case Builtin::BIlround:
+  case Builtin::BIlroundf:
+  case Builtin::BIlroundl:
+  case Builtin::BI__builtin_lround:
+  case Builtin::BI__builtin_lroundf:
+  case Builtin::BI__builtin_lroundl:
+    return RValue::get(emitFPToIntRoundBuiltin(*this, E, Intrinsic::lround));
+
+  case Builtin::BIllround:
+  case Builtin::BIllroundf:
+  case Builtin::BIllroundl:
+  case Builtin::BI__builtin_llround:
+  case Builtin::BI__builtin_llroundf:
+  case Builtin::BI__builtin_llroundl:
+    return RValue::get(emitFPToIntRoundBuiltin(*this, E, Intrinsic::llround));
+
+  case Builtin::BIlrint:
+  case Builtin::BIlrintf:
+  case Builtin::BIlrintl:
+  case Builtin::BI__builtin_lrint:
+  case Builtin::BI__builtin_lrintf:
+  case Builtin::BI__builtin_lrintl:
+    return RValue::get(emitFPToIntRoundBuiltin(*this, E, Intrinsic::lrint));
+
+  case Builtin::BIllrint:
+  case Builtin::BIllrintf:
+  case Builtin::BIllrintl:
+  case Builtin::BI__builtin_llrint:
+  case Builtin::BI__builtin_llrintf:
+  case Builtin::BI__builtin_llrintl:
+    return RValue::get(emitFPToIntRoundBuiltin(*this, E, Intrinsic::llrint));
+
   default:
     break;
   }
@@ -1735,6 +1824,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   }
   case Builtin::BI__builtin_dump_struct: {
+    llvm::Type *LLVMIntTy = getTypes().ConvertType(getContext().IntTy);
+    llvm::FunctionType *LLVMFuncType = llvm::FunctionType::get(
+        LLVMIntTy, {llvm::Type::getInt8PtrTy(getLLVMContext())}, true);
+
     Value *Func = EmitScalarExpr(E->getArg(1)->IgnoreImpCasts());
     CharUnits Arg0Align = EmitPointerWithAlignment(E->getArg(0)).getAlignment();
@@ -1742,7 +1835,29 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     QualType Arg0Type = Arg0->getType()->getPointeeType();
     Value *RecordPtr = EmitScalarExpr(Arg0);
-    Value *Res = dumpRecord(*this, Arg0Type, RecordPtr, Arg0Align, Func, 0);
+    Value *Res = dumpRecord(*this, Arg0Type, RecordPtr, Arg0Align,
+                            {LLVMFuncType, Func}, 0);
+    return RValue::get(Res);
+  }
+
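For reference, __builtin_dump_struct still takes a printf-style callee; the new LLVMFuncType only gives the call a proper variadic int(char *, ...) prototype for EmitCall. A minimal usage sketch, assuming a hosted libc:

#include <stdio.h>

struct Point { int x; float y; };

void dump(struct Point *p) {
  // Walks the record's fields and prints each one through the
  // supplied printf-like function.
  __builtin_dump_struct(p, &printf);
}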
+  case Builtin::BI__builtin_preserve_access_index: {
+    // Only enabled preserved access index region when debuginfo
+    // is available as debuginfo is needed to preserve user-level
+    // access pattern.
+    if (!getDebugInfo()) {
+      CGM.Error(E->getExprLoc(), "using builtin_preserve_access_index() without -g");
+      return RValue::get(EmitScalarExpr(E->getArg(0)));
+    }
+
+    // Nested builtin_preserve_access_index() not supported
+    if (IsInPreservedAIRegion) {
+      CGM.Error(E->getExprLoc(), "nested builtin_preserve_access_index() not supported");
+      return RValue::get(EmitScalarExpr(E->getArg(0)));
+    }
+
+    IsInPreservedAIRegion = true;
+    Value *Res = EmitScalarExpr(E->getArg(0));
+    IsInPreservedAIRegion = false;
     return RValue::get(Res);
   }
@@ -1763,7 +1878,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     Value *ArgValue = EmitScalarExpr(E->getArg(0));
     llvm::Type *ArgType = ArgValue->getType();
-    Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
+    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
     llvm::Type *ResultType = ConvertType(E->getType());
     Value *Zero = llvm::Constant::getNullValue(ArgType);
@@ -1783,7 +1898,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
     llvm::Type *ArgType = ArgValue->getType();
-    Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
+    Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
     llvm::Type *ResultType = ConvertType(E->getType());
     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
@@ -1800,7 +1915,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
     llvm::Type *ArgType = ArgValue->getType();
-    Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
+    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
     llvm::Type *ResultType = ConvertType(E->getType());
     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
@@ -1817,7 +1932,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     Value *ArgValue = EmitScalarExpr(E->getArg(0));
     llvm::Type *ArgType = ArgValue->getType();
-    Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
+    Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
     llvm::Type *ResultType = ConvertType(E->getType());
     Value *Tmp =
@@ -1838,7 +1953,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     Value *ArgValue = EmitScalarExpr(E->getArg(0));
     llvm::Type *ArgType = ArgValue->getType();
-    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
+    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
     llvm::Type *ResultType = ConvertType(E->getType());
     Value *Tmp = Builder.CreateCall(F, ArgValue);
@@ -1854,7 +1969,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     Value *ArgValue = EmitScalarExpr(E->getArg(0));
     llvm::Type *ArgType = ArgValue->getType();
-    Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
+    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
     llvm::Type *ResultType = ConvertType(E->getType());
     Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});
@@ -1872,7 +1987,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     Value *ArgValue = EmitScalarExpr(E->getArg(0));
     llvm::Type *ArgType = ArgValue->getType();
-    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
+    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
     llvm::Type *ResultType = ConvertType(E->getType());
     Value *Result = Builder.CreateCall(F, ArgValue);
@@ -1898,7 +2013,7 @@ RValue
CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, if (CGM.getCodeGenOpts().OptimizationLevel == 0) return RValue::get(ArgValue); - Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType); + Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType); Value *Result = Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval"); return RValue::get(Result); @@ -1913,7 +2028,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue); unsigned Alignment = (unsigned)AlignmentCI->getZExtValue(); - EmitAlignmentAssumption(PtrValue, Ptr, /*The expr loc is sufficient.*/ SourceLocation(), + EmitAlignmentAssumption(PtrValue, Ptr, + /*The expr loc is sufficient.*/ SourceLocation(), Alignment, OffsetValue); return RValue::get(PtrValue); } @@ -1923,7 +2039,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(nullptr); Value *ArgValue = EmitScalarExpr(E->getArg(0)); - Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume); + Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume); return RValue::get(Builder.CreateCall(FnAssume, ArgValue)); } case Builtin::BI__builtin_bswap16: @@ -1968,17 +2084,34 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const Expr *Arg = E->getArg(0); QualType ArgType = Arg->getType(); - if (!hasScalarEvaluationKind(ArgType) || ArgType->isFunctionType()) - // We can only reason about scalar types. + // FIXME: The allowance for Obj-C pointers and block pointers is historical + // and likely a mistake. + if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() && + !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType()) + // Per the GCC documentation, only numeric constants are recognized after + // inlining. + return RValue::get(ConstantInt::get(ResultType, 0)); + + if (Arg->HasSideEffects(getContext())) + // The argument is unevaluated, so be conservative if it might have + // side-effects. return RValue::get(ConstantInt::get(ResultType, 0)); Value *ArgValue = EmitScalarExpr(Arg); - Value *F = CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType)); + if (ArgType->isObjCObjectPointerType()) { + // Convert Objective-C objects to id because we cannot distinguish between + // LLVM types for Obj-C classes as they are opaque. + ArgType = CGM.getContext().getObjCIdType(); + ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType)); + } + Function *F = + CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType)); Value *Result = Builder.CreateCall(F, ArgValue); if (Result->getType() != ResultType) Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false); return RValue::get(Result); } + case Builtin::BI__builtin_dynamic_object_size: case Builtin::BI__builtin_object_size: { unsigned Type = E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue(); @@ -1986,8 +2119,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // We pass this builtin onto the optimizer so that it can figure out the // object size in more complex cases. 
+ bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size; return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType, - /*EmittedE=*/nullptr)); + /*EmittedE=*/nullptr, IsDynamic)); } case Builtin::BI__builtin_prefetch: { Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0)); @@ -1997,17 +2131,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : llvm::ConstantInt::get(Int32Ty, 3); Value *Data = llvm::ConstantInt::get(Int32Ty, 1); - Value *F = CGM.getIntrinsic(Intrinsic::prefetch); + Function *F = CGM.getIntrinsic(Intrinsic::prefetch); return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data})); } case Builtin::BI__builtin_readcyclecounter: { - Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); + Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); return RValue::get(Builder.CreateCall(F)); } case Builtin::BI__builtin___clear_cache: { Value *Begin = EmitScalarExpr(E->getArg(0)); Value *End = EmitScalarExpr(E->getArg(1)); - Value *F = CGM.getIntrinsic(Intrinsic::clear_cache); + Function *F = CGM.getIntrinsic(Intrinsic::clear_cache); return RValue::get(Builder.CreateCall(F, {Begin, End})); } case Builtin::BI__builtin_trap: @@ -2029,7 +2163,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *Base = EmitScalarExpr(E->getArg(0)); Value *Exponent = EmitScalarExpr(E->getArg(1)); llvm::Type *ArgType = Base->getType(); - Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType); + Function *F = CGM.getIntrinsic(Intrinsic::powi, ArgType); return RValue::get(Builder.CreateCall(F, {Base, Exponent})); } @@ -2130,6 +2264,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); } + case Builtin::BI__builtin_flt_rounds: { + Function *F = CGM.getIntrinsic(Intrinsic::flt_rounds); + + llvm::Type *ResultType = ConvertType(E->getType()); + Value *Result = Builder.CreateCall(F); + if (Result->getType() != ResultType) + Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, + "cast"); + return RValue::get(Result); + } + case Builtin::BI__builtin_fpclassify: { Value *V = EmitScalarExpr(E->getArg(5)); llvm::Type *Ty = ConvertType(E->getArg(5)->getType()); @@ -2200,6 +2345,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, .getQuantity(); AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); AI->setAlignment(SuitableAlignmentInBytes); + initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes); return RValue::get(AI); } @@ -2212,6 +2358,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity(); AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); AI->setAlignment(AlignmentInBytes); + initializeAlloca(*this, AI, Size, AlignmentInBytes); return RValue::get(AI); } @@ -2392,24 +2539,24 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // this instead of hard-coding 0, which is correct for most targets. 
int32_t Offset = 0; - Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa); + Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa); return RValue::get(Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, Offset))); } case Builtin::BI__builtin_return_address: { Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), getContext().UnsignedIntTy); - Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); + Function *F = CGM.getIntrinsic(Intrinsic::returnaddress); return RValue::get(Builder.CreateCall(F, Depth)); } case Builtin::BI_ReturnAddress: { - Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); + Function *F = CGM.getIntrinsic(Intrinsic::returnaddress); return RValue::get(Builder.CreateCall(F, Builder.getInt32(0))); } case Builtin::BI__builtin_frame_address: { Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), getContext().UnsignedIntTy); - Value *F = CGM.getIntrinsic(Intrinsic::frameaddress); + Function *F = CGM.getIntrinsic(Intrinsic::frameaddress); return RValue::get(Builder.CreateCall(F, Depth)); } case Builtin::BI__builtin_extract_return_addr: { @@ -2445,9 +2592,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType()); assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) && "LLVM's __builtin_eh_return only supports 32- and 64-bit variants"); - Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32 - ? Intrinsic::eh_return_i32 - : Intrinsic::eh_return_i64); + Function *F = + CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32 + : Intrinsic::eh_return_i64); Builder.CreateCall(F, {Int, Ptr}); Builder.CreateUnreachable(); @@ -2457,7 +2604,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(nullptr); } case Builtin::BI__builtin_unwind_init: { - Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init); + Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init); return RValue::get(Builder.CreateCall(F)); } case Builtin::BI__builtin_extend_pointer: { @@ -2498,12 +2645,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // Store the stack pointer to the setjmp buffer. Value *StackAddr = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave)); - Address StackSaveSlot = - Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize()); + Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2); Builder.CreateStore(StackAddr, StackSaveSlot); // Call LLVM's EH setjmp, which is lightweight. - Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp); + Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp); Buf = Builder.CreateBitCast(Buf, Int8PtrTy); return RValue::get(Builder.CreateCall(F, Buf.getPointer())); } @@ -2719,7 +2865,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CGFunctionInfo &FuncInfo = CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args); llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); - llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName); + llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName); return EmitCall(FuncInfo, CGCallee::forDirect(Func), ReturnValueSlot(), Args); } @@ -2959,14 +3105,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } // Build and MDTuple of MDStrings and emit the intrinsic call. 
- llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {}); + llvm::Function *F = + CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {}); MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings); Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple)); return RValue::getIgnored(); } case Builtin::BI__builtin_annotation: { llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0)); - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation, + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::annotation, AnnVal->getType()); // Get the annotation string, go through casts. Sema requires this to be a @@ -3311,6 +3458,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI_interlockedbittestandreset_nf: return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E)); + // These builtins exist to emit regular volatile loads and stores not + // affected by the -fms-volatile setting. + case Builtin::BI__iso_volatile_load8: + case Builtin::BI__iso_volatile_load16: + case Builtin::BI__iso_volatile_load32: + case Builtin::BI__iso_volatile_load64: + return RValue::get(EmitISOVolatileLoad(*this, E)); + case Builtin::BI__iso_volatile_store8: + case Builtin::BI__iso_volatile_store16: + case Builtin::BI__iso_volatile_store32: + case Builtin::BI__iso_volatile_store64: + return RValue::get(EmitISOVolatileStore(*this, E)); + case Builtin::BI__exception_code: case Builtin::BI_exception_code: return RValue::get(EmitSEHExceptionCode()); @@ -3348,7 +3508,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, auto & Context = getContext(); auto SizeTy = Context.getSizeType(); auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy)); - Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T); + Function *F = CGM.getIntrinsic(Intrinsic::coro_size, T); return RValue::get(Builder.CreateCall(F)); } @@ -3591,7 +3751,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); AttrBuilder B; - B.addAttribute(Attribute::ByVal); + B.addByValAttr(NDRangeL.getAddress().getElementType()); llvm::AttributeList ByValAttrSet = llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B); @@ -3666,21 +3826,35 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // Any calls now have event arguments passed. if (NumArgs >= 7) { llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy); - llvm::Type *EventPtrTy = EventTy->getPointerTo( + llvm::PointerType *EventPtrTy = EventTy->getPointerTo( CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); llvm::Value *NumEvents = Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty); - llvm::Value *EventList = - E->getArg(4)->getType()->isArrayType() - ? EmitArrayToPointerDecay(E->getArg(4)).getPointer() - : EmitScalarExpr(E->getArg(4)); - llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5)); - // Convert to generic address space. - EventList = Builder.CreatePointerCast(EventList, EventPtrTy); - ClkEvent = ClkEvent->getType()->isIntegerTy() - ? Builder.CreateBitOrPointerCast(ClkEvent, EventPtrTy) - : Builder.CreatePointerCast(ClkEvent, EventPtrTy); + + // Since SemaOpenCLBuiltinEnqueueKernel allows fifth and sixth arguments + // to be a null pointer constant (including `0` literal), we can take it + // into account and emit null pointer directly. 
+ llvm::Value *EventWaitList = nullptr; + if (E->getArg(4)->isNullPointerConstant( + getContext(), Expr::NPC_ValueDependentIsNotNull)) { + EventWaitList = llvm::ConstantPointerNull::get(EventPtrTy); + } else { + EventWaitList = E->getArg(4)->getType()->isArrayType() + ? EmitArrayToPointerDecay(E->getArg(4)).getPointer() + : EmitScalarExpr(E->getArg(4)); + // Convert to generic address space. + EventWaitList = Builder.CreatePointerCast(EventWaitList, EventPtrTy); + } + llvm::Value *EventRet = nullptr; + if (E->getArg(5)->isNullPointerConstant( + getContext(), Expr::NPC_ValueDependentIsNotNull)) { + EventRet = llvm::ConstantPointerNull::get(EventPtrTy); + } else { + EventRet = + Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), EventPtrTy); + } + auto Info = CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6)); llvm::Value *Kernel = @@ -3692,8 +3866,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, QueueTy, Int32Ty, RangeTy, Int32Ty, EventPtrTy, EventPtrTy, GenericVoidPtrTy, GenericVoidPtrTy}; - std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents, - EventList, ClkEvent, Kernel, Block}; + std::vector<llvm::Value *> Args = {Queue, Flags, Range, + NumEvents, EventWaitList, EventRet, + Kernel, Block}; if (NumArgs == 7) { // Has events but no variadics. @@ -4922,8 +5097,7 @@ findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap, } #endif - const NeonIntrinsicInfo *Builtin = - std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID); + const NeonIntrinsicInfo *Builtin = llvm::lower_bound(IntrinsicMap, BuiltinID); if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID) return Builtin; @@ -5065,6 +5239,13 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( switch (BuiltinID) { default: break; + case NEON::BI__builtin_neon_vpadd_v: + case NEON::BI__builtin_neon_vpaddq_v: + // We don't allow fp/int overloading of intrinsics. + if (VTy->getElementType()->isFloatingPointTy() && + Int == Intrinsic::aarch64_neon_addp) + Int = Intrinsic::aarch64_neon_faddp; + break; case NEON::BI__builtin_neon_vabs_v: case NEON::BI__builtin_neon_vabsq_v: if (VTy->getElementType()->isFloatingPointTy()) @@ -5262,7 +5443,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( } case NEON::BI__builtin_neon_vfma_v: case NEON::BI__builtin_neon_vfmaq_v: { - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); + Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); @@ -5731,7 +5912,7 @@ static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, && "Can't fit 64-bit value in 32-bit register"); if (IsRead) { - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); llvm::Value *Call = Builder.CreateCall(F, Metadata); if (MixedTypes) @@ -5745,7 +5926,7 @@ static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, return Call; } - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1)); if (MixedTypes) { // Extend 32 bit write value to 64 bit to pass to write. 
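The vpadd change earlier in this hunk group is easiest to see from the intrinsic side: for float element types, NEON pairwise add must select the FP pairwise-add intrinsic rather than the integer one. An illustrative sketch, assuming an AArch64 target:

#include <arm_neon.h>

// float32x2_t pairwise add now lowers via llvm.aarch64.neon.faddp
// instead of the integer llvm.aarch64.neon.addp intrinsic.
float32x2_t pairwise_sum(float32x2_t a, float32x2_t b) {
  return vpadd_f32(a, b);
}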
@@ -5798,34 +5979,6 @@ static bool HasExtraNeonArgument(unsigned BuiltinID) { return true; } -Value *CodeGenFunction::EmitISOVolatileLoad(const CallExpr *E) { - Value *Ptr = EmitScalarExpr(E->getArg(0)); - QualType ElTy = E->getArg(0)->getType()->getPointeeType(); - CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy); - llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), - LoadSize.getQuantity() * 8); - Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); - llvm::LoadInst *Load = - Builder.CreateAlignedLoad(Ptr, LoadSize); - Load->setVolatile(true); - return Load; -} - -Value *CodeGenFunction::EmitISOVolatileStore(const CallExpr *E) { - Value *Ptr = EmitScalarExpr(E->getArg(0)); - Value *Value = EmitScalarExpr(E->getArg(1)); - QualType ElTy = E->getArg(0)->getType()->getPointeeType(); - CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); - llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), - StoreSize.getQuantity() * 8); - Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); - llvm::StoreInst *Store = - Builder.CreateAlignedStore(Value, Ptr, - StoreSize); - Store->setVolatile(true); - return Store; -} - Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch) { @@ -5846,9 +5999,9 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, llvm::InlineAsm *Emit = IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "", - /*SideEffects=*/true) + /*hasSideEffects=*/true) : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "", - /*SideEffects=*/true); + /*hasSideEffects=*/true); return Builder.CreateCall(Emit); } @@ -5866,7 +6019,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, // Locality is not supported on ARM target Value *Locality = llvm::ConstantInt::get(Int32Ty, 3); - Value *F = CGM.getIntrinsic(Intrinsic::prefetch); + Function *F = CGM.getIntrinsic(Intrinsic::prefetch); return Builder.CreateCall(F, {Address, RW, Locality, IsData}); } @@ -6065,19 +6218,6 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex"); } - switch (BuiltinID) { - case ARM::BI__iso_volatile_load8: - case ARM::BI__iso_volatile_load16: - case ARM::BI__iso_volatile_load32: - case ARM::BI__iso_volatile_load64: - return EmitISOVolatileLoad(E); - case ARM::BI__iso_volatile_store8: - case ARM::BI__iso_volatile_store16: - case ARM::BI__iso_volatile_store32: - case ARM::BI__iso_volatile_store64: - return EmitISOVolatileStore(E); - } - if (BuiltinID == ARM::BI__builtin_arm_clrex) { Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex); return Builder.CreateCall(F); @@ -6818,7 +6958,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify // PLDL3STRM or PLDL2STRM. 
- Value *F = CGM.getIntrinsic(Intrinsic::prefetch); + Function *F = CGM.getIntrinsic(Intrinsic::prefetch); return Builder.CreateCall(F, {Address, RW, Locality, IsData}); } @@ -6837,6 +6977,14 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); } + if (BuiltinID == AArch64::BI__builtin_arm_jcvt) { + assert((getContext().getTypeSize(E->getType()) == 32) && + "__jcvt of unusual size!"); + llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg); + } + if (BuiltinID == AArch64::BI__clear_cache) { assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); const FunctionDecl *FD = E->getDirectCallee(); @@ -6956,7 +7104,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); - llvm::Value *F = + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty}); return Builder.CreateCall(F, Metadata); } @@ -7002,6 +7150,84 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Arg0, Arg1}); } + // Memory Tagging Extensions (MTE) Intrinsics + Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic; + switch (BuiltinID) { + case AArch64::BI__builtin_arm_irg: + MTEIntrinsicID = Intrinsic::aarch64_irg; break; + case AArch64::BI__builtin_arm_addg: + MTEIntrinsicID = Intrinsic::aarch64_addg; break; + case AArch64::BI__builtin_arm_gmi: + MTEIntrinsicID = Intrinsic::aarch64_gmi; break; + case AArch64::BI__builtin_arm_ldg: + MTEIntrinsicID = Intrinsic::aarch64_ldg; break; + case AArch64::BI__builtin_arm_stg: + MTEIntrinsicID = Intrinsic::aarch64_stg; break; + case AArch64::BI__builtin_arm_subp: + MTEIntrinsicID = Intrinsic::aarch64_subp; break; + } + + if (MTEIntrinsicID != Intrinsic::not_intrinsic) { + llvm::Type *T = ConvertType(E->getType()); + + if (MTEIntrinsicID == Intrinsic::aarch64_irg) { + Value *Pointer = EmitScalarExpr(E->getArg(0)); + Value *Mask = EmitScalarExpr(E->getArg(1)); + + Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy); + Mask = Builder.CreateZExt(Mask, Int64Ty); + Value *RV = Builder.CreateCall( + CGM.getIntrinsic(MTEIntrinsicID), {Pointer, Mask}); + return Builder.CreatePointerCast(RV, T); + } + if (MTEIntrinsicID == Intrinsic::aarch64_addg) { + Value *Pointer = EmitScalarExpr(E->getArg(0)); + Value *TagOffset = EmitScalarExpr(E->getArg(1)); + + Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy); + TagOffset = Builder.CreateZExt(TagOffset, Int64Ty); + Value *RV = Builder.CreateCall( + CGM.getIntrinsic(MTEIntrinsicID), {Pointer, TagOffset}); + return Builder.CreatePointerCast(RV, T); + } + if (MTEIntrinsicID == Intrinsic::aarch64_gmi) { + Value *Pointer = EmitScalarExpr(E->getArg(0)); + Value *ExcludedMask = EmitScalarExpr(E->getArg(1)); + + ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty); + Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy); + return Builder.CreateCall( + CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask}); + } + // Although it is possible to supply a different return + // address (first arg) to this intrinsic, for now we set + // return address same as input address. 
+ if (MTEIntrinsicID == Intrinsic::aarch64_ldg) { + Value *TagAddress = EmitScalarExpr(E->getArg(0)); + TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy); + Value *RV = Builder.CreateCall( + CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress}); + return Builder.CreatePointerCast(RV, T); + } + // Although it is possible to supply a different tag (to set) + // to this intrinsic (as first arg), for now we supply + // the tag that is in input address arg (common use case). + if (MTEIntrinsicID == Intrinsic::aarch64_stg) { + Value *TagAddress = EmitScalarExpr(E->getArg(0)); + TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy); + return Builder.CreateCall( + CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress}); + } + if (MTEIntrinsicID == Intrinsic::aarch64_subp) { + Value *PointerA = EmitScalarExpr(E->getArg(0)); + Value *PointerB = EmitScalarExpr(E->getArg(1)); + PointerA = Builder.CreatePointerCast(PointerA, Int8PtrTy); + PointerB = Builder.CreatePointerCast(PointerB, Int8PtrTy); + return Builder.CreateCall( + CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB}); + } + } + if (BuiltinID == AArch64::BI__builtin_arm_rsr || BuiltinID == AArch64::BI__builtin_arm_rsr64 || BuiltinID == AArch64::BI__builtin_arm_rsrp || @@ -7052,25 +7278,27 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); llvm::Type *RegisterType = Int64Ty; - llvm::Type *ValueType = Int32Ty; llvm::Type *Types[] = { RegisterType }; if (BuiltinID == AArch64::BI_ReadStatusReg) { - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); - llvm::Value *Call = Builder.CreateCall(F, Metadata); + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); - return Builder.CreateTrunc(Call, ValueType); + return Builder.CreateCall(F, Metadata); } - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1)); - ArgValue = Builder.CreateZExt(ArgValue, RegisterType); return Builder.CreateCall(F, { Metadata, ArgValue }); } if (BuiltinID == AArch64::BI_AddressOfReturnAddress) { - llvm::Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); + return Builder.CreateCall(F); + } + + if (BuiltinID == AArch64::BI__builtin_sponentry) { + llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry); return Builder.CreateCall(F); } @@ -7608,13 +7836,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops.push_back(EmitScalarExpr(E->getArg(1))); return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh"); case NEON::BI__builtin_neon_vfmah_f16: { - Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy); + Function *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy); // NEON intrinsic puts accumulator first, unlike the LLVM fma. return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]}); } case NEON::BI__builtin_neon_vfmsh_f16: { - Value *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy); + Function *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy); Value *Zero = llvm::ConstantFP::getZeroValueForNegation(HalfTy); Value* Sub = Builder.CreateFSub(Zero, EmitScalarExpr(E->getArg(1)), "vsubh"); // NEON intrinsic puts accumulator first, unlike the LLVM fma. 
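The new MTE builtins map one-to-one onto the aarch64 irg/addg/gmi/ldg/stg/subp intrinsics handled above. A hedged usage sketch (requires an AArch64 target with MTE enabled; the function names are ours, not from the patch):

#include <stddef.h>

void *tag_block(void *p) {
  // Insert a random allocation tag (no tags excluded, mask 0),
  // then store that tag to tag memory for the granule.
  void *tagged = __builtin_arm_irg(p, 0);
  __builtin_arm_stg(tagged);
  return tagged;
}

ptrdiff_t untagged_distance(void *a, void *b) {
  // Pointer difference computed on the untagged addresses.
  return __builtin_arm_subp(a, b);
}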
@@ -7775,6 +8003,14 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, : Intrinsic::aarch64_neon_sqsub; return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl"); } + case NEON::BI__builtin_neon_vduph_lane_f16: { + return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), + "vget_lane"); + } + case NEON::BI__builtin_neon_vduph_laneq_f16: { + return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), + "vgetq_lane"); + } } llvm::VectorType *VTy = GetNeonType(this, Type); @@ -7845,11 +8081,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, NeonTypeFlags(NeonTypeFlags::Float64, false, true)); Ops[2] = Builder.CreateBitCast(Ops[2], VTy); Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); - Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); + Function *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); return Builder.CreateBitCast(Result, Ty); } - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); + Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); @@ -7863,7 +8099,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]}); } case NEON::BI__builtin_neon_vfmaq_laneq_v: { - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); + Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); @@ -7879,7 +8115,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vfmad_laneq_f64: { Ops.push_back(EmitScalarExpr(E->getArg(3))); llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext())); - Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); + Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty); Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); } @@ -8892,16 +9128,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Int = Intrinsic::aarch64_neon_suqadd; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd"); } - case AArch64::BI__iso_volatile_load8: - case AArch64::BI__iso_volatile_load16: - case AArch64::BI__iso_volatile_load32: - case AArch64::BI__iso_volatile_load64: - return EmitISOVolatileLoad(E); - case AArch64::BI__iso_volatile_store8: - case AArch64::BI__iso_volatile_store16: - case AArch64::BI__iso_volatile_store32: - case AArch64::BI__iso_volatile_store64: - return EmitISOVolatileStore(E); case AArch64::BI_BitScanForward: case AArch64::BI_BitScanForward64: return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E); @@ -9139,6 +9365,20 @@ static Value *EmitX86ExpandLoad(CodeGenFunction &CGF, return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] }); } +static Value *EmitX86CompressExpand(CodeGenFunction &CGF, + ArrayRef<Value *> Ops, + bool IsCompress) { + llvm::Type *ResultTy = Ops[1]->getType(); + + Value *MaskVec = getMaskVecValue(CGF, Ops[2], + ResultTy->getVectorNumElements()); + + Intrinsic::ID IID = IsCompress ? 
Intrinsic::x86_avx512_mask_compress
+                                 : Intrinsic::x86_avx512_mask_expand;
+  llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
+  return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
+}
+
 static Value *EmitX86CompressStore(CodeGenFunction &CGF,
                                    ArrayRef<Value *> Ops) {
   llvm::Type *ResultTy = Ops[1]->getType();
@@ -9184,10 +9424,50 @@ static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
   }
   unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
-  Value *F = CGF.CGM.getIntrinsic(IID, Ty);
+  Function *F = CGF.CGM.getIntrinsic(IID, Ty);
   return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
 }
+static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
+                           bool IsSigned) {
+  Value *Op0 = Ops[0];
+  Value *Op1 = Ops[1];
+  llvm::Type *Ty = Op0->getType();
+  uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
+
+  CmpInst::Predicate Pred;
+  switch (Imm) {
+  case 0x0:
+    Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+    break;
+  case 0x1:
+    Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
+    break;
+  case 0x2:
+    Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+    break;
+  case 0x3:
+    Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
+    break;
+  case 0x4:
+    Pred = ICmpInst::ICMP_EQ;
+    break;
+  case 0x5:
+    Pred = ICmpInst::ICMP_NE;
+    break;
+  case 0x6:
+    return llvm::Constant::getNullValue(Ty); // FALSE
+  case 0x7:
+    return llvm::Constant::getAllOnesValue(Ty); // TRUE
+  default:
+    llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
+  }
+
+  Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
+  Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
+  return Res;
+}
+
 static Value *EmitX86Select(CodeGenFunction &CGF,
                             Value *Mask, Value *Op0, Value *Op1) {
@@ -9278,6 +9558,25 @@ static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
   return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
 }
+static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF,
+                                    ArrayRef<Value *> Ops, bool IsSigned) {
+  unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
+  llvm::Type *Ty = Ops[1]->getType();
+
+  Value *Res;
+  if (Rnd != 4) {
+    Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
+                                 : Intrinsic::x86_avx512_uitofp_round;
+    Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
+    Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
+  } else {
+    Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
+                   : CGF.Builder.CreateUIToFP(Ops[0], Ty);
+  }
+
+  return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
+}
+
 static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) {
   llvm::Type *Ty = Ops[0]->getType();
@@ -9524,6 +9823,18 @@ Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
   return EmitX86CpuIs(CPUStr);
 }
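EmitX86vpcom above folds the XOP vpcom immediate into an ordinary IR icmp, so only the low 3 predicate bits matter (0=lt, 1=le, 2=gt, 3=ge, 4=eq, 5=ne, 6=false, 7=true). An illustrative sketch using the xopintrin.h wrapper (compile with -mxop; the fixed-predicate _mm_comlt_epu32 form works the same way):

#include <x86intrin.h>

__m128i less_than_u32(__m128i a, __m128i b) {
  // Immediate 0x0 selects "less than"; the unsigned variant becomes
  // icmp ult <4 x i32> followed by a sext back to the vector type.
  return _mm_com_epu32(a, b, 0x0);
}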
+// Convert a BF16 to a float.
+static Value *EmitX86CvtBF16ToFloatExpr(CodeGenFunction &CGF,
+                                        const CallExpr *E,
+                                        ArrayRef<Value *> Ops) {
+  llvm::Type *Int32Ty = CGF.Builder.getInt32Ty();
+  Value *ZeroExt = CGF.Builder.CreateZExt(Ops[0], Int32Ty);
+  Value *Shl = CGF.Builder.CreateShl(ZeroExt, 16);
+  llvm::Type *ResultType = CGF.ConvertType(E->getType());
+  Value *BitCast = CGF.Builder.CreateBitCast(Shl, ResultType);
+  return BitCast;
+}
+
 Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
   llvm::Type *Int32Ty = Builder.getInt32Ty();
@@ -9650,10 +9961,11 @@ llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint64_t FeaturesMask) {
 Value *CodeGenFunction::EmitX86CpuInit() {
   llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, /*Variadic*/ false);
-  llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
-  cast<llvm::GlobalValue>(Func)->setDSOLocal(true);
-  cast<llvm::GlobalValue>(Func)->setDLLStorageClass(
-      llvm::GlobalValue::DefaultStorageClass);
+  llvm::FunctionCallee Func =
+      CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
+  cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
+  cast<llvm::GlobalValue>(Func.getCallee())
+      ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
   return Builder.CreateCall(Func);
 }
@@ -9722,7 +10034,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
     Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
     Value *Data = ConstantInt::get(Int32Ty, 1);
-    Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
+    Function *F = CGM.getIntrinsic(Intrinsic::prefetch);
     return Builder.CreateCall(F, {Address, RW, Locality, Data});
   }
   case X86::BI_mm_clflush: {
@@ -9753,13 +10065,13 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_lzcnt_u16:
   case X86::BI__builtin_ia32_lzcnt_u32:
   case X86::BI__builtin_ia32_lzcnt_u64: {
-    Value *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
+    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
     return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
   }
   case X86::BI__builtin_ia32_tzcnt_u16:
   case X86::BI__builtin_ia32_tzcnt_u32:
   case X86::BI__builtin_ia32_tzcnt_u64: {
-    Value *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
+    Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
     return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
   }
   case X86::BI__builtin_ia32_undef128:
@@ -9833,7 +10145,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_xsavec:
   case X86::BI__builtin_ia32_xsavec64:
   case X86::BI__builtin_ia32_xsaves:
-  case X86::BI__builtin_ia32_xsaves64: {
+  case X86::BI__builtin_ia32_xsaves64:
+  case X86::BI__builtin_ia32_xsetbv:
+  case X86::BI_xsetbv: {
     Intrinsic::ID ID;
 #define INTRINSIC_X86_XSAVE_ID(NAME) \
   case X86::BI__builtin_ia32_##NAME: \
@@ -9853,6 +10167,10 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     INTRINSIC_X86_XSAVE_ID(xsavec64);
     INTRINSIC_X86_XSAVE_ID(xsaves);
     INTRINSIC_X86_XSAVE_ID(xsaves64);
+    INTRINSIC_X86_XSAVE_ID(xsetbv);
+  case X86::BI_xsetbv:
+    ID = Intrinsic::x86_xsetbv;
+    break;
   }
 #undef INTRINSIC_X86_XSAVE_ID
     Value *Mhi = Builder.CreateTrunc(
@@ -9862,6 +10180,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     Ops.push_back(Mlo);
     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
   }
+  case X86::BI__builtin_ia32_xgetbv:
+  case X86::BI_xgetbv:
+    return
Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops); case X86::BI__builtin_ia32_storedqudi128_mask: case X86::BI__builtin_ia32_storedqusi128_mask: case X86::BI__builtin_ia32_storedquhi128_mask: @@ -9930,6 +10251,15 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_cvtq2mask512: return EmitX86ConvertToMask(*this, Ops[0]); + case X86::BI__builtin_ia32_cvtdq2ps512_mask: + case X86::BI__builtin_ia32_cvtqq2ps512_mask: + case X86::BI__builtin_ia32_cvtqq2pd512_mask: + return EmitX86ConvertIntToFp(*this, Ops, /*IsSigned*/true); + case X86::BI__builtin_ia32_cvtudq2ps512_mask: + case X86::BI__builtin_ia32_cvtuqq2ps512_mask: + case X86::BI__builtin_ia32_cvtuqq2pd512_mask: + return EmitX86ConvertIntToFp(*this, Ops, /*IsSigned*/false); + case X86::BI__builtin_ia32_vfmaddss3: case X86::BI__builtin_ia32_vfmaddsd3: case X86::BI__builtin_ia32_vfmaddss3_mask: @@ -10073,22 +10403,262 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_compressstoreqi512_mask: return EmitX86CompressStore(*this, Ops); - case X86::BI__builtin_ia32_storehps: - case X86::BI__builtin_ia32_storelps: { - llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty); - llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2); + case X86::BI__builtin_ia32_expanddf128_mask: + case X86::BI__builtin_ia32_expanddf256_mask: + case X86::BI__builtin_ia32_expanddf512_mask: + case X86::BI__builtin_ia32_expandsf128_mask: + case X86::BI__builtin_ia32_expandsf256_mask: + case X86::BI__builtin_ia32_expandsf512_mask: + case X86::BI__builtin_ia32_expanddi128_mask: + case X86::BI__builtin_ia32_expanddi256_mask: + case X86::BI__builtin_ia32_expanddi512_mask: + case X86::BI__builtin_ia32_expandsi128_mask: + case X86::BI__builtin_ia32_expandsi256_mask: + case X86::BI__builtin_ia32_expandsi512_mask: + case X86::BI__builtin_ia32_expandhi128_mask: + case X86::BI__builtin_ia32_expandhi256_mask: + case X86::BI__builtin_ia32_expandhi512_mask: + case X86::BI__builtin_ia32_expandqi128_mask: + case X86::BI__builtin_ia32_expandqi256_mask: + case X86::BI__builtin_ia32_expandqi512_mask: + return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false); + + case X86::BI__builtin_ia32_compressdf128_mask: + case X86::BI__builtin_ia32_compressdf256_mask: + case X86::BI__builtin_ia32_compressdf512_mask: + case X86::BI__builtin_ia32_compresssf128_mask: + case X86::BI__builtin_ia32_compresssf256_mask: + case X86::BI__builtin_ia32_compresssf512_mask: + case X86::BI__builtin_ia32_compressdi128_mask: + case X86::BI__builtin_ia32_compressdi256_mask: + case X86::BI__builtin_ia32_compressdi512_mask: + case X86::BI__builtin_ia32_compresssi128_mask: + case X86::BI__builtin_ia32_compresssi256_mask: + case X86::BI__builtin_ia32_compresssi512_mask: + case X86::BI__builtin_ia32_compresshi128_mask: + case X86::BI__builtin_ia32_compresshi256_mask: + case X86::BI__builtin_ia32_compresshi512_mask: + case X86::BI__builtin_ia32_compressqi128_mask: + case X86::BI__builtin_ia32_compressqi256_mask: + case X86::BI__builtin_ia32_compressqi512_mask: + return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true); + + case X86::BI__builtin_ia32_gather3div2df: + case X86::BI__builtin_ia32_gather3div2di: + case X86::BI__builtin_ia32_gather3div4df: + case X86::BI__builtin_ia32_gather3div4di: + case X86::BI__builtin_ia32_gather3div4sf: + case X86::BI__builtin_ia32_gather3div4si: + case X86::BI__builtin_ia32_gather3div8sf: + case X86::BI__builtin_ia32_gather3div8si: + case X86::BI__builtin_ia32_gather3siv2df: + case 
X86::BI__builtin_ia32_gather3siv2di: + case X86::BI__builtin_ia32_gather3siv4df: + case X86::BI__builtin_ia32_gather3siv4di: + case X86::BI__builtin_ia32_gather3siv4sf: + case X86::BI__builtin_ia32_gather3siv4si: + case X86::BI__builtin_ia32_gather3siv8sf: + case X86::BI__builtin_ia32_gather3siv8si: + case X86::BI__builtin_ia32_gathersiv8df: + case X86::BI__builtin_ia32_gathersiv16sf: + case X86::BI__builtin_ia32_gatherdiv8df: + case X86::BI__builtin_ia32_gatherdiv16sf: + case X86::BI__builtin_ia32_gathersiv8di: + case X86::BI__builtin_ia32_gathersiv16si: + case X86::BI__builtin_ia32_gatherdiv8di: + case X86::BI__builtin_ia32_gatherdiv16si: { + Intrinsic::ID IID; + switch (BuiltinID) { + default: llvm_unreachable("Unexpected builtin"); + case X86::BI__builtin_ia32_gather3div2df: + IID = Intrinsic::x86_avx512_mask_gather3div2_df; + break; + case X86::BI__builtin_ia32_gather3div2di: + IID = Intrinsic::x86_avx512_mask_gather3div2_di; + break; + case X86::BI__builtin_ia32_gather3div4df: + IID = Intrinsic::x86_avx512_mask_gather3div4_df; + break; + case X86::BI__builtin_ia32_gather3div4di: + IID = Intrinsic::x86_avx512_mask_gather3div4_di; + break; + case X86::BI__builtin_ia32_gather3div4sf: + IID = Intrinsic::x86_avx512_mask_gather3div4_sf; + break; + case X86::BI__builtin_ia32_gather3div4si: + IID = Intrinsic::x86_avx512_mask_gather3div4_si; + break; + case X86::BI__builtin_ia32_gather3div8sf: + IID = Intrinsic::x86_avx512_mask_gather3div8_sf; + break; + case X86::BI__builtin_ia32_gather3div8si: + IID = Intrinsic::x86_avx512_mask_gather3div8_si; + break; + case X86::BI__builtin_ia32_gather3siv2df: + IID = Intrinsic::x86_avx512_mask_gather3siv2_df; + break; + case X86::BI__builtin_ia32_gather3siv2di: + IID = Intrinsic::x86_avx512_mask_gather3siv2_di; + break; + case X86::BI__builtin_ia32_gather3siv4df: + IID = Intrinsic::x86_avx512_mask_gather3siv4_df; + break; + case X86::BI__builtin_ia32_gather3siv4di: + IID = Intrinsic::x86_avx512_mask_gather3siv4_di; + break; + case X86::BI__builtin_ia32_gather3siv4sf: + IID = Intrinsic::x86_avx512_mask_gather3siv4_sf; + break; + case X86::BI__builtin_ia32_gather3siv4si: + IID = Intrinsic::x86_avx512_mask_gather3siv4_si; + break; + case X86::BI__builtin_ia32_gather3siv8sf: + IID = Intrinsic::x86_avx512_mask_gather3siv8_sf; + break; + case X86::BI__builtin_ia32_gather3siv8si: + IID = Intrinsic::x86_avx512_mask_gather3siv8_si; + break; + case X86::BI__builtin_ia32_gathersiv8df: + IID = Intrinsic::x86_avx512_mask_gather_dpd_512; + break; + case X86::BI__builtin_ia32_gathersiv16sf: + IID = Intrinsic::x86_avx512_mask_gather_dps_512; + break; + case X86::BI__builtin_ia32_gatherdiv8df: + IID = Intrinsic::x86_avx512_mask_gather_qpd_512; + break; + case X86::BI__builtin_ia32_gatherdiv16sf: + IID = Intrinsic::x86_avx512_mask_gather_qps_512; + break; + case X86::BI__builtin_ia32_gathersiv8di: + IID = Intrinsic::x86_avx512_mask_gather_dpq_512; + break; + case X86::BI__builtin_ia32_gathersiv16si: + IID = Intrinsic::x86_avx512_mask_gather_dpi_512; + break; + case X86::BI__builtin_ia32_gatherdiv8di: + IID = Intrinsic::x86_avx512_mask_gather_qpq_512; + break; + case X86::BI__builtin_ia32_gatherdiv16si: + IID = Intrinsic::x86_avx512_mask_gather_qpi_512; + break; + } - // cast val v2i64 - Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast"); + unsigned MinElts = std::min(Ops[0]->getType()->getVectorNumElements(), + Ops[2]->getType()->getVectorNumElements()); + Ops[3] = getMaskVecValue(*this, Ops[3], MinElts); + Function *Intr = CGM.getIntrinsic(IID); + return 
Builder.CreateCall(Intr, Ops); + } - // extract (0, 1) - unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1; - Ops[1] = Builder.CreateExtractElement(Ops[1], Index, "extract"); + case X86::BI__builtin_ia32_scattersiv8df: + case X86::BI__builtin_ia32_scattersiv16sf: + case X86::BI__builtin_ia32_scatterdiv8df: + case X86::BI__builtin_ia32_scatterdiv16sf: + case X86::BI__builtin_ia32_scattersiv8di: + case X86::BI__builtin_ia32_scattersiv16si: + case X86::BI__builtin_ia32_scatterdiv8di: + case X86::BI__builtin_ia32_scatterdiv16si: + case X86::BI__builtin_ia32_scatterdiv2df: + case X86::BI__builtin_ia32_scatterdiv2di: + case X86::BI__builtin_ia32_scatterdiv4df: + case X86::BI__builtin_ia32_scatterdiv4di: + case X86::BI__builtin_ia32_scatterdiv4sf: + case X86::BI__builtin_ia32_scatterdiv4si: + case X86::BI__builtin_ia32_scatterdiv8sf: + case X86::BI__builtin_ia32_scatterdiv8si: + case X86::BI__builtin_ia32_scattersiv2df: + case X86::BI__builtin_ia32_scattersiv2di: + case X86::BI__builtin_ia32_scattersiv4df: + case X86::BI__builtin_ia32_scattersiv4di: + case X86::BI__builtin_ia32_scattersiv4sf: + case X86::BI__builtin_ia32_scattersiv4si: + case X86::BI__builtin_ia32_scattersiv8sf: + case X86::BI__builtin_ia32_scattersiv8si: { + Intrinsic::ID IID; + switch (BuiltinID) { + default: llvm_unreachable("Unexpected builtin"); + case X86::BI__builtin_ia32_scattersiv8df: + IID = Intrinsic::x86_avx512_mask_scatter_dpd_512; + break; + case X86::BI__builtin_ia32_scattersiv16sf: + IID = Intrinsic::x86_avx512_mask_scatter_dps_512; + break; + case X86::BI__builtin_ia32_scatterdiv8df: + IID = Intrinsic::x86_avx512_mask_scatter_qpd_512; + break; + case X86::BI__builtin_ia32_scatterdiv16sf: + IID = Intrinsic::x86_avx512_mask_scatter_qps_512; + break; + case X86::BI__builtin_ia32_scattersiv8di: + IID = Intrinsic::x86_avx512_mask_scatter_dpq_512; + break; + case X86::BI__builtin_ia32_scattersiv16si: + IID = Intrinsic::x86_avx512_mask_scatter_dpi_512; + break; + case X86::BI__builtin_ia32_scatterdiv8di: + IID = Intrinsic::x86_avx512_mask_scatter_qpq_512; + break; + case X86::BI__builtin_ia32_scatterdiv16si: + IID = Intrinsic::x86_avx512_mask_scatter_qpi_512; + break; + case X86::BI__builtin_ia32_scatterdiv2df: + IID = Intrinsic::x86_avx512_mask_scatterdiv2_df; + break; + case X86::BI__builtin_ia32_scatterdiv2di: + IID = Intrinsic::x86_avx512_mask_scatterdiv2_di; + break; + case X86::BI__builtin_ia32_scatterdiv4df: + IID = Intrinsic::x86_avx512_mask_scatterdiv4_df; + break; + case X86::BI__builtin_ia32_scatterdiv4di: + IID = Intrinsic::x86_avx512_mask_scatterdiv4_di; + break; + case X86::BI__builtin_ia32_scatterdiv4sf: + IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf; + break; + case X86::BI__builtin_ia32_scatterdiv4si: + IID = Intrinsic::x86_avx512_mask_scatterdiv4_si; + break; + case X86::BI__builtin_ia32_scatterdiv8sf: + IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf; + break; + case X86::BI__builtin_ia32_scatterdiv8si: + IID = Intrinsic::x86_avx512_mask_scatterdiv8_si; + break; + case X86::BI__builtin_ia32_scattersiv2df: + IID = Intrinsic::x86_avx512_mask_scattersiv2_df; + break; + case X86::BI__builtin_ia32_scattersiv2di: + IID = Intrinsic::x86_avx512_mask_scattersiv2_di; + break; + case X86::BI__builtin_ia32_scattersiv4df: + IID = Intrinsic::x86_avx512_mask_scattersiv4_df; + break; + case X86::BI__builtin_ia32_scattersiv4di: + IID = Intrinsic::x86_avx512_mask_scattersiv4_di; + break; + case X86::BI__builtin_ia32_scattersiv4sf: + IID = Intrinsic::x86_avx512_mask_scattersiv4_sf; + break; + case 
X86::BI__builtin_ia32_scattersiv4si: + IID = Intrinsic::x86_avx512_mask_scattersiv4_si; + break; + case X86::BI__builtin_ia32_scattersiv8sf: + IID = Intrinsic::x86_avx512_mask_scattersiv8_sf; + break; + case X86::BI__builtin_ia32_scattersiv8si: + IID = Intrinsic::x86_avx512_mask_scattersiv8_si; + break; + } - // cast pointer to i64 & store - Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy); - return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); + unsigned MinElts = std::min(Ops[2]->getType()->getVectorNumElements(), + Ops[3]->getType()->getVectorNumElements()); + Ops[1] = getMaskVecValue(*this, Ops[1], MinElts); + Function *Intr = CGM.getIntrinsic(IID); + return Builder.CreateCall(Intr, Ops); } + case X86::BI__builtin_ia32_vextractf128_pd256: case X86::BI__builtin_ia32_vextractf128_ps256: case X86::BI__builtin_ia32_vextractf128_si256: @@ -10693,6 +11263,16 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; return EmitX86MaskedCompare(*this, CC, false, Ops); } + case X86::BI__builtin_ia32_vpcomb: + case X86::BI__builtin_ia32_vpcomw: + case X86::BI__builtin_ia32_vpcomd: + case X86::BI__builtin_ia32_vpcomq: + return EmitX86vpcom(*this, Ops, true); + case X86::BI__builtin_ia32_vpcomub: + case X86::BI__builtin_ia32_vpcomuw: + case X86::BI__builtin_ia32_vpcomud: + case X86::BI__builtin_ia32_vpcomuq: + return EmitX86vpcom(*this, Ops, false); case X86::BI__builtin_ia32_kortestcqi: case X86::BI__builtin_ia32_kortestchi: @@ -11154,6 +11734,47 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn); } + case X86::BI__builtin_ia32_vp2intersect_q_512: + case X86::BI__builtin_ia32_vp2intersect_q_256: + case X86::BI__builtin_ia32_vp2intersect_q_128: + case X86::BI__builtin_ia32_vp2intersect_d_512: + case X86::BI__builtin_ia32_vp2intersect_d_256: + case X86::BI__builtin_ia32_vp2intersect_d_128: { + unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); + Intrinsic::ID ID; + + switch (BuiltinID) { + default: llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_vp2intersect_q_512: + ID = Intrinsic::x86_avx512_vp2intersect_q_512; + break; + case X86::BI__builtin_ia32_vp2intersect_q_256: + ID = Intrinsic::x86_avx512_vp2intersect_q_256; + break; + case X86::BI__builtin_ia32_vp2intersect_q_128: + ID = Intrinsic::x86_avx512_vp2intersect_q_128; + break; + case X86::BI__builtin_ia32_vp2intersect_d_512: + ID = Intrinsic::x86_avx512_vp2intersect_d_512; + break; + case X86::BI__builtin_ia32_vp2intersect_d_256: + ID = Intrinsic::x86_avx512_vp2intersect_d_256; + break; + case X86::BI__builtin_ia32_vp2intersect_d_128: + ID = Intrinsic::x86_avx512_vp2intersect_d_128; + break; + } + + Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]}); + Value *Result = Builder.CreateExtractValue(Call, 0); + Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr); + Builder.CreateDefaultAlignedStore(Result, Ops[2]); + + Result = Builder.CreateExtractValue(Call, 1); + Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr); + return Builder.CreateDefaultAlignedStore(Result, Ops[3]); + } + case X86::BI__builtin_ia32_vpmultishiftqb128: case X86::BI__builtin_ia32_vpmultishiftqb256: case X86::BI__builtin_ia32_vpmultishiftqb512: { @@ -11336,6 +11957,32 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_cmpordsd: return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7); 
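Editorial note on the gather/scatter lowering above: the mask operand is resized with getMaskVecValue to the smaller of the data and index vector widths, since only that many lanes are actually transferred. A minimal user-level sketch of the corresponding contract, assuming the Intel intrinsic _mm512_mask_i64scatter_ps (which is backed by the __builtin_ia32_scatterdiv16sf case above); the function scatter_demo and its arguments are illustrative only, not part of the patch:

    #include <immintrin.h>

    // Illustrative sketch: _mm512_mask_i64scatter_ps stores 8 floats (__m256)
    // through 8 64-bit indices (__m512i), so the mask carries
    // min(8 data lanes, 8 index lanes) = 8 significant bits -- an __mmask8,
    // matching the MinElts computation in the lowering above.
    void scatter_demo(float *base, __m512i idx, __m256 vals, __mmask8 m) {
      _mm512_mask_i64scatter_ps(base, m, idx, vals, /*scale=*/4);
    }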
+// AVX512 bf16 intrinsics + case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: { + Ops[2] = getMaskVecValue(*this, Ops[2], + Ops[0]->getType()->getVectorNumElements()); + Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128; + return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); + } + case X86::BI__builtin_ia32_cvtsbf162ss_32: + return EmitX86CvtBF16ToFloatExpr(*this, E, Ops); + + case X86::BI__builtin_ia32_cvtneps2bf16_256_mask: + case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: { + Intrinsic::ID IID; + switch (BuiltinID) { + default: llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_cvtneps2bf16_256_mask: + IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256; + break; + case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: + IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512; + break; + } + Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]); + return EmitX86Select(*this, Ops[2], Res, Ops[1]); + } + case X86::BI__emul: case X86::BI__emulu: { llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64); @@ -11386,9 +12033,10 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); // return Builder.CreateCall(F, Ops); llvm::Type *Int128Ty = Builder.getInt128Ty(); - Value *Val = Builder.CreateOr( - Builder.CreateShl(Builder.CreateZExt(Ops[1], Int128Ty), 64), - Builder.CreateZExt(Ops[0], Int128Ty)); + Value *HighPart128 = + Builder.CreateShl(Builder.CreateZExt(Ops[1], Int128Ty), 64); + Value *LowPart128 = Builder.CreateZExt(Ops[0], Int128Ty); + Value *Val = Builder.CreateOr(HighPart128, LowPart128); Value *Amt = Builder.CreateAnd(Builder.CreateZExt(Ops[2], Int128Ty), llvm::ConstantInt::get(Int128Ty, 0x3f)); Value *Res; @@ -11465,7 +12113,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, } case X86::BI_AddressOfReturnAddress: { - Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); + Function *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); return Builder.CreateCall(F); } case X86::BI__stosb: { @@ -11480,13 +12128,13 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // This syscall signals a driver assertion failure in x86 NT kernels. 
llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); llvm::InlineAsm *IA = - llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true); + llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true); llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::NoReturn); - CallSite CS = Builder.CreateCall(IA); - CS.setAttributes(NoReturnAttr); - return CS.getInstruction(); + llvm::CallInst *CI = Builder.CreateCall(IA); + CI->setAttributes(NoReturnAttr); + return CI; } case X86::BI__readfsbyte: case X86::BI__readfsword: @@ -12001,7 +12649,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::Value *Y = EmitScalarExpr(E->getArg(1)); llvm::Value *Z = EmitScalarExpr(E->getArg(2)); - llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale, + llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale, X->getType()); llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z}); @@ -12023,7 +12671,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); llvm::Value *Src3 = EmitScalarExpr(E->getArg(3)); - llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas, + llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas, Src0->getType()); llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3); return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool}); @@ -12031,6 +12679,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_ds_swizzle: return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle); + case AMDGPU::BI__builtin_amdgcn_mov_dpp8: + return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_mov_dpp8); case AMDGPU::BI__builtin_amdgcn_mov_dpp: case AMDGPU::BI__builtin_amdgcn_update_dpp: { llvm::SmallVector<llvm::Value *, 6> Args; @@ -12039,7 +12689,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, assert(Args.size() == 5 || Args.size() == 6); if (Args.size() == 5) Args.insert(Args.begin(), llvm::UndefValue::get(Args[0]->getType())); - Value *F = + Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType()); return Builder.CreateCall(F, Args); } @@ -12080,13 +12730,13 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_frexp_exp: case AMDGPU::BI__builtin_amdgcn_frexp_expf: { Value *Src0 = EmitScalarExpr(E->getArg(0)); - Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, + Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, { Builder.getInt32Ty(), Src0->getType() }); return Builder.CreateCall(F, Src0); } case AMDGPU::BI__builtin_amdgcn_frexp_exph: { Value *Src0 = EmitScalarExpr(E->getArg(0)); - Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, + Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, { Builder.getInt16Ty(), Src0->getType() }); return Builder.CreateCall(F, Src0); } @@ -12096,14 +12746,34 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract); case AMDGPU::BI__builtin_amdgcn_lerp: return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp); + case AMDGPU::BI__builtin_amdgcn_ubfe: + return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_ubfe); + case AMDGPU::BI__builtin_amdgcn_sbfe: + return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_sbfe); case AMDGPU::BI__builtin_amdgcn_uicmp: case AMDGPU::BI__builtin_amdgcn_uicmpl: case 
AMDGPU::BI__builtin_amdgcn_sicmp: - case AMDGPU::BI__builtin_amdgcn_sicmpl: - return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp); + case AMDGPU::BI__builtin_amdgcn_sicmpl: { + llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); + llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); + llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); + + // FIXME-GFX10: How should 32 bit mask be handled? + Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp, + { Builder.getInt64Ty(), Src0->getType() }); + return Builder.CreateCall(F, { Src0, Src1, Src2 }); + } case AMDGPU::BI__builtin_amdgcn_fcmp: - case AMDGPU::BI__builtin_amdgcn_fcmpf: - return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp); + case AMDGPU::BI__builtin_amdgcn_fcmpf: { + llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); + llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); + llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); + + // FIXME-GFX10: How should 32 bit mask be handled? + Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp, + { Builder.getInt64Ty(), Src0->getType() }); + return Builder.CreateCall(F, { Src0, Src1, Src2 }); + } case AMDGPU::BI__builtin_amdgcn_class: case AMDGPU::BI__builtin_amdgcn_classf: case AMDGPU::BI__builtin_amdgcn_classh: @@ -12111,6 +12781,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_fmed3f: case AMDGPU::BI__builtin_amdgcn_fmed3h: return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3); + case AMDGPU::BI__builtin_amdgcn_ds_append: + case AMDGPU::BI__builtin_amdgcn_ds_consume: { + Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ? + Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume; + Value *Src0 = EmitScalarExpr(E->getArg(0)); + Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() }); + return Builder.CreateCall(F, { Src0, Builder.getFalse() }); + } case AMDGPU::BI__builtin_amdgcn_read_exec: { CallInst *CI = cast<CallInst>( EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec")); @@ -12160,7 +12838,7 @@ static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, for (unsigned I = 0; I < NumArgs; ++I) Args[I] = CGF.EmitScalarExpr(E->getArg(I)); Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs)); - Value *F = CGF.CGM.getIntrinsic(IntrinsicID); + Function *F = CGF.CGM.getIntrinsic(IntrinsicID); Value *Call = CGF.Builder.CreateCall(F, Args); Value *CC = CGF.Builder.CreateExtractValue(Call, 1); CGF.Builder.CreateStore(CC, CCPtr); @@ -12173,30 +12851,30 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, case SystemZ::BI__builtin_tbegin: { Value *TDB = EmitScalarExpr(E->getArg(0)); Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); - Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin); + Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin); return Builder.CreateCall(F, {TDB, Control}); } case SystemZ::BI__builtin_tbegin_nofloat: { Value *TDB = EmitScalarExpr(E->getArg(0)); Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); - Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat); + Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat); return Builder.CreateCall(F, {TDB, Control}); } case SystemZ::BI__builtin_tbeginc: { Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy); Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08); - Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc); + Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc); return Builder.CreateCall(F, {TDB, Control}); } case SystemZ::BI__builtin_tabort: { Value 
*Data = EmitScalarExpr(E->getArg(0)); - Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort); + Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort); return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort")); } case SystemZ::BI__builtin_non_tx_store: { Value *Address = EmitScalarExpr(E->getArg(0)); Value *Data = EmitScalarExpr(E->getArg(1)); - Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg); + Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg); return Builder.CreateCall(F, {Data, Address}); } @@ -12406,6 +13084,15 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {X, Y, M4Value}); } + case SystemZ::BI__builtin_s390_vlbrh: + case SystemZ::BI__builtin_s390_vlbrf: + case SystemZ::BI__builtin_s390_vlbrg: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType); + return Builder.CreateCall(F, X); + } + // Vector intrinsics that output the post-instruction CC value. #define INTRINSIC_WITH_CC(NAME) \ @@ -12481,6 +13168,14 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, INTRINSIC_WITH_CC(s390_vftcisb); INTRINSIC_WITH_CC(s390_vftcidb); + INTRINSIC_WITH_CC(s390_vstrsb); + INTRINSIC_WITH_CC(s390_vstrsh); + INTRINSIC_WITH_CC(s390_vstrsf); + + INTRINSIC_WITH_CC(s390_vstrszb); + INTRINSIC_WITH_CC(s390_vstrszh); + INTRINSIC_WITH_CC(s390_vstrszf); + #undef INTRINSIC_WITH_CC default: @@ -12488,8 +13183,252 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, } } -Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, - const CallExpr *E) { +namespace { +// Helper classes for mapping MMA builtins to particular LLVM intrinsic variant. +struct NVPTXMmaLdstInfo { + unsigned NumResults; // Number of elements to load/store + // Intrinsic IDs for row/col variants. 0 if particular layout is unsupported. 
+ unsigned IID_col; + unsigned IID_row; +}; + +#define MMA_INTR(geom_op_type, layout) \ + Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride +#define MMA_LDST(n, geom_op_type) \ + { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) } + +static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) { + switch (BuiltinID) { + // FP MMA loads + case NVPTX::BI__hmma_m16n16k16_ld_a: + return MMA_LDST(8, m16n16k16_load_a_f16); + case NVPTX::BI__hmma_m16n16k16_ld_b: + return MMA_LDST(8, m16n16k16_load_b_f16); + case NVPTX::BI__hmma_m16n16k16_ld_c_f16: + return MMA_LDST(4, m16n16k16_load_c_f16); + case NVPTX::BI__hmma_m16n16k16_ld_c_f32: + return MMA_LDST(8, m16n16k16_load_c_f32); + case NVPTX::BI__hmma_m32n8k16_ld_a: + return MMA_LDST(8, m32n8k16_load_a_f16); + case NVPTX::BI__hmma_m32n8k16_ld_b: + return MMA_LDST(8, m32n8k16_load_b_f16); + case NVPTX::BI__hmma_m32n8k16_ld_c_f16: + return MMA_LDST(4, m32n8k16_load_c_f16); + case NVPTX::BI__hmma_m32n8k16_ld_c_f32: + return MMA_LDST(8, m32n8k16_load_c_f32); + case NVPTX::BI__hmma_m8n32k16_ld_a: + return MMA_LDST(8, m8n32k16_load_a_f16); + case NVPTX::BI__hmma_m8n32k16_ld_b: + return MMA_LDST(8, m8n32k16_load_b_f16); + case NVPTX::BI__hmma_m8n32k16_ld_c_f16: + return MMA_LDST(4, m8n32k16_load_c_f16); + case NVPTX::BI__hmma_m8n32k16_ld_c_f32: + return MMA_LDST(8, m8n32k16_load_c_f32); + + // Integer MMA loads + case NVPTX::BI__imma_m16n16k16_ld_a_s8: + return MMA_LDST(2, m16n16k16_load_a_s8); + case NVPTX::BI__imma_m16n16k16_ld_a_u8: + return MMA_LDST(2, m16n16k16_load_a_u8); + case NVPTX::BI__imma_m16n16k16_ld_b_s8: + return MMA_LDST(2, m16n16k16_load_b_s8); + case NVPTX::BI__imma_m16n16k16_ld_b_u8: + return MMA_LDST(2, m16n16k16_load_b_u8); + case NVPTX::BI__imma_m16n16k16_ld_c: + return MMA_LDST(8, m16n16k16_load_c_s32); + case NVPTX::BI__imma_m32n8k16_ld_a_s8: + return MMA_LDST(4, m32n8k16_load_a_s8); + case NVPTX::BI__imma_m32n8k16_ld_a_u8: + return MMA_LDST(4, m32n8k16_load_a_u8); + case NVPTX::BI__imma_m32n8k16_ld_b_s8: + return MMA_LDST(1, m32n8k16_load_b_s8); + case NVPTX::BI__imma_m32n8k16_ld_b_u8: + return MMA_LDST(1, m32n8k16_load_b_u8); + case NVPTX::BI__imma_m32n8k16_ld_c: + return MMA_LDST(8, m32n8k16_load_c_s32); + case NVPTX::BI__imma_m8n32k16_ld_a_s8: + return MMA_LDST(1, m8n32k16_load_a_s8); + case NVPTX::BI__imma_m8n32k16_ld_a_u8: + return MMA_LDST(1, m8n32k16_load_a_u8); + case NVPTX::BI__imma_m8n32k16_ld_b_s8: + return MMA_LDST(4, m8n32k16_load_b_s8); + case NVPTX::BI__imma_m8n32k16_ld_b_u8: + return MMA_LDST(4, m8n32k16_load_b_u8); + case NVPTX::BI__imma_m8n32k16_ld_c: + return MMA_LDST(8, m8n32k16_load_c_s32); + + // Sub-integer MMA loads. + // Only row/col layout is supported by A/B fragments. + case NVPTX::BI__imma_m8n8k32_ld_a_s4: + return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)}; + case NVPTX::BI__imma_m8n8k32_ld_a_u4: + return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)}; + case NVPTX::BI__imma_m8n8k32_ld_b_s4: + return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0}; + case NVPTX::BI__imma_m8n8k32_ld_b_u4: + return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0}; + case NVPTX::BI__imma_m8n8k32_ld_c: + return MMA_LDST(2, m8n8k32_load_c_s32); + case NVPTX::BI__bmma_m8n8k128_ld_a_b1: + return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)}; + case NVPTX::BI__bmma_m8n8k128_ld_b_b1: + return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0}; + case NVPTX::BI__bmma_m8n8k128_ld_c: + return MMA_LDST(2, m8n8k128_load_c_s32); + + // NOTE: We need to follow the inconsistent naming scheme used by NVCC.
Unlike + // PTX and LLVM IR where stores always use fragment D, NVCC builtins always + // use fragment C for both loads and stores. + // FP MMA stores. + case NVPTX::BI__hmma_m16n16k16_st_c_f16: + return MMA_LDST(4, m16n16k16_store_d_f16); + case NVPTX::BI__hmma_m16n16k16_st_c_f32: + return MMA_LDST(8, m16n16k16_store_d_f32); + case NVPTX::BI__hmma_m32n8k16_st_c_f16: + return MMA_LDST(4, m32n8k16_store_d_f16); + case NVPTX::BI__hmma_m32n8k16_st_c_f32: + return MMA_LDST(8, m32n8k16_store_d_f32); + case NVPTX::BI__hmma_m8n32k16_st_c_f16: + return MMA_LDST(4, m8n32k16_store_d_f16); + case NVPTX::BI__hmma_m8n32k16_st_c_f32: + return MMA_LDST(8, m8n32k16_store_d_f32); + + // Integer and sub-integer MMA stores. + // Another naming quirk. Unlike other MMA builtins that use PTX types in the + // name, integer loads/stores use LLVM's i32. + case NVPTX::BI__imma_m16n16k16_st_c_i32: + return MMA_LDST(8, m16n16k16_store_d_s32); + case NVPTX::BI__imma_m32n8k16_st_c_i32: + return MMA_LDST(8, m32n8k16_store_d_s32); + case NVPTX::BI__imma_m8n32k16_st_c_i32: + return MMA_LDST(8, m8n32k16_store_d_s32); + case NVPTX::BI__imma_m8n8k32_st_c_i32: + return MMA_LDST(2, m8n8k32_store_d_s32); + case NVPTX::BI__bmma_m8n8k128_st_c_i32: + return MMA_LDST(2, m8n8k128_store_d_s32); + + default: + llvm_unreachable("Unknown MMA builtin"); + } +} +#undef MMA_LDST +#undef MMA_INTR + + +struct NVPTXMmaInfo { + unsigned NumEltsA; + unsigned NumEltsB; + unsigned NumEltsC; + unsigned NumEltsD; + std::array<unsigned, 8> Variants; + + unsigned getMMAIntrinsic(int Layout, bool Satf) { + unsigned Index = Layout * 2 + Satf; + if (Index >= Variants.size()) + return 0; + return Variants[Index]; + } +}; + + // Returns an intrinsic that matches Layout and Satf for valid combinations of + // Layout and Satf, 0 otherwise. +static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) { + // clang-format off +#define MMA_VARIANTS(geom, type) {{ \ + Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \ + Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \ + Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \ + Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \ + Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \ + Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \ + Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type, \ + Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite \ + }} +// Sub-integer MMA only supports row.col layout. +#define MMA_VARIANTS_I4(geom, type) {{ \ + 0, \ + 0, \ + Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \ + Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \ + 0, \ + 0, \ + 0, \ + 0 \ + }} +// b1 MMA does not support .satfinite. +#define MMA_VARIANTS_B1(geom, type) {{ \ + 0, \ + 0, \ + Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \ + 0, \ + 0, \ + 0, \ + 0, \ + 0 \ + }} + // clang-format on + switch (BuiltinID) { + // FP MMA + // Note that 'type' argument of MMA_VARIANT uses D_C notation, while + // NumEltsN of return value are ordered as A,B,C,D. 
+ case NVPTX::BI__hmma_m16n16k16_mma_f16f16: + return {8, 8, 4, 4, MMA_VARIANTS(m16n16k16, f16_f16)}; + case NVPTX::BI__hmma_m16n16k16_mma_f32f16: + return {8, 8, 4, 8, MMA_VARIANTS(m16n16k16, f32_f16)}; + case NVPTX::BI__hmma_m16n16k16_mma_f16f32: + return {8, 8, 8, 4, MMA_VARIANTS(m16n16k16, f16_f32)}; + case NVPTX::BI__hmma_m16n16k16_mma_f32f32: + return {8, 8, 8, 8, MMA_VARIANTS(m16n16k16, f32_f32)}; + case NVPTX::BI__hmma_m32n8k16_mma_f16f16: + return {8, 8, 4, 4, MMA_VARIANTS(m32n8k16, f16_f16)}; + case NVPTX::BI__hmma_m32n8k16_mma_f32f16: + return {8, 8, 4, 8, MMA_VARIANTS(m32n8k16, f32_f16)}; + case NVPTX::BI__hmma_m32n8k16_mma_f16f32: + return {8, 8, 8, 4, MMA_VARIANTS(m32n8k16, f16_f32)}; + case NVPTX::BI__hmma_m32n8k16_mma_f32f32: + return {8, 8, 8, 8, MMA_VARIANTS(m32n8k16, f32_f32)}; + case NVPTX::BI__hmma_m8n32k16_mma_f16f16: + return {8, 8, 4, 4, MMA_VARIANTS(m8n32k16, f16_f16)}; + case NVPTX::BI__hmma_m8n32k16_mma_f32f16: + return {8, 8, 4, 8, MMA_VARIANTS(m8n32k16, f32_f16)}; + case NVPTX::BI__hmma_m8n32k16_mma_f16f32: + return {8, 8, 8, 4, MMA_VARIANTS(m8n32k16, f16_f32)}; + case NVPTX::BI__hmma_m8n32k16_mma_f32f32: + return {8, 8, 8, 8, MMA_VARIANTS(m8n32k16, f32_f32)}; + + // Integer MMA + case NVPTX::BI__imma_m16n16k16_mma_s8: + return {2, 2, 8, 8, MMA_VARIANTS(m16n16k16, s8)}; + case NVPTX::BI__imma_m16n16k16_mma_u8: + return {2, 2, 8, 8, MMA_VARIANTS(m16n16k16, u8)}; + case NVPTX::BI__imma_m32n8k16_mma_s8: + return {4, 1, 8, 8, MMA_VARIANTS(m32n8k16, s8)}; + case NVPTX::BI__imma_m32n8k16_mma_u8: + return {4, 1, 8, 8, MMA_VARIANTS(m32n8k16, u8)}; + case NVPTX::BI__imma_m8n32k16_mma_s8: + return {1, 4, 8, 8, MMA_VARIANTS(m8n32k16, s8)}; + case NVPTX::BI__imma_m8n32k16_mma_u8: + return {1, 4, 8, 8, MMA_VARIANTS(m8n32k16, u8)}; + + // Sub-integer MMA + case NVPTX::BI__imma_m8n8k32_mma_s4: + return {1, 1, 2, 2, MMA_VARIANTS_I4(m8n8k32, s4)}; + case NVPTX::BI__imma_m8n8k32_mma_u4: + return {1, 1, 2, 2, MMA_VARIANTS_I4(m8n8k32, u4)}; + case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1: + return {1, 1, 2, 2, MMA_VARIANTS_B1(m8n8k128, b1)}; + default: + llvm_unreachable("Unexpected builtin ID."); + } +#undef MMA_VARIANTS +#undef MMA_VARIANTS_I4 +#undef MMA_VARIANTS_B1 +} + +} // namespace + +Value * +CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { auto MakeLdg = [&](unsigned IntrinsicID) { Value *Ptr = EmitScalarExpr(E->getArg(0)); clang::CharUnits Align = @@ -12564,30 +13503,18 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, // success flag. return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false); - case NVPTX::BI__nvvm_atom_add_gen_f: { - Value *Ptr = EmitScalarExpr(E->getArg(0)); - Value *Val = EmitScalarExpr(E->getArg(1)); - // atomicrmw only deals with integer arguments so we need to use - // LLVM's nvvm_atomic_load_add_f32 intrinsic for that. - Value *FnALAF32 = - CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType()); - return Builder.CreateCall(FnALAF32, {Ptr, Val}); - } - + case NVPTX::BI__nvvm_atom_add_gen_f: case NVPTX::BI__nvvm_atom_add_gen_d: { Value *Ptr = EmitScalarExpr(E->getArg(0)); Value *Val = EmitScalarExpr(E->getArg(1)); - // atomicrmw only deals with integer arguments, so we need to use - // LLVM's nvvm_atomic_load_add_f64 intrinsic. 
- Value *FnALAF64 = - CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f64, Ptr->getType()); - return Builder.CreateCall(FnALAF64, {Ptr, Val}); + return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, Ptr, Val, + AtomicOrdering::SequentiallyConsistent); } case NVPTX::BI__nvvm_atom_inc_gen_ui: { Value *Ptr = EmitScalarExpr(E->getArg(0)); Value *Val = EmitScalarExpr(E->getArg(1)); - Value *FnALI32 = + Function *FnALI32 = CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType()); return Builder.CreateCall(FnALI32, {Ptr, Val}); } @@ -12595,7 +13522,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, case NVPTX::BI__nvvm_atom_dec_gen_ui: { Value *Ptr = EmitScalarExpr(E->getArg(0)); Value *Val = EmitScalarExpr(E->getArg(1)); - Value *FnALD32 = + Function *FnALD32 = CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType()); return Builder.CreateCall(FnALD32, {Ptr, Val}); } @@ -12752,6 +13679,8 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, Builder.CreateStore(Pred, PredOutPtr); return Builder.CreateExtractValue(ResultPair, 0); } + + // FP MMA loads case NVPTX::BI__hmma_m16n16k16_ld_a: case NVPTX::BI__hmma_m16n16k16_ld_b: case NVPTX::BI__hmma_m16n16k16_ld_c_f16: @@ -12763,7 +13692,33 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, case NVPTX::BI__hmma_m8n32k16_ld_a: case NVPTX::BI__hmma_m8n32k16_ld_b: case NVPTX::BI__hmma_m8n32k16_ld_c_f16: - case NVPTX::BI__hmma_m8n32k16_ld_c_f32: { + case NVPTX::BI__hmma_m8n32k16_ld_c_f32: + // Integer MMA loads. + case NVPTX::BI__imma_m16n16k16_ld_a_s8: + case NVPTX::BI__imma_m16n16k16_ld_a_u8: + case NVPTX::BI__imma_m16n16k16_ld_b_s8: + case NVPTX::BI__imma_m16n16k16_ld_b_u8: + case NVPTX::BI__imma_m16n16k16_ld_c: + case NVPTX::BI__imma_m32n8k16_ld_a_s8: + case NVPTX::BI__imma_m32n8k16_ld_a_u8: + case NVPTX::BI__imma_m32n8k16_ld_b_s8: + case NVPTX::BI__imma_m32n8k16_ld_b_u8: + case NVPTX::BI__imma_m32n8k16_ld_c: + case NVPTX::BI__imma_m8n32k16_ld_a_s8: + case NVPTX::BI__imma_m8n32k16_ld_a_u8: + case NVPTX::BI__imma_m8n32k16_ld_b_s8: + case NVPTX::BI__imma_m8n32k16_ld_b_u8: + case NVPTX::BI__imma_m8n32k16_ld_c: + // Sub-integer MMA loads. + case NVPTX::BI__imma_m8n8k32_ld_a_s4: + case NVPTX::BI__imma_m8n8k32_ld_a_u4: + case NVPTX::BI__imma_m8n8k32_ld_b_s4: + case NVPTX::BI__imma_m8n8k32_ld_b_u4: + case NVPTX::BI__imma_m8n8k32_ld_c: + case NVPTX::BI__bmma_m8n8k128_ld_a_b1: + case NVPTX::BI__bmma_m8n8k128_ld_b_b1: + case NVPTX::BI__bmma_m8n8k128_ld_c: + { Address Dst = EmitPointerWithAlignment(E->getArg(0)); Value *Src = EmitScalarExpr(E->getArg(1)); Value *Ldm = EmitScalarExpr(E->getArg(2)); @@ -12771,82 +13726,28 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext())) return nullptr; bool isColMajor = isColMajorArg.getSExtValue(); - unsigned IID; - unsigned NumResults; - switch (BuiltinID) { - case NVPTX::BI__hmma_m16n16k16_ld_a: - IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride - : Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride; - NumResults = 8; - break; - case NVPTX::BI__hmma_m16n16k16_ld_b: - IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col_stride - : Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride; - NumResults = 8; - break; - case NVPTX::BI__hmma_m16n16k16_ld_c_f16: - IID = isColMajor ? 
Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col_stride - : Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride; - NumResults = 4; - break; - case NVPTX::BI__hmma_m16n16k16_ld_c_f32: - IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col_stride - : Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride; - NumResults = 8; - break; - case NVPTX::BI__hmma_m32n8k16_ld_a: - IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col_stride - : Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row_stride; - NumResults = 8; - break; - case NVPTX::BI__hmma_m32n8k16_ld_b: - IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col_stride - : Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row_stride; - NumResults = 8; - break; - case NVPTX::BI__hmma_m32n8k16_ld_c_f16: - IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col_stride - : Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row_stride; - NumResults = 4; - break; - case NVPTX::BI__hmma_m32n8k16_ld_c_f32: - IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col_stride - : Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row_stride; - NumResults = 8; - break; - case NVPTX::BI__hmma_m8n32k16_ld_a: - IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col_stride - : Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row_stride; - NumResults = 8; - break; - case NVPTX::BI__hmma_m8n32k16_ld_b: - IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col_stride - : Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row_stride; - NumResults = 8; - break; - case NVPTX::BI__hmma_m8n32k16_ld_c_f16: - IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col_stride - : Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row_stride; - NumResults = 4; - break; - case NVPTX::BI__hmma_m8n32k16_ld_c_f32: - IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col_stride - : Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row_stride; - NumResults = 8; - break; - default: - llvm_unreachable("Unexpected builtin ID."); - } + NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID); + unsigned IID = isColMajor ? II.IID_col : II.IID_row; + if (IID == 0) + return nullptr; + Value *Result = Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm}); // Save returned values. 
- for (unsigned i = 0; i < NumResults; ++i) { - Builder.CreateAlignedStore( - Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), - Dst.getElementType()), - Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)), - CharUnits::fromQuantity(4)); + assert(II.NumResults); + if (II.NumResults == 1) { + Builder.CreateAlignedStore(Result, Dst.getPointer(), + CharUnits::fromQuantity(4)); + } else { + for (unsigned i = 0; i < II.NumResults; ++i) { + Builder.CreateAlignedStore( + Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), + Dst.getElementType()), + Builder.CreateGEP(Dst.getPointer(), + llvm::ConstantInt::get(IntTy, i)), + CharUnits::fromQuantity(4)); + } } return Result; } @@ -12856,7 +13757,12 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, case NVPTX::BI__hmma_m32n8k16_st_c_f16: case NVPTX::BI__hmma_m32n8k16_st_c_f32: case NVPTX::BI__hmma_m8n32k16_st_c_f16: - case NVPTX::BI__hmma_m8n32k16_st_c_f32: { + case NVPTX::BI__hmma_m8n32k16_st_c_f32: + case NVPTX::BI__imma_m16n16k16_st_c_i32: + case NVPTX::BI__imma_m32n8k16_st_c_i32: + case NVPTX::BI__imma_m8n32k16_st_c_i32: + case NVPTX::BI__imma_m8n8k32_st_c_i32: + case NVPTX::BI__bmma_m8n8k128_st_c_i32: { Value *Dst = EmitScalarExpr(E->getArg(0)); Address Src = EmitPointerWithAlignment(E->getArg(1)); Value *Ldm = EmitScalarExpr(E->getArg(2)); @@ -12864,45 +13770,15 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext())) return nullptr; bool isColMajor = isColMajorArg.getSExtValue(); - unsigned IID; - unsigned NumResults = 8; - // PTX Instructions (and LLVM intrinsics) are defined for slice _d_, yet - // for some reason nvcc builtins use _c_. - switch (BuiltinID) { - case NVPTX::BI__hmma_m16n16k16_st_c_f16: - IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride - : Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride; - NumResults = 4; - break; - case NVPTX::BI__hmma_m16n16k16_st_c_f32: - IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride - : Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride; - break; - case NVPTX::BI__hmma_m32n8k16_st_c_f16: - IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col_stride - : Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row_stride; - NumResults = 4; - break; - case NVPTX::BI__hmma_m32n8k16_st_c_f32: - IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col_stride - : Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row_stride; - break; - case NVPTX::BI__hmma_m8n32k16_st_c_f16: - IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col_stride - : Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row_stride; - NumResults = 4; - break; - case NVPTX::BI__hmma_m8n32k16_st_c_f32: - IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col_stride - : Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row_stride; - break; - default: - llvm_unreachable("Unexpected builtin ID."); - } - Function *Intrinsic = CGM.getIntrinsic(IID, Dst->getType()); + NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID); + unsigned IID = isColMajor ? 
II.IID_col : II.IID_row; + if (IID == 0) + return nullptr; + Function *Intrinsic = + CGM.getIntrinsic(IID, Dst->getType()); llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1); SmallVector<Value *, 10> Values = {Dst}; - for (unsigned i = 0; i < NumResults; ++i) { + for (unsigned i = 0; i < II.NumResults; ++i) { Value *V = Builder.CreateAlignedLoad( Builder.CreateGEP(Src.getPointer(), llvm::ConstantInt::get(IntTy, i)), CharUnits::fromQuantity(4)); @@ -12926,7 +13802,16 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, case NVPTX::BI__hmma_m8n32k16_mma_f16f16: case NVPTX::BI__hmma_m8n32k16_mma_f32f16: case NVPTX::BI__hmma_m8n32k16_mma_f32f32: - case NVPTX::BI__hmma_m8n32k16_mma_f16f32: { + case NVPTX::BI__hmma_m8n32k16_mma_f16f32: + case NVPTX::BI__imma_m16n16k16_mma_s8: + case NVPTX::BI__imma_m16n16k16_mma_u8: + case NVPTX::BI__imma_m32n8k16_mma_s8: + case NVPTX::BI__imma_m32n8k16_mma_u8: + case NVPTX::BI__imma_m8n32k16_mma_s8: + case NVPTX::BI__imma_m8n32k16_mma_u8: + case NVPTX::BI__imma_m8n8k32_mma_s4: + case NVPTX::BI__imma_m8n8k32_mma_u4: + case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1: { Address Dst = EmitPointerWithAlignment(E->getArg(0)); Address SrcA = EmitPointerWithAlignment(E->getArg(1)); Address SrcB = EmitPointerWithAlignment(E->getArg(2)); @@ -12938,119 +13823,40 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, if (Layout < 0 || Layout > 3) return nullptr; llvm::APSInt SatfArg; - if (!E->getArg(5)->isIntegerConstantExpr(SatfArg, getContext())) + if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1) + SatfArg = 0; // .b1 does not have satf argument. + else if (!E->getArg(5)->isIntegerConstantExpr(SatfArg, getContext())) return nullptr; bool Satf = SatfArg.getSExtValue(); - - // clang-format off -#define MMA_VARIANTS(geom, type) {{ \ - Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \ - Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \ - Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \ - Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \ - Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \ - Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \ - Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type, \ - Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite \ - }} - // clang-format on - - auto getMMAIntrinsic = [Layout, Satf](std::array<unsigned, 8> Variants) { - unsigned Index = Layout * 2 + Satf; - assert(Index < 8); - return Variants[Index]; - }; - unsigned IID; - unsigned NumEltsC; - unsigned NumEltsD; - switch (BuiltinID) { - case NVPTX::BI__hmma_m16n16k16_mma_f16f16: - IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f16_f16)); - NumEltsC = 4; - NumEltsD = 4; - break; - case NVPTX::BI__hmma_m16n16k16_mma_f32f16: - IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f32_f16)); - NumEltsC = 4; - NumEltsD = 8; - break; - case NVPTX::BI__hmma_m16n16k16_mma_f16f32: - IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f16_f32)); - NumEltsC = 8; - NumEltsD = 4; - break; - case NVPTX::BI__hmma_m16n16k16_mma_f32f32: - IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f32_f32)); - NumEltsC = 8; - NumEltsD = 8; - break; - case NVPTX::BI__hmma_m32n8k16_mma_f16f16: - IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f16_f16)); - NumEltsC = 4; - NumEltsD = 4; - break; - case NVPTX::BI__hmma_m32n8k16_mma_f32f16: - IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f32_f16)); - NumEltsC = 4; - NumEltsD = 8; - break; - case NVPTX::BI__hmma_m32n8k16_mma_f16f32: - IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, 
f16_f32)); - NumEltsC = 8; - NumEltsD = 4; - break; - case NVPTX::BI__hmma_m32n8k16_mma_f32f32: - IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f32_f32)); - NumEltsC = 8; - NumEltsD = 8; - break; - case NVPTX::BI__hmma_m8n32k16_mma_f16f16: - IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f16_f16)); - NumEltsC = 4; - NumEltsD = 4; - break; - case NVPTX::BI__hmma_m8n32k16_mma_f32f16: - IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f32_f16)); - NumEltsC = 4; - NumEltsD = 8; - break; - case NVPTX::BI__hmma_m8n32k16_mma_f16f32: - IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f16_f32)); - NumEltsC = 8; - NumEltsD = 4; - break; - case NVPTX::BI__hmma_m8n32k16_mma_f32f32: - IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f32_f32)); - NumEltsC = 8; - NumEltsD = 8; - break; - default: - llvm_unreachable("Unexpected builtin ID."); - } -#undef MMA_VARIANTS + NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID); + unsigned IID = MI.getMMAIntrinsic(Layout, Satf); + if (IID == 0) // Unsupported combination of Layout/Satf. + return nullptr; SmallVector<Value *, 24> Values; Function *Intrinsic = CGM.getIntrinsic(IID); - llvm::Type *ABType = Intrinsic->getFunctionType()->getParamType(0); + llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0); // Load A - for (unsigned i = 0; i < 8; ++i) { + for (unsigned i = 0; i < MI.NumEltsA; ++i) { Value *V = Builder.CreateAlignedLoad( Builder.CreateGEP(SrcA.getPointer(), llvm::ConstantInt::get(IntTy, i)), CharUnits::fromQuantity(4)); - Values.push_back(Builder.CreateBitCast(V, ABType)); + Values.push_back(Builder.CreateBitCast(V, AType)); } // Load B - for (unsigned i = 0; i < 8; ++i) { + llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA); + for (unsigned i = 0; i < MI.NumEltsB; ++i) { Value *V = Builder.CreateAlignedLoad( Builder.CreateGEP(SrcB.getPointer(), llvm::ConstantInt::get(IntTy, i)), CharUnits::fromQuantity(4)); - Values.push_back(Builder.CreateBitCast(V, ABType)); + Values.push_back(Builder.CreateBitCast(V, BType)); } // Load C - llvm::Type *CType = Intrinsic->getFunctionType()->getParamType(16); - for (unsigned i = 0; i < NumEltsC; ++i) { + llvm::Type *CType = + Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB); + for (unsigned i = 0; i < MI.NumEltsC; ++i) { Value *V = Builder.CreateAlignedLoad( Builder.CreateGEP(SrcC.getPointer(), llvm::ConstantInt::get(IntTy, i)), @@ -13059,7 +13865,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, } Value *Result = Builder.CreateCall(Intrinsic, Values); llvm::Type *DType = Dst.getElementType(); - for (unsigned i = 0; i < NumEltsD; ++i) + for (unsigned i = 0; i < MI.NumEltsD; ++i) Builder.CreateAlignedStore( Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType), Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)), @@ -13077,7 +13883,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_memory_size: { llvm::Type *ResultType = ConvertType(E->getType()); Value *I = EmitScalarExpr(E->getArg(0)); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType); return Builder.CreateCall(Callee, I); } case WebAssembly::BI__builtin_wasm_memory_grow: { @@ -13086,37 +13892,66 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)) }; - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType); + 
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType); + return Builder.CreateCall(Callee, Args); + } + case WebAssembly::BI__builtin_wasm_memory_init: { + llvm::APSInt SegConst; + if (!E->getArg(0)->isIntegerConstantExpr(SegConst, getContext())) + llvm_unreachable("Constant arg isn't actually constant?"); + llvm::APSInt MemConst; + if (!E->getArg(1)->isIntegerConstantExpr(MemConst, getContext())) + llvm_unreachable("Constant arg isn't actually constant?"); + if (!MemConst.isNullValue()) + ErrorUnsupported(E, "non-zero memory index"); + Value *Args[] = {llvm::ConstantInt::get(getLLVMContext(), SegConst), + llvm::ConstantInt::get(getLLVMContext(), MemConst), + EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)), + EmitScalarExpr(E->getArg(4))}; + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_init); return Builder.CreateCall(Callee, Args); } + case WebAssembly::BI__builtin_wasm_data_drop: { + llvm::APSInt SegConst; + if (!E->getArg(0)->isIntegerConstantExpr(SegConst, getContext())) + llvm_unreachable("Constant arg isn't actually constant?"); + Value *Arg = llvm::ConstantInt::get(getLLVMContext(), SegConst); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_data_drop); + return Builder.CreateCall(Callee, {Arg}); + } + case WebAssembly::BI__builtin_wasm_tls_size: { + llvm::Type *ResultType = ConvertType(E->getType()); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType); + return Builder.CreateCall(Callee); + } case WebAssembly::BI__builtin_wasm_throw: { Value *Tag = EmitScalarExpr(E->getArg(0)); Value *Obj = EmitScalarExpr(E->getArg(1)); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw); return Builder.CreateCall(Callee, {Tag, Obj}); } - case WebAssembly::BI__builtin_wasm_rethrow: { - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow); + case WebAssembly::BI__builtin_wasm_rethrow_in_catch: { + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow_in_catch); return Builder.CreateCall(Callee); } case WebAssembly::BI__builtin_wasm_atomic_wait_i32: { Value *Addr = EmitScalarExpr(E->getArg(0)); Value *Expected = EmitScalarExpr(E->getArg(1)); Value *Timeout = EmitScalarExpr(E->getArg(2)); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i32); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i32); return Builder.CreateCall(Callee, {Addr, Expected, Timeout}); } case WebAssembly::BI__builtin_wasm_atomic_wait_i64: { Value *Addr = EmitScalarExpr(E->getArg(0)); Value *Expected = EmitScalarExpr(E->getArg(1)); Value *Timeout = EmitScalarExpr(E->getArg(2)); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i64); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i64); return Builder.CreateCall(Callee, {Addr, Expected, Timeout}); } case WebAssembly::BI__builtin_wasm_atomic_notify: { Value *Addr = EmitScalarExpr(E->getArg(0)); Value *Count = EmitScalarExpr(E->getArg(1)); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_notify); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_notify); return Builder.CreateCall(Callee, {Addr, Count}); } case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32: @@ -13127,7 +13962,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64x2_f64x2: { Value *Src = EmitScalarExpr(E->getArg(0)); llvm::Type *ResT = ConvertType(E->getType()); - Value *Callee = 
CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_signed, + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_signed, {ResT, Src->getType()}); return Builder.CreateCall(Callee, {Src}); } @@ -13139,7 +13974,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64x2_f64x2: { Value *Src = EmitScalarExpr(E->getArg(0)); llvm::Type *ResT = ConvertType(E->getType()); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_unsigned, + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_unsigned, {ResT, Src->getType()}); return Builder.CreateCall(Callee, {Src}); } @@ -13149,7 +13984,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_min_f64x2: { Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); - Value *Callee = CGM.getIntrinsic(Intrinsic::minimum, + Function *Callee = CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } @@ -13159,7 +13994,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_max_f64x2: { Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); - Value *Callee = CGM.getIntrinsic(Intrinsic::maximum, + Function *Callee = CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } @@ -13252,14 +14087,14 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, } Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); - Value *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType())); + Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } case WebAssembly::BI__builtin_wasm_bitselect: { Value *V1 = EmitScalarExpr(E->getArg(0)); Value *V2 = EmitScalarExpr(E->getArg(1)); Value *C = EmitScalarExpr(E->getArg(2)); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_bitselect, + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType())); return Builder.CreateCall(Callee, {V1, V2, C}); } @@ -13289,19 +14124,19 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, llvm_unreachable("unexpected builtin ID"); } Value *Vec = EmitScalarExpr(E->getArg(0)); - Value *Callee = CGM.getIntrinsic(IntNo, Vec->getType()); + Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType()); return Builder.CreateCall(Callee, {Vec}); } case WebAssembly::BI__builtin_wasm_abs_f32x4: case WebAssembly::BI__builtin_wasm_abs_f64x2: { Value *Vec = EmitScalarExpr(E->getArg(0)); - Value *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType()); + Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType()); return Builder.CreateCall(Callee, {Vec}); } case WebAssembly::BI__builtin_wasm_sqrt_f32x4: case WebAssembly::BI__builtin_wasm_sqrt_f64x2: { Value *Vec = EmitScalarExpr(E->getArg(0)); - Value *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType()); + Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType()); return Builder.CreateCall(Callee, {Vec}); } diff --git a/lib/CodeGen/CGCUDANV.cpp b/lib/CodeGen/CGCUDANV.cpp index 1c578bd151bd..4d4038dae9cf 100644 --- a/lib/CodeGen/CGCUDANV.cpp +++ b/lib/CodeGen/CGCUDANV.cpp @@ -1,9 +1,8 @@ //===----- CGCUDANV.cpp - Interface to NVIDIA CUDA Runtime ----------------===// // -// The LLVM Compiler 
Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -16,9 +15,10 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "clang/AST/Decl.h" +#include "clang/Basic/Cuda.h" +#include "clang/CodeGen/CodeGenABITypes.h" #include "clang/CodeGen/ConstantInitBuilder.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/Support/Format.h" @@ -42,17 +42,28 @@ private: /// Convenience reference to the current module llvm::Module &TheModule; /// Keeps track of kernel launch stubs emitted in this module - llvm::SmallVector<llvm::Function *, 16> EmittedKernels; - llvm::SmallVector<std::pair<llvm::GlobalVariable *, unsigned>, 16> DeviceVars; + struct KernelInfo { + llvm::Function *Kernel; + const Decl *D; + }; + llvm::SmallVector<KernelInfo, 16> EmittedKernels; + struct VarInfo { + llvm::GlobalVariable *Var; + const VarDecl *D; + unsigned Flag; + }; + llvm::SmallVector<VarInfo, 16> DeviceVars; /// Keeps track of variable containing handle of GPU binary. Populated by /// ModuleCtorFunction() and used to create corresponding cleanup calls in /// ModuleDtorFunction() llvm::GlobalVariable *GpuBinaryHandle = nullptr; /// Whether we generate relocatable device code. bool RelocatableDeviceCode; + /// Mangle context for device. + std::unique_ptr<MangleContext> DeviceMC; - llvm::Constant *getSetupArgumentFn() const; - llvm::Constant *getLaunchFn() const; + llvm::FunctionCallee getSetupArgumentFn() const; + llvm::FunctionCallee getLaunchFn() const; llvm::FunctionType *getRegisterGlobalsFnTy() const; llvm::FunctionType *getCallbackFnTy() const; @@ -104,20 +115,25 @@ private: return DummyFunc; } - void emitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args); + void emitDeviceStubBodyLegacy(CodeGenFunction &CGF, FunctionArgList &Args); + void emitDeviceStubBodyNew(CodeGenFunction &CGF, FunctionArgList &Args); + std::string getDeviceSideName(const Decl *ND); public: CGNVCUDARuntime(CodeGenModule &CGM); void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override; - void registerDeviceVar(llvm::GlobalVariable &Var, unsigned Flags) override { - DeviceVars.push_back(std::make_pair(&Var, Flags)); + void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var, + unsigned Flags) override { + DeviceVars.push_back({&Var, VD, Flags}); } /// Creates module constructor function llvm::Function *makeModuleCtorFunction() override; /// Creates module destructor function llvm::Function *makeModuleDtorFunction() override; + /// Construct and return the stub name of a kernel. 
+ std::string getDeviceStubName(llvm::StringRef Name) const override; }; } @@ -137,7 +153,9 @@ CGNVCUDARuntime::addUnderscoredPrefixToName(StringRef FuncName) const { CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) : CGCUDARuntime(CGM), Context(CGM.getLLVMContext()), TheModule(CGM.getModule()), - RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode) { + RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode), + DeviceMC(CGM.getContext().createMangleContext( + CGM.getContext().getAuxTargetInfo())) { CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); @@ -150,7 +168,7 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) VoidPtrPtrTy = VoidPtrTy->getPointerTo(); } -llvm::Constant *CGNVCUDARuntime::getSetupArgumentFn() const { +llvm::FunctionCallee CGNVCUDARuntime::getSetupArgumentFn() const { // cudaError_t cudaSetupArgument(void *, size_t, size_t) llvm::Type *Params[] = {VoidPtrTy, SizeTy, SizeTy}; return CGM.CreateRuntimeFunction( @@ -158,7 +176,7 @@ llvm::Constant *CGNVCUDARuntime::getSetupArgumentFn() const { addPrefixToName("SetupArgument")); } -llvm::Constant *CGNVCUDARuntime::getLaunchFn() const { +llvm::FunctionCallee CGNVCUDARuntime::getLaunchFn() const { if (CGM.getLangOpts().HIP) { // hipError_t hipLaunchByPtr(char *); return CGM.CreateRuntimeFunction( @@ -186,16 +204,143 @@ llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy() const { return llvm::FunctionType::get(VoidTy, Params, false); } +std::string CGNVCUDARuntime::getDeviceSideName(const Decl *D) { + auto *ND = cast<const NamedDecl>(D); + std::string DeviceSideName; + if (DeviceMC->shouldMangleDeclName(ND)) { + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + DeviceMC->mangleName(ND, Out); + DeviceSideName = Out.str(); + } else + DeviceSideName = ND->getIdentifier()->getName(); + return DeviceSideName; +} + void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) { - EmittedKernels.push_back(CGF.CurFn); - emitDeviceStubBody(CGF, Args); + // Ensure either we have different ABIs between host and device compilations, + // say, host compilation follows the MSVC ABI while device compilation + // follows the Itanium C++ ABI, or, if they follow the same ABI, kernel names + // after mangling are the same after name stubbing. The latter check is + // very important, as the device kernel name mangled in the host compilation + // is used to resolve the device binaries to be executed. Inconsistent naming + // results in undefined behavior. Even though we cannot compare the naming + // directly between host and device compilations, comparing the host- and + // device-mangled names in the host compilation can help catch certain + // mismatches. + assert((CGF.CGM.getContext().getAuxTargetInfo() && + (CGF.CGM.getContext().getAuxTargetInfo()->getCXXABI() != + CGF.CGM.getContext().getTargetInfo().getCXXABI())) || + getDeviceStubName(getDeviceSideName(CGF.CurFuncDecl)) == + CGF.CurFn->getName()); + + EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl}); + if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(), + CudaFeature::CUDA_USES_NEW_LAUNCH)) + emitDeviceStubBodyNew(CGF, Args); + else + emitDeviceStubBodyLegacy(CGF, Args); } -void CGNVCUDARuntime::emitDeviceStubBody(CodeGenFunction &CGF, - FunctionArgList &Args) { +// CUDA 9.0+ uses a new way to launch kernels. Parameters are packed in a local +// array and kernels are launched using cudaLaunchKernel().
+void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, + FunctionArgList &Args) { + // Build the shadow stack entry at the very start of the function. + + // Calculate amount of space we will need for all arguments. If we have no + // args, allocate a single pointer so we still have a valid pointer to the + // argument array that we can pass to runtime, even if it will be unused. + Address KernelArgs = CGF.CreateTempAlloca( + VoidPtrTy, CharUnits::fromQuantity(16), "kernel_args", + llvm::ConstantInt::get(SizeTy, std::max<size_t>(1, Args.size()))); + // Store pointers to the arguments in a locally allocated launch_args. + for (unsigned i = 0; i < Args.size(); ++i) { + llvm::Value* VarPtr = CGF.GetAddrOfLocalVar(Args[i]).getPointer(); + llvm::Value *VoidVarPtr = CGF.Builder.CreatePointerCast(VarPtr, VoidPtrTy); + CGF.Builder.CreateDefaultAlignedStore( + VoidVarPtr, CGF.Builder.CreateConstGEP1_32(KernelArgs.getPointer(), i)); + } + + llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end"); + + // Lookup cudaLaunchKernel function. + // cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, + // void **args, size_t sharedMem, + // cudaStream_t stream); + TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl(); + DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); + IdentifierInfo &cudaLaunchKernelII = + CGM.getContext().Idents.get("cudaLaunchKernel"); + FunctionDecl *cudaLaunchKernelFD = nullptr; + for (const auto &Result : DC->lookup(&cudaLaunchKernelII)) { + if (FunctionDecl *FD = dyn_cast<FunctionDecl>(Result)) + cudaLaunchKernelFD = FD; + } + + if (cudaLaunchKernelFD == nullptr) { + CGM.Error(CGF.CurFuncDecl->getLocation(), + "Can't find declaration for cudaLaunchKernel()"); + return; + } + // Create temporary dim3 grid_dim, block_dim. 
+ ParmVarDecl *GridDimParam = cudaLaunchKernelFD->getParamDecl(1); + QualType Dim3Ty = GridDimParam->getType(); + Address GridDim = + CGF.CreateMemTemp(Dim3Ty, CharUnits::fromQuantity(8), "grid_dim"); + Address BlockDim = + CGF.CreateMemTemp(Dim3Ty, CharUnits::fromQuantity(8), "block_dim"); + Address ShmemSize = + CGF.CreateTempAlloca(SizeTy, CGM.getSizeAlign(), "shmem_size"); + Address Stream = + CGF.CreateTempAlloca(VoidPtrTy, CGM.getPointerAlign(), "stream"); + llvm::FunctionCallee cudaPopConfigFn = CGM.CreateRuntimeFunction( + llvm::FunctionType::get(IntTy, + {/*gridDim=*/GridDim.getType(), + /*blockDim=*/BlockDim.getType(), + /*ShmemSize=*/ShmemSize.getType(), + /*Stream=*/Stream.getType()}, + /*isVarArg=*/false), + "__cudaPopCallConfiguration"); + + CGF.EmitRuntimeCallOrInvoke(cudaPopConfigFn, + {GridDim.getPointer(), BlockDim.getPointer(), + ShmemSize.getPointer(), Stream.getPointer()}); + + // Emit the call to cudaLaunch + llvm::Value *Kernel = CGF.Builder.CreatePointerCast(CGF.CurFn, VoidPtrTy); + CallArgList LaunchKernelArgs; + LaunchKernelArgs.add(RValue::get(Kernel), + cudaLaunchKernelFD->getParamDecl(0)->getType()); + LaunchKernelArgs.add(RValue::getAggregate(GridDim), Dim3Ty); + LaunchKernelArgs.add(RValue::getAggregate(BlockDim), Dim3Ty); + LaunchKernelArgs.add(RValue::get(KernelArgs.getPointer()), + cudaLaunchKernelFD->getParamDecl(3)->getType()); + LaunchKernelArgs.add(RValue::get(CGF.Builder.CreateLoad(ShmemSize)), + cudaLaunchKernelFD->getParamDecl(4)->getType()); + LaunchKernelArgs.add(RValue::get(CGF.Builder.CreateLoad(Stream)), + cudaLaunchKernelFD->getParamDecl(5)->getType()); + + QualType QT = cudaLaunchKernelFD->getType(); + QualType CQT = QT.getCanonicalType(); + llvm::Type *Ty = CGM.getTypes().ConvertType(CQT); + llvm::FunctionType *FTy = dyn_cast<llvm::FunctionType>(Ty); + + const CGFunctionInfo &FI = + CGM.getTypes().arrangeFunctionDeclaration(cudaLaunchKernelFD); + llvm::FunctionCallee cudaLaunchKernelFn = + CGM.CreateRuntimeFunction(FTy, "cudaLaunchKernel"); + CGF.EmitCall(FI, CGCallee::forDirect(cudaLaunchKernelFn), ReturnValueSlot(), + LaunchKernelArgs); + CGF.EmitBranch(EndBlock); + + CGF.EmitBlock(EndBlock); +} + +void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF, + FunctionArgList &Args) { // Emit a call to cudaSetupArgument for each arg in Args. 
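That comment summarizes the whole legacy scheme; for contrast with the new-style body above, here is the same hypothetical axpy stub in the pre-CUDA-9.0 lowering that the body below emits. This is a hedged sketch: cudaSuccess is written as 0, and the offsets are the per-argument widths accumulated (and aligned) by the loop below, shown for an LP64 host.

    #include <cstddef>

    extern "C" int cudaSetupArgument(void *arg, std::size_t size,
                                     std::size_t offset);
    extern "C" int cudaLaunch(char *func);

    void axpy(float a, float *x, float *y) {
      // One cudaSetupArgument per argument; the first failure branches
      // straight to the end, mirroring the setup.next/setup.end blocks.
      if (cudaSetupArgument(&a, sizeof(a), 0) != 0) return;   // 0 == cudaSuccess
      if (cudaSetupArgument(&x, sizeof(x), 8) != 0) return;
      if (cudaSetupArgument(&y, sizeof(y), 16) != 0) return;
      // The generated IR bitcasts the stub's own address to char* and
      // launches by that pointer.
      cudaLaunch((char *)&axpy);
    }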
- llvm::Constant *cudaSetupArgFn = getSetupArgumentFn(); + llvm::FunctionCallee cudaSetupArgFn = getSetupArgumentFn(); llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end"); CharUnits Offset = CharUnits::Zero(); for (const VarDecl *A : Args) { @@ -209,17 +354,17 @@ void CGNVCUDARuntime::emitDeviceStubBody(CodeGenFunction &CGF, llvm::ConstantInt::get(SizeTy, TyWidth.getQuantity()), llvm::ConstantInt::get(SizeTy, Offset.getQuantity()), }; - llvm::CallSite CS = CGF.EmitRuntimeCallOrInvoke(cudaSetupArgFn, Args); + llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(cudaSetupArgFn, Args); llvm::Constant *Zero = llvm::ConstantInt::get(IntTy, 0); - llvm::Value *CSZero = CGF.Builder.CreateICmpEQ(CS.getInstruction(), Zero); + llvm::Value *CBZero = CGF.Builder.CreateICmpEQ(CB, Zero); llvm::BasicBlock *NextBlock = CGF.createBasicBlock("setup.next"); - CGF.Builder.CreateCondBr(CSZero, NextBlock, EndBlock); + CGF.Builder.CreateCondBr(CBZero, NextBlock, EndBlock); CGF.EmitBlock(NextBlock); Offset += TyWidth; } // Emit the call to cudaLaunch - llvm::Constant *cudaLaunchFn = getLaunchFn(); + llvm::FunctionCallee cudaLaunchFn = getLaunchFn(); llvm::Value *Arg = CGF.Builder.CreatePointerCast(CGF.CurFn, CharPtrTy); CGF.EmitRuntimeCallOrInvoke(cudaLaunchFn, Arg); CGF.EmitBranch(EndBlock); @@ -259,7 +404,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { llvm::Type *RegisterFuncParams[] = { VoidPtrPtrTy, CharPtrTy, CharPtrTy, CharPtrTy, IntTy, VoidPtrTy, VoidPtrTy, VoidPtrTy, VoidPtrTy, IntTy->getPointerTo()}; - llvm::Constant *RegisterFunc = CGM.CreateRuntimeFunction( + llvm::FunctionCallee RegisterFunc = CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, RegisterFuncParams, false), addUnderscoredPrefixToName("RegisterFunction")); @@ -267,13 +412,19 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { // __cuda_register_globals() and generate __cudaRegisterFunction() call for // each emitted kernel. 
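In source terms, the registration function assembled by the loop below has roughly the following shape. The entry-point signatures are transcribed from the llvm::FunctionType parameter lists in this file; the kernel, its Itanium-mangled device name _Z4axpyfPfS_, and the parameter names are illustrative assumptions.

    extern "C" int __cudaRegisterFunction(void **fatbinHandle, char *hostFun,
                                          char *deviceFun, char *deviceName,
                                          int threadLimit, void *tid, void *bid,
                                          void *blockDim, void *gridDim,
                                          int *wordSize);
    extern "C" int __cudaRegisterVar(void **fatbinHandle, char *hostVar,
                                     char *deviceName, char *name, int ext,
                                     int size, int constant, int global);

    void axpy(float a, float *x, float *y);  // the host stub from earlier

    static void __cuda_register_globals(void **fatbinHandle) {
      // One call per emitted kernel: the host stub's address is paired with
      // the *device-side* mangled name (getDeviceSideName), not the stub name.
      __cudaRegisterFunction(fatbinHandle, (char *)&axpy,
                             (char *)"_Z4axpyfPfS_", (char *)"_Z4axpyfPfS_",
                             -1, nullptr, nullptr, nullptr, nullptr, nullptr);
      // ...followed by one __cudaRegisterVar call per device variable.
    }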
llvm::Argument &GpuBinaryHandlePtr = *RegisterKernelsFunc->arg_begin(); - for (llvm::Function *Kernel : EmittedKernels) { - llvm::Constant *KernelName = makeConstantString(Kernel->getName()); + for (auto &&I : EmittedKernels) { + llvm::Constant *KernelName = makeConstantString(getDeviceSideName(I.D)); llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy); llvm::Value *Args[] = { - &GpuBinaryHandlePtr, Builder.CreateBitCast(Kernel, VoidPtrTy), - KernelName, KernelName, llvm::ConstantInt::get(IntTy, -1), NullPtr, - NullPtr, NullPtr, NullPtr, + &GpuBinaryHandlePtr, + Builder.CreateBitCast(I.Kernel, VoidPtrTy), + KernelName, + KernelName, + llvm::ConstantInt::get(IntTy, -1), + NullPtr, + NullPtr, + NullPtr, + NullPtr, llvm::ConstantPointerNull::get(IntTy->getPointerTo())}; Builder.CreateCall(RegisterFunc, Args); } @@ -283,13 +434,13 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { llvm::Type *RegisterVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy, IntTy, IntTy}; - llvm::Constant *RegisterVar = CGM.CreateRuntimeFunction( + llvm::FunctionCallee RegisterVar = CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, RegisterVarParams, false), addUnderscoredPrefixToName("RegisterVar")); - for (auto &Pair : DeviceVars) { - llvm::GlobalVariable *Var = Pair.first; - unsigned Flags = Pair.second; - llvm::Constant *VarName = makeConstantString(Var->getName()); + for (auto &&Info : DeviceVars) { + llvm::GlobalVariable *Var = Info.Var; + unsigned Flags = Info.Flag; + llvm::Constant *VarName = makeConstantString(getDeviceSideName(Info.D)); uint64_t VarSize = CGM.getDataLayout().getTypeAllocSize(Var->getValueType()); llvm::Value *Args[] = { @@ -329,10 +480,14 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { /// \endcode llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { bool IsHIP = CGM.getLangOpts().HIP; + bool IsCUDA = CGM.getLangOpts().CUDA; // No need to generate ctors/dtors if there is no GPU binary. StringRef CudaGpuBinaryFileName = CGM.getCodeGenOpts().CudaGpuBinaryFileName; if (CudaGpuBinaryFileName.empty() && !IsHIP) return nullptr; + if ((IsHIP || (IsCUDA && !RelocatableDeviceCode)) && EmittedKernels.empty() && + DeviceVars.empty()) + return nullptr; // void __{cuda|hip}_register_globals(void* handle); llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn(); @@ -342,7 +497,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { RegisterGlobalsFunc = makeDummyFunction(getRegisterGlobalsFnTy()); // void ** __{cuda|hip}RegisterFatBinary(void *); - llvm::Constant *RegisterFatbinFunc = CGM.CreateRuntimeFunction( + llvm::FunctionCallee RegisterFatbinFunc = CGM.CreateRuntimeFunction( llvm::FunctionType::get(VoidPtrPtrTy, VoidPtrTy, false), addUnderscoredPrefixToName("RegisterFatBinary")); // struct { int magic, int version, void * gpu_binary, void * dont_care }; @@ -516,6 +671,16 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { // Call __cuda_register_globals(GpuBinaryHandle); if (RegisterGlobalsFunc) CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall); + + // Call __cudaRegisterFatBinaryEnd(Handle) if this CUDA version needs it. 
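Putting the pieces together, the constructor emitted for the common CUDA case behaves like the sketch below. The wrapper layout is the one from the struct comment above; __cudaRegisterFatBinaryEnd is an internal entry point that, per this patch, only newer SDKs require.

    struct FatbinWrapper {          // { int magic, int version,
      int magic, version;           //   void *gpu_binary, void *dont_care }
      const void *binary, *unused;
    };
    extern "C" void **__cudaRegisterFatBinary(void *wrapper);
    extern "C" void __cudaRegisterFatBinaryEnd(void **handle);  // CUDA 10.1+
    void __cuda_register_globals(void **fatbinHandle);          // sketched earlier

    static FatbinWrapper __fatbin_wrapper;  // .binary -> embedded GPU image
    static void **__cuda_gpubin_handle;

    static void __cuda_module_ctor(void *) {
      __cuda_gpubin_handle = __cudaRegisterFatBinary(&__fatbin_wrapper);
      __cuda_register_globals(__cuda_gpubin_handle);
      // Only emitted when CudaFeatureEnabled(..., CUDA_USES_FATBIN_REGISTER_END):
      __cudaRegisterFatBinaryEnd(__cuda_gpubin_handle);
    }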
+ if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(), + CudaFeature::CUDA_USES_FATBIN_REGISTER_END)) { + // void __cudaRegisterFatBinaryEnd(void **); + llvm::FunctionCallee RegisterFatbinEndFunc = CGM.CreateRuntimeFunction( + llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false), + "__cudaRegisterFatBinaryEnd"); + CtorBuilder.CreateCall(RegisterFatbinEndFunc, RegisterFatbinCall); + } } else { // Generate a unique module ID. SmallString<64> ModuleID; @@ -532,7 +697,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { // void *, void (*)(void **)) SmallString<128> RegisterLinkedBinaryName("__cudaRegisterLinkedBinary"); RegisterLinkedBinaryName += ModuleID; - llvm::Constant *RegisterLinkedBinaryFunc = CGM.CreateRuntimeFunction( + llvm::FunctionCallee RegisterLinkedBinaryFunc = CGM.CreateRuntimeFunction( getRegisterLinkedBinaryFnTy(), RegisterLinkedBinaryName); assert(RegisterGlobalsFunc && "Expecting at least dummy function!"); @@ -550,7 +715,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { // extern "C" int atexit(void (*f)(void)); llvm::FunctionType *AtExitTy = llvm::FunctionType::get(IntTy, CleanupFn->getType(), false); - llvm::Constant *AtExitFunc = + llvm::FunctionCallee AtExitFunc = CGM.CreateRuntimeFunction(AtExitTy, "atexit", llvm::AttributeList(), /*Local=*/true); CtorBuilder.CreateCall(AtExitFunc, CleanupFn); @@ -585,7 +750,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() { return nullptr; // void __cudaUnregisterFatBinary(void ** handle); - llvm::Constant *UnregisterFatbinFunc = CGM.CreateRuntimeFunction( + llvm::FunctionCallee UnregisterFatbinFunc = CGM.CreateRuntimeFunction( llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false), addUnderscoredPrefixToName("UnregisterFatBinary")); @@ -627,6 +792,12 @@ llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() { return ModuleDtorFunc; } +std::string CGNVCUDARuntime::getDeviceStubName(llvm::StringRef Name) const { + if (!CGM.getLangOpts().HIP) + return Name; + return (Name + ".stub").str(); +} + CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) { return new CGNVCUDARuntime(CGM); } diff --git a/lib/CodeGen/CGCUDARuntime.cpp b/lib/CodeGen/CGCUDARuntime.cpp index 1936f9f13692..c14a9d3f2bbb 100644 --- a/lib/CodeGen/CGCUDARuntime.cpp +++ b/lib/CodeGen/CGCUDARuntime.cpp @@ -1,9 +1,8 @@ //===----- CGCUDARuntime.cpp - Interface to CUDA Runtimes -----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/CGCUDARuntime.h b/lib/CodeGen/CGCUDARuntime.h index 0168f4f9e942..e548a3a546d4 100644 --- a/lib/CodeGen/CGCUDARuntime.h +++ b/lib/CodeGen/CGCUDARuntime.h @@ -1,9 +1,8 @@ //===----- CGCUDARuntime.h - Interface to CUDA Runtimes ---------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -16,6 +15,8 @@ #ifndef LLVM_CLANG_LIB_CODEGEN_CGCUDARUNTIME_H #define LLVM_CLANG_LIB_CODEGEN_CGCUDARUNTIME_H +#include "llvm/ADT/StringRef.h" + namespace llvm { class Function; class GlobalVariable; @@ -24,6 +25,7 @@ class GlobalVariable; namespace clang { class CUDAKernelCallExpr; +class VarDecl; namespace CodeGen { @@ -53,7 +55,8 @@ public: /// Emits a kernel launch stub. virtual void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) = 0; - virtual void registerDeviceVar(llvm::GlobalVariable &Var, unsigned Flags) = 0; + virtual void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var, + unsigned Flags) = 0; /// Constructs and returns a module initialization function or nullptr if it's /// not needed. Must be called after all kernels have been emitted. @@ -62,6 +65,9 @@ public: /// Returns a module cleanup function or nullptr if it's not needed. /// Must be called after ModuleCtorFunction virtual llvm::Function *makeModuleDtorFunction() = 0; + + /// Construct and return the stub name of a kernel. + virtual std::string getDeviceStubName(llvm::StringRef Name) const = 0; }; /// Creates an instance of a CUDA runtime class. diff --git a/lib/CodeGen/CGCXX.cpp b/lib/CodeGen/CGCXX.cpp index 8b0733fbec3e..6d903a0d09e2 100644 --- a/lib/CodeGen/CGCXX.cpp +++ b/lib/CodeGen/CGCXX.cpp @@ -1,9 +1,8 @@ //===--- CGCXX.cpp - Emit LLVM Code for declarations ----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -204,55 +203,44 @@ bool CodeGenModule::TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D) { return false; } -llvm::Function *CodeGenModule::codegenCXXStructor(const CXXMethodDecl *MD, - StructorType Type) { - const CGFunctionInfo &FnInfo = - getTypes().arrangeCXXStructorDeclaration(MD, Type); +llvm::Function *CodeGenModule::codegenCXXStructor(GlobalDecl GD) { + const CGFunctionInfo &FnInfo = getTypes().arrangeCXXStructorDeclaration(GD); auto *Fn = cast<llvm::Function>( - getAddrOfCXXStructor(MD, Type, &FnInfo, /*FnType=*/nullptr, + getAddrOfCXXStructor(GD, &FnInfo, /*FnType=*/nullptr, /*DontDefer=*/true, ForDefinition)); - GlobalDecl GD; - if (const auto *DD = dyn_cast<CXXDestructorDecl>(MD)) { - GD = GlobalDecl(DD, toCXXDtorType(Type)); - } else { - const auto *CD = cast<CXXConstructorDecl>(MD); - GD = GlobalDecl(CD, toCXXCtorType(Type)); - } - setFunctionLinkage(GD, Fn); CodeGenFunction(*this).GenerateCode(GD, Fn, FnInfo); setNonAliasAttributes(GD, Fn); - SetLLVMFunctionAttributesForDefinition(MD, Fn); + SetLLVMFunctionAttributesForDefinition(cast<CXXMethodDecl>(GD.getDecl()), Fn); return Fn; } -llvm::Constant *CodeGenModule::getAddrOfCXXStructor( - const CXXMethodDecl *MD, StructorType Type, const CGFunctionInfo *FnInfo, - llvm::FunctionType *FnType, bool DontDefer, - ForDefinition_t IsForDefinition) { - GlobalDecl GD; - if (auto *CD = dyn_cast<CXXConstructorDecl>(MD)) { - GD = GlobalDecl(CD, toCXXCtorType(Type)); - } else { +llvm::FunctionCallee CodeGenModule::getAddrAndTypeOfCXXStructor( + GlobalDecl GD, const CGFunctionInfo *FnInfo, llvm::FunctionType *FnType, + bool DontDefer, ForDefinition_t IsForDefinition) { + auto *MD = cast<CXXMethodDecl>(GD.getDecl()); + + if (isa<CXXDestructorDecl>(MD)) { // Always alias equivalent complete destructors to base destructors in the // MS ABI. if (getTarget().getCXXABI().isMicrosoft() && - Type == StructorType::Complete && MD->getParent()->getNumVBases() == 0) - Type = StructorType::Base; - GD = GlobalDecl(cast<CXXDestructorDecl>(MD), toCXXDtorType(Type)); + GD.getDtorType() == Dtor_Complete && + MD->getParent()->getNumVBases() == 0) + GD = GD.getWithDtorType(Dtor_Base); } if (!FnType) { if (!FnInfo) - FnInfo = &getTypes().arrangeCXXStructorDeclaration(MD, Type); + FnInfo = &getTypes().arrangeCXXStructorDeclaration(GD); FnType = getTypes().GetFunctionType(*FnInfo); } - return GetOrCreateLLVMFunction( + llvm::Constant *Ptr = GetOrCreateLLVMFunction( getMangledName(GD), FnType, GD, /*ForVTable=*/false, DontDefer, - /*isThunk=*/false, /*ExtraAttrs=*/llvm::AttributeList(), IsForDefinition); + /*IsThunk=*/false, /*ExtraAttrs=*/llvm::AttributeList(), IsForDefinition); + return {FnType, Ptr}; } static CGCallee BuildAppleKextVirtualCall(CodeGenFunction &CGF, @@ -312,7 +300,7 @@ CodeGenFunction::BuildAppleKextVirtualDestructorCall( assert(DD->isVirtual() && Type != Dtor_Base); // Compute the function type we're calling. 
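A reading aid for this hunk and the next few files: the (CXXMethodDecl, StructorType) pair that used to select which constructor or destructor variant to emit is folded into GlobalDecl, which carries the variant itself. A short fragment against the internal API as used in these hunks (not a standalone program), assuming DD is a CXXDestructorDecl*:

    GlobalDecl Complete(DD, Dtor_Complete);   // was: (DD, StructorType::Complete)
    // The MS-ABI special case below rewrites the variant in place:
    GlobalDecl Base = Complete.getWithDtorType(Dtor_Base);
    const CGFunctionInfo &FI =
        CGM.getTypes().arrangeCXXStructorDeclaration(Base);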
const CGFunctionInfo &FInfo = CGM.getTypes().arrangeCXXStructorDeclaration( - DD, StructorType::Complete); + GlobalDecl(DD, Dtor_Complete)); llvm::Type *Ty = CGM.getTypes().GetFunctionType(FInfo); return ::BuildAppleKextVirtualCall(*this, GlobalDecl(DD, Type), Ty, RD); } diff --git a/lib/CodeGen/CGCXXABI.cpp b/lib/CodeGen/CGCXXABI.cpp index ed168b1ce72d..041c0f8959fd 100644 --- a/lib/CodeGen/CGCXXABI.cpp +++ b/lib/CodeGen/CGCXXABI.cpp @@ -1,9 +1,8 @@ //===----- CGCXXABI.cpp - Interface to C++ ABIs ---------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -29,12 +28,6 @@ void CGCXXABI::ErrorUnsupportedABI(CodeGenFunction &CGF, StringRef S) { << S; } -bool CGCXXABI::canCopyArgument(const CXXRecordDecl *RD) const { - // We can only copy the argument if there exists at least one trivial, - // non-deleted copy or move constructor. - return RD->canPassInRegisters(); -} - llvm::Constant *CGCXXABI::GetBogusMemberPointer(QualType T) { return llvm::Constant::getNullValue(CGM.getTypes().ConvertType(T)); } @@ -298,7 +291,7 @@ llvm::GlobalValue::LinkageTypes CGCXXABI::getCXXDestructorLinkage( GVALinkage Linkage, const CXXDestructorDecl *Dtor, CXXDtorType DT) const { // Delegate back to CGM by default. return CGM.getLLVMLinkageForDeclarator(Dtor, Linkage, - /*isConstantVariable=*/false); + /*IsConstantVariable=*/false); } bool CGCXXABI::NeedsVTTParameter(GlobalDecl GD) { diff --git a/lib/CodeGen/CGCXXABI.h b/lib/CodeGen/CGCXXABI.h index 65b50e14f436..3a9c3b347439 100644 --- a/lib/CodeGen/CGCXXABI.h +++ b/lib/CodeGen/CGCXXABI.h @@ -1,9 +1,8 @@ //===----- CGCXXABI.h - Interface to C++ ABIs -------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -137,10 +136,6 @@ public: RAA_Indirect }; - /// Returns true if C++ allows us to copy the memory of an object of type RD - /// when it is passed as an argument. - bool canCopyArgument(const CXXRecordDecl *RD) const; - /// Returns how an argument of the given record type should be passed. virtual RecordArgABI getRecordArgABI(const CXXRecordDecl *RD) const = 0; @@ -310,7 +305,7 @@ public: /// adding any required parameters. For convenience, ArgTys has been /// initialized with the type of 'this'. 
virtual AddedStructorArgs - buildStructorSignature(const CXXMethodDecl *MD, StructorType T, + buildStructorSignature(GlobalDecl GD, SmallVectorImpl<CanQualType> &ArgTys) = 0; /// Returns true if the given destructor type should be emitted as a linkonce @@ -383,7 +378,7 @@ public: virtual void EmitDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *DD, CXXDtorType Type, bool ForVirtualBase, bool Delegating, - Address This) = 0; + Address This, QualType ThisTy) = 0; /// Emits the VTable definitions required for the given record type. virtual void emitVTableDefinitions(CodeGenVTables &CGVT, @@ -426,11 +421,15 @@ public: llvm::Type *Ty, SourceLocation Loc) = 0; + using DeleteOrMemberCallExpr = + llvm::PointerUnion<const CXXDeleteExpr *, const CXXMemberCallExpr *>; + /// Emit the ABI-specific virtual destructor call. - virtual llvm::Value * - EmitVirtualDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *Dtor, - CXXDtorType DtorType, Address This, - const CXXMemberCallExpr *CE) = 0; + virtual llvm::Value *EmitVirtualDestructorCall(CodeGenFunction &CGF, + const CXXDestructorDecl *Dtor, + CXXDtorType DtorType, + Address This, + DeleteOrMemberCallExpr E) = 0; virtual void adjustCallArgsForDestructorThunk(CodeGenFunction &CGF, GlobalDecl GD, @@ -557,7 +556,7 @@ public: /// \param Dtor - a function taking a single pointer argument /// \param Addr - a pointer to pass to the destructor function. virtual void registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D, - llvm::Constant *Dtor, + llvm::FunctionCallee Dtor, llvm::Constant *Addr) = 0; /*************************** thread_local initialization ********************/ @@ -589,7 +588,7 @@ public: /// Emit a single constructor/destructor with the given type from a C++ /// constructor Decl. - virtual void emitCXXStructor(const CXXMethodDecl *MD, StructorType Type) = 0; + virtual void emitCXXStructor(GlobalDecl GD) = 0; /// Load a vtable from This, an object of polymorphic type RD, or from one of /// its virtual bases if it does not have its own vtable. Returns the vtable diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp index 7d494bb1f1c7..cf8024550eee 100644 --- a/lib/CodeGen/CGCall.cpp +++ b/lib/CodeGen/CGCall.cpp @@ -1,9 +1,8 @@ //===--- CGCall.cpp - Encapsulate calling convention details --------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -32,7 +31,6 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" @@ -69,12 +67,19 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) { } /// Derives the 'this' type for codegen purposes, i.e. ignoring method CVR -/// qualification. -static CanQualType GetThisType(ASTContext &Context, const CXXRecordDecl *RD, - const CXXMethodDecl *MD) { - QualType RecTy = Context.getTagDeclType(RD)->getCanonicalTypeInternal(); +/// qualification. Either or both of RD and MD may be null. 
A null RD indicates +/// that there is no meaningful 'this' type, and a null MD can occur when +/// calling a method pointer. +CanQualType CodeGenTypes::DeriveThisType(const CXXRecordDecl *RD, + const CXXMethodDecl *MD) { + QualType RecTy; + if (RD) + RecTy = Context.getTagDeclType(RD)->getCanonicalTypeInternal(); + else + RecTy = Context.VoidTy; + if (MD) - RecTy = Context.getAddrSpaceQualType(RecTy, MD->getTypeQualifiers().getAddressSpace()); + RecTy = Context.getAddrSpaceQualType(RecTy, MD->getMethodQualifiers().getAddressSpace()); return Context.getPointerType(CanQualType::CreateUnsafe(RecTy)); } @@ -169,11 +174,9 @@ static void appendParameterTypes(const CodeGenTypes &CGT, static const CGFunctionInfo & arrangeLLVMFunctionInfo(CodeGenTypes &CGT, bool instanceMethod, SmallVectorImpl<CanQualType> &prefix, - CanQual<FunctionProtoType> FTP, - const FunctionDecl *FD) { + CanQual<FunctionProtoType> FTP) { SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos; - RequiredArgs Required = - RequiredArgs::forPrototypePlus(FTP, prefix.size(), FD); + RequiredArgs Required = RequiredArgs::forPrototypePlus(FTP, prefix.size()); // FIXME: Kill copy. appendParameterTypes(CGT, prefix, paramInfos, FTP); CanQualType resultType = FTP->getReturnType().getUnqualifiedType(); @@ -187,11 +190,10 @@ arrangeLLVMFunctionInfo(CodeGenTypes &CGT, bool instanceMethod, /// Arrange the argument and result information for a value of the /// given freestanding function type. const CGFunctionInfo & -CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionProtoType> FTP, - const FunctionDecl *FD) { +CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionProtoType> FTP) { SmallVector<CanQualType, 16> argTypes; return ::arrangeLLVMFunctionInfo(*this, /*instanceMethod=*/false, argTypes, - FTP, FD); + FTP); } static CallingConv getCallingConventionForDecl(const Decl *D, bool IsWindows) { @@ -240,7 +242,7 @@ static CallingConv getCallingConventionForDecl(const Decl *D, bool IsWindows) { /// Arrange the argument and result information for a call to an /// unknown C++ non-static member function of the given abstract type. -/// (Zero value of RD means we don't have any meaningful "this" argument type, +/// (A null RD means we don't have any meaningful "this" argument type, /// so fall back to a generic pointer type). /// The member function must be an ordinary function, i.e. not a /// constructor or destructor. @@ -251,14 +253,11 @@ CodeGenTypes::arrangeCXXMethodType(const CXXRecordDecl *RD, SmallVector<CanQualType, 16> argTypes; // Add the 'this' pointer. - if (RD) - argTypes.push_back(GetThisType(Context, RD, MD)); - else - argTypes.push_back(Context.VoidPtrTy); + argTypes.push_back(DeriveThisType(RD, MD)); return ::arrangeLLVMFunctionInfo( *this, true, argTypes, - FTP->getCanonicalTypeUnqualified().getAs<FunctionProtoType>(), MD); + FTP->getCanonicalTypeUnqualified().getAs<FunctionProtoType>()); } /// Set calling convention for CUDA/HIP kernel. 
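The CVR-stripping that DeriveThisType performs is easiest to see from the source side; this is plain C++, with no compiler internals involved:

    struct S {
      void f();                 // 'this' is lowered as S *
      void g() const volatile;  // also lowered as S *: const/volatile are
                                // dropped, while an address-space qualifier
                                // on the method would be kept on the pointee
                                // (getAddrSpaceQualType above).
    };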
@@ -290,7 +289,7 @@ CodeGenTypes::arrangeCXXMethodDeclaration(const CXXMethodDecl *MD) { return arrangeCXXMethodType(ThisType, prototype.getTypePtr(), MD); } - return arrangeFreeFunctionType(prototype, MD); + return arrangeFreeFunctionType(prototype); } bool CodeGenTypes::inheritingCtorHasParams( @@ -300,29 +299,23 @@ bool CodeGenTypes::inheritingCtorHasParams( return Type == Ctor_Complete || !Inherited.getShadowDecl()->constructsVirtualBase() || !Target.getCXXABI().hasConstructorVariants(); - } +} const CGFunctionInfo & -CodeGenTypes::arrangeCXXStructorDeclaration(const CXXMethodDecl *MD, - StructorType Type) { +CodeGenTypes::arrangeCXXStructorDeclaration(GlobalDecl GD) { + auto *MD = cast<CXXMethodDecl>(GD.getDecl()); SmallVector<CanQualType, 16> argTypes; SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos; - argTypes.push_back(GetThisType(Context, MD->getParent(), MD)); + argTypes.push_back(DeriveThisType(MD->getParent(), MD)); bool PassParams = true; - GlobalDecl GD; if (auto *CD = dyn_cast<CXXConstructorDecl>(MD)) { - GD = GlobalDecl(CD, toCXXCtorType(Type)); - // A base class inheriting constructor doesn't get forwarded arguments // needed to construct a virtual base (or base class thereof). if (auto Inherited = CD->getInheritedConstructor()) - PassParams = inheritingCtorHasParams(Inherited, toCXXCtorType(Type)); - } else { - auto *DD = dyn_cast<CXXDestructorDecl>(MD); - GD = GlobalDecl(DD, toCXXDtorType(Type)); + PassParams = inheritingCtorHasParams(Inherited, GD.getCtorType()); } CanQual<FunctionProtoType> FTP = GetFormalType(MD); @@ -332,7 +325,7 @@ CodeGenTypes::arrangeCXXStructorDeclaration(const CXXMethodDecl *MD, appendParameterTypes(*this, argTypes, paramInfos, FTP); CGCXXABI::AddedStructorArgs AddedArgs = - TheCXXABI.buildStructorSignature(MD, Type, argTypes); + TheCXXABI.buildStructorSignature(GD, argTypes); if (!paramInfos.empty()) { // Note: prefix implies after the first param. if (AddedArgs.Prefix) @@ -408,8 +401,11 @@ CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args, unsigned TotalPrefixArgs = 1 + ExtraPrefixArgs; CanQual<FunctionProtoType> FPT = GetFormalType(D); - RequiredArgs Required = - RequiredArgs::forPrototypePlus(FPT, TotalPrefixArgs + ExtraSuffixArgs, D); + RequiredArgs Required = PassProtoArgs + ? RequiredArgs::forPrototypePlus( + FPT, TotalPrefixArgs + ExtraSuffixArgs) + : RequiredArgs::All; + GlobalDecl GD(D, CtorKind); CanQualType ResultType = TheCXXABI.HasThisReturn(GD) ? ArgTypes.front() @@ -452,7 +448,7 @@ CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) { /*chainCall=*/false, None, noProto->getExtInfo(), {},RequiredArgs::All); } - return arrangeFreeFunctionType(FTy.castAs<FunctionProtoType>(), FD); + return arrangeFreeFunctionType(FTy.castAs<FunctionProtoType>()); } /// Arrange the argument and result information for the declaration or @@ -517,11 +513,9 @@ CodeGenTypes::arrangeGlobalDeclaration(GlobalDecl GD) { // FIXME: Do we need to handle ObjCMethodDecl? 
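A note on RequiredArgs, which several hunks above rebuild through forPrototypePlus: it records how many leading arguments the prototype fixes, and anything beyond that count is variadic and gets no parameter-derived ABI treatment. In source terms:

    int printf(const char *fmt, ...);  // RequiredArgs == 1 (plus any prefix
                                       // arguments such as 'this' or sret)
    // printf("%d %d\n", 1, 2) passes three values; only 'fmt' is matched
    // against a declared parameter, the rest follow vararg promotion rules.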
const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl()); - if (const CXXConstructorDecl *CD = dyn_cast<CXXConstructorDecl>(FD)) - return arrangeCXXStructorDeclaration(CD, getFromCtorType(GD.getCtorType())); - - if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(FD)) - return arrangeCXXStructorDeclaration(DD, getFromDtorType(GD.getDtorType())); + if (isa<CXXConstructorDecl>(GD.getDecl()) || + isa<CXXDestructorDecl>(GD.getDecl())) + return arrangeCXXStructorDeclaration(GD); return arrangeFunctionDeclaration(FD); } @@ -535,7 +529,7 @@ const CGFunctionInfo & CodeGenTypes::arrangeUnprototypedMustTailThunk(const CXXMethodDecl *MD) { assert(MD->isVirtual() && "only methods have thunks"); CanQual<FunctionProtoType> FTP = GetFormalType(MD); - CanQualType ArgTys[] = { GetThisType(Context, MD->getParent(), MD) }; + CanQualType ArgTys[] = {DeriveThisType(MD->getParent(), MD)}; return arrangeLLVMFunctionInfo(Context.VoidTy, /*instanceMethod=*/false, /*chainCall=*/false, ArgTys, FTP->getExtInfo(), {}, RequiredArgs(1)); @@ -549,7 +543,7 @@ CodeGenTypes::arrangeMSCtorClosure(const CXXConstructorDecl *CD, CanQual<FunctionProtoType> FTP = GetFormalType(CD); SmallVector<CanQualType, 2> ArgTys; const CXXRecordDecl *RD = CD->getParent(); - ArgTys.push_back(GetThisType(Context, RD, CD)); + ArgTys.push_back(DeriveThisType(RD, CD)); if (CT == Ctor_CopyingClosure) ArgTys.push_back(*FTP->param_type_begin()); if (RD->getNumVBases() > 0) @@ -582,7 +576,7 @@ arrangeFreeFunctionLikeCall(CodeGenTypes &CGT, // extra prefix plus the arguments in the prototype. if (const FunctionProtoType *proto = dyn_cast<FunctionProtoType>(fnType)) { if (proto->isVariadic()) - required = RequiredArgs(proto->getNumParams() + numExtraRequiredArgs); + required = RequiredArgs::forPrototypePlus(proto, numExtraRequiredArgs); if (proto->hasExtParameterInfos()) addExtParameterInfosForCall(paramInfos, proto, numExtraRequiredArgs, @@ -635,11 +629,10 @@ CodeGenTypes::arrangeBlockFunctionDeclaration(const FunctionProtoType *proto, auto paramInfos = getExtParameterInfosForCall(proto, 1, params.size()); auto argTypes = getArgTypesForDeclaration(Context, params); - return arrangeLLVMFunctionInfo( - GetReturnType(proto->getReturnType()), - /*instanceMethod*/ false, /*chainCall*/ false, argTypes, - proto->getExtInfo(), paramInfos, - RequiredArgs::forPrototypePlus(proto, 1, nullptr)); + return arrangeLLVMFunctionInfo(GetReturnType(proto->getReturnType()), + /*instanceMethod*/ false, /*chainCall*/ false, + argTypes, proto->getExtInfo(), paramInfos, + RequiredArgs::forPrototypePlus(proto, 1)); } const CGFunctionInfo & @@ -808,6 +801,8 @@ CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC, ArrayRef<CanQualType> argTypes, RequiredArgs required) { assert(paramInfos.empty() || paramInfos.size() == argTypes.size()); + assert(!required.allowsOptionalArgs() || + required.getNumRequiredArgs() <= argTypes.size()); void *buffer = operator new(totalSizeToAlloc<ArgInfo, ExtParameterInfo>( @@ -1148,7 +1143,7 @@ EnterStructPointerForCoercedAccess(Address SrcPtr, return SrcPtr; // GEP into the first element. - SrcPtr = CGF.Builder.CreateStructGEP(SrcPtr, 0, CharUnits(), "coerce.dive"); + SrcPtr = CGF.Builder.CreateStructGEP(SrcPtr, 0, "coerce.dive"); // If the first element is a struct, recurse. llvm::Type *SrcTy = SrcPtr.getElementType(); @@ -1276,12 +1271,8 @@ static void BuildAggStore(CodeGenFunction &CGF, llvm::Value *Val, // Prefer scalar stores to first-class aggregate stores. 
if (llvm::StructType *STy = dyn_cast<llvm::StructType>(Val->getType())) { - const llvm::StructLayout *Layout = - CGF.CGM.getDataLayout().getStructLayout(STy); - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - auto EltOffset = CharUnits::fromQuantity(Layout->getElementOffset(i)); - Address EltPtr = CGF.Builder.CreateStructGEP(Dest, i, EltOffset); + Address EltPtr = CGF.Builder.CreateStructGEP(Dest, i); llvm::Value *Elt = CGF.Builder.CreateExtractValue(Val, i); CGF.Builder.CreateStore(Elt, EltPtr, DestIsVolatile); } @@ -1682,13 +1673,7 @@ llvm::Type *CodeGenTypes::GetFunctionTypeForVTable(GlobalDecl GD) { if (!isFuncTypeConvertible(FPT)) return llvm::StructType::get(getLLVMContext()); - const CGFunctionInfo *Info; - if (isa<CXXDestructorDecl>(MD)) - Info = - &arrangeCXXStructorDeclaration(MD, getFromDtorType(GD.getDtorType())); - else - Info = &arrangeCXXMethodDeclaration(MD); - return GetFunctionType(*Info); + return GetFunctionType(GD); } static void AddAttributesFromFunctionProtoType(ASTContext &Ctx, @@ -1793,8 +1778,6 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, if (CodeGenOpts.Backchain) FuncAttrs.addAttribute("backchain"); - // FIXME: The interaction of this attribute with the SLH command line flag - // has not been determined. if (CodeGenOpts.SpeculativeLoadHardening) FuncAttrs.addAttribute(llvm::Attribute::SpeculativeLoadHardening); } @@ -1826,9 +1809,8 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, void CodeGenModule::AddDefaultFnAttrs(llvm::Function &F) { llvm::AttrBuilder FuncAttrs; - ConstructDefaultFnAttrList(F.getName(), - F.hasFnAttribute(llvm::Attribute::OptimizeNone), - /* AttrOnCallsite = */ false, FuncAttrs); + ConstructDefaultFnAttrList(F.getName(), F.hasOptNone(), + /* AttrOnCallSite = */ false, FuncAttrs); F.addAttributes(llvm::AttributeList::FunctionIndex, FuncAttrs); } @@ -1864,8 +1846,6 @@ void CodeGenModule::ConstructAttributeList( FuncAttrs.addAttribute(llvm::Attribute::NoDuplicate); if (TargetDecl->hasAttr<ConvergentAttr>()) FuncAttrs.addAttribute(llvm::Attribute::Convergent); - if (TargetDecl->hasAttr<SpeculativeLoadHardeningAttr>()) - FuncAttrs.addAttribute(llvm::Attribute::SpeculativeLoadHardening); if (const FunctionDecl *Fn = dyn_cast<FunctionDecl>(TargetDecl)) { AddAttributesFromFunctionProtoType( @@ -1910,6 +1890,16 @@ void CodeGenModule::ConstructAttributeList( ConstructDefaultFnAttrList(Name, HasOptnone, AttrOnCallSite, FuncAttrs); + // This must run after constructing the default function attribute list + // to ensure that the speculative load hardening attribute is removed + // in the case where the -mspeculative-load-hardening flag was passed. + if (TargetDecl) { + if (TargetDecl->hasAttr<NoSpeculativeLoadHardeningAttr>()) + FuncAttrs.removeAttribute(llvm::Attribute::SpeculativeLoadHardening); + if (TargetDecl->hasAttr<SpeculativeLoadHardeningAttr>()) + FuncAttrs.addAttribute(llvm::Attribute::SpeculativeLoadHardening); + } + if (CodeGenOpts.EnableSegmentedStacks && !(TargetDecl && TargetDecl->hasAttr<NoSplitStackAttr>())) FuncAttrs.addAttribute("split-stack"); @@ -2009,8 +1999,7 @@ void CodeGenModule::ConstructAttributeList( // Attach attributes to sret. 
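As background for the sret block that follows: a return value too large to live in registers is returned through a hidden pointer, and that hidden argument is what these attributes decorate. A hedged source-level analogy, not the exact IR:

    struct Big { long v[8]; };
    Big make();                  // C++ signature
    // ABI-level shape the sret path models:
    //   void make(Big *result); // caller allocates; 'result' carries the
    //                           // StructRet (and possibly InReg) attributes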
if (IRFunctionArgs.hasSRetArg()) { llvm::AttrBuilder SRETAttrs; - if (!RetAI.getSuppressSRet()) - SRETAttrs.addAttribute(llvm::Attribute::StructRet); + SRETAttrs.addAttribute(llvm::Attribute::StructRet); hasUsedSRet = true; if (RetAI.getInReg()) SRETAttrs.addAttribute(llvm::Attribute::InReg); @@ -2066,7 +2055,7 @@ void CodeGenModule::ConstructAttributeList( Attrs.addAttribute(llvm::Attribute::InReg); if (AI.getIndirectByVal()) - Attrs.addAttribute(llvm::Attribute::ByVal); + Attrs.addByValAttr(getTypes().ConvertTypeForMem(ParamType)); CharUnits Align = AI.getIndirectAlign(); @@ -2262,9 +2251,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // If we're using inalloca, all the memory arguments are GEPs off of the last // parameter, which is a pointer to the complete memory area. Address ArgStruct = Address::invalid(); - const llvm::StructLayout *ArgStructLayout = nullptr; if (IRFunctionArgs.hasInallocaArg()) { - ArgStructLayout = CGM.getDataLayout().getStructLayout(FI.getArgStruct()); ArgStruct = Address(FnArgs[IRFunctionArgs.getInallocaArgNo()], FI.getArgStructAlignment()); @@ -2313,10 +2300,8 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, case ABIArgInfo::InAlloca: { assert(NumIRArgs == 0); auto FieldIndex = ArgI.getInAllocaFieldIndex(); - CharUnits FieldOffset = - CharUnits::fromQuantity(ArgStructLayout->getElementOffset(FieldIndex)); - Address V = Builder.CreateStructGEP(ArgStruct, FieldIndex, FieldOffset, - Arg->getName()); + Address V = + Builder.CreateStructGEP(ArgStruct, FieldIndex, Arg->getName()); ArgVals.push_back(ParamValue::forIndirect(V)); break; } @@ -2476,7 +2461,6 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, llvm::StructType *STy = dyn_cast<llvm::StructType>(ArgI.getCoerceToType()); if (ArgI.isDirect() && ArgI.getCanBeFlattened() && STy && STy->getNumElements() > 1) { - auto SrcLayout = CGM.getDataLayout().getStructLayout(STy); uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(STy); llvm::Type *DstTy = Ptr.getElementType(); uint64_t DstSize = CGM.getDataLayout().getTypeAllocSize(DstTy); @@ -2493,9 +2477,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { auto AI = FnArgs[FirstIRArg + i]; AI->setName(Arg->getName() + ".coerce" + Twine(i)); - auto Offset = CharUnits::fromQuantity(SrcLayout->getElementOffset(i)); - Address EltPtr = - Builder.CreateStructGEP(AddrToStoreInto, i, Offset); + Address EltPtr = Builder.CreateStructGEP(AddrToStoreInto, i); Builder.CreateStore(AI, EltPtr); } @@ -2508,7 +2490,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, assert(NumIRArgs == 1); auto AI = FnArgs[FirstIRArg]; AI->setName(Arg->getName() + ".coerce"); - CreateCoercedStore(AI, Ptr, /*DestIsVolatile=*/false, *this); + CreateCoercedStore(AI, Ptr, /*DstIsVolatile=*/false, *this); } // Match to what EmitParmDecl is expecting for this type. 
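The inalloca case unpacked in the prolog above also has a compact source-level reading (assumption: the 32-bit Microsoft C++ ABI, the one place inalloca is used):

    struct NonTrivial { NonTrivial(const NonTrivial &); int x; };
    void callee(NonTrivial a, int b);
    // Arguments are constructed directly in the caller's outgoing argument
    // area, so the callee effectively receives one struct:
    //   void callee(ArgPack *packed);  // ArgPack ~ { NonTrivial a; int b; }
    // and the prolog recovers each parameter with a struct GEP
    // (Builder.CreateStructGEP(ArgStruct, FieldIndex)) into *packed.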
@@ -2531,7 +2513,6 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, auto coercionType = ArgI.getCoerceAndExpandType(); alloca = Builder.CreateElementBitCast(alloca, coercionType); - auto layout = CGM.getDataLayout().getStructLayout(coercionType); unsigned argIndex = FirstIRArg; for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) { @@ -2539,7 +2520,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) continue; - auto eltAddr = Builder.CreateStructGEP(alloca, i, layout); + auto eltAddr = Builder.CreateStructGEP(alloca, i); auto elt = FnArgs[argIndex++]; Builder.CreateStore(elt, eltAddr); } @@ -2891,15 +2872,6 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, RV = SI->getValueOperand(); SI->eraseFromParent(); - // If that was the only use of the return value, nuke it as well now. - auto returnValueInst = ReturnValue.getPointer(); - if (returnValueInst->use_empty()) { - if (auto alloca = dyn_cast<llvm::AllocaInst>(returnValueInst)) { - alloca->eraseFromParent(); - ReturnValue = Address::invalid(); - } - } - // Otherwise, we have to do a simple load. } else { RV = Builder.CreateLoad(ReturnValue); @@ -2944,7 +2916,6 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, case ABIArgInfo::CoerceAndExpand: { auto coercionType = RetAI.getCoerceAndExpandType(); - auto layout = CGM.getDataLayout().getStructLayout(coercionType); // Load all of the coerced elements out into results. llvm::SmallVector<llvm::Value*, 4> results; @@ -2954,7 +2925,7 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, if (ABIArgInfo::isPaddingForCoerceAndExpand(coercedEltType)) continue; - auto eltAddr = Builder.CreateStructGEP(addr, i, layout); + auto eltAddr = Builder.CreateStructGEP(addr, i); auto elt = Builder.CreateLoad(eltAddr); results.push_back(elt); } @@ -3368,7 +3339,7 @@ void CallArgList::allocateArgumentMemory(CodeGenFunction &CGF) { void CallArgList::freeArgumentMemory(CodeGenFunction &CGF) const { if (StackBase) { // Restore the stack after the call. - llvm::Value *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::stackrestore); + llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::stackrestore); CGF.Builder.CreateCall(F, StackBase); } } @@ -3455,7 +3426,8 @@ void CodeGenFunction::EmitCallArgs( auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy)); assert(EmittedArg.getScalarVal() && "We emitted nothing for the arg?"); llvm::Value *V = evaluateOrEmitBuiltinObjectSize(Arg, PS->getType(), T, - EmittedArg.getScalarVal()); + EmittedArg.getScalarVal(), + PS->isDynamic()); Args.add(RValue::get(V), SizeTy); // If we're emitting args in reverse, be sure to do so with // pass_object_size, as well. 
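The pass_object_size handling above reads naturally at the source level. This sketch uses the Clang-specific attribute (not available in GCC); PS->isDynamic() in the code above corresponds to the pass_dynamic_object_size spelling.

    // A bounded fill: the size is supplied by the caller, per call site.
    void fill(char *const buf __attribute__((pass_object_size(0))), char c) {
      // Inside the callee, __builtin_object_size(buf, 0) yields the value
      // the caller computed and passed as a hidden trailing argument.
      for (unsigned long i = 0, n = __builtin_object_size(buf, 0); i < n; ++i)
        buf[i] = c;
    }

    void demo() {
      char arr[16];
      fill(arr, 'x');  // conceptually lowers to fill(arr, 16, 'x'): the
                       // caller evaluates __builtin_object_size(arr, 0) == 16
    }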
@@ -3530,7 +3502,7 @@ struct DestroyUnpassedArg final : EHScopeStack::Cleanup { const CXXDestructorDecl *Dtor = Ty->getAsCXXRecordDecl()->getDestructor(); assert(!Dtor->isTrivial()); CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete, /*for vbase*/ false, - /*Delegating=*/false, Addr); + /*Delegating=*/false, Addr, Ty); } else { CGF.callCStructDestructor(CGF.MakeAddrLValue(Addr, Ty)); } @@ -3565,7 +3537,7 @@ RValue CallArg::getRValue(CodeGenFunction &CGF) const { void CallArg::copyInto(CodeGenFunction &CGF, Address Addr) const { LValue Dst = CGF.MakeAddrLValue(Addr, Ty); if (!HasLV && RV.isScalar()) - CGF.EmitStoreOfScalar(RV.getScalarVal(), Dst, /*init=*/true); + CGF.EmitStoreOfScalar(RV.getScalarVal(), Dst, /*isInit=*/true); else if (!HasLV && RV.isComplex()) CGF.EmitStoreOfComplex(RV.getComplexVal(), Dst, /*init=*/true); else { @@ -3678,15 +3650,15 @@ CodeGenFunction::AddObjCARCExceptionMetadata(llvm::Instruction *Inst) { /// Emits a call to the given no-arguments nounwind runtime function. llvm::CallInst * -CodeGenFunction::EmitNounwindRuntimeCall(llvm::Value *callee, +CodeGenFunction::EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const llvm::Twine &name) { return EmitNounwindRuntimeCall(callee, None, name); } /// Emits a call to the given nounwind runtime function. llvm::CallInst * -CodeGenFunction::EmitNounwindRuntimeCall(llvm::Value *callee, - ArrayRef<llvm::Value*> args, +CodeGenFunction::EmitNounwindRuntimeCall(llvm::FunctionCallee callee, + ArrayRef<llvm::Value *> args, const llvm::Twine &name) { llvm::CallInst *call = EmitRuntimeCall(callee, args, name); call->setDoesNotThrow(); @@ -3695,9 +3667,8 @@ CodeGenFunction::EmitNounwindRuntimeCall(llvm::Value *callee, /// Emits a simple call (never an invoke) to the given no-arguments /// runtime function. -llvm::CallInst * -CodeGenFunction::EmitRuntimeCall(llvm::Value *callee, - const llvm::Twine &name) { +llvm::CallInst *CodeGenFunction::EmitRuntimeCall(llvm::FunctionCallee callee, + const llvm::Twine &name) { return EmitRuntimeCall(callee, None, name); } @@ -3721,21 +3692,20 @@ CodeGenFunction::getBundlesForFunclet(llvm::Value *Callee) { } /// Emits a simple call (never an invoke) to the given runtime function. -llvm::CallInst * -CodeGenFunction::EmitRuntimeCall(llvm::Value *callee, - ArrayRef<llvm::Value*> args, - const llvm::Twine &name) { - llvm::CallInst *call = - Builder.CreateCall(callee, args, getBundlesForFunclet(callee), name); +llvm::CallInst *CodeGenFunction::EmitRuntimeCall(llvm::FunctionCallee callee, + ArrayRef<llvm::Value *> args, + const llvm::Twine &name) { + llvm::CallInst *call = Builder.CreateCall( + callee, args, getBundlesForFunclet(callee.getCallee()), name); call->setCallingConv(getRuntimeCC()); return call; } /// Emits a call or invoke to the given noreturn runtime function. -void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke(llvm::Value *callee, - ArrayRef<llvm::Value*> args) { +void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke( + llvm::FunctionCallee callee, ArrayRef<llvm::Value *> args) { SmallVector<llvm::OperandBundleDef, 1> BundleList = - getBundlesForFunclet(callee); + getBundlesForFunclet(callee.getCallee()); if (getInvokeDest()) { llvm::InvokeInst *invoke = @@ -3755,33 +3725,32 @@ void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke(llvm::Value *callee, } /// Emits a call or invoke instruction to the given nullary runtime function. 
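Most of the churn in the hunks that follow is mechanical: LLVM 8/9 replaced bare llvm::Value* callees and the llvm::CallSite wrapper with llvm::FunctionCallee and llvm::CallBase. A minimal sketch against the public LLVM API of that era (the runtime-function name is borrowed from elsewhere in this patch):

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Module.h"

    void emitHandlerCall(llvm::Module &M, llvm::IRBuilder<> &B) {
      // getOrInsertFunction now returns a {FunctionType*, pointer} pair...
      llvm::FunctionCallee FC = M.getOrInsertFunction(
          "__asan_handle_no_return", llvm::Type::getVoidTy(M.getContext()));
      // ...so call sites no longer dig the type out of the pointer's pointee
      // type, and CreateCall yields a CallBase-derived CallInst.
      B.CreateCall(FC);
    }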
-llvm::CallSite -CodeGenFunction::EmitRuntimeCallOrInvoke(llvm::Value *callee, +llvm::CallBase * +CodeGenFunction::EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, const Twine &name) { return EmitRuntimeCallOrInvoke(callee, None, name); } /// Emits a call or invoke instruction to the given runtime function. -llvm::CallSite -CodeGenFunction::EmitRuntimeCallOrInvoke(llvm::Value *callee, - ArrayRef<llvm::Value*> args, +llvm::CallBase * +CodeGenFunction::EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, + ArrayRef<llvm::Value *> args, const Twine &name) { - llvm::CallSite callSite = EmitCallOrInvoke(callee, args, name); - callSite.setCallingConv(getRuntimeCC()); - return callSite; + llvm::CallBase *call = EmitCallOrInvoke(callee, args, name); + call->setCallingConv(getRuntimeCC()); + return call; } /// Emits a call or invoke instruction to the given function, depending /// on the current state of the EH stack. -llvm::CallSite -CodeGenFunction::EmitCallOrInvoke(llvm::Value *Callee, - ArrayRef<llvm::Value *> Args, - const Twine &Name) { +llvm::CallBase *CodeGenFunction::EmitCallOrInvoke(llvm::FunctionCallee Callee, + ArrayRef<llvm::Value *> Args, + const Twine &Name) { llvm::BasicBlock *InvokeDest = getInvokeDest(); SmallVector<llvm::OperandBundleDef, 1> BundleList = - getBundlesForFunclet(Callee); + getBundlesForFunclet(Callee.getCallee()); - llvm::Instruction *Inst; + llvm::CallBase *Inst; if (!InvokeDest) Inst = Builder.CreateCall(Callee, Args, BundleList, Name); else { @@ -3796,7 +3765,7 @@ CodeGenFunction::EmitCallOrInvoke(llvm::Value *Callee, if (CGM.getLangOpts().ObjCAutoRefCount) AddObjCARCExceptionMetadata(Inst); - return llvm::CallSite(Inst); + return Inst; } void CodeGenFunction::deferPlaceholderReplacement(llvm::Instruction *Old, @@ -3808,7 +3777,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, const CallArgList &CallArgs, - llvm::Instruction **callOrInvoke, + llvm::CallBase **callOrInvoke, SourceLocation Loc) { // FIXME: We no longer need the types from CallArgs; lift up and simplify. @@ -3819,17 +3788,46 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, QualType RetTy = CallInfo.getReturnType(); const ABIArgInfo &RetAI = CallInfo.getReturnInfo(); - llvm::FunctionType *IRFuncTy = Callee.getFunctionType(); + llvm::FunctionType *IRFuncTy = getTypes().GetFunctionType(CallInfo); + + const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl().getDecl(); + if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl)) + // We can only guarantee that a function is called from the correct + // context/function based on the appropriate target attributes, + // so only check in the case where we have both always_inline and target + // since otherwise we could be making a conditional call after a check for + // the proper cpu features (and it won't cause code generation issues due to + // function based code generation). + if (TargetDecl->hasAttr<AlwaysInlineAttr>() && + TargetDecl->hasAttr<TargetAttr>()) + checkTargetFeatures(Loc, FD); + +#ifndef NDEBUG + if (!(CallInfo.isVariadic() && CallInfo.getArgStruct())) { + // For an inalloca varargs function, we don't expect CallInfo to match the + // function pointer's type, because the inalloca struct will have extra + // fields in it for the varargs parameters. Code later in this function + // bitcasts the function pointer to the type derived from CallInfo.
+ // + // In other cases, we assert that the types match up (until pointers stop + // having pointee types). + llvm::Type *TypeFromVal; + if (Callee.isVirtual()) + TypeFromVal = Callee.getVirtualFunctionType(); + else + TypeFromVal = + Callee.getFunctionPointer()->getType()->getPointerElementType(); + assert(IRFuncTy == TypeFromVal); + } +#endif // 1. Set up the arguments. // If we're using inalloca, insert the allocation after the stack save. // FIXME: Do this earlier rather than hacking it in here! Address ArgMemory = Address::invalid(); - const llvm::StructLayout *ArgMemoryLayout = nullptr; if (llvm::StructType *ArgStruct = CallInfo.getArgStruct()) { const llvm::DataLayout &DL = CGM.getDataLayout(); - ArgMemoryLayout = DL.getStructLayout(ArgStruct); llvm::Instruction *IP = CallArgs.getStackBase(); llvm::AllocaInst *AI; if (IP) { @@ -3846,13 +3844,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, ArgMemory = Address(AI, Align); } - // Helper function to drill into the inalloca allocation. - auto createInAllocaStructGEP = [&](unsigned FieldIndex) -> Address { - auto FieldOffset = - CharUnits::fromQuantity(ArgMemoryLayout->getElementOffset(FieldIndex)); - return Builder.CreateStructGEP(ArgMemory, FieldIndex, FieldOffset); - }; - ClangToLLVMArgMapping IRFunctionArgs(CGM.getContext(), CallInfo); SmallVector<llvm::Value *, 16> IRCallArgs(IRFunctionArgs.totalIRArgs()); @@ -3875,7 +3866,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (IRFunctionArgs.hasSRetArg()) { IRCallArgs[IRFunctionArgs.getSRetArgNo()] = SRetPtr.getPointer(); } else if (RetAI.isInAlloca()) { - Address Addr = createInAllocaStructGEP(RetAI.getInAllocaFieldIndex()); + Address Addr = + Builder.CreateStructGEP(ArgMemory, RetAI.getInAllocaFieldIndex()); Builder.CreateStore(SRetPtr.getPointer(), Addr); } } @@ -3913,12 +3905,14 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, cast<llvm::Instruction>(Addr.getPointer()); CGBuilderTy::InsertPoint IP = Builder.saveIP(); Builder.SetInsertPoint(Placeholder); - Addr = createInAllocaStructGEP(ArgInfo.getInAllocaFieldIndex()); + Addr = + Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex()); Builder.restoreIP(IP); deferPlaceholderReplacement(Placeholder, Addr.getPointer()); } else { // Store the RValue into the argument struct. - Address Addr = createInAllocaStructGEP(ArgInfo.getInAllocaFieldIndex()); + Address Addr = + Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex()); unsigned AS = Addr.getType()->getPointerAddressSpace(); llvm::Type *MemType = ConvertTypeForMem(I->Ty)->getPointerTo(AS); // There are some cases where a trivial bitcast is not avoidable. 
The @@ -4099,11 +4093,9 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, STy->getPointerTo(Src.getAddressSpace())); } - auto SrcLayout = CGM.getDataLayout().getStructLayout(STy); assert(NumIRArgs == STy->getNumElements()); for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - auto Offset = CharUnits::fromQuantity(SrcLayout->getElementOffset(i)); - Address EltPtr = Builder.CreateStructGEP(Src, i, Offset); + Address EltPtr = Builder.CreateStructGEP(Src, i); llvm::Value *LI = Builder.CreateLoad(EltPtr); IRCallArgs[FirstIRArg + i] = LI; } @@ -4153,7 +4145,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) { llvm::Type *eltType = coercionType->getElementType(i); if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) continue; - Address eltAddr = Builder.CreateStructGEP(addr, i, layout); + Address eltAddr = Builder.CreateStructGEP(addr, i); llvm::Value *elt = Builder.CreateLoad(eltAddr); IRCallArgs[IRArgPos++] = elt; } @@ -4186,8 +4178,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // cases, we can't do any parameter mismatch checks. Give up and bitcast // the callee. unsigned CalleeAS = CalleePtr->getType()->getPointerAddressSpace(); - auto FnTy = getTypes().GetFunctionType(CallInfo)->getPointerTo(CalleeAS); - CalleePtr = Builder.CreateBitCast(CalleePtr, FnTy); + CalleePtr = + Builder.CreateBitCast(CalleePtr, IRFuncTy->getPointerTo(CalleeAS)); } else { llvm::Type *LastParamTy = IRFuncTy->getParamType(IRFuncTy->getNumParams() - 1); @@ -4219,19 +4211,20 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // // This makes the IR nicer, but more importantly it ensures that we // can inline the function at -O0 if it is marked always_inline. - auto simplifyVariadicCallee = [](llvm::Value *Ptr) -> llvm::Value* { - llvm::FunctionType *CalleeFT = - cast<llvm::FunctionType>(Ptr->getType()->getPointerElementType()); + auto simplifyVariadicCallee = [](llvm::FunctionType *CalleeFT, + llvm::Value *Ptr) -> llvm::Function * { if (!CalleeFT->isVarArg()) - return Ptr; + return nullptr; - llvm::ConstantExpr *CE = dyn_cast<llvm::ConstantExpr>(Ptr); - if (!CE || CE->getOpcode() != llvm::Instruction::BitCast) - return Ptr; + // Get underlying value if it's a bitcast + if (llvm::ConstantExpr *CE = dyn_cast<llvm::ConstantExpr>(Ptr)) { + if (CE->getOpcode() == llvm::Instruction::BitCast) + Ptr = CE->getOperand(0); + } - llvm::Function *OrigFn = dyn_cast<llvm::Function>(CE->getOperand(0)); + llvm::Function *OrigFn = dyn_cast<llvm::Function>(Ptr); if (!OrigFn) - return Ptr; + return nullptr; llvm::FunctionType *OrigFT = OrigFn->getFunctionType(); @@ -4240,15 +4233,19 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (OrigFT->isVarArg() || OrigFT->getNumParams() != CalleeFT->getNumParams() || OrigFT->getReturnType() != CalleeFT->getReturnType()) - return Ptr; + return nullptr; for (unsigned i = 0, e = OrigFT->getNumParams(); i != e; ++i) if (OrigFT->getParamType(i) != CalleeFT->getParamType(i)) - return Ptr; + return nullptr; return OrigFn; }; - CalleePtr = simplifyVariadicCallee(CalleePtr); + + if (llvm::Function *OrigFn = simplifyVariadicCallee(IRFuncTy, CalleePtr)) { + CalleePtr = OrigFn; + IRFuncTy = OrigFn->getFunctionType(); + } // 3. Perform the actual call. @@ -4293,11 +4290,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Apply always_inline to all calls within flatten functions. 
// FIXME: should this really take priority over __try, below? if (CurCodeDecl && CurCodeDecl->hasAttr<FlattenAttr>() && - !(Callee.getAbstractInfo().getCalleeDecl().getDecl() && - Callee.getAbstractInfo() - .getCalleeDecl() - .getDecl() - ->hasAttr<NoInlineAttr>())) { + !(TargetDecl && TargetDecl->hasAttr<NoInlineAttr>())) { Attrs = Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::AlwaysInline); @@ -4341,22 +4334,21 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, getBundlesForFunclet(CalleePtr); // Emit the actual call/invoke instruction. - llvm::CallSite CS; + llvm::CallBase *CI; if (!InvokeDest) { - CS = Builder.CreateCall(CalleePtr, IRCallArgs, BundleList); + CI = Builder.CreateCall(IRFuncTy, CalleePtr, IRCallArgs, BundleList); } else { llvm::BasicBlock *Cont = createBasicBlock("invoke.cont"); - CS = Builder.CreateInvoke(CalleePtr, Cont, InvokeDest, IRCallArgs, + CI = Builder.CreateInvoke(IRFuncTy, CalleePtr, Cont, InvokeDest, IRCallArgs, BundleList); EmitBlock(Cont); } - llvm::Instruction *CI = CS.getInstruction(); if (callOrInvoke) *callOrInvoke = CI; // Apply the attributes and calling convention. - CS.setAttributes(Attrs); - CS.setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv)); + CI->setAttributes(Attrs); + CI->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv)); // Apply various metadata. @@ -4371,7 +4363,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Insert instrumentation or attach profile metadata at indirect call sites. // For more details, see the comment before the definition of // IPVK_IndirectCallTarget in InstrProfData.inc. - if (!CS.getCalledFunction()) + if (!CI->getCalledFunction()) PGO.valueProfile(Builder, llvm::IPVK_IndirectCallTarget, CI, CalleePtr); @@ -4382,26 +4374,45 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Suppress tail calls if requested. if (llvm::CallInst *Call = dyn_cast<llvm::CallInst>(CI)) { - const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl().getDecl(); if (TargetDecl && TargetDecl->hasAttr<NotTailCalledAttr>()) Call->setTailCallKind(llvm::CallInst::TCK_NoTail); } + // Add metadata for calls to MSAllocator functions + if (getDebugInfo() && TargetDecl && + TargetDecl->hasAttr<MSAllocatorAttr>()) + getDebugInfo()->addHeapAllocSiteMetadata(CI, RetTy, Loc); + // 4. Finish the call. // If the call doesn't return, finish the basic block and clear the // insertion point; this allows the rest of IRGen to discard // unreachable code. - if (CS.doesNotReturn()) { + if (CI->doesNotReturn()) { if (UnusedReturnSizePtr) PopCleanupBlock(); // Strip away the noreturn attribute to better diagnose unreachable UB. if (SanOpts.has(SanitizerKind::Unreachable)) { - if (auto *F = CS.getCalledFunction()) + // Also remove from function since CallBase::hasFnAttr additionally checks + // attributes of the called function. + if (auto *F = CI->getCalledFunction()) F->removeFnAttr(llvm::Attribute::NoReturn); - CS.removeAttribute(llvm::AttributeList::FunctionIndex, - llvm::Attribute::NoReturn); + CI->removeAttribute(llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoReturn); + + // Avoid incompatibility with ASan which relies on the `noreturn` + // attribute to insert handler calls. 
+ if (SanOpts.hasOneOf(SanitizerKind::Address | + SanitizerKind::KernelAddress)) { + SanitizerScope SanScope(this); + llvm::IRBuilder<>::InsertPointGuard IPGuard(Builder); + Builder.SetInsertPoint(CI); + auto *FnType = llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false); + llvm::FunctionCallee Fn = + CGM.CreateRuntimeFunction(FnType, "__asan_handle_no_return"); + EmitNounwindRuntimeCall(Fn); + } } EmitUnreachable(Loc); @@ -4436,7 +4447,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, switch (RetAI.getKind()) { case ABIArgInfo::CoerceAndExpand: { auto coercionType = RetAI.getCoerceAndExpandType(); - auto layout = CGM.getDataLayout().getStructLayout(coercionType); Address addr = SRetPtr; addr = Builder.CreateElementBitCast(addr, coercionType); @@ -4448,7 +4458,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) { llvm::Type *eltType = coercionType->getElementType(i); if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) continue; - Address eltAddr = Builder.CreateStructGEP(addr, i, layout); + Address eltAddr = Builder.CreateStructGEP(addr, i); llvm::Value *elt = CI; if (requiresExtract) elt = Builder.CreateExtractValue(elt, unpaddedIndex++); @@ -4529,7 +4539,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } (); // Emit the assume_aligned check on the return value. - const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl().getDecl(); if (Ret.isScalar() && TargetDecl) { if (const auto *AA = TargetDecl->getAttr<AssumeAlignedAttr>()) { llvm::Value *OffsetValue = nullptr; @@ -4556,7 +4565,7 @@ CGCallee CGCallee::prepareConcreteCallee(CodeGenFunction &CGF) const { if (isVirtual()) { const CallExpr *CE = getVirtualCallExpr(); return CGF.CGM.getCXXABI().getVirtualFunctionPointer( - CGF, getVirtualMethodDecl(), getThisAddress(), getFunctionType(), + CGF, getVirtualMethodDecl(), getThisAddress(), getVirtualFunctionType(), CE ? CE->getBeginLoc() : SourceLocation()); } diff --git a/lib/CodeGen/CGCall.h b/lib/CodeGen/CGCall.h index c300808bea28..cc11ded704ab 100644 --- a/lib/CodeGen/CGCall.h +++ b/lib/CodeGen/CGCall.h @@ -1,9 +1,8 @@ //===----- CGCall.h - Encapsulate calling convention details ----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -136,6 +135,12 @@ public: return CGCallee(abstractInfo, functionPtr); } + static CGCallee + forDirect(llvm::FunctionCallee functionPtr, + const CGCalleeInfo &abstractInfo = CGCalleeInfo()) { + return CGCallee(abstractInfo, functionPtr.getCallee()); + } + static CGCallee forVirtual(const CallExpr *CE, GlobalDecl MD, Address Addr, llvm::FunctionType *FTy) { CGCallee result(SpecialKind::Virtual); @@ -199,12 +204,9 @@ public: assert(isVirtual()); return VirtualInfo.Addr; } - - llvm::FunctionType *getFunctionType() const { - if (isVirtual()) - return VirtualInfo.FTy; - return cast<llvm::FunctionType>( - getFunctionPointer()->getType()->getPointerElementType()); + llvm::FunctionType *getVirtualFunctionType() const { + assert(isVirtual()); + return VirtualInfo.FTy; } /// If this is a delayed callee computation of some sort, prepare diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp index ee150a792b76..c8bb63c5c4b1 100644 --- a/lib/CodeGen/CGClass.cpp +++ b/lib/CodeGen/CGClass.cpp @@ -1,9 +1,8 @@ //===--- CGClass.cpp - Emit LLVM Code for C++ classes -----------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -303,7 +302,8 @@ Address CodeGenFunction::GetAddressOfBaseClass( // Get the base pointer type. llvm::Type *BasePtrTy = - ConvertType((PathEnd[-1])->getType())->getPointerTo(); + ConvertType((PathEnd[-1])->getType()) + ->getPointerTo(Value.getType()->getPointerAddressSpace()); QualType DerivedTy = getContext().getRecordType(Derived); CharUnits DerivedAlign = CGM.getClassPointerAlignment(Derived); @@ -491,12 +491,15 @@ namespace { cast<CXXMethodDecl>(CGF.CurCodeDecl)->getParent(); const CXXDestructorDecl *D = BaseClass->getDestructor(); + // We are already inside a destructor, so presumably the object being + // destroyed should have the expected type. + QualType ThisTy = D->getThisObjectType(); Address Addr = CGF.GetAddressOfDirectBaseInCompleteClass(CGF.LoadCXXThisAddress(), DerivedClass, BaseClass, BaseIsVirtual); CGF.EmitCXXDestructorCall(D, Dtor_Base, BaseIsVirtual, - /*Delegating=*/false, Addr); + /*Delegating=*/false, Addr, ThisTy); } }; @@ -526,8 +529,7 @@ static bool BaseInitializerUsesThis(ASTContext &C, const Expr *Init) { static void EmitBaseInitializer(CodeGenFunction &CGF, const CXXRecordDecl *ClassDecl, - CXXCtorInitializer *BaseInit, - CXXCtorType CtorType) { + CXXCtorInitializer *BaseInit) { assert(BaseInit->isBaseInitializer() && "Must have base initializer!"); @@ -539,10 +541,6 @@ static void EmitBaseInitializer(CodeGenFunction &CGF, bool isBaseVirtual = BaseInit->isBaseVirtual(); - // The base constructor doesn't construct virtual bases. - if (CtorType == Ctor_Base && isBaseVirtual) - return; - // If the initializer for the base (other than the constructor // itself) accesses 'this' in any way, we need to initialize the // vtables. 
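Aside: the base-destructor cleanup hunk above is the first instance of a pattern this commit applies to every destructor call site in CGClass.cpp and CGDecl.cpp — the static type of the object being destroyed now travels with its address, instead of the callee re-deriving a type from the pointer. A minimal sketch of the new calling pattern, reusing only names that appear in the hunk (CGF, Addr, BaseClass, BaseIsVirtual):

// The destructor call now takes the object type explicitly; ThisTy is the
// pointee type of 'this', obtained from the destructor declaration itself.
const CXXDestructorDecl *D = BaseClass->getDestructor();
QualType ThisTy = D->getThisObjectType();
CGF.EmitCXXDestructorCall(D, Dtor_Base, BaseIsVirtual,
                          /*Delegating=*/false, Addr, ThisTy);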
@@ -561,7 +559,7 @@ static void EmitBaseInitializer(CodeGenFunction &CGF, AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased, - CGF.overlapForBaseInit(ClassDecl, BaseClassDecl, isBaseVirtual)); + CGF.getOverlapForBaseInit(ClassDecl, BaseClassDecl, isBaseVirtual)); CGF.EmitAggExpr(BaseInit->getInit(), AggSlot); @@ -650,7 +648,7 @@ static void EmitMemberInitializer(CodeGenFunction &CGF, LValue Src = CGF.EmitLValueForFieldInitialization(ThisRHSLV, Field); // Copy the aggregate. - CGF.EmitAggregateCopy(LHS, Src, FieldType, CGF.overlapForFieldInit(Field), + CGF.EmitAggregateCopy(LHS, Src, FieldType, CGF.getOverlapForFieldInit(Field), LHS.isVolatileQualified()); // Ensure that we destroy the objects if an exception is thrown later in // the constructor. @@ -686,7 +684,7 @@ void CodeGenFunction::EmitInitializerForField(FieldDecl *Field, LValue LHS, AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased, - overlapForFieldInit(Field), + getOverlapForFieldInit(Field), AggValueSlot::IsNotZeroed, // Checks are made by the code that calls constructor. AggValueSlot::IsSanitizerChecked); @@ -793,7 +791,7 @@ void CodeGenFunction::EmitAsanPrologueOrEpilogue(bool Prologue) { llvm::Type *Args[2] = {IntPtrTy, IntPtrTy}; llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.VoidTy, Args, false); - llvm::Constant *F = CGM.CreateRuntimeFunction( + llvm::FunctionCallee F = CGM.CreateRuntimeFunction( FTy, Prologue ? "__asan_poison_intra_object_redzone" : "__asan_unpoison_intra_object_redzone"); @@ -1013,7 +1011,7 @@ namespace { if (FOffset < FirstFieldOffset) { FirstField = F; FirstFieldOffset = FOffset; - } else if (FOffset > LastFieldOffset) { + } else if (FOffset >= LastFieldOffset) { LastField = F; LastFieldOffset = FOffset; } @@ -1264,24 +1262,37 @@ void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD, CXXConstructorDecl::init_const_iterator B = CD->init_begin(), E = CD->init_end(); + // Virtual base initializers first, if any. They aren't needed if: + // - This is a base ctor variant + // - There are no vbases + // - The class is abstract, so a complete object of it cannot be constructed + // + // The check for an abstract class is necessary because sema may not have + // marked virtual base destructors referenced. + bool ConstructVBases = CtorType != Ctor_Base && + ClassDecl->getNumVBases() != 0 && + !ClassDecl->isAbstract(); + + // In the Microsoft C++ ABI, there are no constructor variants. Instead, the + // constructor of a class with virtual bases takes an additional parameter to + // conditionally construct the virtual bases. Emit that check here. llvm::BasicBlock *BaseCtorContinueBB = nullptr; - if (ClassDecl->getNumVBases() && + if (ConstructVBases && !CGM.getTarget().getCXXABI().hasConstructorVariants()) { - // The ABIs that don't have constructor variants need to put a branch - // before the virtual base initialization code. BaseCtorContinueBB = - CGM.getCXXABI().EmitCtorCompleteObjectHandler(*this, ClassDecl); + CGM.getCXXABI().EmitCtorCompleteObjectHandler(*this, ClassDecl); assert(BaseCtorContinueBB); } llvm::Value *const OldThis = CXXThisValue; - // Virtual base initializers first. 
for (; B != E && (*B)->isBaseInitializer() && (*B)->isBaseVirtual(); B++) { + if (!ConstructVBases) + continue; if (CGM.getCodeGenOpts().StrictVTablePointers && CGM.getCodeGenOpts().OptimizationLevel > 0 && isInitializerOfDynamicClass(*B)) CXXThisValue = Builder.CreateLaunderInvariantGroup(LoadCXXThis()); - EmitBaseInitializer(*this, ClassDecl, *B, CtorType); + EmitBaseInitializer(*this, ClassDecl, *B); } if (BaseCtorContinueBB) { @@ -1298,7 +1309,7 @@ void CodeGenFunction::EmitCtorPrologue(const CXXConstructorDecl *CD, CGM.getCodeGenOpts().OptimizationLevel > 0 && isInitializerOfDynamicClass(*B)) CXXThisValue = Builder.CreateLaunderInvariantGroup(LoadCXXThis()); - EmitBaseInitializer(*this, ClassDecl, *B, CtorType); + EmitBaseInitializer(*this, ClassDecl, *B); } CXXThisValue = OldThis; @@ -1432,9 +1443,11 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) { if (DtorType == Dtor_Deleting) { RunCleanupsScope DtorEpilogue(*this); EnterDtorCleanups(Dtor, Dtor_Deleting); - if (HaveInsertPoint()) + if (HaveInsertPoint()) { + QualType ThisTy = Dtor->getThisObjectType(); EmitCXXDestructorCall(Dtor, Dtor_Complete, /*ForVirtualBase=*/false, - /*Delegating=*/false, LoadCXXThisAddress()); + /*Delegating=*/false, LoadCXXThisAddress(), ThisTy); + } return; } @@ -1465,8 +1478,9 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) { EnterDtorCleanups(Dtor, Dtor_Complete); if (!isTryBody) { + QualType ThisTy = Dtor->getThisObjectType(); EmitCXXDestructorCall(Dtor, Dtor_Base, /*ForVirtualBase=*/false, - /*Delegating=*/false, LoadCXXThisAddress()); + /*Delegating=*/false, LoadCXXThisAddress(), ThisTy); break; } @@ -1627,7 +1641,7 @@ namespace { llvm::FunctionType *FnType = llvm::FunctionType::get(CGF.VoidTy, ArgTypes, false); - llvm::Value *Fn = + llvm::FunctionCallee Fn = CGF.CGM.CreateRuntimeFunction(FnType, "__sanitizer_dtor_callback"); CGF.EmitNounwindRuntimeCall(Fn, Args); } @@ -1970,10 +1984,14 @@ void CodeGenFunction::EmitCXXAggrConstructorCall(const CXXConstructorDecl *ctor, pushRegularPartialArrayCleanup(arrayBegin, cur, type, eltAlignment, *destroyer); } - + auto currAVS = AggValueSlot::forAddr( + curAddr, type.getQualifiers(), AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased, + AggValueSlot::DoesNotOverlap, AggValueSlot::IsNotZeroed, + NewPointerIsChecked ? AggValueSlot::IsSanitizerChecked + : AggValueSlot::IsNotSanitizerChecked); EmitCXXConstructorCall(ctor, Ctor_Complete, /*ForVirtualBase=*/false, - /*Delegating=*/false, curAddr, E, - AggValueSlot::DoesNotOverlap, NewPointerIsChecked); + /*Delegating=*/false, currAVS, E); } // Go to the next element. 
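Aside: in EmitCXXAggrConstructorCall above, the per-element constructor call now receives a single AggValueSlot carrying the destination address, its qualifiers, the overlap state, and the sanitizer-checked state, rather than the loose curAddr/Overlap/NewPointerIsChecked parameters of the old overload. A sketch of the new shape, with all names taken from the hunk:

// Bundle everything the callee needs to know about the destination into
// one slot; the flags that used to be separate arguments ride along in it.
auto currAVS = AggValueSlot::forAddr(
    curAddr, type.getQualifiers(), AggValueSlot::IsDestructed,
    AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased,
    AggValueSlot::DoesNotOverlap, AggValueSlot::IsNotZeroed,
    NewPointerIsChecked ? AggValueSlot::IsSanitizerChecked
                        : AggValueSlot::IsNotSanitizerChecked);
EmitCXXConstructorCall(ctor, Ctor_Complete, /*ForVirtualBase=*/false,
                       /*Delegating=*/false, currAVS, E);

The receiving overload (visible in the next hunks) reads the address, address space, overlap, and sanitizer flags back out of the slot, so those properties can no longer drift apart across the call.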
@@ -2001,22 +2019,22 @@ void CodeGenFunction::destroyCXXObject(CodeGenFunction &CGF, const CXXDestructorDecl *dtor = record->getDestructor(); assert(!dtor->isTrivial()); CGF.EmitCXXDestructorCall(dtor, Dtor_Complete, /*for vbase*/ false, - /*Delegating=*/false, addr); + /*Delegating=*/false, addr, type); } void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, CXXCtorType Type, bool ForVirtualBase, - bool Delegating, Address This, - const CXXConstructExpr *E, - AggValueSlot::Overlap_t Overlap, - bool NewPointerIsChecked) { + bool Delegating, + AggValueSlot ThisAVS, + const CXXConstructExpr *E) { CallArgList Args; - - LangAS SlotAS = E->getType().getAddressSpace(); + Address This = ThisAVS.getAddress(); + LangAS SlotAS = ThisAVS.getQualifiers().getAddressSpace(); QualType ThisType = D->getThisType(); LangAS ThisAS = ThisType.getTypePtr()->getPointeeType().getAddressSpace(); llvm::Value *ThisPtr = This.getPointer(); + if (SlotAS != ThisAS) { unsigned TargetThisAS = getContext().getTargetAddressSpace(ThisAS); llvm::Type *NewType = @@ -2024,6 +2042,7 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, ThisPtr = getTargetHooks().performAddrSpaceCast(*this, This.getPointer(), ThisAS, SlotAS, NewType); } + // Push the this ptr. Args.add(RValue::get(ThisPtr), D->getThisType()); @@ -2037,7 +2056,7 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, LValue Src = EmitLValue(Arg); QualType DestTy = getContext().getTypeDeclType(D->getParent()); LValue Dest = MakeAddrLValue(This, DestTy); - EmitAggregateCopyCtor(Dest, Src, Overlap); + EmitAggregateCopyCtor(Dest, Src, ThisAVS.mayOverlap()); return; } @@ -2050,7 +2069,8 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, /*ParamsToSkip*/ 0, Order); EmitCXXConstructorCall(D, Type, ForVirtualBase, Delegating, This, Args, - Overlap, E->getExprLoc(), NewPointerIsChecked); + ThisAVS.mayOverlap(), E->getExprLoc(), + ThisAVS.isSanitizerChecked()); } static bool canEmitDelegateCallArgs(CodeGenFunction &CGF, @@ -2130,8 +2150,7 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, Delegating, Args); // Emit the call. - llvm::Constant *CalleePtr = - CGM.getAddrOfCXXStructor(D, getFromCtorType(Type)); + llvm::Constant *CalleePtr = CGM.getAddrOfCXXStructor(GlobalDecl(D, Type)); const CGFunctionInfo &Info = CGM.getTypes().arrangeCXXConstructorCall( Args, D, Type, ExtraArgs.Prefix, ExtraArgs.Suffix, PassPrototypeArgs); CGCallee Callee = CGCallee::forDirect(CalleePtr, GlobalDecl(D, Type)); @@ -2350,8 +2369,11 @@ namespace { : Dtor(D), Addr(Addr), Type(Type) {} void Emit(CodeGenFunction &CGF, Flags flags) override { + // We are calling the destructor from within the constructor. + // Therefore, "this" should have the expected type. 
+ QualType ThisTy = Dtor->getThisObjectType(); CGF.EmitCXXDestructorCall(Dtor, Type, /*ForVirtualBase=*/false, - /*Delegating=*/true, Addr); + /*Delegating=*/true, Addr, ThisTy); } }; } // end anonymous namespace @@ -2389,31 +2411,32 @@ CodeGenFunction::EmitDelegatingCXXConstructorCall(const CXXConstructorDecl *Ctor void CodeGenFunction::EmitCXXDestructorCall(const CXXDestructorDecl *DD, CXXDtorType Type, bool ForVirtualBase, - bool Delegating, - Address This) { + bool Delegating, Address This, + QualType ThisTy) { CGM.getCXXABI().EmitDestructorCall(*this, DD, Type, ForVirtualBase, - Delegating, This); + Delegating, This, ThisTy); } namespace { struct CallLocalDtor final : EHScopeStack::Cleanup { const CXXDestructorDecl *Dtor; Address Addr; + QualType Ty; - CallLocalDtor(const CXXDestructorDecl *D, Address Addr) - : Dtor(D), Addr(Addr) {} + CallLocalDtor(const CXXDestructorDecl *D, Address Addr, QualType Ty) + : Dtor(D), Addr(Addr), Ty(Ty) {} void Emit(CodeGenFunction &CGF, Flags flags) override { CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete, /*ForVirtualBase=*/false, - /*Delegating=*/false, Addr); + /*Delegating=*/false, Addr, Ty); } }; } // end anonymous namespace void CodeGenFunction::PushDestructorCleanup(const CXXDestructorDecl *D, - Address Addr) { - EHStack.pushCleanup<CallLocalDtor>(NormalAndEHCleanup, D, Addr); + QualType T, Address Addr) { + EHStack.pushCleanup<CallLocalDtor>(NormalAndEHCleanup, D, Addr, T); } void CodeGenFunction::PushDestructorCleanup(QualType T, Address Addr) { @@ -2423,7 +2446,7 @@ void CodeGenFunction::PushDestructorCleanup(QualType T, Address Addr) { const CXXDestructorDecl *D = ClassDecl->getDestructor(); assert(D && D->isUsed() && "destructor not marked as used!"); - PushDestructorCleanup(D, Addr); + PushDestructorCleanup(D, T, Addr); } void CodeGenFunction::InitializeVTablePointer(const VPtr &Vptr) { diff --git a/lib/CodeGen/CGCleanup.cpp b/lib/CodeGen/CGCleanup.cpp index 3743d24f11fc..5594f3030229 100644 --- a/lib/CodeGen/CGCleanup.cpp +++ b/lib/CodeGen/CGCleanup.cpp @@ -1,9 +1,8 @@ //===--- CGCleanup.cpp - Bookkeeping and code emission for cleanups -------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -53,12 +52,8 @@ DominatingValue<RValue>::saved_type::save(CodeGenFunction &CGF, RValue rv) { llvm::Type *ComplexTy = llvm::StructType::get(V.first->getType(), V.second->getType()); Address addr = CGF.CreateDefaultAlignTempAlloca(ComplexTy, "saved-complex"); - CGF.Builder.CreateStore(V.first, - CGF.Builder.CreateStructGEP(addr, 0, CharUnits())); - CharUnits offset = CharUnits::fromQuantity( - CGF.CGM.getDataLayout().getTypeAllocSize(V.first->getType())); - CGF.Builder.CreateStore(V.second, - CGF.Builder.CreateStructGEP(addr, 1, offset)); + CGF.Builder.CreateStore(V.first, CGF.Builder.CreateStructGEP(addr, 0)); + CGF.Builder.CreateStore(V.second, CGF.Builder.CreateStructGEP(addr, 1)); return saved_type(addr.getPointer(), ComplexAddress); } @@ -96,12 +91,10 @@ RValue DominatingValue<RValue>::saved_type::restore(CodeGenFunction &CGF) { } case ComplexAddress: { Address address = getSavingAddress(Value); - llvm::Value *real = CGF.Builder.CreateLoad( - CGF.Builder.CreateStructGEP(address, 0, CharUnits())); - CharUnits offset = CharUnits::fromQuantity( - CGF.CGM.getDataLayout().getTypeAllocSize(real->getType())); - llvm::Value *imag = CGF.Builder.CreateLoad( - CGF.Builder.CreateStructGEP(address, 1, offset)); + llvm::Value *real = + CGF.Builder.CreateLoad(CGF.Builder.CreateStructGEP(address, 0)); + llvm::Value *imag = + CGF.Builder.CreateLoad(CGF.Builder.CreateStructGEP(address, 1)); return RValue::getComplex(real, imag); } } diff --git a/lib/CodeGen/CGCleanup.h b/lib/CodeGen/CGCleanup.h index 15d6f46dcb56..ffe0f9d9dd20 100644 --- a/lib/CodeGen/CGCleanup.h +++ b/lib/CodeGen/CGCleanup.h @@ -1,9 +1,8 @@ //===-- CGCleanup.h - Classes for cleanups IR generation --------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/CGCoroutine.cpp b/lib/CodeGen/CGCoroutine.cpp index 80fa7c873631..aee5a927a055 100644 --- a/lib/CodeGen/CGCoroutine.cpp +++ b/lib/CodeGen/CGCoroutine.cpp @@ -1,9 +1,8 @@ //===----- CGCoroutine.cpp - Emit LLVM Code for C++ coroutines ------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -205,7 +204,6 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co BasicBlock *RealSuspendBlock = CGF.createBasicBlock(Prefix + Twine(".suspend.bool")); CGF.Builder.CreateCondBr(SuspendRet, RealSuspendBlock, ReadyBlock); - SuspendBlock = RealSuspendBlock; CGF.EmitBlock(RealSuspendBlock); } @@ -407,7 +405,7 @@ struct CallCoroEnd final : public EHScopeStack::Cleanup { if (Bundles.empty()) { // Otherwise, (landingpad model), create a conditional branch that leads // either to a cleanup block or a block with EH resume instruction. - auto *ResumeBB = CGF.getEHResumeBlock(/*cleanup=*/true); + auto *ResumeBB = CGF.getEHResumeBlock(/*isCleanup=*/true); auto *CleanupContBB = CGF.createBasicBlock("cleanup.cont"); CGF.Builder.CreateCondBr(CoroEnd, ResumeBB, CleanupContBB); CGF.EmitBlock(CleanupContBB); @@ -733,10 +731,10 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E, Args.push_back(llvm::ConstantTokenNone::get(getLLVMContext())); break; } - for (auto &Arg : E->arguments()) + for (const Expr *Arg : E->arguments()) Args.push_back(EmitScalarExpr(Arg)); - llvm::Value *F = CGM.getIntrinsic(IID); + llvm::Function *F = CGM.getIntrinsic(IID); llvm::CallInst *Call = Builder.CreateCall(F, Args); // Note: The following code is to enable to emit coro.id and coro.begin by diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp index 41f8721468a3..f6ee7ee26d4b 100644 --- a/lib/CodeGen/CGDebugInfo.cpp +++ b/lib/CodeGen/CGDebugInfo.cpp @@ -1,9 +1,8 @@ //===--- CGDebugInfo.cpp - Emit Debug Information for a Module ------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -19,6 +18,7 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" +#include "clang/Analysis/Analyses/ExprMutationAnalyzer.h" #include "clang/AST/ASTContext.h" #include "clang/AST/DeclFriend.h" #include "clang/AST/DeclObjC.h" @@ -373,7 +373,7 @@ CGDebugInfo::computeChecksum(FileID FID, SmallString<32> &Checksum) const { SourceManager &SM = CGM.getContext().getSourceManager(); bool Invalid; - llvm::MemoryBuffer *MemBuffer = SM.getBuffer(FID, &Invalid); + const llvm::MemoryBuffer *MemBuffer = SM.getBuffer(FID, &Invalid); if (Invalid) return None; @@ -423,8 +423,12 @@ llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) { } SmallString<32> Checksum; + + // Compute the checksum if possible. If the location is affected by a #line + // directive that refers to a file, PLoc will have an invalid FileID, and we + // will correctly get no checksum. 
Optional<llvm::DIFile::ChecksumKind> CSKind = - computeChecksum(SM.getFileID(Loc), Checksum); + computeChecksum(PLoc.getFileID(), Checksum); Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo; if (CSKind) CSInfo.emplace(*CSKind, Checksum); @@ -451,8 +455,8 @@ CGDebugInfo::createFile(StringRef FileName, for (; CurDirIt != CurDirE && *CurDirIt == *FileIt; ++CurDirIt, ++FileIt) llvm::sys::path::append(DirBuf, *CurDirIt); if (std::distance(llvm::sys::path::begin(CurDir), CurDirIt) == 1) { - // The common prefix only the root; stripping it would cause - // LLVM diagnostic locations to be more confusing. + // Don't strip the common prefix if it is only the root "/" + // since that would make LLVM diagnostic locations confusing. Dir = {}; File = RemappedFile; } else { @@ -610,12 +614,8 @@ void CGDebugInfo::CreateCompileUnit() { TheCU = DBuilder.createCompileUnit( LangTag, CUFile, CGOpts.EmitVersionIdentMetadata ? Producer : "", LO.Optimize || CGOpts.PrepareForLTO || CGOpts.PrepareForThinLTO, - CGOpts.DwarfDebugFlags, RuntimeVers, - (CGOpts.getSplitDwarfMode() != CodeGenOptions::NoFission) - ? "" - : CGOpts.SplitDwarfFile, - EmissionKind, DwoId, CGOpts.SplitDwarfInlining, - CGOpts.DebugInfoForProfiling, + CGOpts.DwarfDebugFlags, RuntimeVers, CGOpts.SplitDwarfFile, EmissionKind, + DwoId, CGOpts.SplitDwarfInlining, CGOpts.DebugInfoForProfiling, CGM.getTarget().getTriple().isNVPTX() ? llvm::DICompileUnit::DebugNameTableKind::None : static_cast<llvm::DICompileUnit::DebugNameTableKind>( @@ -916,6 +916,11 @@ static SmallString<256> getTypeIdentifier(const TagType *Ty, CodeGenModule &CGM, if (!needsTypeIdentifier(TD, CGM, TheCU)) return Identifier; + if (const auto *RD = dyn_cast<CXXRecordDecl>(TD)) + if (RD->getDefinition()) + if (RD->isDynamicClass() && + CGM.getVTableLinkage(RD) == llvm::GlobalValue::ExternalLinkage) + return Identifier; // TODO: This is using the RTTI name. Is there a better way to get // a unique string for a type? @@ -1083,15 +1088,18 @@ llvm::DIType *CGDebugInfo::CreateType(const TemplateSpecializationType *Ty, assert(Ty->isTypeAlias()); llvm::DIType *Src = getOrCreateType(Ty->getAliasedType(), Unit); + auto *AliasDecl = + cast<TypeAliasTemplateDecl>(Ty->getTemplateName().getAsTemplateDecl()) + ->getTemplatedDecl(); + + if (AliasDecl->hasAttr<NoDebugAttr>()) + return Src; + SmallString<128> NS; llvm::raw_svector_ostream OS(NS); Ty->getTemplateName().print(OS, getPrintingPolicy(), /*qualified*/ false); printTemplateArgumentList(OS, Ty->template_arguments(), getPrintingPolicy()); - auto *AliasDecl = - cast<TypeAliasTemplateDecl>(Ty->getTemplateName().getAsTemplateDecl()) - ->getTemplatedDecl(); - SourceLocation Loc = AliasDecl->getLocation(); return DBuilder.createTypedef(Src, OS.str(), getOrCreateFile(Loc), getLineNumber(Loc), @@ -1100,15 +1108,20 @@ llvm::DIType *CGDebugInfo::CreateType(const TemplateSpecializationType *Ty, llvm::DIType *CGDebugInfo::CreateType(const TypedefType *Ty, llvm::DIFile *Unit) { + llvm::DIType *Underlying = + getOrCreateType(Ty->getDecl()->getUnderlyingType(), Unit); + + if (Ty->getDecl()->hasAttr<NoDebugAttr>()) + return Underlying; + // We don't set size information, but do specify where the typedef was // declared. SourceLocation Loc = Ty->getDecl()->getLocation(); // Typedefs are derived from some other type. 
- return DBuilder.createTypedef( - getOrCreateType(Ty->getDecl()->getUnderlyingType(), Unit), - Ty->getDecl()->getName(), getOrCreateFile(Loc), getLineNumber(Loc), - getDeclContextDescriptor(Ty->getDecl())); + return DBuilder.createTypedef(Underlying, Ty->getDecl()->getName(), + getOrCreateFile(Loc), getLineNumber(Loc), + getDeclContextDescriptor(Ty->getDecl())); } static unsigned getDwarfCC(CallingConv CC) { @@ -1394,6 +1407,9 @@ void CGDebugInfo::CollectRecordFields( isa<VarTemplateSpecializationDecl>(V)) continue; + if (isa<VarTemplatePartialSpecializationDecl>(V)) + continue; + // Reuse the existing static member declaration if one exists auto MI = StaticDataMemberCache.find(V->getCanonicalDecl()); if (MI != StaticDataMemberCache.end()) { @@ -1726,31 +1742,37 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList, QualType T = TA.getParamTypeForDecl().getDesugaredType(CGM.getContext()); llvm::DIType *TTy = getOrCreateType(T, Unit); llvm::Constant *V = nullptr; - const CXXMethodDecl *MD; - // Variable pointer template parameters have a value that is the address - // of the variable. - if (const auto *VD = dyn_cast<VarDecl>(D)) - V = CGM.GetAddrOfGlobalVar(VD); - // Member function pointers have special support for building them, though - // this is currently unsupported in LLVM CodeGen. - else if ((MD = dyn_cast<CXXMethodDecl>(D)) && MD->isInstance()) - V = CGM.getCXXABI().EmitMemberFunctionPointer(MD); - else if (const auto *FD = dyn_cast<FunctionDecl>(D)) - V = CGM.GetAddrOfFunction(FD); - // Member data pointers have special handling too to compute the fixed - // offset within the object. - else if (const auto *MPT = dyn_cast<MemberPointerType>(T.getTypePtr())) { - // These five lines (& possibly the above member function pointer - // handling) might be able to be refactored to use similar code in - // CodeGenModule::getMemberPointerConstant - uint64_t fieldOffset = CGM.getContext().getFieldOffset(D); - CharUnits chars = - CGM.getContext().toCharUnitsFromBits((int64_t)fieldOffset); - V = CGM.getCXXABI().EmitMemberDataPointer(MPT, chars); + // Skip retrieve the value if that template parameter has cuda device + // attribute, i.e. that value is not available at the host side. + if (!CGM.getLangOpts().CUDA || CGM.getLangOpts().CUDAIsDevice || + !D->hasAttr<CUDADeviceAttr>()) { + const CXXMethodDecl *MD; + // Variable pointer template parameters have a value that is the address + // of the variable. + if (const auto *VD = dyn_cast<VarDecl>(D)) + V = CGM.GetAddrOfGlobalVar(VD); + // Member function pointers have special support for building them, + // though this is currently unsupported in LLVM CodeGen. + else if ((MD = dyn_cast<CXXMethodDecl>(D)) && MD->isInstance()) + V = CGM.getCXXABI().EmitMemberFunctionPointer(MD); + else if (const auto *FD = dyn_cast<FunctionDecl>(D)) + V = CGM.GetAddrOfFunction(FD); + // Member data pointers have special handling too to compute the fixed + // offset within the object. 
+ else if (const auto *MPT = + dyn_cast<MemberPointerType>(T.getTypePtr())) { + // These five lines (& possibly the above member function pointer + // handling) might be able to be refactored to use similar code in + // CodeGenModule::getMemberPointerConstant + uint64_t fieldOffset = CGM.getContext().getFieldOffset(D); + CharUnits chars = + CGM.getContext().toCharUnitsFromBits((int64_t)fieldOffset); + V = CGM.getCXXABI().EmitMemberDataPointer(MPT, chars); + } + V = V->stripPointerCasts(); } TemplateParams.push_back(DBuilder.createTemplateValueParameter( - TheCU, Name, TTy, - cast_or_null<llvm::Constant>(V->stripPointerCasts()))); + TheCU, Name, TTy, cast_or_null<llvm::Constant>(V))); } break; case TemplateArgument::NullPtr: { QualType T = TA.getNullPtrType(); @@ -1817,32 +1839,24 @@ CGDebugInfo::CollectFunctionTemplateParams(const FunctionDecl *FD, } llvm::DINodeArray CGDebugInfo::CollectVarTemplateParams(const VarDecl *VL, - llvm::DIFile *Unit) { - if (auto *TS = dyn_cast<VarTemplateSpecializationDecl>(VL)) { - auto T = TS->getSpecializedTemplateOrPartial(); - auto TA = TS->getTemplateArgs().asArray(); - // Collect parameters for a partial specialization - if (T.is<VarTemplatePartialSpecializationDecl *>()) { - const TemplateParameterList *TList = - T.get<VarTemplatePartialSpecializationDecl *>() - ->getTemplateParameters(); - return CollectTemplateParams(TList, TA, Unit); - } - - // Collect parameters for an explicit specialization - if (T.is<VarTemplateDecl *>()) { - const TemplateParameterList *TList = T.get<VarTemplateDecl *>() - ->getTemplateParameters(); - return CollectTemplateParams(TList, TA, Unit); - } - } - return llvm::DINodeArray(); + llvm::DIFile *Unit) { + // Always get the full list of parameters, not just the ones from the + // specialization. A partial specialization may have fewer parameters than + // there are arguments. + auto *TS = dyn_cast<VarTemplateSpecializationDecl>(VL); + if (!TS) + return llvm::DINodeArray(); + VarTemplateDecl *T = TS->getSpecializedTemplate(); + const TemplateParameterList *TList = T->getTemplateParameters(); + auto TA = TS->getTemplateArgs().asArray(); + return CollectTemplateParams(TList, TA, Unit); } llvm::DINodeArray CGDebugInfo::CollectCXXTemplateParams( const ClassTemplateSpecializationDecl *TSpecial, llvm::DIFile *Unit) { - // Always get the full list of parameters, not just the ones from - // the specialization. + // Always get the full list of parameters, not just the ones from the + // specialization. A partial specialization may have fewer parameters than + // there are arguments. TemplateParameterList *TPList = TSpecial->getSpecializedTemplate()->getTemplateParameters(); const TemplateArgumentList &TAList = TSpecial->getTemplateArgs(); @@ -1875,6 +1889,58 @@ StringRef CGDebugInfo::getVTableName(const CXXRecordDecl *RD) { return internString("_vptr$", RD->getNameAsString()); } +StringRef CGDebugInfo::getDynamicInitializerName(const VarDecl *VD, + DynamicInitKind StubKind, + llvm::Function *InitFn) { + // If we're not emitting codeview, use the mangled name. For Itanium, this is + // arbitrary. + if (!CGM.getCodeGenOpts().EmitCodeView) + return InitFn->getName(); + + // Print the normal qualified name for the variable, then break off the last + // NNS, and add the appropriate other text. Clang always prints the global + // variable name without template arguments, so we can use rsplit("::") and + // then recombine the pieces. 
+ SmallString<128> QualifiedGV; + StringRef Quals; + StringRef GVName; + { + llvm::raw_svector_ostream OS(QualifiedGV); + VD->printQualifiedName(OS, getPrintingPolicy()); + std::tie(Quals, GVName) = OS.str().rsplit("::"); + if (GVName.empty()) + std::swap(Quals, GVName); + } + + SmallString<128> InitName; + llvm::raw_svector_ostream OS(InitName); + if (!Quals.empty()) + OS << Quals << "::"; + + switch (StubKind) { + case DynamicInitKind::NoStub: + llvm_unreachable("not an initializer"); + case DynamicInitKind::Initializer: + OS << "`dynamic initializer for '"; + break; + case DynamicInitKind::AtExit: + OS << "`dynamic atexit destructor for '"; + break; + } + + OS << GVName; + + // Add any template specialization args. + if (const auto *VTpl = dyn_cast<VarTemplateSpecializationDecl>(VD)) { + printTemplateArgumentList(OS, VTpl->getTemplateArgs().asArray(), + getPrintingPolicy()); + } + + OS << '\''; + + return internString(OS.str()); +} + void CGDebugInfo::CollectVTableInfo(const CXXRecordDecl *RD, llvm::DIFile *Unit, SmallVectorImpl<llvm::Metadata *> &EltTys, llvm::DICompositeType *RecordTy) { @@ -1954,6 +2020,20 @@ llvm::DIType *CGDebugInfo::getOrCreateStandaloneType(QualType D, return T; } +void CGDebugInfo::addHeapAllocSiteMetadata(llvm::Instruction *CI, + QualType D, + SourceLocation Loc) { + llvm::MDNode *node; + if (D.getTypePtr()->isVoidPointerType()) { + node = llvm::MDNode::get(CGM.getLLVMContext(), None); + } else { + QualType PointeeTy = D.getTypePtr()->getPointeeType(); + node = getOrCreateType(PointeeTy, getOrCreateFile(Loc)); + } + + CI->setMetadata("heapallocsite", node); +} + void CGDebugInfo::completeType(const EnumDecl *ED) { if (DebugKind <= codegenoptions::DebugLineTablesOnly) return; @@ -2297,7 +2377,14 @@ CGDebugInfo::getOrCreateModuleRef(ExternalASTSource::ASTSourceDescriptor Mod, } bool IsRootModule = M ? !M->Parent : true; - if (CreateSkeletonCU && IsRootModule) { + // When a module name is specified as -fmodule-name, that module gets a + // clang::Module object, but it won't actually be built or imported; it will + // be textual. + if (CreateSkeletonCU && IsRootModule && Mod.getASTFile().empty() && M) + assert(StringRef(M->Name).startswith(CGM.getLangOpts().ModuleName) && + "clang module without ASTFile must be specified by -fmodule-name"); + + if (CreateSkeletonCU && IsRootModule && !Mod.getASTFile().empty()) { // PCH files don't have a signature field in the control block, // but LLVM detects skeleton CUs by looking for a non-zero DWO id. // We use the lower 64 bits for debug info. @@ -2314,6 +2401,7 @@ CGDebugInfo::getOrCreateModuleRef(ExternalASTSource::ASTSourceDescriptor Mod, Signature); DIB.finalize(); } + llvm::DIModule *Parent = IsRootModule ? 
nullptr : getOrCreateModuleRef( @@ -2768,6 +2856,9 @@ static QualType UnwrapTypeForDebugInfo(QualType T, const ASTContext &C) { case Type::Paren: T = cast<ParenType>(T)->getInnerType(); break; + case Type::MacroQualified: + T = cast<MacroQualifiedType>(T)->getUnderlyingType(); + break; case Type::SubstTemplateTypeParm: T = cast<SubstTemplateTypeParmType>(T)->getReplacementType(); break; @@ -2947,6 +3038,7 @@ llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) { case Type::DeducedTemplateSpecialization: case Type::Elaborated: case Type::Paren: + case Type::MacroQualified: case Type::SubstTemplateTypeParm: case Type::TypeOfExpr: case Type::TypeOf: @@ -3021,9 +3113,9 @@ llvm::DICompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) { else Flags |= llvm::DINode::FlagTypePassByValue; - // Record if a C++ record is trivial type. - if (CXXRD->isTrivial()) - Flags |= llvm::DINode::FlagTrivial; + // Record if a C++ record is non-trivial type. + if (!CXXRD->isTrivial()) + Flags |= llvm::DINode::FlagNonTrivial; } llvm::DICompositeType *RealDecl = DBuilder.createReplaceableCompositeType( @@ -3443,6 +3535,11 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, } else if (const auto *OMD = dyn_cast<ObjCMethodDecl>(D)) { Name = getObjCMethodName(OMD); Flags |= llvm::DINode::FlagPrototyped; + } else if (isa<VarDecl>(D) && + GD.getDynamicInitKind() != DynamicInitKind::NoStub) { + // This is a global initializer or atexit destructor for a global variable. + Name = getDynamicInitializerName(cast<VarDecl>(D), GD.getDynamicInitKind(), + Fn); } else { // Use llvm function name. Name = Fn->getName(); @@ -3488,6 +3585,15 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, if (HasDecl && isa<FunctionDecl>(D)) DeclCache[D->getCanonicalDecl()].reset(SP); + // We use the SPDefCache only in the case when the debug entry values option + // is set, in order to speed up parameters modification analysis. + // + // FIXME: Use AbstractCallee here to support ObjCMethodDecl. + if (CGM.getCodeGenOpts().EnableDebugEntryValues && HasDecl) + if (auto *FD = dyn_cast<FunctionDecl>(D)) + if (FD->hasBody() && !FD->param_empty()) + SPDefCache[FD].reset(SP); + if (CGM.getCodeGenOpts().DwarfVersion >= 5) { // Starting with DWARF V5 method declarations are emitted as children of // the interface type. @@ -3516,7 +3622,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, } void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, - QualType FnType) { + QualType FnType, llvm::Function *Fn) { StringRef Name; StringRef LinkageName; @@ -3526,7 +3632,9 @@ void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero; llvm::DIFile *Unit = getOrCreateFile(Loc); - llvm::DIScope *FDContext = getDeclContextDescriptor(D); + bool IsDeclForCallSite = Fn ? true : false; + llvm::DIScope *FDContext = + IsDeclForCallSite ? Unit : getDeclContextDescriptor(D); llvm::DINodeArray TParamsArray; if (isa<FunctionDecl>(D)) { // If there is a DISubprogram for this function available then use it. 
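Aside: the SPDefCache caching above (and the ParamCache filled in further down) is plumbing for the EnableDebugEntryValues code-generation option: function definitions and their parameter variables are remembered during IRGen so that, at finalize() time, a later hunk in this file can run ExprMutationAnalyzer over each body and mark provably unmodified parameters via setIsNotModified(). A small standalone illustration of what that analysis concludes; foo and use are hypothetical, not part of the commit:

// 'a' is only ever read in the body, so its debug variable can be marked
// not-modified and described by its entry value; 'b' is written, so it
// keeps the default (modifiable) state.
extern void use(int);
void foo(int a, int b) {
  b += a;
  use(b);
}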
@@ -3553,10 +3661,38 @@ void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, if (CGM.getLangOpts().Optimize) SPFlags |= llvm::DISubprogram::SPFlagOptimized; - DBuilder.retainType(DBuilder.createFunction( + llvm::DISubprogram *SP = DBuilder.createFunction( FDContext, Name, LinkageName, Unit, LineNo, getOrCreateFunctionType(D, FnType, Unit), ScopeLine, Flags, SPFlags, - TParamsArray.get(), getFunctionDeclaration(D))); + TParamsArray.get(), getFunctionDeclaration(D)); + + if (IsDeclForCallSite) + Fn->setSubprogram(SP); + + DBuilder.retainType(SP); +} + +void CGDebugInfo::EmitFuncDeclForCallSite(llvm::CallBase *CallOrInvoke, + QualType CalleeType, + const FunctionDecl *CalleeDecl) { + auto &CGOpts = CGM.getCodeGenOpts(); + if (!CGOpts.EnableDebugEntryValues || !CGM.getLangOpts().Optimize || + !CallOrInvoke || + CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo) + return; + + auto *Func = CallOrInvoke->getCalledFunction(); + if (!Func) + return; + + // If there is no DISubprogram attached to the function being called, + // create the one describing the function in order to have complete + // call site debug info. + if (Func->getSubprogram()) + return; + + if (!CalleeDecl->isStatic() && !CalleeDecl->isInlined()) + EmitFunctionDecl(CalleeDecl, CalleeDecl->getLocation(), CalleeType, Func); } void CGDebugInfo::EmitInlineFunctionStart(CGBuilderTy &Builder, GlobalDecl GD) { @@ -3735,7 +3871,8 @@ CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD, llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage, llvm::Optional<unsigned> ArgNo, - CGBuilderTy &Builder) { + CGBuilderTy &Builder, + const bool UsePointerValue) { assert(DebugKind >= codegenoptions::LimitedDebugInfo); assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!"); if (VD->hasAttr<NoDebugAttr>()) @@ -3840,6 +3977,16 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD, } } + // Clang stores the sret pointer provided by the caller in a static alloca. + // Use DW_OP_deref to tell the debugger to load the pointer and treat it as + // the address of the variable. + if (UsePointerValue) { + assert(std::find(Expr.begin(), Expr.end(), llvm::dwarf::DW_OP_deref) == + Expr.end() && + "Debug info already contains DW_OP_deref."); + Expr.push_back(llvm::dwarf::DW_OP_deref); + } + // Create the descriptor for the variable. auto *D = ArgNo ? 
DBuilder.createParameterVariable( Scope, Name, *ArgNo, Unit, Line, Ty, @@ -3853,14 +4000,46 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::DebugLoc::get(Line, Column, Scope, CurInlinedAt), Builder.GetInsertBlock()); + if (CGM.getCodeGenOpts().EnableDebugEntryValues && ArgNo) { + if (auto *PD = dyn_cast<ParmVarDecl>(VD)) + ParamCache[PD].reset(D); + } + return D; } llvm::DILocalVariable * CGDebugInfo::EmitDeclareOfAutoVariable(const VarDecl *VD, llvm::Value *Storage, - CGBuilderTy &Builder) { + CGBuilderTy &Builder, + const bool UsePointerValue) { assert(DebugKind >= codegenoptions::LimitedDebugInfo); - return EmitDeclare(VD, Storage, llvm::None, Builder); + return EmitDeclare(VD, Storage, llvm::None, Builder, UsePointerValue); +} + +void CGDebugInfo::EmitLabel(const LabelDecl *D, CGBuilderTy &Builder) { + assert(DebugKind >= codegenoptions::LimitedDebugInfo); + assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!"); + + if (D->hasAttr<NoDebugAttr>()) + return; + + auto *Scope = cast<llvm::DIScope>(LexicalBlockStack.back()); + llvm::DIFile *Unit = getOrCreateFile(D->getLocation()); + + // Get location information. + unsigned Line = getLineNumber(D->getLocation()); + unsigned Column = getColumnNumber(D->getLocation()); + + StringRef Name = D->getName(); + + // Create the descriptor for the label. + auto *L = + DBuilder.createLabel(Scope, Name, Unit, Line, CGM.getLangOpts().Optimize); + + // Insert an llvm.dbg.label into the current block. + DBuilder.insertLabel(L, + llvm::DebugLoc::get(Line, Column, Scope, CurInlinedAt), + Builder.GetInsertBlock()); } llvm::DIType *CGDebugInfo::CreateSelfType(const QualType &QualTy, @@ -4125,7 +4304,7 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, llvm::DIDerivedType * CGDebugInfo::getOrCreateStaticDataMemberDeclarationOrNull(const VarDecl *D) { - if (!D->isStaticDataMember()) + if (!D || !D->isStaticDataMember()) return nullptr; auto MI = StaticDataMemberCache.find(D->getCanonicalDecl()); @@ -4207,6 +4386,14 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, SmallVector<int64_t, 4> Expr; unsigned AddressSpace = CGM.getContext().getTargetAddressSpace(D->getType()); + if (CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) { + if (D->hasAttr<CUDASharedAttr>()) + AddressSpace = + CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared); + else if (D->hasAttr<CUDAConstantAttr>()) + AddressSpace = + CGM.getContext().getTargetAddressSpace(LangAS::cuda_constant); + } AppendAddressSpaceXDeref(AddressSpace, Expr); GVE = DBuilder.createGlobalVariableExpression( @@ -4229,22 +4416,32 @@ void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, const APValue &Init) { llvm::DIFile *Unit = getOrCreateFile(VD->getLocation()); StringRef Name = VD->getName(); llvm::DIType *Ty = getOrCreateType(VD->getType(), Unit); + + // Do not use global variables for enums, unless in CodeView. if (const auto *ECD = dyn_cast<EnumConstantDecl>(VD)) { const auto *ED = cast<EnumDecl>(ECD->getDeclContext()); assert(isa<EnumType>(ED->getTypeForDecl()) && "Enum without EnumType?"); - Ty = getOrCreateType(QualType(ED->getTypeForDecl(), 0), Unit); + (void)ED; + + // If CodeView, emit enums as global variables, unless they are defined + // inside a class. We do this because MSVC doesn't emit S_CONSTANTs for + // enums in classes, and because it is difficult to attach this scope + // information to the global variable. 
+ if (!CGM.getCodeGenOpts().EmitCodeView || + isa<RecordDecl>(ED->getDeclContext())) + return; } - // Do not use global variables for enums. - // - // FIXME: why not? - if (Ty->getTag() == llvm::dwarf::DW_TAG_enumeration_type) - return; - // Do not emit separate definitions for function local const/statics. + + llvm::DIScope *DContext = nullptr; + + // Do not emit separate definitions for function local consts. if (isa<FunctionDecl>(VD->getDeclContext())) return; + + // Emit definition for static members in CodeView. VD = cast<ValueDecl>(VD->getCanonicalDecl()); - auto *VarD = cast<VarDecl>(VD); - if (VarD->isStaticDataMember()) { + auto *VarD = dyn_cast<VarDecl>(VD); + if (VarD && VarD->isStaticDataMember()) { auto *RD = cast<RecordDecl>(VarD->getDeclContext()); getDeclContextDescriptor(VarD); // Ensure that the type is retained even though it's otherwise unreferenced. @@ -4253,10 +4450,16 @@ void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, const APValue &Init) { // through its scope. RetainedTypes.push_back( CGM.getContext().getRecordType(RD).getAsOpaquePtr()); - return; - } - llvm::DIScope *DContext = getDeclContextDescriptor(VD); + if (!CGM.getCodeGenOpts().EmitCodeView) + return; + + // Use the global scope for static members. + DContext = getContextDescriptor( + cast<Decl>(CGM.getContext().getTranslationUnitDecl()), TheCU); + } else { + DContext = getDeclContextDescriptor(VD); + } auto &GV = DeclCache[VD]; if (GV) @@ -4393,6 +4596,29 @@ void CGDebugInfo::setDwoId(uint64_t Signature) { TheCU->setDWOId(Signature); } +/// Analyzes each function parameter to determine whether it is constant +/// throughout the function body. +static void analyzeParametersModification( + ASTContext &Ctx, + llvm::DenseMap<const FunctionDecl *, llvm::TrackingMDRef> &SPDefCache, + llvm::DenseMap<const ParmVarDecl *, llvm::TrackingMDRef> &ParamCache) { + for (auto &SP : SPDefCache) { + auto *FD = SP.first; + assert(FD->hasBody() && "Functions must have body here"); + const Stmt *FuncBody = (*FD).getBody(); + for (auto Parm : FD->parameters()) { + ExprMutationAnalyzer FuncAnalyzer(*FuncBody, Ctx); + if (FuncAnalyzer.isMutated(Parm)) + continue; + + auto I = ParamCache.find(Parm); + assert(I != ParamCache.end() && "Parameters should be already cached"); + auto *DIParm = cast<llvm::DILocalVariable>(I->second); + DIParm->setIsNotModified(); + } + } +} + void CGDebugInfo::finalize() { // Creating types might create further types - invalidating the current // element and the size(), so don't cache/reference them. @@ -4465,6 +4691,10 @@ void CGDebugInfo::finalize() { if (auto MD = TypeCache[RT]) DBuilder.retainType(cast<llvm::DIType>(MD)); + if (CGM.getCodeGenOpts().EnableDebugEntryValues) + // This will be used to emit debug entry values. + analyzeParametersModification(CGM.getContext(), SPDefCache, ParamCache); + DBuilder.finalize(); } @@ -4497,7 +4727,10 @@ llvm::DINode::DIFlags CGDebugInfo::getCallSiteRelatedAttrs() const { // were part of DWARF v4. 
bool SupportsDWARFv4Ext = CGM.getCodeGenOpts().DwarfVersion == 4 && - CGM.getCodeGenOpts().getDebuggerTuning() == llvm::DebuggerKind::LLDB; + (CGM.getCodeGenOpts().getDebuggerTuning() == llvm::DebuggerKind::LLDB || + (CGM.getCodeGenOpts().EnableDebugEntryValues && + CGM.getCodeGenOpts().getDebuggerTuning() == llvm::DebuggerKind::GDB)); + if (!SupportsDWARFv4Ext && CGM.getCodeGenOpts().DwarfVersion < 5) return llvm::DINode::FlagZero; diff --git a/lib/CodeGen/CGDebugInfo.h b/lib/CodeGen/CGDebugInfo.h index 031e40b9dde9..7edbea86633a 100644 --- a/lib/CodeGen/CGDebugInfo.h +++ b/lib/CodeGen/CGDebugInfo.h @@ -1,9 +1,8 @@ //===--- CGDebugInfo.h - DebugInfo for LLVM CodeGen -------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -42,6 +41,7 @@ class ObjCInterfaceDecl; class ObjCIvarDecl; class UsingDecl; class VarDecl; +enum class DynamicInitKind : unsigned; namespace CodeGen { class CodeGenModule; @@ -134,6 +134,10 @@ class CGDebugInfo { llvm::DenseMap<const char *, llvm::TrackingMDRef> DIFileCache; llvm::DenseMap<const FunctionDecl *, llvm::TrackingMDRef> SPCache; + /// Cache function definitions relevant to use for parameters mutation + /// analysis. + llvm::DenseMap<const FunctionDecl *, llvm::TrackingMDRef> SPDefCache; + llvm::DenseMap<const ParmVarDecl *, llvm::TrackingMDRef> ParamCache; /// Cache declarations relevant to DW_TAG_imported_declarations (C++ /// using declarations) that aren't covered by other more specific caches. llvm::DenseMap<const Decl *, llvm::TrackingMDRef> DeclCache; @@ -405,7 +409,15 @@ public: void EmitInlineFunctionEnd(CGBuilderTy &Builder); /// Emit debug info for a function declaration. - void EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, QualType FnType); + /// \p Fn is set only when a declaration for a debug call site gets created. + void EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, + QualType FnType, llvm::Function *Fn = nullptr); + + /// Emit debug info for an extern function being called. + /// This is needed for call site debug info. + void EmitFuncDeclForCallSite(llvm::CallBase *CallOrInvoke, + QualType CalleeType, + const FunctionDecl *CalleeDecl); /// Constructs the debug code for exiting a function. void EmitFunctionEnd(CGBuilderTy &Builder, llvm::Function *Fn); @@ -422,9 +434,13 @@ public: /// declaration. /// Returns a pointer to the DILocalVariable associated with the /// llvm.dbg.declare, or nullptr otherwise. - llvm::DILocalVariable *EmitDeclareOfAutoVariable(const VarDecl *Decl, - llvm::Value *AI, - CGBuilderTy &Builder); + llvm::DILocalVariable * + EmitDeclareOfAutoVariable(const VarDecl *Decl, llvm::Value *AI, + CGBuilderTy &Builder, + const bool UsePointerValue = false); + + /// Emit call to \c llvm.dbg.label for an label. + void EmitLabel(const LabelDecl *D, CGBuilderTy &Builder); /// Emit call to \c llvm.dbg.declare for an imported variable /// declaration in a block. @@ -474,6 +490,10 @@ public: /// Emit standalone debug info for a type. llvm::DIType *getOrCreateStandaloneType(QualType Ty, SourceLocation Loc); + /// Add heapallocsite metadata for MSAllocator calls. 
+ void addHeapAllocSiteMetadata(llvm::Instruction *CallSite, QualType Ty, + SourceLocation Loc); + void completeType(const EnumDecl *ED); void completeType(const RecordDecl *RD); void completeRequiredType(const RecordDecl *RD); @@ -500,7 +520,8 @@ private: /// llvm.dbg.declare, or nullptr otherwise. llvm::DILocalVariable *EmitDeclare(const VarDecl *decl, llvm::Value *AI, llvm::Optional<unsigned> ArgNo, - CGBuilderTy &Builder); + CGBuilderTy &Builder, + const bool UsePointerValue = false); struct BlockByRefType { /// The wrapper struct used inside the __block_literal struct. @@ -642,6 +663,12 @@ private: /// Get the vtable name for the given class. StringRef getVTableName(const CXXRecordDecl *Decl); + /// Get the name to use in the debug info for a dynamic initializer or atexit + /// stub function. + StringRef getDynamicInitializerName(const VarDecl *VD, + DynamicInitKind StubKind, + llvm::Function *InitFn); + /// Get line number for the location. If location is invalid /// then use current location. unsigned getLineNumber(SourceLocation Loc); diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp index 5959d889b455..6ad43cefc4d2 100644 --- a/lib/CodeGen/CGDecl.cpp +++ b/lib/CodeGen/CGDecl.cpp @@ -1,9 +1,8 @@ //===--- CGDecl.cpp - Emit LLVM Code for declarations ---------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -20,6 +19,7 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" +#include "PatternInit.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/CharUnits.h" @@ -104,9 +104,11 @@ void CodeGenFunction::EmitDecl(const Decl &D) { case Decl::Label: // __label__ x; case Decl::Import: case Decl::OMPThreadPrivate: + case Decl::OMPAllocate: case Decl::OMPCapturedExpr: case Decl::OMPRequires: case Decl::Empty: + case Decl::Concept: // None of these decls require codegen support. 
return; @@ -142,6 +144,9 @@ void CodeGenFunction::EmitDecl(const Decl &D) { case Decl::OMPDeclareReduction: return CGM.EmitOMPDeclareReduction(cast<OMPDeclareReductionDecl>(&D), this); + case Decl::OMPDeclareMapper: + return CGM.EmitOMPDeclareMapper(cast<OMPDeclareMapperDecl>(&D), this); + case Decl::Typedef: // typedef int X; case Decl::TypeAlias: { // using X = int; [C++0x] const TypedefNameDecl &TD = cast<TypedefNameDecl>(D); @@ -149,6 +154,8 @@ void CodeGenFunction::EmitDecl(const Decl &D) { if (Ty->isVariablyModifiedType()) EmitVariablyModifiedType(Ty); + + return; } } } @@ -169,7 +176,7 @@ void CodeGenFunction::EmitVarDecl(const VarDecl &D) { return; llvm::GlobalValue::LinkageTypes Linkage = - CGM.getLLVMLinkageVarDefinition(&D, /*isConstant=*/false); + CGM.getLLVMLinkageVarDefinition(&D, /*IsConstant=*/false); // FIXME: We need to force the emission/use of a guard variable for // some variables even if we can constant-evaluate them because @@ -473,11 +480,12 @@ namespace { template <class Derived> struct DestroyNRVOVariable : EHScopeStack::Cleanup { - DestroyNRVOVariable(Address addr, llvm::Value *NRVOFlag) - : NRVOFlag(NRVOFlag), Loc(addr) {} + DestroyNRVOVariable(Address addr, QualType type, llvm::Value *NRVOFlag) + : NRVOFlag(NRVOFlag), Loc(addr), Ty(type) {} llvm::Value *NRVOFlag; Address Loc; + QualType Ty; void Emit(CodeGenFunction &CGF, Flags flags) override { // Along the exceptions path we always execute the dtor. @@ -504,26 +512,24 @@ namespace { struct DestroyNRVOVariableCXX final : DestroyNRVOVariable<DestroyNRVOVariableCXX> { - DestroyNRVOVariableCXX(Address addr, const CXXDestructorDecl *Dtor, - llvm::Value *NRVOFlag) - : DestroyNRVOVariable<DestroyNRVOVariableCXX>(addr, NRVOFlag), - Dtor(Dtor) {} + DestroyNRVOVariableCXX(Address addr, QualType type, + const CXXDestructorDecl *Dtor, llvm::Value *NRVOFlag) + : DestroyNRVOVariable<DestroyNRVOVariableCXX>(addr, type, NRVOFlag), + Dtor(Dtor) {} const CXXDestructorDecl *Dtor; void emitDestructorCall(CodeGenFunction &CGF) { CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete, /*ForVirtualBase=*/false, - /*Delegating=*/false, Loc); + /*Delegating=*/false, Loc, Ty); } }; struct DestroyNRVOVariableC final : DestroyNRVOVariable<DestroyNRVOVariableC> { DestroyNRVOVariableC(Address addr, llvm::Value *NRVOFlag, QualType Ty) - : DestroyNRVOVariable<DestroyNRVOVariableC>(addr, NRVOFlag), Ty(Ty) {} - - QualType Ty; + : DestroyNRVOVariable<DestroyNRVOVariableC>(addr, Ty, NRVOFlag) {} void emitDestructorCall(CodeGenFunction &CGF) { CGF.destroyNonTrivialCStruct(CGF, Loc, Ty); @@ -535,7 +541,7 @@ namespace { CallStackRestore(Address Stack) : Stack(Stack) {} void Emit(CodeGenFunction &CGF, Flags flags) override { llvm::Value *V = CGF.Builder.CreateLoad(Stack); - llvm::Value *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::stackrestore); + llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::stackrestore); CGF.Builder.CreateCall(F, V); } }; @@ -915,9 +921,8 @@ static void emitStoresForInitAfterBZero(CodeGenModule &CGM, // If necessary, get a pointer to the element and emit it. if (!Elt->isNullValue() && !isa<llvm::UndefValue>(Elt)) emitStoresForInitAfterBZero( - CGM, Elt, - Builder.CreateConstInBoundsGEP2_32(Loc, 0, i, CGM.getDataLayout()), - isVolatile, Builder); + CGM, Elt, Builder.CreateConstInBoundsGEP2_32(Loc, 0, i), isVolatile, + Builder); } return; } @@ -930,10 +935,9 @@ static void emitStoresForInitAfterBZero(CodeGenModule &CGM, // If necessary, get a pointer to the element and emit it. 
if (!Elt->isNullValue() && !isa<llvm::UndefValue>(Elt)) - emitStoresForInitAfterBZero( - CGM, Elt, - Builder.CreateConstInBoundsGEP2_32(Loc, 0, i, CGM.getDataLayout()), - isVolatile, Builder); + emitStoresForInitAfterBZero(CGM, Elt, + Builder.CreateConstInBoundsGEP2_32(Loc, 0, i), + isVolatile, Builder); } } @@ -962,103 +966,130 @@ static bool shouldUseBZeroPlusStoresToInitialize(llvm::Constant *Init, /// FIXME We could be more clever, as we are for bzero above, and generate /// memset followed by stores. It's unclear that's worth the effort. static llvm::Value *shouldUseMemSetToInitialize(llvm::Constant *Init, - uint64_t GlobalSize) { + uint64_t GlobalSize, + const llvm::DataLayout &DL) { uint64_t SizeLimit = 32; if (GlobalSize <= SizeLimit) return nullptr; - return llvm::isBytewiseValue(Init); + return llvm::isBytewiseValue(Init, DL); } -static llvm::Constant *patternFor(CodeGenModule &CGM, llvm::Type *Ty) { - // The following value is a guaranteed unmappable pointer value and has a - // repeated byte-pattern which makes it easier to synthesize. We use it for - // pointers as well as integers so that aggregates are likely to be - // initialized with this repeated value. - constexpr uint64_t LargeValue = 0xAAAAAAAAAAAAAAAAull; - // For 32-bit platforms it's a bit trickier because, across systems, only the - // zero page can reasonably be expected to be unmapped, and even then we need - // a very low address. We use a smaller value, and that value sadly doesn't - // have a repeated byte-pattern. We don't use it for integers. - constexpr uint32_t SmallValue = 0x000000AA; - // Floating-point values are initialized as NaNs because they propagate. Using - // a repeated byte pattern means that it will be easier to initialize - // all-floating-point aggregates and arrays with memset. Further, aggregates - // which mix integral and a few floats might also initialize with memset - // followed by a handful of stores for the floats. Using fairly unique NaNs - // also means they'll be easier to distinguish in a crash. - constexpr bool NegativeNaN = true; - constexpr uint64_t NaNPayload = 0xFFFFFFFFFFFFFFFFull; - if (Ty->isIntOrIntVectorTy()) { - unsigned BitWidth = cast<llvm::IntegerType>( - Ty->isVectorTy() ? Ty->getVectorElementType() : Ty) - ->getBitWidth(); - if (BitWidth <= 64) - return llvm::ConstantInt::get(Ty, LargeValue); - return llvm::ConstantInt::get( - Ty, llvm::APInt::getSplat(BitWidth, llvm::APInt(64, LargeValue))); - } - if (Ty->isPtrOrPtrVectorTy()) { - auto *PtrTy = cast<llvm::PointerType>( - Ty->isVectorTy() ? Ty->getVectorElementType() : Ty); - unsigned PtrWidth = CGM.getContext().getTargetInfo().getPointerWidth( - PtrTy->getAddressSpace()); - llvm::Type *IntTy = llvm::IntegerType::get(CGM.getLLVMContext(), PtrWidth); - uint64_t IntValue; - switch (PtrWidth) { - default: - llvm_unreachable("pattern initialization of unsupported pointer width"); - case 64: - IntValue = LargeValue; - break; - case 32: - IntValue = SmallValue; - break; +/// Decide whether we want to split a constant structure or array store into a +/// sequence of its fields' stores. This may cost us code size and compilation +/// speed, but plays better with store optimizations. +static bool shouldSplitConstantStore(CodeGenModule &CGM, + uint64_t GlobalByteSize) { + // Don't break things that occupy more than one cacheline. 
+ uint64_t ByteSizeLimit = 64; + if (CGM.getCodeGenOpts().OptimizationLevel == 0) + return false; + if (GlobalByteSize <= ByteSizeLimit) + return true; + return false; +} + +enum class IsPattern { No, Yes }; + +/// Generate a constant filled with either a pattern or zeroes. +static llvm::Constant *patternOrZeroFor(CodeGenModule &CGM, IsPattern isPattern, + llvm::Type *Ty) { + if (isPattern == IsPattern::Yes) + return initializationPatternFor(CGM, Ty); + else + return llvm::Constant::getNullValue(Ty); +} + +static llvm::Constant *constWithPadding(CodeGenModule &CGM, IsPattern isPattern, + llvm::Constant *constant); + +/// Helper function for constWithPadding() to deal with padding in structures. +static llvm::Constant *constStructWithPadding(CodeGenModule &CGM, + IsPattern isPattern, + llvm::StructType *STy, + llvm::Constant *constant) { + const llvm::DataLayout &DL = CGM.getDataLayout(); + const llvm::StructLayout *Layout = DL.getStructLayout(STy); + llvm::Type *Int8Ty = llvm::IntegerType::getInt8Ty(CGM.getLLVMContext()); + unsigned SizeSoFar = 0; + SmallVector<llvm::Constant *, 8> Values; + bool NestedIntact = true; + for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) { + unsigned CurOff = Layout->getElementOffset(i); + if (SizeSoFar < CurOff) { + assert(!STy->isPacked()); + auto *PadTy = llvm::ArrayType::get(Int8Ty, CurOff - SizeSoFar); + Values.push_back(patternOrZeroFor(CGM, isPattern, PadTy)); } - auto *Int = llvm::ConstantInt::get(IntTy, IntValue); - return llvm::ConstantExpr::getIntToPtr(Int, PtrTy); - } - if (Ty->isFPOrFPVectorTy()) { - unsigned BitWidth = llvm::APFloat::semanticsSizeInBits( - (Ty->isVectorTy() ? Ty->getVectorElementType() : Ty) - ->getFltSemantics()); - llvm::APInt Payload(64, NaNPayload); - if (BitWidth >= 64) - Payload = llvm::APInt::getSplat(BitWidth, Payload); - return llvm::ConstantFP::getQNaN(Ty, NegativeNaN, &Payload); - } - if (Ty->isArrayTy()) { - // Note: this doesn't touch tail padding (at the end of an object, before - // the next array object). It is instead handled by replaceUndef. - auto *ArrTy = cast<llvm::ArrayType>(Ty); - llvm::SmallVector<llvm::Constant *, 8> Element( - ArrTy->getNumElements(), patternFor(CGM, ArrTy->getElementType())); - return llvm::ConstantArray::get(ArrTy, Element); - } - - // Note: this doesn't touch struct padding. It will initialize as much union - // padding as is required for the largest type in the union. Padding is - // instead handled by replaceUndef. Stores to structs with volatile members - // don't have a volatile qualifier when initialized according to C++. This is - // fine because stack-based volatiles don't really have volatile semantics - // anyways, and the initialization shouldn't be observable. 
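// ---------------------------------------------------------------------------
// Editor's sketch (not part of the patch): the fill values described by the
// pattern-initialization comments above, shown as plain data.
// 0xAAAAAAAAAAAAAAAA is a repeated-byte value (memset-friendly) that is also
// an unmappable pointer on typical 64-bit targets; floats get a negative
// quiet NaN with an all-ones payload, which is the repeated byte 0xFF.
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  std::uint64_t intPattern;
  std::memset(&intPattern, 0xAA, sizeof intPattern);
  std::printf("integer/pointer pattern: 0x%016llx\n",
              (unsigned long long)intPattern); // 0xaaaaaaaaaaaaaaaa

  double floatPattern;
  std::memset(&floatPattern, 0xFF, sizeof floatPattern);
  std::printf("float pattern is a NaN: %d\n",
              floatPattern != floatPattern);   // prints 1: NaNs propagate
  return 0;
}
// ---------------------------------------------------------------------------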
- auto *StructTy = cast<llvm::StructType>(Ty); - llvm::SmallVector<llvm::Constant *, 8> Struct(StructTy->getNumElements()); - for (unsigned El = 0; El != Struct.size(); ++El) - Struct[El] = patternFor(CGM, StructTy->getElementType(El)); - return llvm::ConstantStruct::get(StructTy, Struct); + llvm::Constant *CurOp; + if (constant->isZeroValue()) + CurOp = llvm::Constant::getNullValue(STy->getElementType(i)); + else + CurOp = cast<llvm::Constant>(constant->getAggregateElement(i)); + auto *NewOp = constWithPadding(CGM, isPattern, CurOp); + if (CurOp != NewOp) + NestedIntact = false; + Values.push_back(NewOp); + SizeSoFar = CurOff + DL.getTypeAllocSize(CurOp->getType()); + } + unsigned TotalSize = Layout->getSizeInBytes(); + if (SizeSoFar < TotalSize) { + auto *PadTy = llvm::ArrayType::get(Int8Ty, TotalSize - SizeSoFar); + Values.push_back(patternOrZeroFor(CGM, isPattern, PadTy)); + } + if (NestedIntact && Values.size() == STy->getNumElements()) + return constant; + return llvm::ConstantStruct::getAnon(Values, STy->isPacked()); } -static Address createUnnamedGlobalFrom(CodeGenModule &CGM, const VarDecl &D, - CGBuilderTy &Builder, - llvm::Constant *Constant, - CharUnits Align) { +/// Replace all padding bytes in a given constant with either a pattern byte or +/// 0x00. +static llvm::Constant *constWithPadding(CodeGenModule &CGM, IsPattern isPattern, + llvm::Constant *constant) { + llvm::Type *OrigTy = constant->getType(); + if (const auto STy = dyn_cast<llvm::StructType>(OrigTy)) + return constStructWithPadding(CGM, isPattern, STy, constant); + if (auto *STy = dyn_cast<llvm::SequentialType>(OrigTy)) { + llvm::SmallVector<llvm::Constant *, 8> Values; + unsigned Size = STy->getNumElements(); + if (!Size) + return constant; + llvm::Type *ElemTy = STy->getElementType(); + bool ZeroInitializer = constant->isZeroValue(); + llvm::Constant *OpValue, *PaddedOp; + if (ZeroInitializer) { + OpValue = llvm::Constant::getNullValue(ElemTy); + PaddedOp = constWithPadding(CGM, isPattern, OpValue); + } + for (unsigned Op = 0; Op != Size; ++Op) { + if (!ZeroInitializer) { + OpValue = constant->getAggregateElement(Op); + PaddedOp = constWithPadding(CGM, isPattern, OpValue); + } + Values.push_back(PaddedOp); + } + auto *NewElemTy = Values[0]->getType(); + if (NewElemTy == ElemTy) + return constant; + if (OrigTy->isArrayTy()) { + auto *ArrayTy = llvm::ArrayType::get(NewElemTy, Size); + return llvm::ConstantArray::get(ArrayTy, Values); + } else { + return llvm::ConstantVector::get(Values); + } + } + return constant; +} + +Address CodeGenModule::createUnnamedGlobalFrom(const VarDecl &D, + llvm::Constant *Constant, + CharUnits Align) { auto FunctionName = [&](const DeclContext *DC) -> std::string { if (const auto *FD = dyn_cast<FunctionDecl>(DC)) { if (const auto *CC = dyn_cast<CXXConstructorDecl>(FD)) return CC->getNameAsString(); if (const auto *CD = dyn_cast<CXXDestructorDecl>(FD)) return CD->getNameAsString(); - return CGM.getMangledName(FD); + return getMangledName(FD); } else if (const auto *OM = dyn_cast<ObjCMethodDecl>(DC)) { return OM->getNameAsString(); } else if (isa<BlockDecl>(DC)) { @@ -1066,26 +1097,47 @@ static Address createUnnamedGlobalFrom(CodeGenModule &CGM, const VarDecl &D, } else if (isa<CapturedDecl>(DC)) { return "<captured>"; } else { - llvm::llvm_unreachable_internal("expected a function or method"); + llvm_unreachable("expected a function or method"); } }; - auto *Ty = Constant->getType(); - bool isConstant = true; - llvm::GlobalVariable *InsertBefore = nullptr; - unsigned AS = 
CGM.getContext().getTargetAddressSpace( - CGM.getStringLiteralAddressSpace()); - llvm::GlobalVariable *GV = new llvm::GlobalVariable( - CGM.getModule(), Ty, isConstant, llvm::GlobalValue::PrivateLinkage, - Constant, - "__const." + FunctionName(D.getParentFunctionOrMethod()) + "." + - D.getName(), - InsertBefore, llvm::GlobalValue::NotThreadLocal, AS); - GV->setAlignment(Align.getQuantity()); - GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - - Address SrcPtr = Address(GV, Align); - llvm::Type *BP = llvm::PointerType::getInt8PtrTy(CGM.getLLVMContext(), AS); + // Form a simple per-variable cache of these values in case we find we + // want to reuse them. + llvm::GlobalVariable *&CacheEntry = InitializerConstants[&D]; + if (!CacheEntry || CacheEntry->getInitializer() != Constant) { + auto *Ty = Constant->getType(); + bool isConstant = true; + llvm::GlobalVariable *InsertBefore = nullptr; + unsigned AS = + getContext().getTargetAddressSpace(getStringLiteralAddressSpace()); + std::string Name; + if (D.hasGlobalStorage()) + Name = getMangledName(&D).str() + ".const"; + else if (const DeclContext *DC = D.getParentFunctionOrMethod()) + Name = ("__const." + FunctionName(DC) + "." + D.getName()).str(); + else + llvm_unreachable("local variable has no parent function or method"); + llvm::GlobalVariable *GV = new llvm::GlobalVariable( + getModule(), Ty, isConstant, llvm::GlobalValue::PrivateLinkage, + Constant, Name, InsertBefore, llvm::GlobalValue::NotThreadLocal, AS); + GV->setAlignment(Align.getQuantity()); + GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + CacheEntry = GV; + } else if (CacheEntry->getAlignment() < Align.getQuantity()) { + CacheEntry->setAlignment(Align.getQuantity()); + } + + return Address(CacheEntry, Align); +} + +static Address createUnnamedGlobalForMemcpyFrom(CodeGenModule &CGM, + const VarDecl &D, + CGBuilderTy &Builder, + llvm::Constant *Constant, + CharUnits Align) { + Address SrcPtr = CGM.createUnnamedGlobalFrom(D, Constant, Align); + llvm::Type *BP = llvm::PointerType::getInt8PtrTy(CGM.getLLVMContext(), + SrcPtr.getAddressSpace()); if (SrcPtr.getType() != BP) SrcPtr = Builder.CreateBitCast(SrcPtr, BP); return SrcPtr; @@ -1096,22 +1148,23 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D, CGBuilderTy &Builder, llvm::Constant *constant) { auto *Ty = constant->getType(); - bool isScalar = Ty->isIntOrIntVectorTy() || Ty->isPtrOrPtrVectorTy() || - Ty->isFPOrFPVectorTy(); - if (isScalar) { + uint64_t ConstantSize = CGM.getDataLayout().getTypeAllocSize(Ty); + if (!ConstantSize) + return; + + bool canDoSingleStore = Ty->isIntOrIntVectorTy() || + Ty->isPtrOrPtrVectorTy() || Ty->isFPOrFPVectorTy(); + if (canDoSingleStore) { Builder.CreateStore(constant, Loc, isVolatile); return; } - auto *Int8Ty = llvm::IntegerType::getInt8Ty(CGM.getLLVMContext()); - auto *IntPtrTy = CGM.getDataLayout().getIntPtrType(CGM.getLLVMContext()); + auto *SizeVal = llvm::ConstantInt::get(CGM.IntPtrTy, ConstantSize); // If the initializer is all or mostly the same, codegen with bzero / memset // then do a few stores afterward. 
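// ---------------------------------------------------------------------------
// Editor's condensed sketch (not part of the patch) of the strategy order
// emitStoresForConstant now follows. The 32-byte memset threshold and the
// 64-byte "one cache line" split threshold are the values named in this
// patch; the predicate inputs are simplified stand-ins.
enum class InitStrategy {
  SingleStore,          // scalar or vector: one plain store
  BZeroPlusStores,      // mostly zero: memset(0) + a few element stores
  MemSetPattern,        // repeated byte value: one memset with that byte
  SplitIntoFieldStores, // small aggregate at -O1+: per-field stores
  MemCpyFromGlobal      // fallback: memcpy from a private constant global
};

InitStrategy chooseInitStrategy(bool isScalar, bool isMostlyZero,
                                bool isRepeatedByte, unsigned long long size,
                                unsigned optLevel) {
  if (isScalar)
    return InitStrategy::SingleStore;
  if (isMostlyZero)
    return InitStrategy::BZeroPlusStores;
  if (isRepeatedByte && size > 32)
    return InitStrategy::MemSetPattern;
  if (optLevel > 0 && size <= 64)
    return InitStrategy::SplitIntoFieldStores;
  return InitStrategy::MemCpyFromGlobal;
}
// ---------------------------------------------------------------------------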
- uint64_t ConstantSize = CGM.getDataLayout().getTypeAllocSize(Ty); - auto *SizeVal = llvm::ConstantInt::get(IntPtrTy, ConstantSize); if (shouldUseBZeroPlusStoresToInitialize(constant, ConstantSize)) { - Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, 0), SizeVal, + Builder.CreateMemSet(Loc, llvm::ConstantInt::get(CGM.Int8Ty, 0), SizeVal, isVolatile); bool valueAlreadyCorrect = @@ -1123,7 +1176,9 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D, return; } - llvm::Value *Pattern = shouldUseMemSetToInitialize(constant, ConstantSize); + // If the initializer is a repeated byte pattern, use memset. + llvm::Value *Pattern = + shouldUseMemSetToInitialize(constant, ConstantSize, CGM.getDataLayout()); if (Pattern) { uint64_t Value = 0x00; if (!isa<llvm::UndefValue>(Pattern)) { @@ -1131,22 +1186,51 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D, assert(AP.getBitWidth() <= 8); Value = AP.getLimitedValue(); } - Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, Value), SizeVal, + Builder.CreateMemSet(Loc, llvm::ConstantInt::get(CGM.Int8Ty, Value), SizeVal, isVolatile); return; } - Builder.CreateMemCpy( - Loc, - createUnnamedGlobalFrom(CGM, D, Builder, constant, Loc.getAlignment()), - SizeVal, isVolatile); + // If the initializer is small, use a handful of stores. + if (shouldSplitConstantStore(CGM, ConstantSize)) { + if (auto *STy = dyn_cast<llvm::StructType>(Ty)) { + // FIXME: handle the case when STy != Loc.getElementType(). + if (STy == Loc.getElementType()) { + for (unsigned i = 0; i != constant->getNumOperands(); i++) { + Address EltPtr = Builder.CreateStructGEP(Loc, i); + emitStoresForConstant( + CGM, D, EltPtr, isVolatile, Builder, + cast<llvm::Constant>(Builder.CreateExtractValue(constant, i))); + } + return; + } + } else if (auto *ATy = dyn_cast<llvm::ArrayType>(Ty)) { + // FIXME: handle the case when ATy != Loc.getElementType(). + if (ATy == Loc.getElementType()) { + for (unsigned i = 0; i != ATy->getNumElements(); i++) { + Address EltPtr = Builder.CreateConstArrayGEP(Loc, i); + emitStoresForConstant( + CGM, D, EltPtr, isVolatile, Builder, + cast<llvm::Constant>(Builder.CreateExtractValue(constant, i))); + } + return; + } + } + } + + // Copy from a global. + Builder.CreateMemCpy(Loc, + createUnnamedGlobalForMemcpyFrom( + CGM, D, Builder, constant, Loc.getAlignment()), + SizeVal, isVolatile); } static void emitStoresForZeroInit(CodeGenModule &CGM, const VarDecl &D, Address Loc, bool isVolatile, CGBuilderTy &Builder) { llvm::Type *ElTy = Loc.getElementType(); - llvm::Constant *constant = llvm::Constant::getNullValue(ElTy); + llvm::Constant *constant = + constWithPadding(CGM, IsPattern::No, llvm::Constant::getNullValue(ElTy)); emitStoresForConstant(CGM, D, Loc, isVolatile, Builder, constant); } @@ -1154,7 +1238,8 @@ static void emitStoresForPatternInit(CodeGenModule &CGM, const VarDecl &D, Address Loc, bool isVolatile, CGBuilderTy &Builder) { llvm::Type *ElTy = Loc.getElementType(); - llvm::Constant *constant = patternFor(CGM, ElTy); + llvm::Constant *constant = constWithPadding( + CGM, IsPattern::Yes, initializationPatternFor(CGM, ElTy)); assert(!isa<llvm::UndefValue>(constant)); emitStoresForConstant(CGM, D, Loc, isVolatile, Builder, constant); } @@ -1170,13 +1255,11 @@ static bool containsUndef(llvm::Constant *constant) { return false; } -static llvm::Constant *replaceUndef(llvm::Constant *constant) { - // FIXME: when doing pattern initialization, replace undef with 0xAA instead. 
- // FIXME: also replace padding between values by creating a new struct type - // which has no padding. +static llvm::Constant *replaceUndef(CodeGenModule &CGM, IsPattern isPattern, + llvm::Constant *constant) { auto *Ty = constant->getType(); if (isa<llvm::UndefValue>(constant)) - return llvm::Constant::getNullValue(Ty); + return patternOrZeroFor(CGM, isPattern, Ty); if (!(Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy())) return constant; if (!containsUndef(constant)) @@ -1184,7 +1267,7 @@ static llvm::Constant *replaceUndef(llvm::Constant *constant) { llvm::SmallVector<llvm::Constant *, 8> Values(constant->getNumOperands()); for (unsigned Op = 0, NumOp = constant->getNumOperands(); Op != NumOp; ++Op) { auto *OpValue = cast<llvm::Constant>(constant->getOperand(Op)); - Values[Op] = replaceUndef(OpValue); + Values[Op] = replaceUndef(CGM, isPattern, OpValue); } if (Ty->isStructTy()) return llvm::ConstantStruct::get(cast<llvm::StructType>(Ty), Values); @@ -1318,10 +1401,15 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { Address address = Address::invalid(); Address AllocaAddr = Address::invalid(); - if (Ty->isConstantSizeType()) { - bool NRVO = getLangOpts().ElideConstructors && - D.isNRVOVariable(); - + Address OpenMPLocalAddr = + getLangOpts().OpenMP + ? CGM.getOpenMPRuntime().getAddressOfLocalVariable(*this, &D) + : Address::invalid(); + bool NRVO = getLangOpts().ElideConstructors && D.isNRVOVariable(); + + if (getLangOpts().OpenMP && OpenMPLocalAddr.isValid()) { + address = OpenMPLocalAddr; + } else if (Ty->isConstantSizeType()) { // If this value is an array or struct with a statically determinable // constant initializer, there are optimizations we can do. // @@ -1361,14 +1449,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // unless: // - it's an NRVO variable. // - we are compiling OpenMP and it's an OpenMP local variable. - - Address OpenMPLocalAddr = - getLangOpts().OpenMP - ? CGM.getOpenMPRuntime().getAddressOfLocalVariable(*this, &D) - : Address::invalid(); - if (getLangOpts().OpenMP && OpenMPLocalAddr.isValid()) { - address = OpenMPLocalAddr; - } else if (NRVO) { + if (NRVO) { // The named return value optimization: allocate this variable in the // return slot, so that we can elide the copy when returning this // variable (C++0x [class.copy]p34). @@ -1451,7 +1532,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { Address Stack = CreateTempAlloca(Int8PtrTy, getPointerAlign(), "saved_stack"); - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::stacksave); + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::stacksave); llvm::Value *V = Builder.CreateCall(F); Builder.CreateStore(V, Stack); @@ -1481,11 +1562,19 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // Emit debug info for local var declaration. if (EmitDebugInfo && HaveInsertPoint()) { + Address DebugAddr = address; + bool UsePointerValue = NRVO && ReturnValuePointer.isValid(); DI->setLocation(D.getLocation()); - (void)DI->EmitDeclareOfAutoVariable(&D, address.getPointer(), Builder); + + // If NRVO, use a pointer to the return address. + if (UsePointerValue) + DebugAddr = ReturnValuePointer; + + (void)DI->EmitDeclareOfAutoVariable(&D, DebugAddr.getPointer(), Builder, + UsePointerValue); } - if (D.hasAttr<AnnotateAttr>()) + if (D.hasAttr<AnnotateAttr>() && HaveInsertPoint()) EmitVarAnnotations(&D, address.getPointer()); // Make sure we call @llvm.lifetime.end. 
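// ---------------------------------------------------------------------------
// Editor's example (not from the patch) of the NRVO case the surrounding
// changes handle: `r` is constructed directly in the caller's return slot.
// When control can leave the scope without returning the object, CodeGen
// guards the destructor cleanup with a boolean NRVO flag; with this patch,
// debug info can also describe such a variable through ReturnValuePointer
// instead of a dedicated alloca.
#include <cstdio>

struct Tracked {
  Tracked() { std::printf("ctor %p\n", (void *)this); }
  ~Tracked() { std::printf("dtor %p\n", (void *)this); }
};

Tracked make() {
  Tracked r; // built in the return slot (NRVO); no copy or move
  return r;  // the destructor cleanup is skipped on this path
}

int main() {
  Tracked t = make(); // typically one ctor/dtor pair, same address
  return 0;
}
// ---------------------------------------------------------------------------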
@@ -1575,6 +1664,87 @@ bool CodeGenFunction::isTrivialInitializer(const Expr *Init) { return false; } +void CodeGenFunction::emitZeroOrPatternForAutoVarInit(QualType type, + const VarDecl &D, + Address Loc) { + auto trivialAutoVarInit = getContext().getLangOpts().getTrivialAutoVarInit(); + CharUnits Size = getContext().getTypeSizeInChars(type); + bool isVolatile = type.isVolatileQualified(); + if (!Size.isZero()) { + switch (trivialAutoVarInit) { + case LangOptions::TrivialAutoVarInitKind::Uninitialized: + llvm_unreachable("Uninitialized handled by caller"); + case LangOptions::TrivialAutoVarInitKind::Zero: + emitStoresForZeroInit(CGM, D, Loc, isVolatile, Builder); + break; + case LangOptions::TrivialAutoVarInitKind::Pattern: + emitStoresForPatternInit(CGM, D, Loc, isVolatile, Builder); + break; + } + return; + } + + // VLAs look zero-sized to getTypeInfo. We can't emit constant stores to + // them, so emit a memcpy with the VLA size to initialize each element. + // Technically zero-sized or negative-sized VLAs are undefined, and UBSan + // will catch that code, but there exists code which generates zero-sized + // VLAs. Be nice and initialize whatever they requested. + const auto *VlaType = getContext().getAsVariableArrayType(type); + if (!VlaType) + return; + auto VlaSize = getVLASize(VlaType); + auto SizeVal = VlaSize.NumElts; + CharUnits EltSize = getContext().getTypeSizeInChars(VlaSize.Type); + switch (trivialAutoVarInit) { + case LangOptions::TrivialAutoVarInitKind::Uninitialized: + llvm_unreachable("Uninitialized handled by caller"); + + case LangOptions::TrivialAutoVarInitKind::Zero: + if (!EltSize.isOne()) + SizeVal = Builder.CreateNUWMul(SizeVal, CGM.getSize(EltSize)); + Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, 0), SizeVal, + isVolatile); + break; + + case LangOptions::TrivialAutoVarInitKind::Pattern: { + llvm::Type *ElTy = Loc.getElementType(); + llvm::Constant *Constant = constWithPadding( + CGM, IsPattern::Yes, initializationPatternFor(CGM, ElTy)); + CharUnits ConstantAlign = getContext().getTypeAlignInChars(VlaSize.Type); + llvm::BasicBlock *SetupBB = createBasicBlock("vla-setup.loop"); + llvm::BasicBlock *LoopBB = createBasicBlock("vla-init.loop"); + llvm::BasicBlock *ContBB = createBasicBlock("vla-init.cont"); + llvm::Value *IsZeroSizedVLA = Builder.CreateICmpEQ( + SizeVal, llvm::ConstantInt::get(SizeVal->getType(), 0), + "vla.iszerosized"); + Builder.CreateCondBr(IsZeroSizedVLA, ContBB, SetupBB); + EmitBlock(SetupBB); + if (!EltSize.isOne()) + SizeVal = Builder.CreateNUWMul(SizeVal, CGM.getSize(EltSize)); + llvm::Value *BaseSizeInChars = + llvm::ConstantInt::get(IntPtrTy, EltSize.getQuantity()); + Address Begin = Builder.CreateElementBitCast(Loc, Int8Ty, "vla.begin"); + llvm::Value *End = + Builder.CreateInBoundsGEP(Begin.getPointer(), SizeVal, "vla.end"); + llvm::BasicBlock *OriginBB = Builder.GetInsertBlock(); + EmitBlock(LoopBB); + llvm::PHINode *Cur = Builder.CreatePHI(Begin.getType(), 2, "vla.cur"); + Cur->addIncoming(Begin.getPointer(), OriginBB); + CharUnits CurAlign = Loc.getAlignment().alignmentOfArrayElement(EltSize); + Builder.CreateMemCpy(Address(Cur, CurAlign), + createUnnamedGlobalForMemcpyFrom( + CGM, D, Builder, Constant, ConstantAlign), + BaseSizeInChars, isVolatile); + llvm::Value *Next = + Builder.CreateInBoundsGEP(Int8Ty, Cur, BaseSizeInChars, "vla.next"); + llvm::Value *Done = Builder.CreateICmpEQ(Next, End, "vla-init.isdone"); + Builder.CreateCondBr(Done, ContBB, LoopBB); + Cur->addIncoming(Next, LoopBB); + EmitBlock(ContBB); + } 
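// ---------------------------------------------------------------------------
// Editor's sketch (not part of the patch) of the control flow that the
// vla-setup.loop / vla-init.loop / vla-init.cont blocks above emit,
// expressed as plain C++; `pattern` stands in for the private global
// produced by createUnnamedGlobalForMemcpyFrom.
#include <cstddef>
#include <cstring>

void patternInitVLA(void *loc, const void *pattern, std::size_t numElts,
                    std::size_t eltSize) {
  if (numElts == 0) // vla.iszerosized: zero-sized VLAs skip the loop
    return;
  char *cur = static_cast<char *>(loc); // vla.begin
  char *end = cur + numElts * eltSize;  // vla.end
  do {                                  // vla-init.loop
    std::memcpy(cur, pattern, eltSize); // copy one element's fill pattern
    cur += eltSize;                     // vla.next
  } while (cur != end);                 // vla-init.isdone
}
// ---------------------------------------------------------------------------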
break; + } +} + void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { assert(emission.Variable && "emission was not valid!"); @@ -1585,8 +1755,6 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { auto DL = ApplyDebugLocation::CreateDefaultArtificial(*this, D.getLocation()); QualType type = D.getType(); - bool isVolatile = type.isVolatileQualified(); - // If this local has an initializer, emit it now. const Expr *Init = D.getInit(); @@ -1620,8 +1788,9 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { bool capturedByInit = Init && emission.IsEscapingByRef && isCapturedBy(D, Init); - Address Loc = - capturedByInit ? emission.Addr : emission.getObjectAddress(*this); + bool locIsByrefHeader = !capturedByInit; + const Address Loc = + locIsByrefHeader ? emission.getObjectAddress(*this) : emission.Addr; // Note: constexpr already initializes everything correctly. LangOptions::TrivialAutoVarInitKind trivialAutoVarInit = @@ -1631,103 +1800,46 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { ? LangOptions::TrivialAutoVarInitKind::Uninitialized : getContext().getLangOpts().getTrivialAutoVarInit())); - auto initializeWhatIsTechnicallyUninitialized = [&]() { + auto initializeWhatIsTechnicallyUninitialized = [&](Address Loc) { if (trivialAutoVarInit == LangOptions::TrivialAutoVarInitKind::Uninitialized) return; - CharUnits Size = getContext().getTypeSizeInChars(type); - if (!Size.isZero()) { - switch (trivialAutoVarInit) { - case LangOptions::TrivialAutoVarInitKind::Uninitialized: - llvm_unreachable("Uninitialized handled above"); - case LangOptions::TrivialAutoVarInitKind::Zero: - emitStoresForZeroInit(CGM, D, Loc, isVolatile, Builder); - break; - case LangOptions::TrivialAutoVarInitKind::Pattern: - emitStoresForPatternInit(CGM, D, Loc, isVolatile, Builder); - break; - } - return; - } - - // VLAs look zero-sized to getTypeInfo. We can't emit constant stores to - // them, so emit a memcpy with the VLA size to initialize each element. - // Technically zero-sized or negative-sized VLAs are undefined, and UBSan - // will catch that code, but there exists code which generates zero-sized - // VLAs. Be nice and initialize whatever they requested. - const VariableArrayType *VlaType = - dyn_cast_or_null<VariableArrayType>(getContext().getAsArrayType(type)); - if (!VlaType) - return; - auto VlaSize = getVLASize(VlaType); - auto SizeVal = VlaSize.NumElts; - CharUnits EltSize = getContext().getTypeSizeInChars(VlaSize.Type); - switch (trivialAutoVarInit) { - case LangOptions::TrivialAutoVarInitKind::Uninitialized: - llvm_unreachable("Uninitialized handled above"); - - case LangOptions::TrivialAutoVarInitKind::Zero: - if (!EltSize.isOne()) - SizeVal = Builder.CreateNUWMul(SizeVal, CGM.getSize(EltSize)); - Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, 0), SizeVal, - isVolatile); - break; + // Only initialize a __block's storage: we always initialize the header. 
+ if (emission.IsEscapingByRef && !locIsByrefHeader) + Loc = emitBlockByrefAddress(Loc, &D, /*follow=*/false); - case LangOptions::TrivialAutoVarInitKind::Pattern: { - llvm::Type *ElTy = Loc.getElementType(); - llvm::Constant *Constant = patternFor(CGM, ElTy); - CharUnits ConstantAlign = getContext().getTypeAlignInChars(VlaSize.Type); - llvm::BasicBlock *SetupBB = createBasicBlock("vla-setup.loop"); - llvm::BasicBlock *LoopBB = createBasicBlock("vla-init.loop"); - llvm::BasicBlock *ContBB = createBasicBlock("vla-init.cont"); - llvm::Value *IsZeroSizedVLA = Builder.CreateICmpEQ( - SizeVal, llvm::ConstantInt::get(SizeVal->getType(), 0), - "vla.iszerosized"); - Builder.CreateCondBr(IsZeroSizedVLA, ContBB, SetupBB); - EmitBlock(SetupBB); - if (!EltSize.isOne()) - SizeVal = Builder.CreateNUWMul(SizeVal, CGM.getSize(EltSize)); - llvm::Value *BaseSizeInChars = - llvm::ConstantInt::get(IntPtrTy, EltSize.getQuantity()); - Address Begin = Builder.CreateElementBitCast(Loc, Int8Ty, "vla.begin"); - llvm::Value *End = - Builder.CreateInBoundsGEP(Begin.getPointer(), SizeVal, "vla.end"); - llvm::BasicBlock *OriginBB = Builder.GetInsertBlock(); - EmitBlock(LoopBB); - llvm::PHINode *Cur = Builder.CreatePHI(Begin.getType(), 2, "vla.cur"); - Cur->addIncoming(Begin.getPointer(), OriginBB); - CharUnits CurAlign = Loc.getAlignment().alignmentOfArrayElement(EltSize); - Builder.CreateMemCpy( - Address(Cur, CurAlign), - createUnnamedGlobalFrom(CGM, D, Builder, Constant, ConstantAlign), - BaseSizeInChars, isVolatile); - llvm::Value *Next = - Builder.CreateInBoundsGEP(Int8Ty, Cur, BaseSizeInChars, "vla.next"); - llvm::Value *Done = Builder.CreateICmpEQ(Next, End, "vla-init.isdone"); - Builder.CreateCondBr(Done, ContBB, LoopBB); - Cur->addIncoming(Next, LoopBB); - EmitBlock(ContBB); - } break; - } + return emitZeroOrPatternForAutoVarInit(type, D, Loc); }; - if (isTrivialInitializer(Init)) { - initializeWhatIsTechnicallyUninitialized(); - return; - } + if (isTrivialInitializer(Init)) + return initializeWhatIsTechnicallyUninitialized(Loc); llvm::Constant *constant = nullptr; - if (emission.IsConstantAggregate || D.isConstexpr()) { + if (emission.IsConstantAggregate || + D.mightBeUsableInConstantExpressions(getContext())) { assert(!capturedByInit && "constant init contains a capturing block?"); constant = ConstantEmitter(*this).tryEmitAbstractForInitializer(D); - if (constant && trivialAutoVarInit != - LangOptions::TrivialAutoVarInitKind::Uninitialized) - constant = replaceUndef(constant); + if (constant && !constant->isZeroValue() && + (trivialAutoVarInit != + LangOptions::TrivialAutoVarInitKind::Uninitialized)) { + IsPattern isPattern = + (trivialAutoVarInit == LangOptions::TrivialAutoVarInitKind::Pattern) + ? IsPattern::Yes + : IsPattern::No; + // C guarantees that brace-init with fewer initializers than members in + // the aggregate will initialize the rest of the aggregate as-if it were + // static initialization. In turn static initialization guarantees that + // padding is initialized to zero bits. We could instead pattern-init if D + // has any ImplicitValueInitExpr, but that seems to be unintuitive + // behavior. 
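// ---------------------------------------------------------------------------
// Editor's example (not from the patch) of the C guarantee the comment above
// relies on: with fewer braced initializers than members, the remainder is
// initialized as if the object had static storage duration, i.e. zeroed, and
// static initialization also zeroes padding bits. Hence the padding of such
// constants is zero-filled here even in pattern mode.
struct S {
  char c; // one byte, then (typically) three bytes of padding
  int i;
  int rest[4];
};

void example() {
  S s = {'x'}; // i, rest, and the padding bytes are all zero-filled
  (void)s;
}
// ---------------------------------------------------------------------------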
+ constant = constWithPadding(CGM, IsPattern::No, + replaceUndef(CGM, isPattern, constant)); + } } if (!constant) { - initializeWhatIsTechnicallyUninitialized(); + initializeWhatIsTechnicallyUninitialized(Loc); LValue lv = MakeAddrLValue(Loc, type); lv.setNonGC(true); return EmitExprAsInit(Init, &D, lv, capturedByInit); @@ -1741,10 +1853,9 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { } llvm::Type *BP = CGM.Int8Ty->getPointerTo(Loc.getAddressSpace()); - if (Loc.getType() != BP) - Loc = Builder.CreateBitCast(Loc, BP); - - emitStoresForConstant(CGM, D, Loc, isVolatile, Builder, constant); + emitStoresForConstant( + CGM, D, (Loc.getType() == BP) ? Loc : Builder.CreateBitCast(Loc, BP), + type.isVolatileQualified(), Builder, constant); } /// Emit an expression as an initializer for an object (variable, field, etc.) @@ -1789,7 +1900,7 @@ void CodeGenFunction::EmitExprAsInit(const Expr *init, const ValueDecl *D, if (isa<VarDecl>(D)) Overlap = AggValueSlot::DoesNotOverlap; else if (auto *FD = dyn_cast<FieldDecl>(D)) - Overlap = overlapForFieldInit(FD); + Overlap = getOverlapForFieldInit(FD); // TODO: how can we delay here if D is captured by its initializer? EmitAggExpr(init, AggValueSlot::forLValue(lvalue, AggValueSlot::IsDestructed, @@ -1828,7 +1939,7 @@ void CodeGenFunction::emitAutoVarTypeCleanup( if (emission.NRVOFlag) { assert(!type->isArrayType()); CXXDestructorDecl *dtor = type->getAsCXXRecordDecl()->getDestructor(); - EHStack.pushCleanup<DestroyNRVOVariableCXX>(cleanupKind, addr, dtor, + EHStack.pushCleanup<DestroyNRVOVariableCXX>(cleanupKind, addr, type, dtor, emission.NRVOFlag); return; } @@ -2199,7 +2310,7 @@ void CodeGenFunction::pushRegularPartialArrayCleanup(llvm::Value *arrayBegin, } /// Lazily declare the @llvm.lifetime.start intrinsic. -llvm::Constant *CodeGenModule::getLLVMLifetimeStartFn() { +llvm::Function *CodeGenModule::getLLVMLifetimeStartFn() { if (LifetimeStartFn) return LifetimeStartFn; LifetimeStartFn = llvm::Intrinsic::getDeclaration(&getModule(), @@ -2208,7 +2319,7 @@ llvm::Constant *CodeGenModule::getLLVMLifetimeStartFn() { } /// Lazily declare the @llvm.lifetime.end intrinsic. -llvm::Constant *CodeGenModule::getLLVMLifetimeEndFn() { +llvm::Function *CodeGenModule::getLLVMLifetimeEndFn() { if (LifetimeEndFn) return LifetimeEndFn; LifetimeEndFn = llvm::Intrinsic::getDeclaration(&getModule(), @@ -2417,6 +2528,13 @@ void CodeGenModule::EmitOMPDeclareReduction(const OMPDeclareReductionDecl *D, getOpenMPRuntime().emitUserDefinedReduction(CGF, D); } +void CodeGenModule::EmitOMPDeclareMapper(const OMPDeclareMapperDecl *D, + CodeGenFunction *CGF) { + if (!LangOpts.OpenMP || (!LangOpts.EmitAllDecls && !D->isUsed())) + return; + // FIXME: need to implement mapper code generation +} + void CodeGenModule::EmitOMPRequiresDecl(const OMPRequiresDecl *D) { - getOpenMPRuntime().checkArchForUnifiedAddressing(*this, D); + getOpenMPRuntime().checkArchForUnifiedAddressing(D); } diff --git a/lib/CodeGen/CGDeclCXX.cpp b/lib/CodeGen/CGDeclCXX.cpp index 9aa31f181e99..7a0605b8450a 100644 --- a/lib/CodeGen/CGDeclCXX.cpp +++ b/lib/CodeGen/CGDeclCXX.cpp @@ -1,9 +1,8 @@ //===--- CGDeclCXX.cpp - Emit LLVM Code for C++ declarations --------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -15,6 +14,7 @@ #include "CGCXXABI.h" #include "CGObjCRuntime.h" #include "CGOpenMPRuntime.h" +#include "TargetInfo.h" #include "clang/Basic/CodeGenOptions.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/Intrinsics.h" @@ -75,7 +75,7 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D, // bails even if the attribute is not present. if (D.isNoDestroy(CGF.getContext())) return; - + CodeGenModule &CGM = CGF.CGM; // FIXME: __attribute__((cleanup)) ? @@ -98,7 +98,7 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D, return; } - llvm::Constant *Func; + llvm::FunctionCallee Func; llvm::Constant *Argument; // Special-case non-array C++ destructors, if they have the right signature. @@ -118,10 +118,23 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D, assert(!Record->hasTrivialDestructor()); CXXDestructorDecl *Dtor = Record->getDestructor(); - Func = CGM.getAddrOfCXXStructor(Dtor, StructorType::Complete); - Argument = llvm::ConstantExpr::getBitCast( - Addr.getPointer(), CGF.getTypes().ConvertType(Type)->getPointerTo()); - + Func = CGM.getAddrAndTypeOfCXXStructor(GlobalDecl(Dtor, Dtor_Complete)); + if (CGF.getContext().getLangOpts().OpenCL) { + auto DestAS = + CGM.getTargetCodeGenInfo().getAddrSpaceOfCxaAtexitPtrParam(); + auto DestTy = CGF.getTypes().ConvertType(Type)->getPointerTo( + CGM.getContext().getTargetAddressSpace(DestAS)); + auto SrcAS = D.getType().getQualifiers().getAddressSpace(); + if (DestAS == SrcAS) + Argument = llvm::ConstantExpr::getBitCast(Addr.getPointer(), DestTy); + else + // FIXME: On addr space mismatch we are passing NULL. The generation + // of the global destructor function should be adjusted accordingly. + Argument = llvm::ConstantPointerNull::get(DestTy); + } else { + Argument = llvm::ConstantExpr::getBitCast( + Addr.getPointer(), CGF.getTypes().ConvertType(Type)->getPointerTo()); + } // Otherwise, the standard logic requires a helper function. } else { Func = CodeGenFunction(CGM) @@ -150,7 +163,7 @@ void CodeGenFunction::EmitInvariantStart(llvm::Constant *Addr, CharUnits Size) { llvm::Intrinsic::ID InvStartID = llvm::Intrinsic::invariant_start; // Overloaded address space type. llvm::Type *ObjectPtr[1] = {Int8PtrTy}; - llvm::Constant *InvariantStart = CGM.getIntrinsic(InvStartID, ObjectPtr); + llvm::Function *InvariantStart = CGM.getIntrinsic(InvStartID, ObjectPtr); // Emit a call with the size in bytes of the object. uint64_t Width = Size.getQuantity(); @@ -215,8 +228,8 @@ void CodeGenFunction::EmitCXXGlobalVarDeclInit(const VarDecl &D, /// Create a stub function, suitable for being passed to atexit, /// which passes the given address to the given destructor function. -llvm::Constant *CodeGenFunction::createAtExitStub(const VarDecl &VD, - llvm::Constant *dtor, +llvm::Function *CodeGenFunction::createAtExitStub(const VarDecl &VD, + llvm::FunctionCallee dtor, llvm::Constant *addr) { // Get the destructor function type, void(*)(void). 
llvm::FunctionType *ty = llvm::FunctionType::get(CGM.VoidTy, false); @@ -227,19 +240,19 @@ llvm::Constant *CodeGenFunction::createAtExitStub(const VarDecl &VD, } const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); - llvm::Function *fn = CGM.CreateGlobalInitOrDestructFunction(ty, FnName.str(), - FI, - VD.getLocation()); + llvm::Function *fn = CGM.CreateGlobalInitOrDestructFunction( + ty, FnName.str(), FI, VD.getLocation()); CodeGenFunction CGF(CGM); - CGF.StartFunction(&VD, CGM.getContext().VoidTy, fn, FI, FunctionArgList()); + CGF.StartFunction(GlobalDecl(&VD, DynamicInitKind::AtExit), + CGM.getContext().VoidTy, fn, FI, FunctionArgList()); llvm::CallInst *call = CGF.Builder.CreateCall(dtor, addr); // Make sure the call and the callee agree on calling convention. if (llvm::Function *dtorFn = - dyn_cast<llvm::Function>(dtor->stripPointerCasts())) + dyn_cast<llvm::Function>(dtor.getCallee()->stripPointerCasts())) call->setCallingConv(dtorFn->getCallingConv()); CGF.FinishFunction(); @@ -249,7 +262,7 @@ llvm::Constant *CodeGenFunction::createAtExitStub(const VarDecl &VD, /// Register a global destructor using the C atexit runtime function. void CodeGenFunction::registerGlobalDtorWithAtExit(const VarDecl &VD, - llvm::Constant *dtor, + llvm::FunctionCallee dtor, llvm::Constant *addr) { // Create a function which calls the destructor. llvm::Constant *dtorStub = createAtExitStub(VD, dtor, addr); @@ -261,10 +274,10 @@ void CodeGenFunction::registerGlobalDtorWithAtExit(llvm::Constant *dtorStub) { llvm::FunctionType *atexitTy = llvm::FunctionType::get(IntTy, dtorStub->getType(), false); - llvm::Constant *atexit = + llvm::FunctionCallee atexit = CGM.CreateRuntimeFunction(atexitTy, "atexit", llvm::AttributeList(), /*Local=*/true); - if (llvm::Function *atexitFn = dyn_cast<llvm::Function>(atexit)) + if (llvm::Function *atexitFn = dyn_cast<llvm::Function>(atexit.getCallee())) atexitFn->setDoesNotThrow(); EmitNounwindRuntimeCall(atexit, dtorStub); @@ -356,6 +369,10 @@ llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction( !isInSanitizerBlacklist(SanitizerKind::KernelHWAddress, Fn, Loc)) Fn->addFnAttr(llvm::Attribute::SanitizeHWAddress); + if (getLangOpts().Sanitize.has(SanitizerKind::MemTag) && + !isInSanitizerBlacklist(SanitizerKind::MemTag, Fn, Loc)) + Fn->addFnAttr(llvm::Attribute::SanitizeMemTag); + if (getLangOpts().Sanitize.has(SanitizerKind::Thread) && !isInSanitizerBlacklist(SanitizerKind::Thread, Fn, Loc)) Fn->addFnAttr(llvm::Attribute::SanitizeThread); @@ -468,7 +485,8 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, } else if (auto *IPA = D->getAttr<InitPriorityAttr>()) { OrderGlobalInits Key(IPA->getPriority(), PrioritizedCXXGlobalInits.size()); PrioritizedCXXGlobalInits.push_back(std::make_pair(Key, Fn)); - } else if (isTemplateInstantiation(D->getTemplateSpecializationKind())) { + } else if (isTemplateInstantiation(D->getTemplateSpecializationKind()) || + getContext().GetGVALinkageForVariable(D) == GVA_DiscardableODR) { // C++ [basic.start.init]p2: // Definitions of explicitly specialized class template static data // members have ordered initialization. Other class template static data @@ -482,6 +500,11 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, // minor startup time optimization. In the MS C++ ABI, there are no guard // variables, so this COMDAT key is required for correctness. 
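// ---------------------------------------------------------------------------
// Editor's sketch (not part of the patch) of the runtime shape produced by
// createAtExitStub / registerGlobalDtorWithAtExit above: a synthesized
// void() stub applies the destructor to the variable's address and is then
// handed to atexit. Names here are illustrative; the real stub is an
// internal "__dtor_..." function.
#include <cstdio>
#include <cstdlib>
#include <new>

struct Guard {
  ~Guard() { std::printf("destroyed\n"); }
};

alignas(Guard) static unsigned char storage[sizeof(Guard)];

static void dtorStub() { // role of the function createAtExitStub emits
  reinterpret_cast<Guard *>(storage)->~Guard();
}

int main() {
  new (storage) Guard();  // "dynamic initialization" of the global
  std::atexit(&dtorStub); // role of registerGlobalDtorWithAtExit
  return 0;               // prints "destroyed" during normal exit
}
// ---------------------------------------------------------------------------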
AddGlobalCtor(Fn, 65535, COMDATKey);
+ if (getTarget().getCXXABI().isMicrosoft() && COMDATKey) {
+ // In the MS C++ ABI, MS adds template static data members to the linker
+ // directive.
+ addUsedGlobal(COMDATKey);
+ }
} else if (D->hasAttr<SelectAnyAttr>()) {
// SelectAny globals will be comdat-folded. Put the initializer into a
// COMDAT group associated with the global, so the initializers get folded
@@ -575,6 +598,19 @@ CodeGenModule::EmitCXXGlobalInitFunc() {
CodeGenFunction(*this).GenerateCXXGlobalInitFunc(Fn, CXXGlobalInits);
AddGlobalCtor(Fn);
+ // In OpenCL, global init functions must be converted to kernels in order
+ // to be able to launch them from the host.
+ // FIXME: Some more work might be needed to handle destructors correctly.
+ // The current initialization function makes use of function pointer
+ // callbacks; we can't support function pointers, especially between host
+ // and device. However, global destruction has little meaning without any
+ // dynamic resource allocation on the device, and program-scope variables
+ // are destroyed by the runtime when the program is released.
+ if (getLangOpts().OpenCL) {
+ GenOpenCLArgMetadata(Fn);
+ Fn->setCallingConv(llvm::CallingConv::SPIR_KERNEL);
+ }
+
CXXGlobalInits.clear();
}
@@ -604,15 +640,21 @@ void CodeGenFunction::GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn,
CurEHLocation = D->getBeginLoc();
- StartFunction(GlobalDecl(D), getContext().VoidTy, Fn,
- getTypes().arrangeNullaryFunction(),
+ StartFunction(GlobalDecl(D, DynamicInitKind::Initializer),
+ getContext().VoidTy, Fn, getTypes().arrangeNullaryFunction(),
FunctionArgList(), D->getLocation(),
D->getInit()->getExprLoc());
// Use guarded initialization if the global variable is weak. This
// occurs for, e.g., instantiated static data members and
// definitions explicitly marked weak.
- if (Addr->hasWeakLinkage() || Addr->hasLinkOnceLinkage()) {
+ //
+ // Also use guarded initialization for a variable with dynamic TLS and
+ // unordered initialization. (If the initialization is ordered, the ABI
+ // layer will guard the whole-TU initialization for us.)
+ if (Addr->hasWeakLinkage() || Addr->hasLinkOnceLinkage() ||
+ (D->getTLSKind() == VarDecl::TLS_Dynamic &&
+ isTemplateInstantiation(D->getTemplateSpecializationKind()))) {
EmitCXXGuardedInit(*D, Addr, PerformInit);
} else {
EmitCXXGlobalVarDeclInit(*D, Addr, PerformInit);
@@ -682,8 +724,8 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn,
void CodeGenFunction::GenerateCXXGlobalDtorsFunc(
llvm::Function *Fn,
- const std::vector<std::pair<llvm::WeakTrackingVH, llvm::Constant *>>
- &DtorsAndObjects) {
+ const std::vector<std::tuple<llvm::FunctionType *, llvm::WeakTrackingVH,
+ llvm::Constant *>> &DtorsAndObjects) {
{
auto NL = ApplyDebugLocation::CreateEmpty(*this);
StartFunction(GlobalDecl(), getContext().VoidTy, Fn,
@@ -693,9 +735,11 @@ void CodeGenFunction::GenerateCXXGlobalDtorsFunc(
// Emit the dtors, in reverse order from construction.
for (unsigned i = 0, e = DtorsAndObjects.size(); i != e; ++i) {
- llvm::Value *Callee = DtorsAndObjects[e - i - 1].first;
- llvm::CallInst *CI = Builder.CreateCall(Callee,
- DtorsAndObjects[e - i - 1].second);
+ llvm::FunctionType *CalleeTy;
+ llvm::Value *Callee;
+ llvm::Constant *Arg;
+ std::tie(CalleeTy, Callee, Arg) = DtorsAndObjects[e - i - 1];
+ llvm::CallInst *CI = Builder.CreateCall(CalleeTy, Callee, Arg);
// Make sure the call and the callee agree on calling convention.
if (llvm::Function *F = dyn_cast<llvm::Function>(Callee)) CI->setCallingConv(F->getCallingConv()); diff --git a/lib/CodeGen/CGException.cpp b/lib/CodeGen/CGException.cpp index 5756e13d2623..3b7a88a0b769 100644 --- a/lib/CodeGen/CGException.cpp +++ b/lib/CodeGen/CGException.cpp @@ -1,9 +1,8 @@ //===--- CGException.cpp - Emit LLVM Code for C++ exceptions ----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -22,7 +21,6 @@ #include "clang/AST/StmtObjC.h" #include "clang/AST/StmtVisitor.h" #include "clang/Basic/TargetBuiltins.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/SaveAndRestore.h" @@ -30,29 +28,29 @@ using namespace clang; using namespace CodeGen; -static llvm::Constant *getFreeExceptionFn(CodeGenModule &CGM) { +static llvm::FunctionCallee getFreeExceptionFn(CodeGenModule &CGM) { // void __cxa_free_exception(void *thrown_exception); llvm::FunctionType *FTy = - llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*IsVarArgs=*/false); + llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*isVarArg=*/false); return CGM.CreateRuntimeFunction(FTy, "__cxa_free_exception"); } -static llvm::Constant *getUnexpectedFn(CodeGenModule &CGM) { +static llvm::FunctionCallee getUnexpectedFn(CodeGenModule &CGM) { // void __cxa_call_unexpected(void *thrown_exception); llvm::FunctionType *FTy = - llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*IsVarArgs=*/false); + llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*isVarArg=*/false); return CGM.CreateRuntimeFunction(FTy, "__cxa_call_unexpected"); } -llvm::Constant *CodeGenModule::getTerminateFn() { +llvm::FunctionCallee CodeGenModule::getTerminateFn() { // void __terminate(); llvm::FunctionType *FTy = - llvm::FunctionType::get(VoidTy, /*IsVarArgs=*/false); + llvm::FunctionType::get(VoidTy, /*isVarArg=*/false); StringRef name; @@ -74,10 +72,10 @@ llvm::Constant *CodeGenModule::getTerminateFn() { return CreateRuntimeFunction(FTy, name); } -static llvm::Constant *getCatchallRethrowFn(CodeGenModule &CGM, - StringRef Name) { +static llvm::FunctionCallee getCatchallRethrowFn(CodeGenModule &CGM, + StringRef Name) { llvm::FunctionType *FTy = - llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*IsVarArgs=*/false); + llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*isVarArg=*/false); return CGM.CreateRuntimeFunction(FTy, Name); } @@ -240,8 +238,8 @@ const EHPersonality &EHPersonality::get(CodeGenFunction &CGF) { return get(CGF.CGM, dyn_cast_or_null<FunctionDecl>(FD)); } -static llvm::Constant *getPersonalityFn(CodeGenModule &CGM, - const EHPersonality &Personality) { +static llvm::FunctionCallee getPersonalityFn(CodeGenModule &CGM, + const EHPersonality &Personality) { return CGM.CreateRuntimeFunction(llvm::FunctionType::get(CGM.Int32Ty, true), Personality.PersonalityFn, llvm::AttributeList(), /*Local=*/true); @@ -249,12 +247,13 @@ static llvm::Constant *getPersonalityFn(CodeGenModule &CGM, static llvm::Constant *getOpaquePersonalityFn(CodeGenModule &CGM, const EHPersonality &Personality) { - llvm::Constant *Fn = getPersonalityFn(CGM, Personality); + 
llvm::FunctionCallee Fn = getPersonalityFn(CGM, Personality); llvm::PointerType* Int8PtrTy = llvm::PointerType::get( llvm::Type::getInt8Ty(CGM.getLLVMContext()), CGM.getDataLayout().getProgramAddressSpace()); - return llvm::ConstantExpr::getBitCast(Fn, Int8PtrTy); + return llvm::ConstantExpr::getBitCast(cast<llvm::Constant>(Fn.getCallee()), + Int8PtrTy); } /// Check whether a landingpad instruction only uses C++ features. @@ -345,12 +344,13 @@ void CodeGenModule::SimplifyPersonality() { // Create the C++ personality function and kill off the old // function. - llvm::Constant *CXXFn = getPersonalityFn(*this, CXX); + llvm::FunctionCallee CXXFn = getPersonalityFn(*this, CXX); // This can happen if the user is screwing with us. - if (Fn->getType() != CXXFn->getType()) return; + if (Fn->getType() != CXXFn.getCallee()->getType()) + return; - Fn->replaceAllUsesWith(CXXFn); + Fn->replaceAllUsesWith(CXXFn.getCallee()); Fn->eraseFromParent(); } @@ -977,15 +977,15 @@ static void emitWasmCatchPadBlock(CodeGenFunction &CGF, // Create calls to wasm.get.exception and wasm.get.ehselector intrinsics. // Before they are lowered appropriately later, they provide values for the // exception and selector. - llvm::Value *GetExnFn = + llvm::Function *GetExnFn = CGF.CGM.getIntrinsic(llvm::Intrinsic::wasm_get_exception); - llvm::Value *GetSelectorFn = + llvm::Function *GetSelectorFn = CGF.CGM.getIntrinsic(llvm::Intrinsic::wasm_get_ehselector); llvm::CallInst *Exn = CGF.Builder.CreateCall(GetExnFn, CPI); CGF.Builder.CreateStore(Exn, CGF.getExceptionSlot()); llvm::CallInst *Selector = CGF.Builder.CreateCall(GetSelectorFn, CPI); - llvm::Value *TypeIDFn = CGF.CGM.getIntrinsic(llvm::Intrinsic::eh_typeid_for); + llvm::Function *TypeIDFn = CGF.CGM.getIntrinsic(llvm::Intrinsic::eh_typeid_for); // If there's only a single catch-all, branch directly to its handler. if (CatchScope.getNumHandlers() == 1 && @@ -1069,7 +1069,7 @@ static void emitCatchDispatchBlock(CodeGenFunction &CGF, CGF.EmitBlockAfterUses(dispatchBlock); // Select the right handler. - llvm::Value *llvm_eh_typeid_for = + llvm::Function *llvm_eh_typeid_for = CGF.CGM.getIntrinsic(llvm::Intrinsic::eh_typeid_for); // Load the selector value. 
@@ -1259,7 +1259,9 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) { } assert(RethrowBlock != WasmCatchStartBlock && RethrowBlock->empty()); Builder.SetInsertPoint(RethrowBlock); - CGM.getCXXABI().emitRethrow(*this, /*isNoReturn=*/true); + llvm::Function *RethrowInCatchFn = + CGM.getIntrinsic(llvm::Intrinsic::wasm_rethrow_in_catch); + EmitNoreturnRuntimeCallOrInvoke(RethrowInCatchFn, {}); } EmitBlock(ContBB); @@ -1269,9 +1271,10 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) { namespace { struct CallEndCatchForFinally final : EHScopeStack::Cleanup { llvm::Value *ForEHVar; - llvm::Value *EndCatchFn; - CallEndCatchForFinally(llvm::Value *ForEHVar, llvm::Value *EndCatchFn) - : ForEHVar(ForEHVar), EndCatchFn(EndCatchFn) {} + llvm::FunctionCallee EndCatchFn; + CallEndCatchForFinally(llvm::Value *ForEHVar, + llvm::FunctionCallee EndCatchFn) + : ForEHVar(ForEHVar), EndCatchFn(EndCatchFn) {} void Emit(CodeGenFunction &CGF, Flags flags) override { llvm::BasicBlock *EndCatchBB = CGF.createBasicBlock("finally.endcatch"); @@ -1290,15 +1293,15 @@ namespace { struct PerformFinally final : EHScopeStack::Cleanup { const Stmt *Body; llvm::Value *ForEHVar; - llvm::Value *EndCatchFn; - llvm::Value *RethrowFn; + llvm::FunctionCallee EndCatchFn; + llvm::FunctionCallee RethrowFn; llvm::Value *SavedExnVar; PerformFinally(const Stmt *Body, llvm::Value *ForEHVar, - llvm::Value *EndCatchFn, - llvm::Value *RethrowFn, llvm::Value *SavedExnVar) - : Body(Body), ForEHVar(ForEHVar), EndCatchFn(EndCatchFn), - RethrowFn(RethrowFn), SavedExnVar(SavedExnVar) {} + llvm::FunctionCallee EndCatchFn, + llvm::FunctionCallee RethrowFn, llvm::Value *SavedExnVar) + : Body(Body), ForEHVar(ForEHVar), EndCatchFn(EndCatchFn), + RethrowFn(RethrowFn), SavedExnVar(SavedExnVar) {} void Emit(CodeGenFunction &CGF, Flags flags) override { // Enter a cleanup to call the end-catch function if one was provided. @@ -1360,12 +1363,11 @@ namespace { /// Enters a finally block for an implementation using zero-cost /// exceptions. This is mostly general, but hard-codes some /// language/ABI-specific behavior in the catch-all sections. -void CodeGenFunction::FinallyInfo::enter(CodeGenFunction &CGF, - const Stmt *body, - llvm::Constant *beginCatchFn, - llvm::Constant *endCatchFn, - llvm::Constant *rethrowFn) { - assert((beginCatchFn != nullptr) == (endCatchFn != nullptr) && +void CodeGenFunction::FinallyInfo::enter(CodeGenFunction &CGF, const Stmt *body, + llvm::FunctionCallee beginCatchFn, + llvm::FunctionCallee endCatchFn, + llvm::FunctionCallee rethrowFn) { + assert((!!beginCatchFn) == (!!endCatchFn) && "begin/end catch functions not paired"); assert(rethrowFn && "rethrow function is required"); @@ -1377,9 +1379,7 @@ void CodeGenFunction::FinallyInfo::enter(CodeGenFunction &CGF, // In the latter case we need to pass it the exception object. // But we can't use the exception slot because the @finally might // have a landing pad (which would overwrite the exception slot). - llvm::FunctionType *rethrowFnTy = - cast<llvm::FunctionType>( - cast<llvm::PointerType>(rethrowFn->getType())->getElementType()); + llvm::FunctionType *rethrowFnTy = rethrowFn.getFunctionType(); SavedExnVar = nullptr; if (rethrowFnTy->getNumParams()) SavedExnVar = CGF.CreateTempAlloca(CGF.Int8PtrTy, "finally.exn"); @@ -1545,7 +1545,7 @@ llvm::BasicBlock *CodeGenFunction::getTerminateFunclet() { // __clang_call_terminate function. 
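// ---------------------------------------------------------------------------
// Editor's sketch (not part of the patch) of the llvm::FunctionCallee idiom
// this patch migrates to throughout: runtime-function helpers now hand back
// the callee pointer bundled with its llvm::FunctionType, so callers use
// getCallee()/getFunctionType() instead of unwrapping a pointer type.
// Assumes LLVM 9-era headers; the names below are stand-ins.
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include <cassert>

llvm::CallInst *emitAtExitCall(llvm::Module &M, llvm::IRBuilder<> &Builder,
                               llvm::Value *DtorStub) {
  llvm::FunctionType *FTy = llvm::FunctionType::get(
      Builder.getInt32Ty(), DtorStub->getType(), /*isVarArg=*/false);
  // getOrInsertFunction now returns a FunctionCallee: {FunctionType*, Value*}.
  llvm::FunctionCallee AtExit = M.getOrInsertFunction("atexit", FTy);
  // The function type travels with the callee; no more digging it out of
  // the pointee type of the callee's pointer type.
  assert(AtExit.getFunctionType() == FTy);
  return Builder.CreateCall(AtExit, DtorStub); // CreateCall accepts the pair
}
// ---------------------------------------------------------------------------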
if (getLangOpts().CPlusPlus && EHPersonality::get(*this).isWasmPersonality()) { - llvm::Value *GetExnFn = + llvm::Function *GetExnFn = CGM.getIntrinsic(llvm::Intrinsic::wasm_get_exception); Exn = Builder.CreateCall(GetExnFn, CurrentFuncletPad); } @@ -1632,7 +1632,7 @@ struct PerformSEHFinally final : EHScopeStack::Cleanup { if (CGF.IsOutlinedSEHHelper) { FP = &CGF.CurFn->arg_begin()[1]; } else { - llvm::Value *LocalAddrFn = + llvm::Function *LocalAddrFn = CGM.getIntrinsic(llvm::Intrinsic::localaddress); FP = CGF.Builder.CreateCall(LocalAddrFn); } diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp index 34a921e2dc00..5a4b1188b711 100644 --- a/lib/CodeGen/CGExpr.cpp +++ b/lib/CodeGen/CGExpr.cpp @@ -1,9 +1,8 @@ //===--- CGExpr.cpp - Emit LLVM Code from Expressions ---------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -26,6 +25,7 @@ #include "clang/AST/Attr.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/NSAPI.h" +#include "clang/Basic/Builtins.h" #include "clang/Basic/CodeGenOptions.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/StringExtras.h" @@ -331,7 +331,7 @@ pushTemporaryCleanup(CodeGenFunction &CGF, const MaterializeTemporaryExpr *M, switch (M->getStorageDuration()) { case SD_Static: case SD_Thread: { - llvm::Constant *CleanupFn; + llvm::FunctionCallee CleanupFn; llvm::Constant *CleanupArg; if (E->getType()->isArrayType()) { CleanupFn = CodeGenFunction(CGF.CGM).generateDestroyHelper( @@ -340,8 +340,8 @@ pushTemporaryCleanup(CodeGenFunction &CGF, const MaterializeTemporaryExpr *M, dyn_cast_or_null<VarDecl>(M->getExtendingDecl())); CleanupArg = llvm::Constant::getNullValue(CGF.Int8PtrTy); } else { - CleanupFn = CGF.CGM.getAddrOfCXXStructor(ReferenceTemporaryDtor, - StructorType::Complete); + CleanupFn = CGF.CGM.getAddrAndTypeOfCXXStructor( + GlobalDecl(ReferenceTemporaryDtor, Dtor_Complete)); CleanupArg = cast<llvm::Constant>(ReferenceTemporary.getPointer()); } CGF.CGM.getCXXABI().registerGlobalDtor( @@ -653,7 +653,8 @@ bool CodeGenFunction::sanitizePerformTypeCheck() const { void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, llvm::Value *Ptr, QualType Ty, CharUnits Alignment, - SanitizerSet SkippedChecks) { + SanitizerSet SkippedChecks, + llvm::Value *ArraySize) { if (!sanitizePerformTypeCheck()) return; @@ -711,21 +712,28 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, if (SanOpts.has(SanitizerKind::ObjectSize) && !SkippedChecks.has(SanitizerKind::ObjectSize) && !Ty->isIncompleteType()) { - uint64_t Size = getContext().getTypeSizeInChars(Ty).getQuantity(); - - // The glvalue must refer to a large enough storage region. - // FIXME: If Address Sanitizer is enabled, insert dynamic instrumentation - // to check this. 
- // FIXME: Get object address space
- llvm::Type *Tys[2] = { IntPtrTy, Int8PtrTy };
- llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::objectsize, Tys);
- llvm::Value *Min = Builder.getFalse();
- llvm::Value *NullIsUnknown = Builder.getFalse();
- llvm::Value *CastAddr = Builder.CreateBitCast(Ptr, Int8PtrTy);
- llvm::Value *LargeEnough = Builder.CreateICmpUGE(
- Builder.CreateCall(F, {CastAddr, Min, NullIsUnknown}),
- llvm::ConstantInt::get(IntPtrTy, Size));
- Checks.push_back(std::make_pair(LargeEnough, SanitizerKind::ObjectSize));
+ uint64_t TySize = getContext().getTypeSizeInChars(Ty).getQuantity();
+ llvm::Value *Size = llvm::ConstantInt::get(IntPtrTy, TySize);
+ if (ArraySize)
+ Size = Builder.CreateMul(Size, ArraySize);
+
+ // Degenerate case: new X[0] does not need an objectsize check.
+ llvm::Constant *ConstantSize = dyn_cast<llvm::Constant>(Size);
+ if (!ConstantSize || !ConstantSize->isNullValue()) {
+ // The glvalue must refer to a large enough storage region.
+ // FIXME: If Address Sanitizer is enabled, insert dynamic instrumentation
+ // to check this.
+ // FIXME: Get object address space
+ llvm::Type *Tys[2] = { IntPtrTy, Int8PtrTy };
+ llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::objectsize, Tys);
+ llvm::Value *Min = Builder.getFalse();
+ llvm::Value *NullIsUnknown = Builder.getFalse();
+ llvm::Value *Dynamic = Builder.getFalse();
+ llvm::Value *CastAddr = Builder.CreateBitCast(Ptr, Int8PtrTy);
+ llvm::Value *LargeEnough = Builder.CreateICmpUGE(
+ Builder.CreateCall(F, {CastAddr, Min, NullIsUnknown, Dynamic}), Size);
+ Checks.push_back(std::make_pair(LargeEnough, SanitizerKind::ObjectSize));
+ }
}
uint64_t AlignVal = 0;
@@ -1288,7 +1296,7 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) {
case Expr::CXXUuidofExprClass:
return EmitCXXUuidofLValue(cast<CXXUuidofExpr>(E));
case Expr::LambdaExprClass:
- return EmitLambdaLValue(cast<LambdaExpr>(E));
+ return EmitAggExprToLValue(E);
case Expr::ExprWithCleanupsClass: {
const auto *cleanups = cast<ExprWithCleanups>(E);
@@ -1308,11 +1316,15 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) {
return LV;
}
- case Expr::CXXDefaultArgExprClass:
- return EmitLValue(cast<CXXDefaultArgExpr>(E)->getExpr());
+ case Expr::CXXDefaultArgExprClass: {
+ auto *DAE = cast<CXXDefaultArgExpr>(E);
+ CXXDefaultArgExprScope Scope(*this, DAE);
+ return EmitLValue(DAE->getExpr());
+ }
case Expr::CXXDefaultInitExprClass: {
- CXXDefaultInitExprScope Scope(*this);
- return EmitLValue(cast<CXXDefaultInitExpr>(E)->getExpr());
+ auto *DIE = cast<CXXDefaultInitExpr>(E);
+ CXXDefaultInitExprScope Scope(*this, DIE);
+ return EmitLValue(DIE->getExpr());
}
case Expr::CXXTypeidExprClass:
return EmitCXXTypeidLValue(cast<CXXTypeidExpr>(E));
@@ -1387,7 +1399,7 @@ static bool isConstantEmittableObjectType(QualType type) {
/// Can we constant-emit a load of a reference to a variable of the
/// given type? This is different from predicates like
-/// Decl::isUsableInConstantExpressions because we do want it to apply
+/// Decl::mightBeUsableInConstantExpressions because we do want it to apply
/// in situations that don't necessarily satisfy the language's rules
/// for this (e.g. C++'s ODR-use rules). For example, we want to be able
/// to do this with const float variables even if those variables
@@ -1411,10 +1423,11 @@ static ConstantEmissionKind checkVarTypeForConstantEmission(QualType type) {
}
/// Try to emit a reference to the given value without producing it as
-/// an l-value.
This is actually more than an optimization: we can't -/// produce an l-value for variables that we never actually captured -/// in a block or lambda, which means const int variables or constexpr -/// literals or similar. +/// an l-value. This is just an optimization, but it avoids us needing +/// to emit global copies of variables if they're named without triggering +/// a formal use in a context where we can't emit a direct reference to them, +/// for instance if a block or lambda or a member of a local class uses a +/// const int variable or constexpr variable from an enclosing function. CodeGenFunction::ConstantEmission CodeGenFunction::tryEmitAsConstant(DeclRefExpr *refExpr) { ValueDecl *value = refExpr->getDecl(); @@ -1485,7 +1498,7 @@ static DeclRefExpr *tryToConvertMemberExprToDeclRefExpr(CodeGenFunction &CGF, return DeclRefExpr::Create( CGF.getContext(), NestedNameSpecifierLoc(), SourceLocation(), VD, /*RefersToEnclosingVariableOrCapture=*/false, ME->getExprLoc(), - ME->getType(), ME->getValueKind()); + ME->getType(), ME->getValueKind(), nullptr, nullptr, ME->isNonOdrUse()); } return nullptr; } @@ -1879,7 +1892,6 @@ Address CodeGenFunction::EmitExtVectorElementLValue(LValue LV) { Address VectorBasePtrPlusIx = Builder.CreateConstInBoundsGEP(CastToPointerElement, ix, - getContext().getTypeSizeInChars(EQT), "vector.elt"); return VectorBasePtrPlusIx; @@ -1899,7 +1911,7 @@ RValue CodeGenFunction::EmitLoadOfGlobalRegLValue(LValue LV) { Ty = CGM.getTypes().getDataLayout().getIntPtrType(OrigTy); llvm::Type *Types[] = { Ty }; - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); llvm::Value *Call = Builder.CreateCall( F, llvm::MetadataAsValue::get(Ty->getContext(), RegName)); if (OrigTy->isPointerTy()) @@ -2019,7 +2031,7 @@ void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst, // Cast the source to the storage type and shift it into place. SrcVal = Builder.CreateIntCast(SrcVal, Ptr.getElementType(), - /*IsSigned=*/false); + /*isSigned=*/false); llvm::Value *MaskedVal = SrcVal; // See if there are other bits in the bitfield's storage we'll need to load @@ -2160,7 +2172,7 @@ void CodeGenFunction::EmitStoreThroughGlobalRegLValue(RValue Src, LValue Dst) { Ty = CGM.getTypes().getDataLayout().getIntPtrType(OrigTy); llvm::Type *Types[] = { Ty }; - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); llvm::Value *Value = Src.getScalarVal(); if (OrigTy->isPointerTy()) Value = Builder.CreatePtrToInt(Value, Ty); @@ -2284,15 +2296,22 @@ static LValue EmitThreadPrivateVarDeclLValue( return CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl); } -static Address emitDeclTargetLinkVarDeclLValue(CodeGenFunction &CGF, - const VarDecl *VD, QualType T) { +static Address emitDeclTargetVarDeclLValue(CodeGenFunction &CGF, + const VarDecl *VD, QualType T) { llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); - if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_To) + // Return an invalid address if variable is MT_To and unified + // memory is not enabled. For all other cases: MT_Link and + // MT_To with unified memory, return a valid address. 
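// Illustrative OpenMP source, not from this commit (variable names invented),
// showing the two declare-target mappings the lowering above distinguishes.
int dev_copy;
#pragma omp declare target to(dev_copy)    // MT_To: device gets its own copy

int host_data;
#pragma omp declare target link(host_data) // MT_Link: accessed via a pointer

// With '#pragma omp requires unified_shared_memory' in effect, a 'to'
// variable is also reached through a runtime-managed pointer, which is why
// the code above returns a valid address for MT_To in that mode as well.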
+ if (!Res || (*Res == OMPDeclareTargetDeclAttr::MT_To && + !CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) return Address::invalid(); - assert(*Res == OMPDeclareTargetDeclAttr::MT_Link && "Expected link clause"); + assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || + (*Res == OMPDeclareTargetDeclAttr::MT_To && + CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) && + "Expected link clause OR to clause with unified memory enabled."); QualType PtrTy = CGF.getContext().getPointerType(VD->getType()); - Address Addr = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD); + Address Addr = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); return CGF.EmitLoadOfPointer(Addr, PtrTy->castAs<PointerType>()); } @@ -2348,7 +2367,7 @@ static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF, // Check if the variable is marked as declare target with link clause in // device codegen. if (CGF.getLangOpts().OpenMPIsDevice) { - Address Addr = emitDeclTargetLinkVarDeclLValue(CGF, VD, T); + Address Addr = emitDeclTargetVarDeclLValue(CGF, VD, T); if (Addr.isValid()) return CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl); } @@ -2440,45 +2459,101 @@ static LValue EmitGlobalNamedRegister(const VarDecl *VD, CodeGenModule &CGM) { return LValue::MakeGlobalReg(Address(Ptr, Alignment), VD->getType()); } +/// Determine whether we can emit a reference to \p VD from the current +/// context, despite not necessarily having seen an odr-use of the variable in +/// this context. +static bool canEmitSpuriousReferenceToVariable(CodeGenFunction &CGF, + const DeclRefExpr *E, + const VarDecl *VD, + bool IsConstant) { + // For a variable declared in an enclosing scope, do not emit a spurious + // reference even if we have a capture, as that will emit an unwarranted + // reference to our capture state, and will likely generate worse code than + // emitting a local copy. + if (E->refersToEnclosingVariableOrCapture()) + return false; + + // For a local declaration declared in this function, we can always reference + // it even if we don't have an odr-use. + if (VD->hasLocalStorage()) { + return VD->getDeclContext() == + dyn_cast_or_null<DeclContext>(CGF.CurCodeDecl); + } + + // For a global declaration, we can emit a reference to it if we know + // for sure that we are able to emit a definition of it. + VD = VD->getDefinition(CGF.getContext()); + if (!VD) + return false; + + // Don't emit a spurious reference if it might be to a variable that only + // exists on a different device / target. + // FIXME: This is unnecessarily broad. Check whether this would actually be a + // cross-target reference. + if (CGF.getLangOpts().OpenMP || CGF.getLangOpts().CUDA || + CGF.getLangOpts().OpenCL) { + return false; + } + + // We can emit a spurious reference only if the linkage implies that we'll + // be emitting a non-interposable symbol that will be retained until link + // time. 
+ switch (CGF.CGM.getLLVMLinkageVarDefinition(VD, IsConstant)) { + case llvm::GlobalValue::ExternalLinkage: + case llvm::GlobalValue::LinkOnceODRLinkage: + case llvm::GlobalValue::WeakODRLinkage: + case llvm::GlobalValue::InternalLinkage: + case llvm::GlobalValue::PrivateLinkage: + return true; + default: + return false; + } +} + LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { const NamedDecl *ND = E->getDecl(); QualType T = E->getType(); + assert(E->isNonOdrUse() != NOUR_Unevaluated && + "should not emit an unevaluated operand"); + if (const auto *VD = dyn_cast<VarDecl>(ND)) { // Global Named registers access via intrinsics only if (VD->getStorageClass() == SC_Register && VD->hasAttr<AsmLabelAttr>() && !VD->isLocalVarDecl()) return EmitGlobalNamedRegister(VD, CGM); - // A DeclRefExpr for a reference initialized by a constant expression can - // appear without being odr-used. Directly emit the constant initializer. - const Expr *Init = VD->getAnyInitializer(VD); - const auto *BD = dyn_cast_or_null<BlockDecl>(CurCodeDecl); - if (Init && !isa<ParmVarDecl>(VD) && VD->getType()->isReferenceType() && - VD->isUsableInConstantExpressions(getContext()) && - VD->checkInitIsICE() && - // Do not emit if it is private OpenMP variable. - !(E->refersToEnclosingVariableOrCapture() && - ((CapturedStmtInfo && - (LocalDeclMap.count(VD->getCanonicalDecl()) || - CapturedStmtInfo->lookup(VD->getCanonicalDecl()))) || - LambdaCaptureFields.lookup(VD->getCanonicalDecl()) || - (BD && BD->capturesVariable(VD))))) { - llvm::Constant *Val = - ConstantEmitter(*this).emitAbstract(E->getLocation(), - *VD->evaluateValue(), - VD->getType()); - assert(Val && "failed to emit reference constant expression"); - // FIXME: Eventually we will want to emit vector element references. - - // Should we be using the alignment of the constant pointer we emitted? - CharUnits Alignment = getNaturalTypeAlignment(E->getType(), - /* BaseInfo= */ nullptr, - /* TBAAInfo= */ nullptr, - /* forPointeeType= */ true); - return MakeAddrLValue(Address(Val, Alignment), T, AlignmentSource::Decl); + // If this DeclRefExpr does not constitute an odr-use of the variable, + // we're not permitted to emit a reference to it in general, and it might + // not be captured if capture would be necessary for a use. Emit the + // constant value directly instead. + if (E->isNonOdrUse() == NOUR_Constant && + (VD->getType()->isReferenceType() || + !canEmitSpuriousReferenceToVariable(*this, E, VD, true))) { + VD->getAnyInitializer(VD); + llvm::Constant *Val = ConstantEmitter(*this).emitAbstract( + E->getLocation(), *VD->evaluateValue(), VD->getType()); + assert(Val && "failed to emit constant expression"); + + Address Addr = Address::invalid(); + if (!VD->getType()->isReferenceType()) { + // Spill the constant value to a global. + Addr = CGM.createUnnamedGlobalFrom(*VD, Val, + getContext().getDeclAlign(VD)); + } else { + // Should we be using the alignment of the constant pointer we emitted? + CharUnits Alignment = + getNaturalTypeAlignment(E->getType(), + /* BaseInfo= */ nullptr, + /* TBAAInfo= */ nullptr, + /* forPointeeType= */ true); + Addr = Address(Val, Alignment); + } + return MakeAddrLValue(Addr, T, AlignmentSource::Decl); } + // FIXME: Handle other kinds of non-odr-use DeclRefExprs. + // Check for captured variables. 
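// Illustrative C++, not from this commit: a named-but-not-odr-used constant,
// the NOUR_Constant case the new code path above handles.
void nonOdrUseSketch() {
  static const int n = 4;
  auto g = [] { return n; }; // no capture needed: reading n is not an odr-use
  int arr[n];                // constant expression, so no storage for n needed
  (void)arr;
  (void)g;
}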
if (E->refersToEnclosingVariableOrCapture()) { VD = VD->getCanonicalDecl(); @@ -2510,7 +2585,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { // FIXME: We should be able to assert this for FunctionDecls as well! // FIXME: We should be able to assert this for all DeclRefExprs, not just // those with a valid source location. - assert((ND->isUsed(false) || !isa<VarDecl>(ND) || + assert((ND->isUsed(false) || !isa<VarDecl>(ND) || E->isNonOdrUse() || !E->getLocation().isValid()) && "Should not use decl without marking it used!"); @@ -2536,7 +2611,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { // some reason; most likely, because it's in an outer function. } else if (VD->isStaticLocal()) { addr = Address(CGM.getOrCreateStaticVarDecl( - *VD, CGM.getLLVMLinkageVarDefinition(VD, /*isConstant=*/false)), + *VD, CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false)), getContext().getDeclAlign(VD)); // No other cases for now. @@ -2851,16 +2926,13 @@ enum class CheckRecoverableKind { } static CheckRecoverableKind getRecoverableKind(SanitizerMask Kind) { - assert(llvm::countPopulation(Kind) == 1); - switch (Kind) { - case SanitizerKind::Vptr: + assert(Kind.countPopulation() == 1); + if (Kind == SanitizerKind::Function || Kind == SanitizerKind::Vptr) return CheckRecoverableKind::AlwaysRecoverable; - case SanitizerKind::Return: - case SanitizerKind::Unreachable: + else if (Kind == SanitizerKind::Return || Kind == SanitizerKind::Unreachable) return CheckRecoverableKind::Unrecoverable; - default: + else return CheckRecoverableKind::Recoverable; - } } namespace { @@ -2910,7 +2982,7 @@ static void emitCheckHandlerCall(CodeGenFunction &CGF, } B.addAttribute(llvm::Attribute::UWTable); - llvm::Value *Fn = CGF.CGM.CreateRuntimeFunction( + llvm::FunctionCallee Fn = CGF.CGM.CreateRuntimeFunction( FnType, FnName, llvm::AttributeList::get(CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex, B), @@ -3051,7 +3123,7 @@ void CodeGenFunction::EmitCfiSlowPathCheck( bool WithDiag = !CGM.getCodeGenOpts().SanitizeTrap.has(Kind); llvm::CallInst *CheckCall; - llvm::Constant *SlowPathFn; + llvm::FunctionCallee SlowPathFn; if (WithDiag) { llvm::Constant *Info = llvm::ConstantStruct::getAnon(StaticArgs); auto *InfoPtr = @@ -3073,7 +3145,8 @@ void CodeGenFunction::EmitCfiSlowPathCheck( CheckCall = Builder.CreateCall(SlowPathFn, {TypeId, Ptr}); } - CGM.setDSOLocal(cast<llvm::GlobalValue>(SlowPathFn->stripPointerCasts())); + CGM.setDSOLocal( + cast<llvm::GlobalValue>(SlowPathFn.getCallee()->stripPointerCasts())); CheckCall->setDoesNotThrow(); EmitBlock(Cont); @@ -3252,7 +3325,7 @@ Address CodeGenFunction::EmitArrayToPointerDecay(const Expr *E, if (!E->getType()->isVariableArrayType()) { assert(isa<llvm::ArrayType>(Addr.getElementType()) && "Expected pointer to array"); - Addr = Builder.CreateStructGEP(Addr, 0, CharUnits::Zero(), "arraydecay"); + Addr = Builder.CreateConstArrayGEP(Addr, 0, "arraydecay"); } // The result of this decay conversion points to an array element within the @@ -3346,8 +3419,20 @@ static Address emitArraySubscriptGEP(CodeGenFunction &CGF, Address addr, CharUnits eltAlign = getArrayElementAlign(addr.getAlignment(), indices.back(), eltSize); - llvm::Value *eltPtr = emitArraySubscriptGEP( - CGF, addr.getPointer(), indices, inbounds, signedIndices, loc, name); + llvm::Value *eltPtr; + auto LastIndex = dyn_cast<llvm::ConstantInt>(indices.back()); + if (!CGF.IsInPreservedAIRegion || !LastIndex) { + eltPtr = emitArraySubscriptGEP( + CGF, addr.getPointer(), indices, 
inbounds, signedIndices, + loc, name); + } else { + // Remember the original array subscript for bpf target + unsigned idx = LastIndex->getZExtValue(); + eltPtr = CGF.Builder.CreatePreserveArrayAccessIndex(addr.getPointer(), + indices.size() - 1, + idx); + } + return Address(eltPtr, eltAlign); } @@ -3529,8 +3614,7 @@ static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base, if (!BaseTy->isVariableArrayType()) { assert(isa<llvm::ArrayType>(Addr.getElementType()) && "Expected pointer to array"); - Addr = CGF.Builder.CreateStructGEP(Addr, 0, CharUnits::Zero(), - "arraydecay"); + Addr = CGF.Builder.CreateConstArrayGEP(Addr, 0, "arraydecay"); } return CGF.Builder.CreateElementBitCast(Addr, @@ -3665,7 +3749,7 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, Idx = Builder.CreateNSWMul(Idx, NumElements); EltPtr = emitArraySubscriptGEP(*this, Base, Idx, VLA->getElementType(), !getLangOpts().isSignedOverflowDefined(), - /*SignedIndices=*/false, E->getExprLoc()); + /*signedIndices=*/false, E->getExprLoc()); } else if (const Expr *Array = isSimpleArrayDecayOperand(E->getBase())) { // If this is A[i] where A is an array, the frontend will have decayed the // base to be a ArrayToPointerDecay implicit cast. While correct, it is @@ -3685,7 +3769,7 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, EltPtr = emitArraySubscriptGEP( *this, ArrayLV.getAddress(), {CGM.getSize(CharUnits::Zero()), Idx}, ResultExprTy, !getLangOpts().isSignedOverflowDefined(), - /*SignedIndices=*/false, E->getExprLoc()); + /*signedIndices=*/false, E->getExprLoc()); BaseInfo = ArrayLV.getBaseInfo(); TBAAInfo = CGM.getTBAAInfoForSubobject(ArrayLV, ResultExprTy); } else { @@ -3694,7 +3778,7 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, IsLowerBound); EltPtr = emitArraySubscriptGEP(*this, Base, Idx, ResultExprTy, !getLangOpts().isSignedOverflowDefined(), - /*SignedIndices=*/false, E->getExprLoc()); + /*signedIndices=*/false, E->getExprLoc()); } return MakeAddrLValue(EltPtr, ResultExprTy, BaseInfo, TBAAInfo); @@ -3808,31 +3892,63 @@ LValue CodeGenFunction::EmitLValueForLambdaField(const FieldDecl *Field) { return EmitLValueForField(LambdaLV, Field); } +/// Get the field index in the debug info. The debug info structure/union +/// will ignore the unnamed bitfields. +unsigned CodeGenFunction::getDebugInfoFIndex(const RecordDecl *Rec, + unsigned FieldIndex) { + unsigned I = 0, Skipped = 0; + + for (auto F : Rec->getDefinition()->fields()) { + if (I == FieldIndex) + break; + if (F->isUnnamedBitfield()) + Skipped++; + I++; + } + + return FieldIndex - Skipped; +} + +/// Get the address of a zero-sized field within a record. The resulting +/// address doesn't necessarily have the right type. +static Address emitAddrOfZeroSizeField(CodeGenFunction &CGF, Address Base, + const FieldDecl *Field) { + CharUnits Offset = CGF.getContext().toCharUnitsFromBits( + CGF.getContext().getFieldOffset(Field)); + if (Offset.isZero()) + return Base; + Base = CGF.Builder.CreateElementBitCast(Base, CGF.Int8Ty); + return CGF.Builder.CreateConstInBoundsByteGEP(Base, Offset); +} + /// Drill down to the storage of a field without walking into /// reference types. /// /// The resulting address doesn't necessarily have the right type. 
static Address emitAddrOfFieldStorage(CodeGenFunction &CGF, Address base, const FieldDecl *field) { + if (field->isZeroSize(CGF.getContext())) + return emitAddrOfZeroSizeField(CGF, base, field); + const RecordDecl *rec = field->getParent(); unsigned idx = CGF.CGM.getTypes().getCGRecordLayout(rec).getLLVMFieldNo(field); - CharUnits offset; - // Adjust the alignment down to the given offset. - // As a special case, if the LLVM field index is 0, we know that this - // is zero. - assert((idx != 0 || CGF.getContext().getASTRecordLayout(rec) - .getFieldOffset(field->getFieldIndex()) == 0) && - "LLVM field at index zero had non-zero offset?"); - if (idx != 0) { - auto &recLayout = CGF.getContext().getASTRecordLayout(rec); - auto offsetInBits = recLayout.getFieldOffset(field->getFieldIndex()); - offset = CGF.getContext().toCharUnitsFromBits(offsetInBits); - } + return CGF.Builder.CreateStructGEP(base, idx, field->getName()); +} + +static Address emitPreserveStructAccess(CodeGenFunction &CGF, Address base, + const FieldDecl *field) { + const RecordDecl *rec = field->getParent(); + llvm::DIType *DbgInfo = CGF.getDebugInfo()->getOrCreateRecordType( + CGF.getContext().getRecordType(rec), rec->getLocation()); + + unsigned idx = + CGF.CGM.getTypes().getCGRecordLayout(rec).getLLVMFieldNo(field); - return CGF.Builder.CreateStructGEP(base, idx, offset, field->getName()); + return CGF.Builder.CreatePreserveStructAccessIndex( + base, idx, CGF.getDebugInfoFIndex(rec, field->getFieldIndex()), DbgInfo); } static bool hasAnyVptr(const QualType Type, const ASTContext &Context) { @@ -3866,8 +3982,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, unsigned Idx = RL.getLLVMFieldNo(field); if (Idx != 0) // For structs, we GEP to the field that the record layout suggests. - Addr = Builder.CreateStructGEP(Addr, Idx, Info.StorageOffset, - field->getName()); + Addr = Builder.CreateStructGEP(Addr, Idx, field->getName()); // Get the access type. llvm::Type *FieldIntTy = llvm::Type::getIntNTy(getLLVMContext(), Info.StorageSize); @@ -3943,9 +4058,24 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, // a barrier every time CXXRecord field with vptr is referenced. addr = Address(Builder.CreateLaunderInvariantGroup(addr.getPointer()), addr.getAlignment()); + + if (IsInPreservedAIRegion) { + // Remember the original union field index + llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateRecordType( + getContext().getRecordType(rec), rec->getLocation()); + addr = Address( + Builder.CreatePreserveUnionAccessIndex( + addr.getPointer(), getDebugInfoFIndex(rec, field->getFieldIndex()), DbgInfo), + addr.getAlignment()); + } } else { - // For structs, we GEP to the field that the record layout suggests. - addr = emitAddrOfFieldStorage(*this, addr, field); + + if (!IsInPreservedAIRegion) + // For structs, we GEP to the field that the record layout suggests. + addr = emitAddrOfFieldStorage(*this, addr, field); + else + // Remember the original struct field index + addr = emitPreserveStructAccess(*this, addr, field); // If this is a reference field, load the reference right now. 
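// Illustrative source, not from this commit (type/field names invented): the
// trigger for the CreatePreserve{Array,Struct,Union}AccessIndex paths above.
// On BPF targets, clang's __builtin_preserve_access_index records the
// original AST field/element indices so CO-RE relocations can remap them
// against the layout of the running kernel.
struct task_sketch { int pids[4]; struct { int tgid; } group; };

int readTgidSketch(struct task_sketch *t) {
  return __builtin_preserve_access_index(t->group.tgid);
}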
if (FieldType->isReferenceType()) { @@ -4137,6 +4267,7 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { switch (E->getCastKind()) { case CK_ToVoid: case CK_BitCast: + case CK_LValueToRValueBitCast: case CK_ArrayToPointerDecay: case CK_FunctionToPointerDecay: case CK_NullToMemberPointer: @@ -4175,6 +4306,8 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { case CK_IntToOCLSampler: case CK_FixedPointCast: case CK_FixedPointToBoolean: + case CK_FixedPointToIntegral: + case CK_IntegralToFixedPoint: return EmitUnsupportedLValue(E, "unexpected cast lvalue"); case CK_Dependent: @@ -4548,13 +4681,6 @@ CodeGenFunction::EmitCXXBindTemporaryLValue(const CXXBindTemporaryExpr *E) { return MakeAddrLValue(Slot.getAddress(), E->getType(), AlignmentSource::Decl); } -LValue -CodeGenFunction::EmitLambdaLValue(const LambdaExpr *E) { - AggValueSlot Slot = CreateAggTemp(E->getType(), "temp.lvalue"); - EmitLambdaExpr(E, Slot); - return MakeAddrLValue(Slot.getAddress(), E->getType(), AlignmentSource::Decl); -} - LValue CodeGenFunction::EmitObjCMessageExprLValue(const ObjCMessageExpr *E) { RValue RV = EmitObjCMessageExpr(E); @@ -4630,17 +4756,6 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee const Decl *TargetDecl = OrigCallee.getAbstractInfo().getCalleeDecl().getDecl(); - if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl)) - // We can only guarantee that a function is called from the correct - // context/function based on the appropriate target attributes, - // so only check in the case where we have both always_inline and target - // since otherwise we could be making a conditional call after a check for - // the proper cpu features (and it won't cause code generation issues due to - // function based code generation). - if (TargetDecl->hasAttr<AlwaysInlineAttr>() && - TargetDecl->hasAttr<TargetAttr>()) - checkTargetFeatures(E, FD); - CalleeType = getContext().getCanonicalType(CalleeType); auto PointeeType = cast<PointerType>(CalleeType)->getPointeeType(); @@ -4688,7 +4803,8 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee llvm::Constant *StaticData[] = {EmitCheckSourceLocation(E->getBeginLoc()), EmitCheckTypeDescriptor(CalleeType)}; EmitCheck(std::make_pair(CalleeRTTIMatch, SanitizerKind::Function), - SanitizerHandler::FunctionTypeMismatch, StaticData, CalleePtr); + SanitizerHandler::FunctionTypeMismatch, StaticData, + {CalleePtr, CalleeRTTI, FTRTTIConst}); Builder.CreateBr(Cont); EmitBlock(Cont); @@ -4768,7 +4884,7 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee E->getDirectCallee(), /*ParamsToSkip*/ 0, Order); const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeFreeFunctionCall( - Args, FnType, /*isChainCall=*/Chain); + Args, FnType, /*ChainCall=*/Chain); // C99 6.5.2.2p6: // If the expression that denotes the called function has a type @@ -4799,7 +4915,19 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee Callee.setFunctionPointer(CalleePtr); } - return EmitCall(FnInfo, Callee, ReturnValue, Args, nullptr, E->getExprLoc()); + llvm::CallBase *CallOrInvoke = nullptr; + RValue Call = EmitCall(FnInfo, Callee, ReturnValue, Args, &CallOrInvoke, + E->getExprLoc()); + + // Generate function declaration DISuprogram in order to be used + // in debug info about call sites. 
+ if (CGDebugInfo *DI = getDebugInfo()) { + if (auto *CalleeDecl = dyn_cast_or_null<FunctionDecl>(TargetDecl)) + DI->EmitFuncDeclForCallSite(CallOrInvoke, QualType(FnType, 0), + CalleeDecl); + } + + return Call; } LValue CodeGenFunction:: diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp index db49b3f28a59..695facd50b67 100644 --- a/lib/CodeGen/CGExprAgg.cpp +++ b/lib/CodeGen/CGExprAgg.cpp @@ -1,9 +1,8 @@ //===--- CGExprAgg.cpp - Emit LLVM Code from Aggregate Expressions --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -166,10 +165,11 @@ public: void VisitImplicitValueInitExpr(ImplicitValueInitExpr *E); void VisitNoInitExpr(NoInitExpr *E) { } // Do nothing. void VisitCXXDefaultArgExpr(CXXDefaultArgExpr *DAE) { + CodeGenFunction::CXXDefaultArgExprScope Scope(CGF, DAE); Visit(DAE->getExpr()); } void VisitCXXDefaultInitExpr(CXXDefaultInitExpr *DIE) { - CodeGenFunction::CXXDefaultInitExprScope Scope(CGF); + CodeGenFunction::CXXDefaultInitExprScope Scope(CGF, DIE); Visit(DIE->getExpr()); } void VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E); @@ -711,6 +711,25 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { break; } + case CK_LValueToRValueBitCast: { + if (Dest.isIgnored()) { + CGF.EmitAnyExpr(E->getSubExpr(), AggValueSlot::ignored(), + /*ignoreResult=*/true); + break; + } + + LValue SourceLV = CGF.EmitLValue(E->getSubExpr()); + Address SourceAddress = + Builder.CreateElementBitCast(SourceLV.getAddress(), CGF.Int8Ty); + Address DestAddress = + Builder.CreateElementBitCast(Dest.getAddress(), CGF.Int8Ty); + llvm::Value *SizeVal = llvm::ConstantInt::get( + CGF.SizeTy, + CGF.getContext().getTypeSizeInChars(E->getType()).getQuantity()); + Builder.CreateMemCpy(DestAddress, SourceAddress, SizeVal); + break; + } + case CK_DerivedToBase: case CK_BaseToDerived: case CK_UncheckedDerivedToBase: { @@ -760,8 +779,7 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { // Build a GEP to refer to the subobject. 
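// Illustrative sketch, not from this commit: CK_LValueToRValueBitCast is the
// cast __builtin_bit_cast (the basis of std::bit_cast) produces; for
// aggregates it lowers to a byte copy of the object representation, exactly
// like the memcpy in the VisitCastExpr hunk above.
#include <cstring>

template <class To, class From>
To bitCastSketch(const From &src) {
  static_assert(sizeof(To) == sizeof(From), "sizes must match");
  To dst;
  std::memcpy(&dst, &src, sizeof(To)); // same lowering as the cast kind
  return dst;
}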
Address valueAddr = - CGF.Builder.CreateStructGEP(valueDest.getAddress(), 0, - CharUnits()); + CGF.Builder.CreateStructGEP(valueDest.getAddress(), 0); valueDest = AggValueSlot::forAddr(valueAddr, valueDest.getQualifiers(), valueDest.isExternallyDestructed(), @@ -781,11 +799,12 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { CGF.CreateAggTemp(atomicType, "atomic-to-nonatomic.temp"); CGF.EmitAggExpr(E->getSubExpr(), atomicSlot); - Address valueAddr = - Builder.CreateStructGEP(atomicSlot.getAddress(), 0, CharUnits()); + Address valueAddr = Builder.CreateStructGEP(atomicSlot.getAddress(), 0); RValue rvalue = RValue::getAggregate(valueAddr, atomicSlot.isVolatile()); return EmitFinalDestCopy(valueType, rvalue); } + case CK_AddressSpaceConversion: + return Visit(E->getSubExpr()); case CK_LValueToRValue: // If we're loading from a volatile type, force the destination @@ -797,6 +816,7 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { LLVM_FALLTHROUGH; + case CK_NoOp: case CK_UserDefinedConversion: case CK_ConstructorConversion: @@ -852,10 +872,12 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { case CK_CopyAndAutoreleaseBlockObject: case CK_BuiltinFnToFnPtr: case CK_ZeroToOCLOpaqueType: - case CK_AddressSpaceConversion: + case CK_IntToOCLSampler: case CK_FixedPointCast: case CK_FixedPointToBoolean: + case CK_FixedPointToIntegral: + case CK_IntegralToFixedPoint: llvm_unreachable("cast kind invalid for aggregate types"); } } @@ -1264,7 +1286,52 @@ void AggExprEmitter::VisitCXXInheritedCtorInitExpr( void AggExprEmitter::VisitLambdaExpr(LambdaExpr *E) { AggValueSlot Slot = EnsureSlot(E->getType()); - CGF.EmitLambdaExpr(E, Slot); + LValue SlotLV = CGF.MakeAddrLValue(Slot.getAddress(), E->getType()); + + // We'll need to enter cleanup scopes in case any of the element + // initializers throws an exception. + SmallVector<EHScopeStack::stable_iterator, 16> Cleanups; + llvm::Instruction *CleanupDominator = nullptr; + + CXXRecordDecl::field_iterator CurField = E->getLambdaClass()->field_begin(); + for (LambdaExpr::const_capture_init_iterator i = E->capture_init_begin(), + e = E->capture_init_end(); + i != e; ++i, ++CurField) { + // Emit initialization + LValue LV = CGF.EmitLValueForFieldInitialization(SlotLV, *CurField); + if (CurField->hasCapturedVLAType()) { + CGF.EmitLambdaVLACapture(CurField->getCapturedVLAType(), LV); + continue; + } + + EmitInitializationToLValue(*i, LV); + + // Push a destructor if necessary. + if (QualType::DestructionKind DtorKind = + CurField->getType().isDestructedType()) { + assert(LV.isSimple()); + if (CGF.needsEHCleanup(DtorKind)) { + if (!CleanupDominator) + CleanupDominator = CGF.Builder.CreateAlignedLoad( + CGF.Int8Ty, + llvm::Constant::getNullValue(CGF.Int8PtrTy), + CharUnits::One()); // placeholder + + CGF.pushDestroy(EHCleanup, LV.getAddress(), CurField->getType(), + CGF.getDestroyer(DtorKind), false); + Cleanups.push_back(CGF.EHStack.stable_begin()); + } + } + } + + // Deactivate all the partial cleanups in reverse order, which + // generally means popping them. + for (unsigned i = Cleanups.size(); i != 0; --i) + CGF.DeactivateCleanupBlock(Cleanups[i-1], CleanupDominator); + + // Destroy the placeholder if we made one. + if (CleanupDominator) + CleanupDominator->eraseFromParent(); } void AggExprEmitter::VisitExprWithCleanups(ExprWithCleanups *E) { @@ -1304,7 +1371,8 @@ static bool isSimpleZero(const Expr *E, CodeGenFunction &CGF) { // (int*)0 - Null pointer expressions. 
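// Illustrative C++, not from this commit: why the lambda emission above now
// pushes EH cleanups. If a later init-capture's initializer throws, captures
// that were already constructed must still be destroyed.
struct Guard { Guard() {} ~Guard() {} };
int mayThrow() { throw 0; }

void lambdaCleanupSketch() {
  // If mayThrow() throws, ~Guard() must run for the already-built 'g'.
  auto fn = [g = Guard(), n = mayThrow()] { (void)n; };
  (void)fn;
}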
if (const CastExpr *ICE = dyn_cast<CastExpr>(E)) return ICE->getCastKind() == CK_NullToPointer && - CGF.getTypes().isPointerZeroInitializable(E->getType()); + CGF.getTypes().isPointerZeroInitializable(E->getType()) && + !E->HasSideEffects(CGF.getContext()); // '\0' if (const CharacterLiteral *CL = dyn_cast<CharacterLiteral>(E)) return CL->getValue() == 0; @@ -1445,7 +1513,7 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased, - CGF.overlapForBaseInit(CXXRD, BaseRD, Base.isVirtual())); + CGF.getOverlapForBaseInit(CXXRD, BaseRD, Base.isVirtual())); CGF.EmitAggExpr(E->getInit(curInitIndex++), AggSlot); if (QualType::DestructionKind dtorKind = @@ -1797,15 +1865,32 @@ LValue CodeGenFunction::EmitAggExprToLValue(const Expr *E) { return LV; } -AggValueSlot::Overlap_t CodeGenFunction::overlapForBaseInit( +AggValueSlot::Overlap_t +CodeGenFunction::getOverlapForFieldInit(const FieldDecl *FD) { + if (!FD->hasAttr<NoUniqueAddressAttr>() || !FD->getType()->isRecordType()) + return AggValueSlot::DoesNotOverlap; + + // If the field lies entirely within the enclosing class's nvsize, its tail + // padding cannot overlap any already-initialized object. (The only subobjects + // with greater addresses that might already be initialized are vbases.) + const RecordDecl *ClassRD = FD->getParent(); + const ASTRecordLayout &Layout = getContext().getASTRecordLayout(ClassRD); + if (Layout.getFieldOffset(FD->getFieldIndex()) + + getContext().getTypeSize(FD->getType()) <= + (uint64_t)getContext().toBits(Layout.getNonVirtualSize())) + return AggValueSlot::DoesNotOverlap; + + // The tail padding may contain values we need to preserve. + return AggValueSlot::MayOverlap; +} + +AggValueSlot::Overlap_t CodeGenFunction::getOverlapForBaseInit( const CXXRecordDecl *RD, const CXXRecordDecl *BaseRD, bool IsVirtual) { - // Virtual bases are initialized first, in address order, so there's never - // any overlap during their initialization. - // - // FIXME: Under P0840, this is no longer true: the tail padding of a vbase - // of a field could be reused by a vbase of a containing class. + // If the most-derived object is a field declared with [[no_unique_address]], + // the tail padding of any virtual base could be reused for other subobjects + // of that field's class. if (IsVirtual) - return AggValueSlot::DoesNotOverlap; + return AggValueSlot::MayOverlap; // If the base class is laid out entirely within the nvsize of the derived // class, its tail padding cannot yet be initialized, so we can issue diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp index 884ce96859c5..5476d13b7c46 100644 --- a/lib/CodeGen/CGExprCXX.cpp +++ b/lib/CodeGen/CGExprCXX.cpp @@ -1,9 +1,8 @@ //===--- CGExprCXX.cpp - Emit LLVM Code for C++ expressions ---------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -11,15 +10,15 @@ // //===----------------------------------------------------------------------===// -#include "CodeGenFunction.h" #include "CGCUDARuntime.h" #include "CGCXXABI.h" #include "CGDebugInfo.h" #include "CGObjCRuntime.h" +#include "CodeGenFunction.h" #include "ConstantEmitter.h" +#include "TargetInfo.h" #include "clang/Basic/CodeGenOptions.h" #include "clang/CodeGen/CGFunctionInfo.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Intrinsics.h" using namespace clang; @@ -42,13 +41,11 @@ commonEmitCXXMemberOrOperatorCall(CodeGenFunction &CGF, const CXXMethodDecl *MD, isa<CXXOperatorCallExpr>(CE)); assert(MD->isInstance() && "Trying to emit a member or operator call expr on a static method!"); - ASTContext &C = CGF.getContext(); // Push the this ptr. const CXXRecordDecl *RD = CGF.CGM.getCXXABI().getThisArgumentTypeForMethod(MD); - Args.add(RValue::get(This), - RD ? C.getPointerType(C.getTypeDeclType(RD)) : C.VoidPtrTy); + Args.add(RValue::get(This), CGF.getTypes().DeriveThisType(RD, MD)); // If there is an implicit parameter (e.g. VTT), emit it. if (ImplicitParam) { @@ -56,7 +53,7 @@ commonEmitCXXMemberOrOperatorCall(CodeGenFunction &CGF, const CXXMethodDecl *MD, } const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>(); - RequiredArgs required = RequiredArgs::forPrototypePlus(FPT, Args.size(), MD); + RequiredArgs required = RequiredArgs::forPrototypePlus(FPT, Args.size()); unsigned PrefixSize = Args.size() - 1; // And the rest of the call args. @@ -94,14 +91,28 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorCall( } RValue CodeGenFunction::EmitCXXDestructorCall( - const CXXDestructorDecl *DD, const CGCallee &Callee, llvm::Value *This, - llvm::Value *ImplicitParam, QualType ImplicitParamTy, const CallExpr *CE, - StructorType Type) { + GlobalDecl Dtor, const CGCallee &Callee, llvm::Value *This, QualType ThisTy, + llvm::Value *ImplicitParam, QualType ImplicitParamTy, const CallExpr *CE) { + const CXXMethodDecl *DtorDecl = cast<CXXMethodDecl>(Dtor.getDecl()); + + assert(!ThisTy.isNull()); + assert(ThisTy->getAsCXXRecordDecl() == DtorDecl->getParent() && + "Pointer/Object mixup"); + + LangAS SrcAS = ThisTy.getAddressSpace(); + LangAS DstAS = DtorDecl->getMethodQualifiers().getAddressSpace(); + if (SrcAS != DstAS) { + QualType DstTy = DtorDecl->getThisType(); + llvm::Type *NewType = CGM.getTypes().ConvertType(DstTy); + This = getTargetHooks().performAddrSpaceCast(*this, This, SrcAS, DstAS, + NewType); + } + CallArgList Args; - commonEmitCXXMemberOrOperatorCall(*this, DD, This, ImplicitParam, + commonEmitCXXMemberOrOperatorCall(*this, DtorDecl, This, ImplicitParam, ImplicitParamTy, CE, Args, nullptr); - return EmitCall(CGM.getTypes().arrangeCXXStructorDeclaration(DD, Type), - Callee, ReturnValueSlot(), Args); + return EmitCall(CGM.getTypes().arrangeCXXStructorDeclaration(Dtor), Callee, + ReturnValueSlot(), Args); } RValue CodeGenFunction::EmitCXXPseudoDestructorExpr( @@ -253,13 +264,25 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( This = EmitLValue(Base); } + if (const CXXConstructorDecl *Ctor = dyn_cast<CXXConstructorDecl>(MD)) { + // This is the MSVC p->Ctor::Ctor(...) extension. We assume that's + // constructing a new complete object of type Ctor. 
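// Illustrative C++, not from this commit (names invented): the MSVC
// p->Ctor::Ctor(...) extension referenced below; clang accepts it under
// -fms-extensions.
struct MsS {
  int v;
  MsS() : v(0) {}
  MsS(int x) : v(x) {}
};

void reinitSketch(MsS *p) {
  p->MsS::MsS(42); // re-runs a constructor, creating a new object at *p
}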
+ assert(!RtlArgs); + assert(ReturnValue.isNull() && "Constructor shouldn't have return value"); + CallArgList Args; + commonEmitCXXMemberOrOperatorCall( + *this, Ctor, This.getPointer(), /*ImplicitParam=*/nullptr, + /*ImplicitParamTy=*/QualType(), CE, Args, nullptr); + + EmitCXXConstructorCall(Ctor, Ctor_Complete, /*ForVirtualBase=*/false, + /*Delegating=*/false, This.getAddress(), Args, + AggValueSlot::DoesNotOverlap, CE->getExprLoc(), + /*NewPointerIsChecked=*/false); + return RValue::get(nullptr); + } if (MD->isTrivial() || (MD->isDefaulted() && MD->getParent()->isUnion())) { if (isa<CXXDestructorDecl>(MD)) return RValue::get(nullptr); - if (isa<CXXConstructorDecl>(MD) && - cast<CXXConstructorDecl>(MD)->isDefaultConstructor()) - return RValue::get(nullptr); - if (!MD->getParent()->mayInsertExtraPadding()) { if (MD->isCopyAssignmentOperator() || MD->isMoveAssignmentOperator()) { // We don't like to generate the trivial copy/move assignment operator @@ -272,20 +295,6 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( EmitAggregateAssign(This, RHS, CE->getType()); return RValue::get(This.getPointer()); } - - if (isa<CXXConstructorDecl>(MD) && - cast<CXXConstructorDecl>(MD)->isCopyOrMoveConstructor()) { - // Trivial move and copy ctor are the same. - assert(CE->getNumArgs() == 1 && "unexpected argcount for trivial ctor"); - const Expr *Arg = *CE->arg_begin(); - LValue RHS = EmitLValue(Arg); - LValue Dest = MakeAddrLValue(This.getAddress(), Arg->getType()); - // This is the MSVC p->Ctor::Ctor(...) extension. We assume that's - // constructing a new complete object of type Ctor. - EmitAggregateCopy(Dest, RHS, Arg->getType(), - AggValueSlot::DoesNotOverlap); - return RValue::get(This.getPointer()); - } llvm_unreachable("unknown trivial member function"); } } @@ -296,10 +305,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( const CGFunctionInfo *FInfo = nullptr; if (const auto *Dtor = dyn_cast<CXXDestructorDecl>(CalleeDecl)) FInfo = &CGM.getTypes().arrangeCXXStructorDeclaration( - Dtor, StructorType::Complete); - else if (const auto *Ctor = dyn_cast<CXXConstructorDecl>(CalleeDecl)) - FInfo = &CGM.getTypes().arrangeCXXStructorDeclaration( - Ctor, StructorType::Complete); + GlobalDecl(Dtor, Dtor_Complete)); else FInfo = &CGM.getTypes().arrangeCXXMethodDeclaration(CalleeDecl); @@ -322,14 +328,9 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( if (IsImplicitObjectCXXThis || isa<DeclRefExpr>(IOA)) SkippedChecks.set(SanitizerKind::Null, true); } - EmitTypeCheck( - isa<CXXConstructorDecl>(CalleeDecl) ? CodeGenFunction::TCK_ConstructorCall - : CodeGenFunction::TCK_MemberCall, - CallLoc, This.getPointer(), C.getRecordType(CalleeDecl->getParent()), - /*Alignment=*/CharUnits::Zero(), SkippedChecks); - - // FIXME: Uses of 'MD' past this point need to be audited. We may need to use - // 'CalleeDecl' instead. + EmitTypeCheck(CodeGenFunction::TCK_MemberCall, CallLoc, This.getPointer(), + C.getRecordType(CalleeDecl->getParent()), + /*Alignment=*/CharUnits::Zero(), SkippedChecks); // C++ [class.virtual]p12: // Explicit qualification with the scope operator (5.1) suppresses the @@ -339,7 +340,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( // because then we know what the type is. 
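// Illustrative C++, not from this commit, for the [class.virtual]p12 rule
// quoted above: qualifying the call suppresses the virtual mechanism.
struct VBase { virtual int f() { return 1; } };
struct VDerived : VBase { int f() override { return 2; } };

int qualifiedCallSketch(VDerived *d) {
  int a = d->f();        // virtual dispatch (may be devirtualized)
  int b = d->VBase::f(); // qualified: direct, non-virtual call to VBase::f
  return a + b;          // 2 + 1
}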
bool UseVirtualCall = CanUseVirtualCall && !DevirtualizedMethod; - if (const CXXDestructorDecl *Dtor = dyn_cast<CXXDestructorDecl>(MD)) { + if (const CXXDestructorDecl *Dtor = dyn_cast<CXXDestructorDecl>(CalleeDecl)) { assert(CE->arg_begin() == CE->arg_end() && "Destructor shouldn't have explicit parameters"); assert(ReturnValue.isNull() && "Destructor shouldn't have return value"); @@ -348,33 +349,31 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( *this, Dtor, Dtor_Complete, This.getAddress(), cast<CXXMemberCallExpr>(CE)); } else { + GlobalDecl GD(Dtor, Dtor_Complete); CGCallee Callee; - if (getLangOpts().AppleKext && MD->isVirtual() && HasQualifier) - Callee = BuildAppleKextVirtualCall(MD, Qualifier, Ty); + if (getLangOpts().AppleKext && Dtor->isVirtual() && HasQualifier) + Callee = BuildAppleKextVirtualCall(Dtor, Qualifier, Ty); else if (!DevirtualizedMethod) - Callee = CGCallee::forDirect( - CGM.getAddrOfCXXStructor(Dtor, StructorType::Complete, FInfo, Ty), - GlobalDecl(Dtor, Dtor_Complete)); + Callee = + CGCallee::forDirect(CGM.getAddrOfCXXStructor(GD, FInfo, Ty), GD); else { - const CXXDestructorDecl *DDtor = - cast<CXXDestructorDecl>(DevirtualizedMethod); - Callee = CGCallee::forDirect( - CGM.GetAddrOfFunction(GlobalDecl(DDtor, Dtor_Complete), Ty), - GlobalDecl(DDtor, Dtor_Complete)); + Callee = CGCallee::forDirect(CGM.GetAddrOfFunction(GD, Ty), GD); } - EmitCXXMemberOrOperatorCall( - CalleeDecl, Callee, ReturnValue, This.getPointer(), - /*ImplicitParam=*/nullptr, QualType(), CE, nullptr); + + QualType ThisTy = + IsArrow ? Base->getType()->getPointeeType() : Base->getType(); + EmitCXXDestructorCall(GD, Callee, This.getPointer(), ThisTy, + /*ImplicitParam=*/nullptr, + /*ImplicitParamTy=*/QualType(), nullptr); } return RValue::get(nullptr); } + // FIXME: Uses of 'MD' past this point need to be audited. We may need to use + // 'CalleeDecl' instead. + CGCallee Callee; - if (const CXXConstructorDecl *Ctor = dyn_cast<CXXConstructorDecl>(MD)) { - Callee = CGCallee::forDirect( - CGM.GetAddrOfFunction(GlobalDecl(Ctor, Ctor_Complete), Ty), - GlobalDecl(Ctor, Ctor_Complete)); - } else if (UseVirtualCall) { + if (UseVirtualCall) { Callee = CGCallee::forVirtual(CE, MD, This.getAddress(), Ty); } else { if (SanOpts.has(SanitizerKind::CFINVCall) && @@ -454,8 +453,7 @@ CodeGenFunction::EmitCXXMemberPointerCallExpr(const CXXMemberCallExpr *E, // Push the this ptr. Args.add(RValue::get(ThisPtrForCall), ThisType); - RequiredArgs required = - RequiredArgs::forPrototypePlus(FPT, 1, /*FD=*/nullptr); + RequiredArgs required = RequiredArgs::forPrototypePlus(FPT, 1); // And the rest of the call args EmitCallArgs(Args, FPT, E->arguments()); @@ -633,12 +631,10 @@ CodeGenFunction::EmitCXXConstructExpr(const CXXConstructExpr *E, case CXXConstructExpr::CK_NonVirtualBase: Type = Ctor_Base; - } + } - // Call the constructor. - EmitCXXConstructorCall(CD, Type, ForVirtualBase, Delegating, - Dest.getAddress(), E, Dest.mayOverlap(), - Dest.isSanitizerChecked()); + // Call the constructor. + EmitCXXConstructorCall(CD, Type, ForVirtualBase, Delegating, Dest, E); } } @@ -702,9 +698,9 @@ static llvm::Value *EmitCXXNewAllocSize(CodeGenFunction &CGF, // We multiply the size of all dimensions for NumElements. // e.g for 'int[2][3]', ElemType is 'int' and NumElements is 6. 
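// Illustrative sketch, not from this commit (helper name invented), of the
// size computation that follows: 'new T[n]' must scale and pad the byte size
// with overflow checks, the job of the umul/uadd.with.overflow intrinsics
// used below.
#include <cstddef>
#include <cstdint>

size_t allocSizeSketch(size_t n, size_t elemSize, size_t cookie, bool &bad) {
  size_t bytes;
  bad = __builtin_mul_overflow(n, elemSize, &bytes);    // umul.with.overflow
  bad |= __builtin_add_overflow(bytes, cookie, &bytes); // uadd.with.overflow
  if (bad)
    bytes = SIZE_MAX; // an all-ones size forces the allocator to fail
  return bytes;
}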
   numElements =
-    ConstantEmitter(CGF).tryEmitAbstract(e->getArraySize(), e->getType());
+    ConstantEmitter(CGF).tryEmitAbstract(*e->getArraySize(), e->getType());
   if (!numElements)
-    numElements = CGF.EmitScalarExpr(e->getArraySize());
+    numElements = CGF.EmitScalarExpr(*e->getArraySize());
   assert(isa<llvm::IntegerType>(numElements->getType()));

   // The number of elements can have an arbitrary integer type;
@@ -714,7 +710,7 @@ static llvm::Value *EmitCXXNewAllocSize(CodeGenFunction &CGF,
   // important way: if the count is negative, it's an error even if
   // the cookie size would bring the total size >= 0.
   bool isSigned
-    = e->getArraySize()->getType()->isSignedIntegerOrEnumerationType();
+    = (*e->getArraySize())->getType()->isSignedIntegerOrEnumerationType();
   llvm::IntegerType *numElementsType
     = cast<llvm::IntegerType>(numElements->getType());
   unsigned numElementsWidth = numElementsType->getBitWidth();
@@ -866,7 +862,7 @@ static llvm::Value *EmitCXXNewAllocSize(CodeGenFunction &CGF,
   // can be ignored because the result shouldn't be used if
   // allocation fails.
   if (typeSizeMultiplier != 1) {
-    llvm::Value *umul_with_overflow
+    llvm::Function *umul_with_overflow
       = CGF.CGM.getIntrinsic(llvm::Intrinsic::umul_with_overflow, CGF.SizeTy);

     llvm::Value *tsmV =
@@ -906,7 +902,7 @@ static llvm::Value *EmitCXXNewAllocSize(CodeGenFunction &CGF,
   if (cookieSize != 0) {
     sizeWithoutCookie = size;

-    llvm::Value *uadd_with_overflow
+    llvm::Function *uadd_with_overflow
       = CGF.CGM.getIntrinsic(llvm::Intrinsic::uadd_with_overflow, CGF.SizeTy);

     llvm::Value *cookieSizeV = llvm::ConstantInt::get(CGF.SizeTy, cookieSize);
@@ -1293,12 +1289,12 @@ static RValue EmitNewDeleteCall(CodeGenFunction &CGF,
                                 const FunctionDecl *CalleeDecl,
                                 const FunctionProtoType *CalleeType,
                                 const CallArgList &Args) {
-  llvm::Instruction *CallOrInvoke;
+  llvm::CallBase *CallOrInvoke;
   llvm::Constant *CalleePtr = CGF.CGM.GetAddrOfFunction(CalleeDecl);
   CGCallee Callee = CGCallee::forDirect(CalleePtr, GlobalDecl(CalleeDecl));
   RValue RV = CGF.EmitCall(CGF.CGM.getTypes().arrangeFreeFunctionCall(
-                               Args, CalleeType, /*chainCall=*/false),
+                               Args, CalleeType, /*ChainCall=*/false),
                            Callee, ReturnValueSlot(), Args, &CallOrInvoke);

   /// C++1y [expr.new]p10:
@@ -1309,15 +1305,8 @@ static RValue EmitNewDeleteCall(CodeGenFunction &CGF,
   llvm::Function *Fn = dyn_cast<llvm::Function>(CalleePtr);
   if (CalleeDecl->isReplaceableGlobalAllocationFunction() &&
       Fn && Fn->hasFnAttribute(llvm::Attribute::NoBuiltin)) {
-    // FIXME: Add addAttribute to CallSite.
-    if (llvm::CallInst *CI = dyn_cast<llvm::CallInst>(CallOrInvoke))
-      CI->addAttribute(llvm::AttributeList::FunctionIndex,
-                       llvm::Attribute::Builtin);
-    else if (llvm::InvokeInst *II = dyn_cast<llvm::InvokeInst>(CallOrInvoke))
-      II->addAttribute(llvm::AttributeList::FunctionIndex,
-                       llvm::Attribute::Builtin);
-    else
-      llvm_unreachable("unexpected kind of call instruction");
+    CallOrInvoke->addAttribute(llvm::AttributeList::FunctionIndex,
+                               llvm::Attribute::Builtin);
   }

   return RV;
@@ -1715,10 +1704,16 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) {
                  result.getAlignment());

   // Emit sanitizer checks for pointer value now, so that in the case of an
-  // array it was checked only once and not at each constructor call.
+  // array it was checked only once and not at each constructor call. We may
+  // have already checked that the pointer is non-null.
+  // FIXME: If we have an array cookie and a potentially-throwing allocator,
+  // we'll null check the wrong pointer here.
+ SanitizerSet SkippedChecks; + SkippedChecks.set(SanitizerKind::Null, nullCheck); EmitTypeCheck(CodeGenFunction::TCK_ConstructorCall, - E->getAllocatedTypeSourceInfo()->getTypeLoc().getBeginLoc(), - result.getPointer(), allocType); + E->getAllocatedTypeSourceInfo()->getTypeLoc().getBeginLoc(), + result.getPointer(), allocType, result.getAlignment(), + SkippedChecks, numElements); EmitNewInitializer(*this, E, allocType, elementTy, result, numElements, allocSizeWithoutCookie); @@ -1905,7 +1900,7 @@ static void EmitObjectDelete(CodeGenFunction &CGF, CGF.EmitCXXDestructorCall(Dtor, Dtor_Complete, /*ForVirtualBase=*/false, /*Delegating=*/false, - Ptr); + Ptr, ElementType); else if (auto Lifetime = ElementType.getObjCLifetime()) { switch (Lifetime) { case Qualifiers::OCL_None: @@ -2253,21 +2248,3 @@ llvm::Value *CodeGenFunction::EmitDynamicCast(Address ThisAddr, return Value; } - -void CodeGenFunction::EmitLambdaExpr(const LambdaExpr *E, AggValueSlot Slot) { - LValue SlotLV = MakeAddrLValue(Slot.getAddress(), E->getType()); - - CXXRecordDecl::field_iterator CurField = E->getLambdaClass()->field_begin(); - for (LambdaExpr::const_capture_init_iterator i = E->capture_init_begin(), - e = E->capture_init_end(); - i != e; ++i, ++CurField) { - // Emit initialization - LValue LV = EmitLValueForFieldInitialization(SlotLV, *CurField); - if (CurField->hasCapturedVLAType()) { - auto VAT = CurField->getCapturedVLAType(); - EmitStoreThroughLValue(RValue::get(VLASizeMap[VAT->getSizeExpr()]), LV); - } else { - EmitInitializerForField(*CurField, LV, *i); - } - } -} diff --git a/lib/CodeGen/CGExprComplex.cpp b/lib/CodeGen/CGExprComplex.cpp index 2db693b44c90..6a5fb45ba259 100644 --- a/lib/CodeGen/CGExprComplex.cpp +++ b/lib/CodeGen/CGExprComplex.cpp @@ -1,9 +1,8 @@ //===--- CGExprComplex.cpp - Emit LLVM Code for Complex Exprs -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -214,10 +213,11 @@ public: return Visit(E->getSubExpr()); } ComplexPairTy VisitCXXDefaultArgExpr(CXXDefaultArgExpr *DAE) { + CodeGenFunction::CXXDefaultArgExprScope Scope(CGF, DAE); return Visit(DAE->getExpr()); } ComplexPairTy VisitCXXDefaultInitExpr(CXXDefaultInitExpr *DIE) { - CodeGenFunction::CXXDefaultInitExprScope Scope(CGF); + CodeGenFunction::CXXDefaultInitExprScope Scope(CGF, DIE); return Visit(DIE->getExpr()); } ComplexPairTy VisitExprWithCleanups(ExprWithCleanups *E) { @@ -328,15 +328,12 @@ public: Address CodeGenFunction::emitAddrOfRealComponent(Address addr, QualType complexType) { - CharUnits offset = CharUnits::Zero(); - return Builder.CreateStructGEP(addr, 0, offset, addr.getName() + ".realp"); + return Builder.CreateStructGEP(addr, 0, addr.getName() + ".realp"); } Address CodeGenFunction::emitAddrOfImagComponent(Address addr, QualType complexType) { - QualType eltType = complexType->castAs<ComplexType>()->getElementType(); - CharUnits offset = getContext().getTypeSizeInChars(eltType); - return Builder.CreateStructGEP(addr, 1, offset, addr.getName() + ".imagp"); + return Builder.CreateStructGEP(addr, 1, addr.getName() + ".imagp"); } /// EmitLoadOfLValue - Given an RValue reference for a complex, emit code to @@ -467,6 +464,15 @@ ComplexPairTy ComplexExprEmitter::EmitCast(CastKind CK, Expr *Op, return EmitLoadOfLValue(CGF.MakeAddrLValue(V, DestTy), Op->getExprLoc()); } + case CK_LValueToRValueBitCast: { + LValue SourceLVal = CGF.EmitLValue(Op); + Address Addr = Builder.CreateElementBitCast(SourceLVal.getAddress(), + CGF.ConvertTypeForMem(DestTy)); + LValue DestLV = CGF.MakeAddrLValue(Addr, DestTy); + DestLV.setTBAAInfo(TBAAAccessInfo::getMayAliasInfo()); + return EmitLoadOfLValue(DestLV, Op->getExprLoc()); + } + case CK_BitCast: case CK_BaseToDerived: case CK_DerivedToBase: @@ -513,6 +519,8 @@ ComplexPairTy ComplexExprEmitter::EmitCast(CastKind CK, Expr *Op, case CK_IntToOCLSampler: case CK_FixedPointCast: case CK_FixedPointToBoolean: + case CK_FixedPointToIntegral: + case CK_IntegralToFixedPoint: llvm_unreachable("invalid cast kind for complex value"); case CK_FloatingRealToComplex: @@ -628,12 +636,13 @@ ComplexPairTy ComplexExprEmitter::EmitComplexBinOpLibCall(StringRef LibCallName, Args, cast<FunctionType>(FQTy.getTypePtr()), false); llvm::FunctionType *FTy = CGF.CGM.getTypes().GetFunctionType(FuncInfo); - llvm::Constant *Func = CGF.CGM.CreateBuiltinFunction(FTy, LibCallName); + llvm::FunctionCallee Func = CGF.CGM.CreateRuntimeFunction( + FTy, LibCallName, llvm::AttributeList(), true); CGCallee Callee = CGCallee::forDirect(Func, FQTy->getAs<FunctionProtoType>()); - llvm::Instruction *Call; + llvm::CallBase *Call; RValue Res = CGF.EmitCall(FuncInfo, Callee, ReturnValueSlot(), Args, &Call); - cast<llvm::CallInst>(Call)->setCallingConv(CGF.CGM.getRuntimeCC()); + Call->setCallingConv(CGF.CGM.getRuntimeCC()); return Res.getComplexVal(); } diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp index c9475840aeeb..31cf2aef1ba0 100644 --- a/lib/CodeGen/CGExprConstant.cpp +++ b/lib/CodeGen/CGExprConstant.cpp @@ -1,9 +1,8 @@ //===--- CGExprConstant.cpp - Emit LLVM Code from Constant Expressions ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -23,6 +22,8 @@ #include "clang/AST/RecordLayout.h" #include "clang/AST/StmtVisitor.h" #include "clang/Basic/Builtins.h" +#include "llvm/ADT/Sequence.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" @@ -31,345 +32,637 @@ using namespace clang; using namespace CodeGen; //===----------------------------------------------------------------------===// -// ConstStructBuilder +// ConstantAggregateBuilder //===----------------------------------------------------------------------===// namespace { class ConstExprEmitter; -class ConstStructBuilder { - CodeGenModule &CGM; - ConstantEmitter &Emitter; - - bool Packed; - CharUnits NextFieldOffsetInChars; - CharUnits LLVMStructAlignment; - SmallVector<llvm::Constant *, 32> Elements; -public: - static llvm::Constant *BuildStruct(ConstantEmitter &Emitter, - ConstExprEmitter *ExprEmitter, - llvm::Constant *Base, - InitListExpr *Updater, - QualType ValTy); - static llvm::Constant *BuildStruct(ConstantEmitter &Emitter, - InitListExpr *ILE, QualType StructTy); - static llvm::Constant *BuildStruct(ConstantEmitter &Emitter, - const APValue &Value, QualType ValTy); - -private: - ConstStructBuilder(ConstantEmitter &emitter) - : CGM(emitter.CGM), Emitter(emitter), Packed(false), - NextFieldOffsetInChars(CharUnits::Zero()), - LLVMStructAlignment(CharUnits::One()) { } - - void AppendField(const FieldDecl *Field, uint64_t FieldOffset, - llvm::Constant *InitExpr); - - void AppendBytes(CharUnits FieldOffsetInChars, llvm::Constant *InitCst); - - void AppendBitField(const FieldDecl *Field, uint64_t FieldOffset, - llvm::ConstantInt *InitExpr); - - void AppendPadding(CharUnits PadSize); - void AppendTailPadding(CharUnits RecordSize); - - void ConvertStructToPacked(); +struct ConstantAggregateBuilderUtils { + CodeGenModule &CGM; - bool Build(InitListExpr *ILE); - bool Build(ConstExprEmitter *Emitter, llvm::Constant *Base, - InitListExpr *Updater); - bool Build(const APValue &Val, const RecordDecl *RD, bool IsPrimaryBase, - const CXXRecordDecl *VTableClass, CharUnits BaseOffset); - llvm::Constant *Finalize(QualType Ty); + ConstantAggregateBuilderUtils(CodeGenModule &CGM) : CGM(CGM) {} CharUnits getAlignment(const llvm::Constant *C) const { - if (Packed) return CharUnits::One(); return CharUnits::fromQuantity( CGM.getDataLayout().getABITypeAlignment(C->getType())); } - CharUnits getSizeInChars(const llvm::Constant *C) const { - return CharUnits::fromQuantity( - CGM.getDataLayout().getTypeAllocSize(C->getType())); + CharUnits getSize(llvm::Type *Ty) const { + return CharUnits::fromQuantity(CGM.getDataLayout().getTypeAllocSize(Ty)); } -}; - -void ConstStructBuilder:: -AppendField(const FieldDecl *Field, uint64_t FieldOffset, - llvm::Constant *InitCst) { - const ASTContext &Context = CGM.getContext(); - - CharUnits FieldOffsetInChars = Context.toCharUnitsFromBits(FieldOffset); - - AppendBytes(FieldOffsetInChars, InitCst); -} - -void ConstStructBuilder:: -AppendBytes(CharUnits FieldOffsetInChars, llvm::Constant *InitCst) { - - assert(NextFieldOffsetInChars <= FieldOffsetInChars - && "Field offset mismatch!"); - - CharUnits FieldAlignment = getAlignment(InitCst); - - // Round up the field offset to the alignment of 
the field type. - CharUnits AlignedNextFieldOffsetInChars = - NextFieldOffsetInChars.alignTo(FieldAlignment); - if (AlignedNextFieldOffsetInChars < FieldOffsetInChars) { - // We need to append padding. - AppendPadding(FieldOffsetInChars - NextFieldOffsetInChars); - - assert(NextFieldOffsetInChars == FieldOffsetInChars && - "Did not add enough padding!"); + CharUnits getSize(const llvm::Constant *C) const { + return getSize(C->getType()); + } - AlignedNextFieldOffsetInChars = - NextFieldOffsetInChars.alignTo(FieldAlignment); + llvm::Constant *getPadding(CharUnits PadSize) const { + llvm::Type *Ty = CGM.Int8Ty; + if (PadSize > CharUnits::One()) + Ty = llvm::ArrayType::get(Ty, PadSize.getQuantity()); + return llvm::UndefValue::get(Ty); } - if (AlignedNextFieldOffsetInChars > FieldOffsetInChars) { - assert(!Packed && "Alignment is wrong even with a packed struct!"); + llvm::Constant *getZeroes(CharUnits ZeroSize) const { + llvm::Type *Ty = llvm::ArrayType::get(CGM.Int8Ty, ZeroSize.getQuantity()); + return llvm::ConstantAggregateZero::get(Ty); + } +}; - // Convert the struct to a packed struct. - ConvertStructToPacked(); +/// Incremental builder for an llvm::Constant* holding a struct or array +/// constant. +class ConstantAggregateBuilder : private ConstantAggregateBuilderUtils { + /// The elements of the constant. These two arrays must have the same size; + /// Offsets[i] describes the offset of Elems[i] within the constant. The + /// elements are kept in increasing offset order, and we ensure that there + /// is no overlap: Offsets[i+1] >= Offsets[i] + getSize(Elemes[i]). + /// + /// This may contain explicit padding elements (in order to create a + /// natural layout), but need not. Gaps between elements are implicitly + /// considered to be filled with undef. + llvm::SmallVector<llvm::Constant*, 32> Elems; + llvm::SmallVector<CharUnits, 32> Offsets; + + /// The size of the constant (the maximum end offset of any added element). + /// May be larger than the end of Elems.back() if we split the last element + /// and removed some trailing undefs. + CharUnits Size = CharUnits::Zero(); + + /// This is true only if laying out Elems in order as the elements of a + /// non-packed LLVM struct will give the correct layout. + bool NaturalLayout = true; + + bool split(size_t Index, CharUnits Hint); + Optional<size_t> splitAt(CharUnits Pos); + + static llvm::Constant *buildFrom(CodeGenModule &CGM, + ArrayRef<llvm::Constant *> Elems, + ArrayRef<CharUnits> Offsets, + CharUnits StartOffset, CharUnits Size, + bool NaturalLayout, llvm::Type *DesiredTy, + bool AllowOversized); - // After we pack the struct, we may need to insert padding. - if (NextFieldOffsetInChars < FieldOffsetInChars) { - // We need to append padding. - AppendPadding(FieldOffsetInChars - NextFieldOffsetInChars); +public: + ConstantAggregateBuilder(CodeGenModule &CGM) + : ConstantAggregateBuilderUtils(CGM) {} + + /// Update or overwrite the value starting at \p Offset with \c C. + /// + /// \param AllowOverwrite If \c true, this constant might overwrite (part of) + /// a constant that has already been added. This flag is only used to + /// detect bugs. + bool add(llvm::Constant *C, CharUnits Offset, bool AllowOverwrite); + + /// Update or overwrite the bits starting at \p OffsetInBits with \p Bits. + bool addBits(llvm::APInt Bits, uint64_t OffsetInBits, bool AllowOverwrite); + + /// Attempt to condense the value starting at \p Offset to a constant of type + /// \p DesiredTy. 
+ void condense(CharUnits Offset, llvm::Type *DesiredTy); + + /// Produce a constant representing the entire accumulated value, ideally of + /// the specified type. If \p AllowOversized, the constant might be larger + /// than implied by \p DesiredTy (eg, if there is a flexible array member). + /// Otherwise, the constant will be of exactly the same size as \p DesiredTy + /// even if we can't represent it as that type. + llvm::Constant *build(llvm::Type *DesiredTy, bool AllowOversized) const { + return buildFrom(CGM, Elems, Offsets, CharUnits::Zero(), Size, + NaturalLayout, DesiredTy, AllowOversized); + } +}; - assert(NextFieldOffsetInChars == FieldOffsetInChars && - "Did not add enough padding!"); +template<typename Container, typename Range = std::initializer_list< + typename Container::value_type>> +static void replace(Container &C, size_t BeginOff, size_t EndOff, Range Vals) { + assert(BeginOff <= EndOff && "invalid replacement range"); + llvm::replace(C, C.begin() + BeginOff, C.begin() + EndOff, Vals); +} + +bool ConstantAggregateBuilder::add(llvm::Constant *C, CharUnits Offset, + bool AllowOverwrite) { + // Common case: appending to a layout. + if (Offset >= Size) { + CharUnits Align = getAlignment(C); + CharUnits AlignedSize = Size.alignTo(Align); + if (AlignedSize > Offset || Offset.alignTo(Align) != Offset) + NaturalLayout = false; + else if (AlignedSize < Offset) { + Elems.push_back(getPadding(Offset - Size)); + Offsets.push_back(Size); } - AlignedNextFieldOffsetInChars = NextFieldOffsetInChars; + Elems.push_back(C); + Offsets.push_back(Offset); + Size = Offset + getSize(C); + return true; } - // Add the field. - Elements.push_back(InitCst); - NextFieldOffsetInChars = AlignedNextFieldOffsetInChars + - getSizeInChars(InitCst); + // Uncommon case: constant overlaps what we've already created. + llvm::Optional<size_t> FirstElemToReplace = splitAt(Offset); + if (!FirstElemToReplace) + return false; - if (Packed) - assert(LLVMStructAlignment == CharUnits::One() && - "Packed struct not byte-aligned!"); - else - LLVMStructAlignment = std::max(LLVMStructAlignment, FieldAlignment); + CharUnits CSize = getSize(C); + llvm::Optional<size_t> LastElemToReplace = splitAt(Offset + CSize); + if (!LastElemToReplace) + return false; + + assert((FirstElemToReplace == LastElemToReplace || AllowOverwrite) && + "unexpectedly overwriting field"); + + replace(Elems, *FirstElemToReplace, *LastElemToReplace, {C}); + replace(Offsets, *FirstElemToReplace, *LastElemToReplace, {Offset}); + Size = std::max(Size, Offset + CSize); + NaturalLayout = false; + return true; } -void ConstStructBuilder::AppendBitField(const FieldDecl *Field, - uint64_t FieldOffset, - llvm::ConstantInt *CI) { +bool ConstantAggregateBuilder::addBits(llvm::APInt Bits, uint64_t OffsetInBits, + bool AllowOverwrite) { const ASTContext &Context = CGM.getContext(); - const uint64_t CharWidth = Context.getCharWidth(); - uint64_t NextFieldOffsetInBits = Context.toBits(NextFieldOffsetInChars); - if (FieldOffset > NextFieldOffsetInBits) { - // We need to add padding. - CharUnits PadSize = Context.toCharUnitsFromBits( - llvm::alignTo(FieldOffset - NextFieldOffsetInBits, - Context.getTargetInfo().getCharAlign())); + const uint64_t CharWidth = CGM.getContext().getCharWidth(); + + // Offset of where we want the first bit to go within the bits of the + // current char. + unsigned OffsetWithinChar = OffsetInBits % CharWidth; + + // We split bit-fields up into individual bytes. Walk over the bytes and + // update them. 
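+  // For example, on a little-endian target, writing a 10-bit value at bit
+  // offset 3 touches two bytes: bits 3..7 of the first byte and bits 0..4 of
+  // the second.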
+ for (CharUnits OffsetInChars = + Context.toCharUnitsFromBits(OffsetInBits - OffsetWithinChar); + /**/; ++OffsetInChars) { + // Number of bits we want to fill in this char. + unsigned WantedBits = + std::min((uint64_t)Bits.getBitWidth(), CharWidth - OffsetWithinChar); + + // Get a char containing the bits we want in the right places. The other + // bits have unspecified values. + llvm::APInt BitsThisChar = Bits; + if (BitsThisChar.getBitWidth() < CharWidth) + BitsThisChar = BitsThisChar.zext(CharWidth); + if (CGM.getDataLayout().isBigEndian()) { + // Figure out how much to shift by. We may need to left-shift if we have + // less than one byte of Bits left. + int Shift = Bits.getBitWidth() - CharWidth + OffsetWithinChar; + if (Shift > 0) + BitsThisChar.lshrInPlace(Shift); + else if (Shift < 0) + BitsThisChar = BitsThisChar.shl(-Shift); + } else { + BitsThisChar = BitsThisChar.shl(OffsetWithinChar); + } + if (BitsThisChar.getBitWidth() > CharWidth) + BitsThisChar = BitsThisChar.trunc(CharWidth); - AppendPadding(PadSize); - } + if (WantedBits == CharWidth) { + // Got a full byte: just add it directly. + add(llvm::ConstantInt::get(CGM.getLLVMContext(), BitsThisChar), + OffsetInChars, AllowOverwrite); + } else { + // Partial byte: update the existing integer if there is one. If we + // can't split out a 1-CharUnit range to update, then we can't add + // these bits and fail the entire constant emission. + llvm::Optional<size_t> FirstElemToUpdate = splitAt(OffsetInChars); + if (!FirstElemToUpdate) + return false; + llvm::Optional<size_t> LastElemToUpdate = + splitAt(OffsetInChars + CharUnits::One()); + if (!LastElemToUpdate) + return false; + assert(*LastElemToUpdate - *FirstElemToUpdate < 2 && + "should have at most one element covering one byte"); + + // Figure out which bits we want and discard the rest. + llvm::APInt UpdateMask(CharWidth, 0); + if (CGM.getDataLayout().isBigEndian()) + UpdateMask.setBits(CharWidth - OffsetWithinChar - WantedBits, + CharWidth - OffsetWithinChar); + else + UpdateMask.setBits(OffsetWithinChar, OffsetWithinChar + WantedBits); + BitsThisChar &= UpdateMask; + + if (*FirstElemToUpdate == *LastElemToUpdate || + Elems[*FirstElemToUpdate]->isNullValue() || + isa<llvm::UndefValue>(Elems[*FirstElemToUpdate])) { + // All existing bits are either zero or undef. + add(llvm::ConstantInt::get(CGM.getLLVMContext(), BitsThisChar), + OffsetInChars, /*AllowOverwrite*/ true); + } else { + llvm::Constant *&ToUpdate = Elems[*FirstElemToUpdate]; + // In order to perform a partial update, we need the existing bitwise + // value, which we can only extract for a constant int. + auto *CI = dyn_cast<llvm::ConstantInt>(ToUpdate); + if (!CI) + return false; + // Because this is a 1-CharUnit range, the constant occupying it must + // be exactly one CharUnit wide. + assert(CI->getBitWidth() == CharWidth && "splitAt failed"); + assert((!(CI->getValue() & UpdateMask) || AllowOverwrite) && + "unexpectedly overwriting bitfield"); + BitsThisChar |= (CI->getValue() & ~UpdateMask); + ToUpdate = llvm::ConstantInt::get(CGM.getLLVMContext(), BitsThisChar); + } + } - uint64_t FieldSize = Field->getBitWidthValue(Context); + // Stop if we've added all the bits. + if (WantedBits == Bits.getBitWidth()) + break; - llvm::APInt FieldValue = CI->getValue(); + // Remove the consumed bits from Bits. 
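+    // (On little-endian targets the consumed bits are the low bits, so shift
+    // them out; on big-endian targets they are the high bits, which the
+    // truncation below drops.)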
+    if (!CGM.getDataLayout().isBigEndian())
+      Bits.lshrInPlace(WantedBits);
+    Bits = Bits.trunc(Bits.getBitWidth() - WantedBits);
 
-  // Promote the size of FieldValue if necessary
-  // FIXME: This should never occur, but currently it can because initializer
-  // constants are cast to bool, and because clang is not enforcing bitfield
-  // width limits.
-  if (FieldSize > FieldValue.getBitWidth())
-    FieldValue = FieldValue.zext(FieldSize);
+    // The remaining bits go at the start of the following bytes.
+    OffsetWithinChar = 0;
+  }
 
-  // Truncate the size of FieldValue to the bit field size.
-  if (FieldSize < FieldValue.getBitWidth())
-    FieldValue = FieldValue.trunc(FieldSize);
+  return true;
+}
 
-  NextFieldOffsetInBits = Context.toBits(NextFieldOffsetInChars);
-  if (FieldOffset < NextFieldOffsetInBits) {
-    // Either part of the field or the entire field can go into the previous
-    // byte.
-    assert(!Elements.empty() && "Elements can't be empty!");
+/// Returns a position within Elems and Offsets such that all elements
+/// before the returned index end before Pos and all elements at or after
+/// the returned index begin at or after Pos. Splits elements as necessary
+/// to ensure this. Returns None if we find something we can't split.
+Optional<size_t> ConstantAggregateBuilder::splitAt(CharUnits Pos) {
+  if (Pos >= Size)
+    return Offsets.size();
+
+  while (true) {
+    auto FirstAfterPos = llvm::upper_bound(Offsets, Pos);
+    if (FirstAfterPos == Offsets.begin())
+      return 0;
+
+    // If we already have an element starting at Pos, we're done.
+    size_t LastAtOrBeforePosIndex = FirstAfterPos - Offsets.begin() - 1;
+    if (Offsets[LastAtOrBeforePosIndex] == Pos)
+      return LastAtOrBeforePosIndex;
+
+    // We found an element starting before Pos. Check for overlap.
+    if (Offsets[LastAtOrBeforePosIndex] +
+            getSize(Elems[LastAtOrBeforePosIndex]) <= Pos)
+      return LastAtOrBeforePosIndex + 1;
+
+    // Try to decompose it into smaller constants.
+    if (!split(LastAtOrBeforePosIndex, Pos))
+      return None;
+  }
+}
+
+/// Split the constant at index Index, if possible. Return true if we did.
+/// Hint indicates the location at which we'd like to split, but may be
+/// ignored.
+bool ConstantAggregateBuilder::split(size_t Index, CharUnits Hint) {
+  NaturalLayout = false;
+  llvm::Constant *C = Elems[Index];
+  CharUnits Offset = Offsets[Index];
+
+  if (auto *CA = dyn_cast<llvm::ConstantAggregate>(C)) {
+    replace(Elems, Index, Index + 1,
+            llvm::map_range(llvm::seq(0u, CA->getNumOperands()),
+                            [&](unsigned Op) { return CA->getOperand(Op); }));
+    if (auto *Seq = dyn_cast<llvm::SequentialType>(CA->getType())) {
+      // Array or vector.
+      CharUnits ElemSize = getSize(Seq->getElementType());
+      replace(
+          Offsets, Index, Index + 1,
+          llvm::map_range(llvm::seq(0u, CA->getNumOperands()),
+                          [&](unsigned Op) { return Offset + Op * ElemSize; }));
+    } else {
+      // Must be a struct.
+      auto *ST = cast<llvm::StructType>(CA->getType());
+      const llvm::StructLayout *Layout =
+          CGM.getDataLayout().getStructLayout(ST);
+      replace(Offsets, Index, Index + 1,
+              llvm::map_range(
+                  llvm::seq(0u, CA->getNumOperands()), [&](unsigned Op) {
+                    return Offset + CharUnits::fromQuantity(
+                                        Layout->getElementOffset(Op));
+                  }));
+    }
+    return true;
+  }
 
-    unsigned BitsInPreviousByte = NextFieldOffsetInBits - FieldOffset;
+  if (auto *CDS = dyn_cast<llvm::ConstantDataSequential>(C)) {
+    // FIXME: If possible, split into two ConstantDataSequentials at Hint.
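+    // For now, expand the sequence into one scalar constant per element, so
+    // that it can then be split at any element boundary.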
+ CharUnits ElemSize = getSize(CDS->getElementType()); + replace(Elems, Index, Index + 1, + llvm::map_range(llvm::seq(0u, CDS->getNumElements()), + [&](unsigned Elem) { + return CDS->getElementAsConstant(Elem); + })); + replace(Offsets, Index, Index + 1, + llvm::map_range( + llvm::seq(0u, CDS->getNumElements()), + [&](unsigned Elem) { return Offset + Elem * ElemSize; })); + return true; + } - bool FitsCompletelyInPreviousByte = - BitsInPreviousByte >= FieldValue.getBitWidth(); + if (isa<llvm::ConstantAggregateZero>(C)) { + CharUnits ElemSize = getSize(C); + assert(Hint > Offset && Hint < Offset + ElemSize && "nothing to split"); + replace(Elems, Index, Index + 1, + {getZeroes(Hint - Offset), getZeroes(Offset + ElemSize - Hint)}); + replace(Offsets, Index, Index + 1, {Offset, Hint}); + return true; + } - llvm::APInt Tmp = FieldValue; + if (isa<llvm::UndefValue>(C)) { + replace(Elems, Index, Index + 1, {}); + replace(Offsets, Index, Index + 1, {}); + return true; + } - if (!FitsCompletelyInPreviousByte) { - unsigned NewFieldWidth = FieldSize - BitsInPreviousByte; + // FIXME: We could split a ConstantInt if the need ever arose. + // We don't need to do this to handle bit-fields because we always eagerly + // split them into 1-byte chunks. - if (CGM.getDataLayout().isBigEndian()) { - Tmp.lshrInPlace(NewFieldWidth); - Tmp = Tmp.trunc(BitsInPreviousByte); + return false; +} - // We want the remaining high bits. - FieldValue = FieldValue.trunc(NewFieldWidth); - } else { - Tmp = Tmp.trunc(BitsInPreviousByte); +static llvm::Constant * +EmitArrayConstant(CodeGenModule &CGM, llvm::ArrayType *DesiredType, + llvm::Type *CommonElementType, unsigned ArrayBound, + SmallVectorImpl<llvm::Constant *> &Elements, + llvm::Constant *Filler); + +llvm::Constant *ConstantAggregateBuilder::buildFrom( + CodeGenModule &CGM, ArrayRef<llvm::Constant *> Elems, + ArrayRef<CharUnits> Offsets, CharUnits StartOffset, CharUnits Size, + bool NaturalLayout, llvm::Type *DesiredTy, bool AllowOversized) { + ConstantAggregateBuilderUtils Utils(CGM); + + if (Elems.empty()) + return llvm::UndefValue::get(DesiredTy); + + auto Offset = [&](size_t I) { return Offsets[I] - StartOffset; }; + + // If we want an array type, see if all the elements are the same type and + // appropriately spaced. + if (llvm::ArrayType *ATy = dyn_cast<llvm::ArrayType>(DesiredTy)) { + assert(!AllowOversized && "oversized array emission not supported"); + + bool CanEmitArray = true; + llvm::Type *CommonType = Elems[0]->getType(); + llvm::Constant *Filler = llvm::Constant::getNullValue(CommonType); + CharUnits ElemSize = Utils.getSize(ATy->getElementType()); + SmallVector<llvm::Constant*, 32> ArrayElements; + for (size_t I = 0; I != Elems.size(); ++I) { + // Skip zeroes; we'll use a zero value as our array filler. + if (Elems[I]->isNullValue()) + continue; - // We want the remaining low bits. - FieldValue.lshrInPlace(BitsInPreviousByte); - FieldValue = FieldValue.trunc(NewFieldWidth); + // All remaining elements must be the same type. 
+ if (Elems[I]->getType() != CommonType || + Offset(I) % ElemSize != 0) { + CanEmitArray = false; + break; } + ArrayElements.resize(Offset(I) / ElemSize + 1, Filler); + ArrayElements.back() = Elems[I]; } - Tmp = Tmp.zext(CharWidth); - if (CGM.getDataLayout().isBigEndian()) { - if (FitsCompletelyInPreviousByte) - Tmp = Tmp.shl(BitsInPreviousByte - FieldValue.getBitWidth()); - } else { - Tmp = Tmp.shl(CharWidth - BitsInPreviousByte); + if (CanEmitArray) { + return EmitArrayConstant(CGM, ATy, CommonType, ATy->getNumElements(), + ArrayElements, Filler); } - // 'or' in the bits that go into the previous byte. - llvm::Value *LastElt = Elements.back(); - if (llvm::ConstantInt *Val = dyn_cast<llvm::ConstantInt>(LastElt)) - Tmp |= Val->getValue(); - else { - assert(isa<llvm::UndefValue>(LastElt)); - // If there is an undef field that we're adding to, it can either be a - // scalar undef (in which case, we just replace it with our field) or it - // is an array. If it is an array, we have to pull one byte off the - // array so that the other undef bytes stay around. - if (!isa<llvm::IntegerType>(LastElt->getType())) { - // The undef padding will be a multibyte array, create a new smaller - // padding and then an hole for our i8 to get plopped into. - assert(isa<llvm::ArrayType>(LastElt->getType()) && - "Expected array padding of undefs"); - llvm::ArrayType *AT = cast<llvm::ArrayType>(LastElt->getType()); - assert(AT->getElementType()->isIntegerTy(CharWidth) && - AT->getNumElements() != 0 && - "Expected non-empty array padding of undefs"); - - // Remove the padding array. - NextFieldOffsetInChars -= CharUnits::fromQuantity(AT->getNumElements()); - Elements.pop_back(); - - // Add the padding back in two chunks. - AppendPadding(CharUnits::fromQuantity(AT->getNumElements()-1)); - AppendPadding(CharUnits::One()); - assert(isa<llvm::UndefValue>(Elements.back()) && - Elements.back()->getType()->isIntegerTy(CharWidth) && - "Padding addition didn't work right"); - } + // Can't emit as an array, carry on to emit as a struct. + } + + CharUnits DesiredSize = Utils.getSize(DesiredTy); + CharUnits Align = CharUnits::One(); + for (llvm::Constant *C : Elems) + Align = std::max(Align, Utils.getAlignment(C)); + CharUnits AlignedSize = Size.alignTo(Align); + + bool Packed = false; + ArrayRef<llvm::Constant*> UnpackedElems = Elems; + llvm::SmallVector<llvm::Constant*, 32> UnpackedElemStorage; + if ((DesiredSize < AlignedSize && !AllowOversized) || + DesiredSize.alignTo(Align) != DesiredSize) { + // The natural layout would be the wrong size; force use of a packed layout. + NaturalLayout = false; + Packed = true; + } else if (DesiredSize > AlignedSize) { + // The constant would be too small. Add padding to fix it. + UnpackedElemStorage.assign(Elems.begin(), Elems.end()); + UnpackedElemStorage.push_back(Utils.getPadding(DesiredSize - Size)); + UnpackedElems = UnpackedElemStorage; + } + + // If we don't have a natural layout, insert padding as necessary. + // As we go, double-check to see if we can actually just emit Elems + // as a non-packed struct and do so opportunistically if possible. 
+ llvm::SmallVector<llvm::Constant*, 32> PackedElems; + if (!NaturalLayout) { + CharUnits SizeSoFar = CharUnits::Zero(); + for (size_t I = 0; I != Elems.size(); ++I) { + CharUnits Align = Utils.getAlignment(Elems[I]); + CharUnits NaturalOffset = SizeSoFar.alignTo(Align); + CharUnits DesiredOffset = Offset(I); + assert(DesiredOffset >= SizeSoFar && "elements out of order"); + + if (DesiredOffset != NaturalOffset) + Packed = true; + if (DesiredOffset != SizeSoFar) + PackedElems.push_back(Utils.getPadding(DesiredOffset - SizeSoFar)); + PackedElems.push_back(Elems[I]); + SizeSoFar = DesiredOffset + Utils.getSize(Elems[I]); + } + // If we're using the packed layout, pad it out to the desired size if + // necessary. + if (Packed) { + assert((SizeSoFar <= DesiredSize || AllowOversized) && + "requested size is too small for contents"); + if (SizeSoFar < DesiredSize) + PackedElems.push_back(Utils.getPadding(DesiredSize - SizeSoFar)); } + } - Elements.back() = llvm::ConstantInt::get(CGM.getLLVMContext(), Tmp); + llvm::StructType *STy = llvm::ConstantStruct::getTypeForElements( + CGM.getLLVMContext(), Packed ? PackedElems : UnpackedElems, Packed); - if (FitsCompletelyInPreviousByte) - return; + // Pick the type to use. If the type is layout identical to the desired + // type then use it, otherwise use whatever the builder produced for us. + if (llvm::StructType *DesiredSTy = dyn_cast<llvm::StructType>(DesiredTy)) { + if (DesiredSTy->isLayoutIdentical(STy)) + STy = DesiredSTy; } - while (FieldValue.getBitWidth() > CharWidth) { - llvm::APInt Tmp; + return llvm::ConstantStruct::get(STy, Packed ? PackedElems : UnpackedElems); +} - if (CGM.getDataLayout().isBigEndian()) { - // We want the high bits. - Tmp = - FieldValue.lshr(FieldValue.getBitWidth() - CharWidth).trunc(CharWidth); - } else { - // We want the low bits. - Tmp = FieldValue.trunc(CharWidth); +void ConstantAggregateBuilder::condense(CharUnits Offset, + llvm::Type *DesiredTy) { + CharUnits Size = getSize(DesiredTy); - FieldValue.lshrInPlace(CharWidth); - } + llvm::Optional<size_t> FirstElemToReplace = splitAt(Offset); + if (!FirstElemToReplace) + return; + size_t First = *FirstElemToReplace; - Elements.push_back(llvm::ConstantInt::get(CGM.getLLVMContext(), Tmp)); - ++NextFieldOffsetInChars; + llvm::Optional<size_t> LastElemToReplace = splitAt(Offset + Size); + if (!LastElemToReplace) + return; + size_t Last = *LastElemToReplace; - FieldValue = FieldValue.trunc(FieldValue.getBitWidth() - CharWidth); + size_t Length = Last - First; + if (Length == 0) + return; + + if (Length == 1 && Offsets[First] == Offset && + getSize(Elems[First]) == Size) { + // Re-wrap single element structs if necessary. Otherwise, leave any single + // element constant of the right size alone even if it has the wrong type. 
+ auto *STy = dyn_cast<llvm::StructType>(DesiredTy); + if (STy && STy->getNumElements() == 1 && + STy->getElementType(0) == Elems[First]->getType()) + Elems[First] = llvm::ConstantStruct::get(STy, Elems[First]); + return; } - assert(FieldValue.getBitWidth() > 0 && - "Should have at least one bit left!"); - assert(FieldValue.getBitWidth() <= CharWidth && - "Should not have more than a byte left!"); + llvm::Constant *Replacement = buildFrom( + CGM, makeArrayRef(Elems).slice(First, Length), + makeArrayRef(Offsets).slice(First, Length), Offset, getSize(DesiredTy), + /*known to have natural layout=*/false, DesiredTy, false); + replace(Elems, First, Last, {Replacement}); + replace(Offsets, First, Last, {Offset}); +} - if (FieldValue.getBitWidth() < CharWidth) { - if (CGM.getDataLayout().isBigEndian()) { - unsigned BitWidth = FieldValue.getBitWidth(); +//===----------------------------------------------------------------------===// +// ConstStructBuilder +//===----------------------------------------------------------------------===// - FieldValue = FieldValue.zext(CharWidth) << (CharWidth - BitWidth); - } else - FieldValue = FieldValue.zext(CharWidth); - } +class ConstStructBuilder { + CodeGenModule &CGM; + ConstantEmitter &Emitter; + ConstantAggregateBuilder &Builder; + CharUnits StartOffset; - // Append the last element. - Elements.push_back(llvm::ConstantInt::get(CGM.getLLVMContext(), - FieldValue)); - ++NextFieldOffsetInChars; -} +public: + static llvm::Constant *BuildStruct(ConstantEmitter &Emitter, + InitListExpr *ILE, QualType StructTy); + static llvm::Constant *BuildStruct(ConstantEmitter &Emitter, + const APValue &Value, QualType ValTy); + static bool UpdateStruct(ConstantEmitter &Emitter, + ConstantAggregateBuilder &Const, CharUnits Offset, + InitListExpr *Updater); -void ConstStructBuilder::AppendPadding(CharUnits PadSize) { - if (PadSize.isZero()) - return; +private: + ConstStructBuilder(ConstantEmitter &Emitter, + ConstantAggregateBuilder &Builder, CharUnits StartOffset) + : CGM(Emitter.CGM), Emitter(Emitter), Builder(Builder), + StartOffset(StartOffset) {} - llvm::Type *Ty = CGM.Int8Ty; - if (PadSize > CharUnits::One()) - Ty = llvm::ArrayType::get(Ty, PadSize.getQuantity()); + bool AppendField(const FieldDecl *Field, uint64_t FieldOffset, + llvm::Constant *InitExpr, bool AllowOverwrite = false); - llvm::Constant *C = llvm::UndefValue::get(Ty); - Elements.push_back(C); - assert(getAlignment(C) == CharUnits::One() && - "Padding must have 1 byte alignment!"); + bool AppendBytes(CharUnits FieldOffsetInChars, llvm::Constant *InitCst, + bool AllowOverwrite = false); - NextFieldOffsetInChars += getSizeInChars(C); -} + bool AppendBitField(const FieldDecl *Field, uint64_t FieldOffset, + llvm::ConstantInt *InitExpr, bool AllowOverwrite = false); -void ConstStructBuilder::AppendTailPadding(CharUnits RecordSize) { - assert(NextFieldOffsetInChars <= RecordSize && - "Size mismatch!"); + bool Build(InitListExpr *ILE, bool AllowOverwrite); + bool Build(const APValue &Val, const RecordDecl *RD, bool IsPrimaryBase, + const CXXRecordDecl *VTableClass, CharUnits BaseOffset); + llvm::Constant *Finalize(QualType Ty); +}; - AppendPadding(RecordSize - NextFieldOffsetInChars); -} +bool ConstStructBuilder::AppendField( + const FieldDecl *Field, uint64_t FieldOffset, llvm::Constant *InitCst, + bool AllowOverwrite) { + const ASTContext &Context = CGM.getContext(); -void ConstStructBuilder::ConvertStructToPacked() { - SmallVector<llvm::Constant *, 16> PackedElements; - CharUnits ElementOffsetInChars = 
CharUnits::Zero(); + CharUnits FieldOffsetInChars = Context.toCharUnitsFromBits(FieldOffset); - for (unsigned i = 0, e = Elements.size(); i != e; ++i) { - llvm::Constant *C = Elements[i]; + return AppendBytes(FieldOffsetInChars, InitCst, AllowOverwrite); +} - CharUnits ElementAlign = CharUnits::fromQuantity( - CGM.getDataLayout().getABITypeAlignment(C->getType())); - CharUnits AlignedElementOffsetInChars = - ElementOffsetInChars.alignTo(ElementAlign); +bool ConstStructBuilder::AppendBytes(CharUnits FieldOffsetInChars, + llvm::Constant *InitCst, + bool AllowOverwrite) { + return Builder.add(InitCst, StartOffset + FieldOffsetInChars, AllowOverwrite); +} - if (AlignedElementOffsetInChars > ElementOffsetInChars) { - // We need some padding. - CharUnits NumChars = - AlignedElementOffsetInChars - ElementOffsetInChars; +bool ConstStructBuilder::AppendBitField( + const FieldDecl *Field, uint64_t FieldOffset, llvm::ConstantInt *CI, + bool AllowOverwrite) { + uint64_t FieldSize = Field->getBitWidthValue(CGM.getContext()); + llvm::APInt FieldValue = CI->getValue(); - llvm::Type *Ty = CGM.Int8Ty; - if (NumChars > CharUnits::One()) - Ty = llvm::ArrayType::get(Ty, NumChars.getQuantity()); + // Promote the size of FieldValue if necessary + // FIXME: This should never occur, but currently it can because initializer + // constants are cast to bool, and because clang is not enforcing bitfield + // width limits. + if (FieldSize > FieldValue.getBitWidth()) + FieldValue = FieldValue.zext(FieldSize); - llvm::Constant *Padding = llvm::UndefValue::get(Ty); - PackedElements.push_back(Padding); - ElementOffsetInChars += getSizeInChars(Padding); - } + // Truncate the size of FieldValue to the bit field size. + if (FieldSize < FieldValue.getBitWidth()) + FieldValue = FieldValue.trunc(FieldSize); - PackedElements.push_back(C); - ElementOffsetInChars += getSizeInChars(C); + return Builder.addBits(FieldValue, + CGM.getContext().toBits(StartOffset) + FieldOffset, + AllowOverwrite); +} + +static bool EmitDesignatedInitUpdater(ConstantEmitter &Emitter, + ConstantAggregateBuilder &Const, + CharUnits Offset, QualType Type, + InitListExpr *Updater) { + if (Type->isRecordType()) + return ConstStructBuilder::UpdateStruct(Emitter, Const, Offset, Updater); + + auto CAT = Emitter.CGM.getContext().getAsConstantArrayType(Type); + if (!CAT) + return false; + QualType ElemType = CAT->getElementType(); + CharUnits ElemSize = Emitter.CGM.getContext().getTypeSizeInChars(ElemType); + llvm::Type *ElemTy = Emitter.CGM.getTypes().ConvertTypeForMem(ElemType); + + llvm::Constant *FillC = nullptr; + if (Expr *Filler = Updater->getArrayFiller()) { + if (!isa<NoInitExpr>(Filler)) { + FillC = Emitter.tryEmitAbstractForMemory(Filler, ElemType); + if (!FillC) + return false; + } } - assert(ElementOffsetInChars == NextFieldOffsetInChars && - "Packing the struct changed its size!"); + unsigned NumElementsToUpdate = + FillC ? CAT->getSize().getZExtValue() : Updater->getNumInits(); + for (unsigned I = 0; I != NumElementsToUpdate; ++I, Offset += ElemSize) { + Expr *Init = nullptr; + if (I < Updater->getNumInits()) + Init = Updater->getInit(I); + + if (!Init && FillC) { + if (!Const.add(FillC, Offset, true)) + return false; + } else if (!Init || isa<NoInitExpr>(Init)) { + continue; + } else if (InitListExpr *ChildILE = dyn_cast<InitListExpr>(Init)) { + if (!EmitDesignatedInitUpdater(Emitter, Const, Offset, ElemType, + ChildILE)) + return false; + // Attempt to reduce the array element to a single constant if necessary. 
+ Const.condense(Offset, ElemTy); + } else { + llvm::Constant *Val = Emitter.tryEmitPrivateForMemory(Init, ElemType); + if (!Const.add(Val, Offset, true)) + return false; + } + } - Elements.swap(PackedElements); - LLVMStructAlignment = CharUnits::One(); - Packed = true; + return true; } -bool ConstStructBuilder::Build(InitListExpr *ILE) { +bool ConstStructBuilder::Build(InitListExpr *ILE, bool AllowOverwrite) { RecordDecl *RD = ILE->getType()->getAs<RecordType>()->getDecl(); const ASTRecordLayout &Layout = CGM.getContext().getASTRecordLayout(RD); - unsigned FieldNo = 0; + unsigned FieldNo = -1; unsigned ElementNo = 0; // Bail out if we have base classes. We could support these, but they only @@ -379,35 +672,66 @@ bool ConstStructBuilder::Build(InitListExpr *ILE) { if (CXXRD->getNumBases()) return false; - for (RecordDecl::field_iterator Field = RD->field_begin(), - FieldEnd = RD->field_end(); Field != FieldEnd; ++Field, ++FieldNo) { + for (FieldDecl *Field : RD->fields()) { + ++FieldNo; + // If this is a union, skip all the fields that aren't being initialized. - if (RD->isUnion() && ILE->getInitializedFieldInUnion() != *Field) + if (RD->isUnion() && + !declaresSameEntity(ILE->getInitializedFieldInUnion(), Field)) continue; - // Don't emit anonymous bitfields, they just affect layout. - if (Field->isUnnamedBitfield()) + // Don't emit anonymous bitfields or zero-sized fields. + if (Field->isUnnamedBitfield() || Field->isZeroSize(CGM.getContext())) continue; // Get the initializer. A struct can include fields without initializers, // we just use explicit null values for them. - llvm::Constant *EltInit; + Expr *Init = nullptr; if (ElementNo < ILE->getNumInits()) - EltInit = Emitter.tryEmitPrivateForMemory(ILE->getInit(ElementNo++), - Field->getType()); - else - EltInit = Emitter.emitNullForMemory(Field->getType()); + Init = ILE->getInit(ElementNo++); + if (Init && isa<NoInitExpr>(Init)) + continue; + // When emitting a DesignatedInitUpdateExpr, a nested InitListExpr + // represents additional overwriting of our current constant value, and not + // a new constant to emit independently. + if (AllowOverwrite && + (Field->getType()->isArrayType() || Field->getType()->isRecordType())) { + if (auto *SubILE = dyn_cast<InitListExpr>(Init)) { + CharUnits Offset = CGM.getContext().toCharUnitsFromBits( + Layout.getFieldOffset(FieldNo)); + if (!EmitDesignatedInitUpdater(Emitter, Builder, StartOffset + Offset, + Field->getType(), SubILE)) + return false; + // If we split apart the field's value, try to collapse it down to a + // single value now. + Builder.condense(StartOffset + Offset, + CGM.getTypes().ConvertTypeForMem(Field->getType())); + continue; + } + } + + llvm::Constant *EltInit = + Init ? Emitter.tryEmitPrivateForMemory(Init, Field->getType()) + : Emitter.emitNullForMemory(Field->getType()); if (!EltInit) return false; if (!Field->isBitField()) { // Handle non-bitfield members. - AppendField(*Field, Layout.getFieldOffset(FieldNo), EltInit); + if (!AppendField(Field, Layout.getFieldOffset(FieldNo), EltInit, + AllowOverwrite)) + return false; + // After emitting a non-empty field with [[no_unique_address]], we may + // need to overwrite its tail padding. + if (Field->hasAttr<NoUniqueAddressAttr>()) + AllowOverwrite = true; } else { // Otherwise we have a bitfield. 
if (auto *CI = dyn_cast<llvm::ConstantInt>(EltInit)) { - AppendBitField(*Field, Layout.getFieldOffset(FieldNo), CI); + if (!AppendBitField(Field, Layout.getFieldOffset(FieldNo), CI, + AllowOverwrite)) + return false; } else { // We are trying to initialize a bitfield with a non-trivial constant, // this must require run-time code. @@ -445,7 +769,8 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, llvm::Constant *VTableAddressPoint = CGM.getCXXABI().getVTableAddressPointForConstExpr( BaseSubobject(CD, Offset), VTableClass); - AppendBytes(Offset, VTableAddressPoint); + if (!AppendBytes(Offset, VTableAddressPoint)) + return false; } // Accumulate and sort bases, in order to visit them in address order, which @@ -460,7 +785,7 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, CharUnits BaseOffset = Layout.getBaseClassOffset(BD); Bases.push_back(BaseInfo(BD, BaseOffset, BaseNo)); } - std::stable_sort(Bases.begin(), Bases.end()); + llvm::stable_sort(Bases); for (unsigned I = 0, N = Bases.size(); I != N; ++I) { BaseInfo &Base = Bases[I]; @@ -474,14 +799,15 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, unsigned FieldNo = 0; uint64_t OffsetBits = CGM.getContext().toBits(Offset); + bool AllowOverwrite = false; for (RecordDecl::field_iterator Field = RD->field_begin(), FieldEnd = RD->field_end(); Field != FieldEnd; ++Field, ++FieldNo) { // If this is a union, skip all the fields that aren't being initialized. - if (RD->isUnion() && Val.getUnionField() != *Field) + if (RD->isUnion() && !declaresSameEntity(Val.getUnionField(), *Field)) continue; - // Don't emit anonymous bitfields, they just affect layout. - if (Field->isUnnamedBitfield()) + // Don't emit anonymous bitfields or zero-sized fields. + if (Field->isUnnamedBitfield() || Field->isZeroSize(CGM.getContext())) continue; // Emit the value of the initializer. @@ -494,93 +820,37 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, if (!Field->isBitField()) { // Handle non-bitfield members. - AppendField(*Field, Layout.getFieldOffset(FieldNo) + OffsetBits, EltInit); + if (!AppendField(*Field, Layout.getFieldOffset(FieldNo) + OffsetBits, + EltInit, AllowOverwrite)) + return false; + // After emitting a non-empty field with [[no_unique_address]], we may + // need to overwrite its tail padding. + if (Field->hasAttr<NoUniqueAddressAttr>()) + AllowOverwrite = true; } else { // Otherwise we have a bitfield. - AppendBitField(*Field, Layout.getFieldOffset(FieldNo) + OffsetBits, - cast<llvm::ConstantInt>(EltInit)); + if (!AppendBitField(*Field, Layout.getFieldOffset(FieldNo) + OffsetBits, + cast<llvm::ConstantInt>(EltInit), AllowOverwrite)) + return false; } } return true; } -llvm::Constant *ConstStructBuilder::Finalize(QualType Ty) { - RecordDecl *RD = Ty->getAs<RecordType>()->getDecl(); - const ASTRecordLayout &Layout = CGM.getContext().getASTRecordLayout(RD); - - CharUnits LayoutSizeInChars = Layout.getSize(); - - if (NextFieldOffsetInChars > LayoutSizeInChars) { - // If the struct is bigger than the size of the record type, - // we must have a flexible array member at the end. - assert(RD->hasFlexibleArrayMember() && - "Must have flexible array member if struct is bigger than type!"); - - // No tail padding is necessary. - } else { - // Append tail padding if necessary. 
- CharUnits LLVMSizeInChars = - NextFieldOffsetInChars.alignTo(LLVMStructAlignment); - - if (LLVMSizeInChars != LayoutSizeInChars) - AppendTailPadding(LayoutSizeInChars); - - LLVMSizeInChars = NextFieldOffsetInChars.alignTo(LLVMStructAlignment); - - // Check if we need to convert the struct to a packed struct. - if (NextFieldOffsetInChars <= LayoutSizeInChars && - LLVMSizeInChars > LayoutSizeInChars) { - assert(!Packed && "Size mismatch!"); - - ConvertStructToPacked(); - assert(NextFieldOffsetInChars <= LayoutSizeInChars && - "Converting to packed did not help!"); - } - - LLVMSizeInChars = NextFieldOffsetInChars.alignTo(LLVMStructAlignment); - - assert(LayoutSizeInChars == LLVMSizeInChars && - "Tail padding mismatch!"); - } - - // Pick the type to use. If the type is layout identical to the ConvertType - // type then use it, otherwise use whatever the builder produced for us. - llvm::StructType *STy = - llvm::ConstantStruct::getTypeForElements(CGM.getLLVMContext(), - Elements, Packed); - llvm::Type *ValTy = CGM.getTypes().ConvertType(Ty); - if (llvm::StructType *ValSTy = dyn_cast<llvm::StructType>(ValTy)) { - if (ValSTy->isLayoutIdentical(STy)) - STy = ValSTy; - } - - llvm::Constant *Result = llvm::ConstantStruct::get(STy, Elements); - - assert(NextFieldOffsetInChars.alignTo(getAlignment(Result)) == - getSizeInChars(Result) && - "Size mismatch!"); - - return Result; -} - -llvm::Constant *ConstStructBuilder::BuildStruct(ConstantEmitter &Emitter, - ConstExprEmitter *ExprEmitter, - llvm::Constant *Base, - InitListExpr *Updater, - QualType ValTy) { - ConstStructBuilder Builder(Emitter); - if (!Builder.Build(ExprEmitter, Base, Updater)) - return nullptr; - return Builder.Finalize(ValTy); +llvm::Constant *ConstStructBuilder::Finalize(QualType Type) { + RecordDecl *RD = Type->getAs<RecordType>()->getDecl(); + llvm::Type *ValTy = CGM.getTypes().ConvertType(Type); + return Builder.build(ValTy, RD->hasFlexibleArrayMember()); } llvm::Constant *ConstStructBuilder::BuildStruct(ConstantEmitter &Emitter, InitListExpr *ILE, QualType ValTy) { - ConstStructBuilder Builder(Emitter); + ConstantAggregateBuilder Const(Emitter.CGM); + ConstStructBuilder Builder(Emitter, Const, CharUnits::Zero()); - if (!Builder.Build(ILE)) + if (!Builder.Build(ILE, /*AllowOverwrite*/false)) return nullptr; return Builder.Finalize(ValTy); @@ -589,7 +859,8 @@ llvm::Constant *ConstStructBuilder::BuildStruct(ConstantEmitter &Emitter, llvm::Constant *ConstStructBuilder::BuildStruct(ConstantEmitter &Emitter, const APValue &Val, QualType ValTy) { - ConstStructBuilder Builder(Emitter); + ConstantAggregateBuilder Const(Emitter.CGM); + ConstStructBuilder Builder(Emitter, Const, CharUnits::Zero()); const RecordDecl *RD = ValTy->castAs<RecordType>()->getDecl(); const CXXRecordDecl *CD = dyn_cast<CXXRecordDecl>(RD); @@ -599,6 +870,12 @@ llvm::Constant *ConstStructBuilder::BuildStruct(ConstantEmitter &Emitter, return Builder.Finalize(ValTy); } +bool ConstStructBuilder::UpdateStruct(ConstantEmitter &Emitter, + ConstantAggregateBuilder &Const, + CharUnits Offset, InitListExpr *Updater) { + return ConstStructBuilder(Emitter, Const, Offset) + .Build(Updater, /*AllowOverwrite*/ true); +} //===----------------------------------------------------------------------===// // ConstExprEmitter @@ -636,7 +913,7 @@ static ConstantAddress tryEmitGlobalCompoundLiteral(CodeGenModule &CGM, } static llvm::Constant * -EmitArrayConstant(CodeGenModule &CGM, const ConstantArrayType *DestType, +EmitArrayConstant(CodeGenModule &CGM, llvm::ArrayType *DesiredType, 
llvm::Type *CommonElementType, unsigned ArrayBound, SmallVectorImpl<llvm::Constant *> &Elements, llvm::Constant *Filler) { @@ -649,10 +926,8 @@ EmitArrayConstant(CodeGenModule &CGM, const ConstantArrayType *DestType, --NonzeroLength; } - if (NonzeroLength == 0) { - return llvm::ConstantAggregateZero::get( - CGM.getTypes().ConvertType(QualType(DestType, 0))); - } + if (NonzeroLength == 0) + return llvm::ConstantAggregateZero::get(DesiredType); // Add a zeroinitializer array filler if we have lots of trailing zeroes. unsigned TrailingZeroes = ArrayBound - NonzeroLength; @@ -673,9 +948,7 @@ EmitArrayConstant(CodeGenModule &CGM, const ConstantArrayType *DestType, } auto *FillerType = - CommonElementType - ? CommonElementType - : CGM.getTypes().ConvertType(DestType->getElementType()); + CommonElementType ? CommonElementType : DesiredType->getElementType(); FillerType = llvm::ArrayType::get(FillerType, TrailingZeroes); Elements.back() = llvm::ConstantAggregateZero::get(FillerType); CommonElementType = nullptr; @@ -701,10 +974,12 @@ EmitArrayConstant(CodeGenModule &CGM, const ConstantArrayType *DestType, return llvm::ConstantStruct::get(SType, Elements); } -/// This class only needs to handle two cases: -/// 1) Literals (this is used by APValue emission to emit literals). -/// 2) Arrays, structs and unions (outside C++11 mode, we don't currently -/// constant fold these types). +// This class only needs to handle arrays, structs and unions. Outside C++11 +// mode, we don't currently constant fold those types. All other types are +// handled by constant folding. +// +// Constant folding is currently missing support for a few features supported +// here: CK_ToUnion, CK_ReinterpretMemberPointer, and DesignatedInitUpdateExpr. class ConstExprEmitter : public StmtVisitor<ConstExprEmitter, llvm::Constant*, QualType> { CodeGenModule &CGM; @@ -840,6 +1115,7 @@ public: case CK_ToVoid: case CK_Dynamic: case CK_LValueBitCast: + case CK_LValueToRValueBitCast: case CK_NullToMemberPointer: case CK_UserDefinedConversion: case CK_CPointerToObjCPointerCast: @@ -875,16 +1151,14 @@ public: case CK_FloatingCast: case CK_FixedPointCast: case CK_FixedPointToBoolean: + case CK_FixedPointToIntegral: + case CK_IntegralToFixedPoint: case CK_ZeroToOCLOpaqueType: return nullptr; } llvm_unreachable("Invalid CastKind"); } - llvm::Constant *VisitCXXDefaultArgExpr(CXXDefaultArgExpr *DAE, QualType T) { - return Visit(DAE->getExpr(), T); - } - llvm::Constant *VisitCXXDefaultInitExpr(CXXDefaultInitExpr *DIE, QualType T) { // No need for a DefaultInitExprScope: we don't handle 'this' in a // constant expression. 
@@ -942,7 +1216,9 @@ public: Elts.push_back(C); } - return EmitArrayConstant(CGM, CAT, CommonElementType, NumElements, Elts, + llvm::ArrayType *Desired = + cast<llvm::ArrayType>(CGM.getTypes().ConvertType(ILE->getType())); + return EmitArrayConstant(CGM, Desired, CommonElementType, NumElements, Elts, fillC); } @@ -968,80 +1244,24 @@ public: return nullptr; } - llvm::Constant *EmitDesignatedInitUpdater(llvm::Constant *Base, - InitListExpr *Updater, - QualType destType) { - if (auto destAT = CGM.getContext().getAsArrayType(destType)) { - llvm::ArrayType *AType = cast<llvm::ArrayType>(ConvertType(destType)); - llvm::Type *ElemType = AType->getElementType(); - - unsigned NumInitElements = Updater->getNumInits(); - unsigned NumElements = AType->getNumElements(); - - std::vector<llvm::Constant *> Elts; - Elts.reserve(NumElements); - - QualType destElemType = destAT->getElementType(); - - if (auto DataArray = dyn_cast<llvm::ConstantDataArray>(Base)) - for (unsigned i = 0; i != NumElements; ++i) - Elts.push_back(DataArray->getElementAsConstant(i)); - else if (auto Array = dyn_cast<llvm::ConstantArray>(Base)) - for (unsigned i = 0; i != NumElements; ++i) - Elts.push_back(Array->getOperand(i)); - else - return nullptr; // FIXME: other array types not implemented - - llvm::Constant *fillC = nullptr; - if (Expr *filler = Updater->getArrayFiller()) - if (!isa<NoInitExpr>(filler)) - fillC = Emitter.tryEmitAbstractForMemory(filler, destElemType); - bool RewriteType = (fillC && fillC->getType() != ElemType); - - for (unsigned i = 0; i != NumElements; ++i) { - Expr *Init = nullptr; - if (i < NumInitElements) - Init = Updater->getInit(i); - - if (!Init && fillC) - Elts[i] = fillC; - else if (!Init || isa<NoInitExpr>(Init)) - ; // Do nothing. - else if (InitListExpr *ChildILE = dyn_cast<InitListExpr>(Init)) - Elts[i] = EmitDesignatedInitUpdater(Elts[i], ChildILE, destElemType); - else - Elts[i] = Emitter.tryEmitPrivateForMemory(Init, destElemType); - - if (!Elts[i]) - return nullptr; - RewriteType |= (Elts[i]->getType() != ElemType); - } - - if (RewriteType) { - std::vector<llvm::Type *> Types; - Types.reserve(NumElements); - for (unsigned i = 0; i != NumElements; ++i) - Types.push_back(Elts[i]->getType()); - llvm::StructType *SType = llvm::StructType::get(AType->getContext(), - Types, true); - return llvm::ConstantStruct::get(SType, Elts); - } - - return llvm::ConstantArray::get(AType, Elts); - } - - if (destType->isRecordType()) - return ConstStructBuilder::BuildStruct(Emitter, this, Base, Updater, - destType); - - return nullptr; - } - llvm::Constant *VisitDesignatedInitUpdateExpr(DesignatedInitUpdateExpr *E, QualType destType) { auto C = Visit(E->getBase(), destType); - if (!C) return nullptr; - return EmitDesignatedInitUpdater(C, E->getUpdater(), destType); + if (!C) + return nullptr; + + ConstantAggregateBuilder Const(CGM); + Const.add(C, CharUnits::Zero(), false); + + if (!EmitDesignatedInitUpdater(Emitter, Const, CharUnits::Zero(), destType, + E->getUpdater())) + return nullptr; + + llvm::Type *ValTy = CGM.getTypes().ConvertType(destType); + bool HasFlexibleArray = false; + if (auto *RT = destType->getAs<RecordType>()) + HasFlexibleArray = RT->getDecl()->hasFlexibleArrayMember(); + return Const.build(ValTy, HasFlexibleArray); } llvm::Constant *VisitCXXConstructExpr(CXXConstructExpr *E, QualType Ty) { @@ -1077,6 +1297,7 @@ public: } llvm::Constant *VisitStringLiteral(StringLiteral *E, QualType T) { + // This is a string literal initializing an array in an initializer. 
return CGM.GetConstantArrayFromStringLiteral(E); } @@ -1106,76 +1327,6 @@ public: } // end anonymous namespace. -bool ConstStructBuilder::Build(ConstExprEmitter *ExprEmitter, - llvm::Constant *Base, - InitListExpr *Updater) { - assert(Base && "base expression should not be empty"); - - QualType ExprType = Updater->getType(); - RecordDecl *RD = ExprType->getAs<RecordType>()->getDecl(); - const ASTRecordLayout &Layout = CGM.getContext().getASTRecordLayout(RD); - const llvm::StructLayout *BaseLayout = CGM.getDataLayout().getStructLayout( - cast<llvm::StructType>(Base->getType())); - unsigned FieldNo = -1; - unsigned ElementNo = 0; - - // Bail out if we have base classes. We could support these, but they only - // arise in C++1z where we will have already constant folded most interesting - // cases. FIXME: There are still a few more cases we can handle this way. - if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) - if (CXXRD->getNumBases()) - return false; - - for (FieldDecl *Field : RD->fields()) { - ++FieldNo; - - if (RD->isUnion() && Updater->getInitializedFieldInUnion() != Field) - continue; - - // Skip anonymous bitfields. - if (Field->isUnnamedBitfield()) - continue; - - llvm::Constant *EltInit = Base->getAggregateElement(ElementNo); - - // Bail out if the type of the ConstantStruct does not have the same layout - // as the type of the InitListExpr. - if (CGM.getTypes().ConvertType(Field->getType()) != EltInit->getType() || - Layout.getFieldOffset(ElementNo) != - BaseLayout->getElementOffsetInBits(ElementNo)) - return false; - - // Get the initializer. If we encounter an empty field or a NoInitExpr, - // we use values from the base expression. - Expr *Init = nullptr; - if (ElementNo < Updater->getNumInits()) - Init = Updater->getInit(ElementNo); - - if (!Init || isa<NoInitExpr>(Init)) - ; // Do nothing. - else if (InitListExpr *ChildILE = dyn_cast<InitListExpr>(Init)) - EltInit = ExprEmitter->EmitDesignatedInitUpdater(EltInit, ChildILE, - Field->getType()); - else - EltInit = Emitter.tryEmitPrivateForMemory(Init, Field->getType()); - - ++ElementNo; - - if (!EltInit) - return false; - - if (!Field->isBitField()) - AppendField(Field, Layout.getFieldOffset(FieldNo), EltInit); - else if (llvm::ConstantInt *CI = dyn_cast<llvm::ConstantInt>(EltInit)) - AppendBitField(Field, Layout.getFieldOffset(FieldNo), CI); - else - // Initializing a bitfield with a non-trivial constant? - return false; - } - - return true; -} - llvm::Constant *ConstantEmitter::validateAndPopAbstract(llvm::Constant *C, AbstractState saved) { Abstract = saved.OldValue; @@ -1609,6 +1760,7 @@ private: ConstantLValue VisitConstantExpr(const ConstantExpr *E); ConstantLValue VisitCompoundLiteralExpr(const CompoundLiteralExpr *E); ConstantLValue VisitStringLiteral(const StringLiteral *E); + ConstantLValue VisitObjCBoxedExpr(const ObjCBoxedExpr *E); ConstantLValue VisitObjCEncodeExpr(const ObjCEncodeExpr *E); ConstantLValue VisitObjCStringLiteral(const ObjCStringLiteral *E); ConstantLValue VisitPredefinedExpr(const PredefinedExpr *E); @@ -1650,17 +1802,7 @@ private: llvm::Constant *ConstantLValueEmitter::tryEmit() { const APValue::LValueBase &base = Value.getLValueBase(); - // Certain special array initializers are represented in APValue - // as l-values referring to the base expression which generates the - // array. This happens with e.g. string literals. These should - // probably just get their own representation kind in APValue. 
- if (DestType->isArrayType()) { - assert(!hasNonZeroOffset() && "offset on array initializer"); - auto expr = const_cast<Expr*>(base.get<const Expr*>()); - return ConstExprEmitter(Emitter).Visit(expr, DestType); - } - - // Otherwise, the destination type should be a pointer or reference + // The destination type should be a pointer or reference // type, but it might also be a cast thereof. // // FIXME: the chain of casts required should be reflected in the APValue. @@ -1700,34 +1842,21 @@ llvm::Constant *ConstantLValueEmitter::tryEmit() { /// bitcast to pointer type. llvm::Constant * ConstantLValueEmitter::tryEmitAbsolute(llvm::Type *destTy) { - auto offset = getOffset(); - // If we're producing a pointer, this is easy. - if (auto destPtrTy = cast<llvm::PointerType>(destTy)) { - if (Value.isNullPointer()) { - // FIXME: integer offsets from non-zero null pointers. - return CGM.getNullPointer(destPtrTy, DestType); - } - - // Convert the integer to a pointer-sized integer before converting it - // to a pointer. - // FIXME: signedness depends on the original integer type. - auto intptrTy = CGM.getDataLayout().getIntPtrType(destPtrTy); - llvm::Constant *C = offset; - C = llvm::ConstantExpr::getIntegerCast(getOffset(), intptrTy, - /*isSigned*/ false); - C = llvm::ConstantExpr::getIntToPtr(C, destPtrTy); - return C; + auto destPtrTy = cast<llvm::PointerType>(destTy); + if (Value.isNullPointer()) { + // FIXME: integer offsets from non-zero null pointers. + return CGM.getNullPointer(destPtrTy, DestType); } - // Otherwise, we're basically returning an integer constant. - - // FIXME: this does the wrong thing with ptrtoint of a null pointer, - // but since we don't know the original pointer type, there's not much - // we can do about it. - - auto C = getOffset(); - C = llvm::ConstantExpr::getIntegerCast(C, destTy, /*isSigned*/ false); + // Convert the integer to a pointer-sized integer before converting it + // to a pointer. + // FIXME: signedness depends on the original integer type. + auto intptrTy = CGM.getDataLayout().getIntPtrType(destPtrTy); + llvm::Constant *C; + C = llvm::ConstantExpr::getIntegerCast(getOffset(), intptrTy, + /*isSigned*/ false); + C = llvm::ConstantExpr::getIntToPtr(C, destPtrTy); return C; } @@ -1749,7 +1878,7 @@ ConstantLValueEmitter::tryEmitBase(const APValue::LValueBase &base) { if (VD->isLocalVarDecl()) { return CGM.getOrCreateStaticVarDecl( - *VD, CGM.getLLVMLinkageVarDefinition(VD, /*isConstant=*/false)); + *VD, CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false)); } } } @@ -1757,6 +1886,17 @@ ConstantLValueEmitter::tryEmitBase(const APValue::LValueBase &base) { return nullptr; } + // Handle typeid(T). + if (TypeInfoLValue TI = base.dyn_cast<TypeInfoLValue>()) { + llvm::Type *StdTypeInfoPtrTy = + CGM.getTypes().ConvertType(base.getTypeInfoType())->getPointerTo(); + llvm::Constant *TypeInfo = + CGM.GetAddrOfRTTIDescriptor(QualType(TI.getType(), 0)); + if (TypeInfo->getType() != StdTypeInfoPtrTy) + TypeInfo = llvm::ConstantExpr::getBitCast(TypeInfo, StdTypeInfoPtrTy); + return TypeInfo; + } + // Otherwise, it must be an expression. 
return Visit(base.get<const Expr*>()); } @@ -1781,25 +1921,29 @@ ConstantLValueEmitter::VisitObjCEncodeExpr(const ObjCEncodeExpr *E) { return CGM.GetAddrOfConstantStringFromObjCEncode(E); } +static ConstantLValue emitConstantObjCStringLiteral(const StringLiteral *S, + QualType T, + CodeGenModule &CGM) { + auto C = CGM.getObjCRuntime().GenerateConstantString(S); + return C.getElementBitCast(CGM.getTypes().ConvertTypeForMem(T)); +} + ConstantLValue ConstantLValueEmitter::VisitObjCStringLiteral(const ObjCStringLiteral *E) { - auto C = CGM.getObjCRuntime().GenerateConstantString(E->getString()); - return C.getElementBitCast(CGM.getTypes().ConvertTypeForMem(E->getType())); + return emitConstantObjCStringLiteral(E->getString(), E->getType(), CGM); } ConstantLValue -ConstantLValueEmitter::VisitPredefinedExpr(const PredefinedExpr *E) { - if (auto CGF = Emitter.CGF) { - LValue Res = CGF->EmitPredefinedLValue(E); - return cast<ConstantAddress>(Res.getAddress()); - } - - auto kind = E->getIdentKind(); - if (kind == PredefinedExpr::PrettyFunction) { - return CGM.GetAddrOfConstantCString("top level", ".tmp"); - } +ConstantLValueEmitter::VisitObjCBoxedExpr(const ObjCBoxedExpr *E) { + assert(E->isExpressibleAsConstantInitializer() && + "this boxed expression can't be emitted as a compile-time constant"); + auto *SL = cast<StringLiteral>(E->getSubExpr()->IgnoreParenCasts()); + return emitConstantObjCStringLiteral(SL, E->getType(), CGM); +} - return CGM.GetAddrOfConstantCString("", ".tmp"); +ConstantLValue +ConstantLValueEmitter::VisitPredefinedExpr(const PredefinedExpr *E) { + return CGM.GetAddrOfConstantStringFromLiteral(E->getFunctionName()); } ConstantLValue @@ -1867,12 +2011,17 @@ ConstantLValueEmitter::VisitMaterializeTemporaryExpr( llvm::Constant *ConstantEmitter::tryEmitPrivate(const APValue &Value, QualType DestType) { switch (Value.getKind()) { - case APValue::Uninitialized: - llvm_unreachable("Constant expressions should be initialized."); + case APValue::None: + case APValue::Indeterminate: + // Out-of-lifetime and indeterminate values can be modeled as 'undef'. + return llvm::UndefValue::get(CGM.getTypes().ConvertType(DestType)); case APValue::LValue: return ConstantLValueEmitter(*this, Value, DestType).tryEmit(); case APValue::Int: return llvm::ConstantInt::get(CGM.getLLVMContext(), Value.getInt()); + case APValue::FixedPoint: + return llvm::ConstantInt::get(CGM.getLLVMContext(), + Value.getFixedPoint().getValue()); case APValue::ComplexInt: { llvm::Constant *Complex[2]; @@ -1990,7 +2139,9 @@ llvm::Constant *ConstantEmitter::tryEmitPrivate(const APValue &Value, return llvm::ConstantAggregateZero::get(AType); } - return EmitArrayConstant(CGM, CAT, CommonElementType, NumElements, Elts, + llvm::ArrayType *Desired = + cast<llvm::ArrayType>(CGM.getTypes().ConvertType(DestType)); + return EmitArrayConstant(CGM, Desired, CommonElementType, NumElements, Elts, Filler); } case APValue::MemberPointer: @@ -2077,7 +2228,7 @@ static llvm::Constant *EmitNullConstant(CodeGenModule &CGM, for (const auto *Field : record->fields()) { // Fill in non-bitfields. (Bitfields always use a zero pattern, which we // will fill in later.) 
- if (!Field->isBitField()) { + if (!Field->isBitField() && !Field->isZeroSize(CGM.getContext())) { unsigned fieldIndex = layout.getLLVMFieldNo(Field); elements[fieldIndex] = CGM.EmitNullConstant(Field->getType()); } diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp index 1c14d4c99a23..3d082de2a14f 100644 --- a/lib/CodeGen/CGExprScalar.cpp +++ b/lib/CodeGen/CGExprScalar.cpp @@ -1,9 +1,8 @@ //===--- CGExprScalar.cpp - Emit LLVM Code for Scalar Exprs ---------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -17,6 +16,7 @@ #include "CGObjCRuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" +#include "ConstantEmitter.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/DeclObjC.h" @@ -125,6 +125,21 @@ struct BinOpInfo { return CFP->isZero(); return true; } + + /// Check if either operand is a fixed point type or integer type, with at + /// least one being a fixed point type. In any case, this + /// operation did not follow usual arithmetic conversion and both operands may + /// not be the same. + bool isFixedPointBinOp() const { + // We cannot simply check the result type since comparison operations return + // an int. + if (const auto *BinOp = dyn_cast<BinaryOperator>(E)) { + QualType LHSType = BinOp->getLHS()->getType(); + QualType RHSType = BinOp->getRHS()->getType(); + return LHSType->isFixedPointType() || RHSType->isFixedPointType(); + } + return false; + } }; static bool MustVisitNullValue(const Expr *E) { @@ -298,7 +313,7 @@ public: /// boolean (i1) truth value. This is equivalent to "Val != 0". Value *EmitConversionToBool(Value *Src, QualType DstTy); - /// Emit a check that a conversion to or from a floating-point type does not + /// Emit a check that a conversion from a floating-point type does not /// overflow. void EmitFloatConversionCheck(Value *OrigSrc, QualType OrigSrcType, Value *Src, QualType SrcType, QualType DstType, @@ -349,8 +364,14 @@ public: SourceLocation Loc, ScalarConversionOpts Opts = ScalarConversionOpts()); + /// Convert between either a fixed point and other fixed point or fixed point + /// and an integer. Value *EmitFixedPointConversion(Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc); + Value *EmitFixedPointConversion(Value *Src, FixedPointSemantics &SrcFixedSema, + FixedPointSemantics &DstFixedSema, + SourceLocation Loc, + bool DstIsInteger = false); /// Emit a conversion from the specified complex type to the specified /// destination type, where the destination type is an LLVM scalar type. 
@@ -620,12 +641,20 @@ public: Value *VisitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *E) { return EmitLoadOfLValue(E); } + Value *VisitSourceLocExpr(SourceLocExpr *SLE) { + auto &Ctx = CGF.getContext(); + APValue Evaluated = + SLE->EvaluateInContext(Ctx, CGF.CurSourceLocExprScope.getDefaultExpr()); + return ConstantEmitter(CGF.CGM, &CGF) + .emitAbstract(SLE->getLocation(), Evaluated, SLE->getType()); + } Value *VisitCXXDefaultArgExpr(CXXDefaultArgExpr *DAE) { + CodeGenFunction::CXXDefaultArgExprScope Scope(CGF, DAE); return Visit(DAE->getExpr()); } Value *VisitCXXDefaultInitExpr(CXXDefaultInitExpr *DIE) { - CodeGenFunction::CXXDefaultInitExprScope Scope(CGF); + CodeGenFunction::CXXDefaultInitExprScope Scope(CGF, DIE); return Visit(DIE->getExpr()); } Value *VisitCXXThisExpr(CXXThisExpr *TE) { @@ -729,6 +758,9 @@ public: return Builder.CreateOr(Ops.LHS, Ops.RHS, "or"); } + // Helper functions for fixed point binary operations. + Value *EmitFixedPointBinOp(const BinOpInfo &Ops); + BinOpInfo EmitBinOps(const BinaryOperator *E); LValue EmitCompoundAssignLValue(const CompoundAssignOperator *E, Value *(ScalarExprEmitter::*F)(const BinOpInfo &), @@ -832,128 +864,63 @@ Value *ScalarExprEmitter::EmitConversionToBool(Value *Src, QualType SrcType) { void ScalarExprEmitter::EmitFloatConversionCheck( Value *OrigSrc, QualType OrigSrcType, Value *Src, QualType SrcType, QualType DstType, llvm::Type *DstTy, SourceLocation Loc) { + assert(SrcType->isFloatingType() && "not a conversion from floating point"); + if (!isa<llvm::IntegerType>(DstTy)) + return; + CodeGenFunction::SanitizerScope SanScope(&CGF); using llvm::APFloat; using llvm::APSInt; - llvm::Type *SrcTy = Src->getType(); - llvm::Value *Check = nullptr; - if (llvm::IntegerType *IntTy = dyn_cast<llvm::IntegerType>(SrcTy)) { - // Integer to floating-point. This can fail for unsigned short -> __half - // or unsigned __int128 -> float. - assert(DstType->isFloatingType()); - bool SrcIsUnsigned = OrigSrcType->isUnsignedIntegerOrEnumerationType(); - - APFloat LargestFloat = - APFloat::getLargest(CGF.getContext().getFloatTypeSemantics(DstType)); - APSInt LargestInt(IntTy->getBitWidth(), SrcIsUnsigned); - - bool IsExact; - if (LargestFloat.convertToInteger(LargestInt, APFloat::rmTowardZero, - &IsExact) != APFloat::opOK) - // The range of representable values of this floating point type includes - // all values of this integer type. Don't need an overflow check. - return; - - llvm::Value *Max = llvm::ConstantInt::get(VMContext, LargestInt); - if (SrcIsUnsigned) - Check = Builder.CreateICmpULE(Src, Max); - else { - llvm::Value *Min = llvm::ConstantInt::get(VMContext, -LargestInt); - llvm::Value *GE = Builder.CreateICmpSGE(Src, Min); - llvm::Value *LE = Builder.CreateICmpSLE(Src, Max); - Check = Builder.CreateAnd(GE, LE); - } - } else { - const llvm::fltSemantics &SrcSema = - CGF.getContext().getFloatTypeSemantics(OrigSrcType); - if (isa<llvm::IntegerType>(DstTy)) { - // Floating-point to integer. This has undefined behavior if the source is - // +-Inf, NaN, or doesn't fit into the destination type (after truncation - // to an integer). - unsigned Width = CGF.getContext().getIntWidth(DstType); - bool Unsigned = DstType->isUnsignedIntegerOrEnumerationType(); - - APSInt Min = APSInt::getMinValue(Width, Unsigned); - APFloat MinSrc(SrcSema, APFloat::uninitialized); - if (MinSrc.convertFromAPInt(Min, !Unsigned, APFloat::rmTowardZero) & - APFloat::opOverflow) - // Don't need an overflow check for lower bound. Just check for - // -Inf/NaN. 
- MinSrc = APFloat::getInf(SrcSema, true); - else - // Find the largest value which is too small to represent (before - // truncation toward zero). - MinSrc.subtract(APFloat(SrcSema, 1), APFloat::rmTowardNegative); - - APSInt Max = APSInt::getMaxValue(Width, Unsigned); - APFloat MaxSrc(SrcSema, APFloat::uninitialized); - if (MaxSrc.convertFromAPInt(Max, !Unsigned, APFloat::rmTowardZero) & - APFloat::opOverflow) - // Don't need an overflow check for upper bound. Just check for - // +Inf/NaN. - MaxSrc = APFloat::getInf(SrcSema, false); - else - // Find the smallest value which is too large to represent (before - // truncation toward zero). - MaxSrc.add(APFloat(SrcSema, 1), APFloat::rmTowardPositive); - - // If we're converting from __half, convert the range to float to match - // the type of src. - if (OrigSrcType->isHalfType()) { - const llvm::fltSemantics &Sema = - CGF.getContext().getFloatTypeSemantics(SrcType); - bool IsInexact; - MinSrc.convert(Sema, APFloat::rmTowardZero, &IsInexact); - MaxSrc.convert(Sema, APFloat::rmTowardZero, &IsInexact); - } - - llvm::Value *GE = - Builder.CreateFCmpOGT(Src, llvm::ConstantFP::get(VMContext, MinSrc)); - llvm::Value *LE = - Builder.CreateFCmpOLT(Src, llvm::ConstantFP::get(VMContext, MaxSrc)); - Check = Builder.CreateAnd(GE, LE); - } else { - // FIXME: Maybe split this sanitizer out from float-cast-overflow. - // - // Floating-point to floating-point. This has undefined behavior if the - // source is not in the range of representable values of the destination - // type. The C and C++ standards are spectacularly unclear here. We - // diagnose finite out-of-range conversions, but allow infinities and NaNs - // to convert to the corresponding value in the smaller type. - // - // C11 Annex F gives all such conversions defined behavior for IEC 60559 - // conforming implementations. Unfortunately, LLVM's fptrunc instruction - // does not. - - // Converting from a lower rank to a higher rank can never have - // undefined behavior, since higher-rank types must have a superset - // of values of lower-rank types. - if (CGF.getContext().getFloatingTypeOrder(OrigSrcType, DstType) != 1) - return; - - assert(!OrigSrcType->isHalfType() && - "should not check conversion from __half, it has the lowest rank"); - - const llvm::fltSemantics &DstSema = - CGF.getContext().getFloatTypeSemantics(DstType); - APFloat MinBad = APFloat::getLargest(DstSema, false); - APFloat MaxBad = APFloat::getInf(DstSema, false); - - bool IsInexact; - MinBad.convert(SrcSema, APFloat::rmTowardZero, &IsInexact); - MaxBad.convert(SrcSema, APFloat::rmTowardZero, &IsInexact); - - Value *AbsSrc = CGF.EmitNounwindRuntimeCall( - CGF.CGM.getIntrinsic(llvm::Intrinsic::fabs, Src->getType()), Src); - llvm::Value *GE = - Builder.CreateFCmpOGT(AbsSrc, llvm::ConstantFP::get(VMContext, MinBad)); - llvm::Value *LE = - Builder.CreateFCmpOLT(AbsSrc, llvm::ConstantFP::get(VMContext, MaxBad)); - Check = Builder.CreateNot(Builder.CreateAnd(GE, LE)); - } - } + const llvm::fltSemantics &SrcSema = + CGF.getContext().getFloatTypeSemantics(OrigSrcType); + + // Floating-point to integer. This has undefined behavior if the source is + // +-Inf, NaN, or doesn't fit into the destination type (after truncation + // to an integer). 
+ unsigned Width = CGF.getContext().getIntWidth(DstType); + bool Unsigned = DstType->isUnsignedIntegerOrEnumerationType(); + + APSInt Min = APSInt::getMinValue(Width, Unsigned); + APFloat MinSrc(SrcSema, APFloat::uninitialized); + if (MinSrc.convertFromAPInt(Min, !Unsigned, APFloat::rmTowardZero) & + APFloat::opOverflow) + // Don't need an overflow check for lower bound. Just check for + // -Inf/NaN. + MinSrc = APFloat::getInf(SrcSema, true); + else + // Find the largest value which is too small to represent (before + // truncation toward zero). + MinSrc.subtract(APFloat(SrcSema, 1), APFloat::rmTowardNegative); + + APSInt Max = APSInt::getMaxValue(Width, Unsigned); + APFloat MaxSrc(SrcSema, APFloat::uninitialized); + if (MaxSrc.convertFromAPInt(Max, !Unsigned, APFloat::rmTowardZero) & + APFloat::opOverflow) + // Don't need an overflow check for upper bound. Just check for + // +Inf/NaN. + MaxSrc = APFloat::getInf(SrcSema, false); + else + // Find the smallest value which is too large to represent (before + // truncation toward zero). + MaxSrc.add(APFloat(SrcSema, 1), APFloat::rmTowardPositive); + + // If we're converting from __half, convert the range to float to match + // the type of src. + if (OrigSrcType->isHalfType()) { + const llvm::fltSemantics &Sema = + CGF.getContext().getFloatTypeSemantics(SrcType); + bool IsInexact; + MinSrc.convert(Sema, APFloat::rmTowardZero, &IsInexact); + MaxSrc.convert(Sema, APFloat::rmTowardZero, &IsInexact); + } + + llvm::Value *GE = + Builder.CreateFCmpOGT(Src, llvm::ConstantFP::get(VMContext, MinSrc)); + llvm::Value *LE = + Builder.CreateFCmpOLT(Src, llvm::ConstantFP::get(VMContext, MaxSrc)); + Check = Builder.CreateAnd(GE, LE); llvm::Constant *StaticArgs[] = {CGF.EmitCheckSourceLocation(Loc), CGF.EmitCheckTypeDescriptor(OrigSrcType), @@ -1205,17 +1172,25 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, // TODO(leonardchan): When necessary, add another if statement checking for // conversions to fixed point types from other types. if (SrcType->isFixedPointType()) { - if (DstType->isFixedPointType()) { - return EmitFixedPointConversion(Src, SrcType, DstType, Loc); - } else if (DstType->isBooleanType()) { + if (DstType->isBooleanType()) + // It is important that we check this before checking if the dest type is + // an integer because booleans are technically integer types. // We do not need to check the padding bit on unsigned types if unsigned // padding is enabled because overflow into this bit is undefined // behavior. return Builder.CreateIsNotNull(Src, "tobool"); - } + if (DstType->isFixedPointType() || DstType->isIntegerType()) + return EmitFixedPointConversion(Src, SrcType, DstType, Loc); + + llvm_unreachable( + "Unhandled scalar conversion from a fixed point type to another type."); + } else if (DstType->isFixedPointType()) { + if (SrcType->isIntegerType()) + // This also includes converting booleans and enums to fixed point types. + return EmitFixedPointConversion(Src, SrcType, DstType, Loc); llvm_unreachable( - "Unhandled scalar conversion involving a fixed point type."); + "Unhandled scalar conversion to a fixed point type from another type."); } QualType NoncanonicalSrcType = SrcType; @@ -1351,9 +1326,12 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, llvm::Type *ResTy = DstTy; // An overflowing conversion has undefined behavior if either the source type - // or the destination type is a floating-point type. + // or the destination type is a floating-point type. 
However, we consider the + // range of representable values for all floating-point types to be + // [-inf,+inf], so no overflow can ever happen when the destination type is a + // floating-point type. if (CGF.SanOpts.has(SanitizerKind::FloatCastOverflow) && - (OrigSrcType->isFloatingType() || DstType->isFloatingType())) + OrigSrcType->isFloatingType()) EmitFloatConversionCheck(OrigSrc, OrigSrcType, Src, SrcType, DstType, DstTy, Loc); @@ -1423,17 +1401,21 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, Value *ScalarExprEmitter::EmitFixedPointConversion(Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc) { - using llvm::APInt; - using llvm::ConstantInt; - using llvm::Value; - - assert(SrcTy->isFixedPointType()); - assert(DstTy->isFixedPointType()); - FixedPointSemantics SrcFPSema = CGF.getContext().getFixedPointSemantics(SrcTy); FixedPointSemantics DstFPSema = CGF.getContext().getFixedPointSemantics(DstTy); + return EmitFixedPointConversion(Src, SrcFPSema, DstFPSema, Loc, + DstTy->isIntegerType()); +} + +Value *ScalarExprEmitter::EmitFixedPointConversion( + Value *Src, FixedPointSemantics &SrcFPSema, FixedPointSemantics &DstFPSema, + SourceLocation Loc, bool DstIsInteger) { + using llvm::APInt; + using llvm::ConstantInt; + using llvm::Value; + unsigned SrcWidth = SrcFPSema.getWidth(); unsigned DstWidth = DstFPSema.getWidth(); unsigned SrcScale = SrcFPSema.getScale(); @@ -1446,13 +1428,26 @@ Value *ScalarExprEmitter::EmitFixedPointConversion(Value *Src, QualType SrcTy, Value *Result = Src; unsigned ResultWidth = SrcWidth; - if (!DstFPSema.isSaturated()) { - // Downscale. - if (DstScale < SrcScale) - Result = SrcIsSigned ? - Builder.CreateAShr(Result, SrcScale - DstScale, "downscale") : - Builder.CreateLShr(Result, SrcScale - DstScale, "downscale"); + // Downscale. + if (DstScale < SrcScale) { + // When converting to integers, we round towards zero. For negative numbers, + // right shifting rounds towards negative infinity. In this case, we can + // just round up before shifting. + if (DstIsInteger && SrcIsSigned) { + Value *Zero = llvm::Constant::getNullValue(Result->getType()); + Value *IsNegative = Builder.CreateICmpSLT(Result, Zero); + Value *LowBits = ConstantInt::get( + CGF.getLLVMContext(), APInt::getLowBitsSet(ResultWidth, SrcScale)); + Value *Rounded = Builder.CreateAdd(Result, LowBits); + Result = Builder.CreateSelect(IsNegative, Rounded, Result); + } + Result = SrcIsSigned + ? Builder.CreateAShr(Result, SrcScale - DstScale, "downscale") + : Builder.CreateLShr(Result, SrcScale - DstScale, "downscale"); + } + + if (!DstFPSema.isSaturated()) { // Resize. Result = Builder.CreateIntCast(Result, DstIntTy, SrcIsSigned, "resize"); @@ -1462,14 +1457,11 @@ Value *ScalarExprEmitter::EmitFixedPointConversion(Value *Src, QualType SrcTy, } else { // Adjust the number of fractional bits. if (DstScale > SrcScale) { - ResultWidth = SrcWidth + DstScale - SrcScale; + // Compare to DstWidth to prevent resizing twice. + ResultWidth = std::max(SrcWidth + DstScale - SrcScale, DstWidth); llvm::Type *UpscaledTy = Builder.getIntNTy(ResultWidth); Result = Builder.CreateIntCast(Result, UpscaledTy, SrcIsSigned, "resize"); Result = Builder.CreateShl(Result, DstScale - SrcScale, "upscale"); - } else if (DstScale < SrcScale) { - Result = SrcIsSigned ? - Builder.CreateAShr(Result, SrcScale - DstScale, "downscale") : - Builder.CreateLShr(Result, SrcScale - DstScale, "downscale"); } // Handle saturation. 
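The rewritten downscale path above addresses the one subtlety its new comment describes: an arithmetic shift right rounds toward negative infinity, while a fixed-point-to-integer conversion must round toward zero, so negative values are biased upward before the shift. A minimal sketch of that adjustment, assuming two's-complement arithmetic and an arithmetic right shift (this mirrors the emitted IR rather than reproducing Clang code, and glosses over overflow at the type's minimum, which the real lowering handles at full width):

    #include <cassert>
    #include <cstdint>

    int64_t fixedToIntTowardZero(int64_t Src, unsigned Scale) {
      // LowBits corresponds to APInt::getLowBitsSet(ResultWidth, SrcScale)
      // in the patch, i.e. 2^Scale - 1.
      const int64_t LowBits = (int64_t{1} << Scale) - 1;
      if (Src < 0)          // the IsNegative compare/select in the emitted IR
        Src += LowBits;     // bias so the shift below truncates toward zero
      return Src >> Scale;  // AShr
    }

    int main() {
      assert(fixedToIntTowardZero(-640, 8) == -2); // -2.5 at scale 8 -> -2, not -3
      assert(fixedToIntTowardZero(640, 8) == 2);   //  2.5 at scale 8 ->  2
      return 0;
    }

Without the bias, -640 >> 8 would yield -3 (rounding toward negative infinity); adding the low 8 bits first (-640 + 255 = -385, then -385 >> 8 = -2) produces the truncation-toward-zero result that C-style conversions require.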
@@ -1493,7 +1485,8 @@ Value *ScalarExprEmitter::EmitFixedPointConversion(Value *Src, QualType SrcTy, } // Resize the integer part to get the final destination size. - Result = Builder.CreateIntCast(Result, DstIntTy, SrcIsSigned, "resize"); + if (ResultWidth != DstWidth) + Result = Builder.CreateIntCast(Result, DstIntTy, SrcIsSigned, "resize"); } return Result; } @@ -1978,6 +1971,15 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { return EmitLoadOfLValue(LV, CE->getExprLoc()); } + case CK_LValueToRValueBitCast: { + LValue SourceLVal = CGF.EmitLValue(E); + Address Addr = Builder.CreateElementBitCast(SourceLVal.getAddress(), + CGF.ConvertTypeForMem(DestTy)); + LValue DestLV = CGF.MakeAddrLValue(Addr, DestTy); + DestLV.setTBAAInfo(TBAAAccessInfo::getMayAliasInfo()); + return EmitLoadOfLValue(DestLV, CE->getExprLoc()); + } + case CK_CPointerToObjCPointerCast: case CK_BlockPointerToObjCPointerCast: case CK_AnyPointerToBlockPointerCast: @@ -2017,6 +2019,12 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { } } + // Update heapallocsite metadata when there is an explicit cast. + if (llvm::CallInst *CI = dyn_cast<llvm::CallInst>(Src)) + if (CI->getMetadata("heapallocsite") && isa<ExplicitCastExpr>(CE)) + CGF.getDebugInfo()-> + addHeapAllocSiteMetadata(CI, CE->getType(), CE->getExprLoc()); + return Builder.CreateBitCast(Src, DstTy); } case CK_AddressSpaceConversion: { @@ -2087,14 +2095,14 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { case CK_NullToPointer: if (MustVisitNullValue(E)) - (void) Visit(E); + CGF.EmitIgnoredExpr(E); return CGF.CGM.getNullPointer(cast<llvm::PointerType>(ConvertType(DestTy)), DestTy); case CK_NullToMemberPointer: { if (MustVisitNullValue(E)) - (void) Visit(E); + CGF.EmitIgnoredExpr(E); const MemberPointerType *MPT = CE->getType()->getAs<MemberPointerType>(); return CGF.CGM.getCXXABI().EmitNullMemberPointer(MPT); @@ -2200,6 +2208,21 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { return EmitScalarConversion(Visit(E), E->getType(), DestTy, CE->getExprLoc()); + case CK_FixedPointToIntegral: + assert(E->getType()->isFixedPointType() && + "Expected src type to be fixed point type"); + assert(DestTy->isIntegerType() && "Expected dest type to be an integer"); + return EmitScalarConversion(Visit(E), E->getType(), DestTy, + CE->getExprLoc()); + + case CK_IntegralToFixedPoint: + assert(E->getType()->isIntegerType() && + "Expected src type to be an integer"); + assert(DestTy->isFixedPointType() && + "Expected dest type to be fixed point type"); + return EmitScalarConversion(Visit(E), E->getType(), DestTy, + CE->getExprLoc()); + case CK_IntegralCast: { ScalarConversionOpts Opts; if (auto *ICE = dyn_cast<ImplicitCastExpr>(CE)) { @@ -2527,14 +2550,14 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, } if (atomicPHI) { - llvm::BasicBlock *opBB = Builder.GetInsertBlock(); + llvm::BasicBlock *curBlock = Builder.GetInsertBlock(); llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn); auto Pair = CGF.EmitAtomicCompareExchange( LV, RValue::get(atomicPHI), RValue::get(value), E->getExprLoc()); llvm::Value *old = CGF.EmitToMemory(Pair.first.getScalarVal(), type); llvm::Value *success = Pair.second; - atomicPHI->addIncoming(old, opBB); - Builder.CreateCondBr(success, contBB, opBB); + atomicPHI->addIncoming(old, curBlock); + Builder.CreateCondBr(success, contBB, atomicPHI->getParent()); Builder.SetInsertPoint(contBB); return isPre ? 
value : input; } @@ -2881,14 +2904,14 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( Loc, ScalarConversionOpts(CGF.SanOpts)); if (atomicPHI) { - llvm::BasicBlock *opBB = Builder.GetInsertBlock(); + llvm::BasicBlock *curBlock = Builder.GetInsertBlock(); llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn); auto Pair = CGF.EmitAtomicCompareExchange( LHSLV, RValue::get(atomicPHI), RValue::get(Result), E->getExprLoc()); llvm::Value *old = CGF.EmitToMemory(Pair.first.getScalarVal(), LHSTy); llvm::Value *success = Pair.second; - atomicPHI->addIncoming(old, opBB); - Builder.CreateCondBr(success, contBB, opBB); + atomicPHI->addIncoming(old, curBlock); + Builder.CreateCondBr(success, contBB, atomicPHI->getParent()); Builder.SetInsertPoint(contBB); return LHSLV; } @@ -2908,7 +2931,7 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( Value *ScalarExprEmitter::EmitCompoundAssign(const CompoundAssignOperator *E, Value *(ScalarExprEmitter::*Func)(const BinOpInfo &)) { bool Ignore = TestAndClearIgnoreResultAssign(); - Value *RHS; + Value *RHS = nullptr; LValue LHS = EmitCompoundAssignLValue(E, Func, RHS); // If the result is clearly ignored, return now. @@ -3090,7 +3113,8 @@ Value *ScalarExprEmitter::EmitOverflowCheckedBinOp(const BinOpInfo &Ops) { llvm::Type *argTypes[] = { CGF.Int64Ty, CGF.Int64Ty, Int8Ty, Int8Ty }; llvm::FunctionType *handlerTy = llvm::FunctionType::get(CGF.Int64Ty, argTypes, true); - llvm::Value *handler = CGF.CGM.CreateRuntimeFunction(handlerTy, *handlerName); + llvm::FunctionCallee handler = + CGF.CGM.CreateRuntimeFunction(handlerTy, *handlerName); // Sign extend the args to 64-bit, so that we can use the same handler for // all types of overflow. @@ -3338,9 +3362,119 @@ Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) { return propagateFMFlags(V, op); } + if (op.isFixedPointBinOp()) + return EmitFixedPointBinOp(op); + return Builder.CreateAdd(op.LHS, op.RHS, "add"); } +/// The resulting value must be calculated with exact precision, so the operands +/// may not be the same type. +Value *ScalarExprEmitter::EmitFixedPointBinOp(const BinOpInfo &op) { + using llvm::APSInt; + using llvm::ConstantInt; + + const auto *BinOp = cast<BinaryOperator>(op.E); + + // The result is a fixed point type and at least one of the operands is fixed + // point while the other is either fixed point or an int. This resulting type + // should be determined by Sema::handleFixedPointConversions(). + QualType ResultTy = op.Ty; + QualType LHSTy = BinOp->getLHS()->getType(); + QualType RHSTy = BinOp->getRHS()->getType(); + ASTContext &Ctx = CGF.getContext(); + Value *LHS = op.LHS; + Value *RHS = op.RHS; + + auto LHSFixedSema = Ctx.getFixedPointSemantics(LHSTy); + auto RHSFixedSema = Ctx.getFixedPointSemantics(RHSTy); + auto ResultFixedSema = Ctx.getFixedPointSemantics(ResultTy); + auto CommonFixedSema = LHSFixedSema.getCommonSemantics(RHSFixedSema); + + // Convert the operands to the full precision type. + Value *FullLHS = EmitFixedPointConversion(LHS, LHSFixedSema, CommonFixedSema, + BinOp->getExprLoc()); + Value *FullRHS = EmitFixedPointConversion(RHS, RHSFixedSema, CommonFixedSema, + BinOp->getExprLoc()); + + // Perform the actual addition. + Value *Result; + switch (BinOp->getOpcode()) { + case BO_Add: { + if (ResultFixedSema.isSaturated()) { + llvm::Intrinsic::ID IID = ResultFixedSema.isSigned() + ? 
llvm::Intrinsic::sadd_sat + : llvm::Intrinsic::uadd_sat; + Result = Builder.CreateBinaryIntrinsic(IID, FullLHS, FullRHS); + } else { + Result = Builder.CreateAdd(FullLHS, FullRHS); + } + break; + } + case BO_Sub: { + if (ResultFixedSema.isSaturated()) { + llvm::Intrinsic::ID IID = ResultFixedSema.isSigned() + ? llvm::Intrinsic::ssub_sat + : llvm::Intrinsic::usub_sat; + Result = Builder.CreateBinaryIntrinsic(IID, FullLHS, FullRHS); + } else { + Result = Builder.CreateSub(FullLHS, FullRHS); + } + break; + } + case BO_LT: + return CommonFixedSema.isSigned() ? Builder.CreateICmpSLT(FullLHS, FullRHS) + : Builder.CreateICmpULT(FullLHS, FullRHS); + case BO_GT: + return CommonFixedSema.isSigned() ? Builder.CreateICmpSGT(FullLHS, FullRHS) + : Builder.CreateICmpUGT(FullLHS, FullRHS); + case BO_LE: + return CommonFixedSema.isSigned() ? Builder.CreateICmpSLE(FullLHS, FullRHS) + : Builder.CreateICmpULE(FullLHS, FullRHS); + case BO_GE: + return CommonFixedSema.isSigned() ? Builder.CreateICmpSGE(FullLHS, FullRHS) + : Builder.CreateICmpUGE(FullLHS, FullRHS); + case BO_EQ: + // For equality operations, we assume any padding bits on unsigned types are + // zero'd out. They could be overwritten through non-saturating operations + // that cause overflow, but this leads to undefined behavior. + return Builder.CreateICmpEQ(FullLHS, FullRHS); + case BO_NE: + return Builder.CreateICmpNE(FullLHS, FullRHS); + case BO_Mul: + case BO_Div: + case BO_Shl: + case BO_Shr: + case BO_Cmp: + case BO_LAnd: + case BO_LOr: + case BO_MulAssign: + case BO_DivAssign: + case BO_AddAssign: + case BO_SubAssign: + case BO_ShlAssign: + case BO_ShrAssign: + llvm_unreachable("Found unimplemented fixed point binary operation"); + case BO_PtrMemD: + case BO_PtrMemI: + case BO_Rem: + case BO_Xor: + case BO_And: + case BO_Or: + case BO_Assign: + case BO_RemAssign: + case BO_AndAssign: + case BO_XorAssign: + case BO_OrAssign: + case BO_Comma: + llvm_unreachable("Found unsupported binary operation for fixed point types."); + } + + // Convert to the result type. + return EmitFixedPointConversion(Result, CommonFixedSema, ResultFixedSema, + BinOp->getExprLoc()); +} + Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) { // The LHS is always a pointer if either side is. if (!op.LHS->getType()->isPointerTy()) { @@ -3372,6 +3506,9 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) { return propagateFMFlags(V, op); } + if (op.isFixedPointBinOp()) + return EmitFixedPointBinOp(op); + return Builder.CreateSub(op.LHS, op.RHS, "sub"); } @@ -3450,7 +3587,8 @@ Value *ScalarExprEmitter::EmitShl(const BinOpInfo &Ops) { bool SanitizeBase = CGF.SanOpts.has(SanitizerKind::ShiftBase) && Ops.Ty->hasSignedIntegerRepresentation() && - !CGF.getLangOpts().isSignedOverflowDefined(); + !CGF.getLangOpts().isSignedOverflowDefined() && + !CGF.getLangOpts().CPlusPlus2a; bool SanitizeExponent = CGF.SanOpts.has(SanitizerKind::ShiftExponent); // OpenCL 6.3j: shift values are effectively % word size of LHS. 
if (CGF.getLangOpts().OpenCL) @@ -3591,8 +3729,9 @@ Value *ScalarExprEmitter::EmitCompare(const BinaryOperator *E, Result = CGF.CGM.getCXXABI().EmitMemberPointerComparison( CGF, LHS, RHS, MPT, E->getOpcode() == BO_NE); } else if (!LHSTy->isAnyComplexType() && !RHSTy->isAnyComplexType()) { - Value *LHS = Visit(E->getLHS()); - Value *RHS = Visit(E->getRHS()); + BinOpInfo BOInfo = EmitBinOps(E); + Value *LHS = BOInfo.LHS; + Value *RHS = BOInfo.RHS; // If AltiVec, the comparison results in a numeric type, so we use // intrinsics comparing vectors and giving 0 or 1 as a result @@ -3670,7 +3809,9 @@ Value *ScalarExprEmitter::EmitCompare(const BinaryOperator *E, E->getExprLoc()); } - if (LHS->getType()->isFPOrFPVectorTy()) { + if (BOInfo.isFixedPointBinOp()) { + Result = EmitFixedPointBinOp(BOInfo); + } else if (LHS->getType()->isFPOrFPVectorTy()) { Result = Builder.CreateFCmp(FCmpOpc, LHS, RHS, "cmp"); } else if (LHSTy->hasSignedIntegerRepresentation()) { Result = Builder.CreateICmp(SICmpOpc, LHS, RHS, "cmp"); diff --git a/lib/CodeGen/CGGPUBuiltin.cpp b/lib/CodeGen/CGGPUBuiltin.cpp index b5375ffb8db7..d7e267630762 100644 --- a/lib/CodeGen/CGGPUBuiltin.cpp +++ b/lib/CodeGen/CGGPUBuiltin.cpp @@ -1,9 +1,8 @@ //===------ CGGPUBuiltin.cpp - Codegen for GPU builtins -------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/CGLoopInfo.cpp b/lib/CodeGen/CGLoopInfo.cpp index fd0a9c773a2e..b2bc42bfa013 100644 --- a/lib/CodeGen/CGLoopInfo.cpp +++ b/lib/CodeGen/CGLoopInfo.cpp @@ -1,9 +1,8 @@ //===---- CGLoopInfo.cpp - LLVM CodeGen for loop metadata -*- C++ -*-------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -19,138 +18,396 @@ using namespace clang::CodeGen; using namespace llvm; -static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs, - const llvm::DebugLoc &StartLoc, - const llvm::DebugLoc &EndLoc, MDNode *&AccGroup) { +MDNode * +LoopInfo::createLoopPropertiesMetadata(ArrayRef<Metadata *> LoopProperties) { + LLVMContext &Ctx = Header->getContext(); + SmallVector<Metadata *, 4> NewLoopProperties; + TempMDTuple TempNode = MDNode::getTemporary(Ctx, None); + NewLoopProperties.push_back(TempNode.get()); + NewLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); - if (!Attrs.IsParallel && Attrs.VectorizeWidth == 0 && - Attrs.InterleaveCount == 0 && Attrs.UnrollCount == 0 && - Attrs.UnrollAndJamCount == 0 && !Attrs.PipelineDisabled && - Attrs.PipelineInitiationInterval == 0 && - Attrs.VectorizeEnable == LoopAttributes::Unspecified && - Attrs.UnrollEnable == LoopAttributes::Unspecified && - Attrs.UnrollAndJamEnable == LoopAttributes::Unspecified && - Attrs.DistributeEnable == LoopAttributes::Unspecified && !StartLoc && - !EndLoc) - return nullptr; + MDNode *LoopID = MDNode::getDistinct(Ctx, NewLoopProperties); + LoopID->replaceOperandWith(0, LoopID); + return LoopID; +} + +MDNode *LoopInfo::createPipeliningMetadata(const LoopAttributes &Attrs, + ArrayRef<Metadata *> LoopProperties, + bool &HasUserTransforms) { + LLVMContext &Ctx = Header->getContext(); + + Optional<bool> Enabled; + if (Attrs.PipelineDisabled) + Enabled = false; + else if (Attrs.PipelineInitiationInterval != 0) + Enabled = true; + + if (Enabled != true) { + SmallVector<Metadata *, 4> NewLoopProperties; + if (Enabled == false) { + NewLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); + NewLoopProperties.push_back( + MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.pipeline.disable"), + ConstantAsMetadata::get(ConstantInt::get( + llvm::Type::getInt1Ty(Ctx), 1))})); + LoopProperties = NewLoopProperties; + } + return createLoopPropertiesMetadata(LoopProperties); + } SmallVector<Metadata *, 4> Args; - // Reserve operand 0 for loop id self reference. - auto TempNode = MDNode::getTemporary(Ctx, None); + TempMDTuple TempNode = MDNode::getTemporary(Ctx, None); Args.push_back(TempNode.get()); + Args.append(LoopProperties.begin(), LoopProperties.end()); - // If we have a valid start debug location for the loop, add it. - if (StartLoc) { - Args.push_back(StartLoc.getAsMDNode()); - - // If we also have a valid end debug location for the loop, add it. - if (EndLoc) - Args.push_back(EndLoc.getAsMDNode()); - } - - // Setting vectorize.width - if (Attrs.VectorizeWidth > 0) { - Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.vectorize.width"), - ConstantAsMetadata::get(ConstantInt::get( - Type::getInt32Ty(Ctx), Attrs.VectorizeWidth))}; + if (Attrs.PipelineInitiationInterval > 0) { + Metadata *Vals[] = { + MDString::get(Ctx, "llvm.loop.pipeline.initiationinterval"), + ConstantAsMetadata::get(ConstantInt::get( + llvm::Type::getInt32Ty(Ctx), Attrs.PipelineInitiationInterval))}; Args.push_back(MDNode::get(Ctx, Vals)); } - // Setting interleave.count - if (Attrs.InterleaveCount > 0) { - Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.interleave.count"), - ConstantAsMetadata::get(ConstantInt::get( - Type::getInt32Ty(Ctx), Attrs.InterleaveCount))}; - Args.push_back(MDNode::get(Ctx, Vals)); + // No follow-up: This is the last transformation. 
+ + MDNode *LoopID = MDNode::getDistinct(Ctx, Args); + LoopID->replaceOperandWith(0, LoopID); + HasUserTransforms = true; + return LoopID; +} + +MDNode * +LoopInfo::createPartialUnrollMetadata(const LoopAttributes &Attrs, + ArrayRef<Metadata *> LoopProperties, + bool &HasUserTransforms) { + LLVMContext &Ctx = Header->getContext(); + + Optional<bool> Enabled; + if (Attrs.UnrollEnable == LoopAttributes::Disable) + Enabled = false; + else if (Attrs.UnrollEnable == LoopAttributes::Full) + Enabled = None; + else if (Attrs.UnrollEnable != LoopAttributes::Unspecified || + Attrs.UnrollCount != 0) + Enabled = true; + + if (Enabled != true) { + // createFullUnrollMetadata will already have added llvm.loop.unroll.disable + // if unrolling is disabled. + return createPipeliningMetadata(Attrs, LoopProperties, HasUserTransforms); } + SmallVector<Metadata *, 4> FollowupLoopProperties; + + // Apply all loop properties to the unrolled loop. + FollowupLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); + + // Don't unroll an already unrolled loop. + FollowupLoopProperties.push_back( + MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.disable"))); + + bool FollowupHasTransforms = false; + MDNode *Followup = createPipeliningMetadata(Attrs, FollowupLoopProperties, + FollowupHasTransforms); + + SmallVector<Metadata *, 4> Args; + TempMDTuple TempNode = MDNode::getTemporary(Ctx, None); + Args.push_back(TempNode.get()); + Args.append(LoopProperties.begin(), LoopProperties.end()); + // Setting unroll.count if (Attrs.UnrollCount > 0) { Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.unroll.count"), ConstantAsMetadata::get(ConstantInt::get( - Type::getInt32Ty(Ctx), Attrs.UnrollCount))}; + llvm::Type::getInt32Ty(Ctx), Attrs.UnrollCount))}; Args.push_back(MDNode::get(Ctx, Vals)); } - // Setting unroll_and_jam.count - if (Attrs.UnrollAndJamCount > 0) { - Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.unroll_and_jam.count"), - ConstantAsMetadata::get(ConstantInt::get( - Type::getInt32Ty(Ctx), Attrs.UnrollAndJamCount))}; + // Setting unroll.full or unroll.disable + if (Attrs.UnrollEnable == LoopAttributes::Enable) { + Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.unroll.enable")}; Args.push_back(MDNode::get(Ctx, Vals)); } - // Setting vectorize.enable - if (Attrs.VectorizeEnable != LoopAttributes::Unspecified) { - Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.vectorize.enable"), - ConstantAsMetadata::get(ConstantInt::get( - Type::getInt1Ty(Ctx), (Attrs.VectorizeEnable == - LoopAttributes::Enable)))}; - Args.push_back(MDNode::get(Ctx, Vals)); - } + if (FollowupHasTransforms) + Args.push_back(MDNode::get( + Ctx, {MDString::get(Ctx, "llvm.loop.unroll.followup_all"), Followup})); - // Setting unroll.full or unroll.disable - if (Attrs.UnrollEnable != LoopAttributes::Unspecified) { - std::string Name; - if (Attrs.UnrollEnable == LoopAttributes::Enable) - Name = "llvm.loop.unroll.enable"; - else if (Attrs.UnrollEnable == LoopAttributes::Full) - Name = "llvm.loop.unroll.full"; - else - Name = "llvm.loop.unroll.disable"; - Metadata *Vals[] = {MDString::get(Ctx, Name)}; - Args.push_back(MDNode::get(Ctx, Vals)); + MDNode *LoopID = MDNode::getDistinct(Ctx, Args); + LoopID->replaceOperandWith(0, LoopID); + HasUserTransforms = true; + return LoopID; +} + +MDNode * +LoopInfo::createUnrollAndJamMetadata(const LoopAttributes &Attrs, + ArrayRef<Metadata *> LoopProperties, + bool &HasUserTransforms) { + LLVMContext &Ctx = Header->getContext(); + + Optional<bool> Enabled; + if (Attrs.UnrollAndJamEnable == 
LoopAttributes::Disable) + Enabled = false; + else if (Attrs.UnrollAndJamEnable == LoopAttributes::Enable || + Attrs.UnrollAndJamCount != 0) + Enabled = true; + + if (Enabled != true) { + SmallVector<Metadata *, 4> NewLoopProperties; + if (Enabled == false) { + NewLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); + NewLoopProperties.push_back(MDNode::get( + Ctx, MDString::get(Ctx, "llvm.loop.unroll_and_jam.disable"))); + LoopProperties = NewLoopProperties; + } + return createPartialUnrollMetadata(Attrs, LoopProperties, + HasUserTransforms); } - // Setting unroll_and_jam.full or unroll_and_jam.disable - if (Attrs.UnrollAndJamEnable != LoopAttributes::Unspecified) { - std::string Name; - if (Attrs.UnrollAndJamEnable == LoopAttributes::Enable) - Name = "llvm.loop.unroll_and_jam.enable"; - else if (Attrs.UnrollAndJamEnable == LoopAttributes::Full) - Name = "llvm.loop.unroll_and_jam.full"; - else - Name = "llvm.loop.unroll_and_jam.disable"; - Metadata *Vals[] = {MDString::get(Ctx, Name)}; + SmallVector<Metadata *, 4> FollowupLoopProperties; + FollowupLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); + FollowupLoopProperties.push_back( + MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll_and_jam.disable"))); + + bool FollowupHasTransforms = false; + MDNode *Followup = createPartialUnrollMetadata(Attrs, FollowupLoopProperties, + FollowupHasTransforms); + + SmallVector<Metadata *, 4> Args; + TempMDTuple TempNode = MDNode::getTemporary(Ctx, None); + Args.push_back(TempNode.get()); + Args.append(LoopProperties.begin(), LoopProperties.end()); + + // Setting unroll_and_jam.count + if (Attrs.UnrollAndJamCount > 0) { + Metadata *Vals[] = { + MDString::get(Ctx, "llvm.loop.unroll_and_jam.count"), + ConstantAsMetadata::get(ConstantInt::get(llvm::Type::getInt32Ty(Ctx), + Attrs.UnrollAndJamCount))}; Args.push_back(MDNode::get(Ctx, Vals)); } - if (Attrs.DistributeEnable != LoopAttributes::Unspecified) { - Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.distribute.enable"), - ConstantAsMetadata::get(ConstantInt::get( - Type::getInt1Ty(Ctx), (Attrs.DistributeEnable == - LoopAttributes::Enable)))}; + if (Attrs.UnrollAndJamEnable == LoopAttributes::Enable) { + Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.unroll_and_jam.enable")}; Args.push_back(MDNode::get(Ctx, Vals)); } - if (Attrs.IsParallel) { - AccGroup = MDNode::getDistinct(Ctx, {}); + if (FollowupHasTransforms) Args.push_back(MDNode::get( - Ctx, {MDString::get(Ctx, "llvm.loop.parallel_accesses"), AccGroup})); + Ctx, {MDString::get(Ctx, "llvm.loop.unroll_and_jam.followup_outer"), + Followup})); + + if (UnrollAndJamInnerFollowup) + Args.push_back(MDNode::get( + Ctx, {MDString::get(Ctx, "llvm.loop.unroll_and_jam.followup_inner"), + UnrollAndJamInnerFollowup})); + + MDNode *LoopID = MDNode::getDistinct(Ctx, Args); + LoopID->replaceOperandWith(0, LoopID); + HasUserTransforms = true; + return LoopID; +} + +MDNode * +LoopInfo::createLoopVectorizeMetadata(const LoopAttributes &Attrs, + ArrayRef<Metadata *> LoopProperties, + bool &HasUserTransforms) { + LLVMContext &Ctx = Header->getContext(); + + Optional<bool> Enabled; + if (Attrs.VectorizeEnable == LoopAttributes::Disable) + Enabled = false; + else if (Attrs.VectorizeEnable != LoopAttributes::Unspecified || + Attrs.InterleaveCount != 0 || Attrs.VectorizeWidth != 0) + Enabled = true; + + if (Enabled != true) { + SmallVector<Metadata *, 4> NewLoopProperties; + if (Enabled == false) { + NewLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); + 
NewLoopProperties.push_back( + MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.enable"), + ConstantAsMetadata::get(ConstantInt::get( + llvm::Type::getInt1Ty(Ctx), 0))})); + LoopProperties = NewLoopProperties; + } + return createUnrollAndJamMetadata(Attrs, LoopProperties, HasUserTransforms); + } + + // Apply all loop properties to the vectorized loop. + SmallVector<Metadata *, 4> FollowupLoopProperties; + FollowupLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); + + // Don't vectorize an already vectorized loop. + FollowupLoopProperties.push_back( + MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.isvectorized"))); + + bool FollowupHasTransforms = false; + MDNode *Followup = createUnrollAndJamMetadata(Attrs, FollowupLoopProperties, + FollowupHasTransforms); + + SmallVector<Metadata *, 4> Args; + TempMDTuple TempNode = MDNode::getTemporary(Ctx, None); + Args.push_back(TempNode.get()); + Args.append(LoopProperties.begin(), LoopProperties.end()); + + // Setting vectorize.width + if (Attrs.VectorizeWidth > 0) { + Metadata *Vals[] = { + MDString::get(Ctx, "llvm.loop.vectorize.width"), + ConstantAsMetadata::get(ConstantInt::get(llvm::Type::getInt32Ty(Ctx), + Attrs.VectorizeWidth))}; + Args.push_back(MDNode::get(Ctx, Vals)); } - if (Attrs.PipelineDisabled) { + // Setting interleave.count + if (Attrs.InterleaveCount > 0) { Metadata *Vals[] = { - MDString::get(Ctx, "llvm.loop.pipeline.disable"), - ConstantAsMetadata::get(ConstantInt::get( - Type::getInt1Ty(Ctx), (Attrs.PipelineDisabled == true)))}; + MDString::get(Ctx, "llvm.loop.interleave.count"), + ConstantAsMetadata::get(ConstantInt::get(llvm::Type::getInt32Ty(Ctx), + Attrs.InterleaveCount))}; Args.push_back(MDNode::get(Ctx, Vals)); } - if (Attrs.PipelineInitiationInterval > 0) { + // Setting vectorize.enable + if (Attrs.VectorizeEnable != LoopAttributes::Unspecified) { Metadata *Vals[] = { - MDString::get(Ctx, "llvm.loop.pipeline.initiationinterval"), + MDString::get(Ctx, "llvm.loop.vectorize.enable"), ConstantAsMetadata::get(ConstantInt::get( - Type::getInt32Ty(Ctx), Attrs.PipelineInitiationInterval))}; + llvm::Type::getInt1Ty(Ctx), + (Attrs.VectorizeEnable == LoopAttributes::Enable)))}; Args.push_back(MDNode::get(Ctx, Vals)); } - // Set the first operand to itself. 
+ if (FollowupHasTransforms) + Args.push_back(MDNode::get( + Ctx, + {MDString::get(Ctx, "llvm.loop.vectorize.followup_all"), Followup})); + MDNode *LoopID = MDNode::get(Ctx, Args); LoopID->replaceOperandWith(0, LoopID); + HasUserTransforms = true; return LoopID; } +MDNode * +LoopInfo::createLoopDistributeMetadata(const LoopAttributes &Attrs, + ArrayRef<Metadata *> LoopProperties, + bool &HasUserTransforms) { + LLVMContext &Ctx = Header->getContext(); + + Optional<bool> Enabled; + if (Attrs.DistributeEnable == LoopAttributes::Disable) + Enabled = false; + if (Attrs.DistributeEnable == LoopAttributes::Enable) + Enabled = true; + + if (Enabled != true) { + SmallVector<Metadata *, 4> NewLoopProperties; + if (Enabled == false) { + NewLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); + NewLoopProperties.push_back( + MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.distribute.enable"), + ConstantAsMetadata::get(ConstantInt::get( + llvm::Type::getInt1Ty(Ctx), 0))})); + LoopProperties = NewLoopProperties; + } + return createLoopVectorizeMetadata(Attrs, LoopProperties, + HasUserTransforms); + } + + bool FollowupHasTransforms = false; + MDNode *Followup = + createLoopVectorizeMetadata(Attrs, LoopProperties, FollowupHasTransforms); + + SmallVector<Metadata *, 4> Args; + TempMDTuple TempNode = MDNode::getTemporary(Ctx, None); + Args.push_back(TempNode.get()); + Args.append(LoopProperties.begin(), LoopProperties.end()); + + Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.distribute.enable"), + ConstantAsMetadata::get(ConstantInt::get( + llvm::Type::getInt1Ty(Ctx), + (Attrs.DistributeEnable == LoopAttributes::Enable)))}; + Args.push_back(MDNode::get(Ctx, Vals)); + + if (FollowupHasTransforms) + Args.push_back(MDNode::get( + Ctx, + {MDString::get(Ctx, "llvm.loop.distribute.followup_all"), Followup})); + + MDNode *LoopID = MDNode::get(Ctx, Args); + LoopID->replaceOperandWith(0, LoopID); + HasUserTransforms = true; + return LoopID; +} + +MDNode *LoopInfo::createFullUnrollMetadata(const LoopAttributes &Attrs, + ArrayRef<Metadata *> LoopProperties, + bool &HasUserTransforms) { + LLVMContext &Ctx = Header->getContext(); + + Optional<bool> Enabled; + if (Attrs.UnrollEnable == LoopAttributes::Disable) + Enabled = false; + else if (Attrs.UnrollEnable == LoopAttributes::Full) + Enabled = true; + + if (Enabled != true) { + SmallVector<Metadata *, 4> NewLoopProperties; + if (Enabled == false) { + NewLoopProperties.append(LoopProperties.begin(), LoopProperties.end()); + NewLoopProperties.push_back( + MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.disable"))); + LoopProperties = NewLoopProperties; + } + return createLoopDistributeMetadata(Attrs, LoopProperties, + HasUserTransforms); + } + + SmallVector<Metadata *, 4> Args; + TempMDTuple TempNode = MDNode::getTemporary(Ctx, None); + Args.push_back(TempNode.get()); + Args.append(LoopProperties.begin(), LoopProperties.end()); + Args.push_back(MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.full"))); + + // No follow-up: there is no loop after full unrolling. + // TODO: Warn if there are transformations after full unrolling. + + MDNode *LoopID = MDNode::getDistinct(Ctx, Args); + LoopID->replaceOperandWith(0, LoopID); + HasUserTransforms = true; + return LoopID; +} + +MDNode *LoopInfo::createMetadata( + const LoopAttributes &Attrs, + llvm::ArrayRef<llvm::Metadata *> AdditionalLoopProperties, + bool &HasUserTransforms) { + SmallVector<Metadata *, 3> LoopProperties; + + // If we have a valid start debug location for the loop, add it. 
+ if (StartLoc) { + LoopProperties.push_back(StartLoc.getAsMDNode()); + + // If we also have a valid end debug location for the loop, add it. + if (EndLoc) + LoopProperties.push_back(EndLoc.getAsMDNode()); + } + + assert(!!AccGroup == Attrs.IsParallel && + "There must be an access group iff the loop is parallel"); + if (Attrs.IsParallel) { + LLVMContext &Ctx = Header->getContext(); + LoopProperties.push_back(MDNode::get( + Ctx, {MDString::get(Ctx, "llvm.loop.parallel_accesses"), AccGroup})); + } + + LoopProperties.insert(LoopProperties.end(), AdditionalLoopProperties.begin(), + AdditionalLoopProperties.end()); + return createFullUnrollMetadata(Attrs, LoopProperties, HasUserTransforms); +} + LoopAttributes::LoopAttributes(bool IsParallel) : IsParallel(IsParallel), VectorizeEnable(LoopAttributes::Unspecified), UnrollEnable(LoopAttributes::Unspecified), @@ -174,15 +431,114 @@ void LoopAttributes::clear() { } LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs, - const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc) - : LoopID(nullptr), Header(Header), Attrs(Attrs) { - LoopID = - createMetadata(Header->getContext(), Attrs, StartLoc, EndLoc, AccGroup); + const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc, + LoopInfo *Parent) + : Header(Header), Attrs(Attrs), StartLoc(StartLoc), EndLoc(EndLoc), + Parent(Parent) { + + if (Attrs.IsParallel) { + // Create an access group for this loop. + LLVMContext &Ctx = Header->getContext(); + AccGroup = MDNode::getDistinct(Ctx, {}); + } + + if (!Attrs.IsParallel && Attrs.VectorizeWidth == 0 && + Attrs.InterleaveCount == 0 && Attrs.UnrollCount == 0 && + Attrs.UnrollAndJamCount == 0 && !Attrs.PipelineDisabled && + Attrs.PipelineInitiationInterval == 0 && + Attrs.VectorizeEnable == LoopAttributes::Unspecified && + Attrs.UnrollEnable == LoopAttributes::Unspecified && + Attrs.UnrollAndJamEnable == LoopAttributes::Unspecified && + Attrs.DistributeEnable == LoopAttributes::Unspecified && !StartLoc && + !EndLoc) + return; + + TempLoopID = MDNode::getTemporary(Header->getContext(), None); +} + +void LoopInfo::finish() { + // We did not annotate the loop body instructions because there are no + // attributes for this loop. + if (!TempLoopID) + return; + + MDNode *LoopID; + LoopAttributes CurLoopAttr = Attrs; + LLVMContext &Ctx = Header->getContext(); + + if (Parent && (Parent->Attrs.UnrollAndJamEnable || + Parent->Attrs.UnrollAndJamCount != 0)) { + // Parent unroll-and-jams this loop. + // Split the transformations in those that happens before the unroll-and-jam + // and those after. 
+ + LoopAttributes BeforeJam, AfterJam; + + BeforeJam.IsParallel = AfterJam.IsParallel = Attrs.IsParallel; + + BeforeJam.VectorizeWidth = Attrs.VectorizeWidth; + BeforeJam.InterleaveCount = Attrs.InterleaveCount; + BeforeJam.VectorizeEnable = Attrs.VectorizeEnable; + BeforeJam.DistributeEnable = Attrs.DistributeEnable; + + switch (Attrs.UnrollEnable) { + case LoopAttributes::Unspecified: + case LoopAttributes::Disable: + BeforeJam.UnrollEnable = Attrs.UnrollEnable; + AfterJam.UnrollEnable = Attrs.UnrollEnable; + break; + case LoopAttributes::Full: + BeforeJam.UnrollEnable = LoopAttributes::Full; + break; + case LoopAttributes::Enable: + AfterJam.UnrollEnable = LoopAttributes::Enable; + break; + } + + AfterJam.UnrollCount = Attrs.UnrollCount; + AfterJam.PipelineDisabled = Attrs.PipelineDisabled; + AfterJam.PipelineInitiationInterval = Attrs.PipelineInitiationInterval; + + // If this loop is subject of an unroll-and-jam by the parent loop, and has + // an unroll-and-jam annotation itself, we have to decide whether to first + // apply the parent's unroll-and-jam or this loop's unroll-and-jam. The + // UnrollAndJam pass processes loops from inner to outer, so we apply the + // inner first. + BeforeJam.UnrollAndJamCount = Attrs.UnrollAndJamCount; + BeforeJam.UnrollAndJamEnable = Attrs.UnrollAndJamEnable; + + // Set the inner followup metadata to process by the outer loop. Only + // consider the first inner loop. + if (!Parent->UnrollAndJamInnerFollowup) { + // Splitting the attributes into a BeforeJam and an AfterJam part will + // stop 'llvm.loop.isvectorized' (generated by vectorization in BeforeJam) + // to be forwarded to the AfterJam part. We detect the situation here and + // add it manually. + SmallVector<Metadata *, 1> BeforeLoopProperties; + if (BeforeJam.VectorizeEnable != LoopAttributes::Unspecified || + BeforeJam.InterleaveCount != 0 || BeforeJam.VectorizeWidth != 0) + BeforeLoopProperties.push_back( + MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.isvectorized"))); + + bool InnerFollowupHasTransform = false; + MDNode *InnerFollowup = createMetadata(AfterJam, BeforeLoopProperties, + InnerFollowupHasTransform); + if (InnerFollowupHasTransform) + Parent->UnrollAndJamInnerFollowup = InnerFollowup; + } + + CurLoopAttr = BeforeJam; + } + + bool HasUserTransforms = false; + LoopID = createMetadata(CurLoopAttr, {}, HasUserTransforms); + TempLoopID->replaceAllUsesWith(LoopID); } void LoopInfoStack::push(BasicBlock *Header, const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc) { - Active.push_back(LoopInfo(Header, StagedAttrs, StartLoc, EndLoc)); + Active.push_back(LoopInfo(Header, StagedAttrs, StartLoc, EndLoc, + Active.empty() ? nullptr : &Active.back())); // Clear the attributes so nested loops do not inherit them. StagedAttrs.clear(); } @@ -209,13 +565,13 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, // Translate opencl_unroll_hint attribute argument to // equivalent LoopHintAttr enums. // OpenCL v2.0 s6.11.5: - // 0 - full unroll (no argument). + // 0 - enable unroll (no argument). // 1 - disable unroll. // other positive integer n - unroll by n. 
if (OpenCLHint) { ValueInt = OpenCLHint->getUnrollHint(); if (ValueInt == 0) { - State = LoopHintAttr::Full; + State = LoopHintAttr::Enable; } else if (ValueInt != 1) { Option = LoopHintAttr::UnrollCount; State = LoopHintAttr::Numeric; @@ -365,6 +721,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, void LoopInfoStack::pop() { assert(!Active.empty() && "No active loops to pop"); + Active.back().finish(); Active.pop_back(); } diff --git a/lib/CodeGen/CGLoopInfo.h b/lib/CodeGen/CGLoopInfo.h index 84ba03bfb00b..35d0e00527b9 100644 --- a/lib/CodeGen/CGLoopInfo.h +++ b/lib/CodeGen/CGLoopInfo.h @@ -1,9 +1,8 @@ //===---- CGLoopInfo.h - LLVM CodeGen for loop metadata -*- C++ -*---------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -79,10 +78,11 @@ class LoopInfo { public: /// Construct a new LoopInfo for the loop with entry Header. LoopInfo(llvm::BasicBlock *Header, const LoopAttributes &Attrs, - const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc); + const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc, + LoopInfo *Parent); /// Get the loop id metadata for this loop. - llvm::MDNode *getLoopID() const { return LoopID; } + llvm::MDNode *getLoopID() const { return TempLoopID.get(); } /// Get the header block of this loop. llvm::BasicBlock *getHeader() const { return Header; } @@ -93,15 +93,92 @@ public: /// Return this loop's access group or nullptr if it does not have one. llvm::MDNode *getAccessGroup() const { return AccGroup; } + /// Create the loop's metadata. Must be called after its nested loops have + /// been processed. + void finish(); + private: /// Loop ID metadata. - llvm::MDNode *LoopID; + llvm::TempMDTuple TempLoopID; /// Header block of this loop. llvm::BasicBlock *Header; /// The attributes for this loop. LoopAttributes Attrs; /// The access group for memory accesses parallel to this loop. llvm::MDNode *AccGroup = nullptr; + /// Start location of this loop. + llvm::DebugLoc StartLoc; + /// End location of this loop. + llvm::DebugLoc EndLoc; + /// The next outer loop, or nullptr if this is the outermost loop. + LoopInfo *Parent; + /// If this loop has unroll-and-jam metadata, this can be set by the inner + /// loop's LoopInfo to set the llvm.loop.unroll_and_jam.followup_inner + /// metadata. + llvm::MDNode *UnrollAndJamInnerFollowup = nullptr; + + /// Create a LoopID without any transformations. + llvm::MDNode * + createLoopPropertiesMetadata(llvm::ArrayRef<llvm::Metadata *> LoopProperties); + + /// Create a LoopID for transformations. + /// + /// The methods call each other in case multiple transformations are applied + /// to a loop. The transformation first to be applied will use LoopID of the + /// next transformation in its followup attribute. + /// + /// @param Attrs The loop's transformations. + /// @param LoopProperties Non-transformation properties such as debug + /// location, parallel accesses and disabled + /// transformations. These are added to the returned + /// LoopID. + /// @param HasUserTransforms [out] Set to true if the returned MDNode encodes + /// at least one transformation. 
+ /// + /// @return A LoopID (metadata node) that can be used for the llvm.loop + /// annotation or followup-attribute. + /// @{ + llvm::MDNode * + createPipeliningMetadata(const LoopAttributes &Attrs, + llvm::ArrayRef<llvm::Metadata *> LoopProperties, + bool &HasUserTransforms); + llvm::MDNode * + createPartialUnrollMetadata(const LoopAttributes &Attrs, + llvm::ArrayRef<llvm::Metadata *> LoopProperties, + bool &HasUserTransforms); + llvm::MDNode * + createUnrollAndJamMetadata(const LoopAttributes &Attrs, + llvm::ArrayRef<llvm::Metadata *> LoopProperties, + bool &HasUserTransforms); + llvm::MDNode * + createLoopVectorizeMetadata(const LoopAttributes &Attrs, + llvm::ArrayRef<llvm::Metadata *> LoopProperties, + bool &HasUserTransforms); + llvm::MDNode * + createLoopDistributeMetadata(const LoopAttributes &Attrs, + llvm::ArrayRef<llvm::Metadata *> LoopProperties, + bool &HasUserTransforms); + llvm::MDNode * + createFullUnrollMetadata(const LoopAttributes &Attrs, + llvm::ArrayRef<llvm::Metadata *> LoopProperties, + bool &HasUserTransforms); + /// @} + + /// Create a LoopID for this loop, including transformation-unspecific + /// metadata such as debug location. + /// + /// @param Attrs This loop's attributes and transformations. + /// @param LoopProperties Additional non-transformation properties to add + /// to the LoopID, such as transformation-specific + /// metadata that are not covered by @p Attrs. + /// @param HasUserTransforms [out] Set to true if the returned MDNode encodes + /// at least one transformation. + /// + /// @return A LoopID (metadata node) that can be used for the llvm.loop + /// annotation. + llvm::MDNode *createMetadata(const LoopAttributes &Attrs, + llvm::ArrayRef<llvm::Metadata *> LoopProperties, + bool &HasUserTransforms); }; /// A stack of loop information corresponding to loop nesting levels. diff --git a/lib/CodeGen/CGNonTrivialStruct.cpp b/lib/CodeGen/CGNonTrivialStruct.cpp index c6a96a912622..caf62d2ac93a 100644 --- a/lib/CodeGen/CGNonTrivialStruct.cpp +++ b/lib/CodeGen/CGNonTrivialStruct.cpp @@ -1,9 +1,8 @@ //===--- CGNonTrivialStruct.cpp - Emit Special Functions for C Structs ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -15,6 +14,7 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "clang/AST/NonTrivialTypeVisitor.h" +#include "clang/CodeGen/CodeGenABITypes.h" #include "llvm/Support/ScopedPrinter.h" #include <array> @@ -84,23 +84,22 @@ struct CopyStructVisitor : StructVisitor<Derived>, template <class... Ts> void preVisit(QualType::PrimitiveCopyKind PCK, QualType FT, - const FieldDecl *FD, CharUnits CurStructOffsset, - Ts &&... Args) { + const FieldDecl *FD, CharUnits CurStructOffset, Ts &&... Args) { if (PCK) asDerived().flushTrivialFields(std::forward<Ts>(Args)...); } template <class... Ts> void visitWithKind(QualType::PrimitiveCopyKind PCK, QualType FT, - const FieldDecl *FD, CharUnits CurStructOffsset, + const FieldDecl *FD, CharUnits CurStructOffset, Ts &&... 
Args) { if (const auto *AT = asDerived().getContext().getAsArrayType(FT)) { asDerived().visitArray(PCK, AT, FT.isVolatileQualified(), FD, - CurStructOffsset, std::forward<Ts>(Args)...); + CurStructOffset, std::forward<Ts>(Args)...); return; } - Super::visitWithKind(PCK, FT, FD, CurStructOffsset, + Super::visitWithKind(PCK, FT, FD, CurStructOffset, std::forward<Ts>(Args)...); } @@ -140,8 +139,8 @@ struct CopyStructVisitor : StructVisitor<Derived>, // <alignment-info> ::= <dst-alignment> ["_" <src-alignment>] // <struct-field-info> ::= <field-info>+ // <field-info> ::= <struct-or-scalar-field-info> | <array-field-info> -// <struct-or-scalar-field-info> ::= <struct-field-info> | <strong-field-info> | -// <trivial-field-info> +// <struct-or-scalar-field-info> ::= "_S" <struct-field-info> | +// <strong-field-info> | <trivial-field-info> // <array-field-info> ::= "_AB" <array-offset> "s" <element-size> "n" // <num-elements> <innermost-element-info> "_AE" // <innermost-element-info> ::= <struct-or-scalar-field-info> @@ -176,6 +175,7 @@ template <class Derived> struct GenFuncNameBase { void visitStruct(QualType QT, const FieldDecl *FD, CharUnits CurStructOffset) { CharUnits FieldOffset = CurStructOffset + asDerived().getFieldOffset(FD); + appendStr("_S"); asDerived().visitStructFields(QT, FieldOffset); } @@ -253,11 +253,11 @@ struct GenBinaryFuncName : CopyStructVisitor<GenBinaryFuncName<IsMove>, IsMove>, } void visitVolatileTrivial(QualType FT, const FieldDecl *FD, - CharUnits CurStackOffset) { + CharUnits CurStructOffset) { // Because volatile fields can be bit-fields and are individually copied, // their offset and width are in bits. uint64_t OffsetInBits = - this->Ctx.toBits(CurStackOffset) + this->getFieldOffsetInBits(FD); + this->Ctx.toBits(CurStructOffset) + this->getFieldOffsetInBits(FD); this->appendStr("_tv" + llvm::to_string(OffsetInBits) + "w" + llvm::to_string(getFieldSize(FD, FT, this->Ctx))); } @@ -286,8 +286,7 @@ struct GenDestructorFuncName : GenUnaryFuncName<GenDestructorFuncName>, using Super = DestructedTypeVisitor<GenDestructorFuncName>; GenDestructorFuncName(const char *Prefix, CharUnits DstAlignment, ASTContext &Ctx) - : GenUnaryFuncName<GenDestructorFuncName>(Prefix, DstAlignment, - Ctx) {} + : GenUnaryFuncName<GenDestructorFuncName>(Prefix, DstAlignment, Ctx) {} void visitWithKind(QualType::DestructionKind DK, QualType FT, const FieldDecl *FD, CharUnits CurStructOffset) { if (const auto *AT = getContext().getAsArrayType(FT)) { @@ -322,19 +321,19 @@ static const CGFunctionInfo &getFunctionInfo(CodeGenModule &CGM, // functions. template <class Derived> struct GenFuncBase { template <size_t N> - void visitStruct(QualType FT, const FieldDecl *FD, CharUnits CurStackOffset, + void visitStruct(QualType FT, const FieldDecl *FD, CharUnits CurStructOffset, std::array<Address, N> Addrs) { this->asDerived().callSpecialFunction( - FT, CurStackOffset + asDerived().getFieldOffset(FD), Addrs); + FT, CurStructOffset + asDerived().getFieldOffset(FD), Addrs); } template <class FieldKind, size_t N> void visitArray(FieldKind FK, const ArrayType *AT, bool IsVolatile, - const FieldDecl *FD, CharUnits CurStackOffset, + const FieldDecl *FD, CharUnits CurStructOffset, std::array<Address, N> Addrs) { // Non-volatile trivial fields are copied when flushTrivialFields is called. 
if (!FK) - return asDerived().visitTrivial(QualType(AT, 0), FD, CurStackOffset, + return asDerived().visitTrivial(QualType(AT, 0), FD, CurStructOffset, Addrs); asDerived().flushTrivialFields(Addrs); @@ -345,7 +344,7 @@ template <class Derived> struct GenFuncBase { QualType BaseEltQT; std::array<Address, N> StartAddrs = Addrs; for (unsigned I = 0; I < N; ++I) - StartAddrs[I] = getAddrWithOffset(Addrs[I], CurStackOffset, FD); + StartAddrs[I] = getAddrWithOffset(Addrs[I], CurStructOffset, FD); Address DstAddr = StartAddrs[DstIdx]; llvm::Value *NumElts = CGF.emitArrayLength(AT, BaseEltQT, DstAddr); unsigned BaseEltSize = Ctx.getTypeSizeInChars(BaseEltQT).getQuantity(); @@ -414,8 +413,7 @@ template <class Derived> struct GenFuncBase { if (Offset.getQuantity() == 0) return Addr; Addr = CGF->Builder.CreateBitCast(Addr, CGF->CGM.Int8PtrTy); - Addr = CGF->Builder.CreateConstInBoundsGEP(Addr, Offset.getQuantity(), - CharUnits::One()); + Addr = CGF->Builder.CreateConstInBoundsGEP(Addr, Offset.getQuantity()); return CGF->Builder.CreateBitCast(Addr, CGF->CGM.Int8PtrPtrTy); } @@ -586,15 +584,15 @@ struct GenDestructor : StructVisitor<GenDestructor>, } void visitARCStrong(QualType QT, const FieldDecl *FD, - CharUnits CurStackOffset, std::array<Address, 1> Addrs) { + CharUnits CurStructOffset, std::array<Address, 1> Addrs) { CGF->destroyARCStrongImprecise( - *CGF, getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD), QT); + *CGF, getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD), QT); } - void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset, + void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStructOffset, std::array<Address, 1> Addrs) { CGF->destroyARCWeak( - *CGF, getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD), QT); + *CGF, getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD), QT); } void callSpecialFunction(QualType FT, CharUnits Offset, @@ -627,35 +625,35 @@ struct GenDefaultInitialize } void visitARCStrong(QualType QT, const FieldDecl *FD, - CharUnits CurStackOffset, std::array<Address, 1> Addrs) { + CharUnits CurStructOffset, std::array<Address, 1> Addrs) { CGF->EmitNullInitialization( - getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD), QT); + getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD), QT); } - void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset, + void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStructOffset, std::array<Address, 1> Addrs) { CGF->EmitNullInitialization( - getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD), QT); + getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD), QT); } template <class FieldKind, size_t... 
Is> void visitArray(FieldKind FK, const ArrayType *AT, bool IsVolatile, - const FieldDecl *FD, CharUnits CurStackOffset, + const FieldDecl *FD, CharUnits CurStructOffset, std::array<Address, 1> Addrs) { if (!FK) - return visitTrivial(QualType(AT, 0), FD, CurStackOffset, Addrs); + return visitTrivial(QualType(AT, 0), FD, CurStructOffset, Addrs); ASTContext &Ctx = getContext(); CharUnits Size = Ctx.getTypeSizeInChars(QualType(AT, 0)); QualType EltTy = Ctx.getBaseElementType(QualType(AT, 0)); if (Size < CharUnits::fromQuantity(16) || EltTy->getAs<RecordType>()) { - GenFuncBaseTy::visitArray(FK, AT, IsVolatile, FD, CurStackOffset, Addrs); + GenFuncBaseTy::visitArray(FK, AT, IsVolatile, FD, CurStructOffset, Addrs); return; } llvm::Constant *SizeVal = CGF->Builder.getInt64(Size.getQuantity()); - Address DstAddr = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); + Address DstAddr = getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD); Address Loc = CGF->Builder.CreateElementBitCast(DstAddr, CGF->Int8Ty); CGF->Builder.CreateMemSet(Loc, CGF->Builder.getInt8(0), SizeVal, IsVolatile); @@ -673,24 +671,26 @@ struct GenCopyConstructor : GenBinaryFunc<GenCopyConstructor, false> { : GenBinaryFunc<GenCopyConstructor, false>(Ctx) {} void visitARCStrong(QualType QT, const FieldDecl *FD, - CharUnits CurStackOffset, std::array<Address, 2> Addrs) { - Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); - Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD); + CharUnits CurStructOffset, std::array<Address, 2> Addrs) { + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStructOffset, FD); llvm::Value *SrcVal = CGF->EmitLoadOfScalar( Addrs[SrcIdx], QT.isVolatileQualified(), QT, SourceLocation()); llvm::Value *Val = CGF->EmitARCRetain(QT, SrcVal); CGF->EmitStoreOfScalar(Val, CGF->MakeAddrLValue(Addrs[DstIdx], QT), true); } - void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset, + void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStructOffset, std::array<Address, 2> Addrs) { - Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); - Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD); + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStructOffset, FD); CGF->EmitARCCopyWeak(Addrs[DstIdx], Addrs[SrcIdx]); } void callSpecialFunction(QualType FT, CharUnits Offset, std::array<Address, 2> Addrs) { + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], Offset); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], Offset); CGF->callCStructCopyConstructor(CGF->MakeAddrLValue(Addrs[DstIdx], FT), CGF->MakeAddrLValue(Addrs[SrcIdx], FT)); } @@ -701,9 +701,9 @@ struct GenMoveConstructor : GenBinaryFunc<GenMoveConstructor, true> { : GenBinaryFunc<GenMoveConstructor, true>(Ctx) {} void visitARCStrong(QualType QT, const FieldDecl *FD, - CharUnits CurStackOffset, std::array<Address, 2> Addrs) { - Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); - Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD); + CharUnits CurStructOffset, std::array<Address, 2> Addrs) { + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStructOffset, FD); LValue SrcLV = CGF->MakeAddrLValue(Addrs[SrcIdx], QT); llvm::Value *SrcVal = CGF->EmitLoadOfLValue(SrcLV, SourceLocation()).getScalarVal(); @@ 
-712,15 +712,17 @@ struct GenMoveConstructor : GenBinaryFunc<GenMoveConstructor, true> { /* isInitialization */ true); } - void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset, + void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStructOffset, std::array<Address, 2> Addrs) { - Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); - Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD); + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStructOffset, FD); CGF->EmitARCMoveWeak(Addrs[DstIdx], Addrs[SrcIdx]); } void callSpecialFunction(QualType FT, CharUnits Offset, std::array<Address, 2> Addrs) { + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], Offset); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], Offset); CGF->callCStructMoveConstructor(CGF->MakeAddrLValue(Addrs[DstIdx], FT), CGF->MakeAddrLValue(Addrs[SrcIdx], FT)); } @@ -731,24 +733,26 @@ struct GenCopyAssignment : GenBinaryFunc<GenCopyAssignment, false> { : GenBinaryFunc<GenCopyAssignment, false>(Ctx) {} void visitARCStrong(QualType QT, const FieldDecl *FD, - CharUnits CurStackOffset, std::array<Address, 2> Addrs) { - Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); - Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD); + CharUnits CurStructOffset, std::array<Address, 2> Addrs) { + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStructOffset, FD); llvm::Value *SrcVal = CGF->EmitLoadOfScalar( Addrs[SrcIdx], QT.isVolatileQualified(), QT, SourceLocation()); CGF->EmitARCStoreStrong(CGF->MakeAddrLValue(Addrs[DstIdx], QT), SrcVal, false); } - void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset, + void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStructOffset, std::array<Address, 2> Addrs) { - Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); - Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD); + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStructOffset, FD); CGF->emitARCCopyAssignWeak(QT, Addrs[DstIdx], Addrs[SrcIdx]); } void callSpecialFunction(QualType FT, CharUnits Offset, std::array<Address, 2> Addrs) { + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], Offset); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], Offset); CGF->callCStructCopyAssignmentOperator( CGF->MakeAddrLValue(Addrs[DstIdx], FT), CGF->MakeAddrLValue(Addrs[SrcIdx], FT)); @@ -760,9 +764,9 @@ struct GenMoveAssignment : GenBinaryFunc<GenMoveAssignment, true> { : GenBinaryFunc<GenMoveAssignment, true>(Ctx) {} void visitARCStrong(QualType QT, const FieldDecl *FD, - CharUnits CurStackOffset, std::array<Address, 2> Addrs) { - Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); - Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD); + CharUnits CurStructOffset, std::array<Address, 2> Addrs) { + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStructOffset, FD); LValue SrcLV = CGF->MakeAddrLValue(Addrs[SrcIdx], QT); llvm::Value *SrcVal = CGF->EmitLoadOfLValue(SrcLV, SourceLocation()).getScalarVal(); @@ -774,15 +778,17 @@ struct GenMoveAssignment : GenBinaryFunc<GenMoveAssignment, true> { CGF->EmitARCRelease(DstVal, ARCImpreciseLifetime); } - void 
visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStackOffset, + void visitARCWeak(QualType QT, const FieldDecl *FD, CharUnits CurStructOffset, std::array<Address, 2> Addrs) { - Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStackOffset, FD); - Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStackOffset, FD); + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], CurStructOffset, FD); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], CurStructOffset, FD); CGF->emitARCMoveAssignWeak(QT, Addrs[DstIdx], Addrs[SrcIdx]); } void callSpecialFunction(QualType FT, CharUnits Offset, std::array<Address, 2> Addrs) { + Addrs[DstIdx] = getAddrWithOffset(Addrs[DstIdx], Offset); + Addrs[SrcIdx] = getAddrWithOffset(Addrs[SrcIdx], Offset); CGF->callCStructMoveAssignmentOperator( CGF->MakeAddrLValue(Addrs[DstIdx], FT), CGF->MakeAddrLValue(Addrs[SrcIdx], FT)); @@ -817,6 +823,29 @@ static void callSpecialFunction(G &&Gen, StringRef FuncName, QualType QT, Gen.callFunc(FuncName, QT, Addrs, CGF); } +template <size_t N> std::array<Address, N> createNullAddressArray(); + +template <> std::array<Address, 1> createNullAddressArray() { + return std::array<Address, 1>({{Address(nullptr, CharUnits::Zero())}}); +} + +template <> std::array<Address, 2> createNullAddressArray() { + return std::array<Address, 2>({{Address(nullptr, CharUnits::Zero()), + Address(nullptr, CharUnits::Zero())}}); +} + +template <class G, size_t N> +static llvm::Function * +getSpecialFunction(G &&Gen, StringRef FuncName, QualType QT, bool IsVolatile, + std::array<CharUnits, N> Alignments, CodeGenModule &CGM) { + QT = IsVolatile ? QT.withVolatile() : QT; + // The following call requires an array of addresses as arguments, but doesn't + // actually use them (it overwrites them with the addresses of the arguments + // of the created function). + return Gen.getFunction(FuncName, QT, createNullAddressArray<N>(), Alignments, + CGM); +} + // Functions to emit calls to the special functions of a non-trivial C struct. 
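// Illustration (caller code is hypothetical, not from this patch): obtaining
// one of these helpers directly, for example to take its address rather than
// emit a call. Assumes a CodeGenModule CGM and a non-trivial C struct type QT
// are in scope.
//
//   llvm::Function *Copy = clang::CodeGen::getNonTrivialCStructCopyConstructor(
//       CGM, /*DstAlignment=*/CharUnits::fromQuantity(8),
//       /*SrcAlignment=*/CharUnits::fromQuantity(8),
//       /*IsVolatile=*/false, QT);
//
// This routes through getSpecialFunction above, whose null address array
// exists only to satisfy the generator's signature.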
void CodeGenFunction::callCStructDefaultConstructor(LValue Dst) { bool IsVolatile = Dst.isVolatile(); @@ -828,18 +857,16 @@ void CodeGenFunction::callCStructDefaultConstructor(LValue Dst) { IsVolatile, *this, std::array<Address, 1>({{DstPtr}})); } -std::string -CodeGenFunction::getNonTrivialCopyConstructorStr(QualType QT, - CharUnits Alignment, - bool IsVolatile, - ASTContext &Ctx) { +std::string CodeGenFunction::getNonTrivialCopyConstructorStr( + QualType QT, CharUnits Alignment, bool IsVolatile, ASTContext &Ctx) { GenBinaryFuncName<false> GenName("", Alignment, Alignment, Ctx); return GenName.getName(QT, IsVolatile); } -std::string -CodeGenFunction::getNonTrivialDestructorStr(QualType QT, CharUnits Alignment, - bool IsVolatile, ASTContext &Ctx) { +std::string CodeGenFunction::getNonTrivialDestructorStr(QualType QT, + CharUnits Alignment, + bool IsVolatile, + ASTContext &Ctx) { GenDestructorFuncName GenName("", Alignment, Ctx); return GenName.getName(QT, IsVolatile); } @@ -904,3 +931,69 @@ void CodeGenFunction::callCStructMoveAssignmentOperator(LValue Dst, LValue Src callSpecialFunction(GenMoveAssignment(getContext()), FuncName, QT, IsVolatile, *this, std::array<Address, 2>({{DstPtr, SrcPtr}})); } + +llvm::Function *clang::CodeGen::getNonTrivialCStructDefaultConstructor( + CodeGenModule &CGM, CharUnits DstAlignment, bool IsVolatile, QualType QT) { + ASTContext &Ctx = CGM.getContext(); + GenDefaultInitializeFuncName GenName(DstAlignment, Ctx); + std::string FuncName = GenName.getName(QT, IsVolatile); + return getSpecialFunction(GenDefaultInitialize(Ctx), FuncName, QT, IsVolatile, + std::array<CharUnits, 1>({{DstAlignment}}), CGM); +} + +llvm::Function *clang::CodeGen::getNonTrivialCStructCopyConstructor( + CodeGenModule &CGM, CharUnits DstAlignment, CharUnits SrcAlignment, + bool IsVolatile, QualType QT) { + ASTContext &Ctx = CGM.getContext(); + GenBinaryFuncName<false> GenName("__copy_constructor_", DstAlignment, + SrcAlignment, Ctx); + std::string FuncName = GenName.getName(QT, IsVolatile); + return getSpecialFunction( + GenCopyConstructor(Ctx), FuncName, QT, IsVolatile, + std::array<CharUnits, 2>({{DstAlignment, SrcAlignment}}), CGM); +} + +llvm::Function *clang::CodeGen::getNonTrivialCStructMoveConstructor( + CodeGenModule &CGM, CharUnits DstAlignment, CharUnits SrcAlignment, + bool IsVolatile, QualType QT) { + ASTContext &Ctx = CGM.getContext(); + GenBinaryFuncName<true> GenName("__move_constructor_", DstAlignment, + SrcAlignment, Ctx); + std::string FuncName = GenName.getName(QT, IsVolatile); + return getSpecialFunction( + GenMoveConstructor(Ctx), FuncName, QT, IsVolatile, + std::array<CharUnits, 2>({{DstAlignment, SrcAlignment}}), CGM); +} + +llvm::Function *clang::CodeGen::getNonTrivialCStructCopyAssignmentOperator( + CodeGenModule &CGM, CharUnits DstAlignment, CharUnits SrcAlignment, + bool IsVolatile, QualType QT) { + ASTContext &Ctx = CGM.getContext(); + GenBinaryFuncName<false> GenName("__copy_assignment_", DstAlignment, + SrcAlignment, Ctx); + std::string FuncName = GenName.getName(QT, IsVolatile); + return getSpecialFunction( + GenCopyAssignment(Ctx), FuncName, QT, IsVolatile, + std::array<CharUnits, 2>({{DstAlignment, SrcAlignment}}), CGM); +} + +llvm::Function *clang::CodeGen::getNonTrivialCStructMoveAssignmentOperator( + CodeGenModule &CGM, CharUnits DstAlignment, CharUnits SrcAlignment, + bool IsVolatile, QualType QT) { + ASTContext &Ctx = CGM.getContext(); + GenBinaryFuncName<true> GenName("__move_assignment_", DstAlignment, + SrcAlignment, Ctx); + std::string FuncName = 
GenName.getName(QT, IsVolatile); + return getSpecialFunction( + GenMoveAssignment(Ctx), FuncName, QT, IsVolatile, + std::array<CharUnits, 2>({{DstAlignment, SrcAlignment}}), CGM); +} + +llvm::Function *clang::CodeGen::getNonTrivialCStructDestructor( + CodeGenModule &CGM, CharUnits DstAlignment, bool IsVolatile, QualType QT) { + ASTContext &Ctx = CGM.getContext(); + GenDestructorFuncName GenName("__destructor_", DstAlignment, Ctx); + std::string FuncName = GenName.getName(QT, IsVolatile); + return getSpecialFunction(GenDestructor(Ctx), FuncName, QT, IsVolatile, + std::array<CharUnits, 1>({{DstAlignment}}), CGM); +} diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp index 9c66ff0e8fb2..1dd7ec52230e 100644 --- a/lib/CodeGen/CGObjC.cpp +++ b/lib/CodeGen/CGObjC.cpp @@ -1,9 +1,8 @@ //===---- CGObjC.cpp - Emit LLVM Code for Objective-C ---------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -15,6 +14,7 @@ #include "CGObjCRuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" +#include "ConstantEmitter.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/DeclObjC.h" @@ -22,7 +22,6 @@ #include "clang/Basic/Diagnostic.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" using namespace clang; @@ -62,7 +61,12 @@ CodeGenFunction::EmitObjCBoxedExpr(const ObjCBoxedExpr *E) { // Get the method. const ObjCMethodDecl *BoxingMethod = E->getBoxingMethod(); const Expr *SubExpr = E->getSubExpr(); - assert(BoxingMethod && "BoxingMethod is null"); + + if (E->isExpressibleAsConstantInitializer()) { + ConstantEmitter ConstEmitter(CGM); + return ConstEmitter.tryEmitAbstract(E, E->getType()); + } + assert(BoxingMethod->isClassMethod() && "BoxingMethod must be a class method"); Selector Sel = BoxingMethod->getSelector(); @@ -160,9 +164,8 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E, if (ALE) { // Emit the element and store it to the appropriate array slot. const Expr *Rhs = ALE->getElement(i); - LValue LV = MakeAddrLValue( - Builder.CreateConstArrayGEP(Objects, i, getPointerSize()), - ElementType, AlignmentSource::Decl); + LValue LV = MakeAddrLValue(Builder.CreateConstArrayGEP(Objects, i), + ElementType, AlignmentSource::Decl); llvm::Value *value = EmitScalarExpr(Rhs); EmitStoreThroughLValue(RValue::get(value), LV, true); @@ -172,17 +175,15 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E, } else { // Emit the key and store it to the appropriate array slot. const Expr *Key = DLE->getKeyValueElement(i).Key; - LValue KeyLV = MakeAddrLValue( - Builder.CreateConstArrayGEP(Keys, i, getPointerSize()), - ElementType, AlignmentSource::Decl); + LValue KeyLV = MakeAddrLValue(Builder.CreateConstArrayGEP(Keys, i), + ElementType, AlignmentSource::Decl); llvm::Value *keyValue = EmitScalarExpr(Key); EmitStoreThroughLValue(RValue::get(keyValue), KeyLV, /*isInit=*/true); // Emit the value and store it to the appropriate array slot. 
const Expr *Value = DLE->getKeyValueElement(i).Value; - LValue ValueLV = MakeAddrLValue( - Builder.CreateConstArrayGEP(Objects, i, getPointerSize()), - ElementType, AlignmentSource::Decl); + LValue ValueLV = MakeAddrLValue(Builder.CreateConstArrayGEP(Objects, i), + ElementType, AlignmentSource::Decl); llvm::Value *valueValue = EmitScalarExpr(Value); EmitStoreThroughLValue(RValue::get(valueValue), ValueLV, /*isInit=*/true); if (TrackNeededObjects) { @@ -382,10 +383,12 @@ tryGenerateSpecializedMessageSend(CodeGenFunction &CGF, QualType ResultType, if (isClassMessage && Runtime.shouldUseRuntimeFunctionsForAlloc() && ResultType->isObjCObjectPointerType()) { - // [Foo alloc] -> objc_alloc(Foo) + // [Foo alloc] -> objc_alloc(Foo) or + // [self alloc] -> objc_alloc(self) if (Sel.isUnarySelector() && Sel.getNameForSlot(0) == "alloc") return CGF.EmitObjCAlloc(Receiver, CGF.ConvertType(ResultType)); - // [Foo allocWithZone:nil] -> objc_allocWithZone(Foo) + // [Foo allocWithZone:nil] -> objc_allocWithZone(Foo) or + // [self allocWithZone:nil] -> objc_allocWithZone(self) if (Sel.isKeywordSelector() && Sel.getNumArgs() == 1 && Args.size() == 1 && Args.front().getType()->isPointerType() && Sel.getNameForSlot(0) == "allocWithZone") { @@ -427,6 +430,57 @@ tryGenerateSpecializedMessageSend(CodeGenFunction &CGF, QualType ResultType, return None; } +/// Instead of '[[MyClass alloc] init]', try to generate +/// 'objc_alloc_init(MyClass)'. This provides a code size improvement on the +/// caller side, as well as the optimized objc_alloc. +static Optional<llvm::Value *> +tryEmitSpecializedAllocInit(CodeGenFunction &CGF, const ObjCMessageExpr *OME) { + auto &Runtime = CGF.getLangOpts().ObjCRuntime; + if (!Runtime.shouldUseRuntimeFunctionForCombinedAllocInit()) + return None; + + // Match the exact pattern '[[MyClass alloc] init]'. + Selector Sel = OME->getSelector(); + if (OME->getReceiverKind() != ObjCMessageExpr::Instance || + !OME->getType()->isObjCObjectPointerType() || !Sel.isUnarySelector() || + Sel.getNameForSlot(0) != "init") + return None; + + // Okay, this is '[receiver init]', check if 'receiver' is '[cls alloc]' or + // we are in an ObjC class method and 'receiver' is '[self alloc]'. + auto *SubOME = + dyn_cast<ObjCMessageExpr>(OME->getInstanceReceiver()->IgnoreParenCasts()); + if (!SubOME) + return None; + Selector SubSel = SubOME->getSelector(); + + // Check if we are in an ObjC class method and the receiver expression is + // 'self'. 
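// For illustration (hypothetical Objective-C, not from this patch), the two
// shapes accepted by this function:
//   MyClass *a = [[MyClass alloc] init];                  // class receiver
//   + (instancetype)make { return [[self alloc] init]; }  // 'self' inside a
//                                                         // class method
// Both lower to a single runtime call, objc_alloc_init(<receiver class>),
// which the checks below establish.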
+ const Expr *SelfInClassMethod = nullptr; + if (const auto *CurMD = dyn_cast_or_null<ObjCMethodDecl>(CGF.CurFuncDecl)) + if (CurMD->isClassMethod()) + if ((SelfInClassMethod = SubOME->getInstanceReceiver())) + if (!SelfInClassMethod->isObjCSelfExpr()) + SelfInClassMethod = nullptr; + + if ((SubOME->getReceiverKind() != ObjCMessageExpr::Class && + !SelfInClassMethod) || !SubOME->getType()->isObjCObjectPointerType() || + !SubSel.isUnarySelector() || SubSel.getNameForSlot(0) != "alloc") + return None; + + llvm::Value *Receiver; + if (SelfInClassMethod) { + Receiver = CGF.EmitScalarExpr(SelfInClassMethod); + } else { + QualType ReceiverType = SubOME->getClassReceiver(); + const ObjCObjectType *ObjTy = ReceiverType->getAs<ObjCObjectType>(); + const ObjCInterfaceDecl *ID = ObjTy->getInterface(); + assert(ID && "null interface should be impossible here"); + Receiver = CGF.CGM.getObjCRuntime().GetClass(CGF, ID); + } + return CGF.EmitObjCAllocInit(Receiver, CGF.ConvertType(OME->getType())); +} + RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E, ReturnValueSlot Return) { // Only the lookup mechanism and first two arguments of the method @@ -448,6 +502,9 @@ RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E, } } + if (Optional<llvm::Value *> Val = tryEmitSpecializedAllocInit(*this, E)) + return AdjustObjCObjectType(*this, E->getType(), RValue::get(*Val)); + // We don't retain the receiver in delegate init calls, and this is // safe because the receiver value is always loaded from 'self', // which we zero out. We don't want to Block_copy block receivers, @@ -468,6 +525,10 @@ RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E, switch (E->getReceiverKind()) { case ObjCMessageExpr::Instance: ReceiverType = E->getInstanceReceiver()->getType(); + if (auto *OMD = dyn_cast_or_null<ObjCMethodDecl>(CurFuncDecl)) + if (OMD->isClassMethod()) + if (E->getInstanceReceiver()->isObjCSelfExpr()) + isClassMessage = true; if (retainSelf) { TryEmitResult ter = tryEmitARCRetainScalarExpr(*this, E->getInstanceReceiver()); @@ -685,7 +746,7 @@ static void emitStructGetterCall(CodeGenFunction &CGF, ObjCIvarDecl *ivar, args.add(RValue::get(CGF.Builder.getInt1(isAtomic)), Context.BoolTy); args.add(RValue::get(CGF.Builder.getInt1(hasStrong)), Context.BoolTy); - llvm::Constant *fn = CGF.CGM.getObjCRuntime().GetGetStructFunction(); + llvm::FunctionCallee fn = CGF.CGM.getObjCRuntime().GetGetStructFunction(); CGCallee callee = CGCallee::forDirect(fn); CGF.EmitCall(CGF.getTypes().arrangeBuiltinFunctionCall(Context.VoidTy, args), callee, ReturnValueSlot(), args); @@ -949,8 +1010,8 @@ static void emitCPPObjectAtomicGetterCall(CodeGenFunction &CGF, // Third argument is the helper function. 
args.add(RValue::get(AtomicHelperFn), CGF.getContext().VoidPtrTy); - llvm::Constant *copyCppAtomicObjectFn = - CGF.CGM.getObjCRuntime().GetCppAtomicObjectGetFunction(); + llvm::FunctionCallee copyCppAtomicObjectFn = + CGF.CGM.getObjCRuntime().GetCppAtomicObjectGetFunction(); CGCallee callee = CGCallee::forDirect(copyCppAtomicObjectFn); CGF.EmitCall( CGF.getTypes().arrangeBuiltinFunctionCall(CGF.getContext().VoidTy, args), @@ -1026,8 +1087,8 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, } case PropertyImplStrategy::GetSetProperty: { - llvm::Constant *getPropertyFn = - CGM.getObjCRuntime().GetPropertyGetFunction(); + llvm::FunctionCallee getPropertyFn = + CGM.getObjCRuntime().GetPropertyGetFunction(); if (!getPropertyFn) { CGM.ErrorUnsupported(propImpl, "Obj-C getter requiring atomic copy"); return; @@ -1052,10 +1113,10 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, // FIXME: We shouldn't need to get the function info here, the // runtime already should have computed it to build the function. - llvm::Instruction *CallInstruction; - RValue RV = EmitCall( - getTypes().arrangeBuiltinFunctionCall(propType, args), - callee, ReturnValueSlot(), args, &CallInstruction); + llvm::CallBase *CallInstruction; + RValue RV = EmitCall(getTypes().arrangeBuiltinFunctionCall( + getContext().getObjCIdType(), args), + callee, ReturnValueSlot(), args, &CallInstruction); if (llvm::CallInst *call = dyn_cast<llvm::CallInst>(CallInstruction)) call->setTailCall(); @@ -1096,7 +1157,7 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, // that's not necessarily the same as "on the stack", so // we still potentially need objc_memmove_collectable. EmitAggregateCopy(/* Dest= */ MakeAddrLValue(ReturnValue, ivarType), - /* Src= */ LV, ivarType, overlapForReturnValue()); + /* Src= */ LV, ivarType, getOverlapForReturnValue()); return; } case TEK_Scalar: { @@ -1170,7 +1231,7 @@ static void emitStructSetterCall(CodeGenFunction &CGF, ObjCMethodDecl *OMD, // FIXME: should this really always be false? args.add(RValue::get(CGF.Builder.getFalse()), CGF.getContext().BoolTy); - llvm::Constant *fn = CGF.CGM.getObjCRuntime().GetSetStructFunction(); + llvm::FunctionCallee fn = CGF.CGM.getObjCRuntime().GetSetStructFunction(); CGCallee callee = CGCallee::forDirect(fn); CGF.EmitCall( CGF.getTypes().arrangeBuiltinFunctionCall(CGF.getContext().VoidTy, args), @@ -1207,8 +1268,8 @@ static void emitCPPObjectAtomicSetterCall(CodeGenFunction &CGF, // Third argument is the helper function. 
args.add(RValue::get(AtomicHelperFn), CGF.getContext().VoidPtrTy); - llvm::Constant *fn = - CGF.CGM.getObjCRuntime().GetCppAtomicObjectSetFunction(); + llvm::FunctionCallee fn = + CGF.CGM.getObjCRuntime().GetCppAtomicObjectSetFunction(); CGCallee callee = CGCallee::forDirect(fn); CGF.EmitCall( CGF.getTypes().arrangeBuiltinFunctionCall(CGF.getContext().VoidTy, args), @@ -1302,14 +1363,13 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, case PropertyImplStrategy::GetSetProperty: case PropertyImplStrategy::SetPropertyAndExpressionGet: { - llvm::Constant *setOptimizedPropertyFn = nullptr; - llvm::Constant *setPropertyFn = nullptr; + llvm::FunctionCallee setOptimizedPropertyFn = nullptr; + llvm::FunctionCallee setPropertyFn = nullptr; if (UseOptimizedSetter(CGM)) { // 10.8 and iOS 6.0 code and GC is off setOptimizedPropertyFn = - CGM.getObjCRuntime() - .GetOptimizedPropertySetFunction(strategy.isAtomic(), - strategy.isCopy()); + CGM.getObjCRuntime().GetOptimizedPropertySetFunction( + strategy.isAtomic(), strategy.isCopy()); if (!setOptimizedPropertyFn) { CGM.ErrorUnsupported(propImpl, "Obj-C optimized setter - NYI"); return; @@ -1560,8 +1620,8 @@ QualType CodeGenFunction::TypeOfSelfObject() { } void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ - llvm::Constant *EnumerationMutationFnPtr = - CGM.getObjCRuntime().EnumerationMutationFunction(); + llvm::FunctionCallee EnumerationMutationFnPtr = + CGM.getObjCRuntime().EnumerationMutationFunction(); if (!EnumerationMutationFnPtr) { CGM.ErrorUnsupported(&S, "Obj-C fast enumeration for this runtime"); return; @@ -1669,8 +1729,8 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ // Save the initial mutations value. This is the value at an // address that was written into the state object by // countByEnumeratingWithState:objects:count:. - Address StateMutationsPtrPtr = Builder.CreateStructGEP( - StatePtr, 2, 2 * getPointerSize(), "mutationsptr.ptr"); + Address StateMutationsPtrPtr = + Builder.CreateStructGEP(StatePtr, 2, "mutationsptr.ptr"); llvm::Value *StateMutationsPtr = Builder.CreateLoad(StateMutationsPtrPtr, "mutationsptr"); @@ -1751,8 +1811,8 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ // Fetch the buffer out of the enumeration state. // TODO: this pointer should actually be invariant between // refreshes, which would help us do certain loop optimizations. - Address StateItemsPtr = Builder.CreateStructGEP( - StatePtr, 1, getPointerSize(), "stateitems.ptr"); + Address StateItemsPtr = + Builder.CreateStructGEP(StatePtr, 1, "stateitems.ptr"); llvm::Value *EnumStateItems = Builder.CreateLoad(StateItemsPtr, "stateitems"); @@ -1891,7 +1951,7 @@ llvm::Value *CodeGenFunction::EmitObjCExtendObjectLifetime(QualType type, /// Given a number of pointers, inform the optimizer that they're /// being intrinsically used up until this point in the program. 
void CodeGenFunction::EmitARCIntrinsicUse(ArrayRef<llvm::Value*> values) { - llvm::Constant *&fn = CGM.getObjCEntrypoints().clang_arc_use; + llvm::Function *&fn = CGM.getObjCEntrypoints().clang_arc_use; if (!fn) fn = CGM.getIntrinsic(llvm::Intrinsic::objc_clang_arc_use); @@ -1900,8 +1960,7 @@ void CodeGenFunction::EmitARCIntrinsicUse(ArrayRef<llvm::Value*> values) { EmitNounwindRuntimeCall(fn, values); } -static void setARCRuntimeFunctionLinkage(CodeGenModule &CGM, - llvm::Constant *RTF) { +static void setARCRuntimeFunctionLinkage(CodeGenModule &CGM, llvm::Value *RTF) { if (auto *F = dyn_cast<llvm::Function>(RTF)) { // If the target runtime doesn't naturally support ARC, emit weak // references to the runtime support library. We don't really @@ -1913,15 +1972,18 @@ static void setARCRuntimeFunctionLinkage(CodeGenModule &CGM, } } +static void setARCRuntimeFunctionLinkage(CodeGenModule &CGM, + llvm::FunctionCallee RTF) { + setARCRuntimeFunctionLinkage(CGM, RTF.getCallee()); +} + /// Perform an operation having the signature /// i8* (i8*) /// where a null input causes a no-op and returns null. -static llvm::Value *emitARCValueOperation(CodeGenFunction &CGF, - llvm::Value *value, - llvm::Type *returnType, - llvm::Constant *&fn, - llvm::Intrinsic::ID IntID, - bool isTailCall = false) { +static llvm::Value *emitARCValueOperation( + CodeGenFunction &CGF, llvm::Value *value, llvm::Type *returnType, + llvm::Function *&fn, llvm::Intrinsic::ID IntID, + llvm::CallInst::TailCallKind tailKind = llvm::CallInst::TCK_None) { if (isa<llvm::ConstantPointerNull>(value)) return value; @@ -1936,8 +1998,7 @@ static llvm::Value *emitARCValueOperation(CodeGenFunction &CGF, // Call the function. llvm::CallInst *call = CGF.EmitNounwindRuntimeCall(fn, value); - if (isTailCall) - call->setTailCall(); + call->setTailCallKind(tailKind); // Cast the result back to the original type. 
return CGF.Builder.CreateBitCast(call, origType); @@ -1945,9 +2006,8 @@ static llvm::Value *emitARCValueOperation(CodeGenFunction &CGF, /// Perform an operation having the following signature: /// i8* (i8**) -static llvm::Value *emitARCLoadOperation(CodeGenFunction &CGF, - Address addr, - llvm::Constant *&fn, +static llvm::Value *emitARCLoadOperation(CodeGenFunction &CGF, Address addr, + llvm::Function *&fn, llvm::Intrinsic::ID IntID) { if (!fn) { fn = CGF.CGM.getIntrinsic(IntID); @@ -1970,10 +2030,9 @@ static llvm::Value *emitARCLoadOperation(CodeGenFunction &CGF, /// Perform an operation having the following signature: /// i8* (i8**, i8*) -static llvm::Value *emitARCStoreOperation(CodeGenFunction &CGF, - Address addr, +static llvm::Value *emitARCStoreOperation(CodeGenFunction &CGF, Address addr, llvm::Value *value, - llvm::Constant *&fn, + llvm::Function *&fn, llvm::Intrinsic::ID IntID, bool ignored) { assert(addr.getElementType() == value->getType()); @@ -1998,10 +2057,8 @@ static llvm::Value *emitARCStoreOperation(CodeGenFunction &CGF, /// Perform an operation having the following signature: /// void (i8**, i8**) -static void emitARCCopyOperation(CodeGenFunction &CGF, - Address dst, - Address src, - llvm::Constant *&fn, +static void emitARCCopyOperation(CodeGenFunction &CGF, Address dst, Address src, + llvm::Function *&fn, llvm::Intrinsic::ID IntID) { assert(dst.getType() == src.getType()); @@ -2023,7 +2080,7 @@ static void emitARCCopyOperation(CodeGenFunction &CGF, static llvm::Value *emitObjCValueOperation(CodeGenFunction &CGF, llvm::Value *value, llvm::Type *returnType, - llvm::Constant *&fn, + llvm::FunctionCallee &fn, StringRef fnName) { if (isa<llvm::ConstantPointerNull>(value)) return value; @@ -2034,7 +2091,7 @@ static llvm::Value *emitObjCValueOperation(CodeGenFunction &CGF, fn = CGF.CGM.CreateRuntimeFunction(fnType, fnName); // We have Native ARC, so set nonlazybind attribute for performance - if (llvm::Function *f = dyn_cast<llvm::Function>(fn)) + if (llvm::Function *f = dyn_cast<llvm::Function>(fn.getCallee())) if (fnName == "objc_retain") f->addFnAttr(llvm::Attribute::NonLazyBind); } @@ -2044,10 +2101,10 @@ static llvm::Value *emitObjCValueOperation(CodeGenFunction &CGF, value = CGF.Builder.CreateBitCast(value, CGF.Int8PtrTy); // Call the function. - llvm::CallInst *call = CGF.EmitNounwindRuntimeCall(fn, value); + llvm::CallBase *Inst = CGF.EmitCallOrInvoke(fn, value); // Cast the result back to the original type. - return CGF.Builder.CreateBitCast(call, origType); + return CGF.Builder.CreateBitCast(Inst, origType); } /// Produce the code to do a retain. Based on the type, calls one of: @@ -2122,14 +2179,10 @@ static void emitAutoreleasedReturnValueMarker(CodeGenFunction &CGF) { // with this marker yet, so leave a breadcrumb for the ARC // optimizer to pick up. 
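// Sketch of the breadcrumb emitted in the branch below (an assumption: the
// exact IR shape and marker string are target-dependent; AArch64 uses
// "mov fp, fp"):
//   !llvm.module.flags = !{..., !9}
//   !9 = !{i32 1, !"clang.arc.retainAutoreleasedReturnValueMarker",
//          !"mov\09fp, fp"}
// A single Error-behavior module flag replaces the old named-metadata node,
// so modules with mismatched markers now fail loudly when linked.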
} else { - llvm::NamedMDNode *metadata = - CGF.CGM.getModule().getOrInsertNamedMetadata( - "clang.arc.retainAutoreleasedReturnValueMarker"); - assert(metadata->getNumOperands() <= 1); - if (metadata->getNumOperands() == 0) { - auto &ctx = CGF.getLLVMContext(); - metadata->addOperand(llvm::MDNode::get(ctx, - llvm::MDString::get(ctx, assembly))); + const char *markerKey = "clang.arc.retainAutoreleasedReturnValueMarker"; + if (!CGF.CGM.getModule().getModuleFlag(markerKey)) { + auto *str = llvm::MDString::get(CGF.getLLVMContext(), assembly); + CGF.CGM.getModule().addModuleFlag(llvm::Module::Error, markerKey, str); } } } @@ -2147,9 +2200,15 @@ static void emitAutoreleasedReturnValueMarker(CodeGenFunction &CGF) { llvm::Value * CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) { emitAutoreleasedReturnValueMarker(*this); - return emitARCValueOperation(*this, value, nullptr, - CGM.getObjCEntrypoints().objc_retainAutoreleasedReturnValue, - llvm::Intrinsic::objc_retainAutoreleasedReturnValue); + llvm::CallInst::TailCallKind tailKind = + CGM.getTargetCodeGenInfo() + .shouldSuppressTailCallsOfRetainAutoreleasedReturnValue() + ? llvm::CallInst::TCK_NoTail + : llvm::CallInst::TCK_None; + return emitARCValueOperation( + *this, value, nullptr, + CGM.getObjCEntrypoints().objc_retainAutoreleasedReturnValue, + llvm::Intrinsic::objc_retainAutoreleasedReturnValue, tailKind); } /// Claim a possibly-autoreleased return value at +0. This is only @@ -2173,7 +2232,7 @@ void CodeGenFunction::EmitARCRelease(llvm::Value *value, ARCPreciseLifetime_t precise) { if (isa<llvm::ConstantPointerNull>(value)) return; - llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_release; + llvm::Function *&fn = CGM.getObjCEntrypoints().objc_release; if (!fn) { fn = CGM.getIntrinsic(llvm::Intrinsic::objc_release); setARCRuntimeFunctionLinkage(CGM, fn); @@ -2219,7 +2278,7 @@ llvm::Value *CodeGenFunction::EmitARCStoreStrongCall(Address addr, bool ignored) { assert(addr.getElementType() == value->getType()); - llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_storeStrong; + llvm::Function *&fn = CGM.getObjCEntrypoints().objc_storeStrong; if (!fn) { fn = CGM.getIntrinsic(llvm::Intrinsic::objc_storeStrong); setARCRuntimeFunctionLinkage(CGM, fn); @@ -2286,7 +2345,7 @@ CodeGenFunction::EmitARCAutoreleaseReturnValue(llvm::Value *value) { return emitARCValueOperation(*this, value, nullptr, CGM.getObjCEntrypoints().objc_autoreleaseReturnValue, llvm::Intrinsic::objc_autoreleaseReturnValue, - /*isTailCall*/ true); + llvm::CallInst::TCK_Tail); } /// Do a fused retain/autorelease of the given object. @@ -2296,7 +2355,7 @@ CodeGenFunction::EmitARCRetainAutoreleaseReturnValue(llvm::Value *value) { return emitARCValueOperation(*this, value, nullptr, CGM.getObjCEntrypoints().objc_retainAutoreleaseReturnValue, llvm::Intrinsic::objc_retainAutoreleaseReturnValue, - /*isTailCall*/ true); + llvm::CallInst::TCK_Tail); } /// Do a fused retain/autorelease of the given object. @@ -2375,7 +2434,7 @@ void CodeGenFunction::EmitARCInitWeak(Address addr, llvm::Value *value) { /// void \@objc_destroyWeak(i8** %addr) /// Essentially objc_storeWeak(addr, nil). 
void CodeGenFunction::EmitARCDestroyWeak(Address addr) { - llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_destroyWeak; + llvm::Function *&fn = CGM.getObjCEntrypoints().objc_destroyWeak; if (!fn) { fn = CGM.getIntrinsic(llvm::Intrinsic::objc_destroyWeak); setARCRuntimeFunctionLinkage(CGM, fn); @@ -2423,7 +2482,7 @@ void CodeGenFunction::emitARCMoveAssignWeak(QualType Ty, Address DstAddr, /// Produce the code to do a objc_autoreleasepool_push. /// call i8* \@objc_autoreleasePoolPush(void) llvm::Value *CodeGenFunction::EmitObjCAutoreleasePoolPush() { - llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_autoreleasePoolPush; + llvm::Function *&fn = CGM.getObjCEntrypoints().objc_autoreleasePoolPush; if (!fn) { fn = CGM.getIntrinsic(llvm::Intrinsic::objc_autoreleasePoolPush); setARCRuntimeFunctionLinkage(CGM, fn); @@ -2439,8 +2498,8 @@ void CodeGenFunction::EmitObjCAutoreleasePoolPop(llvm::Value *value) { if (getInvokeDest()) { // Call the runtime method not the intrinsic if we are handling exceptions - llvm::Constant *&fn = - CGM.getObjCEntrypoints().objc_autoreleasePoolPopInvoke; + llvm::FunctionCallee &fn = + CGM.getObjCEntrypoints().objc_autoreleasePoolPopInvoke; if (!fn) { llvm::FunctionType *fnType = llvm::FunctionType::get(Builder.getVoidTy(), Int8PtrTy, false); @@ -2451,7 +2510,7 @@ void CodeGenFunction::EmitObjCAutoreleasePoolPop(llvm::Value *value) { // objc_autoreleasePoolPop can throw. EmitRuntimeCallOrInvoke(fn, value); } else { - llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_autoreleasePoolPop; + llvm::FunctionCallee &fn = CGM.getObjCEntrypoints().objc_autoreleasePoolPop; if (!fn) { fn = CGM.getIntrinsic(llvm::Intrinsic::objc_autoreleasePoolPop); setARCRuntimeFunctionLinkage(CGM, fn); @@ -2507,6 +2566,13 @@ llvm::Value *CodeGenFunction::EmitObjCAllocWithZone(llvm::Value *value, "objc_allocWithZone"); } +llvm::Value *CodeGenFunction::EmitObjCAllocInit(llvm::Value *value, + llvm::Type *resultType) { + return emitObjCValueOperation(*this, value, resultType, + CGM.getObjCEntrypoints().objc_alloc_init, + "objc_alloc_init"); +} + /// Produce the code to do a primitive release. /// [tmp drain]; void CodeGenFunction::EmitObjCMRRAutoreleasePoolPop(llvm::Value *Arg) { @@ -2545,18 +2611,19 @@ void CodeGenFunction::emitARCIntrinsicUse(CodeGenFunction &CGF, Address addr, /// call i8* \@objc_autorelease(i8* %value) llvm::Value *CodeGenFunction::EmitObjCAutorelease(llvm::Value *value, llvm::Type *returnType) { - return emitObjCValueOperation(*this, value, returnType, - CGM.getObjCEntrypoints().objc_autoreleaseRuntimeFunction, - "objc_autorelease"); + return emitObjCValueOperation( + *this, value, returnType, + CGM.getObjCEntrypoints().objc_autoreleaseRuntimeFunction, + "objc_autorelease"); } /// Retain the given object, with normal retain semantics. /// call i8* \@objc_retain(i8* %value) llvm::Value *CodeGenFunction::EmitObjCRetainNonBlock(llvm::Value *value, llvm::Type *returnType) { - return emitObjCValueOperation(*this, value, returnType, - CGM.getObjCEntrypoints().objc_retainRuntimeFunction, - "objc_retain"); + return emitObjCValueOperation( + *this, value, returnType, + CGM.getObjCEntrypoints().objc_retainRuntimeFunction, "objc_retain"); } /// Release the given object. 
@@ -2565,24 +2632,23 @@ void CodeGenFunction::EmitObjCRelease(llvm::Value *value, ARCPreciseLifetime_t precise) { if (isa<llvm::ConstantPointerNull>(value)) return; - llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_release; + llvm::FunctionCallee &fn = + CGM.getObjCEntrypoints().objc_releaseRuntimeFunction; if (!fn) { - if (!fn) { - llvm::FunctionType *fnType = + llvm::FunctionType *fnType = llvm::FunctionType::get(Builder.getVoidTy(), Int8PtrTy, false); - fn = CGM.CreateRuntimeFunction(fnType, "objc_release"); - setARCRuntimeFunctionLinkage(CGM, fn); - // We have Native ARC, so set nonlazybind attribute for performance - if (llvm::Function *f = dyn_cast<llvm::Function>(fn)) - f->addFnAttr(llvm::Attribute::NonLazyBind); - } + fn = CGM.CreateRuntimeFunction(fnType, "objc_release"); + setARCRuntimeFunctionLinkage(CGM, fn); + // We have Native ARC, so set nonlazybind attribute for performance + if (llvm::Function *f = dyn_cast<llvm::Function>(fn.getCallee())) + f->addFnAttr(llvm::Attribute::NonLazyBind); } // Cast the argument to 'id'. value = Builder.CreateBitCast(value, Int8PtrTy); // Call objc_release. - llvm::CallInst *call = EmitNounwindRuntimeCall(fn, value); + llvm::CallBase *call = EmitCallOrInvoke(fn, value); if (precise == ARCImpreciseLifetime) { call->setMetadata("clang.imprecise_release", @@ -2829,6 +2895,7 @@ public: Result visit(const Expr *e); Result visitCastExpr(const CastExpr *e); Result visitPseudoObjectExpr(const PseudoObjectExpr *e); + Result visitBlockExpr(const BlockExpr *e); Result visitBinaryOperator(const BinaryOperator *e); Result visitBinAssign(const BinaryOperator *e); Result visitBinAssignUnsafeUnretained(const BinaryOperator *e); @@ -2905,6 +2972,12 @@ ARCExprEmitter<Impl,Result>::visitPseudoObjectExpr(const PseudoObjectExpr *E) { } template <typename Impl, typename Result> +Result ARCExprEmitter<Impl, Result>::visitBlockExpr(const BlockExpr *e) { + // The default implementation just forwards the expression to visitExpr. + return asImpl().visitExpr(e); +} + +template <typename Impl, typename Result> Result ARCExprEmitter<Impl,Result>::visitCastExpr(const CastExpr *e) { switch (e->getCastKind()) { @@ -3047,7 +3120,8 @@ Result ARCExprEmitter<Impl,Result>::visit(const Expr *e) { // Look through pseudo-object expressions. } else if (const PseudoObjectExpr *pseudo = dyn_cast<PseudoObjectExpr>(e)) { return asImpl().visitPseudoObjectExpr(pseudo); - } + } else if (auto *be = dyn_cast<BlockExpr>(e)) + return asImpl().visitBlockExpr(be); return asImpl().visitExpr(e); } @@ -3082,6 +3156,15 @@ struct ARCRetainExprEmitter : return TryEmitResult(result, true); } + TryEmitResult visitBlockExpr(const BlockExpr *e) { + TryEmitResult result = visitExpr(e); + // Avoid the block-retain if this is a block literal that doesn't need to be + // copied to the heap. + if (e->getBlockDecl()->canAvoidCopyToHeap()) + result.setInt(true); + return result; + } + /// Block extends are net +0. Naively, we could just recurse on /// the subexpression, but actually we need to ensure that the /// value is copied as a block, so there's a little filter here. @@ -3384,11 +3467,10 @@ void CodeGenFunction::EmitExtendGCLifetime(llvm::Value *object) { // We just use an inline assembly. 
llvm::FunctionType *extenderType = llvm::FunctionType::get(VoidTy, VoidPtrTy, RequiredArgs::All); - llvm::Value *extender - = llvm::InlineAsm::get(extenderType, - /* assembly */ "", - /* constraints */ "r", - /* side effects */ true); + llvm::InlineAsm *extender = llvm::InlineAsm::get(extenderType, + /* assembly */ "", + /* constraints */ "r", + /* side effects */ true); object = Builder.CreateBitCast(object, VoidPtrTy); EmitNounwindRuntimeCall(extender, object); @@ -3647,19 +3729,25 @@ void CodeGenModule::emitAtAvailableLinkGuard() { // CoreFoundation is linked into the final binary. llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, {VoidPtrTy}, false); - llvm::Constant *CFFunc = + llvm::FunctionCallee CFFunc = CreateRuntimeFunction(FTy, "CFBundleGetVersionNumber"); llvm::FunctionType *CheckFTy = llvm::FunctionType::get(VoidTy, {}, false); - llvm::Function *CFLinkCheckFunc = cast<llvm::Function>(CreateBuiltinFunction( - CheckFTy, "__clang_at_available_requires_core_foundation_framework")); - CFLinkCheckFunc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); - CFLinkCheckFunc->setVisibility(llvm::GlobalValue::HiddenVisibility); - CodeGenFunction CGF(*this); - CGF.Builder.SetInsertPoint(CGF.createBasicBlock("", CFLinkCheckFunc)); - CGF.EmitNounwindRuntimeCall(CFFunc, llvm::Constant::getNullValue(VoidPtrTy)); - CGF.Builder.CreateUnreachable(); - addCompilerUsedGlobal(CFLinkCheckFunc); + llvm::FunctionCallee CFLinkCheckFuncRef = CreateRuntimeFunction( + CheckFTy, "__clang_at_available_requires_core_foundation_framework", + llvm::AttributeList(), /*Local=*/true); + llvm::Function *CFLinkCheckFunc = + cast<llvm::Function>(CFLinkCheckFuncRef.getCallee()->stripPointerCasts()); + if (CFLinkCheckFunc->empty()) { + CFLinkCheckFunc->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + CFLinkCheckFunc->setVisibility(llvm::GlobalValue::HiddenVisibility); + CodeGenFunction CGF(*this); + CGF.Builder.SetInsertPoint(CGF.createBasicBlock("", CFLinkCheckFunc)); + CGF.EmitNounwindRuntimeCall(CFFunc, + llvm::Constant::getNullValue(VoidPtrTy)); + CGF.Builder.CreateUnreachable(); + addCompilerUsedGlobal(CFLinkCheckFunc); + } } CGObjCRuntime::~CGObjCRuntime() {} diff --git a/lib/CodeGen/CGObjCGNU.cpp b/lib/CodeGen/CGObjCGNU.cpp index 548bd6b3fd72..ee5c12aa35bd 100644 --- a/lib/CodeGen/CGObjCGNU.cpp +++ b/lib/CodeGen/CGObjCGNU.cpp @@ -1,9 +1,8 @@ //===------- CGObjCGNU.cpp - Emit LLVM Code from ASTs for a Module --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -29,7 +28,6 @@ #include "clang/Basic/SourceManager.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" @@ -60,7 +58,7 @@ class LazyRuntimeFunction { CodeGenModule *CGM; llvm::FunctionType *FTy; const char *FunctionName; - llvm::Constant *Function; + llvm::FunctionCallee Function; public: /// Constructor leaves this class uninitialized, because it is intended to @@ -90,7 +88,7 @@ public: /// Overloaded cast operator, allows the class to be implicitly cast to an /// LLVM constant. 
- operator llvm::Constant *() { + operator llvm::FunctionCallee() { if (!Function) { if (!FunctionName) return nullptr; @@ -98,9 +96,6 @@ public: } return Function; } - operator llvm::Function *() { - return cast<llvm::Function>((llvm::Constant *)*this); - } }; @@ -190,12 +185,16 @@ protected: (R.getVersion() >= VersionTuple(major, minor)); } - std::string SymbolForProtocol(StringRef Name) { - return (StringRef("._OBJC_PROTOCOL_") + Name).str(); + std::string ManglePublicSymbol(StringRef Name) { + return (StringRef(CGM.getTriple().isOSBinFormatCOFF() ? "$_" : "._") + Name).str(); + } + + std::string SymbolForProtocol(Twine Name) { + return (ManglePublicSymbol("OBJC_PROTOCOL_") + Name).str(); } std::string SymbolForProtocolRef(StringRef Name) { - return (StringRef("._OBJC_REF_PROTOCOL_") + Name).str(); + return (ManglePublicSymbol("OBJC_REF_PROTOCOL_") + Name).str(); } @@ -614,15 +613,15 @@ public: const ObjCProtocolDecl *PD) override; void GenerateProtocol(const ObjCProtocolDecl *PD) override; llvm::Function *ModuleInitFunction() override; - llvm::Constant *GetPropertyGetFunction() override; - llvm::Constant *GetPropertySetFunction() override; - llvm::Constant *GetOptimizedPropertySetFunction(bool atomic, - bool copy) override; - llvm::Constant *GetSetStructFunction() override; - llvm::Constant *GetGetStructFunction() override; - llvm::Constant *GetCppAtomicObjectGetFunction() override; - llvm::Constant *GetCppAtomicObjectSetFunction() override; - llvm::Constant *EnumerationMutationFunction() override; + llvm::FunctionCallee GetPropertyGetFunction() override; + llvm::FunctionCallee GetPropertySetFunction() override; + llvm::FunctionCallee GetOptimizedPropertySetFunction(bool atomic, + bool copy) override; + llvm::FunctionCallee GetSetStructFunction() override; + llvm::FunctionCallee GetGetStructFunction() override; + llvm::FunctionCallee GetCppAtomicObjectGetFunction() override; + llvm::FunctionCallee GetCppAtomicObjectSetFunction() override; + llvm::FunctionCallee EnumerationMutationFunction() override; void EmitTryStmt(CodeGenFunction &CGF, const ObjCAtTryStmt &S) override; @@ -691,9 +690,9 @@ protected: llvm::Value *args[] = { EnforceType(Builder, Receiver, IdTy), EnforceType(Builder, cmd, SelectorTy) }; - llvm::CallSite imp = CGF.EmitRuntimeCallOrInvoke(MsgLookupFn, args); + llvm::CallBase *imp = CGF.EmitRuntimeCallOrInvoke(MsgLookupFn, args); imp->setMetadata(msgSendMDKind, node); - return imp.getInstruction(); + return imp; } llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, Address ObjCSuper, @@ -750,7 +749,7 @@ class CGObjCGNUstep : public CGObjCGNU { llvm::Value *cmd, llvm::MDNode *node, MessageSendInfo &MSI) override { CGBuilderTy &Builder = CGF.Builder; - llvm::Function *LookupFn = SlotLookupFn; + llvm::FunctionCallee LookupFn = SlotLookupFn; // Store the receiver on the stack so that we can reload it later Address ReceiverPtr = @@ -766,20 +765,20 @@ class CGObjCGNUstep : public CGObjCGNU { } // The lookup function is guaranteed not to capture the receiver pointer. 
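// Sketch (generalizing the pattern just below): llvm::FunctionCallee pairs
// the callee's FunctionType with an llvm::Value, so the callee need not be a
// bare llvm::Function. Attribute updates therefore recover the function
// first, when there is one:
//   if (auto *F = dyn_cast<llvm::Function>(Callee.getCallee()))
//     F->addParamAttr(0, llvm::Attribute::NoCapture);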
- LookupFn->addParamAttr(0, llvm::Attribute::NoCapture); + if (auto *LookupFn2 = dyn_cast<llvm::Function>(LookupFn.getCallee())) + LookupFn2->addParamAttr(0, llvm::Attribute::NoCapture); llvm::Value *args[] = { EnforceType(Builder, ReceiverPtr.getPointer(), PtrToIdTy), EnforceType(Builder, cmd, SelectorTy), EnforceType(Builder, self, IdTy) }; - llvm::CallSite slot = CGF.EmitRuntimeCallOrInvoke(LookupFn, args); - slot.setOnlyReadsMemory(); + llvm::CallBase *slot = CGF.EmitRuntimeCallOrInvoke(LookupFn, args); + slot->setOnlyReadsMemory(); slot->setMetadata(msgSendMDKind, node); // Load the imp from the slot llvm::Value *imp = Builder.CreateAlignedLoad( - Builder.CreateStructGEP(nullptr, slot.getInstruction(), 4), - CGF.getPointerAlign()); + Builder.CreateStructGEP(nullptr, slot, 4), CGF.getPointerAlign()); // The lookup function may have changed the receiver, so make sure we use // the new one. @@ -859,7 +858,7 @@ class CGObjCGNUstep : public CGObjCGNU { PtrTy, PtrTy); } - llvm::Constant *GetCppAtomicObjectGetFunction() override { + llvm::FunctionCallee GetCppAtomicObjectGetFunction() override { // The optimised functions were added in version 1.7 of the GNUstep // runtime. assert (CGM.getLangOpts().ObjCRuntime.getVersion() >= @@ -867,7 +866,7 @@ class CGObjCGNUstep : public CGObjCGNU { return CxxAtomicObjectGetFn; } - llvm::Constant *GetCppAtomicObjectSetFunction() override { + llvm::FunctionCallee GetCppAtomicObjectSetFunction() override { // The optimised functions were added in version 1.7 of the GNUstep // runtime. assert (CGM.getLangOpts().ObjCRuntime.getVersion() >= @@ -875,8 +874,8 @@ class CGObjCGNUstep : public CGObjCGNU { return CxxAtomicObjectSetFn; } - llvm::Constant *GetOptimizedPropertySetFunction(bool atomic, - bool copy) override { + llvm::FunctionCallee GetOptimizedPropertySetFunction(bool atomic, + bool copy) override { // The optimised property functions omit the GC check, and so are not // safe to use in GC mode. The standard functions are fast in GC mode, // so there is less advantage in using them. @@ -911,12 +910,15 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { ConstantStringSection }; static const char *const SectionsBaseNames[8]; + static const char *const PECOFFSectionsBaseNames[8]; template<SectionKind K> std::string sectionName() { - std::string name(SectionsBaseNames[K]); - if (CGM.getTriple().isOSBinFormatCOFF()) + if (CGM.getTriple().isOSBinFormatCOFF()) { + std::string name(PECOFFSectionsBaseNames[K]); name += "$m"; - return name; + return name; + } + return SectionsBaseNames[K]; } /// The GCC ABI superclass message lookup function. Takes a pointer to a /// structure describing the receiver and the class, and a selector as @@ -937,15 +939,19 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { bool EmittedClass = false; /// Generate the name of a symbol for a reference to a class. Accesses to /// classes should be indirected via this. + + typedef std::pair<std::string, std::pair<llvm::Constant*, int>> EarlyInitPair; + std::vector<EarlyInitPair> EarlyInitList; + std::string SymbolForClassRef(StringRef Name, bool isWeak) { if (isWeak) - return (StringRef("._OBJC_WEAK_REF_CLASS_") + Name).str(); + return (ManglePublicSymbol("OBJC_WEAK_REF_CLASS_") + Name).str(); else - return (StringRef("._OBJC_REF_CLASS_") + Name).str(); + return (ManglePublicSymbol("OBJC_REF_CLASS_") + Name).str(); } /// Generate the name of a class symbol. 
std::string SymbolForClass(StringRef Name) { - return (StringRef("._OBJC_CLASS_") + Name).str(); + return (ManglePublicSymbol("OBJC_CLASS_") + Name).str(); } void CallRuntimeFunction(CGBuilderTy &B, StringRef FunctionName, ArrayRef<llvm::Value*> Args) { @@ -954,7 +960,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { Types.push_back(Arg->getType()); llvm::FunctionType *FT = llvm::FunctionType::get(B.getVoidTy(), Types, false); - llvm::Value *Fn = CGM.CreateRuntimeFunction(FT, FunctionName); + llvm::FunctionCallee Fn = CGM.CreateRuntimeFunction(FT, FunctionName); B.CreateCall(Fn, Args); } @@ -999,10 +1005,13 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { llvm::Constant *isa = TheModule.getNamedGlobal(Sym); - if (!isa) + if (!isa) { isa = new llvm::GlobalVariable(TheModule, IdTy, /* isConstant */false, llvm::GlobalValue::ExternalLinkage, nullptr, Sym); - else if (isa->getType() != PtrToIdTy) + if (CGM.getTriple().isOSBinFormatCOFF()) { + cast<llvm::GlobalValue>(isa)->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + } + } else if (isa->getType() != PtrToIdTy) isa = llvm::ConstantExpr::getBitCast(isa, PtrToIdTy); // struct @@ -1017,7 +1026,11 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { ConstantInitBuilder Builder(CGM); auto Fields = Builder.beginStruct(); - Fields.add(isa); + if (!CGM.getTriple().isOSBinFormatCOFF()) { + Fields.add(isa); + } else { + Fields.addNullPointer(PtrTy); + } // For now, all non-ASCII strings are represented as UTF-16. As such, the // number of bytes is simply double the number of UTF-16 codepoints. In // ASCII strings, the number of bytes is equal to the number of non-ASCII @@ -1088,6 +1101,10 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { ObjCStrGV->setComdat(TheModule.getOrInsertComdat(StringName)); ObjCStrGV->setVisibility(llvm::GlobalValue::HiddenVisibility); } + if (CGM.getTriple().isOSBinFormatCOFF()) { + std::pair<llvm::Constant*, int> v{ObjCStrGV, 0}; + EarlyInitList.emplace_back(Sym, v); + } llvm::Constant *ObjCStr = llvm::ConstantExpr::getBitCast(ObjCStrGV, IdTy); ObjCStrings[Str] = ObjCStr; ConstantStrings.push_back(ObjCStr); @@ -1201,6 +1218,33 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { ClassSymbol->setInitializer(new llvm::GlobalVariable(TheModule, Int8Ty, false, llvm::GlobalValue::ExternalWeakLinkage, nullptr, SymbolForClass(Name))); + else { + if (CGM.getTriple().isOSBinFormatCOFF()) { + IdentifierInfo &II = CGM.getContext().Idents.get(Name); + TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl(); + DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); + + const ObjCInterfaceDecl *OID = nullptr; + for (const auto &Result : DC->lookup(&II)) + if ((OID = dyn_cast<ObjCInterfaceDecl>(Result))) + break; + + // The first Interface we find may be a @class, + // which should only be treated as the source of + // truth in the absence of a true declaration. 
+ const ObjCInterfaceDecl *OIDDef = OID->getDefinition(); + if (OIDDef != nullptr) + OID = OIDDef; + + auto Storage = llvm::GlobalValue::DefaultStorageClass; + if (OID->hasAttr<DLLImportAttr>()) + Storage = llvm::GlobalValue::DLLImportStorageClass; + else if (OID->hasAttr<DLLExportAttr>()) + Storage = llvm::GlobalValue::DLLExportStorageClass; + + cast<llvm::GlobalValue>(ClassSymbol)->setDLLStorageClass(Storage); + } + } assert(ClassSymbol->getName() == SymbolName); return ClassSymbol; } @@ -1453,7 +1497,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { Sym->setSection((Section + SecSuffix).str()); Sym->setComdat(TheModule.getOrInsertComdat((Prefix + Section).str())); - Sym->setAlignment(1); + Sym->setAlignment(CGM.getPointerAlign().getQuantity()); return Sym; }; return { Sym("__start_", "$a"), Sym("__stop", "$z") }; @@ -1488,11 +1532,12 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { ConstantInitBuilder builder(CGM); auto InitStructBuilder = builder.beginStruct(); InitStructBuilder.addInt(Int64Ty, 0); - for (auto *s : SectionsBaseNames) { + auto §ionVec = CGM.getTriple().isOSBinFormatCOFF() ? PECOFFSectionsBaseNames : SectionsBaseNames; + for (auto *s : sectionVec) { auto bounds = GetSectionBounds(s); InitStructBuilder.add(bounds.first); InitStructBuilder.add(bounds.second); - }; + } auto *InitStruct = InitStructBuilder.finishAndCreateGlobal(".objc_init", CGM.getPointerAlign(), false, llvm::GlobalValue::LinkOnceODRLinkage); InitStruct->setVisibility(llvm::GlobalValue::HiddenVisibility); @@ -1519,7 +1564,12 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { if (CGM.getTriple().isOSBinFormatCOFF()) InitVar->setSection(".CRT$XCLz"); else - InitVar->setSection(".ctors"); + { + if (CGM.getCodeGenOpts().UseInitArray) + InitVar->setSection(".init_array"); + else + InitVar->setSection(".ctors"); + } InitVar->setVisibility(llvm::GlobalValue::HiddenVisibility); InitVar->setComdat(TheModule.getOrInsertComdat(".objc_ctor")); CGM.addUsedGlobal(InitVar); @@ -1582,6 +1632,29 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { ConstantStrings.clear(); Categories.clear(); Classes.clear(); + + if (EarlyInitList.size() > 0) { + auto *Init = llvm::Function::Create(llvm::FunctionType::get(CGM.VoidTy, + {}), llvm::GlobalValue::InternalLinkage, ".objc_early_init", + &CGM.getModule()); + llvm::IRBuilder<> b(llvm::BasicBlock::Create(CGM.getLLVMContext(), "entry", + Init)); + for (const auto &lateInit : EarlyInitList) { + auto *global = TheModule.getGlobalVariable(lateInit.first); + if (global) { + b.CreateAlignedStore(global, + b.CreateStructGEP(lateInit.second.first, lateInit.second.second), CGM.getPointerAlign().getQuantity()); + } + } + b.CreateRetVoid(); + // We can't use the normal LLVM global initialisation array, because we + // need to specify that this runs early in library initialisation. 
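// Assumption worth spelling out (MSVC CRT behavior, not shown in this patch):
// initializer pointers placed in ".CRT$XC*" sections run in lexical
// section-name order, so an entry in ".CRT$XCLb" below fires before the
// ".CRT$XCLz" entry used above for the ordinary .objc_init constructor.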
+ auto *InitVar = new llvm::GlobalVariable(CGM.getModule(), Init->getType(), + /*isConstant*/true, llvm::GlobalValue::InternalLinkage, + Init, ".objc_early_init_ptr"); + InitVar->setSection(".CRT$XCLb"); + CGM.addUsedGlobal(InitVar); + } return nullptr; } /// In the v2 ABI, ivar offset variables use the type encoding in their name @@ -1613,6 +1686,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { } void GenerateClass(const ObjCImplementationDecl *OID) override { ASTContext &Context = CGM.getContext(); + bool IsCOFF = CGM.getTriple().isOSBinFormatCOFF(); // Get the class name ObjCInterfaceDecl *classDecl = @@ -1671,8 +1745,9 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { // struct objc_property_list *properties metaclassFields.add(GeneratePropertyList(OID, classDecl, /*isClassProperty*/true)); - auto *metaclass = metaclassFields.finishAndCreateGlobal("._OBJC_METACLASS_" - + className, CGM.getPointerAlign()); + auto *metaclass = metaclassFields.finishAndCreateGlobal( + ManglePublicSymbol("OBJC_METACLASS_") + className, + CGM.getPointerAlign()); auto classFields = builder.beginStruct(); // struct objc_class *isa; @@ -1681,15 +1756,28 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { // Get the superclass name. const ObjCInterfaceDecl * SuperClassDecl = OID->getClassInterface()->getSuperClass(); + llvm::Constant *SuperClass = nullptr; if (SuperClassDecl) { auto SuperClassName = SymbolForClass(SuperClassDecl->getNameAsString()); - llvm::Constant *SuperClass = TheModule.getNamedGlobal(SuperClassName); + SuperClass = TheModule.getNamedGlobal(SuperClassName); if (!SuperClass) { SuperClass = new llvm::GlobalVariable(TheModule, PtrTy, false, llvm::GlobalValue::ExternalLinkage, nullptr, SuperClassName); + if (IsCOFF) { + auto Storage = llvm::GlobalValue::DefaultStorageClass; + if (SuperClassDecl->hasAttr<DLLImportAttr>()) + Storage = llvm::GlobalValue::DLLImportStorageClass; + else if (SuperClassDecl->hasAttr<DLLExportAttr>()) + Storage = llvm::GlobalValue::DLLExportStorageClass; + + cast<llvm::GlobalValue>(SuperClass)->setDLLStorageClass(Storage); + } } - classFields.add(llvm::ConstantExpr::getBitCast(SuperClass, PtrTy)); + if (!IsCOFF) + classFields.add(llvm::ConstantExpr::getBitCast(SuperClass, PtrTy)); + else + classFields.addNullPointer(PtrTy); } else classFields.addNullPointer(PtrTy); // const char *name; @@ -1731,7 +1819,6 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { CGM.getContext().getCharWidth()); // struct objc_ivar ivars[] auto ivarArrayBuilder = ivarListBuilder.beginArray(); - CodeGenTypes &Types = CGM.getTypes(); for (const ObjCIvarDecl *IVD = classDecl->all_declared_ivar_begin(); IVD; IVD = IVD->getNextIvar()) { auto ivarTy = IVD->getType(); @@ -1765,8 +1852,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { ivarBuilder.add(OffsetVar); // Ivar size ivarBuilder.addInt(Int32Ty, - td.getTypeSizeInBits(Types.ConvertType(ivarTy)) / - CGM.getContext().getCharWidth()); + CGM.getContext().getTypeSizeInChars(ivarTy).getQuantity()); // Alignment will be stored as a base-2 log of the alignment. int align = llvm::Log2_32(Context.getTypeAlignInChars(ivarTy).getQuantity()); // Objects that require more than 2^64-byte alignment should be impossible! 
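The ".CRT$XCLb" placement in the hunk above relies on the MSVC CRT walking the ".CRT$XC*" sections in alphabetical order during C++ static initialization, so an "XCLb" entry fires before ordinary "XCU" user constructors; CGM.addUsedGlobal(InitVar) keeps the pointer alive through linker dead-stripping. A minimal standalone sketch of the same trick, assuming MSVC; the names early_init and early_init_ptr are illustrative, not from the patch:

// Sketch only: MSVC-specific CRT initializer section, analogous to the
// ".objc_early_init_ptr" global emitted above.
#include <cstdio>

static void early_init() {
  // Stand-in for ".objc_early_init": patch pointers that COFF cannot
  // statically initialize across DLL boundaries.
  std::puts("early init runs before ordinary static constructors");
}

typedef void (*crt_init_fn)();
#pragma section(".CRT$XCLb", read)
// The CRT invokes every function pointer it finds in .CRT$XCA..XCZ.
__declspec(allocate(".CRT$XCLb")) crt_init_fn early_init_ptr = early_init;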
@@ -1839,19 +1925,24 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { classFields.finishAndCreateGlobal(SymbolForClass(className), CGM.getPointerAlign(), false, llvm::GlobalValue::ExternalLinkage); - if (CGM.getTriple().isOSBinFormatCOFF()) { - auto Storage = llvm::GlobalValue::DefaultStorageClass; - if (OID->getClassInterface()->hasAttr<DLLImportAttr>()) - Storage = llvm::GlobalValue::DLLImportStorageClass; - else if (OID->getClassInterface()->hasAttr<DLLExportAttr>()) - Storage = llvm::GlobalValue::DLLExportStorageClass; - cast<llvm::GlobalValue>(classStruct)->setDLLStorageClass(Storage); - } - auto *classRefSymbol = GetClassVar(className); classRefSymbol->setSection(sectionName<ClassReferenceSection>()); classRefSymbol->setInitializer(llvm::ConstantExpr::getBitCast(classStruct, IdTy)); + if (IsCOFF) { + // we can't import a class struct. + if (OID->getClassInterface()->hasAttr<DLLExportAttr>()) { + cast<llvm::GlobalValue>(classStruct)->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); + cast<llvm::GlobalValue>(classRefSymbol)->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); + } + + if (SuperClass) { + std::pair<llvm::Constant*, int> v{classStruct, 1}; + EarlyInitList.emplace_back(SuperClass->getName(), std::move(v)); + } + + } + // Resolve the class aliases, if they exist. // FIXME: Class pointer aliases shouldn't exist! @@ -1879,7 +1970,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { auto classInitRef = new llvm::GlobalVariable(TheModule, classStruct->getType(), false, llvm::GlobalValue::ExternalLinkage, - classStruct, "._OBJC_INIT_CLASS_" + className); + classStruct, ManglePublicSymbol("OBJC_INIT_CLASS_") + className); classInitRef->setSection(sectionName<ClassSection>()); CGM.addUsedGlobal(classInitRef); @@ -1916,6 +2007,18 @@ const char *const CGObjCGNUstep2::SectionsBaseNames[8] = "__objc_constant_string" }; +const char *const CGObjCGNUstep2::PECOFFSectionsBaseNames[8] = +{ +".objcrt$SEL", +".objcrt$CLS", +".objcrt$CLR", +".objcrt$CAT", +".objcrt$PCL", +".objcrt$PCR", +".objcrt$CAL", +".objcrt$STR" +}; + /// Support for the ObjFW runtime. 
class CGObjCObjFW: public CGObjCGNU { protected: @@ -1938,14 +2041,14 @@ protected: EnforceType(Builder, Receiver, IdTy), EnforceType(Builder, cmd, SelectorTy) }; - llvm::CallSite imp; + llvm::CallBase *imp; if (CGM.ReturnTypeUsesSRet(MSI.CallInfo)) imp = CGF.EmitRuntimeCallOrInvoke(MsgLookupFnSRet, args); else imp = CGF.EmitRuntimeCallOrInvoke(MsgLookupFn, args); imp->setMetadata(msgSendMDKind, node); - return imp.getInstruction(); + return imp; } llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, Address ObjCSuper, @@ -2174,9 +2277,8 @@ llvm::Value *CGObjCGNU::GetClassNamed(CodeGenFunction &CGF, if (!isWeak) EmitClassRef(Name); - llvm::Constant *ClassLookupFn = - CGM.CreateRuntimeFunction(llvm::FunctionType::get(IdTy, PtrToInt8Ty, true), - "objc_lookup_class"); + llvm::FunctionCallee ClassLookupFn = CGM.CreateRuntimeFunction( + llvm::FunctionType::get(IdTy, PtrToInt8Ty, true), "objc_lookup_class"); return CGF.EmitNounwindRuntimeCall(ClassLookupFn, ClassName); } @@ -2432,7 +2534,7 @@ CGObjCGNU::GenerateMessageSendSuper(CodeGenFunction &CGF, ReceiverClass = EnforceType(Builder, ReceiverClass, IdTy); } else { if (isCategoryImpl) { - llvm::Constant *classLookupFunction = nullptr; + llvm::FunctionCallee classLookupFunction = nullptr; if (IsClassMessage) { classLookupFunction = CGM.CreateRuntimeFunction(llvm::FunctionType::get( IdTy, PtrTy, true), "objc_get_meta_class"); @@ -2481,10 +2583,8 @@ CGObjCGNU::GenerateMessageSendSuper(CodeGenFunction &CGF, Address ObjCSuper = CGF.CreateTempAlloca(ObjCSuperTy, CGF.getPointerAlign()); - Builder.CreateStore(Receiver, - Builder.CreateStructGEP(ObjCSuper, 0, CharUnits::Zero())); - Builder.CreateStore(ReceiverClass, - Builder.CreateStructGEP(ObjCSuper, 1, CGF.getPointerSize())); + Builder.CreateStore(Receiver, Builder.CreateStructGEP(ObjCSuper, 0)); + Builder.CreateStore(ReceiverClass, Builder.CreateStructGEP(ObjCSuper, 1)); ObjCSuper = EnforceType(Builder, ObjCSuper, PtrToObjCSuperTy); @@ -2501,7 +2601,7 @@ CGObjCGNU::GenerateMessageSendSuper(CodeGenFunction &CGF, CGCallee callee(CGCalleeInfo(), imp); - llvm::Instruction *call; + llvm::CallBase *call; RValue msgRet = CGF.EmitCall(MSI.CallInfo, callee, Return, ActualArgs, &call); call->setMetadata(msgSendMDKind, node); return msgRet; @@ -2595,16 +2695,21 @@ CGObjCGNU::GenerateMessageSend(CodeGenFunction &CGF, case CodeGenOptions::Mixed: case CodeGenOptions::NonLegacy: if (CGM.ReturnTypeUsesFPRet(ResultType)) { - imp = CGM.CreateRuntimeFunction(llvm::FunctionType::get(IdTy, IdTy, true), - "objc_msgSend_fpret"); + imp = + CGM.CreateRuntimeFunction(llvm::FunctionType::get(IdTy, IdTy, true), + "objc_msgSend_fpret") + .getCallee(); } else if (CGM.ReturnTypeUsesSRet(MSI.CallInfo)) { // The actual types here don't matter - we're going to bitcast the // function anyway - imp = CGM.CreateRuntimeFunction(llvm::FunctionType::get(IdTy, IdTy, true), - "objc_msgSend_stret"); + imp = + CGM.CreateRuntimeFunction(llvm::FunctionType::get(IdTy, IdTy, true), + "objc_msgSend_stret") + .getCallee(); } else { - imp = CGM.CreateRuntimeFunction(llvm::FunctionType::get(IdTy, IdTy, true), - "objc_msgSend"); + imp = CGM.CreateRuntimeFunction( + llvm::FunctionType::get(IdTy, IdTy, true), "objc_msgSend") + .getCallee(); } } @@ -2613,7 +2718,7 @@ CGObjCGNU::GenerateMessageSend(CodeGenFunction &CGF, imp = EnforceType(Builder, imp, MSI.MessengerType); - llvm::Instruction *call; + llvm::CallBase *call; CGCallee callee(CGCalleeInfo(), imp); RValue msgRet = CGF.EmitCall(MSI.CallInfo, callee, Return, ActualArgs, &call); 
call->setMetadata(msgSendMDKind, node); @@ -3697,7 +3802,8 @@ llvm::Function *CGObjCGNU::ModuleInitFunction() { llvm::FunctionType *FT = llvm::FunctionType::get(Builder.getVoidTy(), module->getType(), true); - llvm::Value *Register = CGM.CreateRuntimeFunction(FT, "__objc_exec_class"); + llvm::FunctionCallee Register = + CGM.CreateRuntimeFunction(FT, "__objc_exec_class"); Builder.CreateCall(Register, module); if (!ClassAliases.empty()) { @@ -3766,36 +3872,36 @@ llvm::Function *CGObjCGNU::GenerateMethod(const ObjCMethodDecl *OMD, return Method; } -llvm::Constant *CGObjCGNU::GetPropertyGetFunction() { +llvm::FunctionCallee CGObjCGNU::GetPropertyGetFunction() { return GetPropertyFn; } -llvm::Constant *CGObjCGNU::GetPropertySetFunction() { +llvm::FunctionCallee CGObjCGNU::GetPropertySetFunction() { return SetPropertyFn; } -llvm::Constant *CGObjCGNU::GetOptimizedPropertySetFunction(bool atomic, - bool copy) { +llvm::FunctionCallee CGObjCGNU::GetOptimizedPropertySetFunction(bool atomic, + bool copy) { return nullptr; } -llvm::Constant *CGObjCGNU::GetGetStructFunction() { +llvm::FunctionCallee CGObjCGNU::GetGetStructFunction() { return GetStructPropertyFn; } -llvm::Constant *CGObjCGNU::GetSetStructFunction() { +llvm::FunctionCallee CGObjCGNU::GetSetStructFunction() { return SetStructPropertyFn; } -llvm::Constant *CGObjCGNU::GetCppAtomicObjectGetFunction() { +llvm::FunctionCallee CGObjCGNU::GetCppAtomicObjectGetFunction() { return nullptr; } -llvm::Constant *CGObjCGNU::GetCppAtomicObjectSetFunction() { +llvm::FunctionCallee CGObjCGNU::GetCppAtomicObjectSetFunction() { return nullptr; } -llvm::Constant *CGObjCGNU::EnumerationMutationFunction() { +llvm::FunctionCallee CGObjCGNU::EnumerationMutationFunction() { return EnumerationMutationFn; } @@ -3844,13 +3950,14 @@ void CGObjCGNU::EmitThrowStmt(CodeGenFunction &CGF, // that was passed into the `@catch` block, then this code path is not // reached and we will instead call `objc_exception_throw` with an explicit // argument. - CGF.EmitRuntimeCallOrInvoke(ExceptionReThrowFn).setDoesNotReturn(); + llvm::CallBase *Throw = CGF.EmitRuntimeCallOrInvoke(ExceptionReThrowFn); + Throw->setDoesNotReturn(); } else { ExceptionAsObject = CGF.Builder.CreateBitCast(ExceptionAsObject, IdTy); - llvm::CallSite Throw = + llvm::CallBase *Throw = CGF.EmitRuntimeCallOrInvoke(ExceptionThrowFn, ExceptionAsObject); - Throw.setDoesNotReturn(); + Throw->setDoesNotReturn(); } CGF.Builder.CreateUnreachable(); if (ClearInsertionPoint) @@ -3861,8 +3968,7 @@ llvm::Value * CGObjCGNU::EmitObjCWeakRead(CodeGenFunction &CGF, Address AddrWeakObj) { CGBuilderTy &B = CGF.Builder; AddrWeakObj = EnforceType(B, AddrWeakObj, PtrToIdTy); - return B.CreateCall(WeakReadFn.getType(), WeakReadFn, - AddrWeakObj.getPointer()); + return B.CreateCall(WeakReadFn, AddrWeakObj.getPointer()); } void CGObjCGNU::EmitObjCWeakAssign(CodeGenFunction &CGF, @@ -3870,8 +3976,7 @@ void CGObjCGNU::EmitObjCWeakAssign(CodeGenFunction &CGF, CGBuilderTy &B = CGF.Builder; src = EnforceType(B, src, IdTy); dst = EnforceType(B, dst, PtrToIdTy); - B.CreateCall(WeakAssignFn.getType(), WeakAssignFn, - {src, dst.getPointer()}); + B.CreateCall(WeakAssignFn, {src, dst.getPointer()}); } void CGObjCGNU::EmitObjCGlobalAssign(CodeGenFunction &CGF, @@ -3882,8 +3987,7 @@ void CGObjCGNU::EmitObjCGlobalAssign(CodeGenFunction &CGF, dst = EnforceType(B, dst, PtrToIdTy); // FIXME. 
Add threadloca assign API assert(!threadlocal && "EmitObjCGlobalAssign - Threal Local API NYI"); - B.CreateCall(GlobalAssignFn.getType(), GlobalAssignFn, - {src, dst.getPointer()}); + B.CreateCall(GlobalAssignFn, {src, dst.getPointer()}); } void CGObjCGNU::EmitObjCIvarAssign(CodeGenFunction &CGF, @@ -3892,8 +3996,7 @@ void CGObjCGNU::EmitObjCIvarAssign(CodeGenFunction &CGF, CGBuilderTy &B = CGF.Builder; src = EnforceType(B, src, IdTy); dst = EnforceType(B, dst, IdTy); - B.CreateCall(IvarAssignFn.getType(), IvarAssignFn, - {src, dst.getPointer(), ivarOffset}); + B.CreateCall(IvarAssignFn, {src, dst.getPointer(), ivarOffset}); } void CGObjCGNU::EmitObjCStrongCastAssign(CodeGenFunction &CGF, @@ -3901,8 +4004,7 @@ void CGObjCGNU::EmitObjCStrongCastAssign(CodeGenFunction &CGF, CGBuilderTy &B = CGF.Builder; src = EnforceType(B, src, IdTy); dst = EnforceType(B, dst, PtrToIdTy); - B.CreateCall(StrongCastAssignFn.getType(), StrongCastAssignFn, - {src, dst.getPointer()}); + B.CreateCall(StrongCastAssignFn, {src, dst.getPointer()}); } void CGObjCGNU::EmitGCMemmoveCollectable(CodeGenFunction &CGF, @@ -3913,8 +4015,7 @@ void CGObjCGNU::EmitGCMemmoveCollectable(CodeGenFunction &CGF, DestPtr = EnforceType(B, DestPtr, PtrTy); SrcPtr = EnforceType(B, SrcPtr, PtrTy); - B.CreateCall(MemMoveFn.getType(), MemMoveFn, - {DestPtr.getPointer(), SrcPtr.getPointer(), Size}); + B.CreateCall(MemMoveFn, {DestPtr.getPointer(), SrcPtr.getPointer(), Size}); } llvm::GlobalVariable *CGObjCGNU::ObjCIvarOffsetVariable( diff --git a/lib/CodeGen/CGObjCMac.cpp b/lib/CodeGen/CGObjCMac.cpp index d91eb43ca322..12880fecbadf 100644 --- a/lib/CodeGen/CGObjCMac.cpp +++ b/lib/CodeGen/CGObjCMac.cpp @@ -1,9 +1,8 @@ //===------- CGObjCMac.cpp - Interface to Apple Objective-C Runtime -------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -31,7 +30,6 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/IntrinsicInst.h" @@ -61,7 +59,7 @@ private: /// /// The default messenger, used for sends whose ABI is unchanged from /// the all-integer/pointer case. - llvm::Constant *getMessageSendFn() const { + llvm::FunctionCallee getMessageSendFn() const { // Add the non-lazy-bind attribute, since objc_msgSend is likely to // be called a lot. llvm::Type *params[] = { ObjectPtrTy, SelectorPtrTy }; @@ -77,12 +75,11 @@ private: /// The messenger used when the return value is an aggregate returned /// by indirect reference in the first argument, and therefore the /// self and selector parameters are shifted over by one. - llvm::Constant *getMessageSendStretFn() const { + llvm::FunctionCallee getMessageSendStretFn() const { llvm::Type *params[] = { ObjectPtrTy, SelectorPtrTy }; return CGM.CreateRuntimeFunction(llvm::FunctionType::get(CGM.VoidTy, params, true), "objc_msgSend_stret"); - } /// [double | long double] objc_msgSend_fpret(id self, SEL op, ...) 
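Nearly all of the churn in this file and the hunks that follow is one mechanical migration: CGM.CreateRuntimeFunction (and Module::getOrInsertFunction beneath it) now returns llvm::FunctionCallee, a {FunctionType*, Value*} pair, instead of a bare llvm::Constant*, and llvm::CallSite gives way to llvm::CallBase*. A minimal sketch of the new pattern, assuming LLVM 9-era headers; "objc_example_fn" is a placeholder, not a real runtime entry point:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"

// Declare-or-find a runtime function and call it through FunctionCallee,
// with no getType()/bitcast dance on a bare llvm::Constant*.
llvm::CallInst *emitExampleRuntimeCall(llvm::Module &M, llvm::IRBuilder<> &B,
                                       llvm::Value *Obj) {
  llvm::LLVMContext &Ctx = M.getContext();
  llvm::FunctionType *FT = llvm::FunctionType::get(
      llvm::Type::getVoidTy(Ctx), {llvm::Type::getInt8PtrTy(Ctx)}, false);
  // getOrInsertFunction returns llvm::FunctionCallee as of LLVM 9.
  llvm::FunctionCallee Fn = M.getOrInsertFunction("objc_example_fn", FT);
  return B.CreateCall(Fn, {Obj}); // CreateCall accepts FunctionCallee directly
}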
@@ -90,12 +87,11 @@ private: /// The messenger used when the return value is returned on the x87 /// floating-point stack; without a special entrypoint, the nil case /// would be unbalanced. - llvm::Constant *getMessageSendFpretFn() const { + llvm::FunctionCallee getMessageSendFpretFn() const { llvm::Type *params[] = { ObjectPtrTy, SelectorPtrTy }; return CGM.CreateRuntimeFunction(llvm::FunctionType::get(CGM.DoubleTy, params, true), "objc_msgSend_fpret"); - } /// _Complex long double objc_msgSend_fp2ret(id self, SEL op, ...) @@ -103,7 +99,7 @@ private: /// The messenger used when the return value is returned in two values on the /// x87 floating point stack; without a special entrypoint, the nil case /// would be unbalanced. Only used on 64-bit X86. - llvm::Constant *getMessageSendFp2retFn() const { + llvm::FunctionCallee getMessageSendFp2retFn() const { llvm::Type *params[] = { ObjectPtrTy, SelectorPtrTy }; llvm::Type *longDoubleType = llvm::Type::getX86_FP80Ty(VMContext); llvm::Type *resultType = @@ -119,7 +115,7 @@ private: /// The messenger used for super calls, which have different dispatch /// semantics. The class passed is the superclass of the current /// class. - llvm::Constant *getMessageSendSuperFn() const { + llvm::FunctionCallee getMessageSendSuperFn() const { llvm::Type *params[] = { SuperPtrTy, SelectorPtrTy }; return CGM.CreateRuntimeFunction(llvm::FunctionType::get(ObjectPtrTy, params, true), @@ -130,7 +126,7 @@ private: /// /// A slightly different messenger used for super calls. The class /// passed is the current class. - llvm::Constant *getMessageSendSuperFn2() const { + llvm::FunctionCallee getMessageSendSuperFn2() const { llvm::Type *params[] = { SuperPtrTy, SelectorPtrTy }; return CGM.CreateRuntimeFunction(llvm::FunctionType::get(ObjectPtrTy, params, true), @@ -141,7 +137,7 @@ private: /// SEL op, ...) /// /// The messenger used for super calls which return an aggregate indirectly. - llvm::Constant *getMessageSendSuperStretFn() const { + llvm::FunctionCallee getMessageSendSuperStretFn() const { llvm::Type *params[] = { Int8PtrTy, SuperPtrTy, SelectorPtrTy }; return CGM.CreateRuntimeFunction( llvm::FunctionType::get(CGM.VoidTy, params, true), @@ -152,19 +148,19 @@ private: /// SEL op, ...) /// /// objc_msgSendSuper_stret with the super2 semantics. - llvm::Constant *getMessageSendSuperStretFn2() const { + llvm::FunctionCallee getMessageSendSuperStretFn2() const { llvm::Type *params[] = { Int8PtrTy, SuperPtrTy, SelectorPtrTy }; return CGM.CreateRuntimeFunction( llvm::FunctionType::get(CGM.VoidTy, params, true), "objc_msgSendSuper2_stret"); } - llvm::Constant *getMessageSendSuperFpretFn() const { + llvm::FunctionCallee getMessageSendSuperFpretFn() const { // There is no objc_msgSendSuper_fpret? How can that work? return getMessageSendSuperFn(); } - llvm::Constant *getMessageSendSuperFpretFn2() const { + llvm::FunctionCallee getMessageSendSuperFpretFn2() const { // There is no objc_msgSendSuper_fpret? How can that work? return getMessageSendSuperFn2(); } @@ -233,7 +229,7 @@ public: /// CachePtrTy - LLVM type for struct objc_cache *. 
llvm::PointerType *CachePtrTy; - llvm::Constant *getGetPropertyFn() { + llvm::FunctionCallee getGetPropertyFn() { CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); // id objc_getProperty (id, SEL, ptrdiff_t, bool) @@ -248,7 +244,7 @@ public: return CGM.CreateRuntimeFunction(FTy, "objc_getProperty"); } - llvm::Constant *getSetPropertyFn() { + llvm::FunctionCallee getSetPropertyFn() { CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); // void objc_setProperty (id, SEL, ptrdiff_t, id, bool, bool) @@ -267,7 +263,7 @@ public: return CGM.CreateRuntimeFunction(FTy, "objc_setProperty"); } - llvm::Constant *getOptimizedSetPropertyFn(bool atomic, bool copy) { + llvm::FunctionCallee getOptimizedSetPropertyFn(bool atomic, bool copy) { CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); // void objc_setProperty_atomic(id self, SEL _cmd, @@ -302,7 +298,7 @@ public: return CGM.CreateRuntimeFunction(FTy, name); } - llvm::Constant *getCopyStructFn() { + llvm::FunctionCallee getCopyStructFn() { CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); // void objc_copyStruct (void *, const void *, size_t, bool, bool) @@ -322,7 +318,7 @@ public: /// void objc_copyCppObjectAtomic( /// void *dest, const void *src, /// void (*copyHelper) (void *dest, const void *source)); - llvm::Constant *getCppAtomicObjectFunction() { + llvm::FunctionCallee getCppAtomicObjectFunction() { CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); /// void objc_copyCppObjectAtomic(void *dest, const void *src, void *helper); @@ -336,7 +332,7 @@ public: return CGM.CreateRuntimeFunction(FTy, "objc_copyCppObjectAtomic"); } - llvm::Constant *getEnumerationMutationFn() { + llvm::FunctionCallee getEnumerationMutationFn() { CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); // void objc_enumerationMutation (id) @@ -348,7 +344,7 @@ public: return CGM.CreateRuntimeFunction(FTy, "objc_enumerationMutation"); } - llvm::Constant *getLookUpClassFn() { + llvm::FunctionCallee getLookUpClassFn() { CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); // Class objc_lookUpClass (const char *) @@ -363,7 +359,7 @@ public: } /// GcReadWeakFn -- LLVM objc_read_weak (id *src) function. - llvm::Constant *getGcReadWeakFn() { + llvm::FunctionCallee getGcReadWeakFn() { // id objc_read_weak (id *) llvm::Type *args[] = { ObjectPtrTy->getPointerTo() }; llvm::FunctionType *FTy = @@ -372,7 +368,7 @@ public: } /// GcAssignWeakFn -- LLVM objc_assign_weak function. - llvm::Constant *getGcAssignWeakFn() { + llvm::FunctionCallee getGcAssignWeakFn() { // id objc_assign_weak (id, id *) llvm::Type *args[] = { ObjectPtrTy, ObjectPtrTy->getPointerTo() }; llvm::FunctionType *FTy = @@ -381,7 +377,7 @@ public: } /// GcAssignGlobalFn -- LLVM objc_assign_global function. - llvm::Constant *getGcAssignGlobalFn() { + llvm::FunctionCallee getGcAssignGlobalFn() { // id objc_assign_global(id, id *) llvm::Type *args[] = { ObjectPtrTy, ObjectPtrTy->getPointerTo() }; llvm::FunctionType *FTy = @@ -390,7 +386,7 @@ public: } /// GcAssignThreadLocalFn -- LLVM objc_assign_threadlocal function. 
- llvm::Constant *getGcAssignThreadLocalFn() { + llvm::FunctionCallee getGcAssignThreadLocalFn() { // id objc_assign_threadlocal(id src, id * dest) llvm::Type *args[] = { ObjectPtrTy, ObjectPtrTy->getPointerTo() }; llvm::FunctionType *FTy = @@ -399,7 +395,7 @@ public: } /// GcAssignIvarFn -- LLVM objc_assign_ivar function. - llvm::Constant *getGcAssignIvarFn() { + llvm::FunctionCallee getGcAssignIvarFn() { // id objc_assign_ivar(id, id *, ptrdiff_t) llvm::Type *args[] = { ObjectPtrTy, ObjectPtrTy->getPointerTo(), CGM.PtrDiffTy }; @@ -409,7 +405,7 @@ public: } /// GcMemmoveCollectableFn -- LLVM objc_memmove_collectable function. - llvm::Constant *GcMemmoveCollectableFn() { + llvm::FunctionCallee GcMemmoveCollectableFn() { // void *objc_memmove_collectable(void *dst, const void *src, size_t size) llvm::Type *args[] = { Int8PtrTy, Int8PtrTy, LongTy }; llvm::FunctionType *FTy = llvm::FunctionType::get(Int8PtrTy, args, false); @@ -417,7 +413,7 @@ public: } /// GcAssignStrongCastFn -- LLVM objc_assign_strongCast function. - llvm::Constant *getGcAssignStrongCastFn() { + llvm::FunctionCallee getGcAssignStrongCastFn() { // id objc_assign_strongCast(id, id *) llvm::Type *args[] = { ObjectPtrTy, ObjectPtrTy->getPointerTo() }; llvm::FunctionType *FTy = @@ -426,7 +422,7 @@ public: } /// ExceptionThrowFn - LLVM objc_exception_throw function. - llvm::Constant *getExceptionThrowFn() { + llvm::FunctionCallee getExceptionThrowFn() { // void objc_exception_throw(id) llvm::Type *args[] = { ObjectPtrTy }; llvm::FunctionType *FTy = @@ -435,14 +431,14 @@ public: } /// ExceptionRethrowFn - LLVM objc_exception_rethrow function. - llvm::Constant *getExceptionRethrowFn() { + llvm::FunctionCallee getExceptionRethrowFn() { // void objc_exception_rethrow(void) llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.VoidTy, false); return CGM.CreateRuntimeFunction(FTy, "objc_exception_rethrow"); } /// SyncEnterFn - LLVM object_sync_enter function. - llvm::Constant *getSyncEnterFn() { + llvm::FunctionCallee getSyncEnterFn() { // int objc_sync_enter (id) llvm::Type *args[] = { ObjectPtrTy }; llvm::FunctionType *FTy = @@ -451,7 +447,7 @@ public: } /// SyncExitFn - LLVM object_sync_exit function. - llvm::Constant *getSyncExitFn() { + llvm::FunctionCallee getSyncExitFn() { // int objc_sync_exit (id) llvm::Type *args[] = { ObjectPtrTy }; llvm::FunctionType *FTy = @@ -459,35 +455,35 @@ public: return CGM.CreateRuntimeFunction(FTy, "objc_sync_exit"); } - llvm::Constant *getSendFn(bool IsSuper) const { + llvm::FunctionCallee getSendFn(bool IsSuper) const { return IsSuper ? getMessageSendSuperFn() : getMessageSendFn(); } - llvm::Constant *getSendFn2(bool IsSuper) const { + llvm::FunctionCallee getSendFn2(bool IsSuper) const { return IsSuper ? getMessageSendSuperFn2() : getMessageSendFn(); } - llvm::Constant *getSendStretFn(bool IsSuper) const { + llvm::FunctionCallee getSendStretFn(bool IsSuper) const { return IsSuper ? getMessageSendSuperStretFn() : getMessageSendStretFn(); } - llvm::Constant *getSendStretFn2(bool IsSuper) const { + llvm::FunctionCallee getSendStretFn2(bool IsSuper) const { return IsSuper ? getMessageSendSuperStretFn2() : getMessageSendStretFn(); } - llvm::Constant *getSendFpretFn(bool IsSuper) const { + llvm::FunctionCallee getSendFpretFn(bool IsSuper) const { return IsSuper ? getMessageSendSuperFpretFn() : getMessageSendFpretFn(); } - llvm::Constant *getSendFpretFn2(bool IsSuper) const { + llvm::FunctionCallee getSendFpretFn2(bool IsSuper) const { return IsSuper ? 
getMessageSendSuperFpretFn2() : getMessageSendFpretFn(); } - llvm::Constant *getSendFp2retFn(bool IsSuper) const { + llvm::FunctionCallee getSendFp2retFn(bool IsSuper) const { return IsSuper ? getMessageSendSuperFn() : getMessageSendFp2retFn(); } - llvm::Constant *getSendFp2RetFn2(bool IsSuper) const { + llvm::FunctionCallee getSendFp2RetFn2(bool IsSuper) const { return IsSuper ? getMessageSendSuperFn2() : getMessageSendFp2retFn(); } @@ -553,7 +549,7 @@ public: llvm::StructType *ExceptionDataTy; /// ExceptionTryEnterFn - LLVM objc_exception_try_enter function. - llvm::Constant *getExceptionTryEnterFn() { + llvm::FunctionCallee getExceptionTryEnterFn() { llvm::Type *params[] = { ExceptionDataTy->getPointerTo() }; return CGM.CreateRuntimeFunction( llvm::FunctionType::get(CGM.VoidTy, params, false), @@ -561,7 +557,7 @@ public: } /// ExceptionTryExitFn - LLVM objc_exception_try_exit function. - llvm::Constant *getExceptionTryExitFn() { + llvm::FunctionCallee getExceptionTryExitFn() { llvm::Type *params[] = { ExceptionDataTy->getPointerTo() }; return CGM.CreateRuntimeFunction( llvm::FunctionType::get(CGM.VoidTy, params, false), @@ -569,7 +565,7 @@ public: } /// ExceptionExtractFn - LLVM objc_exception_extract function. - llvm::Constant *getExceptionExtractFn() { + llvm::FunctionCallee getExceptionExtractFn() { llvm::Type *params[] = { ExceptionDataTy->getPointerTo() }; return CGM.CreateRuntimeFunction(llvm::FunctionType::get(ObjectPtrTy, params, false), @@ -577,7 +573,7 @@ public: } /// ExceptionMatchFn - LLVM objc_exception_match function. - llvm::Constant *getExceptionMatchFn() { + llvm::FunctionCallee getExceptionMatchFn() { llvm::Type *params[] = { ClassPtrTy, ObjectPtrTy }; return CGM.CreateRuntimeFunction( llvm::FunctionType::get(CGM.Int32Ty, params, false), @@ -585,7 +581,7 @@ public: } /// SetJmpFn - LLVM _setjmp function. - llvm::Constant *getSetJmpFn() { + llvm::FunctionCallee getSetJmpFn() { // This is specifically the prototype for x86. llvm::Type *params[] = { CGM.Int32Ty->getPointerTo() }; return CGM.CreateRuntimeFunction( @@ -671,7 +667,7 @@ public: // SuperMessageRefPtrTy - LLVM for struct _super_message_ref_t* llvm::PointerType *SuperMessageRefPtrTy; - llvm::Constant *getMessageSendFixupFn() { + llvm::FunctionCallee getMessageSendFixupFn() { // id objc_msgSend_fixup(id, struct message_ref_t*, ...) llvm::Type *params[] = { ObjectPtrTy, MessageRefPtrTy }; return CGM.CreateRuntimeFunction(llvm::FunctionType::get(ObjectPtrTy, @@ -679,7 +675,7 @@ public: "objc_msgSend_fixup"); } - llvm::Constant *getMessageSendFpretFixupFn() { + llvm::FunctionCallee getMessageSendFpretFixupFn() { // id objc_msgSend_fpret_fixup(id, struct message_ref_t*, ...) llvm::Type *params[] = { ObjectPtrTy, MessageRefPtrTy }; return CGM.CreateRuntimeFunction(llvm::FunctionType::get(ObjectPtrTy, @@ -687,7 +683,7 @@ public: "objc_msgSend_fpret_fixup"); } - llvm::Constant *getMessageSendStretFixupFn() { + llvm::FunctionCallee getMessageSendStretFixupFn() { // id objc_msgSend_stret_fixup(id, struct message_ref_t*, ...) llvm::Type *params[] = { ObjectPtrTy, MessageRefPtrTy }; return CGM.CreateRuntimeFunction(llvm::FunctionType::get(ObjectPtrTy, @@ -695,7 +691,7 @@ public: "objc_msgSend_stret_fixup"); } - llvm::Constant *getMessageSendSuper2FixupFn() { + llvm::FunctionCallee getMessageSendSuper2FixupFn() { // id objc_msgSendSuper2_fixup (struct objc_super *, // struct _super_message_ref_t*, ...) 
llvm::Type *params[] = { SuperPtrTy, SuperMessageRefPtrTy }; @@ -704,7 +700,7 @@ public: "objc_msgSendSuper2_fixup"); } - llvm::Constant *getMessageSendSuper2StretFixupFn() { + llvm::FunctionCallee getMessageSendSuper2StretFixupFn() { // id objc_msgSendSuper2_stret_fixup(struct objc_super *, // struct _super_message_ref_t*, ...) llvm::Type *params[] = { SuperPtrTy, SuperMessageRefPtrTy }; @@ -713,19 +709,45 @@ public: "objc_msgSendSuper2_stret_fixup"); } - llvm::Constant *getObjCEndCatchFn() { + llvm::FunctionCallee getObjCEndCatchFn() { return CGM.CreateRuntimeFunction(llvm::FunctionType::get(CGM.VoidTy, false), "objc_end_catch"); - } - llvm::Constant *getObjCBeginCatchFn() { + llvm::FunctionCallee getObjCBeginCatchFn() { llvm::Type *params[] = { Int8PtrTy }; return CGM.CreateRuntimeFunction(llvm::FunctionType::get(Int8PtrTy, params, false), "objc_begin_catch"); } + /// Class objc_loadClassref (void *) + /// + /// Loads from a classref. For Objective-C stub classes, this invokes the + /// initialization callback stored inside the stub. For all other classes + /// this simply dereferences the pointer. + llvm::FunctionCallee getLoadClassrefFn() const { + // Add the non-lazy-bind attribute, since objc_loadClassref is likely to + // be called a lot. + // + // Also it is safe to make it readnone, since we never load or store the + // classref except by calling this function. + llvm::Type *params[] = { Int8PtrPtrTy }; + llvm::FunctionCallee F = CGM.CreateRuntimeFunction( + llvm::FunctionType::get(ClassnfABIPtrTy, params, false), + "objc_loadClassref", + llvm::AttributeList::get(CGM.getLLVMContext(), + llvm::AttributeList::FunctionIndex, + {llvm::Attribute::NonLazyBind, + llvm::Attribute::ReadNone, + llvm::Attribute::NoUnwind})); + if (!CGM.getTriple().isOSBinFormatCOFF()) + cast<llvm::Function>(F.getCallee())->setLinkage( + llvm::Function::ExternalWeakLinkage); + + return F; + } + llvm::StructType *EHTypeTy; llvm::Type *EHTypePtrTy; @@ -882,6 +904,9 @@ protected: /// DefinedCategories - List of defined categories. SmallVector<llvm::GlobalValue*, 16> DefinedCategories; + /// DefinedStubCategories - List of defined categories on class stubs. + SmallVector<llvm::GlobalValue*, 16> DefinedStubCategories; + /// DefinedNonLazyCategories - List of defined "non-lazy" categories. 
SmallVector<llvm::GlobalValue*, 16> DefinedNonLazyCategories;
@@ -1325,15 +1350,15 @@ public:
llvm::Value *GenerateProtocolRef(CodeGenFunction &CGF,
const ObjCProtocolDecl *PD) override;
- llvm::Constant *GetPropertyGetFunction() override;
- llvm::Constant *GetPropertySetFunction() override;
- llvm::Constant *GetOptimizedPropertySetFunction(bool atomic,
- bool copy) override;
- llvm::Constant *GetGetStructFunction() override;
- llvm::Constant *GetSetStructFunction() override;
- llvm::Constant *GetCppAtomicObjectGetFunction() override;
- llvm::Constant *GetCppAtomicObjectSetFunction() override;
- llvm::Constant *EnumerationMutationFunction() override;
+ llvm::FunctionCallee GetPropertyGetFunction() override;
+ llvm::FunctionCallee GetPropertySetFunction() override;
+ llvm::FunctionCallee GetOptimizedPropertySetFunction(bool atomic,
+ bool copy) override;
+ llvm::FunctionCallee GetGetStructFunction() override;
+ llvm::FunctionCallee GetSetStructFunction() override;
+ llvm::FunctionCallee GetCppAtomicObjectGetFunction() override;
+ llvm::FunctionCallee GetCppAtomicObjectSetFunction() override;
+ llvm::FunctionCallee EnumerationMutationFunction() override;
void EmitTryStmt(CodeGen::CodeGenFunction &CGF,
const ObjCAtTryStmt &S) override;
@@ -1469,6 +1494,12 @@ private:
bool isMetaclass,
ForDefinition_t isForDefinition);
+ llvm::Constant *GetClassGlobalForClassRef(const ObjCInterfaceDecl *ID);
+
+ llvm::Value *EmitLoadOfClassRef(CodeGenFunction &CGF,
+ const ObjCInterfaceDecl *ID,
+ llvm::GlobalVariable *Entry);
+
/// EmitClassRef - Return a Value*, of type ObjCTypes.ClassPtrTy,
/// for the given class reference.
llvm::Value *EmitClassRef(CodeGenFunction &CGF,
@@ -1550,6 +1581,15 @@ private:
return false;
}
+ bool isClassLayoutKnownStatically(const ObjCInterfaceDecl *ID) {
+ // NSObject is a fixed size. If we can see the @implementation of a class
+ // which inherits from NSObject then we know that all its offsets also must
+ // be fixed. FIXME: Can we do this if we see a chain of super classes with
+ // implementations leading to NSObject?
+ return ID->getImplementation() && ID->getSuperClass() && + ID->getSuperClass()->getName() == "NSObject"; + } + public: CGObjCNonFragileABIMac(CodeGen::CodeGenModule &cgm); @@ -1598,35 +1638,35 @@ public: llvm::Constant *GetEHType(QualType T) override; - llvm::Constant *GetPropertyGetFunction() override { + llvm::FunctionCallee GetPropertyGetFunction() override { return ObjCTypes.getGetPropertyFn(); } - llvm::Constant *GetPropertySetFunction() override { + llvm::FunctionCallee GetPropertySetFunction() override { return ObjCTypes.getSetPropertyFn(); } - llvm::Constant *GetOptimizedPropertySetFunction(bool atomic, - bool copy) override { + llvm::FunctionCallee GetOptimizedPropertySetFunction(bool atomic, + bool copy) override { return ObjCTypes.getOptimizedSetPropertyFn(atomic, copy); } - llvm::Constant *GetSetStructFunction() override { + llvm::FunctionCallee GetSetStructFunction() override { return ObjCTypes.getCopyStructFn(); } - llvm::Constant *GetGetStructFunction() override { + llvm::FunctionCallee GetGetStructFunction() override { return ObjCTypes.getCopyStructFn(); } - llvm::Constant *GetCppAtomicObjectSetFunction() override { + llvm::FunctionCallee GetCppAtomicObjectSetFunction() override { return ObjCTypes.getCppAtomicObjectFunction(); } - llvm::Constant *GetCppAtomicObjectGetFunction() override { + llvm::FunctionCallee GetCppAtomicObjectGetFunction() override { return ObjCTypes.getCppAtomicObjectFunction(); } - llvm::Constant *EnumerationMutationFunction() override { + llvm::FunctionCallee EnumerationMutationFunction() override { return ObjCTypes.getEnumerationMutationFn(); } @@ -1805,6 +1845,28 @@ static bool hasObjCExceptionAttribute(ASTContext &Context, return false; } +static llvm::GlobalValue::LinkageTypes +getLinkageTypeForObjCMetadata(CodeGenModule &CGM, StringRef Section) { + if (CGM.getTriple().isOSBinFormatMachO() && + (Section.empty() || Section.startswith("__DATA"))) + return llvm::GlobalValue::InternalLinkage; + return llvm::GlobalValue::PrivateLinkage; +} + +/// A helper function to create an internal or private global variable. +static llvm::GlobalVariable * +finishAndCreateGlobal(ConstantInitBuilder::StructBuilder &Builder, + const llvm::Twine &Name, CodeGenModule &CGM) { + std::string SectionName; + if (CGM.getTriple().isOSBinFormatMachO()) + SectionName = "__DATA, __objc_const"; + auto *GV = Builder.finishAndCreateGlobal( + Name, CGM.getPointerAlign(), /*constant*/ false, + getLinkageTypeForObjCMetadata(CGM, SectionName)); + GV->setSection(SectionName); + return GV; +} + /* *** CGObjCMac Public Interface *** */ CGObjCMac::CGObjCMac(CodeGen::CodeGenModule &cgm) : CGObjCCommonMac(cgm), @@ -1907,7 +1969,7 @@ llvm::Constant *CGObjCNonFragileABIMac::getNSConstantStringClassRef() { std::string str = StringClass.empty() ? "OBJC_CLASS_$_NSConstantString" : "OBJC_CLASS_$_" + StringClass; - auto GV = GetClassGlobal(str, NotForDefinition); + llvm::Constant *GV = GetClassGlobal(str, NotForDefinition); // Make sure the result is of the correct type. 
auto V = llvm::ConstantExpr::getBitCast(GV, CGM.IntTy->getPointerTo()); @@ -2004,9 +2066,8 @@ CGObjCMac::GenerateMessageSendSuper(CodeGen::CodeGenFunction &CGF, "objc_super"); llvm::Value *ReceiverAsObject = CGF.Builder.CreateBitCast(Receiver, ObjCTypes.ObjectPtrTy); - CGF.Builder.CreateStore( - ReceiverAsObject, - CGF.Builder.CreateStructGEP(ObjCSuper, 0, CharUnits::Zero())); + CGF.Builder.CreateStore(ReceiverAsObject, + CGF.Builder.CreateStructGEP(ObjCSuper, 0)); // If this is a class message the metaclass is passed as the target. llvm::Value *Target; @@ -2041,8 +2102,7 @@ CGObjCMac::GenerateMessageSendSuper(CodeGen::CodeGenFunction &CGF, llvm::Type *ClassTy = CGM.getTypes().ConvertType(CGF.getContext().getObjCClassType()); Target = CGF.Builder.CreateBitCast(Target, ClassTy); - CGF.Builder.CreateStore(Target, - CGF.Builder.CreateStructGEP(ObjCSuper, 1, CGF.getPointerSize())); + CGF.Builder.CreateStore(Target, CGF.Builder.CreateStructGEP(ObjCSuper, 1)); return EmitMessageSend(CGF, Return, ResultType, EmitSelector(CGF, Sel), ObjCSuper.getPointer(), ObjCTypes.SuperPtrCTy, @@ -2129,7 +2189,7 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF, bool RequiresNullCheck = false; - llvm::Constant *Fn = nullptr; + llvm::FunctionCallee Fn = nullptr; if (CGM.ReturnSlotInterferesWithArgs(MSI.CallInfo)) { if (ReceiverCanBeNull) RequiresNullCheck = true; Fn = (ObjCABI == 2) ? ObjCTypes.getSendStretFn2(IsSuper) @@ -2149,6 +2209,10 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF, : ObjCTypes.getSendFn(IsSuper); } + // Cast function to proper signature + llvm::Constant *BitcastFn = cast<llvm::Constant>( + CGF.Builder.CreateBitCast(Fn.getCallee(), MSI.MessengerType)); + // We don't need to emit a null check to zero out an indirect result if the // result is ignored. if (Return.isUnused()) @@ -2169,16 +2233,15 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF, nullReturn.init(CGF, Arg0); } - llvm::Instruction *CallSite; - Fn = llvm::ConstantExpr::getBitCast(Fn, MSI.MessengerType); - CGCallee Callee = CGCallee::forDirect(Fn); + llvm::CallBase *CallSite; + CGCallee Callee = CGCallee::forDirect(BitcastFn); RValue rvalue = CGF.EmitCall(MSI.CallInfo, Callee, Return, ActualArgs, &CallSite); // Mark the call as noreturn if the method is marked noreturn and the // receiver cannot be null. if (Method && Method->hasAttr<NoReturnAttr>() && !ReceiverCanBeNull) { - llvm::CallSite(CallSite).setDoesNotReturn(); + CallSite->setDoesNotReturn(); } return nullReturn.complete(CGF, Return, rvalue, ResultType, CallArgs, @@ -2954,7 +3017,7 @@ llvm::Value *CGObjCCommonMac::EmitClassRefViaRuntime( CodeGenFunction &CGF, const ObjCInterfaceDecl *ID, ObjCCommonTypesHelper &ObjCTypes) { - llvm::Constant *lookUpClassFn = ObjCTypes.getLookUpClassFn(); + llvm::FunctionCallee lookUpClassFn = ObjCTypes.getLookUpClassFn(); llvm::Value *className = CGF.CGM.GetAddrOfConstantCString(ID->getObjCRuntimeNameAsString()) @@ -3100,7 +3163,7 @@ CGObjCMac::EmitProtocolExtension(const ObjCProtocolDecl *PD, values.add(classProperties); // No special section, but goes in llvm.used - return CreateMetadataVar("\01l_OBJC_PROTOCOLEXT_" + PD->getName(), values, + return CreateMetadataVar("_OBJC_PROTOCOLEXT_" + PD->getName(), values, StringRef(), CGM.getPointerAlign(), true); } @@ -3333,9 +3396,9 @@ void CGObjCMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) { // If there is no category @interface then there can be no properties. 
if (Category) { - Values.add(EmitPropertyList("\01l_OBJC_$_PROP_LIST_" + ExtName.str(), + Values.add(EmitPropertyList("_OBJC_$_PROP_LIST_" + ExtName.str(), OCD, Category, ObjCTypes, false)); - Values.add(EmitPropertyList("\01l_OBJC_$_CLASS_PROP_LIST_" + ExtName.str(), + Values.add(EmitPropertyList("_OBJC_$_CLASS_PROP_LIST_" + ExtName.str(), OCD, Category, ObjCTypes, true)); } else { Values.addNullPointer(ObjCTypes.PropertyListPtrTy); @@ -3681,8 +3744,8 @@ CGObjCMac::EmitClassExtension(const ObjCImplementationDecl *ID, // Properties. llvm::Constant *propertyList = - EmitPropertyList((isMetaclass ? Twine("\01l_OBJC_$_CLASS_PROP_LIST_") - : Twine("\01l_OBJC_$_PROP_LIST_")) + EmitPropertyList((isMetaclass ? Twine("_OBJC_$_CLASS_PROP_LIST_") + : Twine("_OBJC_$_PROP_LIST_")) + ID->getName(), ID, ID->getClassInterface(), ObjCTypes, isMetaclass); @@ -3930,9 +3993,10 @@ llvm::GlobalVariable *CGObjCCommonMac::CreateMetadataVar(Twine Name, StringRef Section, CharUnits Align, bool AddToUsed) { + llvm::GlobalValue::LinkageTypes LT = + getLinkageTypeForObjCMetadata(CGM, Section); llvm::GlobalVariable *GV = - Init.finishAndCreateGlobal(Name, Align, /*constant*/ false, - llvm::GlobalValue::PrivateLinkage); + Init.finishAndCreateGlobal(Name, Align, /*constant*/ false, LT); if (!Section.empty()) GV->setSection(Section); if (AddToUsed) @@ -3946,9 +4010,10 @@ llvm::GlobalVariable *CGObjCCommonMac::CreateMetadataVar(Twine Name, CharUnits Align, bool AddToUsed) { llvm::Type *Ty = Init->getType(); + llvm::GlobalValue::LinkageTypes LT = + getLinkageTypeForObjCMetadata(CGM, Section); llvm::GlobalVariable *GV = - new llvm::GlobalVariable(CGM.getModule(), Ty, false, - llvm::GlobalValue::PrivateLinkage, Init, Name); + new llvm::GlobalVariable(CGM.getModule(), Ty, false, LT, Init, Name); if (!Section.empty()) GV->setSection(Section); GV->setAlignment(Align.getQuantity()); @@ -4011,36 +4076,36 @@ llvm::Function *CGObjCMac::ModuleInitFunction() { return nullptr; } -llvm::Constant *CGObjCMac::GetPropertyGetFunction() { +llvm::FunctionCallee CGObjCMac::GetPropertyGetFunction() { return ObjCTypes.getGetPropertyFn(); } -llvm::Constant *CGObjCMac::GetPropertySetFunction() { +llvm::FunctionCallee CGObjCMac::GetPropertySetFunction() { return ObjCTypes.getSetPropertyFn(); } -llvm::Constant *CGObjCMac::GetOptimizedPropertySetFunction(bool atomic, - bool copy) { +llvm::FunctionCallee CGObjCMac::GetOptimizedPropertySetFunction(bool atomic, + bool copy) { return ObjCTypes.getOptimizedSetPropertyFn(atomic, copy); } -llvm::Constant *CGObjCMac::GetGetStructFunction() { +llvm::FunctionCallee CGObjCMac::GetGetStructFunction() { return ObjCTypes.getCopyStructFn(); } -llvm::Constant *CGObjCMac::GetSetStructFunction() { +llvm::FunctionCallee CGObjCMac::GetSetStructFunction() { return ObjCTypes.getCopyStructFn(); } -llvm::Constant *CGObjCMac::GetCppAtomicObjectGetFunction() { +llvm::FunctionCallee CGObjCMac::GetCppAtomicObjectGetFunction() { return ObjCTypes.getCppAtomicObjectFunction(); } -llvm::Constant *CGObjCMac::GetCppAtomicObjectSetFunction() { +llvm::FunctionCallee CGObjCMac::GetCppAtomicObjectSetFunction() { return ObjCTypes.getCppAtomicObjectFunction(); } -llvm::Constant *CGObjCMac::EnumerationMutationFunction() { +llvm::FunctionCallee CGObjCMac::EnumerationMutationFunction() { return ObjCTypes.getEnumerationMutationFn(); } @@ -4216,14 +4281,15 @@ void FragileHazards::emitHazardsInNewBlocks() { // Ignore instructions that aren't non-intrinsic calls. // These are the only calls that can possibly call longjmp. 
- if (!isa<llvm::CallInst>(I) && !isa<llvm::InvokeInst>(I)) continue; + if (!isa<llvm::CallInst>(I) && !isa<llvm::InvokeInst>(I)) + continue; if (isa<llvm::IntrinsicInst>(I)) continue; // Ignore call sites marked nounwind. This may be questionable, // since 'nounwind' doesn't necessarily mean 'does not call longjmp'. - llvm::CallSite CS(&I); - if (CS.doesNotThrow()) continue; + if (cast<llvm::CallBase>(I).doesNotThrow()) + continue; // Insert a read hazard before the call. This will ensure that // any writes to the locals are performed before making the @@ -4855,7 +4921,7 @@ llvm::Value *CGObjCMac::EmitIvarOffset(CodeGen::CodeGenFunction &CGF, std::string CGObjCCommonMac::GetSectionName(StringRef Section, StringRef MachOAttributes) { switch (CGM.getTriple().getObjectFormat()) { - default: + case llvm::Triple::UnknownObjectFormat: llvm_unreachable("unexpected object file format"); case llvm::Triple::MachO: { if (MachOAttributes.empty()) @@ -4870,7 +4936,13 @@ std::string CGObjCCommonMac::GetSectionName(StringRef Section, assert(Section.substr(0, 2) == "__" && "expected the name to begin with __"); return ("." + Section.substr(2) + "$B").str(); + case llvm::Triple::Wasm: + case llvm::Triple::XCOFF: + llvm::report_fatal_error( + "Objective-C support is unimplemented for object file format."); } + + llvm_unreachable("Unhandled llvm::Triple::ObjectFormatType enum"); } /// EmitImageInfo - Emit the image info marker used to encode some module @@ -5994,10 +6066,15 @@ void CGObjCNonFragileABIMac::AddModuleClassList( Symbols.size()), Symbols); + // Section name is obtained by calling GetSectionName, which returns + // sections in the __DATA segment on MachO. + assert((!CGM.getTriple().isOSBinFormatMachO() || + SectionName.startswith("__DATA")) && + "SectionName expected to start with __DATA on MachO"); + llvm::GlobalValue::LinkageTypes LT = + getLinkageTypeForObjCMetadata(CGM, SectionName); llvm::GlobalVariable *GV = - new llvm::GlobalVariable(CGM.getModule(), Init->getType(), false, - llvm::GlobalValue::PrivateLinkage, - Init, + new llvm::GlobalVariable(CGM.getModule(), Init->getType(), false, LT, Init, SymbolName); GV->setAlignment(CGM.getDataLayout().getABITypeAlignment(Init->getType())); GV->setSection(SectionName); @@ -6034,6 +6111,9 @@ void CGObjCNonFragileABIMac::FinishNonFragileABIModule() { AddModuleClassList(DefinedCategories, "OBJC_LABEL_CATEGORY_$", GetSectionName("__objc_catlist", "regular,no_dead_strip")); + AddModuleClassList(DefinedStubCategories, "OBJC_LABEL_STUB_CATEGORY_$", + GetSectionName("__objc_catlist2", + "regular,no_dead_strip")); AddModuleClassList(DefinedNonLazyCategories, "OBJC_LABEL_NONLAZY_CATEGORY_$", GetSectionName("__objc_nlcatlist", "regular,no_dead_strip")); @@ -6176,7 +6256,7 @@ llvm::GlobalVariable * CGObjCNonFragileABIMac::BuildClassRoTInitializer( const ObjCInterfaceDecl *OID = ID->getClassInterface(); assert(OID && "CGObjCNonFragileABIMac::BuildClassRoTInitializer"); - values.add(EmitProtocolList("\01l_OBJC_CLASS_PROTOCOLS_$_" + values.add(EmitProtocolList("_OBJC_CLASS_PROTOCOLS_$_" + OID->getObjCRuntimeNameAsString(), OID->all_referenced_protocol_begin(), OID->all_referenced_protocol_end())); @@ -6185,29 +6265,23 @@ llvm::GlobalVariable * CGObjCNonFragileABIMac::BuildClassRoTInitializer( values.addNullPointer(ObjCTypes.IvarListnfABIPtrTy); values.add(GetIvarLayoutName(nullptr, ObjCTypes)); values.add(EmitPropertyList( - "\01l_OBJC_$_CLASS_PROP_LIST_" + ID->getObjCRuntimeNameAsString(), + "_OBJC_$_CLASS_PROP_LIST_" + ID->getObjCRuntimeNameAsString(), ID, 
ID->getClassInterface(), ObjCTypes, true)); } else { values.add(EmitIvarList(ID)); values.add(BuildWeakIvarLayout(ID, beginInstance, endInstance, hasMRCWeak)); values.add(EmitPropertyList( - "\01l_OBJC_$_PROP_LIST_" + ID->getObjCRuntimeNameAsString(), + "_OBJC_$_PROP_LIST_" + ID->getObjCRuntimeNameAsString(), ID, ID->getClassInterface(), ObjCTypes, false)); } llvm::SmallString<64> roLabel; llvm::raw_svector_ostream(roLabel) - << ((flags & NonFragileABI_Class_Meta) ? "\01l_OBJC_METACLASS_RO_$_" - : "\01l_OBJC_CLASS_RO_$_") + << ((flags & NonFragileABI_Class_Meta) ? "_OBJC_METACLASS_RO_$_" + : "_OBJC_CLASS_RO_$_") << ClassName; - llvm::GlobalVariable *CLASS_RO_GV = - values.finishAndCreateGlobal(roLabel, CGM.getPointerAlign(), - /*constant*/ false, - llvm::GlobalValue::PrivateLinkage); - if (CGM.getTriple().isOSBinFormatMachO()) - CLASS_RO_GV->setSection("__DATA, __objc_const"); - return CLASS_RO_GV; + return finishAndCreateGlobal(values, roLabel, CGM); } /// Build the metaclass object for a class. @@ -6253,9 +6327,11 @@ CGObjCNonFragileABIMac::BuildClassObject(const ObjCInterfaceDecl *CI, return GV; } -bool -CGObjCNonFragileABIMac::ImplementationIsNonLazy(const ObjCImplDecl *OD) const { - return OD->getClassMethod(GetNullarySelector("load")) != nullptr; +bool CGObjCNonFragileABIMac::ImplementationIsNonLazy( + const ObjCImplDecl *OD) const { + return OD->getClassMethod(GetNullarySelector("load")) != nullptr || + OD->getClassInterface()->hasAttr<ObjCNonLazyClassAttr>() || + OD->hasAttr<ObjCNonLazyClassAttr>(); } void CGObjCNonFragileABIMac::GetClassSizeInfo(const ObjCImplementationDecl *OID, @@ -6437,7 +6513,7 @@ llvm::Value *CGObjCNonFragileABIMac::GenerateProtocolRef(CodeGenFunction &CGF, llvm::ConstantExpr::getBitCast(GetOrEmitProtocol(PD), ObjCTypes.getExternalProtocolPtrTy()); - std::string ProtocolName("\01l_OBJC_PROTOCOL_REFERENCE_$_"); + std::string ProtocolName("_OBJC_PROTOCOL_REFERENCE_$_"); ProtocolName += PD->getObjCRuntimeNameAsString(); CharUnits Align = CGF.getPointerAlign(); @@ -6472,7 +6548,7 @@ llvm::Value *CGObjCNonFragileABIMac::GenerateProtocolRef(CodeGenFunction &CGF, /// void CGObjCNonFragileABIMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) { const ObjCInterfaceDecl *Interface = OCD->getClassInterface(); - const char *Prefix = "\01l_OBJC_$_CATEGORY_"; + const char *Prefix = "_OBJC_$_CATEGORY_"; llvm::SmallString<64> ExtCatName(Prefix); ExtCatName += Interface->getObjCRuntimeNameAsString(); @@ -6508,14 +6584,14 @@ void CGObjCNonFragileABIMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) { SmallString<256> ExtName; llvm::raw_svector_ostream(ExtName) << Interface->getObjCRuntimeNameAsString() << "_$_" << OCD->getName(); - values.add(EmitProtocolList("\01l_OBJC_CATEGORY_PROTOCOLS_$_" + values.add(EmitProtocolList("_OBJC_CATEGORY_PROTOCOLS_$_" + Interface->getObjCRuntimeNameAsString() + "_$_" + Category->getName(), Category->protocol_begin(), Category->protocol_end())); - values.add(EmitPropertyList("\01l_OBJC_$_PROP_LIST_" + ExtName.str(), + values.add(EmitPropertyList("_OBJC_$_PROP_LIST_" + ExtName.str(), OCD, Category, ObjCTypes, false)); - values.add(EmitPropertyList("\01l_OBJC_$_CLASS_PROP_LIST_" + ExtName.str(), + values.add(EmitPropertyList("_OBJC_$_CLASS_PROP_LIST_" + ExtName.str(), OCD, Category, ObjCTypes, true)); } else { values.addNullPointer(ObjCTypes.ProtocolListnfABIPtrTy); @@ -6527,13 +6603,12 @@ void CGObjCNonFragileABIMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) { values.addInt(ObjCTypes.IntTy, Size); llvm::GlobalVariable *GCATV = - 
values.finishAndCreateGlobal(ExtCatName.str(), CGM.getPointerAlign(), - /*constant*/ false, - llvm::GlobalValue::PrivateLinkage); - if (CGM.getTriple().isOSBinFormatMachO()) - GCATV->setSection("__DATA, __objc_const"); + finishAndCreateGlobal(values, ExtCatName.str(), CGM); CGM.addCompilerUsedGlobal(GCATV); - DefinedCategories.push_back(GCATV); + if (Interface->hasAttr<ObjCClassStubAttr>()) + DefinedStubCategories.push_back(GCATV); + else + DefinedCategories.push_back(GCATV); // Determine if this category is also "non-lazy". if (ImplementationIsNonLazy(OCD)) @@ -6590,36 +6665,36 @@ CGObjCNonFragileABIMac::emitMethodList(Twine name, MethodListType kind, bool forProtocol; switch (kind) { case MethodListType::CategoryInstanceMethods: - prefix = "\01l_OBJC_$_CATEGORY_INSTANCE_METHODS_"; + prefix = "_OBJC_$_CATEGORY_INSTANCE_METHODS_"; forProtocol = false; break; case MethodListType::CategoryClassMethods: - prefix = "\01l_OBJC_$_CATEGORY_CLASS_METHODS_"; + prefix = "_OBJC_$_CATEGORY_CLASS_METHODS_"; forProtocol = false; break; case MethodListType::InstanceMethods: - prefix = "\01l_OBJC_$_INSTANCE_METHODS_"; + prefix = "_OBJC_$_INSTANCE_METHODS_"; forProtocol = false; break; case MethodListType::ClassMethods: - prefix = "\01l_OBJC_$_CLASS_METHODS_"; + prefix = "_OBJC_$_CLASS_METHODS_"; forProtocol = false; break; case MethodListType::ProtocolInstanceMethods: - prefix = "\01l_OBJC_$_PROTOCOL_INSTANCE_METHODS_"; + prefix = "_OBJC_$_PROTOCOL_INSTANCE_METHODS_"; forProtocol = true; break; case MethodListType::ProtocolClassMethods: - prefix = "\01l_OBJC_$_PROTOCOL_CLASS_METHODS_"; + prefix = "_OBJC_$_PROTOCOL_CLASS_METHODS_"; forProtocol = true; break; case MethodListType::OptionalProtocolInstanceMethods: - prefix = "\01l_OBJC_$_PROTOCOL_INSTANCE_METHODS_OPT_"; + prefix = "_OBJC_$_PROTOCOL_INSTANCE_METHODS_OPT_"; forProtocol = true; break; case MethodListType::OptionalProtocolClassMethods: - prefix = "\01l_OBJC_$_PROTOCOL_CLASS_METHODS_OPT_"; + prefix = "_OBJC_$_PROTOCOL_CLASS_METHODS_OPT_"; forProtocol = true; break; } @@ -6638,11 +6713,7 @@ CGObjCNonFragileABIMac::emitMethodList(Twine name, MethodListType kind, } methodArray.finishAndAddTo(values); - auto *GV = values.finishAndCreateGlobal(prefix + name, CGM.getPointerAlign(), - /*constant*/ false, - llvm::GlobalValue::PrivateLinkage); - if (CGM.getTriple().isOSBinFormatMachO()) - GV->setSection("__DATA, __objc_const"); + llvm::GlobalVariable *GV = finishAndCreateGlobal(values, prefix + name, CGM); CGM.addCompilerUsedGlobal(GV); return llvm::ConstantExpr::getBitCast(GV, ObjCTypes.MethodListnfABIPtrTy); } @@ -6702,6 +6773,12 @@ CGObjCNonFragileABIMac::EmitIvarOffsetVar(const ObjCInterfaceDecl *ID, IvarOffsetGV->setVisibility(llvm::GlobalValue::DefaultVisibility); } + // If ID's layout is known, then make the global constant. This serves as a + // useful assertion: we'll never use this variable to calculate ivar offsets, + // so if the runtime tries to patch it then we should crash. 
+ if (isClassLayoutKnownStatically(ID)) + IvarOffsetGV->setConstant(true); + if (CGM.getTriple().isOSBinFormatMachO()) IvarOffsetGV->setSection("__DATA, __objc_ivar"); return IvarOffsetGV; @@ -6776,13 +6853,9 @@ llvm::Constant *CGObjCNonFragileABIMac::EmitIvarList( ivars.finishAndAddTo(ivarList); ivarList.fillPlaceholderWithInt(ivarCountSlot, ObjCTypes.IntTy, ivarCount); - const char *Prefix = "\01l_OBJC_$_INSTANCE_VARIABLES_"; - llvm::GlobalVariable *GV = - ivarList.finishAndCreateGlobal(Prefix + OID->getObjCRuntimeNameAsString(), - CGM.getPointerAlign(), /*constant*/ false, - llvm::GlobalValue::PrivateLinkage); - if (CGM.getTriple().isOSBinFormatMachO()) - GV->setSection("__DATA, __objc_const"); + const char *Prefix = "_OBJC_$_INSTANCE_VARIABLES_"; + llvm::GlobalVariable *GV = finishAndCreateGlobal( + ivarList, Prefix + OID->getObjCRuntimeNameAsString(), CGM); CGM.addCompilerUsedGlobal(GV); return llvm::ConstantExpr::getBitCast(GV, ObjCTypes.IvarListnfABIPtrTy); } @@ -6796,7 +6869,7 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocolRef( // reference or not. At module finalization we add the empty // contents for protocols which were referenced but never defined. llvm::SmallString<64> Protocol; - llvm::raw_svector_ostream(Protocol) << "\01l_OBJC_PROTOCOL_$_" + llvm::raw_svector_ostream(Protocol) << "_OBJC_PROTOCOL_$_" << PD->getObjCRuntimeNameAsString(); Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ProtocolnfABITy, @@ -6850,7 +6923,7 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol( // isa is NULL values.addNullPointer(ObjCTypes.ObjectPtrTy); values.add(GetClassName(PD->getObjCRuntimeNameAsString())); - values.add(EmitProtocolList("\01l_OBJC_$_PROTOCOL_REFS_" + values.add(EmitProtocolList("_OBJC_$_PROTOCOL_REFS_" + PD->getObjCRuntimeNameAsString(), PD->protocol_begin(), PD->protocol_end())); @@ -6863,13 +6936,13 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol( values.add(methodLists.emitMethodList(this, PD, ProtocolMethodLists::OptionalClassMethods)); values.add(EmitPropertyList( - "\01l_OBJC_$_PROP_LIST_" + PD->getObjCRuntimeNameAsString(), + "_OBJC_$_PROP_LIST_" + PD->getObjCRuntimeNameAsString(), nullptr, PD, ObjCTypes, false)); uint32_t Size = CGM.getDataLayout().getTypeAllocSize(ObjCTypes.ProtocolnfABITy); values.addInt(ObjCTypes.IntTy, Size); values.addInt(ObjCTypes.IntTy, 0); - values.add(EmitProtocolMethodTypes("\01l_OBJC_$_PROTOCOL_METHOD_TYPES_" + values.add(EmitProtocolMethodTypes("_OBJC_$_PROTOCOL_METHOD_TYPES_" + PD->getObjCRuntimeNameAsString(), methodLists.emitExtendedTypesArray(this), ObjCTypes)); @@ -6878,7 +6951,7 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol( values.addNullPointer(ObjCTypes.Int8PtrTy); values.add(EmitPropertyList( - "\01l_OBJC_$_CLASS_PROP_LIST_" + PD->getObjCRuntimeNameAsString(), + "_OBJC_$_CLASS_PROP_LIST_" + PD->getObjCRuntimeNameAsString(), nullptr, PD, ObjCTypes, true)); if (Entry) { @@ -6888,7 +6961,7 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol( } else { llvm::SmallString<64> symbolName; llvm::raw_svector_ostream(symbolName) - << "\01l_OBJC_PROTOCOL_$_" << PD->getObjCRuntimeNameAsString(); + << "_OBJC_PROTOCOL_$_" << PD->getObjCRuntimeNameAsString(); Entry = values.finishAndCreateGlobal(symbolName, CGM.getPointerAlign(), /*constant*/ false, @@ -6904,7 +6977,7 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol( // Use this protocol meta-data to build protocol list table in section // __DATA, __objc_protolist llvm::SmallString<64> ProtocolRef; - 
llvm::raw_svector_ostream(ProtocolRef) << "\01l_OBJC_LABEL_PROTOCOL_$_" + llvm::raw_svector_ostream(ProtocolRef) << "_OBJC_LABEL_PROTOCOL_$_" << PD->getObjCRuntimeNameAsString(); llvm::GlobalVariable *PTGV = @@ -6962,11 +7035,7 @@ CGObjCNonFragileABIMac::EmitProtocolList(Twine Name, array.finishAndAddTo(values); values.fillPlaceholderWithInt(countSlot, ObjCTypes.LongTy, count); - GV = values.finishAndCreateGlobal(Name, CGM.getPointerAlign(), - /*constant*/ false, - llvm::GlobalValue::PrivateLinkage); - if (CGM.getTriple().isOSBinFormatMachO()) - GV->setSection("__DATA, __objc_const"); + GV = finishAndCreateGlobal(values, Name, CGM); CGM.addCompilerUsedGlobal(GV); return llvm::ConstantExpr::getBitCast(GV, ObjCTypes.ProtocolListnfABIPtrTy); @@ -6990,17 +7059,24 @@ LValue CGObjCNonFragileABIMac::EmitObjCValueForIvar( Offset); } -llvm::Value *CGObjCNonFragileABIMac::EmitIvarOffset( - CodeGen::CodeGenFunction &CGF, - const ObjCInterfaceDecl *Interface, - const ObjCIvarDecl *Ivar) { - llvm::Value *IvarOffsetValue = ObjCIvarOffsetVariable(Interface, Ivar); - IvarOffsetValue = CGF.Builder.CreateAlignedLoad(IvarOffsetValue, - CGF.getSizeAlign(), "ivar"); - if (IsIvarOffsetKnownIdempotent(CGF, Ivar)) - cast<llvm::LoadInst>(IvarOffsetValue) - ->setMetadata(CGM.getModule().getMDKindID("invariant.load"), - llvm::MDNode::get(VMContext, None)); +llvm::Value * +CGObjCNonFragileABIMac::EmitIvarOffset(CodeGen::CodeGenFunction &CGF, + const ObjCInterfaceDecl *Interface, + const ObjCIvarDecl *Ivar) { + llvm::Value *IvarOffsetValue; + if (isClassLayoutKnownStatically(Interface)) { + IvarOffsetValue = llvm::ConstantInt::get( + ObjCTypes.IvarOffsetVarTy, + ComputeIvarBaseOffset(CGM, Interface->getImplementation(), Ivar)); + } else { + llvm::GlobalVariable *GV = ObjCIvarOffsetVariable(Interface, Ivar); + IvarOffsetValue = + CGF.Builder.CreateAlignedLoad(GV, CGF.getSizeAlign(), "ivar"); + if (IsIvarOffsetKnownIdempotent(CGF, Ivar)) + cast<llvm::LoadInst>(IvarOffsetValue) + ->setMetadata(CGM.getModule().getMDKindID("invariant.load"), + llvm::MDNode::get(VMContext, None)); + } // This could be 32bit int or 64bit integer depending on the architecture. // Cast it to 64bit integer value, if it is a 32bit integer ivar offset value @@ -7069,8 +7145,8 @@ CGObjCNonFragileABIMac::EmitVTableMessageSend(CodeGenFunction &CGF, // The runtime currently never uses vtable dispatch for anything // except normal, non-super message-sends. // FIXME: don't use this for that. - llvm::Constant *fn = nullptr; - std::string messageRefName("\01l_"); + llvm::FunctionCallee fn = nullptr; + std::string messageRefName("_"); if (CGM.ReturnSlotInterferesWithArgs(MSI.CallInfo)) { if (isSuper) { fn = ObjCTypes.getMessageSendSuper2StretFixupFn(); @@ -7105,7 +7181,7 @@ CGObjCNonFragileABIMac::EmitVTableMessageSend(CodeGenFunction &CGF, // Build the message ref structure. ConstantInitBuilder builder(CGM); auto values = builder.beginStruct(); - values.add(fn); + values.add(cast<llvm::Constant>(fn.getCallee())); values.add(GetMethodVarName(selector)); messageRef = values.finishAndCreateGlobal(messageRefName, CharUnits::fromQuantity(16), @@ -7134,8 +7210,7 @@ CGObjCNonFragileABIMac::EmitVTableMessageSend(CodeGenFunction &CGF, args[1].setRValue(RValue::get(mref.getPointer())); // Load the function to call from the message ref table. 
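For context on the load in the next hunk: under the vtable-dispatch scheme each send site owns a small message-ref struct whose first field is the messenger function and whose second is the selector; the emitted code loads slot 0 and calls it indirectly. A minimal IRBuilder sketch of that sequence, with every parameter assumed supplied by the caller:

#include "llvm/IR/IRBuilder.h"

// Sketch: given a pointer to a { fn, sel } message-ref struct, load the
// messenger function out of slot 0 and call it indirectly.
llvm::CallInst *emitVTableDispatch(llvm::IRBuilder<> &Builder,
                                   llvm::StructType *MsgRefTy,
                                   llvm::Value *MsgRef,
                                   llvm::FunctionType *MessengerTy,
                                   llvm::ArrayRef<llvm::Value *> Args) {
  llvm::Value *FnSlot = Builder.CreateStructGEP(MsgRefTy, MsgRef, 0);
  llvm::Value *Fn =
      Builder.CreateLoad(MessengerTy->getPointerTo(), FnSlot, "msgSend_fn");
  return Builder.CreateCall(MessengerTy, Fn, Args);
}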
- Address calleeAddr = - CGF.Builder.CreateStructGEP(mref, 0, CharUnits::Zero()); + Address calleeAddr = CGF.Builder.CreateStructGEP(mref, 0); llvm::Value *calleePtr = CGF.Builder.CreateLoad(calleeAddr, "msgSend_fn"); calleePtr = CGF.Builder.CreateBitCast(calleePtr, MSI.MessengerType); @@ -7209,31 +7284,68 @@ CGObjCNonFragileABIMac::GetClassGlobal(StringRef Name, return GV; } +llvm::Constant * +CGObjCNonFragileABIMac::GetClassGlobalForClassRef(const ObjCInterfaceDecl *ID) { + llvm::Constant *ClassGV = GetClassGlobal(ID, /*metaclass*/ false, + NotForDefinition); + + if (!ID->hasAttr<ObjCClassStubAttr>()) + return ClassGV; + + ClassGV = llvm::ConstantExpr::getPointerCast(ClassGV, ObjCTypes.Int8PtrTy); + + // Stub classes are pointer-aligned. Classrefs pointing at stub classes + // must set the least significant bit set to 1. + auto *Idx = llvm::ConstantInt::get(CGM.Int32Ty, 1); + return llvm::ConstantExpr::getGetElementPtr(CGM.Int8Ty, ClassGV, Idx); +} + +llvm::Value * +CGObjCNonFragileABIMac::EmitLoadOfClassRef(CodeGenFunction &CGF, + const ObjCInterfaceDecl *ID, + llvm::GlobalVariable *Entry) { + if (ID && ID->hasAttr<ObjCClassStubAttr>()) { + // Classrefs pointing at Objective-C stub classes must be loaded by calling + // a special runtime function. + return CGF.EmitRuntimeCall( + ObjCTypes.getLoadClassrefFn(), Entry, "load_classref_result"); + } + + CharUnits Align = CGF.getPointerAlign(); + return CGF.Builder.CreateAlignedLoad(Entry, Align); +} + llvm::Value * CGObjCNonFragileABIMac::EmitClassRefFromId(CodeGenFunction &CGF, IdentifierInfo *II, const ObjCInterfaceDecl *ID) { - CharUnits Align = CGF.getPointerAlign(); llvm::GlobalVariable *&Entry = ClassReferences[II]; if (!Entry) { llvm::Constant *ClassGV; if (ID) { - ClassGV = GetClassGlobal(ID, /*metaclass*/ false, NotForDefinition); + ClassGV = GetClassGlobalForClassRef(ID); } else { ClassGV = GetClassGlobal((getClassSymbolPrefix() + II->getName()).str(), NotForDefinition); + assert(ClassGV->getType() == ObjCTypes.ClassnfABIPtrTy && + "classref was emitted with the wrong type?"); } - Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassnfABIPtrTy, - false, llvm::GlobalValue::PrivateLinkage, - ClassGV, "OBJC_CLASSLIST_REFERENCES_$_"); - Entry->setAlignment(Align.getQuantity()); - Entry->setSection(GetSectionName("__objc_classrefs", - "regular,no_dead_strip")); + std::string SectionName = + GetSectionName("__objc_classrefs", "regular,no_dead_strip"); + Entry = new llvm::GlobalVariable( + CGM.getModule(), ClassGV->getType(), false, + getLinkageTypeForObjCMetadata(CGM, SectionName), ClassGV, + "OBJC_CLASSLIST_REFERENCES_$_"); + Entry->setAlignment(CGF.getPointerAlign().getQuantity()); + if (!ID || !ID->hasAttr<ObjCClassStubAttr>()) + Entry->setSection(SectionName); + CGM.addCompilerUsedGlobal(Entry); } - return CGF.Builder.CreateAlignedLoad(Entry, Align); + + return EmitLoadOfClassRef(CGF, ID, Entry); } llvm::Value *CGObjCNonFragileABIMac::EmitClassRef(CodeGenFunction &CGF, @@ -7255,20 +7367,22 @@ llvm::Value *CGObjCNonFragileABIMac::EmitNSAutoreleasePoolClassRef( llvm::Value * CGObjCNonFragileABIMac::EmitSuperClassRef(CodeGenFunction &CGF, const ObjCInterfaceDecl *ID) { - CharUnits Align = CGF.getPointerAlign(); llvm::GlobalVariable *&Entry = SuperClassReferences[ID->getIdentifier()]; if (!Entry) { - auto ClassGV = GetClassGlobal(ID, /*metaclass*/ false, NotForDefinition); - Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassnfABIPtrTy, - false, llvm::GlobalValue::PrivateLinkage, - ClassGV, 
"OBJC_CLASSLIST_SUP_REFS_$_"); - Entry->setAlignment(Align.getQuantity()); - Entry->setSection(GetSectionName("__objc_superrefs", - "regular,no_dead_strip")); + llvm::Constant *ClassGV = GetClassGlobalForClassRef(ID); + std::string SectionName = + GetSectionName("__objc_superrefs", "regular,no_dead_strip"); + Entry = new llvm::GlobalVariable( + CGM.getModule(), ClassGV->getType(), false, + getLinkageTypeForObjCMetadata(CGM, SectionName), ClassGV, + "OBJC_CLASSLIST_SUP_REFS_$_"); + Entry->setAlignment(CGF.getPointerAlign().getQuantity()); + Entry->setSection(SectionName); CGM.addCompilerUsedGlobal(Entry); } - return CGF.Builder.CreateAlignedLoad(Entry, Align); + + return EmitLoadOfClassRef(CGF, ID, Entry); } /// EmitMetaClassRef - Return a Value * of the address of _class_t @@ -7281,14 +7395,14 @@ llvm::Value *CGObjCNonFragileABIMac::EmitMetaClassRef(CodeGenFunction &CGF, llvm::GlobalVariable * &Entry = MetaClassReferences[ID->getIdentifier()]; if (!Entry) { auto MetaClassGV = GetClassGlobal(ID, /*metaclass*/ true, NotForDefinition); - - Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassnfABIPtrTy, - false, llvm::GlobalValue::PrivateLinkage, - MetaClassGV, "OBJC_CLASSLIST_SUP_REFS_$_"); + std::string SectionName = + GetSectionName("__objc_superrefs", "regular,no_dead_strip"); + Entry = new llvm::GlobalVariable( + CGM.getModule(), ObjCTypes.ClassnfABIPtrTy, false, + getLinkageTypeForObjCMetadata(CGM, SectionName), MetaClassGV, + "OBJC_CLASSLIST_SUP_REFS_$_"); Entry->setAlignment(Align.getQuantity()); - - Entry->setSection(GetSectionName("__objc_superrefs", - "regular,no_dead_strip")); + Entry->setSection(SectionName); CGM.addCompilerUsedGlobal(Entry); } @@ -7332,9 +7446,8 @@ CGObjCNonFragileABIMac::GenerateMessageSendSuper(CodeGen::CodeGenFunction &CGF, llvm::Value *ReceiverAsObject = CGF.Builder.CreateBitCast(Receiver, ObjCTypes.ObjectPtrTy); - CGF.Builder.CreateStore( - ReceiverAsObject, - CGF.Builder.CreateStructGEP(ObjCSuper, 0, CharUnits::Zero())); + CGF.Builder.CreateStore(ReceiverAsObject, + CGF.Builder.CreateStructGEP(ObjCSuper, 0)); // If this is a class message the metaclass is passed as the target. llvm::Value *Target; @@ -7348,8 +7461,7 @@ CGObjCNonFragileABIMac::GenerateMessageSendSuper(CodeGen::CodeGenFunction &CGF, llvm::Type *ClassTy = CGM.getTypes().ConvertType(CGF.getContext().getObjCClassType()); Target = CGF.Builder.CreateBitCast(Target, ClassTy); - CGF.Builder.CreateStore( - Target, CGF.Builder.CreateStructGEP(ObjCSuper, 1, CGF.getPointerSize())); + CGF.Builder.CreateStore(Target, CGF.Builder.CreateStructGEP(ObjCSuper, 1)); return (isVTableDispatchedSelector(Sel)) ? 
EmitVTableMessageSend(CGF, Return, ResultType, Sel, @@ -7380,12 +7492,14 @@ Address CGObjCNonFragileABIMac::EmitSelectorAddr(CodeGenFunction &CGF, llvm::Constant *Casted = llvm::ConstantExpr::getBitCast(GetMethodVarName(Sel), ObjCTypes.SelectorPtrTy); - Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.SelectorPtrTy, - false, llvm::GlobalValue::PrivateLinkage, - Casted, "OBJC_SELECTOR_REFERENCES_"); + std::string SectionName = + GetSectionName("__objc_selrefs", "literal_pointers,no_dead_strip"); + Entry = new llvm::GlobalVariable( + CGM.getModule(), ObjCTypes.SelectorPtrTy, false, + getLinkageTypeForObjCMetadata(CGM, SectionName), Casted, + "OBJC_SELECTOR_REFERENCES_"); Entry->setExternallyInitialized(true); - Entry->setSection(GetSectionName("__objc_selrefs", - "literal_pointers,no_dead_strip")); + Entry->setSection(SectionName); Entry->setAlignment(Align.getQuantity()); CGM.addCompilerUsedGlobal(Entry); } @@ -7509,9 +7623,8 @@ void CGObjCNonFragileABIMac::EmitObjCGlobalAssign(CodeGen::CodeGenFunction &CGF, void CGObjCNonFragileABIMac::EmitSynchronizedStmt(CodeGen::CodeGenFunction &CGF, const ObjCAtSynchronizedStmt &S) { - EmitAtSynchronizedStmt(CGF, S, - cast<llvm::Function>(ObjCTypes.getSyncEnterFn()), - cast<llvm::Function>(ObjCTypes.getSyncExitFn())); + EmitAtSynchronizedStmt(CGF, S, ObjCTypes.getSyncEnterFn(), + ObjCTypes.getSyncExitFn()); } llvm::Constant * @@ -7542,10 +7655,9 @@ CGObjCNonFragileABIMac::GetEHType(QualType T) { void CGObjCNonFragileABIMac::EmitTryStmt(CodeGen::CodeGenFunction &CGF, const ObjCAtTryStmt &S) { - EmitTryCatchStmt(CGF, S, - cast<llvm::Function>(ObjCTypes.getObjCBeginCatchFn()), - cast<llvm::Function>(ObjCTypes.getObjCEndCatchFn()), - cast<llvm::Function>(ObjCTypes.getExceptionRethrowFn())); + EmitTryCatchStmt(CGF, S, ObjCTypes.getObjCBeginCatchFn(), + ObjCTypes.getObjCEndCatchFn(), + ObjCTypes.getExceptionRethrowFn()); } /// EmitThrowStmt - Generate code for a throw statement. @@ -7555,11 +7667,13 @@ void CGObjCNonFragileABIMac::EmitThrowStmt(CodeGen::CodeGenFunction &CGF, if (const Expr *ThrowExpr = S.getThrowExpr()) { llvm::Value *Exception = CGF.EmitObjCThrowOperand(ThrowExpr); Exception = CGF.Builder.CreateBitCast(Exception, ObjCTypes.ObjectPtrTy); - CGF.EmitRuntimeCallOrInvoke(ObjCTypes.getExceptionThrowFn(), Exception) - .setDoesNotReturn(); + llvm::CallBase *Call = + CGF.EmitRuntimeCallOrInvoke(ObjCTypes.getExceptionThrowFn(), Exception); + Call->setDoesNotReturn(); } else { - CGF.EmitRuntimeCallOrInvoke(ObjCTypes.getExceptionRethrowFn()) - .setDoesNotReturn(); + llvm::CallBase *Call = + CGF.EmitRuntimeCallOrInvoke(ObjCTypes.getExceptionRethrowFn()); + Call->setDoesNotReturn(); } CGF.Builder.CreateUnreachable(); diff --git a/lib/CodeGen/CGObjCRuntime.cpp b/lib/CodeGen/CGObjCRuntime.cpp index 4b6f24a03f27..f8b831d0e9be 100644 --- a/lib/CodeGen/CGObjCRuntime.cpp +++ b/lib/CodeGen/CGObjCRuntime.cpp @@ -1,9 +1,8 @@ //==- CGObjCRuntime.cpp - Interface to Shared Objective-C Runtime Features ==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -22,7 +21,6 @@ #include "clang/AST/RecordLayout.h" #include "clang/AST/StmtObjC.h" #include "clang/CodeGen/CGFunctionInfo.h" -#include "llvm/IR/CallSite.h" #include "llvm/Support/SaveAndRestore.h" using namespace clang; @@ -127,10 +125,10 @@ namespace { }; struct CallObjCEndCatch final : EHScopeStack::Cleanup { - CallObjCEndCatch(bool MightThrow, llvm::Value *Fn) + CallObjCEndCatch(bool MightThrow, llvm::FunctionCallee Fn) : MightThrow(MightThrow), Fn(Fn) {} bool MightThrow; - llvm::Value *Fn; + llvm::FunctionCallee Fn; void Emit(CodeGenFunction &CGF, Flags flags) override { if (MightThrow) @@ -141,12 +139,11 @@ namespace { }; } - void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF, const ObjCAtTryStmt &S, - llvm::Constant *beginCatchFn, - llvm::Constant *endCatchFn, - llvm::Constant *exceptionRethrowFn) { + llvm::FunctionCallee beginCatchFn, + llvm::FunctionCallee endCatchFn, + llvm::FunctionCallee exceptionRethrowFn) { // Jump destination for falling out of catch bodies. CodeGenFunction::JumpDest Cont; if (S.getNumCatchStmts()) @@ -313,10 +310,10 @@ void CGObjCRuntime::EmitInitOfCatchParam(CodeGenFunction &CGF, namespace { struct CallSyncExit final : EHScopeStack::Cleanup { - llvm::Value *SyncExitFn; + llvm::FunctionCallee SyncExitFn; llvm::Value *SyncArg; - CallSyncExit(llvm::Value *SyncExitFn, llvm::Value *SyncArg) - : SyncExitFn(SyncExitFn), SyncArg(SyncArg) {} + CallSyncExit(llvm::FunctionCallee SyncExitFn, llvm::Value *SyncArg) + : SyncExitFn(SyncExitFn), SyncArg(SyncArg) {} void Emit(CodeGenFunction &CGF, Flags flags) override { CGF.EmitNounwindRuntimeCall(SyncExitFn, SyncArg); @@ -326,8 +323,8 @@ namespace { void CGObjCRuntime::EmitAtSynchronizedStmt(CodeGenFunction &CGF, const ObjCAtSynchronizedStmt &S, - llvm::Function *syncEnterFn, - llvm::Function *syncExitFn) { + llvm::FunctionCallee syncEnterFn, + llvm::FunctionCallee syncExitFn) { CodeGenFunction::RunCleanupsScope cleanups(CGF); // Evaluate the lock operand. This is guaranteed to dominate the diff --git a/lib/CodeGen/CGObjCRuntime.h b/lib/CodeGen/CGObjCRuntime.h index fa16c198adbc..471816cb5988 100644 --- a/lib/CodeGen/CGObjCRuntime.h +++ b/lib/CodeGen/CGObjCRuntime.h @@ -1,9 +1,8 @@ //===----- CGObjCRuntime.h - Interface to ObjC Runtimes ---------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -96,11 +95,10 @@ protected: /// used to rethrow exceptions. If the begin and end catch functions are /// NULL, then the function assumes that the EH personality function provides /// the thrown object directly. 
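The parameter-type churn below, like most hunks in this commit, tracks LLVM's move from llvm::Constant * to llvm::FunctionCallee for runtime helpers: the callee is now paired with its FunctionType, so calls stay well-typed even when the underlying declaration had to be bitcast. A minimal sketch of the new idiom; objc_helper is an invented name used only for illustration:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"

// Declare void @objc_helper(i8*) if absent and emit a call to it.
llvm::CallInst *callHelper(llvm::Module &M, llvm::IRBuilder<> &Builder,
                           llvm::Value *Arg) {
  auto *FnTy = llvm::FunctionType::get(
      Builder.getVoidTy(), {Builder.getInt8PtrTy()}, /*isVarArg=*/false);
  // FunctionCallee carries the callee *and* its function type, so the
  // call is well-typed even if the module already had a mismatched decl.
  llvm::FunctionCallee Fn = M.getOrInsertFunction("objc_helper", FnTy);
  return Builder.CreateCall(Fn, {Arg});
}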
- void EmitTryCatchStmt(CodeGenFunction &CGF, - const ObjCAtTryStmt &S, - llvm::Constant *beginCatchFn, - llvm::Constant *endCatchFn, - llvm::Constant *exceptionRethrowFn); + void EmitTryCatchStmt(CodeGenFunction &CGF, const ObjCAtTryStmt &S, + llvm::FunctionCallee beginCatchFn, + llvm::FunctionCallee endCatchFn, + llvm::FunctionCallee exceptionRethrowFn); void EmitInitOfCatchParam(CodeGenFunction &CGF, llvm::Value *exn, const VarDecl *paramDecl); @@ -110,9 +108,9 @@ protected: /// the object. This function can be called by subclasses that use /// zero-cost exception handling. void EmitAtSynchronizedStmt(CodeGenFunction &CGF, - const ObjCAtSynchronizedStmt &S, - llvm::Function *syncEnterFn, - llvm::Function *syncExitFn); + const ObjCAtSynchronizedStmt &S, + llvm::FunctionCallee syncEnterFn, + llvm::FunctionCallee syncExitFn); public: virtual ~CGObjCRuntime(); @@ -208,25 +206,25 @@ public: const ObjCContainerDecl *CD) = 0; /// Return the runtime function for getting properties. - virtual llvm::Constant *GetPropertyGetFunction() = 0; + virtual llvm::FunctionCallee GetPropertyGetFunction() = 0; /// Return the runtime function for setting properties. - virtual llvm::Constant *GetPropertySetFunction() = 0; + virtual llvm::FunctionCallee GetPropertySetFunction() = 0; /// Return the runtime function for optimized setting properties. - virtual llvm::Constant *GetOptimizedPropertySetFunction(bool atomic, - bool copy) = 0; + virtual llvm::FunctionCallee GetOptimizedPropertySetFunction(bool atomic, + bool copy) = 0; // API for atomic copying of qualified aggregates in getter. - virtual llvm::Constant *GetGetStructFunction() = 0; + virtual llvm::FunctionCallee GetGetStructFunction() = 0; // API for atomic copying of qualified aggregates in setter. - virtual llvm::Constant *GetSetStructFunction() = 0; + virtual llvm::FunctionCallee GetSetStructFunction() = 0; /// API for atomic copying of qualified aggregates with non-trivial copy /// assignment (c++) in setter. - virtual llvm::Constant *GetCppAtomicObjectSetFunction() = 0; + virtual llvm::FunctionCallee GetCppAtomicObjectSetFunction() = 0; /// API for atomic copying of qualified aggregates with non-trivial copy /// assignment (c++) in getter. - virtual llvm::Constant *GetCppAtomicObjectGetFunction() = 0; + virtual llvm::FunctionCallee GetCppAtomicObjectGetFunction() = 0; /// GetClass - Return a reference to the class for the given /// interface decl. @@ -240,7 +238,7 @@ public: /// EnumerationMutationFunction - Return the function that's called by the /// compiler when a mutation is detected during foreach iteration. - virtual llvm::Constant *EnumerationMutationFunction() = 0; + virtual llvm::FunctionCallee EnumerationMutationFunction() = 0; virtual void EmitSynchronizedStmt(CodeGen::CodeGenFunction &CGF, const ObjCAtSynchronizedStmt &S) = 0; diff --git a/lib/CodeGen/CGOpenCLRuntime.cpp b/lib/CodeGen/CGOpenCLRuntime.cpp index 7f6f595dd5d1..191a95c62992 100644 --- a/lib/CodeGen/CGOpenCLRuntime.cpp +++ b/lib/CodeGen/CGOpenCLRuntime.cpp @@ -1,9 +1,8 @@ //===----- CGOpenCLRuntime.cpp - Interface to OpenCL Runtimes -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -123,6 +122,23 @@ llvm::PointerType *CGOpenCLRuntime::getGenericVoidPointerType() { CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); } +// Get the block literal from an expression derived from the block expression. +// OpenCL v2.0 s6.12.5: +// Block variable declarations are implicitly qualified with const. Therefore +// all block variables must be initialized at declaration time and may not be +// reassigned. +static const BlockExpr *getBlockExpr(const Expr *E) { + const Expr *Prev = nullptr; // to make sure we do not stuck in infinite loop. + while(!isa<BlockExpr>(E) && E != Prev) { + Prev = E; + E = E->IgnoreCasts(); + if (auto DR = dyn_cast<DeclRefExpr>(E)) { + E = cast<VarDecl>(DR->getDecl())->getInit(); + } + } + return cast<BlockExpr>(E); +} + /// Record emitted llvm invoke function and llvm block literal for the /// corresponding block expression. void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E, @@ -137,20 +153,17 @@ void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E, EnqueuedBlockMap[E].Kernel = nullptr; } +llvm::Function *CGOpenCLRuntime::getInvokeFunction(const Expr *E) { + return EnqueuedBlockMap[getBlockExpr(E)].InvokeFunc; +} + CGOpenCLRuntime::EnqueuedBlockInfo CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) { CGF.EmitScalarExpr(E); // The block literal may be assigned to a const variable. Chasing down // to get the block literal. - if (auto DR = dyn_cast<DeclRefExpr>(E)) { - E = cast<VarDecl>(DR->getDecl())->getInit(); - } - E = E->IgnoreImplicit(); - if (auto Cast = dyn_cast<CastExpr>(E)) { - E = Cast->getSubExpr(); - } - auto *Block = cast<BlockExpr>(E); + const BlockExpr *Block = getBlockExpr(E); assert(EnqueuedBlockMap.find(Block) != EnqueuedBlockMap.end() && "Block expression not emitted"); diff --git a/lib/CodeGen/CGOpenCLRuntime.h b/lib/CodeGen/CGOpenCLRuntime.h index 750721f1b80f..3f7aa9b0d8dc 100644 --- a/lib/CodeGen/CGOpenCLRuntime.h +++ b/lib/CodeGen/CGOpenCLRuntime.h @@ -1,9 +1,8 @@ //===----- CGOpenCLRuntime.h - Interface to OpenCL Runtimes -----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -92,6 +91,10 @@ public: /// \param Block block literal emitted for the block expression. void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF, llvm::Value *Block); + + /// \return LLVM block invoke function emitted for an expression derived from + /// the block expression. + llvm::Function *getInvokeFunction(const Expr *E); }; } diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index 20eb0b29f427..27e7175da841 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1,9 +1,8 @@ //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -22,7 +21,6 @@ #include "clang/Basic/BitmaskEnum.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/Bitcode/BitcodeReader.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Value.h" @@ -432,7 +430,7 @@ public: /// Values for bit flags used in the ident_t to describe the fields. /// All enumeric elements are named and described in accordance with the code -/// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h +/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h enum OpenMPLocationFlags : unsigned { /// Use trampoline for internal microtask. OMP_IDENT_IMD = 0x01, @@ -459,9 +457,35 @@ enum OpenMPLocationFlags : unsigned { LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) }; +namespace { +LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); +/// Values for bit flags for marking which requires clauses have been used. +enum OpenMPOffloadingRequiresDirFlags : int64_t { + /// flag undefined. + OMP_REQ_UNDEFINED = 0x000, + /// no requires clause present. + OMP_REQ_NONE = 0x001, + /// reverse_offload clause. + OMP_REQ_REVERSE_OFFLOAD = 0x002, + /// unified_address clause. + OMP_REQ_UNIFIED_ADDRESS = 0x004, + /// unified_shared_memory clause. + OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, + /// dynamic_allocators clause. + OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) +}; + +enum OpenMPOffloadingReservedDeviceIDs { + /// Device ID if the device was not defined, runtime should get it + /// from environment variables in the spec. + OMP_DEVICEID_UNDEF = -1, +}; +} // anonymous namespace + /// Describes ident structure that describes a source location. 
/// All descriptions are taken from -/// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h +/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h /// Original structure: /// typedef struct ident { /// kmp_int32 reserved_1; /**< might be used in Fortran; @@ -586,6 +610,11 @@ enum OpenMPRTLFunction { // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, // kmp_routine_entry_t *task_entry); OMPRTL__kmpc_omp_task_alloc, + // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *, + // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, + // size_t sizeof_shareds, kmp_routine_entry_t *task_entry, + // kmp_int64 device_id); + OMPRTL__kmpc_omp_target_task_alloc, // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t * // new_task); OMPRTL__kmpc_omp_task, @@ -669,6 +698,10 @@ enum OpenMPRTLFunction { // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void // *d); OMPRTL__kmpc_task_reduction_get_th_data, + // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al); + OMPRTL__kmpc_alloc, + // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al); + OMPRTL__kmpc_free, // // Offloading related calls @@ -677,44 +710,46 @@ enum OpenMPRTLFunction { // size); OMPRTL__kmpc_push_target_tripcount, // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t - // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t + // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t // *arg_types); OMPRTL__tgt_target, // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, - // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t + // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t // *arg_types); OMPRTL__tgt_target_nowait, // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, - // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t + // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t // *arg_types, int32_t num_teams, int32_t thread_limit); OMPRTL__tgt_target_teams, // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void - // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t + // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); OMPRTL__tgt_target_teams_nowait, + // Call to void __tgt_register_requires(int64_t flags); + OMPRTL__tgt_register_requires, // Call to void __tgt_register_lib(__tgt_bin_desc *desc); OMPRTL__tgt_register_lib, // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); OMPRTL__tgt_unregister_lib, // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, - // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); + // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); OMPRTL__tgt_target_data_begin, // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t - // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t + // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t // *arg_types); OMPRTL__tgt_target_data_begin_nowait, // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num, // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); OMPRTL__tgt_target_data_end, // Call to void __tgt_target_data_end_nowait(int64_t device_id, 
int32_t - // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t + // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t // *arg_types); OMPRTL__tgt_target_data_end_nowait, // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num, - // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); + // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); OMPRTL__tgt_target_data_update, // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t - // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t + // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t // *arg_types); OMPRTL__tgt_target_data_update_nowait, }; @@ -1272,9 +1307,11 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); - Fn->removeFnAttr(llvm::Attribute::NoInline); - Fn->removeFnAttr(llvm::Attribute::OptimizeNone); - Fn->addFnAttr(llvm::Attribute::AlwaysInline); + if (CGM.getLangOpts().Optimize) { + Fn->removeFnAttr(llvm::Attribute::NoInline); + Fn->removeFnAttr(llvm::Attribute::OptimizeNone); + Fn->addFnAttr(llvm::Attribute::AlwaysInline); + } CodeGenFunction CGF(CGM); // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. @@ -1340,7 +1377,7 @@ CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { return UDRMap.lookup(D); } -static llvm::Value *emitParallelOrTeamsOutlinedFunction( +static llvm::Function *emitParallelOrTeamsOutlinedFunction( CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { @@ -1370,7 +1407,7 @@ static llvm::Value *emitParallelOrTeamsOutlinedFunction( return CGF.GenerateOpenMPCapturedStmtFunction(*CS); } -llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction( +llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); @@ -1378,7 +1415,7 @@ llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction( CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); } -llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction( +llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); @@ -1386,7 +1423,7 @@ llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction( CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); } -llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( +llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, @@ -1417,7 +1454,7 @@ llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( InnermostKind, TD ? 
TD->hasCancel() : false, Action); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - llvm::Value *Res = CGF.GenerateCapturedStmtFunction(*CS); + llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); if (!Tied) NumberOfParts = Action.getNumberOfParts(); return Res; @@ -1478,7 +1515,7 @@ Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { // Initialize default location for psource field of ident_t structure of // all ident_t objects. Format is ";file;function;line;column;;". // Taken from - // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c + // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp DefaultOpenMPPSource = CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); DefaultOpenMPPSource = @@ -1665,9 +1702,8 @@ llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { return llvm::PointerType::getUnqual(Kmpc_MicroTy); } -llvm::Constant * -CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { - llvm::Constant *RTLFn = nullptr; +llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { + llvm::FunctionCallee RTLFn = nullptr; switch (static_cast<OpenMPRTLFunction>(Function)) { case OMPRTL__kmpc_fork_call: { // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro @@ -1677,6 +1713,22 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); + if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { + if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { + llvm::LLVMContext &Ctx = F->getContext(); + llvm::MDBuilder MDB(Ctx); + // Annotate the callback behavior of the __kmpc_fork_call: + // - The callback callee is argument number 2 (microtask). + // - The first two arguments of the callback callee are unknown (-1). + // - All variadic arguments to the __kmpc_fork_call are passed to the + // callback callee. + F->addMetadata( + llvm::LLVMContext::MD_callback, + *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( + 2, {-1, -1}, + /* VarArgsArePassed */ true)})); + } + } break; } case OMPRTL__kmpc_global_thread_num: { @@ -1871,6 +1923,21 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); break; } + case OMPRTL__kmpc_omp_target_task_alloc: { + // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid, + // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, + // kmp_routine_entry_t *task_entry, kmp_int64 device_id); + assert(KmpRoutineEntryPtrTy != nullptr && + "Type kmp_routine_entry_t must be created."); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, + CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy, + CGM.Int64Ty}; + // Return void * and then cast to particular kmp_task_t type. 
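The declaration finished in the next lines follows the commit's standard recipe for a new entry point: list the parameter types, build a FunctionType, and request the function from the module. Approximately, as standalone code; IdentPtrTy and RoutineEntryPtrTy are assumed built elsewhere, and size_t is reduced to i64 for brevity:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Module.h"

// Sketch of declaring __kmpc_omp_target_task_alloc: the same parameter
// list as __kmpc_omp_task_alloc plus a trailing i64 device id.
llvm::FunctionCallee declareTargetTaskAlloc(llvm::Module &M,
                                            llvm::Type *IdentPtrTy,
                                            llvm::Type *RoutineEntryPtrTy) {
  llvm::LLVMContext &Ctx = M.getContext();
  llvm::Type *I32 = llvm::Type::getInt32Ty(Ctx);
  llvm::Type *I64 = llvm::Type::getInt64Ty(Ctx);
  llvm::Type *SizeTy = I64; // assuming a 64-bit target's size_t
  llvm::Type *Params[] = {IdentPtrTy, I32,    I32,
                          SizeTy,     SizeTy, RoutineEntryPtrTy, I64};
  // Returns void*; callers bitcast the result to their kmp_task_t type.
  auto *RetTy = llvm::Type::getInt8PtrTy(Ctx);
  auto *FnTy = llvm::FunctionType::get(RetTy, Params, /*isVarArg=*/false);
  return M.getOrInsertFunction("__kmpc_omp_target_task_alloc", FnTy);
}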
+ auto *FnTy = + llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc"); + break; + } case OMPRTL__kmpc_omp_task: { // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t // *new_task); @@ -2084,6 +2151,22 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); + if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { + if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { + llvm::LLVMContext &Ctx = F->getContext(); + llvm::MDBuilder MDB(Ctx); + // Annotate the callback behavior of the __kmpc_fork_teams: + // - The callback callee is argument number 2 (microtask). + // - The first two arguments of the callback callee are unknown (-1). + // - All variadic arguments to the __kmpc_fork_teams are passed to the + // callback callee. + F->addMetadata( + llvm::LLVMContext::MD_callback, + *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( + 2, {-1, -1}, + /* VarArgsArePassed */ true)})); + } + } break; } case OMPRTL__kmpc_taskloop: { @@ -2166,6 +2249,24 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); break; } + case OMPRTL__kmpc_alloc: { + // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t + // al); omp_allocator_handle_t type is void *. + llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc"); + break; + } + case OMPRTL__kmpc_free: { + // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t + // al); omp_allocator_handle_t type is void *. 
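A recap of the !callback metadata that the __kmpc_fork_call and __kmpc_fork_teams hunks above attach, since the same pattern appears twice: the encoding records that parameter 2 of the runtime function is invoked as a callback, that the callback's first two arguments cannot be tied to any forwarded value (-1), and that the variadic tail is passed through. Condensed into one helper, mirroring the diff:

#include "llvm/IR/Function.h"
#include "llvm/IR/MDBuilder.h"

// Attach !callback metadata once: callee is parameter 2 (the microtask),
// its first two arguments are unknown (-1), varargs are forwarded.
void annotateForkLikeCallee(llvm::Function *F) {
  if (F->hasMetadata(llvm::LLVMContext::MD_callback))
    return;
  llvm::LLVMContext &Ctx = F->getContext();
  llvm::MDBuilder MDB(Ctx);
  F->addMetadata(llvm::LLVMContext::MD_callback,
                 *llvm::MDNode::get(
                     Ctx, {MDB.createCallbackEncoding(
                              2, {-1, -1}, /*VarArgsArePassed=*/true)}));
}

This metadata is what lets interprocedural passes reason about the microtask as if it were called directly at the fork site.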
+ llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free"); + break; + } case OMPRTL__kmpc_push_target_tripcount: { // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 // size); @@ -2177,14 +2278,14 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { } case OMPRTL__tgt_target: { // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t - // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t + // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t // *arg_types); llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrPtrTy, CGM.VoidPtrPtrTy, - CGM.SizeTy->getPointerTo(), + CGM.Int64Ty->getPointerTo(), CGM.Int64Ty->getPointerTo()}; auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); @@ -2193,14 +2294,14 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { } case OMPRTL__tgt_target_nowait: { // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, - // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, + // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, // int64_t *arg_types); llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrPtrTy, CGM.VoidPtrPtrTy, - CGM.SizeTy->getPointerTo(), + CGM.Int64Ty->getPointerTo(), CGM.Int64Ty->getPointerTo()}; auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); @@ -2209,14 +2310,14 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { } case OMPRTL__tgt_target_teams: { // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, - // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, + // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, // int64_t *arg_types, int32_t num_teams, int32_t thread_limit); llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrPtrTy, CGM.VoidPtrPtrTy, - CGM.SizeTy->getPointerTo(), + CGM.Int64Ty->getPointerTo(), CGM.Int64Ty->getPointerTo(), CGM.Int32Ty, CGM.Int32Ty}; @@ -2227,14 +2328,14 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { } case OMPRTL__tgt_target_teams_nowait: { // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void - // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t + // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrPtrTy, CGM.VoidPtrPtrTy, - CGM.SizeTy->getPointerTo(), + CGM.Int64Ty->getPointerTo(), CGM.Int64Ty->getPointerTo(), CGM.Int32Ty, CGM.Int32Ty}; @@ -2243,6 +2344,14 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); break; } + case OMPRTL__tgt_register_requires: { + // Build void __tgt_register_requires(int64_t flags); + llvm::Type *TypeParams[] = {CGM.Int64Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires"); + break; + } case OMPRTL__tgt_register_lib: { // Build void __tgt_register_lib(__tgt_bin_desc *desc); QualType ParamTy = @@ -2265,12 +2374,12 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { } 
case OMPRTL__tgt_target_data_begin: { // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, - // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); + // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int32Ty, CGM.VoidPtrPtrTy, CGM.VoidPtrPtrTy, - CGM.SizeTy->getPointerTo(), + CGM.Int64Ty->getPointerTo(), CGM.Int64Ty->getPointerTo()}; auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); @@ -2279,13 +2388,13 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { } case OMPRTL__tgt_target_data_begin_nowait: { // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t - // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t + // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t // *arg_types); llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int32Ty, CGM.VoidPtrPtrTy, CGM.VoidPtrPtrTy, - CGM.SizeTy->getPointerTo(), + CGM.Int64Ty->getPointerTo(), CGM.Int64Ty->getPointerTo()}; auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); @@ -2294,12 +2403,12 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { } case OMPRTL__tgt_target_data_end: { // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num, - // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); + // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int32Ty, CGM.VoidPtrPtrTy, CGM.VoidPtrPtrTy, - CGM.SizeTy->getPointerTo(), + CGM.Int64Ty->getPointerTo(), CGM.Int64Ty->getPointerTo()}; auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); @@ -2308,13 +2417,13 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { } case OMPRTL__tgt_target_data_end_nowait: { // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t - // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t + // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t // *arg_types); llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int32Ty, CGM.VoidPtrPtrTy, CGM.VoidPtrPtrTy, - CGM.SizeTy->getPointerTo(), + CGM.Int64Ty->getPointerTo(), CGM.Int64Ty->getPointerTo()}; auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); @@ -2323,12 +2432,12 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { } case OMPRTL__tgt_target_data_update: { // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num, - // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); + // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int32Ty, CGM.VoidPtrPtrTy, CGM.VoidPtrPtrTy, - CGM.SizeTy->getPointerTo(), + CGM.Int64Ty->getPointerTo(), CGM.Int64Ty->getPointerTo()}; auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); @@ -2337,13 +2446,13 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { } case OMPRTL__tgt_target_data_update_nowait: { // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t - // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t + // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t // *arg_types); llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int32Ty, CGM.VoidPtrPtrTy, CGM.VoidPtrPtrTy, - CGM.SizeTy->getPointerTo(), + CGM.Int64Ty->getPointerTo(), CGM.Int64Ty->getPointerTo()}; 
auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); @@ -2355,8 +2464,8 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { return RTLFn; } -llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, - bool IVSigned) { +llvm::FunctionCallee +CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { assert((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime"); StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" @@ -2381,8 +2490,8 @@ llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, return CGM.CreateRuntimeFunction(FnTy, Name); } -llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, - bool IVSigned) { +llvm::FunctionCallee +CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { assert((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime"); StringRef Name = @@ -2403,8 +2512,8 @@ llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, return CGM.CreateRuntimeFunction(FnTy, Name); } -llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, - bool IVSigned) { +llvm::FunctionCallee +CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { assert((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime"); StringRef Name = @@ -2420,8 +2529,8 @@ llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, return CGM.CreateRuntimeFunction(FnTy, Name); } -llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, - bool IVSigned) { +llvm::FunctionCallee +CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { assert((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime"); StringRef Name = @@ -2443,16 +2552,18 @@ llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, return CGM.CreateRuntimeFunction(FnTy, Name); } -Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) { +Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { if (CGM.getLangOpts().OpenMPSimd) return Address::invalid(); llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); - if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) { + if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || + (*Res == OMPDeclareTargetDeclAttr::MT_To && + HasRequiresUnifiedSharedMemory))) { SmallString<64> PtrName; { llvm::raw_svector_ostream OS(PtrName); - OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_link_ptr"; + OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_ref_ptr"; } llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); if (!Ptr) { @@ -2669,7 +2780,9 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, bool PerformInit) { Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); - if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) + if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || + (*Res == OMPDeclareTargetDeclAttr::MT_To && + HasRequiresUnifiedSharedMemory)) return CGM.getLangOpts().OpenMPIsDevice; VD = VD->getDefinition(CGM.getContext()); if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) @@ -2785,7 +2898,7 @@ Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, getThreadID(CGF, 
SourceLocation()), CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, - /*IsSigned=*/false), + /*isSigned=*/false), getOrCreateInternalVariable( CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; return Address( @@ -2836,7 +2949,7 @@ void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, } void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, - llvm::Value *OutlinedFn, + llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) { if (!CGF.HaveInsertPoint()) @@ -2854,7 +2967,8 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, RealArgs.append(std::begin(Args), std::end(Args)); RealArgs.append(CapturedVars.begin(), CapturedVars.end()); - llvm::Value *RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); + llvm::FunctionCallee RTLFn = + RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); CGF.EmitRuntimeCall(RTLFn, RealArgs); }; auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF, @@ -2915,9 +3029,8 @@ Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, return ThreadIDTemp; } -llvm::Constant * -CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty, - const llvm::Twine &Name) { +llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( + llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); Out << Name; @@ -2932,7 +3045,8 @@ CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty, return Elem.second = new llvm::GlobalVariable( CGM.getModule(), Ty, /*IsConstant*/ false, llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), - Elem.first()); + Elem.first(), /*InsertBefore=*/nullptr, + llvm::GlobalValue::NotThreadLocal, AddressSpace); } llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { @@ -2944,17 +3058,18 @@ llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { namespace { /// Common pre(post)-action for different OpenMP constructs. class CommonActionTy final : public PrePostActionTy { - llvm::Value *EnterCallee; + llvm::FunctionCallee EnterCallee; ArrayRef<llvm::Value *> EnterArgs; - llvm::Value *ExitCallee; + llvm::FunctionCallee ExitCallee; ArrayRef<llvm::Value *> ExitArgs; bool Conditional; llvm::BasicBlock *ContBlock = nullptr; public: - CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs, - llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs, - bool Conditional = false) + CommonActionTy(llvm::FunctionCallee EnterCallee, + ArrayRef<llvm::Value *> EnterArgs, + llvm::FunctionCallee ExitCallee, + ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), ExitArgs(ExitArgs), Conditional(Conditional) {} void Enter(CodeGenFunction &CGF) override { @@ -3059,8 +3174,7 @@ void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, unsigned Index, const VarDecl *Var) { // Pull out the pointer to the variable. 
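On the CreateConstArrayGEP calls in the hunks below: the explicit element-size argument disappears because an Address now knows its element type, so the builder derives the stride itself. In plain IRBuilder terms the operation is just a constant GEP plus a load; a small sketch, with all names supplied by the caller:

#include "llvm/IR/IRBuilder.h"

// Fetch element I of an [N x i8*] array and hand back the pointer stored
// there; the stride comes from the array's element type, not an argument.
llvm::Value *loadVarPointer(llvm::IRBuilder<> &Builder,
                            llvm::ArrayType *ArrTy, llvm::Value *Array,
                            uint64_t I) {
  llvm::Value *Idx[] = {Builder.getInt64(0), Builder.getInt64(I)};
  llvm::Value *Slot = Builder.CreateInBoundsGEP(ArrTy, Array, Idx);
  return Builder.CreateLoad(ArrTy->getElementType(), Slot);
}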
- Address PtrAddr = - CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize()); + Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); @@ -3176,8 +3290,7 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, Address CopyprivateList = CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { - Address Elem = CGF.Builder.CreateConstArrayGEP( - CopyprivateList, I, CGF.getPointerSize()); + Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); CGF.Builder.CreateStore( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy), @@ -3241,6 +3354,24 @@ unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { return Flags; } +void CGOpenMPRuntime::getDefaultScheduleAndChunk( + CodeGenFunction &CGF, const OMPLoopDirective &S, + OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { + // Check if the loop directive is actually a doacross loop directive. In this + // case choose static, 1 schedule. + if (llvm::any_of( + S.getClausesOfKind<OMPOrderedClause>(), + [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { + ScheduleKind = OMPC_SCHEDULE_static; + // Chunk size is 1 in this case. + llvm::APInt ChunkSize(32, 1); + ChunkExpr = IntegerLiteral::Create( + CGF.getContext(), ChunkSize, + CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), + SourceLocation()); + } +} + void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks, bool ForceSimpleCall) { @@ -3412,7 +3543,7 @@ void CGOpenMPRuntime::emitForDispatchInit( static void emitForStaticInitCall( CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, - llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule, + llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, const CGOpenMPRuntime::StaticRTInput &Values) { if (!CGF.HaveInsertPoint()) @@ -3473,7 +3604,7 @@ void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, ? 
OMP_IDENT_WORK_LOOP : OMP_IDENT_WORK_SECTIONS); llvm::Value *ThreadId = getThreadID(CGF, Loc); - llvm::Constant *StaticInitFunction = + llvm::FunctionCallee StaticInitFunction = createForStaticInitFunction(Values.IVSize, Values.IVSigned); emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); @@ -3488,7 +3619,7 @@ void CGOpenMPRuntime::emitDistributeStaticInit( llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); llvm::Value *ThreadId = getThreadID(CGF, Loc); - llvm::Constant *StaticInitFunction = + llvm::FunctionCallee StaticInitFunction = createForStaticInitFunction(Values.IVSize, Values.IVSigned); emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, @@ -3731,14 +3862,29 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: "Entry not initialized!"); assert((!Entry.getAddress() || Entry.getAddress() == Addr) && "Resetting with the new address."); - if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) + if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { + if (Entry.getVarSize().isZero()) { + Entry.setVarSize(VarSize); + Entry.setLinkage(Linkage); + } return; - Entry.setAddress(Addr); + } Entry.setVarSize(VarSize); Entry.setLinkage(Linkage); + Entry.setAddress(Addr); } else { - if (hasDeviceGlobalVarEntryInfo(VarName)) + if (hasDeviceGlobalVarEntryInfo(VarName)) { + auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; + assert(Entry.isValid() && Entry.getFlags() == Flags && + "Entry not initialized!"); + assert((!Entry.getAddress() || Entry.getAddress() == Addr) && + "Resetting with the new address."); + if (Entry.getVarSize().isZero()) { + Entry.setVarSize(VarSize); + Entry.setLinkage(Linkage); + } return; + } OffloadEntriesDeviceGlobalVar.try_emplace( VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); ++OffloadingEntriesNum; @@ -4052,6 +4198,9 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { CE->getFlags()); switch (Flags) { case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { + if (CGM.getLangOpts().OpenMPIsDevice && + CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) + continue; if (!CE->getAddress()) { unsigned DiagID = CGM.getDiags().getCustomDiagID( DiagnosticsEngine::Error, @@ -4364,12 +4513,12 @@ createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, /// return 0; /// } /// \endcode -static llvm::Value * +static llvm::Function * emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, - QualType SharedsPtrTy, llvm::Value *TaskFunction, + QualType SharedsPtrTy, llvm::Function *TaskFunction, llvm::Value *TaskPrivatesMap) { ASTContext &C = CGM.getContext(); FunctionArgList Args; @@ -4587,9 +4736,11 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, &CGM.getModule()); CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, TaskPrivatesMapFnInfo); - TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); - TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); - TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); + if (CGM.getLangOpts().Optimize) { + TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); + TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); + 
TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); + } CodeGenFunction CGF(CGM); CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, TaskPrivatesMapFnInfo, Args, Loc, Loc); @@ -4614,11 +4765,6 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, return TaskPrivatesMap; } -static bool stable_sort_comparator(const PrivateDataTy P1, - const PrivateDataTy P2) { - return P1.first > P2.first; -} - /// Emit initialization for private variables in task-based directives. static void emitPrivatesInit(CodeGenFunction &CGF, const OMPExecutableDirective &D, @@ -4661,7 +4807,7 @@ static void emitPrivatesInit(CodeGenFunction &CGF, // Check if the variable is the target-based BasePointersArray, // PointersArray or SizesArray. LValue SharedRefLValue; - QualType Type = OriginalVD->getType(); + QualType Type = PrivateLValue.getType(); const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); if (IsTargetTask && !SharedField) { assert(isa<ImplicitParamDecl>(OriginalVD) && @@ -4837,7 +4983,7 @@ checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { CGOpenMPRuntime::TaskResultTy CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, - llvm::Value *TaskFunction, QualType SharedsTy, + llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data) { ASTContext &C = CGM.getContext(); llvm::SmallVector<PrivateDataTy, 4> Privates; @@ -4872,7 +5018,9 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, /*PrivateElemInit=*/nullptr)); ++I; } - std::stable_sort(Privates.begin(), Privates.end(), stable_sort_comparator); + llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) { + return L.first > R.first; + }); QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); // Build type kmp_routine_entry_t (if not built yet). emitKmpRoutineEntryT(KmpInt32Ty); @@ -4911,7 +5059,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, // Emit initial values for private copies (if any). llvm::Value *TaskPrivatesMap = nullptr; llvm::Type *TaskPrivatesMapTy = - std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType(); + std::next(TaskFunction->arg_begin(), 3)->getType(); if (!Privates.empty()) { auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); TaskPrivatesMap = emitTaskPrivateMappingFunction( @@ -4925,7 +5073,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, } // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, // kmp_task_t *tt); - llvm::Value *TaskEntry = emitProxyTaskFunction( + llvm::Function *TaskEntry = emitProxyTaskFunction( CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap); @@ -4934,7 +5082,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, // kmp_routine_entry_t *task_entry); // Task flags. Format is taken from - // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h, + // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h, // description of kmp_tasking_flags struct. enum { TiedFlag = 0x1, @@ -4959,13 +5107,30 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, : CGF.Builder.getInt32(Data.Final.getInt() ? 
FinalFlag : 0); TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); - llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc), - getThreadID(CGF, Loc), TaskFlags, - KmpTaskTWithPrivatesTySize, SharedsSize, - CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - TaskEntry, KmpRoutineEntryPtrTy)}; - llvm::Value *NewTask = CGF.EmitRuntimeCall( + SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), + getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, + SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + TaskEntry, KmpRoutineEntryPtrTy)}; + llvm::Value *NewTask; + if (D.hasClausesOfKind<OMPNowaitClause>()) { + // Check if we have any device clause associated with the directive. + const Expr *Device = nullptr; + if (auto *C = D.getSingleClause<OMPDeviceClause>()) + Device = C->getDevice(); + // Emit device ID if any otherwise use default value. + llvm::Value *DeviceID; + if (Device) + DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), + CGF.Int64Ty, /*isSigned=*/true); + else + DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); + AllocArgs.push_back(DeviceID); + NewTask = CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs); + } else { + NewTask = CGF.EmitRuntimeCall( createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); + } llvm::Value *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( NewTask, KmpTaskTWithPrivatesPtrTy); @@ -5037,7 +5202,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, - llvm::Value *TaskFunction, + llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) { @@ -5047,7 +5212,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, TaskResultTy Result = emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); llvm::Value *NewTask = Result.NewTask; - llvm::Value *TaskEntry = Result.TaskEntry; + llvm::Function *TaskEntry = Result.TaskEntry; llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; LValue TDBase = Result.TDBase; const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; @@ -5057,7 +5222,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, unsigned NumDependencies = Data.Dependences.size(); if (NumDependencies) { // Dependence kind for RTL. 
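The hunk above makes deferred target tasks (those with a nowait clause) go through __kmpc_omp_target_task_alloc, appending a device ID to the allocation arguments (OMP_DEVICEID_UNDEF when no device clause is given). A minimal user-level sketch of a construct that takes this path, assuming a hypothetical device number:

// A 'nowait' target region becomes a deferred target task; 'device(1)' is the
// ID the codegen now forwards to __kmpc_omp_target_task_alloc (illustrative).
void offload_async(float *a, int n) {
#pragma omp target nowait device(1) map(tofrom: a[0:n])
  for (int i = 0; i < n; ++i)
    a[i] *= 2.0f;
#pragma omp taskwait // join the deferred target task
}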
- enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 }; + enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 }; enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; RecordDecl *KmpDependInfoRD; QualType FlagsTy = @@ -5074,7 +5239,6 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, } else { KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); } - CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy); // Define type kmp_depend_info[<Dependences.size()>]; QualType KmpDependInfoArrayTy = C.getConstantArrayType( KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), @@ -5090,7 +5254,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, if (const auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { LValue UpAddrLVal = - CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false); + CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1); llvm::Value *LowIntPtr = @@ -5101,7 +5265,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, Size = CGF.getTypeSize(Ty); } LValue Base = CGF.MakeAddrLValue( - CGF.Builder.CreateConstArrayGEP(DependenciesArray, I, DependencySize), + CGF.Builder.CreateConstArrayGEP(DependenciesArray, I), KmpDependInfoTy); // deps[i].base_addr = &<Dependences[i].second>; LValue BaseAddrLVal = CGF.EmitLValueForField( @@ -5124,6 +5288,9 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, case OMPC_DEPEND_inout: DepKind = DepInOut; break; + case OMPC_DEPEND_mutexinoutset: + DepKind = DepMutexInOutSet; + break; case OMPC_DEPEND_source: case OMPC_DEPEND_sink: case OMPC_DEPEND_unknown: @@ -5135,8 +5302,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, FlagsLVal); } DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()), - CGF.VoidPtrTy); + CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy); } // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() @@ -5231,7 +5397,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, - llvm::Value *TaskFunction, + llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) { @@ -5411,10 +5577,10 @@ static void emitReductionCombiner(CodeGenFunction &CGF, CGF.EmitIgnoredExpr(ReductionOp); } -llvm::Value *CGOpenMPRuntime::emitReductionFunction( - CodeGenModule &CGM, SourceLocation Loc, llvm::Type *ArgsType, - ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs, - ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) { +llvm::Function *CGOpenMPRuntime::emitReductionFunction( + SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, + ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, + ArrayRef<const Expr *> ReductionOps) { ASTContext &C = CGM.getContext(); // void reduction_func(void *LHSArg, void *RHSArg); @@ -5466,8 +5632,7 @@ llvm::Value *CGOpenMPRuntime::emitReductionFunction( if (PrivTy->isVariablyModifiedType()) { // Get array size and emit VLA type. 
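This hunk also adds the OpenMP 5.0 mutexinoutset dependence kind, encoded as 0x4 in the runtime's kmp_depend_info flags. A small sketch of the clause it lowers; the variable name is illustrative:

int counter = 0;

// The two tasks may run in either order but never concurrently; the new
// DepMutexInOutSet flag (0x4) is what conveys that to the runtime.
void update_twice() {
#pragma omp task depend(mutexinoutset: counter)
  counter += 1;
#pragma omp task depend(mutexinoutset: counter)
  counter += 2;
#pragma omp taskwait
}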
++Idx; - Address Elem = - CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize()); + Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); const VariableArrayType *VLA = CGF.getContext().getAsVariableArrayType(PrivTy); @@ -5605,8 +5770,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, auto IPriv = Privates.begin(); unsigned Idx = 0; for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { - Address Elem = - CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize()); + Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); CGF.Builder.CreateStore( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), @@ -5614,8 +5778,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, if ((*IPriv)->getType()->isVariablyModifiedType()) { // Store array size. ++Idx; - Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, - CGF.getPointerSize()); + Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); llvm::Value *Size = CGF.Builder.CreateIntCast( CGF.getVLASize( CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) @@ -5627,9 +5790,9 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, } // 2. Emit reduce_func(). - llvm::Value *ReductionFn = emitReductionFunction( - CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), - Privates, LHSExprs, RHSExprs, ReductionOps); + llvm::Function *ReductionFn = emitReductionFunction( + Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, + LHSExprs, RHSExprs, ReductionOps); // 3. Create static kmp_critical_name lock = { 0 }; std::string Name = getName({"reduction"}); @@ -6130,7 +6293,7 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); if (DelayedCreation) { CGF.EmitStoreOfScalar( - llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true), + llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true), FlagsLVal); } else CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType()); @@ -6322,6 +6485,7 @@ void CGOpenMPRuntime::emitTargetOutlinedFunction( llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { assert(!ParentName.empty() && "Invalid target region parent name!"); + HasEmittedTargetRegion = true; emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); } @@ -6393,12 +6557,59 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); } -/// discard all CompoundStmts intervening between two constructs -static const Stmt *ignoreCompoundStmts(const Stmt *Body) { - while (const auto *CS = dyn_cast_or_null<CompoundStmt>(Body)) - Body = CS->body_front(); +/// Checks if the expression is constant or does not have non-trivial function +/// calls. +static bool isTrivial(ASTContext &Ctx, const Expr * E) { + // We can skip constant expressions. + // We can skip expressions with trivial calls or simple expressions. 
+ return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || + !E->hasNonTrivialCall(Ctx)) && + !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); +} - return Body; +const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, + const Stmt *Body) { + const Stmt *Child = Body->IgnoreContainers(); + while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { + Child = nullptr; + for (const Stmt *S : C->body()) { + if (const auto *E = dyn_cast<Expr>(S)) { + if (isTrivial(Ctx, E)) + continue; + } + // Some of the statements can be ignored. + if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || + isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) + continue; + // Analyze declarations. + if (const auto *DS = dyn_cast<DeclStmt>(S)) { + if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { + if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || + isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || + isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || + isa<UsingDirectiveDecl>(D) || + isa<OMPDeclareReductionDecl>(D) || + isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) + return true; + const auto *VD = dyn_cast<VarDecl>(D); + if (!VD) + return false; + return VD->isConstexpr() || + ((VD->getType().isTrivialType(Ctx) || + VD->getType()->isReferenceType()) && + (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); + })) + continue; + } + // Found multiple children - cannot get the one child only. + if (Child) + return nullptr; + Child = S; + } + if (Child) + Child = Child->IgnoreContainers(); + } + return Child; } /// Emit the number of teams for a target directive. Inspect the num_teams @@ -6410,63 +6621,208 @@ static const Stmt *ignoreCompoundStmts(const Stmt *Body) { /// /// Otherwise, return nullptr. static llvm::Value * -emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime, - CodeGenFunction &CGF, +emitNumTeamsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D) { - assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " - "teams directive expected to be " - "emitted only for the host!"); - + assert(!CGF.getLangOpts().OpenMPIsDevice && + "Clauses associated with the teams directive expected to be emitted " + "only for the host!"); + OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); + assert(isOpenMPTargetExecutionDirective(DirectiveKind) && + "Expected target-based executable directive."); CGBuilderTy &Bld = CGF.Builder; - - // If the target directive is combined with a teams directive: - // Return the value in the num_teams clause, if any. - // Otherwise, return 0 to denote the runtime default. 
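getSingleCompoundChild, now a public CGOpenMPRuntime helper, looks through containers and skips compile-time-evaluatable expressions, null/asm/flush/barrier/taskyield statements, and benign declarations, so that a single nested directive is still found. A user-level sketch of a body the scan can see through; names are illustrative:

// The trivial declaration and the null statement do not hide the nested
// 'teams' directive, so the host-side scan can still read num_teams(8)
// when emitting the enclosing 'target'.
void nested_teams(int *out) {
#pragma omp target map(from: out[0:1])
  {
    const int unused = 0; // compile-time evaluatable: skipped
    ;                     // null statement: skipped
#pragma omp teams num_teams(8)
    out[0] = 42;
  }
}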
- if (isOpenMPTeamsDirective(D.getDirectiveKind())) { - if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) { + switch (DirectiveKind) { + case OMPD_target: { + const auto *CS = D.getInnermostCapturedStmt(); + const auto *Body = + CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); + const Stmt *ChildStmt = + CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); + if (const auto *NestedDir = + dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { + if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { + if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { + CGOpenMPInnerExprInfo CGInfo(CGF, *CS); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + const Expr *NumTeams = + NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); + llvm::Value *NumTeamsVal = + CGF.EmitScalarExpr(NumTeams, + /*IgnoreResultAssign*/ true); + return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, + /*isSigned=*/true); + } + return Bld.getInt32(0); + } + if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || + isOpenMPSimdDirective(NestedDir->getDirectiveKind())) + return Bld.getInt32(1); + return Bld.getInt32(0); + } + return nullptr; + } + case OMPD_target_teams: + case OMPD_target_teams_distribute: + case OMPD_target_teams_distribute_simd: + case OMPD_target_teams_distribute_parallel_for: + case OMPD_target_teams_distribute_parallel_for_simd: { + if (D.hasClausesOfKind<OMPNumTeamsClause>()) { CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); - llvm::Value *NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(), - /*IgnoreResultAssign*/ true); - return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, - /*IsSigned=*/true); + const Expr *NumTeams = + D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); + llvm::Value *NumTeamsVal = + CGF.EmitScalarExpr(NumTeams, + /*IgnoreResultAssign*/ true); + return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, + /*isSigned=*/true); } - - // The default value is 0. return Bld.getInt32(0); } - - // If the target directive is combined with a parallel directive but not a - // teams directive, start one team. - if (isOpenMPParallelDirective(D.getDirectiveKind())) + case OMPD_target_parallel: + case OMPD_target_parallel_for: + case OMPD_target_parallel_for_simd: + case OMPD_target_simd: return Bld.getInt32(1); - - // If the current target region has a teams region enclosed, we need to get - // the number of teams to pass to the runtime function call. This is done - // by generating the expression in a inlined region. This is required because - // the expression is captured in the enclosing target environment when the - // teams directive is not combined with target. 
- - const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); - - if (const auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>( - ignoreCompoundStmts(CS.getCapturedStmt()))) { - if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) { - if (const auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) { - CGOpenMPInnerExprInfo CGInfo(CGF, CS); + case OMPD_parallel: + case OMPD_for: + case OMPD_parallel_for: + case OMPD_parallel_sections: + case OMPD_for_simd: + case OMPD_parallel_for_simd: + case OMPD_cancel: + case OMPD_cancellation_point: + case OMPD_ordered: + case OMPD_threadprivate: + case OMPD_allocate: + case OMPD_task: + case OMPD_simd: + case OMPD_sections: + case OMPD_section: + case OMPD_single: + case OMPD_master: + case OMPD_critical: + case OMPD_taskyield: + case OMPD_barrier: + case OMPD_taskwait: + case OMPD_taskgroup: + case OMPD_atomic: + case OMPD_flush: + case OMPD_teams: + case OMPD_target_data: + case OMPD_target_exit_data: + case OMPD_target_enter_data: + case OMPD_distribute: + case OMPD_distribute_simd: + case OMPD_distribute_parallel_for: + case OMPD_distribute_parallel_for_simd: + case OMPD_teams_distribute: + case OMPD_teams_distribute_simd: + case OMPD_teams_distribute_parallel_for: + case OMPD_teams_distribute_parallel_for_simd: + case OMPD_target_update: + case OMPD_declare_simd: + case OMPD_declare_target: + case OMPD_end_declare_target: + case OMPD_declare_reduction: + case OMPD_declare_mapper: + case OMPD_taskloop: + case OMPD_taskloop_simd: + case OMPD_requires: + case OMPD_unknown: + break; + } + llvm_unreachable("Unexpected directive kind."); +} + +static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, + llvm::Value *DefaultThreadLimitVal) { + const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( + CGF.getContext(), CS->getCapturedStmt()); + if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { + if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { + llvm::Value *NumThreads = nullptr; + llvm::Value *CondVal = nullptr; + // Handle if clause. If if clause present, the number of threads is + // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. + if (Dir->hasClausesOfKind<OMPIfClause>()) { + CGOpenMPInnerExprInfo CGInfo(CGF, *CS); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams()); - return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, - /*IsSigned=*/true); + const OMPIfClause *IfClause = nullptr; + for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { + if (C->getNameModifier() == OMPD_unknown || + C->getNameModifier() == OMPD_parallel) { + IfClause = C; + break; + } + } + if (IfClause) { + const Expr *Cond = IfClause->getCondition(); + bool Result; + if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { + if (!Result) + return CGF.Builder.getInt32(1); + } else { + CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); + if (const auto *PreInit = + cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { + for (const auto *I : PreInit->decls()) { + if (!I->hasAttr<OMPCaptureNoInitAttr>()) { + CGF.EmitVarDecl(cast<VarDecl>(*I)); + } else { + CodeGenFunction::AutoVarEmission Emission = + CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); + CGF.EmitAutoVarCleanups(Emission); + } + } + } + CondVal = CGF.EvaluateExprAsBool(Cond); + } + } } - - // If we have an enclosed teams directive but no num_teams clause we use - // the default value 0. 
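Condensed, the rewritten emitNumTeamsForTargetDirective resolves the team count per directive kind as sketched below (0 meaning "let the runtime decide"). This is a simplified standalone model of the switch above, not the actual Clang interface:

#include <optional>

enum class TargetForm { Plain, WithTeams, WithParallel, WithSimd };

// Combined target+teams forms read the num_teams clause (default 0); combined
// target+parallel/simd forms pin one team; a plain 'target' inspects its single
// nested directive and returns nullopt when nothing is known at compile time.
std::optional<int> numTeamsFor(TargetForm Form, bool HasNumTeamsClause,
                               int ClauseValue,
                               std::optional<TargetForm> Nested) {
  switch (Form) {
  case TargetForm::WithTeams:
    return HasNumTeamsClause ? ClauseValue : 0;
  case TargetForm::WithParallel:
  case TargetForm::WithSimd:
    return 1;
  case TargetForm::Plain:
    if (!Nested)
      return std::nullopt; // runtime default, decided at launch
    if (*Nested == TargetForm::WithTeams)
      return HasNumTeamsClause ? ClauseValue : 0;
    if (*Nested == TargetForm::WithParallel || *Nested == TargetForm::WithSimd)
      return 1;
    return 0;
  }
  return std::nullopt;
}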
- return Bld.getInt32(0); + // Check the value of num_threads clause iff if clause was not specified + // or is not evaluated to false. + if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { + CGOpenMPInnerExprInfo CGInfo(CGF, *CS); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + const auto *NumThreadsClause = + Dir->getSingleClause<OMPNumThreadsClause>(); + CodeGenFunction::LexicalScope Scope( + CGF, NumThreadsClause->getNumThreads()->getSourceRange()); + if (const auto *PreInit = + cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { + for (const auto *I : PreInit->decls()) { + if (!I->hasAttr<OMPCaptureNoInitAttr>()) { + CGF.EmitVarDecl(cast<VarDecl>(*I)); + } else { + CodeGenFunction::AutoVarEmission Emission = + CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); + CGF.EmitAutoVarCleanups(Emission); + } + } + } + NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); + NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, + /*isSigned=*/false); + if (DefaultThreadLimitVal) + NumThreads = CGF.Builder.CreateSelect( + CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), + DefaultThreadLimitVal, NumThreads); + } else { + NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal + : CGF.Builder.getInt32(0); + } + // Process condition of the if clause. + if (CondVal) { + NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, + CGF.Builder.getInt32(1)); + } + return NumThreads; } + if (isOpenMPSimdDirective(Dir->getDirectiveKind())) + return CGF.Builder.getInt32(1); + return DefaultThreadLimitVal; } - - // No teams associated with the directive. - return nullptr; + return DefaultThreadLimitVal ? DefaultThreadLimitVal + : CGF.Builder.getInt32(0); } /// Emit the number of threads for a target directive. Inspect the @@ -6478,98 +6834,208 @@ emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime, /// /// Otherwise, return nullptr. static llvm::Value * -emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime, - CodeGenFunction &CGF, +emitNumThreadsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D) { - assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " - "teams directive expected to be " - "emitted only for the host!"); - + assert(!CGF.getLangOpts().OpenMPIsDevice && + "Clauses associated with the teams directive expected to be emitted " + "only for the host!"); + OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); + assert(isOpenMPTargetExecutionDirective(DirectiveKind) && + "Expected target-based executable directive."); CGBuilderTy &Bld = CGF.Builder; - - // - // If the target directive is combined with a teams directive: - // Return the value in the thread_limit clause, if any. - // - // If the target directive is combined with a parallel directive: - // Return the value in the num_threads clause, if any. - // - // If both clauses are set, select the minimum of the two. - // - // If neither teams or parallel combined directives set the number of threads - // in a team, return 0 to denote the runtime default. - // - // If this is not a teams directive return nullptr. 
- - if (isOpenMPTeamsDirective(D.getDirectiveKind()) || - isOpenMPParallelDirective(D.getDirectiveKind())) { - llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0); - llvm::Value *NumThreadsVal = nullptr; - llvm::Value *ThreadLimitVal = nullptr; - - if (const auto *ThreadLimitClause = - D.getSingleClause<OMPThreadLimitClause>()) { + llvm::Value *ThreadLimitVal = nullptr; + llvm::Value *NumThreadsVal = nullptr; + switch (DirectiveKind) { + case OMPD_target: { + const CapturedStmt *CS = D.getInnermostCapturedStmt(); + if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) + return NumThreads; + const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( + CGF.getContext(), CS->getCapturedStmt()); + if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { + if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { + CGOpenMPInnerExprInfo CGInfo(CGF, *CS); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + const auto *ThreadLimitClause = + Dir->getSingleClause<OMPThreadLimitClause>(); + CodeGenFunction::LexicalScope Scope( + CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); + if (const auto *PreInit = + cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { + for (const auto *I : PreInit->decls()) { + if (!I->hasAttr<OMPCaptureNoInitAttr>()) { + CGF.EmitVarDecl(cast<VarDecl>(*I)); + } else { + CodeGenFunction::AutoVarEmission Emission = + CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); + CGF.EmitAutoVarCleanups(Emission); + } + } + } + llvm::Value *ThreadLimit = CGF.EmitScalarExpr( + ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); + ThreadLimitVal = + Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); + } + if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && + !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { + CS = Dir->getInnermostCapturedStmt(); + const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( + CGF.getContext(), CS->getCapturedStmt()); + Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); + } + if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && + !isOpenMPSimdDirective(Dir->getDirectiveKind())) { + CS = Dir->getInnermostCapturedStmt(); + if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) + return NumThreads; + } + if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) + return Bld.getInt32(1); + } + return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0); + } + case OMPD_target_teams: { + if (D.hasClausesOfKind<OMPThreadLimitClause>()) { CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); - llvm::Value *ThreadLimit = - CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(), - /*IgnoreResultAssign*/ true); - ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, - /*IsSigned=*/true); + const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); + llvm::Value *ThreadLimit = CGF.EmitScalarExpr( + ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); + ThreadLimitVal = + Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); } - - if (const auto *NumThreadsClause = - D.getSingleClause<OMPNumThreadsClause>()) { + const CapturedStmt *CS = D.getInnermostCapturedStmt(); + if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) + return NumThreads; + const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( + CGF.getContext(), CS->getCapturedStmt()); + if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { + if (Dir->getDirectiveKind() == OMPD_distribute) { + CS = Dir->getInnermostCapturedStmt(); + if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) + return NumThreads; + } + } + return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); + } + case OMPD_target_teams_distribute: + if (D.hasClausesOfKind<OMPThreadLimitClause>()) { + CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); + const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); + llvm::Value *ThreadLimit = CGF.EmitScalarExpr( + ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); + ThreadLimitVal = + Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); + } + return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); + case OMPD_target_parallel: + case OMPD_target_parallel_for: + case OMPD_target_parallel_for_simd: + case OMPD_target_teams_distribute_parallel_for: + case OMPD_target_teams_distribute_parallel_for_simd: { + llvm::Value *CondVal = nullptr; + // Handle if clause. If if clause present, the number of threads is + // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 
+ if (D.hasClausesOfKind<OMPIfClause>()) { + const OMPIfClause *IfClause = nullptr; + for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { + if (C->getNameModifier() == OMPD_unknown || + C->getNameModifier() == OMPD_parallel) { + IfClause = C; + break; + } + } + if (IfClause) { + const Expr *Cond = IfClause->getCondition(); + bool Result; + if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { + if (!Result) + return Bld.getInt32(1); + } else { + CodeGenFunction::RunCleanupsScope Scope(CGF); + CondVal = CGF.EvaluateExprAsBool(Cond); + } + } + } + if (D.hasClausesOfKind<OMPThreadLimitClause>()) { + CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); + const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); + llvm::Value *ThreadLimit = CGF.EmitScalarExpr( + ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); + ThreadLimitVal = + Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); + } + if (D.hasClausesOfKind<OMPNumThreadsClause>()) { CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); - llvm::Value *NumThreads = - CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), - /*IgnoreResultAssign*/ true); + const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); + llvm::Value *NumThreads = CGF.EmitScalarExpr( + NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); NumThreadsVal = - Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true); - } - - // Select the lesser of thread_limit and num_threads. - if (NumThreadsVal) + Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); ThreadLimitVal = ThreadLimitVal - ? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal, + ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, ThreadLimitVal), NumThreadsVal, ThreadLimitVal) : NumThreadsVal; - - // Set default value passed to the runtime if either teams or a target - // parallel type directive is found but no clause is specified. + } if (!ThreadLimitVal) - ThreadLimitVal = DefaultThreadLimitVal; - + ThreadLimitVal = Bld.getInt32(0); + if (CondVal) + return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); return ThreadLimitVal; } - - // If the current target region has a teams region enclosed, we need to get - // the thread limit to pass to the runtime function call. This is done - // by generating the expression in a inlined region. This is required because - // the expression is captured in the enclosing target environment when the - // teams directive is not combined with target. - - const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); - - if (const auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>( - ignoreCompoundStmts(CS.getCapturedStmt()))) { - if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) { - if (const auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) { - CGOpenMPInnerExprInfo CGInfo(CGF, CS); - CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit()); - return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty, - /*IsSigned=*/true); - } - - // If we have an enclosed teams directive but no thread_limit clause we - // use the default value 0. 
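For the combined target-parallel forms, the thread count follows the commented formula <cond> ? (<numthreads> ? <numthreads> : 0) : 1, with num_threads clamped against thread_limit by the unsigned-minimum select the diff switches to (CreateICmpULT). A standalone model of that arithmetic, 0 again standing for "runtime default":

#include <algorithm>
#include <cstdint>

// Mirrors the emitted select chain: a false 'if' clause serializes the region
// to one thread; otherwise num_threads (if present) is clamped by thread_limit
// using an unsigned minimum, and 0 defers the choice to the runtime.
uint32_t numThreadsFor(bool HasIfClause, bool IfCond,
                       uint32_t NumThreadsClause,    // 0 if absent
                       uint32_t ThreadLimitClause) { // 0 if absent
  if (HasIfClause && !IfCond)
    return 1;
  uint32_t Limit = ThreadLimitClause;
  if (NumThreadsClause)
    Limit = Limit ? std::min(NumThreadsClause, Limit) : NumThreadsClause;
  return Limit;
}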
- return CGF.Builder.getInt32(0); - } + case OMPD_target_teams_distribute_simd: + case OMPD_target_simd: + return Bld.getInt32(1); + case OMPD_parallel: + case OMPD_for: + case OMPD_parallel_for: + case OMPD_parallel_sections: + case OMPD_for_simd: + case OMPD_parallel_for_simd: + case OMPD_cancel: + case OMPD_cancellation_point: + case OMPD_ordered: + case OMPD_threadprivate: + case OMPD_allocate: + case OMPD_task: + case OMPD_simd: + case OMPD_sections: + case OMPD_section: + case OMPD_single: + case OMPD_master: + case OMPD_critical: + case OMPD_taskyield: + case OMPD_barrier: + case OMPD_taskwait: + case OMPD_taskgroup: + case OMPD_atomic: + case OMPD_flush: + case OMPD_teams: + case OMPD_target_data: + case OMPD_target_exit_data: + case OMPD_target_enter_data: + case OMPD_distribute: + case OMPD_distribute_simd: + case OMPD_distribute_parallel_for: + case OMPD_distribute_parallel_for_simd: + case OMPD_teams_distribute: + case OMPD_teams_distribute_simd: + case OMPD_teams_distribute_parallel_for: + case OMPD_teams_distribute_parallel_for_simd: + case OMPD_target_update: + case OMPD_declare_simd: + case OMPD_declare_target: + case OMPD_end_declare_target: + case OMPD_declare_reduction: + case OMPD_declare_mapper: + case OMPD_taskloop: + case OMPD_taskloop_simd: + case OMPD_requires: + case OMPD_unknown: + break; } - - // No teams associated with the directive. - return nullptr; + llvm_unreachable("Unsupported directive kind."); } namespace { @@ -6689,7 +7155,9 @@ private: CodeGenFunction &CGF; /// Set of all first private variables in the current directive. - llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls; + /// bool data is set to true if the variable is implicitly marked as + /// firstprivate, false otherwise. + llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; /// Map between device pointer declarations and their expression components. /// The key value for declarations in 'this' is null. @@ -6993,7 +7461,10 @@ private: // Track if the map information being generated is the first for a capture. bool IsCaptureFirstInfo = IsFirstComponentList; - bool IsLink = false; // Is this variable a "declare target link"? + // When the variable is on a declare target link or in a to clause with + // unified memory, a reference is needed to hold the host/device address + // of the variable. + bool RequiresReference = false; // Scan the components from the base to the complete expression. auto CI = Components.rbegin(); @@ -7023,11 +7494,14 @@ private: if (const auto *VD = dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = - OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) - if (*Res == OMPDeclareTargetDeclAttr::MT_Link) { - IsLink = true; - BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD); + OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { + if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || + (*Res == OMPDeclareTargetDeclAttr::MT_To && + CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { + RequiresReference = true; + BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); } + } } // If the variable is a pointer and is being dereferenced (i.e. 
is not @@ -7135,7 +7609,7 @@ private: Address HB = CGF.Builder.CreateConstGEP( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB, CGF.VoidPtrTy), - TypeSize.getQuantity() - 1, CharUnits::One()); + TypeSize.getQuantity() - 1); PartialStruct.HighestElem = { std::numeric_limits<decltype( PartialStruct.HighestElem.first)>::max(), @@ -7167,19 +7641,19 @@ private: } BasePointers.push_back(BP.getPointer()); Pointers.push_back(LB.getPointer()); - Sizes.push_back(Size); + Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, + /*isSigned=*/true)); Types.push_back(Flags); - LB = CGF.Builder.CreateConstGEP(ComponentLB, 1, - CGF.getPointerSize()); + LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); } BasePointers.push_back(BP.getPointer()); Pointers.push_back(LB.getPointer()); Size = CGF.Builder.CreatePtrDiff( CGF.EmitCastToVoidPtr( - CGF.Builder.CreateConstGEP(HB, 1, CharUnits::One()) - .getPointer()), + CGF.Builder.CreateConstGEP(HB, 1).getPointer()), CGF.EmitCastToVoidPtr(LB.getPointer())); - Sizes.push_back(Size); + Sizes.push_back( + CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); Types.push_back(Flags); break; } @@ -7187,7 +7661,8 @@ private: if (!IsMemberPointer) { BasePointers.push_back(BP.getPointer()); Pointers.push_back(LB.getPointer()); - Sizes.push_back(Size); + Sizes.push_back( + CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); // We need to add a pointer flag for each map that comes from the // same expression except for the first one. We also need to signal @@ -7195,7 +7670,8 @@ private: // (there is a set of entries for each capture). OpenMPOffloadMappingFlags Flags = getMapTypeBits( MapType, MapModifiers, IsImplicit, - !IsExpressionFirstInfo || IsLink, IsCaptureFirstInfo && !IsLink); + !IsExpressionFirstInfo || RequiresReference, + IsCaptureFirstInfo && !RequiresReference); if (!IsExpressionFirstInfo) { // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, @@ -7260,9 +7736,17 @@ private: // A first private variable captured by reference will use only the // 'private ptr' and 'map to' flag. Return the right flags if the captured // declaration is known as first-private in this handler. - if (FirstPrivateDecls.count(Cap.getCapturedVar())) + if (FirstPrivateDecls.count(Cap.getCapturedVar())) { + if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) && + Cap.getCaptureKind() == CapturedStmt::VCK_ByRef) + return MappableExprsHandler::OMP_MAP_ALWAYS | + MappableExprsHandler::OMP_MAP_TO; + if (Cap.getCapturedVar()->getType()->isAnyPointerType()) + return MappableExprsHandler::OMP_MAP_TO | + MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; return MappableExprsHandler::OMP_MAP_PRIVATE | MappableExprsHandler::OMP_MAP_TO; + } return MappableExprsHandler::OMP_MAP_TO | MappableExprsHandler::OMP_MAP_FROM; } @@ -7332,7 +7816,7 @@ private: for (const auto *Field : RD->fields()) { // Fill in non-bitfields. (Bitfields always use a zero pattern, which we // will fill in later.) - if (!Field->isBitField()) { + if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { unsigned FieldIndex = RL.getLLVMFieldNo(Field); RecordLayout[FieldIndex] = Field; } @@ -7354,8 +7838,8 @@ public: // Extract firstprivate clause information. 
for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) for (const auto *D : C->varlists()) - FirstPrivateDecls.insert( - cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); + FirstPrivateDecls.try_emplace( + cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); // Extract device pointer clause information. for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) for (auto L : C->component_lists()) @@ -7380,8 +7864,8 @@ public: llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); - llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.SizeTy, - /*isSinged=*/false); + llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, + /*isSigned=*/false); Sizes.push_back(Size); // Map type is always TARGET_PARAM Types.push_back(OMP_MAP_TARGET_PARAM); @@ -7497,7 +7981,7 @@ public: this->CGF.EmitLValue(IE), IE->getExprLoc()); BasePointers.emplace_back(Ptr, VD); Pointers.push_back(Ptr); - Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy)); + Sizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty)); Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); } } @@ -7554,7 +8038,7 @@ public: this->CGF.EmitLValue(L.IE), L.IE->getExprLoc()); CurBasePointers.emplace_back(BasePtr, L.VD); CurPointers.push_back(Ptr); - CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy)); + CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty)); // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder // value MEMBER_OF=FFFF so that the entry is later updated with the // correct value of MEMBER_OF. @@ -7602,23 +8086,37 @@ public: LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer()); BasePointers.push_back(ThisLVal.getPointer()); Pointers.push_back(ThisLValVal.getPointer()); - Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); + Sizes.push_back( + CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), + CGF.Int64Ty, /*isSigned=*/true)); Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); } for (const LambdaCapture &LC : RD->captures()) { - if (LC.getCaptureKind() != LCK_ByRef) + if (!LC.capturesVariable()) continue; const VarDecl *VD = LC.getCapturedVar(); + if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType()) + continue; auto It = Captures.find(VD); assert(It != Captures.end() && "Found lambda capture without field."); LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); - LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); - LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer()); - BasePointers.push_back(VarLVal.getPointer()); - Pointers.push_back(VarLValVal.getPointer()); - Sizes.push_back(CGF.getTypeSize( - VD->getType().getCanonicalType().getNonReferenceType())); + if (LC.getCaptureKind() == LCK_ByRef) { + LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); + LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer()); + BasePointers.push_back(VarLVal.getPointer()); + Pointers.push_back(VarLValVal.getPointer()); + Sizes.push_back(CGF.Builder.CreateIntCast( + CGF.getTypeSize( + VD->getType().getCanonicalType().getNonReferenceType()), + CGF.Int64Ty, /*isSigned=*/true)); + } else { + RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); + 
LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer()); + BasePointers.push_back(VarLVal.getPointer()); + Pointers.push_back(VarRVal.getScalarVal()); + Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); + } Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); } @@ -7675,7 +8173,9 @@ public: if (DevPointersMap.count(VD)) { BasePointers.emplace_back(Arg, VD); Pointers.push_back(Arg); - Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); + Sizes.push_back( + CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), + CGF.Int64Ty, /*isSigned=*/true)); Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM); return; } @@ -7834,7 +8334,7 @@ public: MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types) const { // Map other list items in the map clause which are not captured variables - // but "declare target link" global variables., + // but "declare target link" global variables. for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) { for (const auto &L : C->component_lists()) { if (!L.first) @@ -7844,7 +8344,8 @@ public: continue; llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); - if (!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link) + if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || + !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link) continue; StructRangeInfoTy PartialStruct; generateInfoForComponentList( @@ -7865,12 +8366,15 @@ public: MapValuesArrayTy &CurPointers, MapValuesArrayTy &CurSizes, MapFlagsArrayTy &CurMapTypes) const { + bool IsImplicit = true; // Do the default mapping. if (CI.capturesThis()) { CurBasePointers.push_back(CV); CurPointers.push_back(CV); const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); - CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType())); + CurSizes.push_back( + CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), + CGF.Int64Ty, /*isSigned=*/true)); // Default map type. CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM); } else if (CI.capturesVariableByCopy()) { @@ -7880,39 +8384,64 @@ public: // We have to signal to the runtime captures passed by value that are // not pointers. CurMapTypes.push_back(OMP_MAP_LITERAL); - CurSizes.push_back(CGF.getTypeSize(RI.getType())); + CurSizes.push_back(CGF.Builder.CreateIntCast( + CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); } else { // Pointers are implicitly mapped with a zero size and no flags // (other than first map that is added for all implicit maps). CurMapTypes.push_back(OMP_MAP_NONE); - CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy)); + CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); } + const VarDecl *VD = CI.getCapturedVar(); + auto I = FirstPrivateDecls.find(VD); + if (I != FirstPrivateDecls.end()) + IsImplicit = I->getSecond(); } else { assert(CI.capturesVariable() && "Expected captured reference."); - CurBasePointers.push_back(CV); - CurPointers.push_back(CV); - const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); QualType ElementType = PtrTy->getPointeeType(); - CurSizes.push_back(CGF.getTypeSize(ElementType)); + CurSizes.push_back(CGF.Builder.CreateIntCast( + CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true)); // The default map type for a scalar/complex type is 'to' because by // default the value doesn't have to be retrieved. For an aggregate // type, the default is 'tofrom'. 
CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI)); + const VarDecl *VD = CI.getCapturedVar(); + auto I = FirstPrivateDecls.find(VD); + if (I != FirstPrivateDecls.end() && + VD->getType().isConstant(CGF.getContext())) { + llvm::Constant *Addr = + CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD); + // Copy the value of the original variable to the new global copy. + CGF.Builder.CreateMemCpy( + CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(), + Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), + CurSizes.back(), /*IsVolatile=*/false); + // Use new global variable as the base pointers. + CurBasePointers.push_back(Addr); + CurPointers.push_back(Addr); + } else { + CurBasePointers.push_back(CV); + if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { + Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( + CV, ElementType, CGF.getContext().getDeclAlign(VD), + AlignmentSource::Decl)); + CurPointers.push_back(PtrAddr.getPointer()); + } else { + CurPointers.push_back(CV); + } + } + if (I != FirstPrivateDecls.end()) + IsImplicit = I->getSecond(); } // Every default map produces a single argument which is a target parameter. CurMapTypes.back() |= OMP_MAP_TARGET_PARAM; // Add flag stating this is an implicit map. - CurMapTypes.back() |= OMP_MAP_IMPLICIT; + if (IsImplicit) + CurMapTypes.back() |= OMP_MAP_IMPLICIT; } }; - -enum OpenMPOffloadingReservedDeviceIDs { - /// Device ID if the device was not defined, runtime should get it - /// from environment variables in the spec. - OMP_DEVICEID_UNDEF = -1, -}; } // anonymous namespace /// Emit the arrays used to pass the captures and map information to the @@ -7955,10 +8484,12 @@ emitOffloadingArrays(CodeGenFunction &CGF, // If we don't have any VLA types or other types that require runtime // evaluation, we can use a constant array for the map sizes, otherwise we // need to fill up the arrays as we do for the pointers. 
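When a by-reference firstprivate capture is const, the default mapping above snapshots its value into a compiler-generated internal global (registerTargetFirstprivateCopy) and maps that copy rather than the host object itself. A user-level sketch of the pattern this serves, assuming the usual implicit capture rules; names are illustrative:

// 'lookup' is const, so its bytes can be copied once into an internal global
// (named __omp_offloading_firstprivate_..., per the hunk further down) and
// that global is what gets mapped to the device.
int scale(int x) {
  const int lookup[4] = {1, 2, 4, 8};
  int r = 0;
#pragma omp target map(from: r) firstprivate(lookup)
  r = lookup[x & 3] * x;
  return r;
}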
+ QualType Int64Ty = + Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); if (hasRuntimeEvaluationCaptureSize) { - QualType SizeArrayType = Ctx.getConstantArrayType( - Ctx.getSizeType(), PointerNumAP, ArrayType::Normal, - /*IndexTypeQuals=*/0); + QualType SizeArrayType = + Ctx.getConstantArrayType(Int64Ty, PointerNumAP, ArrayType::Normal, + /*IndexTypeQuals=*/0); Info.SizesArray = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); } else { @@ -7969,7 +8500,7 @@ emitOffloadingArrays(CodeGenFunction &CGF, ConstSizes.push_back(cast<llvm::Constant>(S)); auto *SizesArrayInit = llvm::ConstantArray::get( - llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes); + llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); auto *SizesArrayGbl = new llvm::GlobalVariable( CGM.getModule(), SizesArrayInit->getType(), @@ -8019,13 +8550,13 @@ emitOffloadingArrays(CodeGenFunction &CGF, if (hasRuntimeEvaluationCaptureSize) { llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), + llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, /*Idx0=*/0, /*Idx1=*/I); - Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType())); + Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty)); CGF.Builder.CreateStore( - CGF.Builder.CreateIntCast(Sizes[I], CGM.SizeTy, /*isSigned=*/true), + CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true), SAddr); } } @@ -8049,7 +8580,7 @@ static void emitOffloadingArraysArgument( /*Idx0=*/0, /*Idx1=*/0); SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray, + llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, /*Idx0=*/0, /*Idx1=*/0); MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), @@ -8059,76 +8590,23 @@ static void emitOffloadingArraysArgument( } else { BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); - SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo()); + SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); MapTypesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); } } -/// Checks if the expression is constant or does not have non-trivial function -/// calls. -static bool isTrivial(ASTContext &Ctx, const Expr * E) { - // We can skip constant expressions. - // We can skip expressions with trivial calls or simple expressions. - return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || - !E->hasNonTrivialCall(Ctx)) && - !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); -} - -/// Checks if the \p Body is the \a CompoundStmt and returns its child statement -/// iff there is only one that is not evaluatable at the compile time. -static const Stmt *getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body) { - if (const auto *C = dyn_cast<CompoundStmt>(Body)) { - const Stmt *Child = nullptr; - for (const Stmt *S : C->body()) { - if (const auto *E = dyn_cast<Expr>(S)) { - if (isTrivial(Ctx, E)) - continue; - } - // Some of the statements can be ignored. - if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || - isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) - continue; - // Analyze declarations. 
- if (const auto *DS = dyn_cast<DeclStmt>(S)) { - if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { - if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || - isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || - isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || - isa<UsingDirectiveDecl>(D) || - isa<OMPDeclareReductionDecl>(D) || - isa<OMPThreadPrivateDecl>(D)) - return true; - const auto *VD = dyn_cast<VarDecl>(D); - if (!VD) - return false; - return VD->isConstexpr() || - ((VD->getType().isTrivialType(Ctx) || - VD->getType()->isReferenceType()) && - (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); - })) - continue; - } - // Found multiple children - cannot get the one child only. - if (Child) - return Body; - Child = S; - } - if (Child) - return Child; - } - return Body; -} - /// Check for inner distribute directive. static const OMPExecutableDirective * getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { const auto *CS = D.getInnermostCapturedStmt(); const auto *Body = CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); - const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body); + const Stmt *ChildStmt = + CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); - if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) { + if (const auto *NestedDir = + dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); switch (D.getDirectiveKind()) { case OMPD_target: @@ -8139,8 +8617,9 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { /*IgnoreCaptured=*/true); if (!Body) return nullptr; - ChildStmt = getSingleCompoundChild(Ctx, Body); - if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { + ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); + if (const auto *NND = + dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { DKind = NND->getDirectiveKind(); if (isOpenMPDistributeDirective(DKind)) return NND; @@ -8170,6 +8649,7 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { case OMPD_cancellation_point: case OMPD_ordered: case OMPD_threadprivate: + case OMPD_allocate: case OMPD_task: case OMPD_simd: case OMPD_sections: @@ -8200,6 +8680,7 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: + case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: case OMPD_requires: @@ -8244,7 +8725,7 @@ void CGOpenMPRuntime::emitTargetNumIterationsCall( void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, - llvm::Value *OutlinedFn, + llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, const Expr *Device) { if (!CGF.HaveInsertPoint()) @@ -8295,8 +8776,8 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, // Return value of the runtime offloading call. 
llvm::Value *Return; - llvm::Value *NumTeams = emitNumTeamsForTargetDirective(*this, CGF, D); - llvm::Value *NumThreads = emitNumThreadsForTargetDirective(*this, CGF, D); + llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); + llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); // The target region is an outlined function launched by the runtime @@ -8420,10 +8901,12 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, if (CI->capturesVariableArrayType()) { CurBasePointers.push_back(*CV); CurPointers.push_back(*CV); - CurSizes.push_back(CGF.getTypeSize(RI->getType())); + CurSizes.push_back(CGF.Builder.CreateIntCast( + CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); // Copy to the device as an argument. No need to retrieve it. CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL | - MappableExprsHandler::OMP_MAP_TARGET_PARAM); + MappableExprsHandler::OMP_MAP_TARGET_PARAM | + MappableExprsHandler::OMP_MAP_IMPLICIT); } else { // If we have any information in the map clause, we use it, otherwise we // just do a default mapping. @@ -8592,6 +9075,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, case OMPD_cancellation_point: case OMPD_ordered: case OMPD_threadprivate: + case OMPD_allocate: case OMPD_task: case OMPD_simd: case OMPD_sections: @@ -8622,6 +9106,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: + case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: case OMPD_requires: @@ -8691,13 +9176,49 @@ bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( cast<VarDecl>(GD.getDecl())); - if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) { + if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || + (*Res == OMPDeclareTargetDeclAttr::MT_To && + HasRequiresUnifiedSharedMemory)) { DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); return true; } return false; } +llvm::Constant * +CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, + const VarDecl *VD) { + assert(VD->getType().isConstant(CGM.getContext()) && + "Expected constant variable."); + StringRef VarName; + llvm::Constant *Addr; + llvm::GlobalValue::LinkageTypes Linkage; + QualType Ty = VD->getType(); + SmallString<128> Buffer; + { + unsigned DeviceID; + unsigned FileID; + unsigned Line; + getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, + FileID, Line); + llvm::raw_svector_ostream OS(Buffer); + OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) + << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; + VarName = OS.str(); + } + Linkage = llvm::GlobalValue::InternalLinkage; + Addr = + getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, + getDefaultFirstprivateAddressSpace()); + cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); + CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); + CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); + OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( + VarName, Addr, VarSize, + OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); + return Addr; +} + void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr) { 
llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = @@ -8716,8 +9237,9 @@ void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, StringRef VarName; CharUnits VarSize; llvm::GlobalValue::LinkageTypes Linkage; - switch (*Res) { - case OMPDeclareTargetDeclAttr::MT_To: + + if (*Res == OMPDeclareTargetDeclAttr::MT_To && + !HasRequiresUnifiedSharedMemory) { Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; VarName = CGM.getMangledName(VD); if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { @@ -8740,20 +9262,27 @@ void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, CGM.addCompilerUsedGlobal(GVAddrRef); } } - break; - case OMPDeclareTargetDeclAttr::MT_Link: - Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; + } else { + assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || + (*Res == OMPDeclareTargetDeclAttr::MT_To && + HasRequiresUnifiedSharedMemory)) && + "Declare target attribute must link or to with unified memory."); + if (*Res == OMPDeclareTargetDeclAttr::MT_Link) + Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; + else + Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; + if (CGM.getLangOpts().OpenMPIsDevice) { VarName = Addr->getName(); Addr = nullptr; } else { - VarName = getAddrOfDeclareTargetLink(VD).getName(); - Addr = cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer()); + VarName = getAddrOfDeclareTargetVar(VD).getName(); + Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); } VarSize = CGM.getPointerSize(); Linkage = llvm::GlobalValue::WeakAnyLinkage; - break; } + OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( VarName, Addr, VarSize, Flags, Linkage); } @@ -8772,12 +9301,15 @@ void CGOpenMPRuntime::emitDeferredTargetDecls() const { OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); if (!Res) continue; - if (*Res == OMPDeclareTargetDeclAttr::MT_To) { + if (*Res == OMPDeclareTargetDeclAttr::MT_To && + !HasRequiresUnifiedSharedMemory) { CGM.EmitGlobal(VD); } else { - assert(*Res == OMPDeclareTargetDeclAttr::MT_Link && - "Expected to or link clauses."); - (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD); + assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || + (*Res == OMPDeclareTargetDeclAttr::MT_To && + HasRequiresUnifiedSharedMemory)) && + "Expected link clause or to clause with unified memory."); + (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); } } } @@ -8788,6 +9320,44 @@ void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( " Expected target-based directive."); } +void CGOpenMPRuntime::checkArchForUnifiedAddressing( + const OMPRequiresDecl *D) { + for (const OMPClause *Clause : D->clauselists()) { + if (Clause->getClauseKind() == OMPC_unified_shared_memory) { + HasRequiresUnifiedSharedMemory = true; + break; + } + } +} + +bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, + LangAS &AS) { + if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) + return false; + const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); + switch(A->getAllocatorType()) { + case OMPAllocateDeclAttr::OMPDefaultMemAlloc: + // Not supported, fallback to the default mem space. 
+ case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: + case OMPAllocateDeclAttr::OMPCGroupMemAlloc: + case OMPAllocateDeclAttr::OMPHighBWMemAlloc: + case OMPAllocateDeclAttr::OMPLowLatMemAlloc: + case OMPAllocateDeclAttr::OMPThreadMemAlloc: + case OMPAllocateDeclAttr::OMPConstMemAlloc: + case OMPAllocateDeclAttr::OMPPTeamMemAlloc: + AS = LangAS::Default; + return true; + case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: + llvm_unreachable("Expected predefined allocator for the variables with the " + "static storage."); + } + return false; +} + +bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { + return HasRequiresUnifiedSharedMemory; +} + CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( CodeGenModule &CGM) : CGM(CGM) { @@ -8822,6 +9392,47 @@ bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { return !AlreadyEmittedTargetFunctions.insert(Name).second; } +llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { + // If we don't have entries or if we are emitting code for the device, we + // don't need to do anything. + if (CGM.getLangOpts().OMPTargetTriples.empty() || + CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || + (OffloadEntriesInfoManager.empty() && + !HasEmittedDeclareTargetRegion && + !HasEmittedTargetRegion)) + return nullptr; + + // Create and register the function that handles the requires directives. + ASTContext &C = CGM.getContext(); + + llvm::Function *RequiresRegFn; + { + CodeGenFunction CGF(CGM); + const auto &FI = CGM.getTypes().arrangeNullaryFunction(); + llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); + std::string ReqName = getName({"omp_offloading", "requires_reg"}); + RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI); + CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); + OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; + // TODO: check for other requires clauses. + // The requires directive takes effect only when a target region is + // present in the compilation unit. Otherwise it is ignored and not + // passed to the runtime. This prevents the runtime from raising an error + // for mismatching requires clauses across compilation units that don't + // contain at least 1 target region. + assert((HasEmittedTargetRegion || + HasEmittedDeclareTargetRegion || + !OffloadEntriesInfoManager.empty()) && + "Target or declare target region expected."); + if (HasRequiresUnifiedSharedMemory) + Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires), + llvm::ConstantInt::get(CGM.Int64Ty, Flags)); + CGF.FinishFunction(); + } + return RequiresRegFn; +} + llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { // If we have offloading in the current module, we need to emit the entries // now and register the offloading descriptor.
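As context for the new emitRequiresDirectiveRegFun above, here is a minimal source sketch of the pattern it serves (an illustrative example, not part of this patch; the constructor name below assumes the default "." separators used by getName):

#pragma omp requires unified_shared_memory

void scale(int *A, int N) {
  // Under unified shared memory, the host pointer is directly usable in the
  // target region; no explicit mapping of A is required.
#pragma omp target teams distribute parallel for
  for (int I = 0; I < N; ++I)
    A[I] *= 2;
}

// For a unit like this, the host compile emits a constructor along the lines
// of ".omp_offloading.requires_reg" whose body is a single call to
// __tgt_register_requires(OMP_REQ_UNIFIED_SHARED_MEMORY).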
@@ -8836,7 +9447,7 @@ llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, - llvm::Value *OutlinedFn, + llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars) { if (!CGF.HaveInsertPoint()) return; @@ -8853,7 +9464,7 @@ void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, RealArgs.append(std::begin(Args), std::end(Args)); RealArgs.append(CapturedVars.begin(), CapturedVars.end()); - llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); + llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); CGF.EmitRuntimeCall(RTLFn, RealArgs); } @@ -9075,6 +9686,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( case OMPD_cancellation_point: case OMPD_ordered: case OMPD_threadprivate: + case OMPD_allocate: case OMPD_task: case OMPD_simd: case OMPD_sections: @@ -9102,6 +9714,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: + case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: case OMPD_target: @@ -9268,8 +9881,9 @@ emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, llvm::raw_svector_ostream Out(Buffer); Out << "_ZGV" << Data.ISA << Mask; if (!VLENVal) { - Out << llvm::APSInt::getUnsigned(Data.VecRegSize / - evaluateCDTSize(FD, ParamAttrs)); + unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); + assert(NumElts && "Non-zero simdlen/cdtsize expected"); + Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); } else { Out << VLENVal; } @@ -9299,6 +9913,307 @@ emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, } } +// These are the functions needed to mangle the names of the +// vector functions generated by the compiler, according to the rules +// defined in the "Vector Function ABI specifications for AArch64", +// available at +// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. + +/// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. +/// +/// TODO: Need to implement the behavior for references marked with a +/// var or no linear modifier (1.b in the section). For this, we +/// need to extend ParamKindTy to support the linear modifiers. +static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { + QT = QT.getCanonicalType(); + + if (QT->isVoidType()) + return false; + + if (Kind == ParamKindTy::Uniform) + return false; + + if (Kind == ParamKindTy::Linear) + return false; + + // TODO: Handle linear references with modifiers + + if (Kind == ParamKindTy::LinearWithVarStride) + return false; + + return true; +} + +/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. +static bool getAArch64PBV(QualType QT, ASTContext &C) { + QT = QT.getCanonicalType(); + unsigned Size = C.getTypeSize(QT); + + // Only scalars and complex types at most 16 bytes wide set PBV to true. + if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) + return false; + + if (QT->isFloatingType()) + return true; + + if (QT->isIntegerType()) + return true; + + if (QT->isPointerType()) + return true; + + // TODO: Add support for complex types (section 3.1.2, item 2). + + return false; +} + +/// Computes the lane size (LS) of a return type or of an input parameter, +/// as defined by `LS(P)` in 3.2.1 of the AAVFABI. +/// TODO: Add support for references, section 3.2.1, item 1.
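/// (Illustrative note, not from this patch: under these rules the LS of a
/// `double` vector parameter is 64; for a `float *` vector parameter the
/// pointee is PBV, so LS is 32; anything else falls back to the size of
/// `uintptr_t`, i.e. 64 on typical AArch64 targets.)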
+static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { + if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { + QualType PTy = QT.getCanonicalType()->getPointeeType(); + if (getAArch64PBV(PTy, C)) + return C.getTypeSize(PTy); + } + if (getAArch64PBV(QT, C)) + return C.getTypeSize(QT); + + return C.getTypeSize(C.getUIntPtrType()); +} + +// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the +// signature of the scalar function, as defined in 3.2.2 of the +// AAVFABI. +static std::tuple<unsigned, unsigned, bool> +getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { + QualType RetType = FD->getReturnType().getCanonicalType(); + + ASTContext &C = FD->getASTContext(); + + bool OutputBecomesInput = false; + + llvm::SmallVector<unsigned, 8> Sizes; + if (!RetType->isVoidType()) { + Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); + if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) + OutputBecomesInput = true; + } + for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { + QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); + Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); + } + + assert(!Sizes.empty() && "Unable to determine NDS and WDS."); + // The LS of a function parameter / return value can only be a power + // of 2, starting from 8 bits, up to 128. + assert(std::all_of(Sizes.begin(), Sizes.end(), + [](unsigned Size) { + return Size == 8 || Size == 16 || Size == 32 || + Size == 64 || Size == 128; + }) && + "Invalid size"); + + return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), + *std::max_element(std::begin(Sizes), std::end(Sizes)), + OutputBecomesInput); +} + +/// Mangle the parameter part of the vector function name according to +/// their OpenMP classification. The mangling function is defined in +/// section 3.5 of the AAVFABI. +static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + for (const auto &ParamAttr : ParamAttrs) { + switch (ParamAttr.Kind) { + case LinearWithVarStride: + Out << "ls" << ParamAttr.StrideOrArg; + break; + case Linear: + Out << 'l'; + // Don't print the step value if it is not present or if it is + // equal to 1. + if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1) + Out << ParamAttr.StrideOrArg; + break; + case Uniform: + Out << 'u'; + break; + case Vector: + Out << 'v'; + break; + } + + if (!!ParamAttr.Alignment) + Out << 'a' << ParamAttr.Alignment; + } + + return Out.str(); +} + +// Function used to add the attribute. The parameter `VLEN` is +// templated to allow the use of "x" when targeting scalable functions +// for SVE. +template <typename T> +static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, + char ISA, StringRef ParSeq, + StringRef MangledName, bool OutputBecomesInput, + llvm::Function *Fn) { + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + Out << Prefix << ISA << LMask << VLEN; + if (OutputBecomesInput) + Out << "v"; + Out << ParSeq << "_" << MangledName; + Fn->addFnAttr(Out.str()); +} + +// Helper function to generate the Advanced SIMD names depending on +// the value of the NDS when simdlen is not present. 
+static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, + StringRef Prefix, char ISA, + StringRef ParSeq, StringRef MangledName, + bool OutputBecomesInput, + llvm::Function *Fn) { + switch (NDS) { + case 8: + addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + break; + case 16: + addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + break; + case 32: + addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + break; + case 64: + case 128: + addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + break; + default: + llvm_unreachable("Scalar type is too wide."); + } +} + +/// Emit vector function attributes for AArch64, as defined in the AAVFABI. +static void emitAArch64DeclareSimdFunction( + CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, + ArrayRef<ParamAttrTy> ParamAttrs, + OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, + char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { + + // Get basic data for building the vector signature. + const auto Data = getNDSWDS(FD, ParamAttrs); + const unsigned NDS = std::get<0>(Data); + const unsigned WDS = std::get<1>(Data); + const bool OutputBecomesInput = std::get<2>(Data); + + // Check the values provided via `simdlen` by the user. + // 1. A `simdlen(1)` doesn't produce vector signatures. + if (UserVLEN == 1) { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Warning, + "The clause simdlen(1) has no effect when targeting aarch64."); + CGM.getDiags().Report(SLoc, DiagID); + return; + } + + // 2. Section 3.3.1, item 1: user input must be a power of 2 for + // Advanced SIMD output. + if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Warning, "The value specified in simdlen must be a " + "power of 2 when targeting Advanced SIMD."); + CGM.getDiags().Report(SLoc, DiagID); + return; + } + + // 3. Section 3.4.1. SVE fixed length must obey the architectural + // limits. + if (ISA == 's' && UserVLEN != 0) { + if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " + "lanes in the architectural constraints " + "for SVE (min is 128-bit, max is " + "2048-bit, by steps of 128-bit)"); + CGM.getDiags().Report(SLoc, DiagID) << WDS; + return; + } + } + + // Sort out parameter sequence. + const std::string ParSeq = mangleVectorParameters(ParamAttrs); + StringRef Prefix = "_ZGV"; + // Generate simdlen from user input (if any). + if (UserVLEN) { + if (ISA == 's') { + // SVE generates only a masked function. + addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + } else { + assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); + // Advanced SIMD generates one or two functions, depending on + // the `[not]inbranch` clause.
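      // Illustrative example (not from this patch): for
      //   #pragma omp declare simd simdlen(2)
      // on `double foo(double x)`, ParSeq is "v" and the names emitted below
      // are "_ZGVnN2v_foo" (unmasked) and/or "_ZGVnM2v_foo" (masked),
      // depending on the `[not]inbranch` state.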
+ switch (State) { + case OMPDeclareSimdDeclAttr::BS_Undefined: + addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + break; + case OMPDeclareSimdDeclAttr::BS_Notinbranch: + addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + break; + case OMPDeclareSimdDeclAttr::BS_Inbranch: + addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + break; + } + } + } else { + // If no user simdlen is provided, follow the AAVFABI rules for + // generating the vector length. + if (ISA == 's') { + // SVE, section 3.4.1, item 1. + addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + } else { + assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); + // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or + // two vector names depending on the use of the clause + // `[not]inbranch`. + switch (State) { + case OMPDeclareSimdDeclAttr::BS_Undefined: + addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + break; + case OMPDeclareSimdDeclAttr::BS_Notinbranch: + addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + break; + case OMPDeclareSimdDeclAttr::BS_Inbranch: + addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, + OutputBecomesInput, Fn); + break; + } + } + } +} + void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn) { ASTContext &C = CGM.getContext(); @@ -9385,12 +10300,26 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, ++MI; } llvm::APSInt VLENVal; - if (const Expr *VLEN = Attr->getSimdlen()) - VLENVal = VLEN->EvaluateKnownConstInt(C); + SourceLocation ExprLoc; + const Expr *VLENExpr = Attr->getSimdlen(); + if (VLENExpr) { + VLENVal = VLENExpr->EvaluateKnownConstInt(C); + ExprLoc = VLENExpr->getExprLoc(); + } OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); if (CGM.getTriple().getArch() == llvm::Triple::x86 || - CGM.getTriple().getArch() == llvm::Triple::x86_64) + CGM.getTriple().getArch() == llvm::Triple::x86_64) { emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); + } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { + unsigned VLEN = VLENVal.getExtValue(); + StringRef MangledName = Fn->getName(); + if (CGM.getTarget().hasFeature("sve")) + emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, + MangledName, 's', 128, Fn, ExprLoc); + if (CGM.getTarget().hasFeature("neon")) + emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, + MangledName, 'n', 128, Fn, ExprLoc); + } } FD = FD->getPreviousDecl(); } @@ -9403,11 +10332,12 @@ public: static const int DoacrossFinArgs = 2; private: - llvm::Value *RTLFn; + llvm::FunctionCallee RTLFn; llvm::Value *Args[DoacrossFinArgs]; public: - DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs) + DoacrossCleanupTy(llvm::FunctionCallee RTLFn, + ArrayRef<llvm::Value *> CallArgs) : RTLFn(RTLFn) { assert(CallArgs.size() == DoacrossFinArgs); std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); @@ -9454,10 +10384,8 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, enum { LowerFD = 0, UpperFD, StrideFD }; // Fill dims with 
data. for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { - LValue DimsLVal = - CGF.MakeAddrLValue(CGF.Builder.CreateConstArrayGEP( - DimsAddr, I, C.getTypeSizeInChars(KmpDimTy)), - KmpDimTy); + LValue DimsLVal = CGF.MakeAddrLValue( + CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); // dims.upper = num_iterations; LValue UpperLVal = CGF.EmitLValueForField( DimsLVal, *std::next(RD->field_begin(), UpperFD)); @@ -9480,16 +10408,16 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, getThreadID(CGF, D.getBeginLoc()), llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.Builder - .CreateConstArrayGEP(DimsAddr, 0, C.getTypeSizeInChars(KmpDimTy)) - .getPointer(), + CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), CGM.VoidPtrTy)}; - llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init); + llvm::FunctionCallee RTLFn = + createRuntimeFunction(OMPRTL__kmpc_doacross_init); CGF.EmitRuntimeCall(RTLFn, Args); llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; - llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini); + llvm::FunctionCallee FiniRTLFn = + createRuntimeFunction(OMPRTL__kmpc_doacross_fini); CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, llvm::makeArrayRef(FiniArgs)); } @@ -9508,20 +10436,14 @@ void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, llvm::Value *CntVal = CGF.EmitScalarConversion( CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, CounterVal->getExprLoc()); - CGF.EmitStoreOfScalar( - CntVal, - CGF.Builder.CreateConstArrayGEP( - CntAddr, I, CGM.getContext().getTypeSizeInChars(Int64Ty)), - /*Volatile=*/false, Int64Ty); + CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), + /*Volatile=*/false, Int64Ty); } llvm::Value *Args[] = { emitUpdateLocation(CGF, C->getBeginLoc()), getThreadID(CGF, C->getBeginLoc()), - CGF.Builder - .CreateConstArrayGEP(CntAddr, 0, - CGM.getContext().getTypeSizeInChars(Int64Ty)) - .getPointer()}; - llvm::Value *RTLFn; + CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; + llvm::FunctionCallee RTLFn; if (C->getDependencyKind() == OMPC_DEPEND_source) { RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); } else { @@ -9532,12 +10454,12 @@ void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, } void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, - llvm::Value *Callee, + llvm::FunctionCallee Callee, ArrayRef<llvm::Value *> Args) const { assert(Loc.isValid() && "Outlined function call location must be valid."); auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); - if (auto *Fn = dyn_cast<llvm::Function>(Callee)) { + if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { if (Fn->doesNotThrow()) { CGF.EmitNounwindRuntimeCall(Fn, Args); return; @@ -9547,35 +10469,116 @@ void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, } void CGOpenMPRuntime::emitOutlinedFunctionCall( - CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, + CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef<llvm::Value *> Args) const { emitCall(CGF, Loc, OutlinedFn, Args); } +void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { + if (const auto *FD = dyn_cast<FunctionDecl>(D)) + if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) + 
HasEmittedDeclareTargetRegion = true; +} + Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const { return CGF.GetAddrOfLocalVar(NativeParam); } +namespace { +/// Cleanup action for allocate support. +class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { +public: + static const int CleanupArgs = 3; + +private: + llvm::FunctionCallee RTLFn; + llvm::Value *Args[CleanupArgs]; + +public: + OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, + ArrayRef<llvm::Value *> CallArgs) + : RTLFn(RTLFn) { + assert(CallArgs.size() == CleanupArgs && + "Size of arguments does not match."); + std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); + } + void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { + if (!CGF.HaveInsertPoint()) + return; + CGF.EmitRuntimeCall(RTLFn, Args); + } +}; +} // namespace + Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD) { - return Address::invalid(); -} - -llvm::Value *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( + if (!VD) + return Address::invalid(); + const VarDecl *CVD = VD->getCanonicalDecl(); + if (!CVD->hasAttr<OMPAllocateDeclAttr>()) + return Address::invalid(); + const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); + // Use the default allocation. + if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && + !AA->getAllocator()) + return Address::invalid(); + llvm::Value *Size; + CharUnits Align = CGM.getContext().getDeclAlign(CVD); + if (CVD->getType()->isVariablyModifiedType()) { + Size = CGF.getTypeSize(CVD->getType()); + // Align the size: ((size + align - 1) / align) * align + Size = CGF.Builder.CreateNUWAdd( + Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); + Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); + Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); + } else { + CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); + Size = CGM.getSize(Sz.alignTo(Align)); + } + llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); + assert(AA->getAllocator() && + "Expected allocator expression for non-default allocator."); + llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); + // According to the standard, the original allocator type is an enum (integer). + // Convert to pointer type, if required.
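  // Illustrative usage (assumed example, not from this patch):
  //   int Buf[64];
  //   #pragma omp allocate(Buf) allocator(omp_high_bw_mem_alloc)
  // The allocator expression evaluates to a predefined enumerator (an
  // integer such as omp_high_bw_mem_alloc), so it is converted to a pointer
  // below before being passed to __kmpc_alloc and, on cleanup, __kmpc_free.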
+ if (Allocator->getType()->isIntegerTy()) + Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); + else if (Allocator->getType()->isPointerTy()) + Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, + CGM.VoidPtrTy); + llvm::Value *Args[] = {ThreadID, Size, Allocator}; + + llvm::Value *Addr = + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args, + CVD->getName() + ".void.addr"); + llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, + Allocator}; + llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free); + + CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn, + llvm::makeArrayRef(FiniArgs)); + Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + Addr, + CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), + CVD->getName() + ".addr"); + return Address(Addr, Align); +} + +llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { llvm_unreachable("Not supported in SIMD-only mode"); } -llvm::Value *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( +llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { llvm_unreachable("Not supported in SIMD-only mode"); } -llvm::Value *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( +llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, @@ -9585,7 +10588,7 @@ llvm::Value *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, - llvm::Value *OutlinedFn, + llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) { llvm_unreachable("Not supported in SIMD-only mode"); @@ -9716,7 +10719,7 @@ void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, - llvm::Value *TaskFunction, + llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) { @@ -9725,7 +10728,7 @@ void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, void CGOpenMPSIMDRuntime::emitTaskLoopCall( CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, - llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, + llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) { llvm_unreachable("Not supported in SIMD-only mode"); } @@ -9785,9 +10788,10 @@ void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, - llvm::Value *OutlinedFn, + llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, - const Expr *IfCond, const Expr *Device) { + const Expr *IfCond, + const Expr *Device) { llvm_unreachable("Not supported in SIMD-only mode"); } @@ -9810,7 +10814,7 @@ llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() { void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, - llvm::Value 
*OutlinedFn, + llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars) { llvm_unreachable("Not supported in SIMD-only mode"); } @@ -9857,4 +10861,3 @@ CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, const VarDecl *TargetParam) const { llvm_unreachable("Not supported in SIMD-only mode"); } - diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h index 1822a6fd1974..3f842ce96407 100644 --- a/lib/CodeGen/CGOpenMPRuntime.h +++ b/lib/CodeGen/CGOpenMPRuntime.h @@ -1,9 +1,8 @@ //===----- CGOpenMPRuntime.h - Interface to OpenMP Runtimes -----*- C++ -*-===// // -// The LLVM Compiler Infrastructure - // -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -272,7 +271,8 @@ protected: virtual StringRef getOutlinedHelperName() const { return ".omp_outlined."; } /// Emits a call to \p Callee with arguments \p Args at location \p Loc. - void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *Callee, + void emitCall(CodeGenFunction &CGF, SourceLocation Loc, + llvm::FunctionCallee Callee, ArrayRef<llvm::Value *> Args = llvm::None) const; /// Emits address of the word in a memory where current thread id is @@ -636,6 +636,17 @@ private: /// must be emitted. llvm::SmallDenseSet<const VarDecl *> DeferredGlobalVariables; + /// Flag for keeping track of whether a requires unified_shared_memory + /// directive is present. + bool HasRequiresUnifiedSharedMemory = false; + + /// Flag for keeping track of whether a target region has been emitted. + bool HasEmittedTargetRegion = false; + + /// Flag for keeping track of whether a device routine has been emitted. + /// Device routines are specific to the device. + bool HasEmittedDeclareTargetRegion = false; + /// Creates and registers offloading binary descriptor for the current /// compilation unit. The function that does the registration is returned. llvm::Function *createOffloadingBinaryDescriptorRegistration(); @@ -672,23 +683,27 @@ private: /// Returns specified OpenMP runtime function. /// \param Function OpenMP runtime function. /// \return Specified function. - llvm::Constant *createRuntimeFunction(unsigned Function); + llvm::FunctionCallee createRuntimeFunction(unsigned Function); /// Returns __kmpc_for_static_init_* runtime function for the specified /// size \a IVSize and sign \a IVSigned. - llvm::Constant *createForStaticInitFunction(unsigned IVSize, bool IVSigned); + llvm::FunctionCallee createForStaticInitFunction(unsigned IVSize, + bool IVSigned); /// Returns __kmpc_dispatch_init_* runtime function for the specified /// size \a IVSize and sign \a IVSigned. - llvm::Constant *createDispatchInitFunction(unsigned IVSize, bool IVSigned); + llvm::FunctionCallee createDispatchInitFunction(unsigned IVSize, + bool IVSigned); /// Returns __kmpc_dispatch_next_* runtime function for the specified /// size \a IVSize and sign \a IVSigned. - llvm::Constant *createDispatchNextFunction(unsigned IVSize, bool IVSigned); + llvm::FunctionCallee createDispatchNextFunction(unsigned IVSize, + bool IVSigned); /// Returns __kmpc_dispatch_fini_* runtime function for the specified /// size \a IVSize and sign \a IVSigned.
- llvm::Constant *createDispatchFiniFunction(unsigned IVSize, bool IVSigned); + llvm::FunctionCallee createDispatchFiniFunction(unsigned IVSize, + bool IVSigned); /// If the specified mangled name is not in the module, create and /// return threadprivate cache object. This object is a pointer's worth of @@ -704,7 +719,8 @@ private: /// must be the same. /// \param Name Name of the variable. llvm::Constant *getOrCreateInternalVariable(llvm::Type *Ty, - const llvm::Twine &Name); + const llvm::Twine &Name, + unsigned AddressSpace = 0); /// Set of threadprivate variables with the generated initializer. llvm::StringSet<> ThreadPrivateWithDefinition; @@ -724,7 +740,7 @@ private: struct TaskResultTy { llvm::Value *NewTask = nullptr; - llvm::Value *TaskEntry = nullptr; + llvm::Function *TaskEntry = nullptr; llvm::Value *NewTaskNewTaskTTy = nullptr; LValue TDBase; const RecordDecl *KmpTaskTQTyRD = nullptr; @@ -754,15 +770,24 @@ private: /// state, list of privates etc. TaskResultTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, - llvm::Value *TaskFunction, QualType SharedsTy, + llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data); + /// Returns default address space for the constant firstprivates, 0 by + /// default. + virtual unsigned getDefaultFirstprivateAddressSpace() const { return 0; } + public: explicit CGOpenMPRuntime(CodeGenModule &CGM) : CGOpenMPRuntime(CGM, ".", ".") {} virtual ~CGOpenMPRuntime() {} virtual void clear(); + /// Checks if the \p Body is the \a CompoundStmt and returns its child + /// statement iff there is only one that is not evaluatable at the compile + /// time. + static const Stmt *getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body); + /// Get the platform-specific name separator. std::string getName(ArrayRef<StringRef> Parts) const; @@ -781,7 +806,7 @@ public: /// \param InnermostKind Kind of innermost directive (for simple directives it /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. - virtual llvm::Value *emitParallelOutlinedFunction( + virtual llvm::Function *emitParallelOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen); @@ -793,7 +818,7 @@ public: /// \param InnermostKind Kind of innermost directive (for simple directives it /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. - virtual llvm::Value *emitTeamsOutlinedFunction( + virtual llvm::Function *emitTeamsOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen); @@ -812,7 +837,7 @@ public: /// \param NumberOfParts Number of parts in untied task. Ignored for tied /// tasks. /// - virtual llvm::Value *emitTaskOutlinedFunction( + virtual llvm::Function *emitTaskOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, @@ -833,7 +858,7 @@ public: /// specified, nullptr otherwise. 
/// virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, - llvm::Value *OutlinedFn, + llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond); @@ -1096,8 +1121,8 @@ public: SourceLocation Loc); /// Returns the address of the variable marked as declare target with link - /// clause. - virtual Address getAddrOfDeclareTargetLink(const VarDecl *VD); + /// clause OR as declare target with to clause and unified memory. + virtual Address getAddrOfDeclareTargetVar(const VarDecl *VD); /// Emit code for the initialization of a threadprivate variable. It emits /// a call to the runtime library which adds the initial value to the newly created /// threadprivate variable. @@ -1162,7 +1187,7 @@ public: /// state, list of privates etc. virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, - llvm::Value *TaskFunction, QualType SharedsTy, + llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data); @@ -1195,10 +1220,11 @@ public: /// otherwise. /// \param Data Additional data for task generation like tiedness, final /// state, list of privates etc. - virtual void emitTaskLoopCall( - CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, - llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, - const Expr *IfCond, const OMPTaskDataTy &Data); + virtual void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, + const OMPLoopDirective &D, + llvm::Function *TaskFunction, + QualType SharedsTy, Address Shareds, + const Expr *IfCond, const OMPTaskDataTy &Data); /// Emit code for the directive that does not require outlining. /// @@ -1219,12 +1245,12 @@ public: /// \param RHSExprs List of RHS in \a ReductionOps reduction operations. /// \param ReductionOps List of reduction operations in form 'LHS binop RHS' /// or 'operator binop(LHS, RHS)'. - llvm::Value *emitReductionFunction(CodeGenModule &CGM, SourceLocation Loc, - llvm::Type *ArgsType, - ArrayRef<const Expr *> Privates, - ArrayRef<const Expr *> LHSExprs, - ArrayRef<const Expr *> RHSExprs, - ArrayRef<const Expr *> ReductionOps); + llvm::Function *emitReductionFunction(SourceLocation Loc, + llvm::Type *ArgsType, + ArrayRef<const Expr *> Privates, + ArrayRef<const Expr *> LHSExprs, + ArrayRef<const Expr *> RHSExprs, + ArrayRef<const Expr *> ReductionOps); /// Emits single reduction combiner void emitSingleReductionCombiner(CodeGenFunction &CGF, @@ -1389,7 +1415,7 @@ public: /// target directive, or null if no device clause is used. virtual void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, - llvm::Value *OutlinedFn, + llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, const Expr *Device); @@ -1409,11 +1435,20 @@ public: virtual void registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr); + /// Registers provided target firstprivate variable as global on the + /// target. + llvm::Constant *registerTargetFirstprivateCopy(CodeGenFunction &CGF, + const VarDecl *VD); + /// Emit the global \a GD if it is meaningful for the target. Returns true /// if it was emitted successfully. /// \param GD Global to scan. virtual bool emitTargetGlobal(GlobalDecl GD); + /// Creates and returns a registration function for when at least one + /// requires directive was used in the current module.
+ llvm::Function *emitRequiresDirectiveRegFun(); + /// Creates the offloading descriptor in the event any target region /// was emitted in the current module and returns the function that registers /// it. @@ -1429,7 +1464,7 @@ public: /// virtual void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, - SourceLocation Loc, llvm::Value *OutlinedFn, + SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars); /// Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 @@ -1550,18 +1585,18 @@ public: /// schedule clause. virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, - const Expr *&ChunkExpr) const {} + const Expr *&ChunkExpr) const; /// Emits call of the outlined function with the provided arguments, /// translating these arguments to correct target-specific arguments. virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, - llvm::Value *OutlinedFn, + llvm::FunctionCallee OutlinedFn, ArrayRef<llvm::Value *> Args = llvm::None) const; /// Emits OpenMP-specific function prolog. /// Required for device constructs. - virtual void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {} + virtual void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D); /// Gets the OpenMP-specific address of the local variable. virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF, @@ -1582,8 +1617,15 @@ public: /// Perform check on requires decl to ensure that target architecture /// supports unified addressing - virtual void checkArchForUnifiedAddressing(CodeGenModule &CGM, - const OMPRequiresDecl *D) const {} + virtual void checkArchForUnifiedAddressing(const OMPRequiresDecl *D); + + /// Checks if the variable has an associated OMPAllocateDeclAttr attribute with + /// the predefined allocator and translates it into the corresponding address + /// space. + virtual bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS); + + /// Return whether unified_shared_memory has been specified. + bool hasRequiresUnifiedSharedMemory() const; }; /// Class supports emission of SIMD-only code. @@ -1600,7 +1642,7 @@ public: /// \param InnermostKind Kind of innermost directive (for simple directives it /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. - llvm::Value * + llvm::Function * emitParallelOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override; @@ -1614,7 +1656,7 @@ public: /// \param InnermostKind Kind of innermost directive (for simple directives it /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. - llvm::Value * + llvm::Function * emitTeamsOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override; @@ -1635,7 +1677,7 @@ public: /// \param NumberOfParts Number of parts in untied task. Ignored for tied /// tasks. /// - llvm::Value *emitTaskOutlinedFunction( + llvm::Function *emitTaskOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, @@ -1652,7 +1694,7 @@ public: /// \param IfCond Not a nullptr if 'if' clause was /// specified, nullptr otherwise.
/// void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, - llvm::Value *OutlinedFn, + llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) override; @@ -1878,8 +1920,9 @@ public: /// \param Data Additional data for task generation like tiedness, final /// state, list of privates etc. void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, - const OMPExecutableDirective &D, llvm::Value *TaskFunction, - QualType SharedsTy, Address Shareds, const Expr *IfCond, + const OMPExecutableDirective &D, + llvm::Function *TaskFunction, QualType SharedsTy, + Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override; /// Emit task region for the taskloop directive. The taskloop region is @@ -1912,7 +1955,7 @@ public: /// \param Data Additional data for task generation like tiedness, final /// state, list of privates etc. void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, - const OMPLoopDirective &D, llvm::Value *TaskFunction, + const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override; @@ -2055,7 +2098,7 @@ public: /// \param Device Expression evaluated in device clause associated with the /// target directive, or null if no device clause is used. void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, - llvm::Value *OutlinedFn, llvm::Value *OutlinedFnID, + llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, const Expr *Device) override; /// Emit the target regions enclosed in \a GD function definition or @@ -2088,7 +2131,7 @@ public: /// variables used in \a OutlinedFn function. /// void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, - SourceLocation Loc, llvm::Value *OutlinedFn, + SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars) override; /// Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 @@ -2147,6 +2190,12 @@ public: /// \param TargetParam Corresponding target-specific parameter. Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override; + + /// Gets the OpenMP-specific address of the local variable. + Address getAddressOfLocalVariable(CodeGenFunction &CGF, + const VarDecl *VD) override { + return Address::invalid(); + } }; } // namespace CodeGen diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 7046ab3aa35c..48dcbbf3cabd 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -1,9 +1,8 @@ //===---- CGOpenMPRuntimeNVPTX.cpp - Interface to OpenMP NVPTX Runtimes ---===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -61,13 +60,19 @@ enum OpenMPRTLFunctionNVPTX { /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t /// lane_offset, int16_t shortCircuit), /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num)); - OMPRTL_NVPTX__kmpc_parallel_reduce_nowait_v2, - /// Call to __kmpc_nvptx_teams_reduce_nowait_simple(ident_t *loc, kmp_int32 - /// global_tid, kmp_critical_name *lck) - OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_simple, - /// Call to __kmpc_nvptx_teams_end_reduce_nowait_simple(ident_t *loc, - /// kmp_int32 global_tid, kmp_critical_name *lck) - OMPRTL_NVPTX__kmpc_nvptx_teams_end_reduce_nowait_simple, + OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2, + /// Call to __kmpc_nvptx_teams_reduce_nowait_v2(ident_t *loc, kmp_int32 + /// global_tid, void *global_buffer, int32_t num_of_records, void* + /// reduce_data, + /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t + /// lane_offset, int16_t shortCircuit), + /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), void + /// (*kmp_ListToGlobalCpyFctPtr)(void *buffer, int idx, void *reduce_data), + /// void (*kmp_GlobalToListCpyFctPtr)(void *buffer, int idx, + /// void *reduce_data), void (*kmp_GlobalToListCpyPtrsFctPtr)(void *buffer, + /// int idx, void *reduce_data), void (*kmp_GlobalToListRedFctPtr)(void + /// *buffer, int idx, void *reduce_data)); + OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2, /// Call to __kmpc_nvptx_end_reduce_nowait(int32_t global_tid); OMPRTL_NVPTX__kmpc_end_reduce_nowait, /// Call to void __kmpc_data_sharing_init_stack(); @@ -106,17 +111,18 @@ enum OpenMPRTLFunctionNVPTX { /// Pre(post)-action for different OpenMP constructs specialized for NVPTX. 
class NVPTXActionTy final : public PrePostActionTy { - llvm::Value *EnterCallee = nullptr; + llvm::FunctionCallee EnterCallee = nullptr; ArrayRef<llvm::Value *> EnterArgs; - llvm::Value *ExitCallee = nullptr; + llvm::FunctionCallee ExitCallee = nullptr; ArrayRef<llvm::Value *> ExitArgs; bool Conditional = false; llvm::BasicBlock *ContBlock = nullptr; public: - NVPTXActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs, - llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs, - bool Conditional = false) + NVPTXActionTy(llvm::FunctionCallee EnterCallee, + ArrayRef<llvm::Value *> EnterArgs, + llvm::FunctionCallee ExitCallee, + ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), ExitArgs(ExitArgs), Conditional(Conditional) {} void Enter(CodeGenFunction &CGF) override { @@ -215,16 +221,13 @@ static const ValueDecl *getPrivateItem(const Expr *RefExpr) { return cast<ValueDecl>(ME->getMemberDecl()->getCanonicalDecl()); } -typedef std::pair<CharUnits /*Align*/, const ValueDecl *> VarsDataTy; -static bool stable_sort_comparator(const VarsDataTy P1, const VarsDataTy P2) { - return P1.first > P2.first; -} static RecordDecl *buildRecordForGlobalizedVars( ASTContext &C, ArrayRef<const ValueDecl *> EscapedDecls, ArrayRef<const ValueDecl *> EscapedDeclsForTeams, llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> - &MappedDeclsFields) { + &MappedDeclsFields, int BufSize) { + using VarsDataTy = std::pair<CharUnits /*Align*/, const ValueDecl *>; if (EscapedDecls.empty() && EscapedDeclsForTeams.empty()) return nullptr; SmallVector<VarsDataTy, 4> GlobalizedVars; @@ -236,8 +239,10 @@ static RecordDecl *buildRecordForGlobalizedVars( D); for (const ValueDecl *D : EscapedDeclsForTeams) GlobalizedVars.emplace_back(C.getDeclAlign(D), D); - std::stable_sort(GlobalizedVars.begin(), GlobalizedVars.end(), - stable_sort_comparator); + llvm::stable_sort(GlobalizedVars, [](VarsDataTy L, VarsDataTy R) { + return L.first > R.first; + }); + // Build struct _globalized_locals_ty { // /* globalized vars */[WarSize] align (max(decl_align, // GlobalMemoryAlignment)) @@ -270,7 +275,7 @@ static RecordDecl *buildRecordForGlobalizedVars( Field->addAttr(*I); } } else { - llvm::APInt ArraySize(32, WarpSize); + llvm::APInt ArraySize(32, BufSize); Type = C.getConstantArrayType(Type, ArraySize, ArrayType::Normal, 0); Field = FieldDecl::Create( C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type, @@ -312,6 +317,9 @@ class CheckVarsEscapingDeclContext final OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) return; VD = cast<ValueDecl>(VD->getCanonicalDecl()); + // Use user-specified allocation. + if (VD->hasAttrs() && VD->hasAttr<OMPAllocateDeclAttr>()) + return; // Variables captured by value must be globalized. if (auto *CSI = CGF.CapturedStmtInfo) { if (const FieldDecl *FD = CSI->lookup(cast<VarDecl>(VD))) { @@ -419,7 +427,7 @@ class CheckVarsEscapingDeclContext final EscapedDeclsForParallel = EscapedDecls.getArrayRef(); GlobalizedRD = ::buildRecordForGlobalizedVars( CGF.getContext(), EscapedDeclsForParallel, EscapedDeclsForTeams, - MappedDeclsFields); + MappedDeclsFields, WarpSize); } public: @@ -705,112 +713,37 @@ getDataSharingMode(CodeGenModule &CGM) { : CGOpenMPRuntimeNVPTX::Generic; } -/// Checks if the expression is constant or does not have non-trivial function -/// calls. -static bool isTrivial(ASTContext &Ctx, const Expr * E) { - // We can skip constant expressions. 
- // We can skip expressions with trivial calls or simple expressions. - return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || - !E->hasNonTrivialCall(Ctx)) && - !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); -} - -/// Checks if the \p Body is the \a CompoundStmt and returns its child statement -/// iff there is only one that is not evaluatable at the compile time. -static const Stmt *getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body) { - if (const auto *C = dyn_cast<CompoundStmt>(Body)) { - const Stmt *Child = nullptr; - for (const Stmt *S : C->body()) { - if (const auto *E = dyn_cast<Expr>(S)) { - if (isTrivial(Ctx, E)) - continue; - } - // Some of the statements can be ignored. - if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || - isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) - continue; - // Analyze declarations. - if (const auto *DS = dyn_cast<DeclStmt>(S)) { - if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { - if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || - isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || - isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || - isa<UsingDirectiveDecl>(D) || - isa<OMPDeclareReductionDecl>(D) || - isa<OMPThreadPrivateDecl>(D)) - return true; - const auto *VD = dyn_cast<VarDecl>(D); - if (!VD) - return false; - return VD->isConstexpr() || - ((VD->getType().isTrivialType(Ctx) || - VD->getType()->isReferenceType()) && - (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); - })) - continue; - } - // Found multiple children - cannot get the one child only. - if (Child) - return Body; - Child = S; - } - if (Child) - return Child; - } - return Body; -} - -/// Check if the parallel directive has an 'if' clause with non-constant or -/// false condition. Also, check if the number of threads is strictly specified -/// and run those directives in non-SPMD mode. 
-static bool hasParallelIfNumThreadsClause(ASTContext &Ctx, - const OMPExecutableDirective &D) { - if (D.hasClausesOfKind<OMPNumThreadsClause>()) - return true; - for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { - OpenMPDirectiveKind NameModifier = C->getNameModifier(); - if (NameModifier != OMPD_parallel && NameModifier != OMPD_unknown) - continue; - const Expr *Cond = C->getCondition(); - bool Result; - if (!Cond->EvaluateAsBooleanCondition(Result, Ctx) || !Result) - return true; - } - return false; -} - /// Check for inner (nested) SPMD construct, if any static bool hasNestedSPMDDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { const auto *CS = D.getInnermostCapturedStmt(); const auto *Body = CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); - const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body); + const Stmt *ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body); - if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) { + if (const auto *NestedDir = + dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); switch (D.getDirectiveKind()) { case OMPD_target: - if (isOpenMPParallelDirective(DKind) && - !hasParallelIfNumThreadsClause(Ctx, *NestedDir)) + if (isOpenMPParallelDirective(DKind)) return true; if (DKind == OMPD_teams) { Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( /*IgnoreCaptured=*/true); if (!Body) return false; - ChildStmt = getSingleCompoundChild(Ctx, Body); - if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { + ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body); + if (const auto *NND = + dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { DKind = NND->getDirectiveKind(); - if (isOpenMPParallelDirective(DKind) && - !hasParallelIfNumThreadsClause(Ctx, *NND)) + if (isOpenMPParallelDirective(DKind)) return true; } } return false; case OMPD_target_teams: - return isOpenMPParallelDirective(DKind) && - !hasParallelIfNumThreadsClause(Ctx, *NestedDir); + return isOpenMPParallelDirective(DKind); case OMPD_target_simd: case OMPD_target_parallel: case OMPD_target_parallel_for: @@ -829,6 +762,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx, case OMPD_cancellation_point: case OMPD_ordered: case OMPD_threadprivate: + case OMPD_allocate: case OMPD_task: case OMPD_simd: case OMPD_sections: @@ -859,6 +793,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx, case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: + case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: case OMPD_requires: @@ -882,10 +817,10 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx, case OMPD_target_parallel_for_simd: case OMPD_target_teams_distribute_parallel_for: case OMPD_target_teams_distribute_parallel_for_simd: - return !hasParallelIfNumThreadsClause(Ctx, D); case OMPD_target_simd: - case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: + return true; + case OMPD_target_teams_distribute: return false; case OMPD_parallel: case OMPD_for: @@ -897,6 +832,7 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx, case OMPD_cancellation_point: case OMPD_ordered: case OMPD_threadprivate: + case OMPD_allocate: case OMPD_task: case OMPD_simd: case OMPD_sections: @@ -927,6 +863,7 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx, case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: + case OMPD_declare_mapper: case 
OMPD_taskloop: case OMPD_taskloop_simd: case OMPD_requires: @@ -958,9 +895,10 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, const auto *CS = D.getInnermostCapturedStmt(); const auto *Body = CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); - const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body); + const Stmt *ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body); - if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) { + if (const auto *NestedDir = + dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); switch (D.getDirectiveKind()) { case OMPD_target: @@ -968,13 +906,16 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NestedDir)) return true; + if (DKind == OMPD_teams_distribute_simd || DKind == OMPD_simd) + return true; if (DKind == OMPD_parallel) { Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( /*IgnoreCaptured=*/true); if (!Body) return false; - ChildStmt = getSingleCompoundChild(Ctx, Body); - if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { + ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body); + if (const auto *NND = + dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { DKind = NND->getDirectiveKind(); if (isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND)) @@ -985,8 +926,9 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, /*IgnoreCaptured=*/true); if (!Body) return false; - ChildStmt = getSingleCompoundChild(Ctx, Body); - if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { + ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body); + if (const auto *NND = + dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { DKind = NND->getDirectiveKind(); if (isOpenMPParallelDirective(DKind) && isOpenMPWorksharingDirective(DKind) && @@ -997,8 +939,9 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, /*IgnoreCaptured=*/true); if (!Body) return false; - ChildStmt = getSingleCompoundChild(Ctx, Body); - if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { + ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body); + if (const auto *NND = + dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { DKind = NND->getDirectiveKind(); if (isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND)) @@ -1013,13 +956,16 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NestedDir)) return true; + if (DKind == OMPD_distribute_simd || DKind == OMPD_simd) + return true; if (DKind == OMPD_parallel) { Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( /*IgnoreCaptured=*/true); if (!Body) return false; - ChildStmt = getSingleCompoundChild(Ctx, Body); - if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { + ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body); + if (const auto *NND = + dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { DKind = NND->getDirectiveKind(); if (isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND)) @@ -1028,6 +974,8 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, } return false; case OMPD_target_parallel: + if (DKind == OMPD_simd) + return true; return 
isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NestedDir); case OMPD_target_teams_distribute: @@ -1047,6 +995,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, case OMPD_cancellation_point: case OMPD_ordered: case OMPD_threadprivate: + case OMPD_allocate: case OMPD_task: case OMPD_simd: case OMPD_sections: @@ -1077,6 +1026,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: + case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: case OMPD_requires: @@ -1107,8 +1057,9 @@ static bool supportsLightweightRuntime(ASTContext &Ctx, // (Last|First)-privates must be shared in parallel region. return hasStaticScheduling(D); case OMPD_target_simd: - case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: + return true; + case OMPD_target_teams_distribute: return false; case OMPD_parallel: case OMPD_for: @@ -1120,6 +1071,7 @@ static bool supportsLightweightRuntime(ASTContext &Ctx, case OMPD_cancellation_point: case OMPD_ordered: case OMPD_threadprivate: + case OMPD_allocate: case OMPD_task: case OMPD_simd: case OMPD_sections: @@ -1150,6 +1102,7 @@ static bool supportsLightweightRuntime(ASTContext &Ctx, case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: + case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: case OMPD_requires: @@ -1512,14 +1465,14 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF, // directive. auto *ParallelFnTy = llvm::FunctionType::get(CGM.VoidTy, {CGM.Int16Ty, CGM.Int32Ty}, - /*isVarArg=*/false) - ->getPointerTo(); - llvm::Value *WorkFnCast = Bld.CreateBitCast(WorkID, ParallelFnTy); + /*isVarArg=*/false); + llvm::Value *WorkFnCast = + Bld.CreateBitCast(WorkID, ParallelFnTy->getPointerTo()); // Insert call to work function via shared wrapper. The shared // wrapper takes two arguments: // - the parallelism level; // - the thread ID; - emitCall(CGF, WST.Loc, WorkFnCast, + emitCall(CGF, WST.Loc, {ParallelFnTy, WorkFnCast}, {Bld.getInt16(/*ParallelLevel=*/0), getThreadID(CGF, WST.Loc)}); // Go to end of parallel region. CGF.EmitBranch(TerminateBB); @@ -1547,9 +1500,9 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF, /// implementation. Specialized for the NVPTX device. /// \param Function OpenMP runtime function. /// \return Specified function. 
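// [Editorial aside, not part of the patch: the signature below changes from
// returning llvm::Constant * to llvm::FunctionCallee, which bundles the
// callee with its llvm::FunctionType. A minimal consumption sketch, assuming
// the LLVM 9 API used elsewhere in this diff:
//
//   llvm::FunctionCallee RTLFn = CGM.CreateRuntimeFunction(FnTy, Name);
//   // Attributes still need the underlying llvm::Function:
//   cast<llvm::Function>(RTLFn.getCallee())
//       ->addFnAttr(llvm::Attribute::Convergent);
//   CGF.EmitRuntimeCall(RTLFn, Args); // call type taken from getFunctionType()
//
// This matches the RTLFn.getCallee() and getFunctionType() uses in the hunks
// that follow.]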
-llvm::Constant * +llvm::FunctionCallee CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { - llvm::Constant *RTLFn = nullptr; + llvm::FunctionCallee RTLFn = nullptr; switch (static_cast<OpenMPRTLFunctionNVPTX>(Function)) { case OMPRTL_NVPTX__kmpc_kernel_init: { // Build void __kmpc_kernel_init(kmp_int32 thread_limit, int16_t @@ -1647,7 +1600,7 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int64"); break; } - case OMPRTL_NVPTX__kmpc_parallel_reduce_nowait_v2: { + case OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2: { // Build int32_t kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc, // kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, void* // reduce_data, void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t @@ -1684,28 +1637,47 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { FnTy, /*Name=*/"__kmpc_nvptx_end_reduce_nowait"); break; } - case OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_simple: { - // Build __kmpc_nvptx_teams_reduce_nowait_simple(ident_t *loc, kmp_int32 - // global_tid, kmp_critical_name *lck) - llvm::Type *TypeParams[] = { - getIdentTyPointerTy(), CGM.Int32Ty, - llvm::PointerType::getUnqual(getKmpCriticalNameTy())}; + case OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2: { + // Build int32_t __kmpc_nvptx_teams_reduce_nowait_v2(ident_t *loc, kmp_int32 + // global_tid, void *global_buffer, int32_t num_of_records, void* + // reduce_data, + // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t + // lane_offset, int16_t shortCircuit), + // void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), void + // (*kmp_ListToGlobalCpyFctPtr)(void *buffer, int idx, void *reduce_data), + // void (*kmp_GlobalToListCpyFctPtr)(void *buffer, int idx, + // void *reduce_data), void (*kmp_GlobalToListCpyPtrsFctPtr)(void *buffer, + // int idx, void *reduce_data), void (*kmp_GlobalToListRedFctPtr)(void + // *buffer, int idx, void *reduce_data)); + llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty, + CGM.Int16Ty, CGM.Int16Ty}; + auto *ShuffleReduceFnTy = + llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams, + /*isVarArg=*/false); + llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty}; + auto *InterWarpCopyFnTy = + llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams, + /*isVarArg=*/false); + llvm::Type *GlobalListTypeParams[] = {CGM.VoidPtrTy, CGM.IntTy, + CGM.VoidPtrTy}; + auto *GlobalListFnTy = + llvm::FunctionType::get(CGM.VoidTy, GlobalListTypeParams, + /*isVarArg=*/false); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), + CGM.Int32Ty, + CGM.VoidPtrTy, + CGM.Int32Ty, + CGM.VoidPtrTy, + ShuffleReduceFnTy->getPointerTo(), + InterWarpCopyFnTy->getPointerTo(), + GlobalListFnTy->getPointerTo(), + GlobalListFnTy->getPointerTo(), + GlobalListFnTy->getPointerTo(), + GlobalListFnTy->getPointerTo()}; auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait_simple"); - break; - } - case OMPRTL_NVPTX__kmpc_nvptx_teams_end_reduce_nowait_simple: { - // Build __kmpc_nvptx_teams_end_reduce_nowait_simple(ident_t *loc, kmp_int32 - // global_tid, kmp_critical_name *lck) - llvm::Type *TypeParams[] = { - getIdentTyPointerTy(), CGM.Int32Ty, - llvm::PointerType::getUnqual(getKmpCriticalNameTy())}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - 
RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_nvptx_teams_end_reduce_nowait_simple"); + FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait_v2"); break; } case OMPRTL_NVPTX__kmpc_data_sharing_init_stack: { @@ -1806,7 +1778,8 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); - cast<llvm::Function>(RTLFn)->addFnAttr(llvm::Attribute::Convergent); + cast<llvm::Function>(RTLFn.getCallee()) + ->addFnAttr(llvm::Attribute::Convergent); break; } case OMPRTL__kmpc_barrier_simple_spmd: { @@ -1817,7 +1790,8 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier_simple_spmd"); - cast<llvm::Function>(RTLFn)->addFnAttr(llvm::Attribute::Convergent); + cast<llvm::Function>(RTLFn.getCallee()) + ->addFnAttr(llvm::Attribute::Convergent); break; } } @@ -1928,7 +1902,7 @@ void CGOpenMPRuntimeNVPTX::emitNumTeamsClause(CodeGenFunction &CGF, const Expr *ThreadLimit, SourceLocation Loc) {} -llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction( +llvm::Function *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { // Emit target region as a standalone region. @@ -1955,6 +1929,11 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction( auto *OutlinedFun = cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction( D, ThreadIDVar, InnermostKind, CodeGen)); + if (CGM.getLangOpts().Optimize) { + OutlinedFun->removeFnAttr(llvm::Attribute::NoInline); + OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone); + OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline); + } IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion; IsInTTDRegion = PrevIsInTTDRegion; if (getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD && @@ -1976,11 +1955,11 @@ getDistributeLastprivateVars(ASTContext &Ctx, const OMPExecutableDirective &D, "expected teams directive."); const OMPExecutableDirective *Dir = &D; if (!isOpenMPDistributeDirective(D.getDirectiveKind())) { - if (const Stmt *S = getSingleCompoundChild( + if (const Stmt *S = CGOpenMPRuntime::getSingleCompoundChild( Ctx, D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers( /*IgnoreCaptured=*/true))) { - Dir = dyn_cast<OMPExecutableDirective>(S); + Dir = dyn_cast_or_null<OMPExecutableDirective>(S); if (Dir && !isOpenMPDistributeDirective(Dir->getDirectiveKind())) Dir = nullptr; } @@ -2005,7 +1984,7 @@ getTeamsReductionVars(ASTContext &Ctx, const OMPExecutableDirective &D, } } -llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction( +llvm::Function *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { SourceLocation Loc = D.getBeginLoc(); @@ -2014,13 +1993,14 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction( llvm::SmallVector<const ValueDecl *, 4> LastPrivatesReductions; llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields; // Globalize team reductions variable unconditionally in all modes. 
- getTeamsReductionVars(CGM.getContext(), D, LastPrivatesReductions); + if (getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD) + getTeamsReductionVars(CGM.getContext(), D, LastPrivatesReductions); if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) { getDistributeLastprivateVars(CGM.getContext(), D, LastPrivatesReductions); if (!LastPrivatesReductions.empty()) { GlobalizedRD = ::buildRecordForGlobalizedVars( CGM.getContext(), llvm::None, LastPrivatesReductions, - MappedDeclsFields); + MappedDeclsFields, WarpSize); } } else if (!LastPrivatesReductions.empty()) { assert(!TeamAndReductions.first && @@ -2068,12 +2048,13 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction( } } Action(Loc, GlobalizedRD, MappedDeclsFields); CodeGen.setAction(Action); - llvm::Value *OutlinedFunVal = CGOpenMPRuntime::emitTeamsOutlinedFunction( + llvm::Function *OutlinedFun = CGOpenMPRuntime::emitTeamsOutlinedFunction( D, ThreadIDVar, InnermostKind, CodeGen); - llvm::Function *OutlinedFun = cast<llvm::Function>(OutlinedFunVal); - OutlinedFun->removeFnAttr(llvm::Attribute::NoInline); - OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone); - OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline); + if (CGM.getLangOpts().Optimize) { + OutlinedFun->removeFnAttr(llvm::Attribute::NoInline); + OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone); + OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline); + } return OutlinedFun; } @@ -2235,8 +2216,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF, .getPointerType(CGM.getContext().VoidPtrTy) .castAs<PointerType>()); llvm::Value *GlobalRecValue = - Bld.CreateConstInBoundsGEP(FrameAddr, Offset, CharUnits::One()) - .getPointer(); + Bld.CreateConstInBoundsGEP(FrameAddr, Offset).getPointer(); I->getSecond().GlobalRecordAddr = GlobalRecValue; I->getSecond().IsInSPMDModeFlag = nullptr; GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( @@ -2429,7 +2409,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF, void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, - llvm::Value *OutlinedFn, + llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars) { if (!CGF.HaveInsertPoint()) return; @@ -2446,7 +2426,7 @@ void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF, } void CGOpenMPRuntimeNVPTX::emitParallelCall( - CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, + CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) { if (!CGF.HaveInsertPoint()) return; @@ -2536,8 +2516,7 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall( SharedArgs, Ctx.getPointerType(Ctx.getPointerType(Ctx.VoidPtrTy)) .castAs<PointerType>()); for (llvm::Value *V : CapturedVars) { - Address Dst = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx, - CGF.getPointerSize()); + Address Dst = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx); llvm::Value *PtrV; if (V->getType()->isIntegerTy()) PtrV = Bld.CreateIntToPtr(V, CGF.VoidPtrTy); @@ -2625,7 +2604,7 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall( } void CGOpenMPRuntimeNVPTX::emitSPMDParallelCall( - CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, + CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) { // Just call the outlined function to execute the parallel region. 
  // OutlinedFn(&GTid, &zero, CapturedStruct);
@@ -2846,7 +2825,7 @@ static void shuffleAndStore(CodeGenFunction &CGF, Address SrcAddr,
   Address ElemPtr = DestAddr;
   Address Ptr = SrcAddr;
   Address PtrEnd = Bld.CreatePointerBitCastOrAddrSpaceCast(
-      Bld.CreateConstGEP(SrcAddr, 1, Size), CGF.VoidPtrTy);
+      Bld.CreateConstGEP(SrcAddr, 1), CGF.VoidPtrTy);
   for (int IntSize = 8; IntSize >= 1; IntSize /= 2) {
     if (Size < CharUnits::fromQuantity(IntSize))
       continue;
@@ -2881,10 +2860,8 @@ static void shuffleAndStore(CodeGenFunction &CGF, Address SrcAddr,
         CGF, CGF.EmitLoadOfScalar(Ptr, /*Volatile=*/false, IntType, Loc),
         IntType, Offset, Loc);
     CGF.EmitStoreOfScalar(Res, ElemPtr, /*Volatile=*/false, IntType);
-    Address LocalPtr =
-        Bld.CreateConstGEP(Ptr, 1, CharUnits::fromQuantity(IntSize));
-    Address LocalElemPtr =
-        Bld.CreateConstGEP(ElemPtr, 1, CharUnits::fromQuantity(IntSize));
+    Address LocalPtr = Bld.CreateConstGEP(Ptr, 1);
+    Address LocalElemPtr = Bld.CreateConstGEP(ElemPtr, 1);
     PhiSrc->addIncoming(LocalPtr.getPointer(), ThenBB);
     PhiDest->addIncoming(LocalElemPtr.getPointer(), ThenBB);
     CGF.EmitBranch(PreCondBB);
@@ -2894,9 +2871,8 @@ static void shuffleAndStore(CodeGenFunction &CGF, Address SrcAddr,
         CGF, CGF.EmitLoadOfScalar(Ptr, /*Volatile=*/false, IntType, Loc),
         IntType, Offset, Loc);
     CGF.EmitStoreOfScalar(Res, ElemPtr, /*Volatile=*/false, IntType);
-    Ptr = Bld.CreateConstGEP(Ptr, 1, CharUnits::fromQuantity(IntSize));
-    ElemPtr =
-        Bld.CreateConstGEP(ElemPtr, 1, CharUnits::fromQuantity(IntSize));
+    Ptr = Bld.CreateConstGEP(Ptr, 1);
+    ElemPtr = Bld.CreateConstGEP(ElemPtr, 1);
    }
    Size = Size % IntSize;
  }
@@ -2959,16 +2935,14 @@ static void emitReductionListCopy(
     switch (Action) {
     case RemoteLaneToThread: {
       // Step 1.1: Get the address for the src element in the Reduce list.
-      Address SrcElementPtrAddr =
-          Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize());
+      Address SrcElementPtrAddr = Bld.CreateConstArrayGEP(SrcBase, Idx);
       SrcElementAddr = CGF.EmitLoadOfPointer(
           SrcElementPtrAddr,
           C.getPointerType(Private->getType())->castAs<PointerType>());
       // Step 1.2: Create a temporary to store the element in the destination
       // Reduce list.
-      DestElementPtrAddr =
-          Bld.CreateConstArrayGEP(DestBase, Idx, CGF.getPointerSize());
+      DestElementPtrAddr = Bld.CreateConstArrayGEP(DestBase, Idx);
       DestElementAddr =
           CGF.CreateMemTemp(Private->getType(), ".omp.reduction.element");
       ShuffleInElement = true;
@@ -2977,16 +2951,14 @@
     }
     case ThreadCopy: {
       // Step 1.1: Get the address for the src element in the Reduce list.
-      Address SrcElementPtrAddr =
-          Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize());
+      Address SrcElementPtrAddr = Bld.CreateConstArrayGEP(SrcBase, Idx);
       SrcElementAddr = CGF.EmitLoadOfPointer(
           SrcElementPtrAddr,
           C.getPointerType(Private->getType())->castAs<PointerType>());
       // Step 1.2: Get the address for dest element. The destination
       // element has already been created on the thread's stack.
-      DestElementPtrAddr =
-          Bld.CreateConstArrayGEP(DestBase, Idx, CGF.getPointerSize());
+      DestElementPtrAddr = Bld.CreateConstArrayGEP(DestBase, Idx);
       DestElementAddr = CGF.EmitLoadOfPointer(
           DestElementPtrAddr,
           C.getPointerType(Private->getType())->castAs<PointerType>());
@@ -2994,8 +2966,7 @@
     }
     case ThreadToScratchpad: {
       // Step 1.1: Get the address for the src element in the Reduce list.
- Address SrcElementPtrAddr = - Bld.CreateConstArrayGEP(SrcBase, Idx, CGF.getPointerSize()); + Address SrcElementPtrAddr = Bld.CreateConstArrayGEP(SrcBase, Idx); SrcElementAddr = CGF.EmitLoadOfPointer( SrcElementPtrAddr, C.getPointerType(Private->getType())->castAs<PointerType>()); @@ -3030,8 +3001,7 @@ static void emitReductionListCopy( // Step 1.2: Create a temporary to store the element in the destination // Reduce list. - DestElementPtrAddr = - Bld.CreateConstArrayGEP(DestBase, Idx, CGF.getPointerSize()); + DestElementPtrAddr = Bld.CreateConstArrayGEP(DestBase, Idx); DestElementAddr = CGF.CreateMemTemp(Private->getType(), ".omp.reduction.element"); UpdateDestListPtr = true; @@ -3052,18 +3022,31 @@ static void emitReductionListCopy( shuffleAndStore(CGF, SrcElementAddr, DestElementAddr, Private->getType(), RemoteLaneOffset, Private->getExprLoc()); } else { - if (Private->getType()->isScalarType()) { + switch (CGF.getEvaluationKind(Private->getType())) { + case TEK_Scalar: { llvm::Value *Elem = CGF.EmitLoadOfScalar(SrcElementAddr, /*Volatile=*/false, Private->getType(), Private->getExprLoc()); // Store the source element value to the dest element address. CGF.EmitStoreOfScalar(Elem, DestElementAddr, /*Volatile=*/false, Private->getType()); - } else { + break; + } + case TEK_Complex: { + CodeGenFunction::ComplexPairTy Elem = CGF.EmitLoadOfComplex( + CGF.MakeAddrLValue(SrcElementAddr, Private->getType()), + Private->getExprLoc()); + CGF.EmitStoreOfComplex( + Elem, CGF.MakeAddrLValue(DestElementAddr, Private->getType()), + /*isInit=*/false); + break; + } + case TEK_Aggregate: CGF.EmitAggregateCopy( CGF.MakeAddrLValue(DestElementAddr, Private->getType()), CGF.MakeAddrLValue(SrcElementAddr, Private->getType()), Private->getType(), AggValueSlot::DoesNotOverlap); + break; } } @@ -3147,9 +3130,9 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, const CGFunctionInfo &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); - auto *Fn = llvm::Function::Create( - CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, - "_omp_reduction_inter_warp_copy_func", &CGM.getModule()); + auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), + llvm::GlobalValue::InternalLinkage, + "_omp_reduction_inter_warp_copy_func", &M); CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); Fn->setDoesNotRecurse(); CodeGenFunction CGF(CGM); @@ -3246,8 +3229,7 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, CGF.EmitBlock(ThenBB); // Reduce element = LocalReduceList[i] - Address ElemPtrPtrAddr = - Bld.CreateConstArrayGEP(LocalReduceList, Idx, CGF.getPointerSize()); + Address ElemPtrPtrAddr = Bld.CreateConstArrayGEP(LocalReduceList, Idx); llvm::Value *ElemPtrPtr = CGF.EmitLoadOfScalar( ElemPtrPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); // elemptr = ((CopyType*)(elemptrptr)) + I @@ -3313,8 +3295,7 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, SrcMediumPtr = Bld.CreateElementBitCast(SrcMediumPtr, CopyType); // TargetElemPtr = (CopyType*)(SrcDataAddr[i]) + I - Address TargetElemPtrPtr = - Bld.CreateConstArrayGEP(LocalReduceList, Idx, CGF.getPointerSize()); + Address TargetElemPtrPtr = Bld.CreateConstArrayGEP(LocalReduceList, Idx); llvm::Value *TargetElemPtrVal = CGF.EmitLoadOfScalar( TargetElemPtrPtr, /*Volatile=*/false, C.VoidPtrTy, Loc); Address TargetElemPtr = Address(TargetElemPtrVal, Align); @@ -3418,9 +3399,9 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, /// 
(2k+1)th thread is ignored in the value aggregation. Therefore /// we copy the Reduce list from the (2k+1)th lane to (k+1)th lane so /// that the contiguity assumption still holds. -static llvm::Value *emitShuffleAndReduceFunction( +static llvm::Function *emitShuffleAndReduceFunction( CodeGenModule &CGM, ArrayRef<const Expr *> Privates, - QualType ReductionArrayTy, llvm::Value *ReduceFn, SourceLocation Loc) { + QualType ReductionArrayTy, llvm::Function *ReduceFn, SourceLocation Loc) { ASTContext &C = CGM.getContext(); // Thread local Reduce list used to host the values of data to be reduced. @@ -3448,6 +3429,12 @@ static llvm::Value *emitShuffleAndReduceFunction( "_omp_reduction_shuffle_and_reduce_func", &CGM.getModule()); CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); Fn->setDoesNotRecurse(); + if (CGM.getLangOpts().Optimize) { + Fn->removeFnAttr(llvm::Attribute::NoInline); + Fn->removeFnAttr(llvm::Attribute::OptimizeNone); + Fn->addFnAttr(llvm::Attribute::AlwaysInline); + } + CodeGenFunction CGF(CGM); CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); @@ -3568,6 +3555,406 @@ static llvm::Value *emitShuffleAndReduceFunction( return Fn; } +/// This function emits a helper that copies all the reduction variables from +/// the team into the provided global buffer for the reduction variables. +/// +/// void list_to_global_copy_func(void *buffer, int Idx, void *reduce_data) +/// For all data entries D in reduce_data: +/// Copy local D to buffer.D[Idx] +static llvm::Value *emitListToGlobalCopyFunction( + CodeGenModule &CGM, ArrayRef<const Expr *> Privates, + QualType ReductionArrayTy, SourceLocation Loc, + const RecordDecl *TeamReductionRec, + const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> + &VarFieldMap) { + ASTContext &C = CGM.getContext(); + + // Buffer: global reduction buffer. + ImplicitParamDecl BufferArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.VoidPtrTy, ImplicitParamDecl::Other); + // Idx: index of the buffer. + ImplicitParamDecl IdxArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, + ImplicitParamDecl::Other); + // ReduceList: thread local Reduce list. 
+ ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.VoidPtrTy, ImplicitParamDecl::Other); + FunctionArgList Args; + Args.push_back(&BufferArg); + Args.push_back(&IdxArg); + Args.push_back(&ReduceListArg); + + const CGFunctionInfo &CGFI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *Fn = llvm::Function::Create( + CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, + "_omp_reduction_list_to_global_copy_func", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); + Fn->setDoesNotRecurse(); + CodeGenFunction CGF(CGM); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); + + CGBuilderTy &Bld = CGF.Builder; + + Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); + Address AddrBufferArg = CGF.GetAddrOfLocalVar(&BufferArg); + Address LocalReduceList( + Bld.CreatePointerBitCastOrAddrSpaceCast( + CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false, + C.VoidPtrTy, Loc), + CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()), + CGF.getPointerAlign()); + QualType StaticTy = C.getRecordType(TeamReductionRec); + llvm::Type *LLVMReductionsBufferTy = + CGM.getTypes().ConvertTypeForMem(StaticTy); + llvm::Value *BufferArrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( + CGF.EmitLoadOfScalar(AddrBufferArg, /*Volatile=*/false, C.VoidPtrTy, Loc), + LLVMReductionsBufferTy->getPointerTo()); + llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty), + CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg), + /*Volatile=*/false, C.IntTy, + Loc)}; + unsigned Idx = 0; + for (const Expr *Private : Privates) { + // Reduce element = LocalReduceList[i] + Address ElemPtrPtrAddr = Bld.CreateConstArrayGEP(LocalReduceList, Idx); + llvm::Value *ElemPtrPtr = CGF.EmitLoadOfScalar( + ElemPtrPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); + // elemptr = ((CopyType*)(elemptrptr)) + I + ElemPtrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( + ElemPtrPtr, CGF.ConvertTypeForMem(Private->getType())->getPointerTo()); + Address ElemPtr = + Address(ElemPtrPtr, C.getTypeAlignInChars(Private->getType())); + const ValueDecl *VD = cast<DeclRefExpr>(Private)->getDecl(); + // Global = Buffer.VD[Idx]; + const FieldDecl *FD = VarFieldMap.lookup(VD); + LValue GlobLVal = CGF.EmitLValueForField( + CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD); + llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobLVal.getPointer(), Idxs); + GlobLVal.setAddress(Address(BufferPtr, GlobLVal.getAlignment())); + switch (CGF.getEvaluationKind(Private->getType())) { + case TEK_Scalar: { + llvm::Value *V = CGF.EmitLoadOfScalar(ElemPtr, /*Volatile=*/false, + Private->getType(), Loc); + CGF.EmitStoreOfScalar(V, GlobLVal); + break; + } + case TEK_Complex: { + CodeGenFunction::ComplexPairTy V = CGF.EmitLoadOfComplex( + CGF.MakeAddrLValue(ElemPtr, Private->getType()), Loc); + CGF.EmitStoreOfComplex(V, GlobLVal, /*isInit=*/false); + break; + } + case TEK_Aggregate: + CGF.EmitAggregateCopy(GlobLVal, + CGF.MakeAddrLValue(ElemPtr, Private->getType()), + Private->getType(), AggValueSlot::DoesNotOverlap); + break; + } + ++Idx; + } + + CGF.FinishFunction(); + return Fn; +} + +/// This function emits a helper that reduces all the reduction variables from +/// the team into the provided global buffer for the reduction variables. +/// +/// void list_to_global_reduce_func(void *buffer, int Idx, void *reduce_data) +/// void *GlobPtrs[]; +/// GlobPtrs[0] = (void*)&buffer.D0[Idx]; +/// ... 
+/// GlobPtrs[N] = (void*)&buffer.DN[Idx]; +/// reduce_function(GlobPtrs, reduce_data); +static llvm::Value *emitListToGlobalReduceFunction( + CodeGenModule &CGM, ArrayRef<const Expr *> Privates, + QualType ReductionArrayTy, SourceLocation Loc, + const RecordDecl *TeamReductionRec, + const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> + &VarFieldMap, + llvm::Function *ReduceFn) { + ASTContext &C = CGM.getContext(); + + // Buffer: global reduction buffer. + ImplicitParamDecl BufferArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.VoidPtrTy, ImplicitParamDecl::Other); + // Idx: index of the buffer. + ImplicitParamDecl IdxArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, + ImplicitParamDecl::Other); + // ReduceList: thread local Reduce list. + ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.VoidPtrTy, ImplicitParamDecl::Other); + FunctionArgList Args; + Args.push_back(&BufferArg); + Args.push_back(&IdxArg); + Args.push_back(&ReduceListArg); + + const CGFunctionInfo &CGFI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *Fn = llvm::Function::Create( + CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, + "_omp_reduction_list_to_global_reduce_func", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); + Fn->setDoesNotRecurse(); + CodeGenFunction CGF(CGM); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); + + CGBuilderTy &Bld = CGF.Builder; + + Address AddrBufferArg = CGF.GetAddrOfLocalVar(&BufferArg); + QualType StaticTy = C.getRecordType(TeamReductionRec); + llvm::Type *LLVMReductionsBufferTy = + CGM.getTypes().ConvertTypeForMem(StaticTy); + llvm::Value *BufferArrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( + CGF.EmitLoadOfScalar(AddrBufferArg, /*Volatile=*/false, C.VoidPtrTy, Loc), + LLVMReductionsBufferTy->getPointerTo()); + + // 1. Build a list of reduction variables. + // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; + Address ReductionList = + CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); + auto IPriv = Privates.begin(); + llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty), + CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg), + /*Volatile=*/false, C.IntTy, + Loc)}; + unsigned Idx = 0; + for (unsigned I = 0, E = Privates.size(); I < E; ++I, ++IPriv, ++Idx) { + Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); + // Global = Buffer.VD[Idx]; + const ValueDecl *VD = cast<DeclRefExpr>(*IPriv)->getDecl(); + const FieldDecl *FD = VarFieldMap.lookup(VD); + LValue GlobLVal = CGF.EmitLValueForField( + CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD); + llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobLVal.getPointer(), Idxs); + llvm::Value *Ptr = CGF.EmitCastToVoidPtr(BufferPtr); + CGF.EmitStoreOfScalar(Ptr, Elem, /*Volatile=*/false, C.VoidPtrTy); + if ((*IPriv)->getType()->isVariablyModifiedType()) { + // Store array size. 
+      ++Idx;
+      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
+      llvm::Value *Size = CGF.Builder.CreateIntCast(
+          CGF.getVLASize(
+                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
+              .NumElts,
+          CGF.SizeTy, /*isSigned=*/false);
+      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
+                              Elem);
+    }
+  }
+
+  // Call reduce_function(GlobalReduceList, ReduceList)
+  llvm::Value *GlobalReduceList =
+      CGF.EmitCastToVoidPtr(ReductionList.getPointer());
+  Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
+  llvm::Value *ReducedPtr = CGF.EmitLoadOfScalar(
+      AddrReduceListArg, /*Volatile=*/false, C.VoidPtrTy, Loc);
+  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
+      CGF, Loc, ReduceFn, {GlobalReduceList, ReducedPtr});
+  CGF.FinishFunction();
+  return Fn;
+}
+
+/// This function emits a helper that copies all the reduction variables back
+/// from the provided global buffer into the team's thread-local reduce list.
+///
+/// void global_to_list_copy_func(void *buffer, int Idx, void *reduce_data)
+/// For all data entries D in reduce_data:
+/// Copy buffer.D[Idx] to local D;
+static llvm::Value *emitGlobalToListCopyFunction(
+    CodeGenModule &CGM, ArrayRef<const Expr *> Privates,
+    QualType ReductionArrayTy, SourceLocation Loc,
+    const RecordDecl *TeamReductionRec,
+    const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
+        &VarFieldMap) {
+  ASTContext &C = CGM.getContext();
+
+  // Buffer: global reduction buffer.
+  ImplicitParamDecl BufferArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+                              C.VoidPtrTy, ImplicitParamDecl::Other);
+  // Idx: index of the buffer.
+  ImplicitParamDecl IdxArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
+                           ImplicitParamDecl::Other);
+  // ReduceList: thread local Reduce list.
+  ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+                                  C.VoidPtrTy, ImplicitParamDecl::Other);
+  FunctionArgList Args;
+  Args.push_back(&BufferArg);
+  Args.push_back(&IdxArg);
+  Args.push_back(&ReduceListArg);
+
+  const CGFunctionInfo &CGFI =
+      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+  auto *Fn = llvm::Function::Create(
+      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
+      "_omp_reduction_global_to_list_copy_func", &CGM.getModule());
+  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
+  Fn->setDoesNotRecurse();
+  CodeGenFunction CGF(CGM);
+  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
+
+  CGBuilderTy &Bld = CGF.Builder;
+
+  Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
+  Address AddrBufferArg = CGF.GetAddrOfLocalVar(&BufferArg);
+  Address LocalReduceList(
+      Bld.CreatePointerBitCastOrAddrSpaceCast(
+          CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false,
+                               C.VoidPtrTy, Loc),
+          CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()),
+      CGF.getPointerAlign());
+  QualType StaticTy = C.getRecordType(TeamReductionRec);
+  llvm::Type *LLVMReductionsBufferTy =
+      CGM.getTypes().ConvertTypeForMem(StaticTy);
+  llvm::Value *BufferArrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+      CGF.EmitLoadOfScalar(AddrBufferArg, /*Volatile=*/false, C.VoidPtrTy, Loc),
+      LLVMReductionsBufferTy->getPointerTo());
+
+  llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty),
+                         CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
+                                              /*Volatile=*/false, C.IntTy,
+                                              Loc)};
+  unsigned Idx = 0;
+  for (const Expr *Private : Privates) {
+    // Reduce element = LocalReduceList[i]
+    Address ElemPtrPtrAddr = Bld.CreateConstArrayGEP(LocalReduceList, Idx);
+
llvm::Value *ElemPtrPtr = CGF.EmitLoadOfScalar( + ElemPtrPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); + // elemptr = ((CopyType*)(elemptrptr)) + I + ElemPtrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( + ElemPtrPtr, CGF.ConvertTypeForMem(Private->getType())->getPointerTo()); + Address ElemPtr = + Address(ElemPtrPtr, C.getTypeAlignInChars(Private->getType())); + const ValueDecl *VD = cast<DeclRefExpr>(Private)->getDecl(); + // Global = Buffer.VD[Idx]; + const FieldDecl *FD = VarFieldMap.lookup(VD); + LValue GlobLVal = CGF.EmitLValueForField( + CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD); + llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobLVal.getPointer(), Idxs); + GlobLVal.setAddress(Address(BufferPtr, GlobLVal.getAlignment())); + switch (CGF.getEvaluationKind(Private->getType())) { + case TEK_Scalar: { + llvm::Value *V = CGF.EmitLoadOfScalar(GlobLVal, Loc); + CGF.EmitStoreOfScalar(V, ElemPtr, /*Volatile=*/false, Private->getType()); + break; + } + case TEK_Complex: { + CodeGenFunction::ComplexPairTy V = CGF.EmitLoadOfComplex(GlobLVal, Loc); + CGF.EmitStoreOfComplex(V, CGF.MakeAddrLValue(ElemPtr, Private->getType()), + /*isInit=*/false); + break; + } + case TEK_Aggregate: + CGF.EmitAggregateCopy(CGF.MakeAddrLValue(ElemPtr, Private->getType()), + GlobLVal, Private->getType(), + AggValueSlot::DoesNotOverlap); + break; + } + ++Idx; + } + + CGF.FinishFunction(); + return Fn; +} + +/// This function emits a helper that reduces all the reduction variables from +/// the team into the provided global buffer for the reduction variables. +/// +/// void global_to_list_reduce_func(void *buffer, int Idx, void *reduce_data) +/// void *GlobPtrs[]; +/// GlobPtrs[0] = (void*)&buffer.D0[Idx]; +/// ... +/// GlobPtrs[N] = (void*)&buffer.DN[Idx]; +/// reduce_function(reduce_data, GlobPtrs); +static llvm::Value *emitGlobalToListReduceFunction( + CodeGenModule &CGM, ArrayRef<const Expr *> Privates, + QualType ReductionArrayTy, SourceLocation Loc, + const RecordDecl *TeamReductionRec, + const llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> + &VarFieldMap, + llvm::Function *ReduceFn) { + ASTContext &C = CGM.getContext(); + + // Buffer: global reduction buffer. + ImplicitParamDecl BufferArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.VoidPtrTy, ImplicitParamDecl::Other); + // Idx: index of the buffer. + ImplicitParamDecl IdxArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, + ImplicitParamDecl::Other); + // ReduceList: thread local Reduce list. 
+ ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.VoidPtrTy, ImplicitParamDecl::Other); + FunctionArgList Args; + Args.push_back(&BufferArg); + Args.push_back(&IdxArg); + Args.push_back(&ReduceListArg); + + const CGFunctionInfo &CGFI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *Fn = llvm::Function::Create( + CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, + "_omp_reduction_global_to_list_reduce_func", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); + Fn->setDoesNotRecurse(); + CodeGenFunction CGF(CGM); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); + + CGBuilderTy &Bld = CGF.Builder; + + Address AddrBufferArg = CGF.GetAddrOfLocalVar(&BufferArg); + QualType StaticTy = C.getRecordType(TeamReductionRec); + llvm::Type *LLVMReductionsBufferTy = + CGM.getTypes().ConvertTypeForMem(StaticTy); + llvm::Value *BufferArrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( + CGF.EmitLoadOfScalar(AddrBufferArg, /*Volatile=*/false, C.VoidPtrTy, Loc), + LLVMReductionsBufferTy->getPointerTo()); + + // 1. Build a list of reduction variables. + // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; + Address ReductionList = + CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); + auto IPriv = Privates.begin(); + llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty), + CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg), + /*Volatile=*/false, C.IntTy, + Loc)}; + unsigned Idx = 0; + for (unsigned I = 0, E = Privates.size(); I < E; ++I, ++IPriv, ++Idx) { + Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); + // Global = Buffer.VD[Idx]; + const ValueDecl *VD = cast<DeclRefExpr>(*IPriv)->getDecl(); + const FieldDecl *FD = VarFieldMap.lookup(VD); + LValue GlobLVal = CGF.EmitLValueForField( + CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD); + llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobLVal.getPointer(), Idxs); + llvm::Value *Ptr = CGF.EmitCastToVoidPtr(BufferPtr); + CGF.EmitStoreOfScalar(Ptr, Elem, /*Volatile=*/false, C.VoidPtrTy); + if ((*IPriv)->getType()->isVariablyModifiedType()) { + // Store array size. + ++Idx; + Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); + llvm::Value *Size = CGF.Builder.CreateIntCast( + CGF.getVLASize( + CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) + .NumElts, + CGF.SizeTy, /*isSigned=*/false); + CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), + Elem); + } + } + + // Call reduce_function(ReduceList, GlobalReduceList) + llvm::Value *GlobalReduceList = + CGF.EmitCastToVoidPtr(ReductionList.getPointer()); + Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); + llvm::Value *ReducedPtr = CGF.EmitLoadOfScalar( + AddrReduceListArg, /*Volatile=*/false, C.VoidPtrTy, Loc); + CGM.getOpenMPRuntime().emitOutlinedFunctionCall( + CGF, Loc, ReduceFn, {ReducedPtr, GlobalReduceList}); + CGF.FinishFunction(); + return Fn; +} + /// /// Design of OpenMP reductions on the GPU /// @@ -3841,57 +4228,55 @@ void CGOpenMPRuntimeNVPTX::emitReduction( llvm::Value *ThreadId = getThreadID(CGF, Loc); llvm::Value *Res; - if (ParallelReduction) { - ASTContext &C = CGM.getContext(); - // 1. Build a list of reduction variables. 
- // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; - auto Size = RHSExprs.size(); - for (const Expr *E : Privates) { - if (E->getType()->isVariablyModifiedType()) - // Reserve place for array size. - ++Size; - } - llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); - QualType ReductionArrayTy = - C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, - /*IndexTypeQuals=*/0); - Address ReductionList = - CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); - auto IPriv = Privates.begin(); - unsigned Idx = 0; - for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { - Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, - CGF.getPointerSize()); - CGF.Builder.CreateStore( - CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), - Elem); - if ((*IPriv)->getType()->isVariablyModifiedType()) { - // Store array size. - ++Idx; - Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, - CGF.getPointerSize()); - llvm::Value *Size = CGF.Builder.CreateIntCast( - CGF.getVLASize( - CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) - .NumElts, - CGF.SizeTy, /*isSigned=*/false); - CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), - Elem); - } + ASTContext &C = CGM.getContext(); + // 1. Build a list of reduction variables. + // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; + auto Size = RHSExprs.size(); + for (const Expr *E : Privates) { + if (E->getType()->isVariablyModifiedType()) + // Reserve place for array size. + ++Size; + } + llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); + QualType ReductionArrayTy = + C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, + /*IndexTypeQuals=*/0); + Address ReductionList = + CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); + auto IPriv = Privates.begin(); + unsigned Idx = 0; + for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { + Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); + CGF.Builder.CreateStore( + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), + Elem); + if ((*IPriv)->getType()->isVariablyModifiedType()) { + // Store array size. 
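// [Editorial note, not part of the patch: for variably modified types the
// reduction list reserves a second slot for the dynamic element count, so
// for a VLA `int a[n]` the list is laid out, illustratively, as
//   RedList = { &a[0], (void *)n, ... };
// with the count carried through the void* slot via CreateIntToPtr.]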
+ ++Idx; + Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); + llvm::Value *Size = CGF.Builder.CreateIntCast( + CGF.getVLASize( + CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) + .NumElts, + CGF.SizeTy, /*isSigned=*/false); + CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), + Elem); } + } - llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); - llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - ReductionList.getPointer(), CGF.VoidPtrTy); - llvm::Value *ReductionFn = emitReductionFunction( - CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), - Privates, LHSExprs, RHSExprs, ReductionOps); - llvm::Value *ShuffleAndReduceFn = emitShuffleAndReduceFunction( - CGM, Privates, ReductionArrayTy, ReductionFn, Loc); - llvm::Value *InterWarpCopyFn = - emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy, Loc); + llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + ReductionList.getPointer(), CGF.VoidPtrTy); + llvm::Function *ReductionFn = emitReductionFunction( + Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, + LHSExprs, RHSExprs, ReductionOps); + llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); + llvm::Function *ShuffleAndReduceFn = emitShuffleAndReduceFunction( + CGM, Privates, ReductionArrayTy, ReductionFn, Loc); + llvm::Value *InterWarpCopyFn = + emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy, Loc); + if (ParallelReduction) { llvm::Value *Args[] = {RTLoc, ThreadId, CGF.Builder.getInt32(RHSExprs.size()), @@ -3900,17 +4285,59 @@ void CGOpenMPRuntimeNVPTX::emitReduction( ShuffleAndReduceFn, InterWarpCopyFn}; - Res = CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_parallel_reduce_nowait_v2), - Args); + Res = CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2), + Args); } else { assert(TeamsReduction && "expected teams reduction."); - std::string Name = getName({"reduction"}); - llvm::Value *Lock = getCriticalRegionLock(Name); - llvm::Value *Args[] = {RTLoc, ThreadId, Lock}; + llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> VarFieldMap; + llvm::SmallVector<const ValueDecl *, 4> PrivatesReductions(Privates.size()); + int Cnt = 0; + for (const Expr *DRE : Privates) { + PrivatesReductions[Cnt] = cast<DeclRefExpr>(DRE)->getDecl(); + ++Cnt; + } + const RecordDecl *TeamReductionRec = ::buildRecordForGlobalizedVars( + CGM.getContext(), PrivatesReductions, llvm::None, VarFieldMap, + C.getLangOpts().OpenMPCUDAReductionBufNum); + TeamsReductions.push_back(TeamReductionRec); + if (!KernelTeamsReductionPtr) { + KernelTeamsReductionPtr = new llvm::GlobalVariable( + CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/true, + llvm::GlobalValue::InternalLinkage, nullptr, + "_openmp_teams_reductions_buffer_$_$ptr"); + } + llvm::Value *GlobalBufferPtr = CGF.EmitLoadOfScalar( + Address(KernelTeamsReductionPtr, CGM.getPointerAlign()), + /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); + llvm::Value *GlobalToBufferCpyFn = ::emitListToGlobalCopyFunction( + CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap); + llvm::Value *GlobalToBufferRedFn = ::emitListToGlobalReduceFunction( + CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap, + ReductionFn); + llvm::Value *BufferToGlobalCpyFn = ::emitGlobalToListCopyFunction( + CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap); + llvm::Value *BufferToGlobalRedFn = 
::emitGlobalToListReduceFunction( + CGM, Privates, ReductionArrayTy, Loc, TeamReductionRec, VarFieldMap, + ReductionFn); + + llvm::Value *Args[] = { + RTLoc, + ThreadId, + GlobalBufferPtr, + CGF.Builder.getInt32(C.getLangOpts().OpenMPCUDAReductionBufNum), + RL, + ShuffleAndReduceFn, + InterWarpCopyFn, + GlobalToBufferCpyFn, + GlobalToBufferRedFn, + BufferToGlobalCpyFn, + BufferToGlobalRedFn}; + Res = CGF.EmitRuntimeCall( createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_simple), + OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2), Args); } @@ -3941,30 +4368,14 @@ void CGOpenMPRuntimeNVPTX::emitReduction( ++IRHS; } }; - if (ParallelReduction) { - llvm::Value *EndArgs[] = {ThreadId}; - RegionCodeGenTy RCG(CodeGen); - NVPTXActionTy Action( - nullptr, llvm::None, - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_reduce_nowait), - EndArgs); - RCG.setAction(Action); - RCG(CGF); - } else { - assert(TeamsReduction && "expected teams reduction."); - llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); - std::string Name = getName({"reduction"}); - llvm::Value *Lock = getCriticalRegionLock(Name); - llvm::Value *EndArgs[] = {RTLoc, ThreadId, Lock}; - RegionCodeGenTy RCG(CodeGen); - NVPTXActionTy Action( - nullptr, llvm::None, - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_nvptx_teams_end_reduce_nowait_simple), - EndArgs); - RCG.setAction(Action); - RCG(CGF); - } + llvm::Value *EndArgs[] = {ThreadId}; + RegionCodeGenTy RCG(CodeGen); + NVPTXActionTy Action( + nullptr, llvm::None, + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_reduce_nowait), + EndArgs); + RCG.setAction(Action); + RCG(CGF); // There is no need to emit line number for unconditional branch. (void)ApplyDebugLocation::CreateEmpty(CGF); CGF.EmitBlock(ExitBB, /*IsFinished=*/true); @@ -3983,6 +4394,10 @@ CGOpenMPRuntimeNVPTX::translateParameter(const FieldDecl *FD, if (Attr->getCaptureKind() == OMPC_map) { PointeeTy = CGM.getContext().getAddrSpaceQualType(PointeeTy, LangAS::opencl_global); + } else if (Attr->getCaptureKind() == OMPC_firstprivate && + PointeeTy.isConstant(CGM.getContext())) { + PointeeTy = CGM.getContext().getAddrSpaceQualType(PointeeTy, + LangAS::opencl_generic); } } ArgType = CGM.getContext().getPointerType(PointeeTy); @@ -4034,12 +4449,11 @@ CGOpenMPRuntimeNVPTX::getParameterAddress(CodeGenFunction &CGF, } void CGOpenMPRuntimeNVPTX::emitOutlinedFunctionCall( - CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, + CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef<llvm::Value *> Args) const { SmallVector<llvm::Value *, 4> TargetArgs; TargetArgs.reserve(Args.size()); - auto *FnType = - cast<llvm::FunctionType>(OutlinedFn->getType()->getPointerElementType()); + auto *FnType = OutlinedFn.getFunctionType(); for (unsigned I = 0, E = Args.size(); I < E; ++I) { if (FnType->isVarArg() && FnType->getNumParams() <= I) { TargetArgs.append(std::next(Args.begin(), I), Args.end()); @@ -4137,8 +4551,7 @@ llvm::Function *CGOpenMPRuntimeNVPTX::createParallelDataSharingWrapper( } unsigned Idx = 0; if (isOpenMPLoopBoundSharingDirective(D.getDirectiveKind())) { - Address Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx, - CGF.getPointerSize()); + Address Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx); Address TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast( Src, CGF.SizeTy->getPointerTo()); llvm::Value *LB = CGF.EmitLoadOfScalar( @@ -4148,8 +4561,7 @@ llvm::Function *CGOpenMPRuntimeNVPTX::createParallelDataSharingWrapper( 
      cast<OMPLoopDirective>(D).getLowerBoundVariable()->getExprLoc());
    Args.emplace_back(LB);
    ++Idx;
-    Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx,
-                                     CGF.getPointerSize());
+    Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx);
    TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast(
        Src, CGF.SizeTy->getPointerTo());
    llvm::Value *UB = CGF.EmitLoadOfScalar(
@@ -4164,8 +4576,7 @@ llvm::Function *CGOpenMPRuntimeNVPTX::createParallelDataSharingWrapper(
   ASTContext &CGFContext = CGF.getContext();
   for (unsigned I = 0, E = CS.capture_size(); I < E; ++I, ++CI, ++CurField) {
     QualType ElemTy = CurField->getType();
-    Address Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, I + Idx,
-                                             CGF.getPointerSize());
+    Address Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, I + Idx);
     Address TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast(
         Src, CGF.ConvertTypeForMem(CGFContext.getPointerType(ElemTy)));
     llvm::Value *Arg = CGF.EmitLoadOfScalar(TypedAddress,
@@ -4266,6 +4677,58 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF,
 Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                         const VarDecl *VD) {
+  if (VD && VD->hasAttr<OMPAllocateDeclAttr>()) {
+    const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
+    switch (A->getAllocatorType()) {
+      // Use the default allocator here as by default local vars are
+      // threadlocal.
+    case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
+    case OMPAllocateDeclAttr::OMPThreadMemAlloc:
+    case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
+    case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
+      // Follow the user decision - use default allocation.
+      return Address::invalid();
+    case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
+      // TODO: implement support for user-defined allocators.
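// [Editorial note, not part of the patch: the remaining cases below map the
// predefined OpenMP allocators onto NVPTX memory: omp_const_mem_alloc goes
// to __constant__ (LangAS::cuda_constant), omp_pteam_mem_alloc to __shared__
// (LangAS::cuda_shared), and omp_large_cap_mem_alloc/omp_cgroup_mem_alloc to
// an ordinary global. Illustrative user code for the __shared__ path:
//
//   int scratch[32];
//   #pragma omp allocate(scratch) allocator(omp_pteam_mem_alloc)
// ]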
+ return Address::invalid(); + case OMPAllocateDeclAttr::OMPConstMemAlloc: { + llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType()); + auto *GV = new llvm::GlobalVariable( + CGM.getModule(), VarTy, /*isConstant=*/false, + llvm::GlobalValue::InternalLinkage, + llvm::Constant::getNullValue(VarTy), VD->getName(), + /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal, + CGM.getContext().getTargetAddressSpace(LangAS::cuda_constant)); + CharUnits Align = CGM.getContext().getDeclAlign(VD); + GV->setAlignment(Align.getQuantity()); + return Address(GV, Align); + } + case OMPAllocateDeclAttr::OMPPTeamMemAlloc: { + llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType()); + auto *GV = new llvm::GlobalVariable( + CGM.getModule(), VarTy, /*isConstant=*/false, + llvm::GlobalValue::InternalLinkage, + llvm::Constant::getNullValue(VarTy), VD->getName(), + /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal, + CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared)); + CharUnits Align = CGM.getContext().getDeclAlign(VD); + GV->setAlignment(Align.getQuantity()); + return Address(GV, Align); + } + case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: + case OMPAllocateDeclAttr::OMPCGroupMemAlloc: { + llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType()); + auto *GV = new llvm::GlobalVariable( + CGM.getModule(), VarTy, /*isConstant=*/false, + llvm::GlobalValue::InternalLinkage, + llvm::Constant::getNullValue(VarTy), VD->getName()); + CharUnits Align = CGM.getContext().getDeclAlign(VD); + GV->setAlignment(Align.getQuantity()); + return Address(GV, Align); + } + } + } + if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic) return Address::invalid(); @@ -4287,6 +4750,7 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF, return VDI->second.PrivateAddr; } } + return Address::invalid(); } @@ -4374,6 +4838,38 @@ void CGOpenMPRuntimeNVPTX::adjustTargetSpecificDataForLambdas( } } +unsigned CGOpenMPRuntimeNVPTX::getDefaultFirstprivateAddressSpace() const { + return CGM.getContext().getTargetAddressSpace(LangAS::cuda_constant); +} + +bool CGOpenMPRuntimeNVPTX::hasAllocateAttributeForGlobalVar(const VarDecl *VD, + LangAS &AS) { + if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) + return false; + const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); + switch(A->getAllocatorType()) { + case OMPAllocateDeclAttr::OMPDefaultMemAlloc: + // Not supported, fallback to the default mem space. + case OMPAllocateDeclAttr::OMPThreadMemAlloc: + case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: + case OMPAllocateDeclAttr::OMPCGroupMemAlloc: + case OMPAllocateDeclAttr::OMPHighBWMemAlloc: + case OMPAllocateDeclAttr::OMPLowLatMemAlloc: + AS = LangAS::Default; + return true; + case OMPAllocateDeclAttr::OMPConstMemAlloc: + AS = LangAS::cuda_constant; + return true; + case OMPAllocateDeclAttr::OMPPTeamMemAlloc: + AS = LangAS::cuda_shared; + return true; + case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: + llvm_unreachable("Expected predefined allocator for the variables with the " + "static storage."); + } + return false; +} + // Get current CudaArch and ignore any unknown values static CudaArch getCudaArch(CodeGenModule &CGM) { if (!CGM.getTarget().hasFeature("ptx")) @@ -4395,7 +4891,7 @@ static CudaArch getCudaArch(CodeGenModule &CGM) { /// Check to see if target architecture supports unified addressing which is /// a restriction for OpenMP requires clause "unified_shared_memory". 
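// [Editorial example, not part of the patch: the declaration validated below
// comes from, e.g.,
//   #pragma omp requires unified_shared_memory
// and the CudaArch switch that follows vets the target architecture against
// that requirement.]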
void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing( - CodeGenModule &CGM, const OMPRequiresDecl *D) const { + const OMPRequiresDecl *D) { for (const OMPClause *Clause : D->clauselists()) { if (Clause->getClauseKind() == OMPC_unified_shared_memory) { switch (getCudaArch(CGM)) { @@ -4432,7 +4928,11 @@ void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing( case CudaArch::GFX902: case CudaArch::GFX904: case CudaArch::GFX906: + case CudaArch::GFX908: case CudaArch::GFX909: + case CudaArch::GFX1010: + case CudaArch::GFX1011: + case CudaArch::GFX1012: case CudaArch::UNKNOWN: break; case CudaArch::LAST: @@ -4440,6 +4940,7 @@ void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing( } } } + CGOpenMPRuntime::checkArchForUnifiedAddressing(D); } /// Get number of SMs and number of blocks per SM. @@ -4485,7 +4986,11 @@ static std::pair<unsigned, unsigned> getSMsBlocksPerSM(CodeGenModule &CGM) { case CudaArch::GFX902: case CudaArch::GFX904: case CudaArch::GFX906: + case CudaArch::GFX908: case CudaArch::GFX909: + case CudaArch::GFX1010: + case CudaArch::GFX1011: + case CudaArch::GFX1012: case CudaArch::UNKNOWN: break; case CudaArch::LAST: @@ -4587,9 +5092,12 @@ void CGOpenMPRuntimeNVPTX::clear() { QualType Arr2Ty = C.getConstantArrayType(Arr1Ty, Size2, ArrayType::Normal, /*IndexTypeQuals=*/0); llvm::Type *LLVMArr2Ty = CGM.getTypes().ConvertTypeForMem(Arr2Ty); + // FIXME: nvlink does not handle weak linkage correctly (object with the + // different size are reported as erroneous). + // Restore CommonLinkage as soon as nvlink is fixed. auto *GV = new llvm::GlobalVariable( CGM.getModule(), LLVMArr2Ty, - /*isConstant=*/false, llvm::GlobalValue::CommonLinkage, + /*isConstant=*/false, llvm::GlobalValue::InternalLinkage, llvm::Constant::getNullValue(LLVMArr2Ty), "_openmp_static_glob_rd_$_"); auto *Replacement = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( @@ -4600,5 +5108,36 @@ void CGOpenMPRuntimeNVPTX::clear() { } } } + if (!TeamsReductions.empty()) { + ASTContext &C = CGM.getContext(); + RecordDecl *StaticRD = C.buildImplicitRecord( + "_openmp_teams_reduction_type_$_", RecordDecl::TagKind::TTK_Union); + StaticRD->startDefinition(); + for (const RecordDecl *TeamReductionRec : TeamsReductions) { + QualType RecTy = C.getRecordType(TeamReductionRec); + auto *Field = FieldDecl::Create( + C, StaticRD, SourceLocation(), SourceLocation(), nullptr, RecTy, + C.getTrivialTypeSourceInfo(RecTy, SourceLocation()), + /*BW=*/nullptr, /*Mutable=*/false, + /*InitStyle=*/ICIS_NoInit); + Field->setAccess(AS_public); + StaticRD->addDecl(Field); + } + StaticRD->completeDefinition(); + QualType StaticTy = C.getRecordType(StaticRD); + llvm::Type *LLVMReductionsBufferTy = + CGM.getTypes().ConvertTypeForMem(StaticTy); + // FIXME: nvlink does not handle weak linkage correctly (object with the + // different size are reported as erroneous). + // Restore CommonLinkage as soon as nvlink is fixed. 
+ auto *GV = new llvm::GlobalVariable( + CGM.getModule(), LLVMReductionsBufferTy, + /*isConstant=*/false, llvm::GlobalValue::InternalLinkage, + llvm::Constant::getNullValue(LLVMReductionsBufferTy), + "_openmp_teams_reductions_buffer_$_"); + KernelTeamsReductionPtr->setInitializer( + llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, + CGM.VoidPtrTy)); + } CGOpenMPRuntime::clear(); } diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index 6091610c37e3..e7fd458e7271 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -1,9 +1,8 @@ //===----- CGOpenMPRuntimeNVPTX.h - Interface to OpenMP NVPTX Runtimes ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -18,7 +17,6 @@ #include "CGOpenMPRuntime.h" #include "CodeGenFunction.h" #include "clang/AST/StmtOpenMP.h" -#include "llvm/IR/CallSite.h" namespace clang { namespace CodeGen { @@ -173,7 +171,7 @@ private: /// specified, nullptr otherwise. /// void emitSPMDParallelCall(CodeGenFunction &CGF, SourceLocation Loc, - llvm::Value *OutlinedFn, + llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond); @@ -230,7 +228,7 @@ public: /// \param InnermostKind Kind of innermost directive (for simple directives it /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. - llvm::Value * + llvm::Function * emitParallelOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, @@ -245,7 +243,7 @@ public: /// \param InnermostKind Kind of innermost directive (for simple directives it /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. - llvm::Value * + llvm::Function * emitTeamsOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, @@ -260,7 +258,7 @@ public: /// variables used in \a OutlinedFn function. /// void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, - SourceLocation Loc, llvm::Value *OutlinedFn, + SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars) override; /// Emits code for parallel or serial call of the \a OutlinedFn with @@ -273,7 +271,7 @@ public: /// \param IfCond Condition in the associated 'if' clause, if it was /// specified, nullptr otherwise. void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, - llvm::Value *OutlinedFn, + llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) override; @@ -323,7 +321,7 @@ public: /// implementation. Specialized for the NVPTX device. /// \param Function OpenMP runtime function. /// \return Specified function. - llvm::Constant *createNVPTXRuntimeFunction(unsigned Function); + llvm::FunctionCallee createNVPTXRuntimeFunction(unsigned Function); /// Translates the native parameter of outlined function if this is required /// for target. 
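Editor's note: the hunks above complete the move from the lock-based `__kmpc_nvptx_teams_reduce_nowait_simple` protocol to a buffer-based one. Each kernel's teams now reduce through a module-level scratch buffer (`_openmp_teams_reductions_buffer_$_`, a union of the per-directive reduction records) using the four generated copy/reduce helpers. For reference, a sketch of the new entry point as documented earlier in this diff; the parameter names and the `ident_t` forward declaration are illustrative, not part of the patch:

    #include <cstdint>
    struct ident_t; // opaque source-location descriptor used by the OpenMP RTL

    extern "C" int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
        ident_t *loc, int32_t global_tid, void *global_buffer,
        int32_t num_of_records, void *reduce_data,
        void (*shuffle_reduce)(void *rhs_data, int16_t lane_id,
                               int16_t lane_offset, int16_t short_circuit),
        void (*inter_warp_copy)(void *src, int32_t warp_num),
        void (*list_to_global_copy)(void *buffer, int idx, void *reduce_data),
        void (*list_to_global_reduce)(void *buffer, int idx, void *reduce_data),
        void (*global_to_list_copy)(void *buffer, int idx, void *reduce_data),
        void (*global_to_list_reduce)(void *buffer, int idx, void *reduce_data));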
@@ -342,7 +340,7 @@ public:
   /// Emits call of the outlined function with the provided arguments,
   /// translating these arguments to correct target-specific arguments.
   void emitOutlinedFunctionCall(
-      CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
+      CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
       ArrayRef<llvm::Value *> Args = llvm::None) const override;

   /// Emits OpenMP-specific function prolog.
@@ -385,8 +383,16 @@ public:

   /// Perform check on requires decl to ensure that target architecture
   /// supports unified addressing
-  void checkArchForUnifiedAddressing(CodeGenModule &CGM,
-                                     const OMPRequiresDecl *D) const override;
+  void checkArchForUnifiedAddressing(const OMPRequiresDecl *D) override;
+
+  /// Returns default address space for the constant firstprivates, __constant__
+  /// address space by default.
+  unsigned getDefaultFirstprivateAddressSpace() const override;
+
+  /// Checks if the variable has associated OMPAllocateDeclAttr attribute with
+  /// the predefined allocator and translates it into the corresponding address
+  /// space.
+  bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS) override;

 private:
   /// Track the execution mode when codegening directives within a target
@@ -463,6 +469,12 @@ private:
     unsigned RegionCounter = 0;
   };
   llvm::SmallVector<GlobalPtrSizeRecsTy, 8> GlobalizedRecords;
+  llvm::GlobalVariable *KernelTeamsReductionPtr = nullptr;
+  /// List of the records with the list of fields for the reductions across
+  /// the teams. Used to build the intermediate buffer for the fast teams
+  /// reductions. All the records are gathered into a union, from which the
+  /// `union.type` global is created.
+  llvm::SmallVector<const RecordDecl *, 4> TeamsReductions;
   /// Shared pointer for the global memory in the global memory buffer used for
   /// the given kernel.
   llvm::GlobalVariable *KernelStaticGlobalized = nullptr;
diff --git a/lib/CodeGen/CGRecordLayout.h b/lib/CodeGen/CGRecordLayout.h
index 41084294ab9a..730ee4c438e7 100644
--- a/lib/CodeGen/CGRecordLayout.h
+++ b/lib/CodeGen/CGRecordLayout.h
@@ -1,9 +1,8 @@
 //===--- CGRecordLayout.h - LLVM Record Layout Information ------*- C++ -*-===//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//

diff --git a/lib/CodeGen/CGRecordLayoutBuilder.cpp b/lib/CodeGen/CGRecordLayoutBuilder.cpp
index c754541ac121..4de64a32f2ac 100644
--- a/lib/CodeGen/CGRecordLayoutBuilder.cpp
+++ b/lib/CodeGen/CGRecordLayoutBuilder.cpp
@@ -1,9 +1,8 @@
 //===--- CGRecordLayoutBuilder.cpp - CGRecordLayout builder ----*- C++ -*-===//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
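Several interfaces in this import (createNVPTXRuntimeFunction above, emitOutlinedFunctionCall here, and the thunk helpers in CGVTables further down) switch from llvm::Constant*/llvm::Value* callees to llvm::FunctionCallee, which carries the callee pointer together with its function type. A minimal sketch of the pattern, assuming a hypothetical runtime function name:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Module.h"

    llvm::Value *emitRuntimeCall(llvm::Module &M, llvm::IRBuilder<> &Builder,
                                 llvm::ArrayRef<llvm::Value *> Args) {
      llvm::FunctionType *FTy = llvm::FunctionType::get(
          Builder.getVoidTy(), {Builder.getInt8PtrTy()}, /*isVarArg=*/false);
      // getOrInsertFunction returns a {FunctionType*, Value*} pair...
      llvm::FunctionCallee Fn =
          M.getOrInsertFunction("__hypothetical_rt_func", FTy);
      // ...which CreateCall consumes directly, so the call's type no longer
      // has to be dug out of the callee pointer's pointee type.
      return Builder.CreateCall(Fn, Args);
    }

Where the callee only exists as a loaded pointer (as with the task privatization copy function in CGStmtOpenMP.cpp below), the same FunctionCallee is formed directly from an explicit FunctionType plus that pointer.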
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -274,7 +273,7 @@ void CGRecordLowering::lower(bool NVBaseType) { if (!NVBaseType) accumulateVBases(); } - std::stable_sort(Members.begin(), Members.end()); + llvm::stable_sort(Members); Members.push_back(StorageInfo(Size, getIntNType(8))); clipTailPadding(); determinePacked(NVBaseType); @@ -348,18 +347,21 @@ void CGRecordLowering::lowerUnion() { void CGRecordLowering::accumulateFields() { for (RecordDecl::field_iterator Field = D->field_begin(), FieldEnd = D->field_end(); - Field != FieldEnd;) + Field != FieldEnd;) { if (Field->isBitField()) { RecordDecl::field_iterator Start = Field; // Iterate to gather the list of bitfields. for (++Field; Field != FieldEnd && Field->isBitField(); ++Field); accumulateBitFields(Start, Field); - } else { + } else if (!Field->isZeroSize(Context)) { Members.push_back(MemberInfo( bitsToCharUnits(getFieldBitOffset(*Field)), MemberInfo::Field, getStorageType(*Field), *Field)); ++Field; + } else { + ++Field; } + } } void @@ -591,10 +593,17 @@ void CGRecordLowering::clipTailPadding() { if (!Member->Data && Member->Kind != MemberInfo::Scissor) continue; if (Member->Offset < Tail) { - assert(Prior->Kind == MemberInfo::Field && !Prior->FD && + assert(Prior->Kind == MemberInfo::Field && "Only storage fields have tail padding!"); - Prior->Data = getByteArrayType(bitsToCharUnits(llvm::alignTo( - cast<llvm::IntegerType>(Prior->Data)->getIntegerBitWidth(), 8))); + if (!Prior->FD || Prior->FD->isBitField()) + Prior->Data = getByteArrayType(bitsToCharUnits(llvm::alignTo( + cast<llvm::IntegerType>(Prior->Data)->getIntegerBitWidth(), 8))); + else { + assert(Prior->FD->hasAttr<NoUniqueAddressAttr>() && + "should not have reused this field's tail padding"); + Prior->Data = getByteArrayType( + Context.getTypeInfoDataSizeInChars(Prior->FD->getType()).first); + } } if (Member->Data) Prior = Member; @@ -659,7 +668,7 @@ void CGRecordLowering::insertPadding() { Pad = Padding.begin(), PadEnd = Padding.end(); Pad != PadEnd; ++Pad) Members.push_back(StorageInfo(Pad->first, getByteArrayType(Pad->second))); - std::stable_sort(Members.begin(), Members.end()); + llvm::stable_sort(Members); } void CGRecordLowering::fillOutputFields() { @@ -798,6 +807,10 @@ CGRecordLayout *CodeGenTypes::ComputeRecordLayout(const RecordDecl *D, for (unsigned i = 0, e = AST_RL.getFieldCount(); i != e; ++i, ++it) { const FieldDecl *FD = *it; + // Ignore zero-sized fields. + if (FD->isZeroSize(getContext())) + continue; + // For non-bit-fields, just check that the LLVM struct offset matches the // AST offset. if (!FD->isBitField()) { @@ -811,10 +824,6 @@ CGRecordLayout *CodeGenTypes::ComputeRecordLayout(const RecordDecl *D, if (!FD->getDeclName()) continue; - // Don't inspect zero-length bitfields. - if (FD->isZeroLengthBitField(getContext())) - continue; - const CGBitFieldInfo &Info = RL->getBitFieldInfo(FD); llvm::Type *ElementTy = ST->getTypeAtIndex(RL->getLLVMFieldNo(FD)); diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp index 0242b48659d1..dd0dea5b94a0 100644 --- a/lib/CodeGen/CGStmt.cpp +++ b/lib/CodeGen/CGStmt.cpp @@ -1,9 +1,8 @@ //===--- CGStmt.cpp - Emit LLVM Code from Statements ----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
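The CGRecordLayoutBuilder hunks above (zero-size fields dropped from the LLVM struct, plus the new branch in clipTailPadding) are the layout side of C++2a [[no_unique_address]]. A minimal sketch of the two cases, assuming a typical Itanium-ABI target:

    struct Empty {};

    struct A {
      char C;
      [[no_unique_address]] Empty E;  // zero-size: no storage slot at all
    };
    static_assert(sizeof(A) == 1);

    struct Inner { int I; char C; };  // size 8, but data size only 5

    struct B {
      [[no_unique_address]] Inner In; // tail padding may be reused, so...
      char D;                         // ...D can be placed at offset 5
    };
    static_assert(sizeof(B) == 8);

This is why clipTailPadding now distinguishes a field whose tail padding was legitimately reused (only its data size is kept) from the bitfield-storage case the old assertion expected.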
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -20,7 +19,6 @@ #include "clang/Basic/PrettyStackTrace.h" #include "clang/Basic/TargetInfo.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Intrinsics.h" @@ -383,36 +381,49 @@ CodeGenFunction::EmitCompoundStmtWithoutScope(const CompoundStmt &S, bool GetLast, AggValueSlot AggSlot) { - for (CompoundStmt::const_body_iterator I = S.body_begin(), - E = S.body_end()-GetLast; I != E; ++I) - EmitStmt(*I); + const Stmt *ExprResult = S.getStmtExprResult(); + assert((!GetLast || (GetLast && ExprResult)) && + "If GetLast is true then the CompoundStmt must have a StmtExprResult"); Address RetAlloca = Address::invalid(); - if (GetLast) { - // We have to special case labels here. They are statements, but when put - // at the end of a statement expression, they yield the value of their - // subexpression. Handle this by walking through all labels we encounter, - // emitting them before we evaluate the subexpr. - const Stmt *LastStmt = S.body_back(); - while (const LabelStmt *LS = dyn_cast<LabelStmt>(LastStmt)) { - EmitLabel(LS->getDecl()); - LastStmt = LS->getSubStmt(); - } - EnsureInsertPoint(); + for (auto *CurStmt : S.body()) { + if (GetLast && ExprResult == CurStmt) { + // We have to special case labels here. They are statements, but when put + // at the end of a statement expression, they yield the value of their + // subexpression. Handle this by walking through all labels we encounter, + // emitting them before we evaluate the subexpr. + // Similar issues arise for attributed statements. + while (!isa<Expr>(ExprResult)) { + if (const auto *LS = dyn_cast<LabelStmt>(ExprResult)) { + EmitLabel(LS->getDecl()); + ExprResult = LS->getSubStmt(); + } else if (const auto *AS = dyn_cast<AttributedStmt>(ExprResult)) { + // FIXME: Update this if we ever have attributes that affect the + // semantics of an expression. + ExprResult = AS->getSubStmt(); + } else { + llvm_unreachable("unknown value statement"); + } + } - QualType ExprTy = cast<Expr>(LastStmt)->getType(); - if (hasAggregateEvaluationKind(ExprTy)) { - EmitAggExpr(cast<Expr>(LastStmt), AggSlot); + EnsureInsertPoint(); + + const Expr *E = cast<Expr>(ExprResult); + QualType ExprTy = E->getType(); + if (hasAggregateEvaluationKind(ExprTy)) { + EmitAggExpr(E, AggSlot); + } else { + // We can't return an RValue here because there might be cleanups at + // the end of the StmtExpr. Because of that, we have to emit the result + // here into a temporary alloca. + RetAlloca = CreateMemTemp(ExprTy); + EmitAnyExprToMem(E, RetAlloca, Qualifiers(), + /*IsInit*/ false); + } } else { - // We can't return an RValue here because there might be cleanups at - // the end of the StmtExpr. Because of that, we have to emit the result - // here into a temporary alloca. - RetAlloca = CreateMemTemp(ExprTy); - EmitAnyExprToMem(cast<Expr>(LastStmt), RetAlloca, Qualifiers(), - /*IsInit*/false); + EmitStmt(CurStmt); } - } return RetAlloca; @@ -529,6 +540,16 @@ void CodeGenFunction::EmitLabel(const LabelDecl *D) { } EmitBlock(Dest.getBlock()); + + // Emit debug info for labels. 
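The EmitCompoundStmtWithoutScope rewrite above keys off the new StmtExprResult marker instead of assuming the last child yields the value, and it peels off both labels and attributed statements wrapping the result expression. A minimal sketch of the GNU construct being handled:

    int f(int x) {
      int y = ({              // GNU statement expression
        int t = x;
        if (t < 0) goto fixup;
        t = t * 2;
      fixup:
        t + 1;                // the StmtExprResult: value of the whole ({...})
      });
      return y;
    }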
+ if (CGDebugInfo *DI = getDebugInfo()) { + if (CGM.getCodeGenOpts().getDebugInfo() >= + codegenoptions::LimitedDebugInfo) { + DI->setLocation(D->getLocation()); + DI->EmitLabel(D, Builder); + } + } + incrementProfileCounter(D->getStmt()); } @@ -1007,7 +1028,7 @@ void CodeGenFunction::EmitReturnOfRValue(RValue RV, QualType Ty) { } else if (RV.isAggregate()) { LValue Dest = MakeAddrLValue(ReturnValue, Ty); LValue Src = MakeAddrLValue(RV.getAggregateAddress(), Ty); - EmitAggregateCopy(Dest, Src, Ty, overlapForReturnValue()); + EmitAggregateCopy(Dest, Src, Ty, getOverlapForReturnValue()); } else { EmitStoreOfComplex(RV.getComplexVal(), MakeAddrLValue(ReturnValue, Ty), /*init*/ true); @@ -1089,7 +1110,7 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) { AggValueSlot::IsDestructed, AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased, - overlapForReturnValue())); + getOverlapForReturnValue())); break; } } @@ -1821,8 +1842,15 @@ llvm::Value* CodeGenFunction::EmitAsmInput( // (immediate or symbolic), try to emit it as such. if (!Info.allowsRegister() && !Info.allowsMemory()) { if (Info.requiresImmediateConstant()) { - llvm::APSInt AsmConst = InputExpr->EvaluateKnownConstInt(getContext()); - return llvm::ConstantInt::get(getLLVMContext(), AsmConst); + Expr::EvalResult EVResult; + InputExpr->EvaluateAsRValue(EVResult, getContext(), true); + + llvm::APSInt IntResult; + if (!EVResult.Val.toIntegralConstant(IntResult, InputExpr->getType(), + getContext())) + llvm_unreachable("Invalid immediate constant!"); + + return llvm::ConstantInt::get(getLLVMContext(), IntResult); } Expr::EvalResult Result; @@ -1872,6 +1900,55 @@ static llvm::MDNode *getAsmSrcLocInfo(const StringLiteral *Str, return llvm::MDNode::get(CGF.getLLVMContext(), Locs); } +static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect, + bool ReadOnly, bool ReadNone, const AsmStmt &S, + const std::vector<llvm::Type *> &ResultRegTypes, + CodeGenFunction &CGF, + std::vector<llvm::Value *> &RegResults) { + Result.addAttribute(llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoUnwind); + // Attach readnone and readonly attributes. + if (!HasSideEffect) { + if (ReadNone) + Result.addAttribute(llvm::AttributeList::FunctionIndex, + llvm::Attribute::ReadNone); + else if (ReadOnly) + Result.addAttribute(llvm::AttributeList::FunctionIndex, + llvm::Attribute::ReadOnly); + } + + // Slap the source location of the inline asm into a !srcloc metadata on the + // call. + if (const auto *gccAsmStmt = dyn_cast<GCCAsmStmt>(&S)) + Result.setMetadata("srcloc", + getAsmSrcLocInfo(gccAsmStmt->getAsmString(), CGF)); + else { + // At least put the line number on MS inline asm blobs. + llvm::Constant *Loc = llvm::ConstantInt::get(CGF.Int32Ty, + S.getAsmLoc().getRawEncoding()); + Result.setMetadata("srcloc", + llvm::MDNode::get(CGF.getLLVMContext(), + llvm::ConstantAsMetadata::get(Loc))); + } + + if (CGF.getLangOpts().assumeFunctionsAreConvergent()) + // Conservatively, mark all inline asm blocks in CUDA or OpenCL as + // convergent (meaning, they may call an intrinsically convergent op, such + // as bar.sync, and so can't have certain optimizations applied around + // them). + Result.addAttribute(llvm::AttributeList::FunctionIndex, + llvm::Attribute::Convergent); + // Extract all of the register value results from the asm. 
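The EmitAsmInput change above concerns operands whose constraint requires an immediate: the operand is now evaluated as a general constant rvalue and then converted to an integral constant, instead of assuming EvaluateKnownConstInt can handle it directly. A minimal sketch of such an operand, in x86 AT&T syntax:

    int eight(void) {
      int r;
      // "i" demands a compile-time integer constant; 4 + 4 folds to $8.
      __asm__("movl %1, %0" : "=r"(r) : "i"(4 + 4));
      return r;
    }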
+ if (ResultRegTypes.size() == 1) { + RegResults.push_back(&Result); + } else { + for (unsigned i = 0, e = ResultRegTypes.size(); i != e; ++i) { + llvm::Value *Tmp = CGF.Builder.CreateExtractValue(&Result, i, "asmresult"); + RegResults.push_back(Tmp); + } + } +} + void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { // Assemble the final asm string. std::string AsmString = S.generateAsmString(getContext()); @@ -1915,6 +1992,9 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { std::vector<llvm::Value*> InOutArgs; std::vector<llvm::Type*> InOutArgTypes; + // Keep track of out constraints for tied input operand. + std::vector<std::string> OutputConstraints; + // An inline asm can be marked readonly if it meets the following conditions: // - it doesn't have any sideeffects // - it doesn't clobber memory @@ -1937,7 +2017,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { OutputConstraint = AddVariableConstraints(OutputConstraint, *OutExpr, getTarget(), CGM, S, Info.earlyClobber()); - + OutputConstraints.push_back(OutputConstraint); LValue Dest = EmitLValue(OutExpr); if (!Constraints.empty()) Constraints += ','; @@ -2055,6 +2135,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { InputConstraint, *InputExpr->IgnoreParenNoopCasts(getContext()), getTarget(), CGM, S, false /* No EarlyClobber */); + std::string ReplaceConstraint (InputConstraint); llvm::Value *Arg = EmitAsmInput(Info, InputExpr, Constraints); // If this input argument is tied to a larger output result, extend the @@ -2082,9 +2163,11 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { Arg = Builder.CreateFPExt(Arg, OutputTy); } } + // Deal with the tied operands' constraint code in adjustInlineAsmType. + ReplaceConstraint = OutputConstraints[Output]; } if (llvm::Type* AdjTy = - getTargetHooks().adjustInlineAsmType(*this, InputConstraint, + getTargetHooks().adjustInlineAsmType(*this, ReplaceConstraint, Arg->getType())) Arg = Builder.CreateBitCast(Arg, AdjTy); else @@ -2108,6 +2191,29 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { } Constraints += InOutConstraints; + // Labels + SmallVector<llvm::BasicBlock *, 16> Transfer; + llvm::BasicBlock *Fallthrough = nullptr; + bool IsGCCAsmGoto = false; + if (const auto *GS = dyn_cast<GCCAsmStmt>(&S)) { + IsGCCAsmGoto = GS->isAsmGoto(); + if (IsGCCAsmGoto) { + for (auto *E : GS->labels()) { + JumpDest Dest = getJumpDestForLabel(E->getLabel()); + Transfer.push_back(Dest.getBlock()); + llvm::BlockAddress *BA = + llvm::BlockAddress::get(CurFn, Dest.getBlock()); + Args.push_back(BA); + ArgTypes.push_back(BA->getType()); + if (!Constraints.empty()) + Constraints += ','; + Constraints += 'X'; + } + StringRef Name = "asm.fallthrough"; + Fallthrough = createBasicBlock(Name); + } + } + // Clobbers for (unsigned i = 0, e = S.getNumClobbers(); i != e; i++) { StringRef Clobber = S.getClobber(i); @@ -2150,52 +2256,18 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, AsmString, Constraints, HasSideEffect, /* IsAlignStack */ false, AsmDialect); - llvm::CallInst *Result = - Builder.CreateCall(IA, Args, getBundlesForFunclet(IA)); - Result->addAttribute(llvm::AttributeList::FunctionIndex, - llvm::Attribute::NoUnwind); - - // Attach readnone and readonly attributes. 
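The "Labels" block added above is asm-goto support: each destination label is passed as a blockaddress under an 'X' constraint, and the asm is emitted as a callbr whose normal destination is the synthesized asm.fallthrough block. A minimal sketch of the source construct (x86, GNU syntax; %l1 names the first label, numbered after the single input operand):

    int check(int x) {
      asm goto("testl %0, %0; jnz %l1"
               : /* asm goto permits no output operands here */
               : "r"(x)
               : "cc"
               : on_err);
      return 0;   // reached via the fallthrough destination of the callbr
    on_err:
      return 1;   // reached via an indirect destination of the callbr
    }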
- if (!HasSideEffect) { - if (ReadNone) - Result->addAttribute(llvm::AttributeList::FunctionIndex, - llvm::Attribute::ReadNone); - else if (ReadOnly) - Result->addAttribute(llvm::AttributeList::FunctionIndex, - llvm::Attribute::ReadOnly); - } - - // Slap the source location of the inline asm into a !srcloc metadata on the - // call. - if (const GCCAsmStmt *gccAsmStmt = dyn_cast<GCCAsmStmt>(&S)) { - Result->setMetadata("srcloc", getAsmSrcLocInfo(gccAsmStmt->getAsmString(), - *this)); - } else { - // At least put the line number on MS inline asm blobs. - auto Loc = llvm::ConstantInt::get(Int32Ty, S.getAsmLoc().getRawEncoding()); - Result->setMetadata("srcloc", - llvm::MDNode::get(getLLVMContext(), - llvm::ConstantAsMetadata::get(Loc))); - } - - if (getLangOpts().assumeFunctionsAreConvergent()) { - // Conservatively, mark all inline asm blocks in CUDA or OpenCL as - // convergent (meaning, they may call an intrinsically convergent op, such - // as bar.sync, and so can't have certain optimizations applied around - // them). - Result->addAttribute(llvm::AttributeList::FunctionIndex, - llvm::Attribute::Convergent); - } - - // Extract all of the register value results from the asm. std::vector<llvm::Value*> RegResults; - if (ResultRegTypes.size() == 1) { - RegResults.push_back(Result); + if (IsGCCAsmGoto) { + llvm::CallBrInst *Result = + Builder.CreateCallBr(IA, Fallthrough, Transfer, Args); + UpdateAsmCallInst(cast<llvm::CallBase>(*Result), HasSideEffect, ReadOnly, + ReadNone, S, ResultRegTypes, *this, RegResults); + EmitBlock(Fallthrough); } else { - for (unsigned i = 0, e = ResultRegTypes.size(); i != e; ++i) { - llvm::Value *Tmp = Builder.CreateExtractValue(Result, i, "asmresult"); - RegResults.push_back(Tmp); - } + llvm::CallInst *Result = + Builder.CreateCall(IA, Args, getBundlesForFunclet(IA)); + UpdateAsmCallInst(cast<llvm::CallBase>(*Result), HasSideEffect, ReadOnly, + ReadNone, S, ResultRegTypes, *this, RegResults); } assert(RegResults.size() == ResultRegTypes.size()); diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index eb1304d89345..e8fbca5108ad 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -1,9 +1,8 @@ //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -19,7 +18,6 @@ #include "clang/AST/Stmt.h" #include "clang/AST/StmtOpenMP.h" #include "clang/AST/DeclOpenMP.h" -#include "llvm/IR/CallSite.h" using namespace clang; using namespace CodeGen; @@ -298,8 +296,7 @@ void CodeGenFunction::GenerateOpenMPCapturedVars( static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc, QualType DstType, StringRef Name, - LValue AddrLV, - bool isReferenceType = false) { + LValue AddrLV) { ASTContext &Ctx = CGF.getContext(); llvm::Value *CastedPtr = CGF.EmitScalarConversion( @@ -308,17 +305,6 @@ static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc, Address TmpAddr = CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType)) .getAddress(); - - // If we are dealing with references we need to return the address of the - // reference instead of the reference of the value. - if (isReferenceType) { - QualType RefType = Ctx.getLValueReferenceType(DstType); - llvm::Value *RefVal = TmpAddr.getPointer(); - TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name, ".ref")); - LValue TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType); - CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit=*/true); - } - return TmpAddr; } @@ -475,14 +461,6 @@ static llvm::Function *emitOutlinedFunctionPrologue( // use the value that we get from the arguments. if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) { const VarDecl *CurVD = I->getCapturedVar(); - // If the variable is a reference we need to materialize it here. - if (CurVD->getType()->isReferenceType()) { - Address RefAddr = CGF.CreateMemTemp( - CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref"); - CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr, - /*Volatile=*/false, CurVD->getType()); - LocalAddr = RefAddr; - } if (!FO.RegisterCastedArgsOnly) LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}}); ++Cnt; @@ -506,15 +484,12 @@ static llvm::Function *emitOutlinedFunctionPrologue( const VarDecl *Var = I->getCapturedVar(); QualType VarTy = Var->getType(); Address ArgAddr = ArgLVal.getAddress(); - if (!VarTy->isReferenceType()) { - if (ArgLVal.getType()->isLValueReferenceType()) { - ArgAddr = CGF.EmitLoadOfReference(ArgLVal); - } else if (!VarTy->isVariablyModifiedType() || - !VarTy->isPointerType()) { - assert(ArgLVal.getType()->isPointerType()); - ArgAddr = CGF.EmitLoadOfPointer( - ArgAddr, ArgLVal.getType()->castAs<PointerType>()); - } + if (ArgLVal.getType()->isLValueReferenceType()) { + ArgAddr = CGF.EmitLoadOfReference(ArgLVal); + } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) { + assert(ArgLVal.getType()->isPointerType()); + ArgAddr = CGF.EmitLoadOfPointer( + ArgAddr, ArgLVal.getType()->castAs<PointerType>()); } if (!FO.RegisterCastedArgsOnly) { LocalAddrs.insert( @@ -525,14 +500,12 @@ static llvm::Function *emitOutlinedFunctionPrologue( assert(!FD->getType()->isAnyPointerType() && "Not expecting a captured pointer."); const VarDecl *Var = I->getCapturedVar(); - QualType VarTy = Var->getType(); - LocalAddrs.insert( - {Args[Cnt], - {Var, FO.UIntPtrCastRequired - ? castValueFromUintptr(CGF, I->getLocation(), - FD->getType(), Args[Cnt]->getName(), - ArgLVal, VarTy->isReferenceType()) - : ArgLVal.getAddress()}}); + LocalAddrs.insert({Args[Cnt], + {Var, FO.UIntPtrCastRequired + ? 
castValueFromUintptr( + CGF, I->getLocation(), FD->getType(), + Args[Cnt]->getName(), ArgLVal) + : ArgLVal.getAddress()}}); } else { // If 'this' is captured, load it into CXXThisValue. assert(I->capturesThis()); @@ -568,16 +541,20 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { Out.str()); llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs, VLASizes, CXXThisValue, FO); + CodeGenFunction::OMPPrivateScope LocalScope(*this); for (const auto &LocalAddrPair : LocalAddrs) { if (LocalAddrPair.second.first) { - setAddrOfLocalVar(LocalAddrPair.second.first, - LocalAddrPair.second.second); + LocalScope.addPrivate(LocalAddrPair.second.first, [&LocalAddrPair]() { + return LocalAddrPair.second.second; + }); } } + (void)LocalScope.Privatize(); for (const auto &VLASizePair : VLASizes) VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second; PGO.assignRegionCounters(GlobalDecl(CD), F); CapturedStmtInfo->EmitBody(*this, CD->getBody()); + (void)LocalScope.ForceCleanup(); FinishFunction(CD->getBodyRBrace()); if (!NeedWrapperFunction) return F; @@ -727,6 +704,9 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) { if (!HaveInsertPoint()) return false; + bool DeviceConstTarget = + getLangOpts().OpenMPIsDevice && + isOpenMPTargetExecutionDirective(D.getDirectiveKind()); bool FirstprivateIsLastprivate = false; llvm::DenseSet<const VarDecl *> Lastprivates; for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { @@ -749,24 +729,54 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, bool ThisFirstprivateIsLastprivate = Lastprivates.count(OrigVD->getCanonicalDecl()) > 0; const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD); + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD && - !FD->getType()->isReferenceType()) { + !FD->getType()->isReferenceType() && + (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) { EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()); ++IRef; ++InitsRef; continue; } + // Do not emit copy for firstprivate constant variables in target regions, + // captured by reference. + if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) && + FD && FD->getType()->isReferenceType() && + (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) { + (void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(*this, + OrigVD); + ++IRef; + ++InitsRef; + continue; + } FirstprivateIsLastprivate = FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate; if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) { - const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); const auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl()); bool IsRegistered; DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), /*RefersToEnclosingVariableOrCapture=*/FD != nullptr, (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); - LValue OriginalLVal = EmitLValue(&DRE); + LValue OriginalLVal; + if (!FD) { + // Check if the firstprivate variable is just a constant value. + ConstantEmission CE = tryEmitAsConstant(&DRE); + if (CE && !CE.isReference()) { + // Constant value, no need to create a copy. 
+ ++IRef; + ++InitsRef; + continue; + } + if (CE && CE.isReference()) { + OriginalLVal = CE.getReferenceLValue(*this, &DRE); + } else { + assert(!CE && "Expected non-constant firstprivate."); + OriginalLVal = EmitLValue(&DRE); + } + } else { + OriginalLVal = EmitLValue(&DRE); + } QualType Type = VD->getType(); if (Type->isArrayType()) { // Emit VarDecl with copy init for arrays. @@ -1227,7 +1237,7 @@ static void emitCommonOMPParallelDirective( OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, const CodeGenBoundParametersTy &CodeGenBoundParameters) { const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); - llvm::Value *OutlinedFn = + llvm::Function *OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) { @@ -1518,8 +1528,9 @@ void CodeGenFunction::EmitOMPPrivateLoopCounters( I < E; ++I) { const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I)); const auto *VD = cast<VarDecl>(DRE->getDecl()); - // Override only those variables that are really emitted already. - if (LocalDeclMap.count(VD)) { + // Override only those variables that can be captured to avoid re-emission + // of the variables declared within the loops. + if (DRE->refersToEnclosingVariableOrCapture()) { (void)LoopScope.addPrivate(VD, [this, DRE, VD]() { return CreateMemTemp(DRE->getType(), VD->getName()); }); @@ -2893,6 +2904,8 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( OMPPrivateScope Scope(CGF); if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || !Data.LastprivateVars.empty()) { + llvm::FunctionType *CopyFnTy = llvm::FunctionType::get( + CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true); enum { PrivatesParam = 2, CopyFnParam = 3 }; llvm::Value *CopyFn = CGF.Builder.CreateLoad( CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); @@ -2925,8 +2938,8 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( PrivatePtrs.emplace_back(VD, PrivatePtr); CallArgs.push_back(PrivatePtr.getPointer()); } - CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(), - CopyFn, CallArgs); + CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( + CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs); for (const auto &Pair : LastprivateDstsOrigs) { const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl()); DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD), @@ -3028,7 +3041,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( Action.Enter(CGF); BodyGen(CGF); }; - llvm::Value *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( + llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, Data.NumberOfParts); OMPLexicalScope Scope(*this, S); @@ -3106,7 +3119,8 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( PVD = createImplicitFirstprivateForType( getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc()); QualType SizesType = getContext().getConstantArrayType( - getContext().getSizeType(), ArrSize, ArrayType::Normal, + getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1), + ArrSize, ArrayType::Normal, /*IndexTypeQuals=*/0); SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD, S.getBeginLoc()); @@ -3127,6 +3141,8 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( // Set proper addresses for generated private copies. 
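The firstprivate changes above add two short-circuits: in device code, a constant captured by reference into a target region is registered with registerTargetFirstprivateCopy instead of being copied, and a firstprivate that folds to a constant value needs no private allocation at all. A minimal sketch of code that benefits, assuming an OpenMP offload (device) compilation:

    void scale(double *out) {
      const double Factor = 2.5;
      // 'Factor' can be materialized as a device constant; no per-region
      // firstprivate copy is required.
      #pragma omp target map(tofrom: out[0:1]) firstprivate(Factor)
      out[0] *= Factor;
    }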
OMPPrivateScope Scope(CGF); if (!Data.FirstprivateVars.empty()) { + llvm::FunctionType *CopyFnTy = llvm::FunctionType::get( + CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true); enum { PrivatesParam = 2, CopyFnParam = 3 }; llvm::Value *CopyFn = CGF.Builder.CreateLoad( CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); @@ -3144,8 +3160,8 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( PrivatePtrs.emplace_back(VD, PrivatePtr); CallArgs.push_back(PrivatePtr.getPointer()); } - CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(), - CopyFn, CallArgs); + CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( + CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs); for (const auto &Pair : PrivatePtrs) { Address Replacement(CGF.Builder.CreateLoad(Pair.second), CGF.getContext().getDeclAlign(Pair.first)); @@ -3156,18 +3172,18 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( (void)Scope.Privatize(); if (InputInfo.NumberOfTargetItems > 0) { InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP( - CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0, CGF.getPointerSize()); + CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0); InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP( - CGF.GetAddrOfLocalVar(PVD), /*Index=*/0, CGF.getPointerSize()); + CGF.GetAddrOfLocalVar(PVD), /*Index=*/0); InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP( - CGF.GetAddrOfLocalVar(SVD), /*Index=*/0, CGF.getSizeSize()); + CGF.GetAddrOfLocalVar(SVD), /*Index=*/0); } Action.Enter(CGF); OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false); BodyGen(CGF); }; - llvm::Value *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( + llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true, Data.NumberOfParts); llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0); @@ -3200,7 +3216,7 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { CGF.EmitStmt(CS->getCapturedStmt()); }; auto &&TaskGen = [&S, SharedsTy, CapturedStruct, - IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, + IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn, const OMPTaskDataTy &Data) { CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn, SharedsTy, CapturedStruct, IfCond, @@ -3587,7 +3603,7 @@ static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst, CGF.EmitAtomicStore(RVal, LVal, IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent : llvm::AtomicOrdering::Monotonic, - LVal.isVolatile(), /*IsInit=*/false); + LVal.isVolatile(), /*isInit=*/false); } } @@ -3933,6 +3949,8 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, case OMPC_in_reduction: case OMPC_safelen: case OMPC_simdlen: + case OMPC_allocator: + case OMPC_allocate: case OMPC_collapse: case OMPC_default: case OMPC_seq_cst: @@ -4077,11 +4095,12 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, // Emit calculation of the iterations count. 
llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations()); NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty, - /*IsSigned=*/false); + /*isSigned=*/false); return NumIterations; }; - CGM.getOpenMPRuntime().emitTargetNumIterationsCall(CGF, S, Device, - SizeEmitter); + if (IsOffloadEntry) + CGM.getOpenMPRuntime().emitTargetNumIterationsCall(CGF, S, Device, + SizeEmitter); CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device); } @@ -4124,7 +4143,7 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams); - llvm::Value *OutlinedFn = + llvm::Function *OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction( S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); @@ -4970,7 +4989,7 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { } }; auto &&TaskGen = [&S, SharedsTy, CapturedStruct, - IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, + IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn, const OMPTaskDataTy &Data) { auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond, &Data](CodeGenFunction &CGF, PrePostActionTy &) { @@ -5077,4 +5096,3 @@ void CodeGenFunction::EmitSimpleOMPExecutableDirective( : D.getDirectiveKind(), CodeGen); } - diff --git a/lib/CodeGen/CGVTT.cpp b/lib/CodeGen/CGVTT.cpp index fbd8146702a9..e79f3f3dd8bc 100644 --- a/lib/CodeGen/CGVTT.cpp +++ b/lib/CodeGen/CGVTT.cpp @@ -1,9 +1,8 @@ //===--- CGVTT.cpp - Emit LLVM Code for C++ VTTs --------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/CGVTables.cpp b/lib/CodeGen/CGVTables.cpp index bfb089ff908e..3cb3d3544838 100644 --- a/lib/CodeGen/CGVTables.cpp +++ b/lib/CodeGen/CGVTables.cpp @@ -1,9 +1,8 @@ //===--- CGVTables.cpp - Emit LLVM Code for C++ vtables -------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -279,7 +278,7 @@ void CodeGenFunction::FinishThunk() { FinishFunction(); } -void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr, +void CodeGenFunction::EmitCallAndReturnForThunk(llvm::FunctionCallee Callee, const ThunkInfo *Thunk, bool IsUnprototyped) { assert(isa<CXXMethodDecl>(CurGD.getDecl()) && @@ -304,7 +303,7 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr, CGM.ErrorUnsupported( MD, "non-trivial argument copy for return-adjusting thunk"); } - EmitMustTailThunk(CurGD, AdjustedThisPtr, CalleePtr); + EmitMustTailThunk(CurGD, AdjustedThisPtr, Callee); return; } @@ -327,7 +326,7 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr, #ifndef NDEBUG const CGFunctionInfo &CallFnInfo = CGM.getTypes().arrangeCXXMethodCall( - CallArgs, FPT, RequiredArgs::forPrototypePlus(FPT, 1, MD), PrefixArgs); + CallArgs, FPT, RequiredArgs::forPrototypePlus(FPT, 1), PrefixArgs); assert(CallFnInfo.getRegParm() == CurFnInfo->getRegParm() && CallFnInfo.isNoReturn() == CurFnInfo->isNoReturn() && CallFnInfo.getCallingConvention() == CurFnInfo->getCallingConvention()); @@ -354,9 +353,9 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr, Slot = ReturnValueSlot(ReturnValue, ResultType.isVolatileQualified()); // Now emit our call. - llvm::Instruction *CallOrInvoke; - CGCallee Callee = CGCallee::forDirect(CalleePtr, CurGD); - RValue RV = EmitCall(*CurFnInfo, Callee, Slot, CallArgs, &CallOrInvoke); + llvm::CallBase *CallOrInvoke; + RValue RV = EmitCall(*CurFnInfo, CGCallee::forDirect(Callee, CurGD), Slot, + CallArgs, &CallOrInvoke); // Consider return adjustment if we have ThunkInfo. if (Thunk && !Thunk->Return.isEmpty()) @@ -376,7 +375,7 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr, void CodeGenFunction::EmitMustTailThunk(GlobalDecl GD, llvm::Value *AdjustedThisPtr, - llvm::Value *CalleePtr) { + llvm::FunctionCallee Callee) { // Emitting a musttail call thunk doesn't use any of the CGCall.cpp machinery // to translate AST arguments into LLVM IR arguments. For thunks, we know // that the caller prototype more or less matches the callee prototype with @@ -405,14 +404,14 @@ void CodeGenFunction::EmitMustTailThunk(GlobalDecl GD, // Emit the musttail call manually. Even if the prologue pushed cleanups, we // don't actually want to run them. - llvm::CallInst *Call = Builder.CreateCall(CalleePtr, Args); + llvm::CallInst *Call = Builder.CreateCall(Callee, Args); Call->setTailCallKind(llvm::CallInst::TCK_MustTail); // Apply the standard set of call attributes. unsigned CallingConv; llvm::AttributeList Attrs; - CGM.ConstructAttributeList(CalleePtr->getName(), *CurFnInfo, GD, Attrs, - CallingConv, /*AttrOnCallSite=*/true); + CGM.ConstructAttributeList(Callee.getCallee()->getName(), *CurFnInfo, GD, + Attrs, CallingConv, /*AttrOnCallSite=*/true); Call->setAttributes(Attrs); Call->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv)); @@ -450,7 +449,8 @@ void CodeGenFunction::generateThunk(llvm::Function *Fn, Callee = llvm::ConstantExpr::getBitCast(Callee, Fn->getType()); // Make the call and return the result. 
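EmitMustTailThunk, updated above to take a FunctionCallee, leans on LLVM's musttail guarantee: the forwarded call must be lowered as a tail call and must be followed immediately by the return. A minimal sketch of emitting that shape, assuming the builder is already positioned in the thunk body:

    #include "llvm/IR/IRBuilder.h"

    llvm::ReturnInst *forwardMustTail(llvm::IRBuilder<> &Builder,
                                      llvm::FunctionCallee Callee,
                                      llvm::ArrayRef<llvm::Value *> Args) {
      llvm::CallInst *Call = Builder.CreateCall(Callee, Args);
      Call->setTailCallKind(llvm::CallInst::TCK_MustTail);
      // 'musttail' requires the ret to follow the call directly.
      if (Call->getType()->isVoidTy())
        return Builder.CreateRetVoid();
      return Builder.CreateRet(Call);
    }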
- EmitCallAndReturnForThunk(Callee, &Thunk, IsUnprototyped); + EmitCallAndReturnForThunk(llvm::FunctionCallee(Fn->getFunctionType(), Callee), + &Thunk, IsUnprototyped); } static bool shouldEmitVTableThunk(CodeGenModule &CGM, const CXXMethodDecl *MD, @@ -649,7 +649,8 @@ void CodeGenVTables::addVTableComponent( auto getSpecialVirtualFn = [&](StringRef name) { llvm::FunctionType *fnTy = llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false); - llvm::Constant *fn = CGM.CreateRuntimeFunction(fnTy, name); + llvm::Constant *fn = cast<llvm::Constant>( + CGM.CreateRuntimeFunction(fnTy, name).getCallee()); if (auto f = dyn_cast<llvm::Function>(fn)) f->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); return llvm::ConstantExpr::getBitCast(fn, CGM.Int8PtrTy); @@ -760,7 +761,6 @@ CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD, // Create the variable that will hold the construction vtable. llvm::GlobalVariable *VTable = CGM.CreateOrReplaceCXXRuntimeVariable(Name, VTType, Linkage, Align); - CGM.setGVProperties(VTable, RD); // V-tables are always unnamed_addr. VTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); @@ -774,6 +774,11 @@ CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD, createVTableInitializer(components, *VTLayout, RTTI); components.finishAndSetAsInitializer(VTable); + // Set properties only after the initializer has been set to ensure that the + // GV is treated as definition and not declaration. + assert(!VTable->isDeclaration() && "Shouldn't set properties on declaration"); + CGM.setGVProperties(VTable, RD); + CGM.EmitVTableTypeMetadata(VTable, *VTLayout.get()); return VTable; diff --git a/lib/CodeGen/CGVTables.h b/lib/CodeGen/CGVTables.h index 6377659e4cb3..a47841bfc6c3 100644 --- a/lib/CodeGen/CGVTables.h +++ b/lib/CodeGen/CGVTables.h @@ -1,9 +1,8 @@ //===--- CGVTables.h - Emit LLVM Code for C++ vtables -----------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/CGValue.h b/lib/CodeGen/CGValue.h index da8a8efb840b..71f95abe488a 100644 --- a/lib/CodeGen/CGValue.h +++ b/lib/CodeGen/CGValue.h @@ -1,9 +1,8 @@ //===-- CGValue.h - LLVM CodeGen wrappers for llvm::Value* ------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/CodeGenABITypes.cpp b/lib/CodeGen/CodeGenABITypes.cpp index 27f5d53ffe11..6b6a116cf259 100644 --- a/lib/CodeGen/CodeGenABITypes.cpp +++ b/lib/CodeGen/CodeGenABITypes.cpp @@ -1,9 +1,8 @@ //==--- CodeGenABITypes.cpp - Convert Clang types to LLVM types for ABI ----==// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -35,9 +34,8 @@ CodeGen::arrangeObjCMessageSendSignature(CodeGenModule &CGM, const CGFunctionInfo & CodeGen::arrangeFreeFunctionType(CodeGenModule &CGM, - CanQual<FunctionProtoType> Ty, - const FunctionDecl *FD) { - return CGM.getTypes().arrangeFreeFunctionType(Ty, FD); + CanQual<FunctionProtoType> Ty) { + return CGM.getTypes().arrangeFreeFunctionType(Ty); } const CGFunctionInfo & @@ -61,14 +59,14 @@ CodeGen::arrangeFreeFunctionCall(CodeGenModule &CGM, FunctionType::ExtInfo info, RequiredArgs args) { return CGM.getTypes().arrangeLLVMFunctionInfo( - returnType, /*IsInstanceMethod=*/false, /*IsChainCall=*/false, argTypes, + returnType, /*instanceMethod=*/false, /*chainCall=*/false, argTypes, info, {}, args); } llvm::FunctionType * CodeGen::convertFreeFunctionType(CodeGenModule &CGM, const FunctionDecl *FD) { assert(FD != nullptr && "Expected a non-null function declaration!"); - llvm::Type *T = CGM.getTypes().ConvertFunctionType(FD->getType(), FD); + llvm::Type *T = CGM.getTypes().ConvertType(FD->getType()); if (auto FT = dyn_cast<llvm::FunctionType>(T)) return FT; diff --git a/lib/CodeGen/CodeGenAction.cpp b/lib/CodeGen/CodeGenAction.cpp index fd4506f2d197..0ae9ea427d65 100644 --- a/lib/CodeGen/CodeGenAction.cpp +++ b/lib/CodeGen/CodeGenAction.cpp @@ -1,9 +1,8 @@ //===--- CodeGenAction.cpp - LLVM Code Generation Frontend Action ---------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -20,6 +19,7 @@ #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/BackendUtil.h" #include "clang/CodeGen/ModuleBuilder.h" +#include "clang/Driver/DriverDiagnostic.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendDiagnostic.h" #include "clang/Lex/Preprocessor.h" @@ -31,6 +31,7 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/IR/RemarkStreamer.h" #include "llvm/IRReader/IRReader.h" #include "llvm/Linker/Linker.h" #include "llvm/Pass.h" @@ -261,28 +262,37 @@ namespace clang { Ctx.getDiagnosticHandler(); Ctx.setDiagnosticHandler(llvm::make_unique<ClangDiagnosticHandler>( CodeGenOpts, this)); - Ctx.setDiagnosticsHotnessRequested(CodeGenOpts.DiagnosticsWithHotness); - if (CodeGenOpts.DiagnosticsHotnessThreshold != 0) - Ctx.setDiagnosticsHotnessThreshold( - CodeGenOpts.DiagnosticsHotnessThreshold); - - std::unique_ptr<llvm::ToolOutputFile> OptRecordFile; - if (!CodeGenOpts.OptRecordFile.empty()) { - std::error_code EC; - OptRecordFile = llvm::make_unique<llvm::ToolOutputFile>( - CodeGenOpts.OptRecordFile, EC, sys::fs::F_None); - if (EC) { - Diags.Report(diag::err_cannot_open_file) << - CodeGenOpts.OptRecordFile << EC.message(); - return; - } - - Ctx.setDiagnosticsOutputFile( - llvm::make_unique<yaml::Output>(OptRecordFile->os())); - if (CodeGenOpts.getProfileUse() != CodeGenOptions::ProfileNone) - Ctx.setDiagnosticsHotnessRequested(true); + Expected<std::unique_ptr<llvm::ToolOutputFile>> OptRecordFileOrErr = + setupOptimizationRemarks(Ctx, CodeGenOpts.OptRecordFile, + CodeGenOpts.OptRecordPasses, + CodeGenOpts.OptRecordFormat, + CodeGenOpts.DiagnosticsWithHotness, + CodeGenOpts.DiagnosticsHotnessThreshold); + + if (Error E = OptRecordFileOrErr.takeError()) { + handleAllErrors( + std::move(E), + [&](const RemarkSetupFileError &E) { + Diags.Report(diag::err_cannot_open_file) + << CodeGenOpts.OptRecordFile << E.message(); + }, + [&](const RemarkSetupPatternError &E) { + Diags.Report(diag::err_drv_optimization_remark_pattern) + << E.message() << CodeGenOpts.OptRecordPasses; + }, + [&](const RemarkSetupFormatError &E) { + Diags.Report(diag::err_drv_optimization_remark_format) + << CodeGenOpts.OptRecordFormat; + }); + return; } + std::unique_ptr<llvm::ToolOutputFile> OptRecordFile = + std::move(*OptRecordFileOrErr); + + if (OptRecordFile && + CodeGenOpts.getProfileUse() != CodeGenOptions::ProfileNone) + Ctx.setDiagnosticsHotnessRequested(true); // Link each LinkModule into our module. 
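The remark-file setup above switches to LLVM's structured error handling: setupOptimizationRemarks returns an Expected, and handleAllErrors maps each RemarkSetup* error class onto its own frontend diagnostic. A minimal sketch of the Expected/handleAllErrors idiom, with a hypothetical producer function:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/raw_ostream.h"

    static llvm::Expected<int> parseLevel(llvm::StringRef S) {
      int V;
      if (S.getAsInteger(10, V)) // true means the parse failed
        return llvm::createStringError(llvm::inconvertibleErrorCode(),
                                       "not an integer: %s", S.str().c_str());
      return V;
    }

    int levelOr(llvm::StringRef S, int Default) {
      llvm::Expected<int> LevelOrErr = parseLevel(S);
      if (llvm::Error E = LevelOrErr.takeError()) {
        // Every llvm::Error must be consumed; one handler per error class.
        llvm::handleAllErrors(std::move(E), [](const llvm::StringError &SE) {
          llvm::errs() << "ignored: " << SE.getMessage() << "\n";
        });
        return Default;
      }
      return *LevelOrErr;
    }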
if (LinkInModules()) @@ -936,7 +946,8 @@ static void BitcodeInlineAsmDiagHandler(const llvm::SMDiagnostic &SM, Diags->Report(DiagID).AddString("cannot compile inline asm"); } -std::unique_ptr<llvm::Module> CodeGenAction::loadModule(MemoryBufferRef MBRef) { +std::unique_ptr<llvm::Module> +CodeGenAction::loadModule(MemoryBufferRef MBRef) { CompilerInstance &CI = getCompilerInstance(); SourceManager &SM = CI.getSourceManager(); @@ -1014,7 +1025,7 @@ void CodeGenAction::ExecuteAction() { bool Invalid; SourceManager &SM = CI.getSourceManager(); FileID FID = SM.getMainFileID(); - llvm::MemoryBuffer *MainFile = SM.getBuffer(FID, &Invalid); + const llvm::MemoryBuffer *MainFile = SM.getBuffer(FID, &Invalid); if (Invalid) return; diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp index 1713e40c312b..eafe26674434 100644 --- a/lib/CodeGen/CodeGenFunction.cpp +++ b/lib/CodeGen/CodeGenFunction.cpp @@ -1,9 +1,8 @@ //===--- CodeGenFunction.cpp - Emit LLVM Code from ASTs for a Function ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -256,6 +255,7 @@ llvm::DebugLoc CodeGenFunction::EmitReturnBlock() { if (CurBB->empty() || ReturnBlock.getBlock()->use_empty()) { ReturnBlock.getBlock()->replaceAllUsesWith(CurBB); delete ReturnBlock.getBlock(); + ReturnBlock = JumpDest(); } else EmitBlock(ReturnBlock.getBlock()); return llvm::DebugLoc(); @@ -275,6 +275,7 @@ llvm::DebugLoc CodeGenFunction::EmitReturnBlock() { Builder.SetInsertPoint(BI->getParent()); BI->eraseFromParent(); delete ReturnBlock.getBlock(); + ReturnBlock = JumpDest(); return Loc; } } @@ -449,6 +450,19 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { // 5. Width of vector aguments and return types for functions called by this // function. CurFn->addFnAttr("min-legal-vector-width", llvm::utostr(LargestVectorWidth)); + + // If we generated an unreachable return block, delete it now. + if (ReturnBlock.isValid() && ReturnBlock.getBlock()->use_empty()) { + Builder.ClearInsertionPoint(); + ReturnBlock.getBlock()->eraseFromParent(); + } + if (ReturnValue.isValid()) { + auto *RetAlloca = dyn_cast<llvm::AllocaInst>(ReturnValue.getPointer()); + if (RetAlloca && RetAlloca->use_empty()) { + RetAlloca->eraseFromParent(); + ReturnValue = Address::invalid(); + } + } } /// ShouldInstrumentFunction - Return true if the current function should be @@ -522,205 +536,6 @@ CodeGenFunction::DecodeAddrUsedInPrologue(llvm::Value *F, "decoded_addr"); } -static void removeImageAccessQualifier(std::string& TyName) { - std::string ReadOnlyQual("__read_only"); - std::string::size_type ReadOnlyPos = TyName.find(ReadOnlyQual); - if (ReadOnlyPos != std::string::npos) - // "+ 1" for the space after access qualifier. 
- TyName.erase(ReadOnlyPos, ReadOnlyQual.size() + 1); - else { - std::string WriteOnlyQual("__write_only"); - std::string::size_type WriteOnlyPos = TyName.find(WriteOnlyQual); - if (WriteOnlyPos != std::string::npos) - TyName.erase(WriteOnlyPos, WriteOnlyQual.size() + 1); - else { - std::string ReadWriteQual("__read_write"); - std::string::size_type ReadWritePos = TyName.find(ReadWriteQual); - if (ReadWritePos != std::string::npos) - TyName.erase(ReadWritePos, ReadWriteQual.size() + 1); - } - } -} - -// Returns the address space id that should be produced to the -// kernel_arg_addr_space metadata. This is always fixed to the ids -// as specified in the SPIR 2.0 specification in order to differentiate -// for example in clGetKernelArgInfo() implementation between the address -// spaces with targets without unique mapping to the OpenCL address spaces -// (basically all single AS CPUs). -static unsigned ArgInfoAddressSpace(LangAS AS) { - switch (AS) { - case LangAS::opencl_global: return 1; - case LangAS::opencl_constant: return 2; - case LangAS::opencl_local: return 3; - case LangAS::opencl_generic: return 4; // Not in SPIR 2.0 specs. - default: - return 0; // Assume private. - } -} - -// OpenCL v1.2 s5.6.4.6 allows the compiler to store kernel argument -// information in the program executable. The argument information stored -// includes the argument name, its type, the address and access qualifiers used. -static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, - CodeGenModule &CGM, llvm::LLVMContext &Context, - CGBuilderTy &Builder, ASTContext &ASTCtx) { - // Create MDNodes that represent the kernel arg metadata. - // Each MDNode is a list in the form of "key", N number of values which is - // the same number of values as their are kernel arguments. - - const PrintingPolicy &Policy = ASTCtx.getPrintingPolicy(); - - // MDNode for the kernel argument address space qualifiers. - SmallVector<llvm::Metadata *, 8> addressQuals; - - // MDNode for the kernel argument access qualifiers (images only). - SmallVector<llvm::Metadata *, 8> accessQuals; - - // MDNode for the kernel argument type names. - SmallVector<llvm::Metadata *, 8> argTypeNames; - - // MDNode for the kernel argument base type names. - SmallVector<llvm::Metadata *, 8> argBaseTypeNames; - - // MDNode for the kernel argument type qualifiers. - SmallVector<llvm::Metadata *, 8> argTypeQuals; - - // MDNode for the kernel argument names. - SmallVector<llvm::Metadata *, 8> argNames; - - for (unsigned i = 0, e = FD->getNumParams(); i != e; ++i) { - const ParmVarDecl *parm = FD->getParamDecl(i); - QualType ty = parm->getType(); - std::string typeQuals; - - if (ty->isPointerType()) { - QualType pointeeTy = ty->getPointeeType(); - - // Get address qualifier. - addressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32( - ArgInfoAddressSpace(pointeeTy.getAddressSpace())))); - - // Get argument type name. 
- std::string typeName = - pointeeTy.getUnqualifiedType().getAsString(Policy) + "*"; - - // Turn "unsigned type" to "utype" - std::string::size_type pos = typeName.find("unsigned"); - if (pointeeTy.isCanonical() && pos != std::string::npos) - typeName.erase(pos+1, 8); - - argTypeNames.push_back(llvm::MDString::get(Context, typeName)); - - std::string baseTypeName = - pointeeTy.getUnqualifiedType().getCanonicalType().getAsString( - Policy) + - "*"; - - // Turn "unsigned type" to "utype" - pos = baseTypeName.find("unsigned"); - if (pos != std::string::npos) - baseTypeName.erase(pos+1, 8); - - argBaseTypeNames.push_back(llvm::MDString::get(Context, baseTypeName)); - - // Get argument type qualifiers: - if (ty.isRestrictQualified()) - typeQuals = "restrict"; - if (pointeeTy.isConstQualified() || - (pointeeTy.getAddressSpace() == LangAS::opencl_constant)) - typeQuals += typeQuals.empty() ? "const" : " const"; - if (pointeeTy.isVolatileQualified()) - typeQuals += typeQuals.empty() ? "volatile" : " volatile"; - } else { - uint32_t AddrSpc = 0; - bool isPipe = ty->isPipeType(); - if (ty->isImageType() || isPipe) - AddrSpc = ArgInfoAddressSpace(LangAS::opencl_global); - - addressQuals.push_back( - llvm::ConstantAsMetadata::get(Builder.getInt32(AddrSpc))); - - // Get argument type name. - std::string typeName; - if (isPipe) - typeName = ty.getCanonicalType()->getAs<PipeType>()->getElementType() - .getAsString(Policy); - else - typeName = ty.getUnqualifiedType().getAsString(Policy); - - // Turn "unsigned type" to "utype" - std::string::size_type pos = typeName.find("unsigned"); - if (ty.isCanonical() && pos != std::string::npos) - typeName.erase(pos+1, 8); - - std::string baseTypeName; - if (isPipe) - baseTypeName = ty.getCanonicalType()->getAs<PipeType>() - ->getElementType().getCanonicalType() - .getAsString(Policy); - else - baseTypeName = - ty.getUnqualifiedType().getCanonicalType().getAsString(Policy); - - // Remove access qualifiers on images - // (as they are inseparable from type in clang implementation, - // but OpenCL spec provides a special query to get access qualifier - // via clGetKernelArgInfo with CL_KERNEL_ARG_ACCESS_QUALIFIER): - if (ty->isImageType()) { - removeImageAccessQualifier(typeName); - removeImageAccessQualifier(baseTypeName); - } - - argTypeNames.push_back(llvm::MDString::get(Context, typeName)); - - // Turn "unsigned type" to "utype" - pos = baseTypeName.find("unsigned"); - if (pos != std::string::npos) - baseTypeName.erase(pos+1, 8); - - argBaseTypeNames.push_back(llvm::MDString::get(Context, baseTypeName)); - - if (isPipe) - typeQuals = "pipe"; - } - - argTypeQuals.push_back(llvm::MDString::get(Context, typeQuals)); - - // Get image and pipe access qualifier: - if (ty->isImageType()|| ty->isPipeType()) { - const Decl *PDecl = parm; - if (auto *TD = dyn_cast<TypedefType>(ty)) - PDecl = TD->getDecl(); - const OpenCLAccessAttr *A = PDecl->getAttr<OpenCLAccessAttr>(); - if (A && A->isWriteOnly()) - accessQuals.push_back(llvm::MDString::get(Context, "write_only")); - else if (A && A->isReadWrite()) - accessQuals.push_back(llvm::MDString::get(Context, "read_write")); - else - accessQuals.push_back(llvm::MDString::get(Context, "read_only")); - } else - accessQuals.push_back(llvm::MDString::get(Context, "none")); - - // Get argument name. 
- argNames.push_back(llvm::MDString::get(Context, parm->getName())); - } - - Fn->setMetadata("kernel_arg_addr_space", - llvm::MDNode::get(Context, addressQuals)); - Fn->setMetadata("kernel_arg_access_qual", - llvm::MDNode::get(Context, accessQuals)); - Fn->setMetadata("kernel_arg_type", - llvm::MDNode::get(Context, argTypeNames)); - Fn->setMetadata("kernel_arg_base_type", - llvm::MDNode::get(Context, argBaseTypeNames)); - Fn->setMetadata("kernel_arg_type_qual", - llvm::MDNode::get(Context, argTypeQuals)); - if (CGM.getCodeGenOpts().EmitOpenCLArgMetadata) - Fn->setMetadata("kernel_arg_name", - llvm::MDNode::get(Context, argNames)); -} - void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD, llvm::Function *Fn) { @@ -729,7 +544,7 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD, llvm::LLVMContext &Context = getLLVMContext(); - GenOpenCLArgMetadata(FD, Fn, CGM, Context, Builder, getContext()); + CGM.GenOpenCLArgMetadata(Fn, FD, this); if (const VecTypeHintAttr *A = FD->getAttr<VecTypeHintAttr>()) { QualType HintQTy = A->getTypeHint(); @@ -881,6 +696,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, Fn->addFnAttr(llvm::Attribute::SanitizeAddress); if (SanOpts.hasOneOf(SanitizerKind::HWAddress | SanitizerKind::KernelHWAddress)) Fn->addFnAttr(llvm::Attribute::SanitizeHWAddress); + if (SanOpts.has(SanitizerKind::MemTag)) + Fn->addFnAttr(llvm::Attribute::SanitizeMemTag); if (SanOpts.has(SanitizerKind::Thread)) Fn->addFnAttr(llvm::Attribute::SanitizeThread); if (SanOpts.hasOneOf(SanitizerKind::Memory | SanitizerKind::KernelMemory)) @@ -1080,6 +897,13 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, if (CurFnInfo->getReturnInfo().isSRetAfterThis()) ++AI; ReturnValue = Address(&*AI, CurFnInfo->getReturnInfo().getIndirectAlign()); + if (!CurFnInfo->getReturnInfo().getIndirectByVal()) { + ReturnValuePointer = + CreateDefaultAlignTempAlloca(Int8PtrTy, "result.ptr"); + Builder.CreateStore(Builder.CreatePointerBitCastOrAddrSpaceCast( + ReturnValue.getPointer(), Int8PtrTy), + ReturnValuePointer); + } } else if (CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::InAlloca && !hasScalarEvaluationKind(CurFnInfo->getReturnType())) { // Load the sret pointer from the argument struct and return into that. @@ -1087,6 +911,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, llvm::Function::arg_iterator EI = CurFn->arg_end(); --EI; llvm::Value *Addr = Builder.CreateStructGEP(nullptr, &*EI, Idx); + ReturnValuePointer = Address(Addr, getPointerAlign()); Addr = Builder.CreateAlignedLoad(Addr, getPointerAlign(), "agg.result"); ReturnValue = Address(Addr, getNaturalTypeAlignment(RetTy)); } else { @@ -2135,6 +1960,7 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) { case Type::Attributed: case Type::SubstTemplateTypeParm: case Type::PackExpansion: + case Type::MacroQualified: // Keep walking after single level desugaring. 
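The ReturnValuePointer member introduced in this hunk stashes the incoming sret pointer in its own alloca so that later code can still recover the address of the return slot. A minimal sketch of the kind of function this applies to, one whose return type is passed indirectly on typical targets:

    struct Big { long A[8]; };

    Big makeBig() {   // roughly lowered as: void makeBig(Big *sret)
      Big B = {};
      B.A[0] = 42;
      return B;       // becomes stores through the hidden sret pointer
    }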
type = type.getSingleStepDesugaredType(getContext()); break; @@ -2174,7 +2000,7 @@ Address CodeGenFunction::EmitMSVAListRef(const Expr *E) { void CodeGenFunction::EmitDeclRefExprDbgValue(const DeclRefExpr *E, const APValue &Init) { - assert(!Init.isUninit() && "Invalid DeclRefExpr initializer!"); + assert(Init.hasValue() && "Invalid DeclRefExpr initializer!"); if (CGDebugInfo *Dbg = getDebugInfo()) if (CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo) Dbg->EmitGlobalVariable(E->getDecl(), Init); @@ -2250,7 +2076,7 @@ void CodeGenFunction::EmitAlignmentAssumption(llvm::Value *PtrValue, OffsetValue); } -llvm::Value *CodeGenFunction::EmitAnnotationCall(llvm::Value *AnnotationFn, +llvm::Value *CodeGenFunction::EmitAnnotationCall(llvm::Function *AnnotationFn, llvm::Value *AnnotatedVal, StringRef AnnotationStr, SourceLocation Location) { @@ -2278,7 +2104,7 @@ Address CodeGenFunction::EmitFieldAnnotations(const FieldDecl *D, assert(D->hasAttr<AnnotateAttr>() && "no annotate attribute"); llvm::Value *V = Addr.getPointer(); llvm::Type *VTy = V->getType(); - llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::ptr_annotation, + llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::ptr_annotation, CGM.Int8PtrTy); for (const auto *I : D->specific_attrs<AnnotateAttr>()) { @@ -2355,6 +2181,13 @@ static bool hasRequiredFeatures(const SmallVectorImpl<StringRef> &ReqFeatures, // called function. void CodeGenFunction::checkTargetFeatures(const CallExpr *E, const FunctionDecl *TargetDecl) { + return checkTargetFeatures(E->getBeginLoc(), TargetDecl); +} + +// Emits an error if we don't have a valid set of target features for the +// called function. +void CodeGenFunction::checkTargetFeatures(SourceLocation Loc, + const FunctionDecl *TargetDecl) { // Early exit if this is an indirect call. if (!TargetDecl) return; @@ -2379,7 +2212,7 @@ void CodeGenFunction::checkTargetFeatures(const CallExpr *E, return; StringRef(FeatureList).split(ReqFeatures, ','); if (!hasRequiredFeatures(ReqFeatures, CGM, FD, MissingFeature)) - CGM.getDiags().Report(E->getBeginLoc(), diag::err_builtin_needs_feature) + CGM.getDiags().Report(Loc, diag::err_builtin_needs_feature) << TargetDecl->getDeclName() << CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID); @@ -2405,7 +2238,7 @@ void CodeGenFunction::checkTargetFeatures(const CallExpr *E, ReqFeatures.push_back(F.getKey()); } if (!hasRequiredFeatures(ReqFeatures, CGM, FD, MissingFeature)) - CGM.getDiags().Report(E->getBeginLoc(), diag::err_function_needs_feature) + CGM.getDiags().Report(Loc, diag::err_function_needs_feature) << FD->getDeclName() << TargetDecl->getDeclName() << MissingFeature; } } diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index 89cb850ab1b1..c3060d1fb351 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -1,9 +1,8 @@ //===-- CodeGenFunction.h - Per-Function state for LLVM CodeGen -*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -23,6 +22,7 @@
 #include "EHScopeStack.h"
 #include "VarBypassDetector.h"
 #include "clang/AST/CharUnits.h"
+#include "clang/AST/CurrentSourceLocExprScope.h"
 #include "clang/AST/ExprCXX.h"
 #include "clang/AST/ExprObjC.h"
 #include "clang/AST/ExprOpenMP.h"
@@ -48,7 +48,6 @@
 class Module;
 class SwitchInst;
 class Twine;
 class Value;
-class CallSite;
 }

 namespace clang {
@@ -115,7 +114,7 @@ enum TypeEvaluationKind {
   SANITIZER_CHECK(DivremOverflow, divrem_overflow, 0) \
   SANITIZER_CHECK(DynamicTypeCacheMiss, dynamic_type_cache_miss, 0) \
   SANITIZER_CHECK(FloatCastOverflow, float_cast_overflow, 0) \
-  SANITIZER_CHECK(FunctionTypeMismatch, function_type_mismatch, 0) \
+  SANITIZER_CHECK(FunctionTypeMismatch, function_type_mismatch, 1) \
   SANITIZER_CHECK(ImplicitConversion, implicit_conversion, 0) \
   SANITIZER_CHECK(InvalidBuiltin, invalid_builtin, 0) \
   SANITIZER_CHECK(LoadInvalidValue, load_invalid_value, 0) \
@@ -328,6 +327,10 @@ public:
   /// value. This is invalid iff the function has no return value.
   Address ReturnValue = Address::invalid();

+  /// ReturnValuePointer - The temporary alloca to hold a pointer to sret.
+  /// This is invalid if sret is not in use.
+  Address ReturnValuePointer = Address::invalid();
+
   /// Return true if a label was seen in the current scope.
   bool hasLabelBeenSeenInCurrentScope() const {
     if (CurLexicalScope)
@@ -477,6 +480,10 @@ public:
   /// finally block or filter expression.
   bool IsOutlinedSEHHelper = false;

+  /// True if CodeGen currently emits code inside preserved access index
+  /// region.
+  bool IsInPreservedAIRegion = false;
+
   const CodeGen::CGBlockInfo *BlockInfo = nullptr;
   llvm::Value *BlockPointer = nullptr;
@@ -568,7 +575,7 @@ public:
     JumpDest RethrowDest;

     /// A function to call to enter the catch.
-    llvm::Constant *BeginCatchFn;
+    llvm::FunctionCallee BeginCatchFn;

     /// An i1 variable indicating whether or not the @finally is
     /// running for an exception.
@@ -580,8 +587,8 @@ public:

   public:
     void enter(CodeGenFunction &CGF, const Stmt *Finally,
-               llvm::Constant *beginCatchFn, llvm::Constant *endCatchFn,
-               llvm::Constant *rethrowFn);
+               llvm::FunctionCallee beginCatchFn,
+               llvm::FunctionCallee endCatchFn, llvm::FunctionCallee rethrowFn);
     void exit(CodeGenFunction &CGF);
   };
@@ -668,7 +675,8 @@ public:
   /// PushDestructorCleanup - Push a cleanup to call the
   /// complete-object variant of the given destructor on the object at
   /// the given address.
-  void PushDestructorCleanup(const CXXDestructorDecl *Dtor, Address Addr);
+  void PushDestructorCleanup(const CXXDestructorDecl *Dtor, QualType T,
+                             Address Addr);

   /// PopCleanupBlock - Will pop the cleanup entry on the stack and
   /// process all branch fixups.
@@ -1395,6 +1403,12 @@ private:
   SourceLocation LastStopPoint;

 public:
+  /// Source location information about the default argument or member
+  /// initializer expression we're evaluating, if any.
+  CurrentSourceLocExprScope CurSourceLocExprScope;
+  using SourceLocExprScopeGuard =
+      CurrentSourceLocExprScope::SourceLocExprScopeGuard;
+
   /// A scope within which we are constructing the fields of an object which
   /// might use a CXXDefaultInitExpr. This stashes away a 'this' value to use
   /// if we need to evaluate a CXXDefaultInitExpr within the evaluation.
@@ -1415,11 +1429,12 @@ public:

   /// The scope of a CXXDefaultInitExpr. Within this scope, the value of 'this'
   /// is overridden to be the object under construction.
- class CXXDefaultInitExprScope { + class CXXDefaultInitExprScope { public: - CXXDefaultInitExprScope(CodeGenFunction &CGF) - : CGF(CGF), OldCXXThisValue(CGF.CXXThisValue), - OldCXXThisAlignment(CGF.CXXThisAlignment) { + CXXDefaultInitExprScope(CodeGenFunction &CGF, const CXXDefaultInitExpr *E) + : CGF(CGF), OldCXXThisValue(CGF.CXXThisValue), + OldCXXThisAlignment(CGF.CXXThisAlignment), + SourceLocScope(E, CGF.CurSourceLocExprScope) { CGF.CXXThisValue = CGF.CXXDefaultInitExprThis.getPointer(); CGF.CXXThisAlignment = CGF.CXXDefaultInitExprThis.getAlignment(); } @@ -1432,6 +1447,12 @@ public: CodeGenFunction &CGF; llvm::Value *OldCXXThisValue; CharUnits OldCXXThisAlignment; + SourceLocExprScopeGuard SourceLocScope; + }; + + struct CXXDefaultArgExprScope : SourceLocExprScopeGuard { + CXXDefaultArgExprScope(CodeGenFunction &CGF, const CXXDefaultArgExpr *E) + : SourceLocExprScopeGuard(E, CGF.CurSourceLocExprScope) {} }; /// The scope of an ArrayInitLoopExpr. Within this scope, the value of the @@ -1836,6 +1857,9 @@ public: void EmitLambdaBlockInvokeBody(); void EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD); void EmitLambdaStaticInvokeBody(const CXXMethodDecl *MD); + void EmitLambdaVLACapture(const VariableArrayType *VAT, LValue LV) { + EmitStoreThroughLValue(RValue::get(VLASizeMap[VAT->getSizeExpr()]), LV); + } void EmitAsanPrologueOrEpilogue(bool Prologue); /// Emit the unified return block, trying to avoid its emission when @@ -1851,14 +1875,14 @@ public: void StartThunk(llvm::Function *Fn, GlobalDecl GD, const CGFunctionInfo &FnInfo, bool IsUnprototyped); - void EmitCallAndReturnForThunk(llvm::Constant *Callee, const ThunkInfo *Thunk, - bool IsUnprototyped); + void EmitCallAndReturnForThunk(llvm::FunctionCallee Callee, + const ThunkInfo *Thunk, bool IsUnprototyped); void FinishThunk(); /// Emit a musttail call for a thunk with a potentially adjusted this pointer. void EmitMustTailThunk(GlobalDecl GD, llvm::Value *AdjustedThisPtr, - llvm::Value *Callee); + llvm::FunctionCallee Callee); /// Generate a thunk for the given method. void generateThunk(llvm::Function *Fn, const CGFunctionInfo &FnInfo, @@ -2295,7 +2319,7 @@ public: } /// Determine whether a return value slot may overlap some other object. - AggValueSlot::Overlap_t overlapForReturnValue() { + AggValueSlot::Overlap_t getOverlapForReturnValue() { // FIXME: Assuming no overlap here breaks guaranteed copy elision for base // class subobjects. These cases may need to be revisited depending on the // resolution of the relevant core issue. @@ -2303,20 +2327,13 @@ public: } /// Determine whether a field initialization may overlap some other object. - AggValueSlot::Overlap_t overlapForFieldInit(const FieldDecl *FD) { - // FIXME: These cases can result in overlap as a result of P0840R0's - // [[no_unique_address]] attribute. We can still infer NoOverlap in the - // presence of that attribute if the field is within the nvsize of its - // containing class, because non-virtual subobjects are initialized in - // address order. - return AggValueSlot::DoesNotOverlap; - } + AggValueSlot::Overlap_t getOverlapForFieldInit(const FieldDecl *FD); /// Determine whether a base class initialization may overlap some other /// object. - AggValueSlot::Overlap_t overlapForBaseInit(const CXXRecordDecl *RD, - const CXXRecordDecl *BaseRD, - bool IsVirtual); + AggValueSlot::Overlap_t getOverlapForBaseInit(const CXXRecordDecl *RD, + const CXXRecordDecl *BaseRD, + bool IsVirtual); /// Emit an aggregate assignment. 
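The CurSourceLocExprScope/SourceLocExprScopeGuard plumbing above is what lets source-location builtins inside default arguments and default member initializers observe the caller's location rather than the declaration's. A small illustration using a standard clang builtin (example code, not from this commit):

    #include <cstdio>

    // With the scope tracking above, the default argument is evaluated
    // at each call site, so 'line' is the caller's line number.
    static void log_at(const char *msg, int line = __builtin_LINE()) {
      std::printf("%s at line %d\n", msg, line);
    }

    int main() {
      log_at("first");  // reports this line
      log_at("second"); // and this one
    }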
void EmitAggregateAssign(LValue Dest, LValue Src, QualType EltTy) {
@@ -2502,16 +2519,13 @@ public:
   void EmitCXXConstructorCall(const CXXConstructorDecl *D, CXXCtorType Type,
                               bool ForVirtualBase, bool Delegating,
-                              Address This, const CXXConstructExpr *E,
-                              AggValueSlot::Overlap_t Overlap,
-                              bool NewPointerIsChecked);
+                              AggValueSlot ThisAVS, const CXXConstructExpr *E);

   void EmitCXXConstructorCall(const CXXConstructorDecl *D, CXXCtorType Type,
                               bool ForVirtualBase, bool Delegating,
                               Address This, CallArgList &Args,
                               AggValueSlot::Overlap_t Overlap,
-                              SourceLocation Loc,
-                              bool NewPointerIsChecked);
+                              SourceLocation Loc, bool NewPointerIsChecked);

   /// Emit assumption load for all bases. Requires to be called only on
   /// most-derived class and not under construction of the object.
@@ -2541,8 +2555,8 @@ public:
   static Destroyer destroyCXXObject;

   void EmitCXXDestructorCall(const CXXDestructorDecl *D, CXXDtorType Type,
-                             bool ForVirtualBase, bool Delegating,
-                             Address This);
+                             bool ForVirtualBase, bool Delegating, Address This,
+                             QualType ThisTy);

   void EmitNewArrayInitializer(const CXXNewExpr *E, QualType elementType,
                                llvm::Type *ElementTy, Address NewPtr,
@@ -2618,10 +2632,12 @@ public:
   bool sanitizePerformTypeCheck() const;

   /// Emit a check that \p V is the address of storage of the
-  /// appropriate size and alignment for an object of type \p Type.
+  /// appropriate size and alignment for an object of type \p Type
+  /// (or if ArraySize is provided, for an array of that bound).
   void EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, llvm::Value *V,
                      QualType Type, CharUnits Alignment = CharUnits::Zero(),
-                     SanitizerSet SkippedChecks = SanitizerSet());
+                     SanitizerSet SkippedChecks = SanitizerSet(),
+                     llvm::Value *ArraySize = nullptr);

   /// Emit a check that \p Base points into an array object, which
   /// we can access at index \p Index. \p Accessed should be \c false if we
@@ -2637,6 +2653,9 @@ public:
   /// Converts Location to a DebugLoc, if debug information is enabled.
   llvm::DebugLoc SourceLocToDebugLoc(SourceLocation Location);

+  /// Get the record field index as represented in debug info.
+  unsigned getDebugInfoFIndex(const RecordDecl *Rec, unsigned FieldIndex);
+
   //===--------------------------------------------------------------------===//
   //                                  Declaration Emission
@@ -3084,7 +3103,7 @@ public:
   bool EmitOMPLinearClauseInit(const OMPLoopDirective &D);

   typedef const llvm::function_ref<void(CodeGenFunction & /*CGF*/,
-                                        llvm::Value * /*OutlinedFn*/,
+                                        llvm::Function * /*OutlinedFn*/,
                                         const OMPTaskDataTy & /*Data*/)>
       TaskGenTy;
   void EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
@@ -3560,7 +3579,6 @@ public:

   LValue EmitCXXConstructLValue(const CXXConstructExpr *E);
   LValue EmitCXXBindTemporaryLValue(const CXXBindTemporaryExpr *E);
-  LValue EmitLambdaLValue(const LambdaExpr *E);
   LValue EmitCXXTypeidLValue(const CXXTypeidExpr *E);
   LValue EmitCXXUuidofLValue(const CXXUuidofExpr *E);

@@ -3580,10 +3598,10 @@ public:
   /// LLVM arguments and the types they were derived from.
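Several prototypes above trade llvm::Constant* callees for llvm::FunctionCallee, which carries the callee's FunctionType alongside the callee pointer so call construction no longer has to dig the type out of a possibly bitcast pointer. A minimal sketch of the pattern against the LLVM 9 API (names are placeholders):

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Module M("callee_demo", Ctx);
      IRBuilder<> B(Ctx);

      // getOrInsertFunction now returns {FunctionType*, Value*}; the type
      // travels with the callee even when the global is wrapped in a cast.
      FunctionCallee Puts = M.getOrInsertFunction(
          "puts", FunctionType::get(B.getInt32Ty(), {B.getInt8PtrTy()}, false));

      Function *F = Function::Create(FunctionType::get(B.getVoidTy(), false),
                                     Function::ExternalLinkage, "f", M);
      B.SetInsertPoint(BasicBlock::Create(Ctx, "entry", F));
      B.CreateCall(Puts, {B.CreateGlobalStringPtr("hi")});
      B.CreateRetVoid();

      M.print(outs(), nullptr);
    }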
RValue EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, const CallArgList &Args, - llvm::Instruction **callOrInvoke, SourceLocation Loc); + llvm::CallBase **callOrInvoke, SourceLocation Loc); RValue EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, const CallArgList &Args, - llvm::Instruction **callOrInvoke = nullptr) { + llvm::CallBase **callOrInvoke = nullptr) { return EmitCall(CallInfo, Callee, ReturnValue, Args, callOrInvoke, SourceLocation()); } @@ -3595,31 +3613,32 @@ public: CGCallee EmitCallee(const Expr *E); void checkTargetFeatures(const CallExpr *E, const FunctionDecl *TargetDecl); + void checkTargetFeatures(SourceLocation Loc, const FunctionDecl *TargetDecl); - llvm::CallInst *EmitRuntimeCall(llvm::Value *callee, + llvm::CallInst *EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name = ""); - llvm::CallInst *EmitRuntimeCall(llvm::Value *callee, - ArrayRef<llvm::Value*> args, + llvm::CallInst *EmitRuntimeCall(llvm::FunctionCallee callee, + ArrayRef<llvm::Value *> args, const Twine &name = ""); - llvm::CallInst *EmitNounwindRuntimeCall(llvm::Value *callee, + llvm::CallInst *EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name = ""); - llvm::CallInst *EmitNounwindRuntimeCall(llvm::Value *callee, - ArrayRef<llvm::Value*> args, + llvm::CallInst *EmitNounwindRuntimeCall(llvm::FunctionCallee callee, + ArrayRef<llvm::Value *> args, const Twine &name = ""); SmallVector<llvm::OperandBundleDef, 1> getBundlesForFunclet(llvm::Value *Callee); - llvm::CallSite EmitCallOrInvoke(llvm::Value *Callee, - ArrayRef<llvm::Value *> Args, - const Twine &Name = ""); - llvm::CallSite EmitRuntimeCallOrInvoke(llvm::Value *callee, - ArrayRef<llvm::Value*> args, - const Twine &name = ""); - llvm::CallSite EmitRuntimeCallOrInvoke(llvm::Value *callee, - const Twine &name = ""); - void EmitNoreturnRuntimeCallOrInvoke(llvm::Value *callee, - ArrayRef<llvm::Value*> args); + llvm::CallBase *EmitCallOrInvoke(llvm::FunctionCallee Callee, + ArrayRef<llvm::Value *> Args, + const Twine &Name = ""); + llvm::CallBase *EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, + ArrayRef<llvm::Value *> args, + const Twine &name = ""); + llvm::CallBase *EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, + const Twine &name = ""); + void EmitNoreturnRuntimeCallOrInvoke(llvm::FunctionCallee callee, + ArrayRef<llvm::Value *> args); CGCallee BuildAppleKextVirtualCall(const CXXMethodDecl *MD, NestedNameSpecifier *Qual, @@ -3659,11 +3678,10 @@ public: llvm::Value *ImplicitParam, QualType ImplicitParamTy, const CallExpr *E, CallArgList *RtlArgs); - RValue EmitCXXDestructorCall(const CXXDestructorDecl *DD, - const CGCallee &Callee, - llvm::Value *This, llvm::Value *ImplicitParam, - QualType ImplicitParamTy, const CallExpr *E, - StructorType Type); + RValue EmitCXXDestructorCall(GlobalDecl Dtor, const CGCallee &Callee, + llvm::Value *This, QualType ThisTy, + llvm::Value *ImplicitParam, + QualType ImplicitParamTy, const CallExpr *E); RValue EmitCXXMemberCallExpr(const CXXMemberCallExpr *E, ReturnValueSlot ReturnValue); RValue EmitCXXMemberOrOperatorMemberCallExpr(const CallExpr *CE, @@ -3727,9 +3745,6 @@ public: Address PtrOp0, Address PtrOp1, llvm::Triple::ArchType Arch); - llvm::Value *EmitISOVolatileLoad(const CallExpr *E); - llvm::Value *EmitISOVolatileStore(const CallExpr *E); - llvm::Function *LookupNeonLLVMIntrinsic(unsigned IntrinsicID, unsigned Modifier, llvm::Type *ArgTy, const CallExpr *E); @@ -3825,6 +3840,8 @@ 
public: llvm::Type *returnType); llvm::Value *EmitObjCAllocWithZone(llvm::Value *value, llvm::Type *returnType); + llvm::Value *EmitObjCAllocInit(llvm::Value *value, llvm::Type *resultType); + llvm::Value *EmitObjCThrowOperand(const Expr *expr); llvm::Value *EmitObjCConsumeObject(QualType T, llvm::Value *Ptr); llvm::Value *EmitObjCExtendObjectLifetime(QualType T, llvm::Value *Ptr); @@ -3922,12 +3939,12 @@ public: void EmitCXXGlobalVarDeclInit(const VarDecl &D, llvm::Constant *DeclPtr, bool PerformInit); - llvm::Constant *createAtExitStub(const VarDecl &VD, llvm::Constant *Dtor, + llvm::Function *createAtExitStub(const VarDecl &VD, llvm::FunctionCallee Dtor, llvm::Constant *Addr); /// Call atexit() with a function that passes the given argument to /// the given function. - void registerGlobalDtorWithAtExit(const VarDecl &D, llvm::Constant *fn, + void registerGlobalDtorWithAtExit(const VarDecl &D, llvm::FunctionCallee fn, llvm::Constant *addr); /// Call atexit() with function dtorStub. @@ -3960,8 +3977,8 @@ public: /// variables. void GenerateCXXGlobalDtorsFunc( llvm::Function *Fn, - const std::vector<std::pair<llvm::WeakTrackingVH, llvm::Constant *>> - &DtorsAndObjects); + const std::vector<std::tuple<llvm::FunctionType *, llvm::WeakTrackingVH, + llvm::Constant *>> &DtorsAndObjects); void GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn, const VarDecl *D, @@ -3982,16 +3999,14 @@ public: void EmitCXXThrowExpr(const CXXThrowExpr *E, bool KeepInsertionPoint = true); - void EmitLambdaExpr(const LambdaExpr *E, AggValueSlot Dest); - RValue EmitAtomicExpr(AtomicExpr *E); //===--------------------------------------------------------------------===// // Annotations Emission //===--------------------------------------------------------------------===// - /// Emit an annotation call (intrinsic or builtin). - llvm::Value *EmitAnnotationCall(llvm::Value *AnnotationFn, + /// Emit an annotation call (intrinsic). + llvm::Value *EmitAnnotationCall(llvm::Function *AnnotationFn, llvm::Value *AnnotatedVal, StringRef AnnotationStr, SourceLocation Location); @@ -4084,8 +4099,8 @@ public: /// passing to a runtime sanitizer handler. llvm::Constant *EmitCheckSourceLocation(SourceLocation Loc); - /// Create a basic block that will call a handler function in a - /// sanitizer runtime with the provided arguments, and create a conditional + /// Create a basic block that will either trap or call a handler function in + /// the UBSan runtime with the provided arguments, and create a conditional /// branch to it. void EmitCheck(ArrayRef<std::pair<llvm::Value *, SanitizerMask>> Checked, SanitizerHandler Check, ArrayRef<llvm::Constant *> StaticArgs, @@ -4177,14 +4192,19 @@ private: /// If EmittedExpr is non-null, this will use that instead of re-emitting E. llvm::Value *evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, llvm::IntegerType *ResType, - llvm::Value *EmittedE); + llvm::Value *EmittedE, + bool IsDynamic); /// Emits the size of E, as required by __builtin_object_size. This /// function is aware of pass_object_size parameters, and will act accordingly /// if E is a parameter with the pass_object_size attribute. 
llvm::Value *emitBuiltinObjectSize(const Expr *E, unsigned Type, llvm::IntegerType *ResType, - llvm::Value *EmittedE); + llvm::Value *EmittedE, + bool IsDynamic); + + void emitZeroOrPatternForAutoVarInit(QualType type, const VarDecl &D, + Address Loc); public: #ifndef NDEBUG diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp index 244738042cef..6ff72ec045e6 100644 --- a/lib/CodeGen/CodeGenModule.cpp +++ b/lib/CodeGen/CodeGenModule.cpp @@ -1,9 +1,8 @@ //===--- CodeGenModule.cpp - Emit LLVM Code from ASTs for a Module --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -34,6 +33,7 @@ #include "clang/AST/Mangle.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/StmtVisitor.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/CharInfo.h" #include "clang/Basic/CodeGenOptions.h" @@ -47,17 +47,18 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MD5.h" +#include "llvm/Support/TimeProfiler.h" using namespace clang; using namespace CodeGen; @@ -409,6 +410,10 @@ void CodeGenModule::Release() { AddGlobalCtor(CudaCtorFunction); } if (OpenMPRuntime) { + if (llvm::Function *OpenMPRequiresDirectiveRegFun = + OpenMPRuntime->emitRequiresDirectiveRegFun()) { + AddGlobalCtor(OpenMPRequiresDirectiveRegFun, 0); + } if (llvm::Function *OpenMPRegistrationFunction = OpenMPRuntime->emitRegistrationFunction()) { auto ComdatKey = OpenMPRegistrationFunction->hasComdat() ? @@ -418,7 +423,9 @@ void CodeGenModule::Release() { OpenMPRuntime->clear(); } if (PGOReader) { - getModule().setProfileSummary(PGOReader->getSummary().getMD(VMContext)); + getModule().setProfileSummary( + PGOReader->getSummary(/* UseCS */ false).getMD(VMContext), + llvm::ProfileSummary::PSK_Instr); if (PGOStats.hasDiagnostics()) PGOStats.reportDiagnostics(getDiags(), getCodeGenOpts().MainFileName); } @@ -443,6 +450,24 @@ void CodeGenModule::Release() { EmitModuleLinkOptions(); } + // On ELF we pass the dependent library specifiers directly to the linker + // without manipulating them. This is in contrast to other platforms where + // they are mapped to a specific linker option by the compiler. This + // difference is a result of the greater variety of ELF linkers and the fact + // that ELF linkers tend to handle libraries in a more complicated fashion + // than on other platforms. This forces us to defer handling the dependent + // libs to the linker. + // + // CUDA/HIP device and host libraries are different. Currently there is no + // way to differentiate dependent libraries for host or device. Existing + // usage of #pragma comment(lib, *) is intended for host libraries on + // Windows. 
Therefore emit llvm.dependent-libraries only for host. + if (!ELFDependentLibraries.empty() && !Context.getLangOpts().CUDAIsDevice) { + auto *NMD = getModule().getOrInsertNamedMetadata("llvm.dependent-libraries"); + for (auto *MD : ELFDependentLibraries) + NMD->addOperand(MD); + } + // Record mregparm value now so it is visible through rest of codegen. if (Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86) getModule().addModuleFlag(llvm::Module::Error, "NumRegisterParameters", @@ -536,15 +561,16 @@ void CodeGenModule::Release() { if (LangOpts.OpenCL) { EmitOpenCLMetadata(); // Emit SPIR version. - if (getTriple().getArch() == llvm::Triple::spir || - getTriple().getArch() == llvm::Triple::spir64) { + if (getTriple().isSPIR()) { // SPIR v2.0 s2.12 - The SPIR version used by the module is stored in the // opencl.spir.version named metadata. + // C++ is backwards compatible with OpenCL v2.0. + auto Version = LangOpts.OpenCLCPlusPlus ? 200 : LangOpts.OpenCLVersion; llvm::Metadata *SPIRVerElts[] = { llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( - Int32Ty, LangOpts.OpenCLVersion / 100)), + Int32Ty, Version / 100)), llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( - Int32Ty, (LangOpts.OpenCLVersion / 100 > 1) ? 0 : 2))}; + Int32Ty, (Version / 100 > 1) ? 0 : 2))}; llvm::NamedMDNode *SPIRVerMD = TheModule.getOrInsertNamedMetadata("opencl.spir.version"); llvm::LLVMContext &Ctx = TheModule.getContext(); @@ -599,11 +625,14 @@ void CodeGenModule::Release() { void CodeGenModule::EmitOpenCLMetadata() { // SPIR v2.0 s2.13 - The OpenCL version used by the module is stored in the // opencl.ocl.version named metadata node. + // C++ is backwards compatible with OpenCL v2.0. + // FIXME: We might need to add CXX version at some point too? + auto Version = LangOpts.OpenCLCPlusPlus ? 200 : LangOpts.OpenCLVersion; llvm::Metadata *OCLVerElts[] = { llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( - Int32Ty, LangOpts.OpenCLVersion / 100)), + Int32Ty, Version / 100)), llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( - Int32Ty, (LangOpts.OpenCLVersion % 100) / 10))}; + Int32Ty, (Version % 100) / 10))}; llvm::NamedMDNode *OCLVerMD = TheModule.getOrInsertNamedMetadata("opencl.ocl.version"); llvm::LLVMContext &Ctx = TheModule.getContext(); @@ -731,9 +760,11 @@ void CodeGenModule::setGlobalVisibility(llvm::GlobalValue *GV, } if (!D) return; - // Set visibility for definitions. + // Set visibility for definitions, and for declarations if requested globally + // or set explicitly. LinkageInfo LV = D->getLinkageAndVisibility(); - if (LV.isVisibilityExplicit() || !GV->isDeclarationForLinker()) + if (LV.isVisibilityExplicit() || getLangOpts().SetVisibilityForExternDecls || + !GV->isDeclarationForLinker()) GV->setVisibility(GetLLVMVisibility(LV.getVisibility())); } @@ -758,6 +789,13 @@ static bool shouldAssumeDSOLocal(const CodeGenModule &CGM, !GV->isThreadLocal()) return false; } + + // On COFF, don't mark 'extern_weak' symbols as DSO local. If these symbols + // remain unresolved in the link, they can be resolved to zero, which is + // outside the current DSO. + if (TT.isOSBinFormatCOFF() && GV->hasExternalWeakLinkage()) + return false; + // Every other GV is local on COFF. // Make an exception for windows OS in the triple: Some firmware builds use // *-win32-macho triples. This (accidentally?) 
produced windows relocations @@ -774,7 +812,7 @@ static bool shouldAssumeDSOLocal(const CodeGenModule &CGM, const auto &CGOpts = CGM.getCodeGenOpts(); llvm::Reloc::Model RM = CGOpts.RelocationModel; const auto &LOpts = CGM.getLangOpts(); - if (RM != llvm::Reloc::Static && !LOpts.PIE) + if (RM != llvm::Reloc::Static && !LOpts.PIE && !LOpts.OpenMPIsDevice) return false; // A definition cannot be preempted from an executable. @@ -837,19 +875,20 @@ void CodeGenModule::setDLLImportDLLExport(llvm::GlobalValue *GV, void CodeGenModule::setGVProperties(llvm::GlobalValue *GV, GlobalDecl GD) const { setDLLImportDLLExport(GV, GD); - setGlobalVisibilityAndLocal(GV, dyn_cast<NamedDecl>(GD.getDecl())); + setGVPropertiesAux(GV, dyn_cast<NamedDecl>(GD.getDecl())); } void CodeGenModule::setGVProperties(llvm::GlobalValue *GV, const NamedDecl *D) const { setDLLImportDLLExport(GV, D); - setGlobalVisibilityAndLocal(GV, D); + setGVPropertiesAux(GV, D); } -void CodeGenModule::setGlobalVisibilityAndLocal(llvm::GlobalValue *GV, - const NamedDecl *D) const { +void CodeGenModule::setGVPropertiesAux(llvm::GlobalValue *GV, + const NamedDecl *D) const { setGlobalVisibility(GV, D); setDSOLocal(GV); + GV->setPartition(CodeGenOpts.SymbolPartition); } static llvm::GlobalVariable::ThreadLocalMode GetLLVMTLSModel(StringRef S) { @@ -1047,8 +1086,15 @@ StringRef CodeGenModule::getMangledName(GlobalDecl GD) { // Keep the first result in the case of a mangling collision. const auto *ND = cast<NamedDecl>(GD.getDecl()); - auto Result = - Manglings.insert(std::make_pair(getMangledNameImpl(*this, GD, ND), GD)); + std::string MangledName = getMangledNameImpl(*this, GD, ND); + + // Adjust kernel stub mangling as we may need to be able to differentiate + // them from the kernel itself (e.g., for HIP). + if (auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) + if (!getLangOpts().CUDAIsDevice && FD->hasAttr<CUDAGlobalAttr>()) + MangledName = getCUDARuntime().getDeviceStubName(MangledName); + + auto Result = Manglings.insert(std::make_pair(MangledName, GD)); return MangledDeclNames[CanonicalGD] = Result.first->first(); } @@ -1153,7 +1199,7 @@ CodeGenModule::getFunctionLinkage(GlobalDecl GD) { return llvm::GlobalValue::InternalLinkage; } - return getLLVMLinkageForDeclarator(D, Linkage, /*isConstantVariable=*/false); + return getLLVMLinkageForDeclarator(D, Linkage, /*IsConstantVariable=*/false); } llvm::ConstantInt *CodeGenModule::CreateCrossDsoCfiTypeId(llvm::Metadata *MD) { @@ -1173,6 +1219,212 @@ void CodeGenModule::SetLLVMFunctionAttributes(GlobalDecl GD, F->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv)); } +static void removeImageAccessQualifier(std::string& TyName) { + std::string ReadOnlyQual("__read_only"); + std::string::size_type ReadOnlyPos = TyName.find(ReadOnlyQual); + if (ReadOnlyPos != std::string::npos) + // "+ 1" for the space after access qualifier. + TyName.erase(ReadOnlyPos, ReadOnlyQual.size() + 1); + else { + std::string WriteOnlyQual("__write_only"); + std::string::size_type WriteOnlyPos = TyName.find(WriteOnlyQual); + if (WriteOnlyPos != std::string::npos) + TyName.erase(WriteOnlyPos, WriteOnlyQual.size() + 1); + else { + std::string ReadWriteQual("__read_write"); + std::string::size_type ReadWritePos = TyName.find(ReadWriteQual); + if (ReadWritePos != std::string::npos) + TyName.erase(ReadWritePos, ReadWriteQual.size() + 1); + } + } +} + +// Returns the address space id that should be produced to the +// kernel_arg_addr_space metadata. 
This is always fixed to the ids
+// as specified in the SPIR 2.0 specification in order to differentiate
+// for example in clGetKernelArgInfo() implementation between the address
+// spaces with targets without unique mapping to the OpenCL address spaces
+// (basically all single AS CPUs).
+static unsigned ArgInfoAddressSpace(LangAS AS) {
+  switch (AS) {
+  case LangAS::opencl_global:   return 1;
+  case LangAS::opencl_constant: return 2;
+  case LangAS::opencl_local:    return 3;
+  case LangAS::opencl_generic:  return 4; // Not in SPIR 2.0 specs.
+  default:
+    return 0; // Assume private.
+  }
+}
+
+void CodeGenModule::GenOpenCLArgMetadata(llvm::Function *Fn,
+                                         const FunctionDecl *FD,
+                                         CodeGenFunction *CGF) {
+  assert(((FD && CGF) || (!FD && !CGF)) &&
+         "Incorrect use - FD and CGF should either be both null or not!");
+  // Create MDNodes that represent the kernel arg metadata.
+  // Each MDNode is a list in the form of "key", N number of values which is
+  // the same number of values as there are kernel arguments.
+
+  const PrintingPolicy &Policy = Context.getPrintingPolicy();
+
+  // MDNode for the kernel argument address space qualifiers.
+  SmallVector<llvm::Metadata *, 8> addressQuals;
+
+  // MDNode for the kernel argument access qualifiers (images only).
+  SmallVector<llvm::Metadata *, 8> accessQuals;
+
+  // MDNode for the kernel argument type names.
+  SmallVector<llvm::Metadata *, 8> argTypeNames;
+
+  // MDNode for the kernel argument base type names.
+  SmallVector<llvm::Metadata *, 8> argBaseTypeNames;
+
+  // MDNode for the kernel argument type qualifiers.
+  SmallVector<llvm::Metadata *, 8> argTypeQuals;
+
+  // MDNode for the kernel argument names.
+  SmallVector<llvm::Metadata *, 8> argNames;
+
+  if (FD && CGF)
+    for (unsigned i = 0, e = FD->getNumParams(); i != e; ++i) {
+      const ParmVarDecl *parm = FD->getParamDecl(i);
+      QualType ty = parm->getType();
+      std::string typeQuals;
+
+      if (ty->isPointerType()) {
+        QualType pointeeTy = ty->getPointeeType();
+
+        // Get address qualifier.
+        addressQuals.push_back(
+            llvm::ConstantAsMetadata::get(CGF->Builder.getInt32(
+                ArgInfoAddressSpace(pointeeTy.getAddressSpace()))));
+
+        // Get argument type name.
+        std::string typeName =
+            pointeeTy.getUnqualifiedType().getAsString(Policy) + "*";
+
+        // Turn "unsigned type" to "utype"
+        std::string::size_type pos = typeName.find("unsigned");
+        if (pointeeTy.isCanonical() && pos != std::string::npos)
+          typeName.erase(pos + 1, 8);
+
+        argTypeNames.push_back(llvm::MDString::get(VMContext, typeName));
+
+        std::string baseTypeName =
+            pointeeTy.getUnqualifiedType().getCanonicalType().getAsString(
+                Policy) +
+            "*";
+
+        // Turn "unsigned type" to "utype"
+        pos = baseTypeName.find("unsigned");
+        if (pos != std::string::npos)
+          baseTypeName.erase(pos + 1, 8);
+
+        argBaseTypeNames.push_back(
+            llvm::MDString::get(VMContext, baseTypeName));
+
+        // Get argument type qualifiers:
+        if (ty.isRestrictQualified())
+          typeQuals = "restrict";
+        if (pointeeTy.isConstQualified() ||
+            (pointeeTy.getAddressSpace() == LangAS::opencl_constant))
+          typeQuals += typeQuals.empty() ? "const" : " const";
+        if (pointeeTy.isVolatileQualified())
+          typeQuals += typeQuals.empty() ? "volatile" : " volatile";
+      } else {
+        uint32_t AddrSpc = 0;
+        bool isPipe = ty->isPipeType();
+        if (ty->isImageType() || isPipe)
+          AddrSpc = ArgInfoAddressSpace(LangAS::opencl_global);
+
+        addressQuals.push_back(
+            llvm::ConstantAsMetadata::get(CGF->Builder.getInt32(AddrSpc)));
+
+        // Get argument type name.
+ std::string typeName; + if (isPipe) + typeName = ty.getCanonicalType() + ->getAs<PipeType>() + ->getElementType() + .getAsString(Policy); + else + typeName = ty.getUnqualifiedType().getAsString(Policy); + + // Turn "unsigned type" to "utype" + std::string::size_type pos = typeName.find("unsigned"); + if (ty.isCanonical() && pos != std::string::npos) + typeName.erase(pos + 1, 8); + + std::string baseTypeName; + if (isPipe) + baseTypeName = ty.getCanonicalType() + ->getAs<PipeType>() + ->getElementType() + .getCanonicalType() + .getAsString(Policy); + else + baseTypeName = + ty.getUnqualifiedType().getCanonicalType().getAsString(Policy); + + // Remove access qualifiers on images + // (as they are inseparable from type in clang implementation, + // but OpenCL spec provides a special query to get access qualifier + // via clGetKernelArgInfo with CL_KERNEL_ARG_ACCESS_QUALIFIER): + if (ty->isImageType()) { + removeImageAccessQualifier(typeName); + removeImageAccessQualifier(baseTypeName); + } + + argTypeNames.push_back(llvm::MDString::get(VMContext, typeName)); + + // Turn "unsigned type" to "utype" + pos = baseTypeName.find("unsigned"); + if (pos != std::string::npos) + baseTypeName.erase(pos + 1, 8); + + argBaseTypeNames.push_back( + llvm::MDString::get(VMContext, baseTypeName)); + + if (isPipe) + typeQuals = "pipe"; + } + + argTypeQuals.push_back(llvm::MDString::get(VMContext, typeQuals)); + + // Get image and pipe access qualifier: + if (ty->isImageType() || ty->isPipeType()) { + const Decl *PDecl = parm; + if (auto *TD = dyn_cast<TypedefType>(ty)) + PDecl = TD->getDecl(); + const OpenCLAccessAttr *A = PDecl->getAttr<OpenCLAccessAttr>(); + if (A && A->isWriteOnly()) + accessQuals.push_back(llvm::MDString::get(VMContext, "write_only")); + else if (A && A->isReadWrite()) + accessQuals.push_back(llvm::MDString::get(VMContext, "read_write")); + else + accessQuals.push_back(llvm::MDString::get(VMContext, "read_only")); + } else + accessQuals.push_back(llvm::MDString::get(VMContext, "none")); + + // Get argument name. + argNames.push_back(llvm::MDString::get(VMContext, parm->getName())); + } + + Fn->setMetadata("kernel_arg_addr_space", + llvm::MDNode::get(VMContext, addressQuals)); + Fn->setMetadata("kernel_arg_access_qual", + llvm::MDNode::get(VMContext, accessQuals)); + Fn->setMetadata("kernel_arg_type", + llvm::MDNode::get(VMContext, argTypeNames)); + Fn->setMetadata("kernel_arg_base_type", + llvm::MDNode::get(VMContext, argBaseTypeNames)); + Fn->setMetadata("kernel_arg_type_qual", + llvm::MDNode::get(VMContext, argTypeQuals)); + if (getCodeGenOpts().EmitOpenCLArgMetadata) + Fn->setMetadata("kernel_arg_name", + llvm::MDNode::get(VMContext, argNames)); +} + /// Determines whether the language options require us to model /// unwind exceptions. We treat -fexceptions as mandating this /// except under the fragile ObjC ABI with only ObjC exceptions @@ -1544,12 +1796,8 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, const auto *FD = cast<FunctionDecl>(GD.getDecl()); - if (!IsIncompleteFunction) { + if (!IsIncompleteFunction) SetLLVMFunctionAttributes(GD, getTypes().arrangeGlobalDeclaration(GD), F); - // Setup target-specific attributes. 
- if (F->isDeclaration()) - getTargetCodeGenInfo().setTargetAttributes(FD, F, *this); - } // Add the Returned attribute for "this", except for iOS 5 and earlier // where substantial code, including the libstdc++ dylib, was compiled with @@ -1569,6 +1817,10 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, setLinkageForGV(F, FD); setGVProperties(F, FD); + // Setup target-specific attributes. + if (!IsIncompleteFunction && F->isDeclaration()) + getTargetCodeGenInfo().setTargetAttributes(FD, F, *this); + if (const auto *CSA = FD->getAttr<CodeSegAttr>()) F->setSection(CSA->getName()); else if (const auto *SA = FD->getAttr<SectionAttr>()) @@ -1603,6 +1855,23 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, if (getLangOpts().OpenMP && FD->hasAttr<OMPDeclareSimdDeclAttr>()) getOpenMPRuntime().emitDeclareSimdFunction(FD, F); + + if (const auto *CB = FD->getAttr<CallbackAttr>()) { + // Annotate the callback behavior as metadata: + // - The callback callee (as argument number). + // - The callback payloads (as argument numbers). + llvm::LLVMContext &Ctx = F->getContext(); + llvm::MDBuilder MDB(Ctx); + + // The payload indices are all but the first one in the encoding. The first + // identifies the callback callee. + int CalleeIdx = *CB->encoding_begin(); + ArrayRef<int> PayloadIndices(CB->encoding_begin() + 1, CB->encoding_end()); + F->addMetadata(llvm::LLVMContext::MD_callback, + *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( + CalleeIdx, PayloadIndices, + /* VarArgsArePassed */ false)})); + } } void CodeGenModule::addUsedGlobal(llvm::GlobalValue *GV) { @@ -1660,17 +1929,18 @@ void CodeGenModule::AddDetectMismatch(StringRef Name, StringRef Value) { LinkerOptionsMetadata.push_back(llvm::MDNode::get(getLLVMContext(), MDOpts)); } -void CodeGenModule::AddELFLibDirective(StringRef Lib) { +void CodeGenModule::AddDependentLib(StringRef Lib) { auto &C = getLLVMContext(); - LinkerOptionsMetadata.push_back(llvm::MDNode::get( - C, {llvm::MDString::get(C, "lib"), llvm::MDString::get(C, Lib)})); -} + if (getTarget().getTriple().isOSBinFormatELF()) { + ELFDependentLibraries.push_back( + llvm::MDNode::get(C, llvm::MDString::get(C, Lib))); + return; + } -void CodeGenModule::AddDependentLib(StringRef Lib) { llvm::SmallString<24> Opt; getTargetCodeGenInfo().getDependentLibraryOption(Lib, Opt); auto *MDOpts = llvm::MDString::get(getLLVMContext(), Opt); - LinkerOptionsMetadata.push_back(llvm::MDNode::get(getLLVMContext(), MDOpts)); + LinkerOptionsMetadata.push_back(llvm::MDNode::get(C, MDOpts)); } /// Add link options implied by the given module, including modules @@ -1693,7 +1963,6 @@ static void addLinkOptionsPostorder(CodeGenModule &CGM, Module *Mod, // described by this module. llvm::LLVMContext &Context = CGM.getLLVMContext(); bool IsELF = CGM.getTarget().getTriple().isOSBinFormatELF(); - bool IsPS4 = CGM.getTarget().getTriple().isPS4(); // For modules that use export_as for linking, use that module // name instead. @@ -1713,7 +1982,7 @@ static void addLinkOptionsPostorder(CodeGenModule &CGM, Module *Mod, } // Link against a library. - if (IsELF && !IsPS4) { + if (IsELF) { llvm::Metadata *Args[2] = { llvm::MDString::get(Context, "lib"), llvm::MDString::get(Context, Mod->LinkLibraries[I - 1].Library), @@ -1970,9 +2239,11 @@ bool CodeGenModule::isInSanitizerBlacklist(llvm::GlobalVariable *GV, SourceLocation Loc, QualType Ty, StringRef Category) const { // For now globals can be blacklisted only in ASan and KASan. 
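The CallbackAttr handling above lowers clang's callback attribute to !callback metadata so interprocedural passes can follow payload arguments into an indirect callee. A small example of the attribute being consumed; indices are 1-based, with 1 naming the callback parameter and 2 the payload passed to it (example code, not from this commit):

    #include <cstdio>

    // Broker forwards its payload to the callback; the attribute lets
    // clang (>= 9) attach !callback metadata describing that flow.
    __attribute__((callback(1, 2)))
    void broker(void (*cb)(void *), void *payload) {
      cb(payload);
    }

    static void handler(void *p) {
      std::printf("%s\n", static_cast<const char *>(p));
    }

    int main() {
      char msg[] = "called back";
      broker(handler, msg);
    }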
- const SanitizerMask EnabledAsanMask = LangOpts.Sanitize.Mask & + const SanitizerMask EnabledAsanMask = + LangOpts.Sanitize.Mask & (SanitizerKind::Address | SanitizerKind::KernelAddress | - SanitizerKind::HWAddress | SanitizerKind::KernelHWAddress); + SanitizerKind::HWAddress | SanitizerKind::KernelHWAddress | + SanitizerKind::MemTag); if (!EnabledAsanMask) return false; const auto &SanitizerBL = getContext().getSanitizerBlacklist(); @@ -2146,7 +2417,8 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { if (!Global->hasAttr<CUDADeviceAttr>() && !Global->hasAttr<CUDAGlobalAttr>() && !Global->hasAttr<CUDAConstantAttr>() && - !Global->hasAttr<CUDASharedAttr>()) + !Global->hasAttr<CUDASharedAttr>() && + !(LangOpts.HIP && Global->hasAttr<HIPPinnedShadowAttr>())) return; } else { // We need to emit host-side 'shadows' for all global @@ -2173,6 +2445,10 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { if (MustBeEmitted(Global)) EmitOMPDeclareReduction(DRD); return; + } else if (auto *DMD = dyn_cast<OMPDeclareMapperDecl>(Global)) { + if (MustBeEmitted(Global)) + EmitOMPDeclareMapper(DMD); + return; } } @@ -2202,13 +2478,19 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { // Emit declaration of the must-be-emitted declare target variable. if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { - if (*Res == OMPDeclareTargetDeclAttr::MT_To) { + bool UnifiedMemoryEnabled = + getOpenMPRuntime().hasRequiresUnifiedSharedMemory(); + if (*Res == OMPDeclareTargetDeclAttr::MT_To && + !UnifiedMemoryEnabled) { (void)GetAddrOfGlobalVar(VD); } else { - assert(*Res == OMPDeclareTargetDeclAttr::MT_Link && - "link claue expected."); - (void)getOpenMPRuntime().getAddrOfDeclareTargetLink(VD); + assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || + (*Res == OMPDeclareTargetDeclAttr::MT_To && + UnifiedMemoryEnabled)) && + "Link clause or to clause with unified memory expected."); + (void)getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); } + return; } } @@ -2265,35 +2547,36 @@ static bool HasNonDllImportDtor(QualType T) { } namespace { - struct FunctionIsDirectlyRecursive : - public RecursiveASTVisitor<FunctionIsDirectlyRecursive> { + struct FunctionIsDirectlyRecursive + : public ConstStmtVisitor<FunctionIsDirectlyRecursive, bool> { const StringRef Name; const Builtin::Context &BI; - bool Result; - FunctionIsDirectlyRecursive(StringRef N, const Builtin::Context &C) : - Name(N), BI(C), Result(false) { - } - typedef RecursiveASTVisitor<FunctionIsDirectlyRecursive> Base; + FunctionIsDirectlyRecursive(StringRef N, const Builtin::Context &C) + : Name(N), BI(C) {} - bool TraverseCallExpr(CallExpr *E) { + bool VisitCallExpr(const CallExpr *E) { const FunctionDecl *FD = E->getDirectCallee(); if (!FD) - return true; - AsmLabelAttr *Attr = FD->getAttr<AsmLabelAttr>(); - if (Attr && Name == Attr->getLabel()) { - Result = true; return false; - } + AsmLabelAttr *Attr = FD->getAttr<AsmLabelAttr>(); + if (Attr && Name == Attr->getLabel()) + return true; unsigned BuiltinID = FD->getBuiltinID(); if (!BuiltinID || !BI.isLibFunction(BuiltinID)) - return true; + return false; StringRef BuiltinName = BI.getName(BuiltinID); if (BuiltinName.startswith("__builtin_") && Name == BuiltinName.slice(strlen("__builtin_"), StringRef::npos)) { - Result = true; - return false; + return true; } - return true; + return false; + } + + bool VisitStmt(const Stmt *S) { + for (const Stmt *Child : S->children()) + if (Child && this->Visit(Child)) + return true; + return false; } }; @@ 
-2378,8 +2661,8 @@ CodeGenModule::isTriviallyRecursive(const FunctionDecl *FD) { } FunctionIsDirectlyRecursive Walker(Name, Context.BuiltinInfo); - Walker.TraverseFunctionDecl(const_cast<FunctionDecl*>(FD)); - return Walker.Result; + const Stmt *Body = FD->getBody(); + return Body ? Walker.Visit(Body) : false; } bool CodeGenModule::shouldEmitFunction(GlobalDecl GD) { @@ -2447,13 +2730,19 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { if (!shouldEmitFunction(GD)) return; + llvm::TimeTraceScope TimeScope("CodeGen Function", [&]() { + std::string Name; + llvm::raw_string_ostream OS(Name); + FD->getNameForDiagnostic(OS, getContext().getPrintingPolicy(), + /*Qualified=*/true); + return Name; + }); + if (const auto *Method = dyn_cast<CXXMethodDecl>(D)) { // Make sure to emit the definition(s) before we emit the thunks. // This is necessary for the generation of certain thunks. - if (const auto *CD = dyn_cast<CXXConstructorDecl>(Method)) - ABI->emitCXXStructor(CD, getFromCtorType(GD.getCtorType())); - else if (const auto *DD = dyn_cast<CXXDestructorDecl>(Method)) - ABI->emitCXXStructor(DD, getFromDtorType(GD.getDtorType())); + if (isa<CXXConstructorDecl>(Method) || isa<CXXDestructorDecl>(Method)) + ABI->emitCXXStructor(GD); else if (FD->isMultiVersion()) EmitMultiVersionFunctionDefinition(GD, GV); else @@ -2537,10 +2826,9 @@ void CodeGenModule::emitMultiVersionFunctions() { ResolverFunc->setComdat( getModule().getOrInsertComdat(ResolverFunc->getName())); - std::stable_sort( - Options.begin(), Options.end(), - [&TI](const CodeGenFunction::MultiVersionResolverOption &LHS, - const CodeGenFunction::MultiVersionResolverOption &RHS) { + llvm::stable_sort( + Options, [&TI](const CodeGenFunction::MultiVersionResolverOption &LHS, + const CodeGenFunction::MultiVersionResolverOption &RHS) { return TargetMVPriority(TI, LHS) > TargetMVPriority(TI, RHS); }); CodeGenFunction CGF(*this); @@ -2553,8 +2841,7 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) { assert(FD && "Not a FunctionDecl?"); const auto *DD = FD->getAttr<CPUDispatchAttr>(); assert(DD && "Not a cpu_dispatch Function?"); - QualType CanonTy = Context.getCanonicalType(FD->getType()); - llvm::Type *DeclTy = getTypes().ConvertFunctionType(CanonTy, FD); + llvm::Type *DeclTy = getTypes().ConvertType(FD->getType()); if (const auto *CXXFD = dyn_cast<CXXMethodDecl>(FD)) { const CGFunctionInfo &FInfo = getTypes().arrangeCXXMethodDeclaration(CXXFD); @@ -2893,8 +3180,7 @@ llvm::Constant *CodeGenModule::GetAddrOfFunction(GlobalDecl GD, // If there was no specific requested type, just convert it now. if (!Ty) { const auto *FD = cast<FunctionDecl>(GD.getDecl()); - auto CanonTy = Context.getCanonicalType(FD->getType()); - Ty = getTypes().ConvertFunctionType(CanonTy, FD); + Ty = getTypes().ConvertType(FD->getType()); } // Devirtualized destructor calls may come through here instead of via @@ -2953,7 +3239,7 @@ GetRuntimeFunctionDecl(ASTContext &C, StringRef Name) { /// CreateRuntimeFunction - Create a new runtime function with the specified /// type and name. 
-llvm::Constant * +llvm::FunctionCallee CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name, llvm::AttributeList ExtraAttrs, bool Local) { @@ -2966,9 +3252,13 @@ CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name, if (F->empty()) { F->setCallingConv(getRuntimeCC()); - if (!Local && getTriple().isOSBinFormatCOFF() && - !getCodeGenOpts().LTOVisibilityPublicStd && - !getTriple().isWindowsGNUEnvironment()) { + // In Windows Itanium environments, try to mark runtime functions + // dllimport. For Mingw and MSVC, don't. We don't really know if the user + // will link their standard library statically or dynamically. Marking + // functions imported when they are not imported can cause linker errors + // and warnings. + if (!Local && getTriple().isWindowsItaniumEnvironment() && + !getCodeGenOpts().LTOVisibilityPublicStd) { const FunctionDecl *FD = GetRuntimeFunctionDecl(Context, Name); if (!FD || FD->hasAttr<DLLImportAttr>()) { F->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); @@ -2979,15 +3269,7 @@ CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name, } } - return C; -} - -/// CreateBuiltinFunction - Create a new builtin function with the specified -/// type and name. -llvm::Constant * -CodeGenModule::CreateBuiltinFunction(llvm::FunctionType *FTy, StringRef Name, - llvm::AttributeList ExtraAttrs) { - return CreateRuntimeFunction(FTy, Name, ExtraAttrs, true); + return {FTy, C}; } /// isTypeConstant - Determine whether an object of this type can be emitted @@ -3199,6 +3481,9 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, return getTargetCodeGenInfo().performAddrSpaceCast(*this, GV, AddrSpace, ExpectedAS, Ty); + if (GV->isDeclaration()) + getTargetCodeGenInfo().setTargetAttributes(D, GV, *this); + return GV; } @@ -3206,15 +3491,8 @@ llvm::Constant * CodeGenModule::GetAddrOfGlobal(GlobalDecl GD, ForDefinition_t IsForDefinition) { const Decl *D = GD.getDecl(); - if (isa<CXXConstructorDecl>(D)) - return getAddrOfCXXStructor(cast<CXXConstructorDecl>(D), - getFromCtorType(GD.getCtorType()), - /*FnInfo=*/nullptr, /*FnType=*/nullptr, - /*DontDefer=*/false, IsForDefinition); - else if (isa<CXXDestructorDecl>(D)) - return getAddrOfCXXStructor(cast<CXXDestructorDecl>(D), - getFromDtorType(GD.getDtorType()), - /*FnInfo=*/nullptr, /*FnType=*/nullptr, + if (isa<CXXConstructorDecl>(D) || isa<CXXDestructorDecl>(D)) + return getAddrOfCXXStructor(GD, /*FnInfo=*/nullptr, /*FnType=*/nullptr, /*DontDefer=*/false, IsForDefinition); else if (isa<CXXMethodDecl>(D)) { auto FInfo = &getTypes().arrangeCXXMethodDeclaration( @@ -3301,8 +3579,12 @@ llvm::Constant *CodeGenModule::GetAddrOfGlobalVar(const VarDecl *D, llvm::Constant * CodeGenModule::CreateRuntimeVariable(llvm::Type *Ty, StringRef Name) { - auto *Ret = - GetOrCreateLLVMGlobal(Name, llvm::PointerType::getUnqual(Ty), nullptr); + auto PtrTy = + getContext().getLangOpts().OpenCL + ? 
llvm::PointerType::get(
+            Ty, getContext().getTargetAddressSpace(LangAS::opencl_global))
+      : llvm::PointerType::getUnqual(Ty);
+  auto *Ret = GetOrCreateLLVMGlobal(Name, PtrTy, nullptr);
   setDSOLocal(cast<llvm::GlobalValue>(Ret->stripPointerCasts()));
   return Ret;
 }
@@ -3359,6 +3641,11 @@ LangAS CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) {
     return LangAS::cuda_device;
   }

+  if (LangOpts.OpenMP) {
+    LangAS AS;
+    if (OpenMPRuntime->hasAllocateAttributeForGlobalVar(D, AS))
+      return AS;
+  }
   return getTargetCodeGenInfo().getGlobalVarAddressSpace(*this, D);
 }

@@ -3432,6 +3719,11 @@ static bool shouldBeInCOMDAT(CodeGenModule &CGM, const Decl &D) {
   if (!CGM.supportsCOMDAT())
     return false;

+  // Do not set COMDAT attribute for CUDA/HIP stub functions to prevent
+  // them from being "merged" by the COMDAT Folding linker optimization.
+  if (D.hasAttr<CUDAGlobalAttr>())
+    return false;
+
   if (D.hasAttr<SelectAnyAttr>())
     return true;

@@ -3496,7 +3788,12 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
       !getLangOpts().CUDAIsDevice &&
       (D->hasAttr<CUDAConstantAttr>() || D->hasAttr<CUDADeviceAttr>() ||
        D->hasAttr<CUDASharedAttr>());
-  if (getLangOpts().CUDA && (IsCUDASharedVar || IsCUDAShadowVar))
+  // HIP pinned shadows of initialized host-side global variables are also
+  // left undefined.
+  bool IsHIPPinnedShadowVar =
+      getLangOpts().CUDAIsDevice && D->hasAttr<HIPPinnedShadowAttr>();
+  if (getLangOpts().CUDA &&
+      (IsCUDASharedVar || IsCUDAShadowVar || IsHIPPinnedShadowVar))
     Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy));
   else if (!InitExpr) {
     // This is a tentative definition; tentative definitions are
@@ -3599,14 +3896,16 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
   //    / cudaMemcpyToSymbol() / cudaMemcpyFromSymbol())."
   if (GV && LangOpts.CUDA) {
     if (LangOpts.CUDAIsDevice) {
-      if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>())
+      if (Linkage != llvm::GlobalValue::InternalLinkage &&
+          (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()))
         GV->setExternallyInitialized(true);
     } else {
       // Host-side shadows of external declarations of device-side
       // global variables become internal definitions. These have to
       // be internal in order to prevent name conflicts with global
       // host variables with the same name in different TUs.
-      if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()) {
+      if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
+          D->hasAttr<HIPPinnedShadowAttr>()) {
         Linkage = llvm::GlobalValue::InternalLinkage;

         // Shadow variables and their properties must be registered
@@ -3619,7 +3918,7 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
         // Extern global variables will be registered in the TU where they are
         // defined.
         if (!D->hasExternalStorage())
-          getCUDARuntime().registerDeviceVar(*GV, Flags);
+          getCUDARuntime().registerDeviceVar(D, *GV, Flags);
       } else if (D->hasAttr<CUDASharedAttr>())
         // __shared__ variables are odd. Shadows do get created, but
         // they are not registered with the CUDA runtime, so they
@@ -3630,7 +3929,8 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
     }
   }

-  GV->setInitializer(Init);
+  if (!IsHIPPinnedShadowVar)
+    GV->setInitializer(Init);
   if (emitter) emitter->finalize(GV);

   // If it is safe to mark the global 'constant', do so now.
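The CreateRuntimeVariable hunk above starts materializing runtime variables behind pointers in the OpenCL global address space rather than address space 0. A minimal sketch of creating an addrspace-qualified global with the LLVM 9 API (names are placeholders; address space 1 matches the ArgInfoAddressSpace mapping for opencl_global earlier in this diff):

    #include "llvm/IR/Module.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Module M("as_demo", Ctx);
      // A null initializer plus external linkage yields a declaration.
      new GlobalVariable(M, Type::getInt32Ty(Ctx), /*isConstant=*/false,
                         GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
                         "runtime_var", /*InsertBefore=*/nullptr,
                         GlobalValue::NotThreadLocal, /*AddressSpace=*/1);
      // Prints: @runtime_var = external addrspace(1) global i32
      M.print(outs(), nullptr);
    }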
@@ -3762,13 +4062,15 @@ static bool isVarDeclStrongDefinition(const ASTContext &Context, } } - // Microsoft's link.exe doesn't support alignments greater than 32 for common - // symbols, so symbols with greater alignment requirements cannot be common. + // Microsoft's link.exe doesn't support alignments greater than 32 bytes for + // common symbols, so symbols with greater alignment requirements cannot be + // common. // Other COFF linkers (ld.bfd and LLD) support arbitrary power-of-two // alignments for common symbols via the aligncomm directive, so this // restriction only applies to MSVC environments. if (Context.getTargetInfo().getTriple().isKnownWindowsMSVCEnvironment() && - Context.getTypeAlignIfKnown(D->getType()) > 32) + Context.getTypeAlignIfKnown(D->getType()) > + Context.toBits(CharUnits::fromQuantity(32))) return true; return false; @@ -3877,9 +4179,10 @@ static void replaceUsesOfNonProtoConstant(llvm::Constant *old, } // Recognize calls to the function. - llvm::CallSite callSite(user); + llvm::CallBase *callSite = dyn_cast<llvm::CallBase>(user); if (!callSite) continue; - if (!callSite.isCallee(&*use)) continue; + if (!callSite->isCallee(&*use)) + continue; // If the return types don't match exactly, then we can't // transform this call unless it's dead. @@ -3888,18 +4191,19 @@ static void replaceUsesOfNonProtoConstant(llvm::Constant *old, // Get the call site's attribute list. SmallVector<llvm::AttributeSet, 8> newArgAttrs; - llvm::AttributeList oldAttrs = callSite.getAttributes(); + llvm::AttributeList oldAttrs = callSite->getAttributes(); // If the function was passed too few arguments, don't transform. unsigned newNumArgs = newFn->arg_size(); - if (callSite.arg_size() < newNumArgs) continue; + if (callSite->arg_size() < newNumArgs) + continue; // If extra arguments were passed, we silently drop them. // If any of the types mismatch, we don't transform. unsigned argNo = 0; bool dontTransform = false; for (llvm::Argument &A : newFn->args()) { - if (callSite.getArgument(argNo)->getType() != A.getType()) { + if (callSite->getArgOperand(argNo)->getType() != A.getType()) { dontTransform = true; break; } @@ -3913,35 +4217,33 @@ static void replaceUsesOfNonProtoConstant(llvm::Constant *old, // Okay, we can transform this. Create the new call instruction and copy // over the required information. - newArgs.append(callSite.arg_begin(), callSite.arg_begin() + argNo); + newArgs.append(callSite->arg_begin(), callSite->arg_begin() + argNo); // Copy over any operand bundles. 
- callSite.getOperandBundlesAsDefs(newBundles); + callSite->getOperandBundlesAsDefs(newBundles); - llvm::CallSite newCall; - if (callSite.isCall()) { - newCall = llvm::CallInst::Create(newFn, newArgs, newBundles, "", - callSite.getInstruction()); + llvm::CallBase *newCall; + if (dyn_cast<llvm::CallInst>(callSite)) { + newCall = + llvm::CallInst::Create(newFn, newArgs, newBundles, "", callSite); } else { - auto *oldInvoke = cast<llvm::InvokeInst>(callSite.getInstruction()); - newCall = llvm::InvokeInst::Create(newFn, - oldInvoke->getNormalDest(), - oldInvoke->getUnwindDest(), - newArgs, newBundles, "", - callSite.getInstruction()); + auto *oldInvoke = cast<llvm::InvokeInst>(callSite); + newCall = llvm::InvokeInst::Create(newFn, oldInvoke->getNormalDest(), + oldInvoke->getUnwindDest(), newArgs, + newBundles, "", callSite); } newArgs.clear(); // for the next iteration if (!newCall->getType()->isVoidTy()) - newCall->takeName(callSite.getInstruction()); - newCall.setAttributes(llvm::AttributeList::get( + newCall->takeName(callSite); + newCall->setAttributes(llvm::AttributeList::get( newFn->getContext(), oldAttrs.getFnAttributes(), oldAttrs.getRetAttributes(), newArgAttrs)); - newCall.setCallingConv(callSite.getCallingConv()); + newCall->setCallingConv(callSite->getCallingConv()); // Finally, remove the old call, replacing any uses with the new one. if (!callSite->use_empty()) - callSite->replaceAllUsesWith(newCall.getInstruction()); + callSite->replaceAllUsesWith(newCall); // Copy debug location attached to CI. if (callSite->getDebugLoc()) @@ -4373,9 +4675,12 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { GV = Fields.finishAndCreateGlobal("_unnamed_cfstring_", Alignment, /*isConstant=*/false, llvm::GlobalVariable::PrivateLinkage); + GV->addAttribute("objc_arc_inert"); switch (Triple.getObjectFormat()) { case llvm::Triple::UnknownObjectFormat: llvm_unreachable("unknown file format"); + case llvm::Triple::XCOFF: + llvm_unreachable("XCOFF is not yet implemented"); case llvm::Triple::COFF: case llvm::Triple::ELF: case llvm::Triple::Wasm: @@ -4504,7 +4809,8 @@ CodeGenModule::GetAddrOfConstantStringFromLiteral(const StringLiteral *S, if (auto GV = *Entry) { if (Alignment.getQuantity() > GV->getAlignment()) GV->setAlignment(Alignment.getQuantity()); - return ConstantAddress(GV, Alignment); + return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV), + Alignment); } } @@ -4566,7 +4872,8 @@ ConstantAddress CodeGenModule::GetAddrOfConstantCString( if (auto GV = *Entry) { if (Alignment.getQuantity() > GV->getAlignment()) GV->setAlignment(Alignment.getQuantity()); - return ConstantAddress(GV, Alignment); + return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV), + Alignment); } } @@ -4615,7 +4922,7 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary( // evaluating the initializer if the surrounding constant expression // modifies the temporary. 
Value = getContext().getMaterializedTemporaryValue(E, false); - if (Value && Value->isUninit()) + if (Value && Value->isAbsent()) Value = nullptr; } @@ -4861,6 +5168,7 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { case Decl::UsingShadow: case Decl::ClassTemplate: case Decl::VarTemplate: + case Decl::Concept: case Decl::VarTemplatePartialSpecialization: case Decl::FunctionTemplate: case Decl::TypeAliasTemplate: @@ -4943,10 +5251,6 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { AppendLinkerOptions(PCD->getArg()); break; case PCK_Lib: - if (getTarget().getTriple().isOSBinFormatELF() && - !getTarget().getTriple().isPS4()) - AddELFLibDirective(PCD->getArg()); - else AddDependentLib(PCD->getArg()); break; case PCK_Compiler: @@ -5030,10 +5334,17 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { EmitOMPThreadPrivateDecl(cast<OMPThreadPrivateDecl>(D)); break; + case Decl::OMPAllocate: + break; + case Decl::OMPDeclareReduction: EmitOMPDeclareReduction(cast<OMPDeclareReductionDecl>(D)); break; + case Decl::OMPDeclareMapper: + EmitOMPDeclareMapper(cast<OMPDeclareMapperDecl>(D)); + break; + case Decl::OMPRequires: EmitOMPRequiresDecl(cast<OMPRequiresDecl>(D)); break; diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h index 75679d11c13c..95964afed4ec 100644 --- a/lib/CodeGen/CodeGenModule.h +++ b/lib/CodeGen/CodeGenModule.h @@ -1,9 +1,8 @@ //===--- CodeGenModule.h - Per-Module state for LLVM CodeGen ----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -120,90 +119,93 @@ struct ObjCEntrypoints { ObjCEntrypoints() { memset(this, 0, sizeof(*this)); } /// void objc_alloc(id); - llvm::Constant *objc_alloc; + llvm::FunctionCallee objc_alloc; /// void objc_allocWithZone(id); - llvm::Constant *objc_allocWithZone; + llvm::FunctionCallee objc_allocWithZone; + + /// void objc_alloc_init(id); + llvm::FunctionCallee objc_alloc_init; /// void objc_autoreleasePoolPop(void*); - llvm::Constant *objc_autoreleasePoolPop; + llvm::FunctionCallee objc_autoreleasePoolPop; /// void objc_autoreleasePoolPop(void*); /// Note this method is used when we are using exception handling - llvm::Constant *objc_autoreleasePoolPopInvoke; + llvm::FunctionCallee objc_autoreleasePoolPopInvoke; /// void *objc_autoreleasePoolPush(void); - llvm::Constant *objc_autoreleasePoolPush; + llvm::Function *objc_autoreleasePoolPush; /// id objc_autorelease(id); - llvm::Constant *objc_autorelease; + llvm::Function *objc_autorelease; /// id objc_autorelease(id); /// Note this is the runtime method not the intrinsic. 
- llvm::Constant *objc_autoreleaseRuntimeFunction; + llvm::FunctionCallee objc_autoreleaseRuntimeFunction; /// id objc_autoreleaseReturnValue(id); - llvm::Constant *objc_autoreleaseReturnValue; + llvm::Function *objc_autoreleaseReturnValue; /// void objc_copyWeak(id *dest, id *src); - llvm::Constant *objc_copyWeak; + llvm::Function *objc_copyWeak; /// void objc_destroyWeak(id*); - llvm::Constant *objc_destroyWeak; + llvm::Function *objc_destroyWeak; /// id objc_initWeak(id*, id); - llvm::Constant *objc_initWeak; + llvm::Function *objc_initWeak; /// id objc_loadWeak(id*); - llvm::Constant *objc_loadWeak; + llvm::Function *objc_loadWeak; /// id objc_loadWeakRetained(id*); - llvm::Constant *objc_loadWeakRetained; + llvm::Function *objc_loadWeakRetained; /// void objc_moveWeak(id *dest, id *src); - llvm::Constant *objc_moveWeak; + llvm::Function *objc_moveWeak; /// id objc_retain(id); - llvm::Constant *objc_retain; + llvm::Function *objc_retain; /// id objc_retain(id); /// Note this is the runtime method not the intrinsic. - llvm::Constant *objc_retainRuntimeFunction; + llvm::FunctionCallee objc_retainRuntimeFunction; /// id objc_retainAutorelease(id); - llvm::Constant *objc_retainAutorelease; + llvm::Function *objc_retainAutorelease; /// id objc_retainAutoreleaseReturnValue(id); - llvm::Constant *objc_retainAutoreleaseReturnValue; + llvm::Function *objc_retainAutoreleaseReturnValue; /// id objc_retainAutoreleasedReturnValue(id); - llvm::Constant *objc_retainAutoreleasedReturnValue; + llvm::Function *objc_retainAutoreleasedReturnValue; /// id objc_retainBlock(id); - llvm::Constant *objc_retainBlock; + llvm::Function *objc_retainBlock; /// void objc_release(id); - llvm::Constant *objc_release; + llvm::Function *objc_release; /// void objc_release(id); /// Note this is the runtime method not the intrinsic. - llvm::Constant *objc_releaseRuntimeFunction; + llvm::FunctionCallee objc_releaseRuntimeFunction; /// void objc_storeStrong(id*, id); - llvm::Constant *objc_storeStrong; + llvm::Function *objc_storeStrong; /// id objc_storeWeak(id*, id); - llvm::Constant *objc_storeWeak; + llvm::Function *objc_storeWeak; /// id objc_unsafeClaimAutoreleasedReturnValue(id); - llvm::Constant *objc_unsafeClaimAutoreleasedReturnValue; + llvm::Function *objc_unsafeClaimAutoreleasedReturnValue; /// A void(void) inline asm to use to mark that the return value of /// a call will be immediately retain. llvm::InlineAsm *retainAutoreleasedReturnValueMarker; /// void clang.arc.use(...); - llvm::Constant *clang_arc_use; + llvm::Function *clang_arc_use; }; /// This class records statistics on instrumentation based profiling. @@ -252,7 +254,8 @@ public: /// have different helper functions. CharUnits Alignment; - BlockByrefHelpers(CharUnits alignment) : Alignment(alignment) {} + BlockByrefHelpers(CharUnits alignment) + : CopyHelper(nullptr), DisposeHelper(nullptr), Alignment(alignment) {} BlockByrefHelpers(const BlockByrefHelpers &) = default; virtual ~BlockByrefHelpers(); @@ -359,6 +362,10 @@ private: llvm::SmallVector<std::pair<llvm::GlobalValue *, llvm::Constant *>, 8> GlobalValReplacements; + /// Variables for which we've emitted globals containing their constant + /// values along with the corresponding globals, for opportunistic reuse. + llvm::DenseMap<const VarDecl*, llvm::GlobalVariable*> InitializerConstants; + /// Set of global decls for which we already diagnosed mangled name conflict. /// Required to not issue a warning (on a mangling conflict) multiple times /// for the same decl. 
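Most of the churn in this header is the migration of runtime entry points from llvm::Constant * to llvm::FunctionCallee, which bundles the callee pointer with its llvm::FunctionType. A minimal usage sketch, assuming LLVM's FunctionCallee API and hypothetical M, Builder, RetTy, ArgTy, and Arg values already in scope:

    llvm::FunctionCallee Fn =
        M.getOrInsertFunction("objc_retain", RetTy, ArgTy);
    // The callee and its function type travel together, so the call site
    // no longer has to recover the type from the pointer's pointee type.
    llvm::CallInst *Call = Builder.CreateCall(Fn, {Arg});

Entry points that are always direct llvm::Function definitions (the ARC intrinsics) stay llvm::Function *, while anything that may be an arbitrary bitcast constant becomes a FunctionCallee.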
@@ -452,7 +459,9 @@ private: SmallVector<GlobalInitData, 8> PrioritizedCXXGlobalInits; /// Global destructor functions and arguments that need to run on termination. - std::vector<std::pair<llvm::WeakTrackingVH, llvm::Constant *>> CXXGlobalDtors; + std::vector< + std::tuple<llvm::FunctionType *, llvm::WeakTrackingVH, llvm::Constant *>> + CXXGlobalDtors; /// The complete set of modules that has been imported. llvm::SetVector<clang::Module *> ImportedModules; @@ -461,9 +470,12 @@ private: /// have been emitted. llvm::SmallPtrSet<clang::Module *, 16> EmittedModuleInitializers; - /// A vector of metadata strings. + /// A vector of metadata strings for linker options. SmallVector<llvm::MDNode *, 16> LinkerOptionsMetadata; + /// A vector of metadata strings for dependent libraries for ELF. + SmallVector<llvm::MDNode *, 16> ELFDependentLibraries; + /// @name Cache for Objective-C runtime types /// @{ @@ -501,8 +513,8 @@ private: llvm::Constant *NSConcreteGlobalBlock = nullptr; llvm::Constant *NSConcreteStackBlock = nullptr; - llvm::Constant *BlockObjectAssign = nullptr; - llvm::Constant *BlockObjectDispose = nullptr; + llvm::FunctionCallee BlockObjectAssign = nullptr; + llvm::FunctionCallee BlockObjectDispose = nullptr; llvm::Type *BlockDescriptorType = nullptr; llvm::Type *GenericBlockLiteralType = nullptr; @@ -512,10 +524,10 @@ private: } Block; /// void @llvm.lifetime.start(i64 %size, i8* nocapture <ptr>) - llvm::Constant *LifetimeStartFn = nullptr; + llvm::Function *LifetimeStartFn = nullptr; /// void @llvm.lifetime.end(i64 %size, i8* nocapture <ptr>) - llvm::Constant *LifetimeEndFn = nullptr; + llvm::Function *LifetimeEndFn = nullptr; GlobalDecl initializedGlobalDecl; @@ -586,7 +598,7 @@ public: // Version checking function, used to implement ObjC's @available: // i32 @__isOSVersionAtLeast(i32, i32, i32) - llvm::Constant *IsOSVersionAtLeastFn = nullptr; + llvm::FunctionCallee IsOSVersionAtLeastFn = nullptr; InstrProfStats &getPGOStats() { return PGOStats; } llvm::IndexedInstrProfReader *getPGOReader() const { return PGOReader.get(); } @@ -615,6 +627,9 @@ public: StaticLocalDeclGuardMap[D] = C; } + Address createUnnamedGlobalFrom(const VarDecl &D, llvm::Constant *Constant, + CharUnits Align); + bool lookupRepresentativeDecl(StringRef MangledName, GlobalDecl &Result) const; @@ -751,9 +766,6 @@ public: /// Set the visibility for the given LLVM GlobalValue. void setGlobalVisibility(llvm::GlobalValue *GV, const NamedDecl *D) const; - void setGlobalVisibilityAndLocal(llvm::GlobalValue *GV, - const NamedDecl *D) const; - void setDSOLocal(llvm::GlobalValue *GV) const; void setDLLImportDLLExport(llvm::GlobalValue *GV, GlobalDecl D) const; @@ -763,6 +775,8 @@ public: void setGVProperties(llvm::GlobalValue *GV, GlobalDecl GD) const; void setGVProperties(llvm::GlobalValue *GV, const NamedDecl *D) const; + void setGVPropertiesAux(llvm::GlobalValue *GV, const NamedDecl *D) const; + /// Set the TLS mode for the given LLVM GlobalValue for the thread-local /// variable declaration D. void setTLSMode(llvm::GlobalValue *GV, const VarDecl &D) const; @@ -950,16 +964,24 @@ public: // Produce code for this constructor/destructor. This method doesn't try // to apply any ABI rules about which other constructors/destructors // are needed or if they are alias to each other. - llvm::Function *codegenCXXStructor(const CXXMethodDecl *MD, - StructorType Type); + llvm::Function *codegenCXXStructor(GlobalDecl GD); /// Return the address of the constructor/destructor of the given type. 
llvm::Constant * - getAddrOfCXXStructor(const CXXMethodDecl *MD, StructorType Type, - const CGFunctionInfo *FnInfo = nullptr, + getAddrOfCXXStructor(GlobalDecl GD, const CGFunctionInfo *FnInfo = nullptr, llvm::FunctionType *FnType = nullptr, bool DontDefer = false, - ForDefinition_t IsForDefinition = NotForDefinition); + ForDefinition_t IsForDefinition = NotForDefinition) { + return cast<llvm::Constant>(getAddrAndTypeOfCXXStructor(GD, FnInfo, FnType, + DontDefer, + IsForDefinition) + .getCallee()); + } + + llvm::FunctionCallee getAddrAndTypeOfCXXStructor( + GlobalDecl GD, const CGFunctionInfo *FnInfo = nullptr, + llvm::FunctionType *FnType = nullptr, bool DontDefer = false, + ForDefinition_t IsForDefinition = NotForDefinition); /// Given a builtin id for a function like "__builtin_fabsf", return a /// Function* for "fabsf". @@ -999,20 +1021,18 @@ public: void addCompilerUsedGlobal(llvm::GlobalValue *GV); /// Add a destructor and object to add to the C++ global destructor function. - void AddCXXDtorEntry(llvm::Constant *DtorFn, llvm::Constant *Object) { - CXXGlobalDtors.emplace_back(DtorFn, Object); + void AddCXXDtorEntry(llvm::FunctionCallee DtorFn, llvm::Constant *Object) { + CXXGlobalDtors.emplace_back(DtorFn.getFunctionType(), DtorFn.getCallee(), + Object); } - /// Create a new runtime function with the specified type and name. - llvm::Constant * + /// Create or return a runtime function declaration with the specified type + /// and name. + llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs = llvm::AttributeList(), bool Local = false); - /// Create a new compiler builtin function with the specified type and name. - llvm::Constant * - CreateBuiltinFunction(llvm::FunctionType *Ty, StringRef Name, - llvm::AttributeList ExtraAttrs = llvm::AttributeList()); /// Create a new runtime global variable with the specified type and name. llvm::Constant *CreateRuntimeVariable(llvm::Type *Ty, StringRef Name); @@ -1022,13 +1042,13 @@ public: llvm::Constant *getNSConcreteGlobalBlock(); llvm::Constant *getNSConcreteStackBlock(); - llvm::Constant *getBlockObjectAssign(); - llvm::Constant *getBlockObjectDispose(); + llvm::FunctionCallee getBlockObjectAssign(); + llvm::FunctionCallee getBlockObjectDispose(); ///@} - llvm::Constant *getLLVMLifetimeStartFn(); - llvm::Constant *getLLVMLifetimeEndFn(); + llvm::Function *getLLVMLifetimeStartFn(); + llvm::Function *getLLVMLifetimeEndFn(); // Make sure that this type is translated. void UpdateCompletedType(const TagDecl *TD); @@ -1142,11 +1162,9 @@ public: /// Appends a detect mismatch command to the linker options. void AddDetectMismatch(StringRef Name, StringRef Value); - /// Appends a dependent lib to the "llvm.linker.options" metadata - /// value. + /// Appends a dependent lib to the appropriate metadata value. void AddDependentLib(StringRef Lib); - void AddELFLibDirective(StringRef Lib); llvm::GlobalVariable::LinkageTypes getFunctionLinkage(GlobalDecl GD); @@ -1244,6 +1262,10 @@ public: void EmitOMPDeclareReduction(const OMPDeclareReductionDecl *D, CodeGenFunction *CGF = nullptr); + /// Emit a code for declare mapper construct. + void EmitOMPDeclareMapper(const OMPDeclareMapperDecl *D, + CodeGenFunction *CGF = nullptr); + /// Emit a code for requires directive. /// \param D Requires declaration void EmitOMPRequiresDecl(const OMPRequiresDecl *D); @@ -1294,13 +1316,27 @@ public: getMostBaseClasses(const CXXRecordDecl *RD); /// Get the declaration of std::terminate for the platform. 
-  llvm::Constant *getTerminateFn();
+  llvm::FunctionCallee getTerminateFn();

   llvm::SanitizerStatReport &getSanStats();

   llvm::Value *
   createOpenCLIntToSamplerConversion(const Expr *E, CodeGenFunction &CGF);

+  /// OpenCL v1.2 s5.6.4.6 allows the compiler to store kernel argument
+  /// information in the program executable. The argument information stored
+  /// includes the argument name, its type, and the address and access
+  /// qualifiers used. This helper can be used to generate metadata for source
+  /// code kernel functions as well as for implicitly generated kernels. If a
+  /// kernel is generated implicitly, null values have to be passed to the
+  /// last two parameters; otherwise all parameters must have valid non-null
+  /// values.
+  /// \param FN is a pointer to the IR function being generated.
+  /// \param FD is a pointer to the function declaration, if any.
+  /// \param CGF is a pointer to the CodeGenFunction that generates this
+  /// function.
+  void GenOpenCLArgMetadata(llvm::Function *FN,
+                            const FunctionDecl *FD = nullptr,
+                            CodeGenFunction *CGF = nullptr);
+
   /// Get target specific null pointer.
   /// \param T is the LLVM type of the null pointer.
   /// \param QT is the clang QualType of the null pointer.
diff --git a/lib/CodeGen/CodeGenPGO.cpp b/lib/CodeGen/CodeGenPGO.cpp
index 776060743a63..d10a321dc3d7 100644
--- a/lib/CodeGen/CodeGenPGO.cpp
+++ b/lib/CodeGen/CodeGenPGO.cpp
@@ -1,9 +1,8 @@
 //===--- CodeGenPGO.cpp - PGO Instrumentation for LLVM CodeGen --*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -772,14 +771,14 @@ void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) {
   // If so, instrument only the base variant; the others are implemented by
   // delegation to the base one and would otherwise be counted twice.
   if (CGM.getTarget().getCXXABI().hasConstructorVariants()) {
-    if (isa<CXXDestructorDecl>(D) && GD.getDtorType() != Dtor_Base)
-      return;
-
     if (const auto *CCD = dyn_cast<CXXConstructorDecl>(D))
       if (GD.getCtorType() != Ctor_Base &&
           CodeGenFunction::IsConstructorDelegationValid(CCD))
         return;
   }
+  if (isa<CXXDestructorDecl>(D) && GD.getDtorType() != Dtor_Base)
+    return;
+
   CGM.ClearUnusedCoverageMapping(D);
   setFuncName(Fn);
diff --git a/lib/CodeGen/CodeGenPGO.h b/lib/CodeGen/CodeGenPGO.h
index 120ab651a4a8..2e740f789243 100644
--- a/lib/CodeGen/CodeGenPGO.h
+++ b/lib/CodeGen/CodeGenPGO.h
@@ -1,9 +1,8 @@
 //===--- CodeGenPGO.h - PGO Instrumentation for LLVM CodeGen ----*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/CodeGenTBAA.cpp b/lib/CodeGen/CodeGenTBAA.cpp index 27d39716d22f..09de9591de7e 100644 --- a/lib/CodeGen/CodeGenTBAA.cpp +++ b/lib/CodeGen/CodeGenTBAA.cpp @@ -1,9 +1,8 @@ -//===--- CodeGenTypes.cpp - TBAA information for LLVM CodeGen -------------===// +//===-- CodeGenTBAA.cpp - TBAA information for LLVM CodeGen ---------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -258,6 +257,8 @@ CodeGenTBAA::CollectFields(uint64_t BaseOffset, unsigned idx = 0; for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); i != e; ++i, ++idx) { + if ((*i)->isZeroSize(Context) || (*i)->isUnnamedBitfield()) + continue; uint64_t Offset = BaseOffset + Layout.getFieldOffset(idx) / Context.getCharWidth(); QualType FieldQTy = i->getType(); @@ -298,6 +299,8 @@ llvm::MDNode *CodeGenTBAA::getBaseTypeInfoHelper(const Type *Ty) { const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); SmallVector<llvm::MDBuilder::TBAAStructField, 4> Fields; for (FieldDecl *Field : RD->fields()) { + if (Field->isZeroSize(Context) || Field->isUnnamedBitfield()) + continue; QualType FieldQTy = Field->getType(); llvm::MDNode *TypeNode = isValidBaseType(FieldQTy) ? getBaseTypeInfo(FieldQTy) : getTypeInfo(FieldQTy); diff --git a/lib/CodeGen/CodeGenTBAA.h b/lib/CodeGen/CodeGenTBAA.h index 86ba407c05c6..e8e006f41616 100644 --- a/lib/CodeGen/CodeGenTBAA.h +++ b/lib/CodeGen/CodeGenTBAA.h @@ -1,9 +1,8 @@ //===--- CodeGenTBAA.h - TBAA information for LLVM CodeGen ------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/CodeGenTypeCache.h b/lib/CodeGen/CodeGenTypeCache.h index 901aed6c00b2..ed4b773afd13 100644 --- a/lib/CodeGen/CodeGenTypeCache.h +++ b/lib/CodeGen/CodeGenTypeCache.h @@ -1,9 +1,8 @@ //===--- CodeGenTypeCache.h - Commonly used LLVM types and info -*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp index 2acf1ac16180..79b29b3d916f 100644 --- a/lib/CodeGen/CodeGenTypes.cpp +++ b/lib/CodeGen/CodeGenTypes.cpp @@ -1,9 +1,8 @@ //===--- CodeGenTypes.cpp - Type translation for LLVM CodeGen -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -309,8 +308,7 @@ static llvm::Type *getTypeForFormat(llvm::LLVMContext &VMContext, llvm_unreachable("Unknown float format!"); } -llvm::Type *CodeGenTypes::ConvertFunctionType(QualType QFT, - const FunctionDecl *FD) { +llvm::Type *CodeGenTypes::ConvertFunctionTypeInternal(QualType QFT) { assert(QFT.isCanonical()); const Type *Ty = QFT.getTypePtr(); const FunctionType *FT = cast<FunctionType>(QFT.getTypePtr()); @@ -348,7 +346,7 @@ llvm::Type *CodeGenTypes::ConvertFunctionType(QualType QFT, const CGFunctionInfo *FI; if (const FunctionProtoType *FPT = dyn_cast<FunctionProtoType>(FT)) { FI = &arrangeFreeFunctionType( - CanQual<FunctionProtoType>::CreateUnsafe(QualType(FPT, 0)), FD); + CanQual<FunctionProtoType>::CreateUnsafe(QualType(FPT, 0))); } else { const FunctionNoProtoType *FNPT = cast<FunctionNoProtoType>(FT); FI = &arrangeFreeFunctionType( @@ -597,7 +595,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { } case Type::FunctionNoProto: case Type::FunctionProto: - ResultType = ConvertFunctionType(T); + ResultType = ConvertFunctionTypeInternal(T); break; case Type::ObjCObject: ResultType = ConvertType(cast<ObjCObjectType>(Ty)->getBaseType()); @@ -637,7 +635,9 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { case Type::BlockPointer: { const QualType FTy = cast<BlockPointerType>(Ty)->getPointeeType(); - llvm::Type *PointeeType = ConvertTypeForMem(FTy); + llvm::Type *PointeeType = CGM.getLangOpts().OpenCL + ? CGM.getGenericBlockLiteralType() + : ConvertTypeForMem(FTy); unsigned AS = Context.getTargetAddressSpace(FTy); ResultType = llvm::PointerType::get(PointeeType, AS); break; diff --git a/lib/CodeGen/CodeGenTypes.h b/lib/CodeGen/CodeGenTypes.h index 8e344e91b8cd..03102329507e 100644 --- a/lib/CodeGen/CodeGenTypes.h +++ b/lib/CodeGen/CodeGenTypes.h @@ -1,9 +1,8 @@ //===--- CodeGenTypes.h - Type translation for LLVM CodeGen -----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -55,65 +54,6 @@ class CGRecordLayout; class CodeGenModule; class RequiredArgs; -enum class StructorType { - Complete, // constructor or destructor - Base, // constructor or destructor - Deleting // destructor only -}; - -inline CXXCtorType toCXXCtorType(StructorType T) { - switch (T) { - case StructorType::Complete: - return Ctor_Complete; - case StructorType::Base: - return Ctor_Base; - case StructorType::Deleting: - llvm_unreachable("cannot have a deleting ctor"); - } - llvm_unreachable("not a StructorType"); -} - -inline StructorType getFromCtorType(CXXCtorType T) { - switch (T) { - case Ctor_Complete: - return StructorType::Complete; - case Ctor_Base: - return StructorType::Base; - case Ctor_Comdat: - llvm_unreachable("not expecting a COMDAT"); - case Ctor_CopyingClosure: - case Ctor_DefaultClosure: - llvm_unreachable("not expecting a closure"); - } - llvm_unreachable("not a CXXCtorType"); -} - -inline CXXDtorType toCXXDtorType(StructorType T) { - switch (T) { - case StructorType::Complete: - return Dtor_Complete; - case StructorType::Base: - return Dtor_Base; - case StructorType::Deleting: - return Dtor_Deleting; - } - llvm_unreachable("not a StructorType"); -} - -inline StructorType getFromDtorType(CXXDtorType T) { - switch (T) { - case Dtor_Deleting: - return StructorType::Deleting; - case Dtor_Complete: - return StructorType::Complete; - case Dtor_Base: - return StructorType::Base; - case Dtor_Comdat: - llvm_unreachable("not expecting a COMDAT"); - } - llvm_unreachable("not a CXXDtorType"); -} - /// This class organizes the cross-module state that is used while lowering /// AST types to LLVM types. class CodeGenTypes { @@ -163,6 +103,9 @@ class CodeGenTypes { llvm::SmallSet<const Type *, 8> RecordsWithOpaqueMemberPointers; + /// Helper for ConvertType. + llvm::Type *ConvertFunctionTypeInternal(QualType FT); + public: CodeGenTypes(CodeGenModule &cgm); ~CodeGenTypes(); @@ -180,17 +123,13 @@ public: /// Convert clang calling convention to LLVM callilng convention. unsigned ClangCallConvToLLVMCallConv(CallingConv CC); + /// Derives the 'this' type for codegen purposes, i.e. ignoring method CVR + /// qualification. + CanQualType DeriveThisType(const CXXRecordDecl *RD, const CXXMethodDecl *MD); + /// ConvertType - Convert type T into a llvm::Type. llvm::Type *ConvertType(QualType T); - /// Converts the GlobalDecl into an llvm::Type. This should be used - /// when we know the target of the function we want to convert. This is - /// because some functions (explicitly, those with pass_object_size - /// parameters) may not have the same signature as their type portrays, and - /// can only be called directly. - llvm::Type *ConvertFunctionType(QualType FT, - const FunctionDecl *FD = nullptr); - /// ConvertTypeForMem - Convert type T into a llvm::Type. This differs from /// ConvertType in that it is used to convert to the memory representation for /// a type. 
For example, the scalar representation for _Bool is i1, but the @@ -263,8 +202,7 @@ public: const CGFunctionInfo &arrangeFreeFunctionCall(const CallArgList &Args, const FunctionType *Ty, bool ChainCall); - const CGFunctionInfo &arrangeFreeFunctionType(CanQual<FunctionProtoType> Ty, - const FunctionDecl *FD); + const CGFunctionInfo &arrangeFreeFunctionType(CanQual<FunctionProtoType> Ty); const CGFunctionInfo &arrangeFreeFunctionType(CanQual<FunctionNoProtoType> Ty); /// A nullary function is a freestanding function of type 'void ()'. @@ -299,8 +237,7 @@ public: /// C++ methods have some special rules and also have implicit parameters. const CGFunctionInfo &arrangeCXXMethodDeclaration(const CXXMethodDecl *MD); - const CGFunctionInfo &arrangeCXXStructorDeclaration(const CXXMethodDecl *MD, - StructorType Type); + const CGFunctionInfo &arrangeCXXStructorDeclaration(GlobalDecl GD); const CGFunctionInfo &arrangeCXXConstructorCall(const CallArgList &Args, const CXXConstructorDecl *D, CXXCtorType CtorKind, diff --git a/lib/CodeGen/ConstantEmitter.h b/lib/CodeGen/ConstantEmitter.h index 7ad8e5d37cd1..59a19730f4eb 100644 --- a/lib/CodeGen/ConstantEmitter.h +++ b/lib/CodeGen/ConstantEmitter.h @@ -1,9 +1,8 @@ //===--- ConstantEmitter.h - IR constant emission ---------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/ConstantInitBuilder.cpp b/lib/CodeGen/ConstantInitBuilder.cpp index 59e66b88fb01..40b1607b5626 100644 --- a/lib/CodeGen/ConstantInitBuilder.cpp +++ b/lib/CodeGen/ConstantInitBuilder.cpp @@ -1,9 +1,8 @@ //===--- ConstantInitBuilder.cpp - Global initializer builder -------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/CoverageMappingGen.cpp b/lib/CodeGen/CoverageMappingGen.cpp index 35962c73d9a8..6d18027f16a8 100644 --- a/lib/CodeGen/CoverageMappingGen.cpp +++ b/lib/CodeGen/CoverageMappingGen.cpp @@ -1,9 +1,8 @@ //===--- CoverageMappingGen.cpp - Coverage mapping generation ---*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -244,7 +243,7 @@ public: ++Depth; FileLocs.push_back(std::make_pair(Loc, Depth)); } - std::stable_sort(FileLocs.begin(), FileLocs.end(), llvm::less_second()); + llvm::stable_sort(FileLocs, llvm::less_second()); for (const auto &FL : FileLocs) { SourceLocation Loc = FL.first; @@ -1282,7 +1281,7 @@ std::string getCoverageSection(const CodeGenModule &CGM) { std::string normalizeFilename(StringRef Filename) { llvm::SmallString<256> Path(Filename); llvm::sys::fs::make_absolute(Path); - llvm::sys::path::remove_dots(Path, /*remove_dot_dots=*/true); + llvm::sys::path::remove_dots(Path, /*remove_dot_dot=*/true); return Path.str().str(); } @@ -1389,10 +1388,19 @@ void CoverageMappingModuleGen::emit() { std::string FilenamesAndCoverageMappings; llvm::raw_string_ostream OS(FilenamesAndCoverageMappings); CoverageFilenamesSectionWriter(FilenameRefs).write(OS); - std::string RawCoverageMappings = - llvm::join(CoverageMappings.begin(), CoverageMappings.end(), ""); - OS << RawCoverageMappings; - size_t CoverageMappingSize = RawCoverageMappings.size(); + + // Stream the content of CoverageMappings to OS while keeping + // memory consumption under control. + size_t CoverageMappingSize = 0; + for (auto &S : CoverageMappings) { + CoverageMappingSize += S.size(); + OS << S; + S.clear(); + S.shrink_to_fit(); + } + CoverageMappings.clear(); + CoverageMappings.shrink_to_fit(); + size_t FilenamesSize = OS.str().size() - CoverageMappingSize; // Append extra zeroes if necessary to ensure that the size of the filenames // and coverage mappings is a multiple of 8. diff --git a/lib/CodeGen/CoverageMappingGen.h b/lib/CodeGen/CoverageMappingGen.h index c62db096952a..3bf51f590479 100644 --- a/lib/CodeGen/CoverageMappingGen.h +++ b/lib/CodeGen/CoverageMappingGen.h @@ -1,9 +1,8 @@ //===---- CoverageMappingGen.h - Coverage mapping generation ----*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/EHScopeStack.h b/lib/CodeGen/EHScopeStack.h index c7bdeac58a1a..3b0db35d982b 100644 --- a/lib/CodeGen/EHScopeStack.h +++ b/lib/CodeGen/EHScopeStack.h @@ -1,9 +1,8 @@ //===-- EHScopeStack.h - Stack for cleanup IR generation --------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp index b53304528c3d..3b2413d960d6 100644 --- a/lib/CodeGen/ItaniumCXXABI.cpp +++ b/lib/CodeGen/ItaniumCXXABI.cpp @@ -1,9 +1,8 @@ //===------- ItaniumCXXABI.cpp - Emit LLVM Code from ASTs for a Module ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -29,7 +28,6 @@ #include "clang/AST/Mangle.h" #include "clang/AST/Type.h" #include "clang/AST/StmtCXX.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Instructions.h" @@ -64,13 +62,9 @@ public: bool classifyReturnType(CGFunctionInfo &FI) const override; - bool passClassIndirect(const CXXRecordDecl *RD) const { - return !canCopyArgument(RD); - } - RecordArgABI getRecordArgABI(const CXXRecordDecl *RD) const override { // If C++ prohibits us from making a copy, pass by address. - if (passClassIndirect(RD)) + if (!RD->canPassInRegisters()) return RAA_Indirect; return RAA_Default; } @@ -160,19 +154,6 @@ public: Address Ptr, QualType ElementType, const CXXDestructorDecl *Dtor) override; - /// Itanium says that an _Unwind_Exception has to be "double-word" - /// aligned (and thus the end of it is also so-aligned), meaning 16 - /// bytes. Of course, that was written for the actual Itanium, - /// which is a 64-bit platform. Classically, the ABI doesn't really - /// specify the alignment on other platforms, but in practice - /// libUnwind declares the struct with __attribute__((aligned)), so - /// we assume that alignment here. (It's generally 16 bytes, but - /// some targets overwrite it.) 
- CharUnits getAlignmentOfExnObject() { - auto align = CGM.getContext().getTargetDefaultAlignForAttributeAligned(); - return CGM.getContext().toCharUnitsFromBits(align); - } - void emitRethrow(CodeGenFunction &CGF, bool isNoReturn) override; void emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) override; @@ -218,7 +199,7 @@ public: void EmitCXXConstructors(const CXXConstructorDecl *D) override; AddedStructorArgs - buildStructorSignature(const CXXMethodDecl *MD, StructorType T, + buildStructorSignature(GlobalDecl GD, SmallVectorImpl<CanQualType> &ArgTys) override; bool useThunkForDtorVariant(const CXXDestructorDecl *Dtor, @@ -243,7 +224,8 @@ public: void EmitDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *DD, CXXDtorType Type, bool ForVirtualBase, - bool Delegating, Address This) override; + bool Delegating, Address This, + QualType ThisTy) override; void emitVTableDefinitions(CodeGenVTables &CGVT, const CXXRecordDecl *RD) override; @@ -280,9 +262,8 @@ public: llvm::Value *EmitVirtualDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *Dtor, - CXXDtorType DtorType, - Address This, - const CXXMemberCallExpr *CE) override; + CXXDtorType DtorType, Address This, + DeleteOrMemberCallExpr E) override; void emitVirtualInheritanceTables(const CXXRecordDecl *RD) override; @@ -330,7 +311,8 @@ public: llvm::GlobalVariable *DeclPtr, bool PerformInit) override; void registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D, - llvm::Constant *dtor, llvm::Constant *addr) override; + llvm::FunctionCallee dtor, + llvm::Constant *addr) override; llvm::Function *getOrCreateThreadLocalWrapper(const VarDecl *VD, llvm::Value *Val); @@ -377,7 +359,7 @@ public: llvm::GlobalValue::LinkageTypes Linkage) const; friend class ItaniumRTTIBuilder; - void emitCXXStructor(const CXXMethodDecl *MD, StructorType Type) override; + void emitCXXStructor(GlobalDecl GD) override; std::pair<llvm::Value *, const CXXRecordDecl *> LoadVTablePtr(CodeGenFunction &CGF, Address This, @@ -1094,7 +1076,7 @@ bool ItaniumCXXABI::classifyReturnType(CGFunctionInfo &FI) const { return false; // If C++ prohibits us from making a copy, return by address. - if (passClassIndirect(RD)) { + if (!RD->canPassInRegisters()) { auto Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType()); FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, /*ByVal=*/false); return true; @@ -1146,7 +1128,7 @@ void ItaniumCXXABI::emitVirtualObjectDelete(CodeGenFunction &CGF, // FIXME: Provide a source location here even though there's no // CXXMemberCallExpr for dtor call. CXXDtorType DtorType = UseGlobalDelete ? 
Dtor_Complete : Dtor_Deleting; - EmitVirtualDestructorCall(CGF, Dtor, DtorType, Ptr, /*CE=*/nullptr); + EmitVirtualDestructorCall(CGF, Dtor, DtorType, Ptr, DE); if (UseGlobalDelete) CGF.PopCleanupBlock(); @@ -1156,9 +1138,9 @@ void ItaniumCXXABI::emitRethrow(CodeGenFunction &CGF, bool isNoReturn) { // void __cxa_rethrow(); llvm::FunctionType *FTy = - llvm::FunctionType::get(CGM.VoidTy, /*IsVarArgs=*/false); + llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false); - llvm::Constant *Fn = CGM.CreateRuntimeFunction(FTy, "__cxa_rethrow"); + llvm::FunctionCallee Fn = CGM.CreateRuntimeFunction(FTy, "__cxa_rethrow"); if (isNoReturn) CGF.EmitNoreturnRuntimeCallOrInvoke(Fn, None); @@ -1166,22 +1148,22 @@ void ItaniumCXXABI::emitRethrow(CodeGenFunction &CGF, bool isNoReturn) { CGF.EmitRuntimeCallOrInvoke(Fn); } -static llvm::Constant *getAllocateExceptionFn(CodeGenModule &CGM) { +static llvm::FunctionCallee getAllocateExceptionFn(CodeGenModule &CGM) { // void *__cxa_allocate_exception(size_t thrown_size); llvm::FunctionType *FTy = - llvm::FunctionType::get(CGM.Int8PtrTy, CGM.SizeTy, /*IsVarArgs=*/false); + llvm::FunctionType::get(CGM.Int8PtrTy, CGM.SizeTy, /*isVarArg=*/false); return CGM.CreateRuntimeFunction(FTy, "__cxa_allocate_exception"); } -static llvm::Constant *getThrowFn(CodeGenModule &CGM) { +static llvm::FunctionCallee getThrowFn(CodeGenModule &CGM) { // void __cxa_throw(void *thrown_exception, std::type_info *tinfo, // void (*dest) (void *)); llvm::Type *Args[3] = { CGM.Int8PtrTy, CGM.Int8PtrTy, CGM.Int8PtrTy }; llvm::FunctionType *FTy = - llvm::FunctionType::get(CGM.VoidTy, Args, /*IsVarArgs=*/false); + llvm::FunctionType::get(CGM.VoidTy, Args, /*isVarArg=*/false); return CGM.CreateRuntimeFunction(FTy, "__cxa_throw"); } @@ -1192,11 +1174,11 @@ void ItaniumCXXABI::emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) { llvm::Type *SizeTy = CGF.ConvertType(getContext().getSizeType()); uint64_t TypeSize = getContext().getTypeSizeInChars(ThrowType).getQuantity(); - llvm::Constant *AllocExceptionFn = getAllocateExceptionFn(CGM); + llvm::FunctionCallee AllocExceptionFn = getAllocateExceptionFn(CGM); llvm::CallInst *ExceptionPtr = CGF.EmitNounwindRuntimeCall( AllocExceptionFn, llvm::ConstantInt::get(SizeTy, TypeSize), "exception"); - CharUnits ExnAlign = getAlignmentOfExnObject(); + CharUnits ExnAlign = CGF.getContext().getExnObjectAlignment(); CGF.EmitAnyExprToExn(E->getSubExpr(), Address(ExceptionPtr, ExnAlign)); // Now throw the exception. 
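To make the sequence above concrete, here is a rough source-level sketch of what emitThrow produces, using the __cxa_allocate_exception and __cxa_throw declarations introduced just above. The real codegen passes the complete-object destructor itself; the t_dtor helper here is only for readability:

    #include <cstddef>
    #include <new>
    #include <typeinfo>

    extern "C" void *__cxa_allocate_exception(std::size_t thrown_size);
    extern "C" void __cxa_throw(void *thrown_exception, std::type_info *tinfo,
                                void (*dest)(void *));

    struct T { ~T() {} };
    static void t_dtor(void *p) { static_cast<T *>(p)->~T(); }

    void throw_t() {             // roughly what 'throw T();' lowers to
      // Storage comes from the runtime, aligned to getExnObjectAlignment().
      void *exn = __cxa_allocate_exception(sizeof(T));
      ::new (exn) T();           // EmitAnyExprToExn
      __cxa_throw(exn, const_cast<std::type_info *>(&typeid(T)), &t_dtor);
    }                            // __cxa_throw does not return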
@@ -1210,7 +1192,7 @@ void ItaniumCXXABI::emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) { CXXRecordDecl *Record = cast<CXXRecordDecl>(RecordTy->getDecl()); if (!Record->hasTrivialDestructor()) { CXXDestructorDecl *DtorD = Record->getDestructor(); - Dtor = CGM.getAddrOfCXXStructor(DtorD, StructorType::Complete); + Dtor = CGM.getAddrOfCXXStructor(GlobalDecl(DtorD, Dtor_Complete)); Dtor = llvm::ConstantExpr::getBitCast(Dtor, CGM.Int8PtrTy); } } @@ -1220,7 +1202,7 @@ void ItaniumCXXABI::emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) { CGF.EmitNoreturnRuntimeCallOrInvoke(getThrowFn(CGM), args); } -static llvm::Constant *getItaniumDynamicCastFn(CodeGenFunction &CGF) { +static llvm::FunctionCallee getItaniumDynamicCastFn(CodeGenFunction &CGF) { // void *__dynamic_cast(const void *sub, // const abi::__class_type_info *src, // const abi::__class_type_info *dst, @@ -1243,7 +1225,7 @@ static llvm::Constant *getItaniumDynamicCastFn(CodeGenFunction &CGF) { return CGF.CGM.CreateRuntimeFunction(FTy, "__dynamic_cast", Attrs); } -static llvm::Constant *getBadCastFn(CodeGenFunction &CGF) { +static llvm::FunctionCallee getBadCastFn(CodeGenFunction &CGF) { // void __cxa_bad_cast(); llvm::FunctionType *FTy = llvm::FunctionType::get(CGF.VoidTy, false); return CGF.CGM.CreateRuntimeFunction(FTy, "__cxa_bad_cast"); @@ -1301,7 +1283,7 @@ static CharUnits computeOffsetHint(ASTContext &Context, return Offset; } -static llvm::Constant *getBadTypeidFn(CodeGenFunction &CGF) { +static llvm::FunctionCallee getBadTypeidFn(CodeGenFunction &CGF) { // void __cxa_bad_typeid(); llvm::FunctionType *FTy = llvm::FunctionType::get(CGF.VoidTy, false); @@ -1314,8 +1296,9 @@ bool ItaniumCXXABI::shouldTypeidBeNullChecked(bool IsDeref, } void ItaniumCXXABI::EmitBadTypeidCall(CodeGenFunction &CGF) { - llvm::Value *Fn = getBadTypeidFn(CGF); - CGF.EmitRuntimeCallOrInvoke(Fn).setDoesNotReturn(); + llvm::FunctionCallee Fn = getBadTypeidFn(CGF); + llvm::CallBase *Call = CGF.EmitRuntimeCallOrInvoke(Fn); + Call->setDoesNotReturn(); CGF.Builder.CreateUnreachable(); } @@ -1411,8 +1394,9 @@ llvm::Value *ItaniumCXXABI::EmitDynamicCastToVoid(CodeGenFunction &CGF, } bool ItaniumCXXABI::EmitBadCastCall(CodeGenFunction &CGF) { - llvm::Value *Fn = getBadCastFn(CGF); - CGF.EmitRuntimeCallOrInvoke(Fn).setDoesNotReturn(); + llvm::FunctionCallee Fn = getBadCastFn(CGF); + llvm::CallBase *Call = CGF.EmitRuntimeCallOrInvoke(Fn); + Call->setDoesNotReturn(); CGF.Builder.CreateUnreachable(); return true; } @@ -1457,7 +1441,7 @@ void ItaniumCXXABI::EmitCXXConstructors(const CXXConstructorDecl *D) { } CGCXXABI::AddedStructorArgs -ItaniumCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T, +ItaniumCXXABI::buildStructorSignature(GlobalDecl GD, SmallVectorImpl<CanQualType> &ArgTys) { ASTContext &Context = getContext(); @@ -1465,7 +1449,9 @@ ItaniumCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T, // These are Clang types, so we don't need to worry about sret yet. // Check if we need to add a VTT parameter (which has type void **). - if (T == StructorType::Base && MD->getParent()->getNumVBases() != 0) { + if ((isa<CXXConstructorDecl>(GD.getDecl()) ? 
GD.getCtorType() == Ctor_Base + : GD.getDtorType() == Dtor_Base) && + cast<CXXMethodDecl>(GD.getDecl())->getParent()->getNumVBases() != 0) { ArgTys.insert(ArgTys.begin() + 1, Context.getPointerType(Context.VoidPtrTy)); return AddedStructorArgs::prefix(1); @@ -1553,7 +1539,8 @@ CGCXXABI::AddedStructorArgs ItaniumCXXABI::addImplicitConstructorArgs( void ItaniumCXXABI::EmitDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *DD, CXXDtorType Type, bool ForVirtualBase, - bool Delegating, Address This) { + bool Delegating, Address This, + QualType ThisTy) { GlobalDecl GD(DD, Type); llvm::Value *VTT = CGF.GetVTTParameter(GD, ForVirtualBase, Delegating); QualType VTTTy = getContext().getPointerType(getContext().VoidPtrTy); @@ -1563,12 +1550,10 @@ void ItaniumCXXABI::EmitDestructorCall(CodeGenFunction &CGF, Type != Dtor_Base && DD->isVirtual()) Callee = CGF.BuildAppleKextVirtualDestructorCall(DD, Type, DD->getParent()); else - Callee = CGCallee::forDirect( - CGM.getAddrOfCXXStructor(DD, getFromDtorType(Type)), GD); + Callee = CGCallee::forDirect(CGM.getAddrOfCXXStructor(GD), GD); - CGF.EmitCXXMemberOrOperatorCall(DD, Callee, ReturnValueSlot(), - This.getPointer(), VTT, VTTTy, - nullptr, nullptr); + CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(), ThisTy, VTT, VTTTy, + nullptr); } void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT, @@ -1756,19 +1741,27 @@ CGCallee ItaniumCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF, llvm::Value *ItaniumCXXABI::EmitVirtualDestructorCall( CodeGenFunction &CGF, const CXXDestructorDecl *Dtor, CXXDtorType DtorType, - Address This, const CXXMemberCallExpr *CE) { + Address This, DeleteOrMemberCallExpr E) { + auto *CE = E.dyn_cast<const CXXMemberCallExpr *>(); + auto *D = E.dyn_cast<const CXXDeleteExpr *>(); + assert((CE != nullptr) ^ (D != nullptr)); assert(CE == nullptr || CE->arg_begin() == CE->arg_end()); assert(DtorType == Dtor_Deleting || DtorType == Dtor_Complete); - const CGFunctionInfo *FInfo = &CGM.getTypes().arrangeCXXStructorDeclaration( - Dtor, getFromDtorType(DtorType)); + GlobalDecl GD(Dtor, DtorType); + const CGFunctionInfo *FInfo = + &CGM.getTypes().arrangeCXXStructorDeclaration(GD); llvm::FunctionType *Ty = CGF.CGM.getTypes().GetFunctionType(*FInfo); - CGCallee Callee = - CGCallee::forVirtual(CE, GlobalDecl(Dtor, DtorType), This, Ty); + CGCallee Callee = CGCallee::forVirtual(CE, GD, This, Ty); + + QualType ThisTy; + if (CE) + ThisTy = CE->getImplicitObjectArgument()->getType()->getPointeeType(); + else + ThisTy = D->getDestroyedType(); - CGF.EmitCXXMemberOrOperatorCall(Dtor, Callee, ReturnValueSlot(), - This.getPointer(), /*ImplicitParam=*/nullptr, - QualType(), CE, nullptr); + CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(), ThisTy, nullptr, + QualType(), nullptr); return nullptr; } @@ -1958,7 +1951,7 @@ Address ItaniumCXXABI::InitializeArrayCookie(CodeGenFunction &CGF, CGM.getSanitizerMetadata()->disableSanitizerForInstruction(SI); llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.VoidTy, NumElementsPtr.getType(), false); - llvm::Constant *F = + llvm::FunctionCallee F = CGM.CreateRuntimeFunction(FTy, "__asan_poison_cxx_array_cookie"); CGF.Builder.CreateCall(F, NumElementsPtr.getPointer()); } @@ -1989,7 +1982,7 @@ llvm::Value *ItaniumCXXABI::readArrayCookieImpl(CodeGenFunction &CGF, // the metadata may be lost. 
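      // (For orientation, a sketch of the Itanium array cookie being read
      // back here, assuming the generic layout without ARM adjustments:
      //
      //     [ size_t numElements ][ elem 0 ][ elem 1 ] ...
      //                           ^ pointer returned by new[]
      //
      // 'new T[n]' stores n in front of the allocation when T needs a
      // destructor, so 'delete[]' can recover the element count. ASan
      // poisons the cookie after it is written, hence the dedicated
      // callback below to load it without tripping the poisoning.)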
llvm::FunctionType *FTy = llvm::FunctionType::get(CGF.SizeTy, CGF.SizeTy->getPointerTo(0), false); - llvm::Constant *F = + llvm::FunctionCallee F = CGM.CreateRuntimeFunction(FTy, "__asan_load_cxx_array_cookie"); return CGF.Builder.CreateCall(F, numElementsPtr.getPointer()); } @@ -2024,7 +2017,7 @@ Address ARMCXXABI::InitializeArrayCookie(CodeGenFunction &CGF, CGF.Builder.CreateStore(elementSize, cookie); // The second element is the element count. - cookie = CGF.Builder.CreateConstInBoundsGEP(cookie, 1, CGF.getSizeSize()); + cookie = CGF.Builder.CreateConstInBoundsGEP(cookie, 1); CGF.Builder.CreateStore(numElements, cookie); // Finally, compute a pointer to the actual data buffer by skipping @@ -2047,8 +2040,8 @@ llvm::Value *ARMCXXABI::readArrayCookieImpl(CodeGenFunction &CGF, /*********************** Static local initialization **************************/ -static llvm::Constant *getGuardAcquireFn(CodeGenModule &CGM, - llvm::PointerType *GuardPtrTy) { +static llvm::FunctionCallee getGuardAcquireFn(CodeGenModule &CGM, + llvm::PointerType *GuardPtrTy) { // int __cxa_guard_acquire(__guard *guard_object); llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.getTypes().ConvertType(CGM.getContext().IntTy), @@ -2060,8 +2053,8 @@ static llvm::Constant *getGuardAcquireFn(CodeGenModule &CGM, llvm::Attribute::NoUnwind)); } -static llvm::Constant *getGuardReleaseFn(CodeGenModule &CGM, - llvm::PointerType *GuardPtrTy) { +static llvm::FunctionCallee getGuardReleaseFn(CodeGenModule &CGM, + llvm::PointerType *GuardPtrTy) { // void __cxa_guard_release(__guard *guard_object); llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.VoidTy, GuardPtrTy, /*isVarArg=*/false); @@ -2072,8 +2065,8 @@ static llvm::Constant *getGuardReleaseFn(CodeGenModule &CGM, llvm::Attribute::NoUnwind)); } -static llvm::Constant *getGuardAbortFn(CodeGenModule &CGM, - llvm::PointerType *GuardPtrTy) { +static llvm::FunctionCallee getGuardAbortFn(CodeGenModule &CGM, + llvm::PointerType *GuardPtrTy) { // void __cxa_guard_abort(__guard *guard_object); llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.VoidTy, GuardPtrTy, /*isVarArg=*/false); @@ -2286,9 +2279,10 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, /// Register a global destructor using __cxa_atexit. static void emitGlobalDtorWithCXAAtExit(CodeGenFunction &CGF, - llvm::Constant *dtor, - llvm::Constant *addr, - bool TLS) { + llvm::FunctionCallee dtor, + llvm::Constant *addr, bool TLS) { + assert((TLS || CGF.getTypes().getCodeGenOpts().CXAAtExit) && + "__cxa_atexit is disabled"); const char *Name = "__cxa_atexit"; if (TLS) { const llvm::Triple &T = CGF.getTarget().getTriple(); @@ -2301,15 +2295,10 @@ static void emitGlobalDtorWithCXAAtExit(CodeGenFunction &CGF, llvm::Type *dtorTy = llvm::FunctionType::get(CGF.VoidTy, CGF.Int8PtrTy, false)->getPointerTo(); - // extern "C" int __cxa_atexit(void (*f)(void *), void *p, void *d); - llvm::Type *paramTys[] = { dtorTy, CGF.Int8PtrTy, CGF.Int8PtrTy }; - llvm::FunctionType *atexitTy = - llvm::FunctionType::get(CGF.IntTy, paramTys, false); - - // Fetch the actual function. - llvm::Constant *atexit = CGF.CGM.CreateRuntimeFunction(atexitTy, Name); - if (llvm::Function *fn = dyn_cast<llvm::Function>(atexit)) - fn->setDoesNotThrow(); + // Preserve address space of addr. + auto AddrAS = addr ? addr->getType()->getPointerAddressSpace() : 0; + auto AddrInt8PtrTy = + AddrAS ? CGF.Int8Ty->getPointerTo(AddrAS) : CGF.Int8PtrTy; // Create a variable that binds the atexit to this shared object. 
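  // (A minimal source-level sketch of the registration emitted here,
  // assuming the standard Itanium entry point; Obj and global_obj are
  // placeholders:
  //   extern "C" int __cxa_atexit(void (*f)(void *), void *p, void *d);
  //   extern "C" void *__dso_handle;  // the hidden per-DSO handle below
  //   static void run_dtor(void *p) { static_cast<Obj *>(p)->~Obj(); }
  //   ... __cxa_atexit(&run_dtor, &global_obj, &__dso_handle);
  // Destructors registered this way run at exit or dlclose in reverse
  // registration order.)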
llvm::Constant *handle = @@ -2317,6 +2306,16 @@ static void emitGlobalDtorWithCXAAtExit(CodeGenFunction &CGF, auto *GV = cast<llvm::GlobalValue>(handle->stripPointerCasts()); GV->setVisibility(llvm::GlobalValue::HiddenVisibility); + // extern "C" int __cxa_atexit(void (*f)(void *), void *p, void *d); + llvm::Type *paramTys[] = {dtorTy, AddrInt8PtrTy, handle->getType()}; + llvm::FunctionType *atexitTy = + llvm::FunctionType::get(CGF.IntTy, paramTys, false); + + // Fetch the actual function. + llvm::FunctionCallee atexit = CGF.CGM.CreateRuntimeFunction(atexitTy, Name); + if (llvm::Function *fn = dyn_cast<llvm::Function>(atexit.getCallee())) + fn->setDoesNotThrow(); + if (!addr) // addr is null when we are trying to register a dtor annotated with // __attribute__((destructor)) in a constructor function. Using null here is @@ -2324,11 +2323,10 @@ static void emitGlobalDtorWithCXAAtExit(CodeGenFunction &CGF, // function. addr = llvm::Constant::getNullValue(CGF.Int8PtrTy); - llvm::Value *args[] = { - llvm::ConstantExpr::getBitCast(dtor, dtorTy), - llvm::ConstantExpr::getBitCast(addr, CGF.Int8PtrTy), - handle - }; + llvm::Value *args[] = {llvm::ConstantExpr::getBitCast( + cast<llvm::Constant>(dtor.getCallee()), dtorTy), + llvm::ConstantExpr::getBitCast(addr, AddrInt8PtrTy), + handle}; CGF.EmitNounwindRuntimeCall(atexit, args); } @@ -2377,20 +2375,19 @@ void CodeGenModule::registerGlobalDtorsWithAtExit() { } /// Register a global destructor as best as we know how. -void ItaniumCXXABI::registerGlobalDtor(CodeGenFunction &CGF, - const VarDecl &D, - llvm::Constant *dtor, +void ItaniumCXXABI::registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D, + llvm::FunctionCallee dtor, llvm::Constant *addr) { if (D.isNoDestroy(CGM.getContext())) return; - // Use __cxa_atexit if available. - if (CGM.getCodeGenOpts().CXAAtExit) + // emitGlobalDtorWithCXAAtExit will emit a call to either __cxa_thread_atexit + // or __cxa_atexit depending on whether this VarDecl is a thread-local storage + // or not. CXAAtExit controls only __cxa_atexit, so use it if it is enabled. + // We can always use __cxa_thread_atexit. + if (CGM.getCodeGenOpts().CXAAtExit || D.getTLSKind()) return emitGlobalDtorWithCXAAtExit(CGF, dtor, addr, D.getTLSKind()); - if (D.getTLSKind()) - CGM.ErrorUnsupported(&D, "non-trivial TLS destruction"); - // In Apple kexts, we want to add a global destructor entry. // FIXME: shouldn't this be guarded by some variable? if (CGM.getLangOpts().AppleKext) { @@ -2416,7 +2413,7 @@ static bool isThreadWrapperReplaceable(const VarDecl *VD, static llvm::GlobalValue::LinkageTypes getThreadLocalWrapperLinkage(const VarDecl *VD, CodeGen::CodeGenModule &CGM) { llvm::GlobalValue::LinkageTypes VarLinkage = - CGM.getLLVMLinkageVarDefinition(VD, /*isConstant=*/false); + CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); // For internal linkage variables, we don't need an external or weak wrapper. if (llvm::GlobalValue::isLocalLinkage(VarLinkage)) @@ -2463,10 +2460,12 @@ ItaniumCXXABI::getOrCreateThreadLocalWrapper(const VarDecl *VD, CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Wrapper); // Always resolve references to the wrapper at link time. 
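  // (Context sketch, assuming the usual Itanium thread_local lowering: a
  // cross-TU access to a dynamically initialized 'thread_local int tls_var;'
  // goes through a wrapper roughly like
  //   int &_ZTW7tls_var() {          // "thread wrapper"
  //     if (&_ZTH7tls_var)           // weak ref to this TU's dynamic init
  //       _ZTH7tls_var();
  //     return tls_var;
  //   }
  // which matches the null-check-and-call emission below. The wrapper's
  // linkage and visibility therefore control whether the definition can be
  // interposed, which is what this change tightens.)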
- if (!Wrapper->hasLocalLinkage() && !(isThreadWrapperReplaceable(VD, CGM) && - !llvm::GlobalVariable::isLinkOnceLinkage(Wrapper->getLinkage()) && - !llvm::GlobalVariable::isWeakODRLinkage(Wrapper->getLinkage()))) - Wrapper->setVisibility(llvm::GlobalValue::HiddenVisibility); + if (!Wrapper->hasLocalLinkage()) + if (!isThreadWrapperReplaceable(VD, CGM) || + llvm::GlobalVariable::isLinkOnceLinkage(Wrapper->getLinkage()) || + llvm::GlobalVariable::isWeakODRLinkage(Wrapper->getLinkage()) || + VD->getVisibility() == HiddenVisibility) + Wrapper->setVisibility(llvm::GlobalValue::HiddenVisibility); if (isThreadWrapperReplaceable(VD, CGM)) { Wrapper->setCallingConv(llvm::CallingConv::CXX_FAST_TLS); @@ -2541,6 +2540,8 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( getMangleContext().mangleItaniumThreadLocalInit(VD, Out); } + llvm::FunctionType *InitFnTy = llvm::FunctionType::get(CGM.VoidTy, false); + // If we have a definition for the variable, emit the initialization // function as an alias to the global Init function (if any). Otherwise, // produce a declaration of the initialization function. @@ -2559,8 +2560,7 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( // This function will not exist if the TU defining the thread_local // variable in question does not need any dynamic initialization for // its thread_local variables. - llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, false); - Init = llvm::Function::Create(FnTy, + Init = llvm::Function::Create(InitFnTy, llvm::GlobalVariable::ExternalWeakLinkage, InitFnName.str(), &CGM.getModule()); const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); @@ -2578,7 +2578,7 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( CGBuilderTy Builder(CGM, Entry); if (InitIsInitFunc) { if (Init) { - llvm::CallInst *CallVal = Builder.CreateCall(Init); + llvm::CallInst *CallVal = Builder.CreateCall(InitFnTy, Init); if (isThreadWrapperReplaceable(VD, CGM)) { CallVal->setCallingConv(llvm::CallingConv::CXX_FAST_TLS); llvm::Function *Fn = @@ -2594,7 +2594,7 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( Builder.CreateCondBr(Have, InitBB, ExitBB); Builder.SetInsertPoint(InitBB); - Builder.CreateCall(Init); + Builder.CreateCall(InitFnTy, Init); Builder.CreateBr(ExitBB); Builder.SetInsertPoint(ExitBB); @@ -2791,7 +2791,7 @@ ItaniumRTTIBuilder::GetAddrOfExternalRTTIDescriptor(QualType Ty) { // RTTI, check if emitting vtables opportunistically need any adjustment. GV = new llvm::GlobalVariable(CGM.getModule(), CGM.Int8PtrTy, - /*Constant=*/true, + /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, nullptr, Name); const CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); @@ -2960,7 +2960,7 @@ static bool ShouldUseExternalRTTIDescriptor(CodeGenModule &CGM, bool IsDLLImport = RD->hasAttr<DLLImportAttr>(); // Don't import the RTTI but emit it locally. - if (CGM.getTriple().isWindowsGNUEnvironment() && IsDLLImport) + if (CGM.getTriple().isWindowsGNUEnvironment()) return false; if (CGM.getVTables().isVTableExternal(RD)) @@ -3396,7 +3396,7 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo( llvm::GlobalVariable *OldGV = M.getNamedGlobal(Name); llvm::GlobalVariable *GV = new llvm::GlobalVariable(M, Init->getType(), - /*Constant=*/true, Linkage, Init, Name); + /*isConstant=*/true, Linkage, Init, Name); // If there's already an old global variable, replace it with the new one. 
if (OldGV) { @@ -3438,6 +3438,9 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo( TypeName->setDLLStorageClass(DLLStorageClass); GV->setDLLStorageClass(DLLStorageClass); + TypeName->setPartition(CGM.getCodeGenOpts().SymbolPartition); + GV->setPartition(CGM.getCodeGenOpts().SymbolPartition); + return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy); } @@ -3846,31 +3849,28 @@ static void emitConstructorDestructorAlias(CodeGenModule &CGM, CGM.SetCommonAttributes(AliasDecl, Alias); } -void ItaniumCXXABI::emitCXXStructor(const CXXMethodDecl *MD, - StructorType Type) { +void ItaniumCXXABI::emitCXXStructor(GlobalDecl GD) { + auto *MD = cast<CXXMethodDecl>(GD.getDecl()); auto *CD = dyn_cast<CXXConstructorDecl>(MD); const CXXDestructorDecl *DD = CD ? nullptr : cast<CXXDestructorDecl>(MD); StructorCodegen CGType = getCodegenToUse(CGM, MD); - if (Type == StructorType::Complete) { - GlobalDecl CompleteDecl; + if (CD ? GD.getCtorType() == Ctor_Complete + : GD.getDtorType() == Dtor_Complete) { GlobalDecl BaseDecl; - if (CD) { - CompleteDecl = GlobalDecl(CD, Ctor_Complete); - BaseDecl = GlobalDecl(CD, Ctor_Base); - } else { - CompleteDecl = GlobalDecl(DD, Dtor_Complete); - BaseDecl = GlobalDecl(DD, Dtor_Base); - } + if (CD) + BaseDecl = GD.getWithCtorType(Ctor_Base); + else + BaseDecl = GD.getWithDtorType(Dtor_Base); if (CGType == StructorCodegen::Alias || CGType == StructorCodegen::COMDAT) { - emitConstructorDestructorAlias(CGM, CompleteDecl, BaseDecl); + emitConstructorDestructorAlias(CGM, GD, BaseDecl); return; } if (CGType == StructorCodegen::RAUW) { - StringRef MangledName = CGM.getMangledName(CompleteDecl); + StringRef MangledName = CGM.getMangledName(GD); auto *Aliasee = CGM.GetAddrOfGlobal(BaseDecl); CGM.addReplacement(MangledName, Aliasee); return; @@ -3881,7 +3881,8 @@ void ItaniumCXXABI::emitCXXStructor(const CXXMethodDecl *MD, // base class if there is exactly one non-virtual base class with a // non-trivial destructor, there are no fields with a non-trivial // destructor, and the body of the destructor is trivial. - if (DD && Type == StructorType::Base && CGType != StructorCodegen::COMDAT && + if (DD && GD.getDtorType() == Dtor_Base && + CGType != StructorCodegen::COMDAT && !CGM.TryEmitBaseDestructorAsAlias(DD)) return; @@ -3897,7 +3898,7 @@ void ItaniumCXXABI::emitCXXStructor(const CXXMethodDecl *MD, // In such cases we should try to emit the deleting dtor as an alias to the // selected 'operator delete'. 
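  // (Background sketch: the Itanium ABI mangles each constructor and
  // destructor variant separately; for example, for 'struct S { ~S(); };'
  //   _ZN1SD2Ev   base dtor
  //   _ZN1SD1Ev   complete dtor
  //   _ZN1SD0Ev   deleting dtor (virtual destructors only)
  // When the variants are identical, the complete variant is emitted as an
  // alias of the base variant above (StructorCodegen::Alias, COMDAT, or
  // RAUW) instead of duplicating the body.)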
-  llvm::Function *Fn = CGM.codegenCXXStructor(MD, Type);
+  llvm::Function *Fn = CGM.codegenCXXStructor(GD);

   if (CGType == StructorCodegen::COMDAT) {
     SmallString<256> Buffer;
@@ -3913,26 +3914,26 @@ void ItaniumCXXABI::emitCXXStructor(const CXXMethodDecl *MD,
   }
 }

-static llvm::Constant *getBeginCatchFn(CodeGenModule &CGM) {
+static llvm::FunctionCallee getBeginCatchFn(CodeGenModule &CGM) {
   // void *__cxa_begin_catch(void*);
   llvm::FunctionType *FTy = llvm::FunctionType::get(
-      CGM.Int8PtrTy, CGM.Int8PtrTy, /*IsVarArgs=*/false);
+      CGM.Int8PtrTy, CGM.Int8PtrTy, /*isVarArg=*/false);

   return CGM.CreateRuntimeFunction(FTy, "__cxa_begin_catch");
 }

-static llvm::Constant *getEndCatchFn(CodeGenModule &CGM) {
+static llvm::FunctionCallee getEndCatchFn(CodeGenModule &CGM) {
   // void __cxa_end_catch();
   llvm::FunctionType *FTy =
-      llvm::FunctionType::get(CGM.VoidTy, /*IsVarArgs=*/false);
+      llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false);

   return CGM.CreateRuntimeFunction(FTy, "__cxa_end_catch");
 }

-static llvm::Constant *getGetExceptionPtrFn(CodeGenModule &CGM) {
+static llvm::FunctionCallee getGetExceptionPtrFn(CodeGenModule &CGM) {
   // void *__cxa_get_exception_ptr(void*);
   llvm::FunctionType *FTy = llvm::FunctionType::get(
-      CGM.Int8PtrTy, CGM.Int8PtrTy, /*IsVarArgs=*/false);
+      CGM.Int8PtrTy, CGM.Int8PtrTy, /*isVarArg=*/false);

   return CGM.CreateRuntimeFunction(FTy, "__cxa_get_exception_ptr");
 }
@@ -4204,14 +4205,14 @@ void ItaniumCXXABI::emitBeginCatch(CodeGenFunction &CGF,
 /// Get or define the following function:
 ///   void @__clang_call_terminate(i8* %exn) nounwind noreturn
 /// This code is used only in C++.
-static llvm::Constant *getClangCallTerminateFn(CodeGenModule &CGM) {
+static llvm::FunctionCallee getClangCallTerminateFn(CodeGenModule &CGM) {
   llvm::FunctionType *fnTy =
-      llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*IsVarArgs=*/false);
-  llvm::Constant *fnRef = CGM.CreateRuntimeFunction(
+      llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*isVarArg=*/false);
+  llvm::FunctionCallee fnRef = CGM.CreateRuntimeFunction(
       fnTy, "__clang_call_terminate", llvm::AttributeList(), /*Local=*/true);
-
-  llvm::Function *fn = dyn_cast<llvm::Function>(fnRef);
-  if (fn && fn->empty()) {
+  llvm::Function *fn =
+      cast<llvm::Function>(fnRef.getCallee()->stripPointerCasts());
+  if (fn->empty()) {
     fn->setDoesNotThrow();
     fn->setDoesNotReturn();
@@ -4229,7 +4230,7 @@ static llvm::Constant *getClangCallTerminateFn(CodeGenModule &CGM) {

     // Set up the function.
     llvm::BasicBlock *entry =
-      llvm::BasicBlock::Create(CGM.getLLVMContext(), "", fn);
+        llvm::BasicBlock::Create(CGM.getLLVMContext(), "", fn);
     CGBuilderTy builder(CGM, entry);

     // Pull the exception pointer out of the parameter list.
@@ -4249,7 +4250,6 @@ static llvm::Constant *getClangCallTerminateFn(CodeGenModule &CGM) {
     // std::terminate cannot return.
     builder.CreateUnreachable();
   }
-
   return fnRef;
 }
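Throughout this import, CreateRuntimeFunction and similar helpers now return llvm::FunctionCallee, a (FunctionType, callee Value) pair, instead of a bare llvm::Constant. As getClangCallTerminateFn above shows, code that wants to set function attributes must first unwrap the callee and strip pointer casts, because the returned value may be a constant bitcast of an existing declaration with a different type. A sketch of the pattern, with illustrative names not taken from this patch:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Module.h"

    static llvm::FunctionCallee getNoReturnHelper(llvm::Module &M) {
      llvm::LLVMContext &Ctx = M.getContext();
      // void __example_noreturn_helper(i8*)
      llvm::FunctionType *FTy = llvm::FunctionType::get(
          llvm::Type::getVoidTy(Ctx), llvm::Type::getInt8PtrTy(Ctx),
          /*isVarArg=*/false);
      llvm::FunctionCallee Helper =
          M.getOrInsertFunction("__example_noreturn_helper", FTy);
      // The callee may be wrapped in a constant cast, so strip casts before
      // asking whether it is really an llvm::Function.
      if (auto *Fn = llvm::dyn_cast<llvm::Function>(
              Helper.getCallee()->stripPointerCasts())) {
        Fn->setDoesNotThrow();
        Fn->setDoesNotReturn();
      }
      return Helper;
    }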
diff --git a/lib/CodeGen/MacroPPCallbacks.cpp b/lib/CodeGen/MacroPPCallbacks.cpp
index 013ca15e2391..92800e738b62 100644
--- a/lib/CodeGen/MacroPPCallbacks.cpp
+++ b/lib/CodeGen/MacroPPCallbacks.cpp
@@ -1,9 +1,8 @@
 //===--- MacroPPCallbacks.cpp ---------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/MacroPPCallbacks.h b/lib/CodeGen/MacroPPCallbacks.h
index b87a4005d481..32906a000269 100644
--- a/lib/CodeGen/MacroPPCallbacks.h
+++ b/lib/CodeGen/MacroPPCallbacks.h
@@ -1,9 +1,8 @@
 //===--- MacroPPCallbacks.h -------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/MicrosoftCXXABI.cpp b/lib/CodeGen/MicrosoftCXXABI.cpp
index 5545bc6647e6..fa34414de5da 100644
--- a/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -1,9 +1,8 @@
 //===--- MicrosoftCXXABI.cpp - Emit LLVM Code from ASTs for a Module ------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -27,7 +26,6 @@
 #include "clang/AST/VTableBuilder.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSet.h"
-#include "llvm/IR/CallSite.h"
 #include "llvm/IR/Intrinsics.h"

 using namespace clang;
@@ -207,7 +205,7 @@ public:
   //   delegate to or alias the base destructor.

   AddedStructorArgs
-  buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
+  buildStructorSignature(GlobalDecl GD,
                          SmallVectorImpl<CanQualType> &ArgTys) override;

   /// Non-base dtors should be emitted as delegating thunks in this ABI.
@@ -260,7 +258,8 @@ public:

   void EmitDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *DD,
                           CXXDtorType Type, bool ForVirtualBase,
-                          bool Delegating, Address This) override;
+                          bool Delegating, Address This,
+                          QualType ThisTy) override;

   void emitVTableTypeMetadata(const VPtrInfo &Info, const CXXRecordDecl *RD,
                               llvm::GlobalVariable *VTable);
@@ -298,9 +297,8 @@ public:

   llvm::Value *EmitVirtualDestructorCall(CodeGenFunction &CGF,
                                          const CXXDestructorDecl *Dtor,
-                                         CXXDtorType DtorType,
-                                         Address This,
-                                         const CXXMemberCallExpr *CE) override;
+                                         CXXDtorType DtorType, Address This,
+                                         DeleteOrMemberCallExpr E) override;

   void adjustCallArgsForDestructorThunk(CodeGenFunction &CGF, GlobalDecl GD,
                                         CallArgList &CallArgs) override {
@@ -354,7 +352,7 @@ public:
           ? llvm::GlobalValue::LinkOnceODRLinkage
           : llvm::GlobalValue::InternalLinkage;
     auto *VDispMap = new llvm::GlobalVariable(
-        CGM.getModule(), VDispMapTy, /*Constant=*/true, Linkage,
+        CGM.getModule(), VDispMapTy, /*isConstant=*/true, Linkage,
         /*Initializer=*/Init, MangledName);
     return VDispMap;
   }
@@ -396,7 +394,8 @@ public:
                                        llvm::GlobalVariable *DeclPtr,
                                        bool PerformInit) override;
   void registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D,
-                          llvm::Constant *Dtor, llvm::Constant *Addr) override;
+                          llvm::FunctionCallee Dtor,
+                          llvm::Constant *Addr) override;

   // ==== Notes on array cookies =========
   //
@@ -437,7 +436,7 @@ public:
   friend struct MSRTTIBuilder;

   bool isImageRelative() const {
-    return CGM.getTarget().getPointerWidth(/*AddressSpace=*/0) == 64;
+    return CGM.getTarget().getPointerWidth(/*AddrSpace=*/0) == 64;
   }

   // 5 routines for constructing the llvm types for MS RTTI structs.
@@ -674,7 +673,7 @@ public:
                                 llvm::Value *MemPtr,
                                 const MemberPointerType *MPT) override;

-  void emitCXXStructor(const CXXMethodDecl *MD, StructorType Type) override;
+  void emitCXXStructor(GlobalDecl GD) override;

   llvm::StructType *getCatchableTypeType() {
     if (CatchableTypeType)
@@ -726,18 +725,20 @@ public:
     return ThrowInfoType;
   }

-  llvm::Constant *getThrowFn() {
+  llvm::FunctionCallee getThrowFn() {
     // _CxxThrowException is passed an exception object and a ThrowInfo object
     // which describes the exception.
     llvm::Type *Args[] = {CGM.Int8PtrTy, getThrowInfoType()->getPointerTo()};
     llvm::FunctionType *FTy =
-        llvm::FunctionType::get(CGM.VoidTy, Args, /*IsVarArgs=*/false);
-    auto *Fn = cast<llvm::Function>(
-        CGM.CreateRuntimeFunction(FTy, "_CxxThrowException"));
+        llvm::FunctionType::get(CGM.VoidTy, Args, /*isVarArg=*/false);
+    llvm::FunctionCallee Throw =
+        CGM.CreateRuntimeFunction(FTy, "_CxxThrowException");
     // _CxxThrowException is stdcall on 32-bit x86 platforms.
-    if (CGM.getTarget().getTriple().getArch() == llvm::Triple::x86)
-      Fn->setCallingConv(llvm::CallingConv::X86_StdCall);
-    return Fn;
+    if (CGM.getTarget().getTriple().getArch() == llvm::Triple::x86) {
+      if (auto *Fn = dyn_cast<llvm::Function>(Throw.getCallee()))
+        Fn->setCallingConv(llvm::CallingConv::X86_StdCall);
+    }
+    return Throw;
   }

   llvm::Function *getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD,
@@ -810,7 +811,7 @@ MicrosoftCXXABI::getRecordArgABI(const CXXRecordDecl *RD) const {
     // Use the simple Itanium rules for now.
     // FIXME: This is incompatible with MSVC for arguments with a dtor and no
    // copy ctor.
-    return !canCopyArgument(RD) ? RAA_Indirect : RAA_Default;
+    return !RD->canPassInRegisters() ? RAA_Indirect : RAA_Default;

   case llvm::Triple::x86:
     // All record arguments are passed in memory on x86. Decide whether to
@@ -819,7 +820,7 @@ MicrosoftCXXABI::getRecordArgABI(const CXXRecordDecl *RD) const {
     // If C++ prohibits us from making a copy, construct the arguments directly
     // into argument memory.
-    if (!canCopyArgument(RD))
+    if (!RD->canPassInRegisters())
       return RAA_DirectInMemory;

     // Otherwise, construct the argument into a temporary and copy the bytes
@@ -828,7 +829,7 @@ MicrosoftCXXABI::getRecordArgABI(const CXXRecordDecl *RD) const {

   case llvm::Triple::x86_64:
   case llvm::Triple::aarch64:
-    return !canCopyArgument(RD) ? RAA_Indirect : RAA_Default;
+    return !RD->canPassInRegisters() ? RAA_Indirect : RAA_Default;
   }

   llvm_unreachable("invalid enum");
@@ -843,8 +844,7 @@ void MicrosoftCXXABI::emitVirtualObjectDelete(CodeGenFunction &CGF,
   // CXXMemberCallExpr for dtor call.
   bool UseGlobalDelete = DE->isGlobalDelete();
   CXXDtorType DtorType = UseGlobalDelete ? Dtor_Complete : Dtor_Deleting;
-  llvm::Value *MDThis =
-      EmitVirtualDestructorCall(CGF, Dtor, DtorType, Ptr, /*CE=*/nullptr);
+  llvm::Value *MDThis = EmitVirtualDestructorCall(CGF, Dtor, DtorType, Ptr, DE);
   if (UseGlobalDelete)
     CGF.EmitDeleteCall(DE->getOperatorDelete(), MDThis, ElementType);
 }

@@ -853,7 +853,7 @@ void MicrosoftCXXABI::emitRethrow(CodeGenFunction &CGF, bool isNoReturn) {
   llvm::Value *Args[] = {
       llvm::ConstantPointerNull::get(CGM.Int8PtrTy),
       llvm::ConstantPointerNull::get(getThrowInfoType()->getPointerTo())};
-  auto *Fn = getThrowFn();
+  llvm::FunctionCallee Fn = getThrowFn();
   if (isNoReturn)
     CGF.EmitNoreturnRuntimeCallOrInvoke(Fn, Args);
   else
@@ -927,20 +927,20 @@ bool MicrosoftCXXABI::shouldTypeidBeNullChecked(bool IsDeref,
          !getContext().getASTRecordLayout(SrcDecl).hasExtendableVFPtr();
 }

-static llvm::CallSite emitRTtypeidCall(CodeGenFunction &CGF,
-                                       llvm::Value *Argument) {
+static llvm::CallBase *emitRTtypeidCall(CodeGenFunction &CGF,
+                                        llvm::Value *Argument) {
   llvm::Type *ArgTypes[] = {CGF.Int8PtrTy};
   llvm::FunctionType *FTy =
       llvm::FunctionType::get(CGF.Int8PtrTy, ArgTypes, false);
   llvm::Value *Args[] = {Argument};
-  llvm::Constant *Fn = CGF.CGM.CreateRuntimeFunction(FTy, "__RTtypeid");
+  llvm::FunctionCallee Fn = CGF.CGM.CreateRuntimeFunction(FTy, "__RTtypeid");
   return CGF.EmitRuntimeCallOrInvoke(Fn, Args);
 }

 void MicrosoftCXXABI::EmitBadTypeidCall(CodeGenFunction &CGF) {
-  llvm::CallSite Call =
+  llvm::CallBase *Call =
       emitRTtypeidCall(CGF, llvm::Constant::getNullValue(CGM.VoidPtrTy));
-  Call.setDoesNotReturn();
+  Call->setDoesNotReturn();
   CGF.Builder.CreateUnreachable();
 }

@@ -950,7 +950,7 @@ llvm::Value *MicrosoftCXXABI::EmitTypeid(CodeGenFunction &CGF,
                                          llvm::Type *StdTypeInfoPtrTy) {
   std::tie(ThisPtr, std::ignore, std::ignore) =
       performBaseAdjustment(CGF, ThisPtr, SrcRecordTy);
-  auto Typeid = emitRTtypeidCall(CGF, ThisPtr.getPointer()).getInstruction();
+  llvm::CallBase *Typeid = emitRTtypeidCall(CGF, ThisPtr.getPointer());
   return CGF.Builder.CreateBitCast(Typeid, StdTypeInfoPtrTy);
 }

@@ -985,13 +985,13 @@ llvm::Value *MicrosoftCXXABI::EmitDynamicCastCall(
   //                      BOOL isReference)
   llvm::Type *ArgTypes[] = {CGF.Int8PtrTy, CGF.Int32Ty, CGF.Int8PtrTy,
                             CGF.Int8PtrTy, CGF.Int32Ty};
-  llvm::Constant *Function = CGF.CGM.CreateRuntimeFunction(
+  llvm::FunctionCallee Function = CGF.CGM.CreateRuntimeFunction(
       llvm::FunctionType::get(CGF.Int8PtrTy, ArgTypes, false),
       "__RTDynamicCast");
   llvm::Value *Args[] = {
       ThisPtr, Offset, SrcRTTI, DestRTTI,
       llvm::ConstantInt::get(CGF.Int32Ty, DestTy->isReferenceType())};
-  ThisPtr = CGF.EmitRuntimeCallOrInvoke(Function, Args).getInstruction();
+  ThisPtr = CGF.EmitRuntimeCallOrInvoke(Function, Args);
   return CGF.Builder.CreateBitCast(ThisPtr, DestLTy);
 }

@@ -1005,7 +1005,7 @@ MicrosoftCXXABI::EmitDynamicCastToVoid(CodeGenFunction &CGF, Address Value,
   // PVOID __RTCastToVoid(
   //   PVOID inptr)
   llvm::Type *ArgTypes[] = {CGF.Int8PtrTy};
-  llvm::Constant *Function = CGF.CGM.CreateRuntimeFunction(
+  llvm::FunctionCallee Function = CGF.CGM.CreateRuntimeFunction(
       llvm::FunctionType::get(CGF.Int8PtrTy, ArgTypes, false),
       "__RTCastToVoid");
   llvm::Value *Args[] = {Value.getPointer()};
@@ -1050,33 +1050,55 @@ bool MicrosoftCXXABI::hasMostDerivedReturn(GlobalDecl GD) const {
   return isDeletingDtor(GD);
 }

+static bool IsSizeGreaterThan128(const CXXRecordDecl *RD) {
+  return RD->getASTContext().getTypeSize(RD->getTypeForDecl()) > 128;
+}
+
+static bool hasMicrosoftABIRestrictions(const CXXRecordDecl *RD) {
+  // For AArch64, we use the C++14 definition of an aggregate, so we also
+  // check for:
+  //   No private or protected non static data members.
+  //   No base classes
+  //   No virtual functions
+  // Additionally, we need to ensure that there is a trivial copy assignment
+  // operator, a trivial destructor and no user-provided constructors.
+  if (RD->hasProtectedFields() || RD->hasPrivateFields())
+    return true;
+  if (RD->getNumBases() > 0)
+    return true;
+  if (RD->isPolymorphic())
+    return true;
+  if (RD->hasNonTrivialCopyAssignment())
+    return true;
+  for (const CXXConstructorDecl *Ctor : RD->ctors())
+    if (Ctor->isUserProvided())
+      return true;
+  if (RD->hasNonTrivialDestructor())
+    return true;
+  return false;
+}
+
 bool MicrosoftCXXABI::classifyReturnType(CGFunctionInfo &FI) const {
   const CXXRecordDecl *RD = FI.getReturnType()->getAsCXXRecordDecl();
   if (!RD)
     return false;

-  CharUnits Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType());
-  if (FI.isInstanceMethod()) {
-    // If it's an instance method, aggregates are always returned indirectly via
-    // the second parameter.
-    FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
-    FI.getReturnInfo().setSRetAfterThis(FI.isInstanceMethod());
+  bool isAArch64 = CGM.getTarget().getTriple().isAArch64();
+  bool isSimple = !isAArch64 || !hasMicrosoftABIRestrictions(RD);
+  bool isIndirectReturn =
+      isAArch64 ? (!RD->canPassInRegisters() ||
+                   IsSizeGreaterThan128(RD))
+                : !RD->isPOD();
+  bool isInstanceMethod = FI.isInstanceMethod();

-    // aarch64-windows requires that instance methods use X1 for the return
-    // address. So for aarch64-windows we do not mark the
-    // return as SRet.
-    FI.getReturnInfo().setSuppressSRet(CGM.getTarget().getTriple().getArch() ==
-                                       llvm::Triple::aarch64);
-    return true;
-  } else if (!RD->isPOD()) {
-    // If it's a free function, non-POD types are returned indirectly.
+  if (isIndirectReturn || !isSimple || isInstanceMethod) {
+    CharUnits Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType());
     FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
+    FI.getReturnInfo().setSRetAfterThis(isInstanceMethod);
+
+    FI.getReturnInfo().setInReg(isAArch64 &&
+                                !(isSimple && IsSizeGreaterThan128(RD)));

-    // aarch64-windows requires that non-POD, non-instance returns use X0 for
-    // the return address. So for aarch64-windows we do not mark the return as
-    // SRet.
-    FI.getReturnInfo().setSuppressSRet(CGM.getTarget().getTriple().getArch() ==
-                                       llvm::Triple::aarch64);
     return true;
   }
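For illustration only (these examples are not part of the patch), the new AArch64 rules above would classify return types roughly as follows; each comment states the outcome expected from hasMicrosoftABIRestrictions() and the 128-bit size check:

    struct Small   { int a, b; };          // simple, <= 128 bits:
                                           // can be returned in registers
    struct Large   { long long x[3]; };    // > 128 bits: returned indirectly
    struct Derived : Small { int c; };     // has a base class: restricted,
                                           // so returned indirectly (sret)
    struct Guarded { Guarded(); int a; };  // user-provided constructor:
                                           // restricted, returned indirectly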
@@ -1233,16 +1255,17 @@ void MicrosoftCXXABI::EmitVBPtrStores(CodeGenFunction &CGF,
 }

 CGCXXABI::AddedStructorArgs
-MicrosoftCXXABI::buildStructorSignature(const CXXMethodDecl *MD, StructorType T,
+MicrosoftCXXABI::buildStructorSignature(GlobalDecl GD,
                                         SmallVectorImpl<CanQualType> &ArgTys) {
   AddedStructorArgs Added;
   // TODO: 'for base' flag
-  if (T == StructorType::Deleting) {
+  if (isa<CXXDestructorDecl>(GD.getDecl()) &&
+      GD.getDtorType() == Dtor_Deleting) {
     // The scalar deleting destructor takes an implicit int parameter.
     ArgTys.push_back(getContext().IntTy);
     ++Added.Suffix;
   }
-  auto *CD = dyn_cast<CXXConstructorDecl>(MD);
+  auto *CD = dyn_cast<CXXConstructorDecl>(GD.getDecl());
   if (!CD)
     return Added;

@@ -1289,7 +1312,7 @@ llvm::GlobalValue::LinkageTypes MicrosoftCXXABI::getCXXDestructorLinkage(
     // The base destructor most closely tracks the user-declared constructor, so
     // we delegate back to the normal declarator case.
     return CGM.getLLVMLinkageForDeclarator(Dtor, Linkage,
-                                           /*isConstantVariable=*/false);
+                                           /*IsConstantVariable=*/false);
   case Dtor_Complete:
     // The complete destructor is like an inline function, but it may be
     // imported and therefore must be exported as well. This requires changing
@@ -1545,16 +1568,16 @@ CGCXXABI::AddedStructorArgs MicrosoftCXXABI::addImplicitConstructorArgs(
 void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
                                          const CXXDestructorDecl *DD,
                                          CXXDtorType Type, bool ForVirtualBase,
-                                         bool Delegating, Address This) {
+                                         bool Delegating, Address This,
+                                         QualType ThisTy) {
   // Use the base destructor variant in place of the complete destructor variant
   // if the class has no virtual bases. This effectively implements some of the
   // -mconstructor-aliases optimization, but as part of the MS C++ ABI.
   if (Type == Dtor_Complete && DD->getParent()->getNumVBases() == 0)
     Type = Dtor_Base;

-  CGCallee Callee =
-      CGCallee::forDirect(CGM.getAddrOfCXXStructor(DD, getFromDtorType(Type)),
-                          GlobalDecl(DD, Type));
+  GlobalDecl GD(DD, Type);
+  CGCallee Callee = CGCallee::forDirect(CGM.getAddrOfCXXStructor(GD), GD);

   if (DD->isVirtual()) {
     assert(Type != CXXDtorType::Dtor_Deleting &&
@@ -1568,10 +1591,9 @@ void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
     BaseDtorEndBB = EmitDtorCompleteObjectHandler(CGF);
   }

-  CGF.EmitCXXDestructorCall(DD, Callee, This.getPointer(),
+  CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(), ThisTy,
                             /*ImplicitParam=*/nullptr,
-                            /*ImplicitParamTy=*/QualType(), nullptr,
-                            getFromDtorType(Type));
+                            /*ImplicitParamTy=*/QualType(), nullptr);
   if (BaseDtorEndBB) {
     // Complete object handler should continue to be the remaining
     CGF.Builder.CreateBr(BaseDtorEndBB);
@@ -1878,15 +1900,18 @@ CGCallee MicrosoftCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF,

 llvm::Value *MicrosoftCXXABI::EmitVirtualDestructorCall(
     CodeGenFunction &CGF, const CXXDestructorDecl *Dtor, CXXDtorType DtorType,
-    Address This, const CXXMemberCallExpr *CE) {
+    Address This, DeleteOrMemberCallExpr E) {
+  auto *CE = E.dyn_cast<const CXXMemberCallExpr *>();
+  auto *D = E.dyn_cast<const CXXDeleteExpr *>();
+  assert((CE != nullptr) ^ (D != nullptr));
   assert(CE == nullptr || CE->arg_begin() == CE->arg_end());
   assert(DtorType == Dtor_Deleting || DtorType == Dtor_Complete);

   // We have only one destructor in the vftable but can get both behaviors
   // by passing an implicit int parameter.
   GlobalDecl GD(Dtor, Dtor_Deleting);
-  const CGFunctionInfo *FInfo = &CGM.getTypes().arrangeCXXStructorDeclaration(
-      Dtor, StructorType::Deleting);
+  const CGFunctionInfo *FInfo =
+      &CGM.getTypes().arrangeCXXStructorDeclaration(GD);
   llvm::FunctionType *Ty = CGF.CGM.getTypes().GetFunctionType(*FInfo);
   CGCallee Callee = CGCallee::forVirtual(CE, GD, This, Ty);

@@ -1895,10 +1920,15 @@ llvm::Value *MicrosoftCXXABI::EmitVirtualDestructorCall(
       llvm::IntegerType::getInt32Ty(CGF.getLLVMContext()),
       DtorType == Dtor_Deleting);

+  QualType ThisTy;
+  if (CE)
+    ThisTy = CE->getImplicitObjectArgument()->getType()->getPointeeType();
+  else
+    ThisTy = D->getDestroyedType();
+
   This = adjustThisArgumentForVirtualFunctionCall(CGF, GD, This, true);
-  RValue RV =
-      CGF.EmitCXXDestructorCall(Dtor, Callee, This.getPointer(), ImplicitParam,
-                                Context.IntTy, CE, StructorType::Deleting);
+  RValue RV = CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(), ThisTy,
+                                        ImplicitParam, Context.IntTy, CE);
   return RV.getScalarVal();
 }

@@ -1996,7 +2026,7 @@ MicrosoftCXXABI::EmitVirtualMemPtrThunk(const CXXMethodDecl *MD,
   llvm::Value *Callee =
     CGF.Builder.CreateAlignedLoad(VFuncPtr, CGF.getPointerAlign());

-  CGF.EmitMustTailThunk(MD, getThisValue(CGF), Callee);
+  CGF.EmitMustTailThunk(MD, getThisValue(CGF), {ThunkTy, Callee});

   return ThunkFn;
 }
@@ -2222,25 +2252,26 @@ Address MicrosoftCXXABI::InitializeArrayCookie(CodeGenFunction &CGF,
 }

 static void emitGlobalDtorWithTLRegDtor(CodeGenFunction &CGF, const VarDecl &VD,
-                                        llvm::Constant *Dtor,
+                                        llvm::FunctionCallee Dtor,
                                         llvm::Constant *Addr) {
   // Create a function which calls the destructor.
   llvm::Constant *DtorStub = CGF.createAtExitStub(VD, Dtor, Addr);

   // extern "C" int __tlregdtor(void (*f)(void));
   llvm::FunctionType *TLRegDtorTy = llvm::FunctionType::get(
-      CGF.IntTy, DtorStub->getType(), /*IsVarArg=*/false);
+      CGF.IntTy, DtorStub->getType(), /*isVarArg=*/false);

-  llvm::Constant *TLRegDtor = CGF.CGM.CreateRuntimeFunction(
+  llvm::FunctionCallee TLRegDtor = CGF.CGM.CreateRuntimeFunction(
       TLRegDtorTy, "__tlregdtor", llvm::AttributeList(), /*Local=*/true);
-  if (llvm::Function *TLRegDtorFn = dyn_cast<llvm::Function>(TLRegDtor))
+  if (llvm::Function *TLRegDtorFn =
+          dyn_cast<llvm::Function>(TLRegDtor.getCallee()))
     TLRegDtorFn->setDoesNotThrow();

   CGF.EmitNounwindRuntimeCall(TLRegDtor, DtorStub);
 }

 void MicrosoftCXXABI::registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D,
-                                         llvm::Constant *Dtor,
+                                         llvm::FunctionCallee Dtor,
                                          llvm::Constant *Addr) {
   if (D.isNoDestroy(CGM.getContext()))
     return;
@@ -2269,7 +2300,7 @@ void MicrosoftCXXABI::EmitThreadLocalInitFuncs(
   // pointers at start-up time and, eventually, at thread-creation time.
   auto AddToXDU = [&CGM](llvm::Function *InitFunc) {
     llvm::GlobalVariable *InitFuncPtr = new llvm::GlobalVariable(
-        CGM.getModule(), InitFunc->getType(), /*IsConstant=*/true,
+        CGM.getModule(), InitFunc->getType(), /*isConstant=*/true,
         llvm::GlobalVariable::InternalLinkage, InitFunc,
         Twine(InitFunc->getName(), "$initializer$"));
     InitFuncPtr->setSection(".CRT$XDU");
@@ -2318,14 +2349,14 @@ static ConstantAddress getInitThreadEpochPtr(CodeGenModule &CGM) {
     return ConstantAddress(GV, Align);
   auto *GV = new llvm::GlobalVariable(
       CGM.getModule(), CGM.IntTy,
-      /*Constant=*/false, llvm::GlobalVariable::ExternalLinkage,
+      /*isConstant=*/false, llvm::GlobalVariable::ExternalLinkage,
       /*Initializer=*/nullptr, VarName,
       /*InsertBefore=*/nullptr, llvm::GlobalVariable::GeneralDynamicTLSModel);
   GV->setAlignment(Align.getQuantity());
   return ConstantAddress(GV, Align);
 }

-static llvm::Constant *getInitThreadHeaderFn(CodeGenModule &CGM) {
+static llvm::FunctionCallee getInitThreadHeaderFn(CodeGenModule &CGM) {
   llvm::FunctionType *FTy =
       llvm::FunctionType::get(llvm::Type::getVoidTy(CGM.getLLVMContext()),
                               CGM.IntTy->getPointerTo(), /*isVarArg=*/false);
@@ -2337,7 +2368,7 @@ static llvm::Constant *getInitThreadHeaderFn(CodeGenModule &CGM) {
       /*Local=*/true);
 }

-static llvm::Constant *getInitThreadFooterFn(CodeGenModule &CGM) {
+static llvm::FunctionCallee getInitThreadFooterFn(CodeGenModule &CGM) {
   llvm::FunctionType *FTy =
       llvm::FunctionType::get(llvm::Type::getVoidTy(CGM.getLLVMContext()),
                               CGM.IntTy->getPointerTo(), /*isVarArg=*/false);
@@ -2349,7 +2380,7 @@ static llvm::Constant *getInitThreadFooterFn(CodeGenModule &CGM) {
       /*Local=*/true);
 }

-static llvm::Constant *getInitThreadAbortFn(CodeGenModule &CGM) {
+static llvm::FunctionCallee getInitThreadAbortFn(CodeGenModule &CGM) {
   llvm::FunctionType *FTy =
       llvm::FunctionType::get(llvm::Type::getVoidTy(CGM.getLLVMContext()),
                               CGM.IntTy->getPointerTo(), /*isVarArg=*/false);
@@ -3378,7 +3409,7 @@ static llvm::GlobalVariable *getTypeInfoVTable(CodeGenModule &CGM) {
   if (auto VTable = CGM.getModule().getNamedGlobal(MangledName))
     return VTable;
   return new llvm::GlobalVariable(CGM.getModule(), CGM.Int8PtrTy,
-                                  /*Constant=*/true,
+                                  /*isConstant=*/true,
                                   llvm::GlobalVariable::ExternalLinkage,
                                   /*Initializer=*/nullptr, MangledName);
 }
@@ -3558,7 +3589,7 @@ llvm::GlobalVariable *MSRTTIBuilder::getClassHierarchyDescriptor() {

   // Forward-declare the class hierarchy descriptor
   auto Type = ABI.getClassHierarchyDescriptorType();
-  auto CHD = new llvm::GlobalVariable(Module, Type, /*Constant=*/true, Linkage,
+  auto CHD = new llvm::GlobalVariable(Module, Type, /*isConstant=*/true, Linkage,
                                       /*Initializer=*/nullptr, MangledName);
   if (CHD->isWeakForLinker())
@@ -3597,7 +3628,7 @@ MSRTTIBuilder::getBaseClassArray(SmallVectorImpl<MSRTTIClass> &Classes) {
   auto *ArrType = llvm::ArrayType::get(PtrType, Classes.size() + 1);
   auto *BCA = new llvm::GlobalVariable(Module, ArrType,
-                                       /*Constant=*/true, Linkage,
+                                       /*isConstant=*/true, Linkage,
                                        /*Initializer=*/nullptr, MangledName);
   if (BCA->isWeakForLinker())
     BCA->setComdat(CGM.getModule().getOrInsertComdat(BCA->getName()));
@@ -3639,7 +3670,7 @@ MSRTTIBuilder::getBaseClassDescriptor(const MSRTTIClass &Class) {

   // Forward-declare the base class descriptor.
   auto Type = ABI.getBaseClassDescriptorType();
   auto BCD =
-      new llvm::GlobalVariable(Module, Type, /*Constant=*/true, Linkage,
+      new llvm::GlobalVariable(Module, Type, /*isConstant=*/true, Linkage,
                                /*Initializer=*/nullptr, MangledName);
   if (BCD->isWeakForLinker())
     BCD->setComdat(CGM.getModule().getOrInsertComdat(BCD->getName()));
@@ -3685,7 +3716,7 @@ MSRTTIBuilder::getCompleteObjectLocator(const VPtrInfo &Info) {

   // Forward-declare the complete object locator.
   llvm::StructType *Type = ABI.getCompleteObjectLocatorType();
-  auto COL = new llvm::GlobalVariable(Module, Type, /*Constant=*/true, Linkage,
+  auto COL = new llvm::GlobalVariable(Module, Type, /*isConstant=*/true, Linkage,
                                       /*Initializer=*/nullptr, MangledName);

   // Initialize the CompleteObjectLocator.
@@ -3800,7 +3831,7 @@ llvm::Constant *MicrosoftCXXABI::getAddrOfRTTIDescriptor(QualType Type) {
   llvm::StructType *TypeDescriptorType =
       getTypeDescriptorType(TypeInfoString);
   auto *Var = new llvm::GlobalVariable(
-      CGM.getModule(), TypeDescriptorType, /*Constant=*/false,
+      CGM.getModule(), TypeDescriptorType, /*isConstant=*/false,
       getLinkageForRTTI(Type),
       llvm::ConstantStruct::get(TypeDescriptorType, Fields),
       MangledName);
@@ -3816,44 +3847,36 @@ MicrosoftCXXABI::getMSCompleteObjectLocator(const CXXRecordDecl *RD,
   return MSRTTIBuilder(*this, RD).getCompleteObjectLocator(Info);
 }

-static void emitCXXConstructor(CodeGenModule &CGM,
-                               const CXXConstructorDecl *ctor,
-                               StructorType ctorType) {
-  // There are no constructor variants, always emit the complete destructor.
-  llvm::Function *Fn = CGM.codegenCXXStructor(ctor, StructorType::Complete);
-  CGM.maybeSetTrivialComdat(*ctor, *Fn);
-}
+void MicrosoftCXXABI::emitCXXStructor(GlobalDecl GD) {
+  if (auto *ctor = dyn_cast<CXXConstructorDecl>(GD.getDecl())) {
+    // There are no constructor variants, always emit the complete destructor.
+    llvm::Function *Fn =
+        CGM.codegenCXXStructor(GD.getWithCtorType(Ctor_Complete));
+    CGM.maybeSetTrivialComdat(*ctor, *Fn);
+    return;
+  }
+
+  auto *dtor = cast<CXXDestructorDecl>(GD.getDecl());

-static void emitCXXDestructor(CodeGenModule &CGM, const CXXDestructorDecl *dtor,
-                              StructorType dtorType) {
   // Emit the base destructor if the base and complete (vbase) destructors are
   // equivalent. This effectively implements -mconstructor-aliases as part of
   // the ABI.
-  if (dtorType == StructorType::Complete &&
+  if (GD.getDtorType() == Dtor_Complete &&
       dtor->getParent()->getNumVBases() == 0)
-    dtorType = StructorType::Base;
+    GD = GD.getWithDtorType(Dtor_Base);

   // The base destructor is equivalent to the base destructor of its
   // base class if there is exactly one non-virtual base class with a
   // non-trivial destructor, there are no fields with a non-trivial
   // destructor, and the body of the destructor is trivial.
-  if (dtorType == StructorType::Base && !CGM.TryEmitBaseDestructorAsAlias(dtor))
+  if (GD.getDtorType() == Dtor_Base && !CGM.TryEmitBaseDestructorAsAlias(dtor))
     return;

-  llvm::Function *Fn = CGM.codegenCXXStructor(dtor, dtorType);
+  llvm::Function *Fn = CGM.codegenCXXStructor(GD);
   if (Fn->isWeakForLinker())
     Fn->setComdat(CGM.getModule().getOrInsertComdat(Fn->getName()));
 }

-void MicrosoftCXXABI::emitCXXStructor(const CXXMethodDecl *MD,
-                                      StructorType Type) {
-  if (auto *CD = dyn_cast<CXXConstructorDecl>(MD)) {
-    emitCXXConstructor(CGM, CD, Type);
-    return;
-  }
-  emitCXXDestructor(CGM, cast<CXXDestructorDecl>(MD), Type);
-}
-
 llvm::Function *
 MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD,
                                          CXXCtorType CT) {
@@ -3955,7 +3978,7 @@ MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD,
                              /*Delegating=*/false, Args);
   // Call the destructor with our arguments.
   llvm::Constant *CalleePtr =
-      CGM.getAddrOfCXXStructor(CD, StructorType::Complete);
+      CGM.getAddrOfCXXStructor(GlobalDecl(CD, Ctor_Complete));
   CGCallee Callee =
       CGCallee::forDirect(CalleePtr, GlobalDecl(CD, Ctor_Complete));
   const CGFunctionInfo &CalleeInfo = CGM.getTypes().arrangeCXXConstructorCall(
@@ -4006,7 +4029,7 @@ llvm::Constant *MicrosoftCXXABI::getCatchableType(QualType T,
     if (CT == Ctor_CopyingClosure)
       CopyCtor = getAddrOfCXXCtorClosure(CD, Ctor_CopyingClosure);
     else
-      CopyCtor = CGM.getAddrOfCXXStructor(CD, StructorType::Complete);
+      CopyCtor = CGM.getAddrOfCXXStructor(GlobalDecl(CD, Ctor_Complete));

     CopyCtor = llvm::ConstantExpr::getBitCast(CopyCtor, CGM.Int8PtrTy);
   } else {
@@ -4047,7 +4070,7 @@ llvm::Constant *MicrosoftCXXABI::getCatchableType(QualType T,
   };
   llvm::StructType *CTType = getCatchableTypeType();
   auto *GV = new llvm::GlobalVariable(
-      CGM.getModule(), CTType, /*Constant=*/true, getLinkageForRTTI(T),
+      CGM.getModule(), CTType, /*isConstant=*/true, getLinkageForRTTI(T),
       llvm::ConstantStruct::get(CTType, Fields), MangledName);
   GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
   GV->setSection(".xdata");
@@ -4165,7 +4188,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getCatchableTypeArray(QualType T) {
     getMangleContext().mangleCXXCatchableTypeArray(T, NumEntries, Out);
   }
   CTA = new llvm::GlobalVariable(
-      CGM.getModule(), CTAType, /*Constant=*/true, getLinkageForRTTI(T),
+      CGM.getModule(), CTAType, /*isConstant=*/true, getLinkageForRTTI(T),
       llvm::ConstantStruct::get(CTAType, Fields), MangledName);
   CTA->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
   CTA->setSection(".xdata");
@@ -4219,7 +4242,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getThrowInfo(QualType T) {
   if (CXXDestructorDecl *DtorD = RD->getDestructor())
     if (!DtorD->isTrivial())
       CleanupFn = llvm::ConstantExpr::getBitCast(
-          CGM.getAddrOfCXXStructor(DtorD, StructorType::Complete),
+          CGM.getAddrOfCXXStructor(GlobalDecl(DtorD, Dtor_Complete)),
           CGM.Int8PtrTy);

   // This is unused as far as we can tell, initialize it to null.
   llvm::Constant *ForwardCompat =
@@ -4234,7 +4257,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getThrowInfo(QualType T) {
       PointerToCatchableTypes // CatchableTypeArray
   };
   auto *GV = new llvm::GlobalVariable(
-      CGM.getModule(), TIType, /*Constant=*/true, getLinkageForRTTI(T),
+      CGM.getModule(), TIType, /*isConstant=*/true, getLinkageForRTTI(T),
       llvm::ConstantStruct::get(TIType, Fields), StringRef(MangledName));
   GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
   GV->setSection(".xdata");
diff --git a/lib/CodeGen/ModuleBuilder.cpp b/lib/CodeGen/ModuleBuilder.cpp
index c0a37698e762..3b4e06045a37 100644
--- a/lib/CodeGen/ModuleBuilder.cpp
+++ b/lib/CodeGen/ModuleBuilder.cpp
@@ -1,9 +1,8 @@
 //===--- ModuleBuilder.cpp - Emit LLVM Code from ASTs ---------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
index 6f00c836f93d..15a2ab99fdac 100644
--- a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
+++ b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
@@ -1,9 +1,8 @@
 //===--- ObjectFilePCHContainerOperations.cpp -----------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
@@ -22,7 +21,7 @@
 #include "clang/Lex/HeaderSearch.h"
 #include "clang/Lex/Preprocessor.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/Bitcode/BitstreamReader.h"
+#include "llvm/Bitstream/BitstreamReader.h"
 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
@@ -338,8 +337,14 @@ ObjectFilePCHContainerReader::ExtractPCH(llvm::MemoryBufferRef Buffer) const {
       StringRef Name;
       Section.getName(Name);
       if ((!IsCOFF && Name == "__clangast") || (IsCOFF && Name == "clangast")) {
-        Section.getContents(PCH);
-        return PCH;
+        if (Expected<StringRef> E = Section.getContents())
+          return *E;
+        else {
+          handleAllErrors(E.takeError(), [&](const llvm::ErrorInfoBase &EIB) {
+            EIB.log(llvm::errs());
+          });
+          return "";
+        }
       }
     }
   }
diff --git a/lib/CodeGen/PatternInit.cpp b/lib/CodeGen/PatternInit.cpp
new file mode 100644
index 000000000000..3410c7f21533
--- /dev/null
+++ b/lib/CodeGen/PatternInit.cpp
@@ -0,0 +1,85 @@
+//===--- PatternInit.cpp - Pattern Initialization -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "PatternInit.h"
+#include "CodeGenModule.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Type.h"
+
+llvm::Constant *clang::CodeGen::initializationPatternFor(CodeGenModule &CGM,
+                                                         llvm::Type *Ty) {
+  // The following value is a guaranteed unmappable pointer value and has a
+  // repeated byte-pattern which makes it easier to synthesize. We use it for
+  // pointers as well as integers so that aggregates are likely to be
+  // initialized with this repeated value.
+  // For 32-bit platforms it's a bit trickier because, across systems, only the
+  // zero page can reasonably be expected to be unmapped. We use max 0xFFFFFFFF
+  // assuming that memory access will overlap into zero page.
+  const uint64_t IntValue =
+      CGM.getContext().getTargetInfo().getMaxPointerWidth() < 64
+          ? 0xFFFFFFFFFFFFFFFFull
+          : 0xAAAAAAAAAAAAAAAAull;
+  // Floating-point values are initialized as NaNs because they propagate. Using
+  // a repeated byte pattern means that it will be easier to initialize
+  // all-floating-point aggregates and arrays with memset. Further, aggregates
+  // which mix integral and a few floats might also initialize with memset
+  // followed by a handful of stores for the floats. Using fairly unique NaNs
+  // also means they'll be easier to distinguish in a crash.
+  constexpr bool NegativeNaN = true;
+  constexpr uint64_t NaNPayload = 0xFFFFFFFFFFFFFFFFull;
+  if (Ty->isIntOrIntVectorTy()) {
+    unsigned BitWidth = cast<llvm::IntegerType>(
+                            Ty->isVectorTy() ? Ty->getVectorElementType() : Ty)
+                            ->getBitWidth();
+    if (BitWidth <= 64)
+      return llvm::ConstantInt::get(Ty, IntValue);
+    return llvm::ConstantInt::get(
+        Ty, llvm::APInt::getSplat(BitWidth, llvm::APInt(64, IntValue)));
+  }
+  if (Ty->isPtrOrPtrVectorTy()) {
+    auto *PtrTy = cast<llvm::PointerType>(
+        Ty->isVectorTy() ? Ty->getVectorElementType() : Ty);
+    unsigned PtrWidth = CGM.getContext().getTargetInfo().getPointerWidth(
+        PtrTy->getAddressSpace());
+    if (PtrWidth > 64)
+      llvm_unreachable("pattern initialization of unsupported pointer width");
+    llvm::Type *IntTy = llvm::IntegerType::get(CGM.getLLVMContext(), PtrWidth);
+    auto *Int = llvm::ConstantInt::get(IntTy, IntValue);
+    return llvm::ConstantExpr::getIntToPtr(Int, PtrTy);
+  }
+  if (Ty->isFPOrFPVectorTy()) {
+    unsigned BitWidth = llvm::APFloat::semanticsSizeInBits(
+        (Ty->isVectorTy() ? Ty->getVectorElementType() : Ty)
+            ->getFltSemantics());
+    llvm::APInt Payload(64, NaNPayload);
+    if (BitWidth >= 64)
+      Payload = llvm::APInt::getSplat(BitWidth, Payload);
+    return llvm::ConstantFP::getQNaN(Ty, NegativeNaN, &Payload);
+  }
+  if (Ty->isArrayTy()) {
+    // Note: this doesn't touch tail padding (at the end of an object, before
+    // the next array object). It is instead handled by replaceUndef.
+    auto *ArrTy = cast<llvm::ArrayType>(Ty);
+    llvm::SmallVector<llvm::Constant *, 8> Element(
+        ArrTy->getNumElements(),
+        initializationPatternFor(CGM, ArrTy->getElementType()));
+    return llvm::ConstantArray::get(ArrTy, Element);
+  }
+
+  // Note: this doesn't touch struct padding. It will initialize as much union
+  // padding as is required for the largest type in the union. Padding is
+  // instead handled by replaceUndef. Stores to structs with volatile members
+  // don't have a volatile qualifier when initialized according to C++. This is
+  // fine because stack-based volatiles don't really have volatile semantics
+  // anyways, and the initialization shouldn't be observable.
+  auto *StructTy = cast<llvm::StructType>(Ty);
+  llvm::SmallVector<llvm::Constant *, 8> Struct(StructTy->getNumElements());
+  for (unsigned El = 0; El != Struct.size(); ++El)
+    Struct[El] = initializationPatternFor(CGM, StructTy->getElementType(El));
+  return llvm::ConstantStruct::get(StructTy, Struct);
+}
diff --git a/lib/CodeGen/PatternInit.h b/lib/CodeGen/PatternInit.h
new file mode 100644
index 000000000000..f117dde9ac66
--- /dev/null
+++ b/lib/CodeGen/PatternInit.h
@@ -0,0 +1,27 @@
+//===- PatternInit - Pattern initialization ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_CODEGEN_PATTERNINIT_H
+#define LLVM_CLANG_LIB_CODEGEN_PATTERNINIT_H
+
+namespace llvm {
+class Constant;
+class Type;
+} // namespace llvm
+
+namespace clang {
+namespace CodeGen {
+
+class CodeGenModule;
+
+llvm::Constant *initializationPatternFor(CodeGenModule &, llvm::Type *);
+
+} // end namespace CodeGen
+} // end namespace clang
+
+#endif
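To see the new PatternInit files in practice: with -ftrivial-auto-var-init=pattern, an uninitialized local is filled with the constants built above. The exact values depend on the target (per the comments in PatternInit.cpp); on a typical 64-bit target the sketch below describes the expected shape:

    struct S { int i; float f; void *p; };

    void f() {
      S s; // no initializer in the source
      // s.i starts as 0xAAAAAAAA          (repeated 0xAA byte pattern)
      // s.f starts as a negative quiet NaN (easy to spot, and it propagates)
      // s.p starts as 0xAAAAAAAAAAAAAAAA   (an unmappable pointer value)
    }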
diff --git a/lib/CodeGen/SanitizerMetadata.cpp b/lib/CodeGen/SanitizerMetadata.cpp
index 23cf9e490828..ebc9cd5529bc 100644
--- a/lib/CodeGen/SanitizerMetadata.cpp
+++ b/lib/CodeGen/SanitizerMetadata.cpp
@@ -1,9 +1,8 @@
 //===--- SanitizerMetadata.cpp - Blacklist for sanitizers -----------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -21,14 +20,17 @@ using namespace CodeGen;

 SanitizerMetadata::SanitizerMetadata(CodeGenModule &CGM) : CGM(CGM) {}

+static bool isAsanHwasanOrMemTag(const SanitizerSet& SS) {
+  return SS.hasOneOf(SanitizerKind::Address | SanitizerKind::KernelAddress |
+                     SanitizerKind::HWAddress | SanitizerKind::KernelHWAddress |
+                     SanitizerKind::MemTag);
+}
+
 void SanitizerMetadata::reportGlobalToASan(llvm::GlobalVariable *GV,
                                            SourceLocation Loc, StringRef Name,
                                            QualType Ty, bool IsDynInit,
                                            bool IsBlacklisted) {
-  if (!CGM.getLangOpts().Sanitize.hasOneOf(SanitizerKind::Address |
-                                           SanitizerKind::KernelAddress |
-                                           SanitizerKind::HWAddress |
-                                           SanitizerKind::KernelHWAddress))
+  if (!isAsanHwasanOrMemTag(CGM.getLangOpts().Sanitize))
     return;
   IsDynInit &= !CGM.isInSanitizerBlacklist(GV, Loc, Ty, "init");
   IsBlacklisted |= CGM.isInSanitizerBlacklist(GV, Loc, Ty);
@@ -59,10 +61,7 @@ void SanitizerMetadata::reportGlobalToASan(llvm::GlobalVariable *GV,

 void SanitizerMetadata::reportGlobalToASan(llvm::GlobalVariable *GV,
                                            const VarDecl &D, bool IsDynInit) {
-  if (!CGM.getLangOpts().Sanitize.hasOneOf(SanitizerKind::Address |
-                                           SanitizerKind::KernelAddress |
-                                           SanitizerKind::HWAddress |
-                                           SanitizerKind::KernelHWAddress))
+  if (!isAsanHwasanOrMemTag(CGM.getLangOpts().Sanitize))
     return;
   std::string QualName;
   llvm::raw_string_ostream OS(QualName);
@@ -79,10 +78,7 @@ void SanitizerMetadata::reportGlobalToASan(llvm::GlobalVariable *GV,
 void SanitizerMetadata::disableSanitizerForGlobal(llvm::GlobalVariable *GV) {
   // For now, just make sure the global is not modified by the ASan
   // instrumentation.
-  if (CGM.getLangOpts().Sanitize.hasOneOf(SanitizerKind::Address |
-                                          SanitizerKind::KernelAddress |
-                                          SanitizerKind::HWAddress |
-                                          SanitizerKind::KernelHWAddress))
+  if (isAsanHwasanOrMemTag(CGM.getLangOpts().Sanitize))
     reportGlobalToASan(GV, SourceLocation(), "", QualType(), false, true);
 }

diff --git a/lib/CodeGen/SanitizerMetadata.h b/lib/CodeGen/SanitizerMetadata.h
index 166f0e6c9b58..7ffac4360d9c 100644
--- a/lib/CodeGen/SanitizerMetadata.h
+++ b/lib/CodeGen/SanitizerMetadata.h
@@ -1,9 +1,8 @@
 //===--- SanitizerMetadata.h - Metadata for sanitizers ----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/SwiftCallingConv.cpp b/lib/CodeGen/SwiftCallingConv.cpp
index 75a0fa5ce189..8bce93b71c0c 100644
--- a/lib/CodeGen/SwiftCallingConv.cpp
+++ b/lib/CodeGen/SwiftCallingConv.cpp
@@ -1,9 +1,8 @@
 //===--- SwiftCallingConv.cpp - Lowering for the Swift calling convention -===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index 89ec73670a73..5da988fb8a3c 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -1,9 +1,8 @@
 //===---- TargetInfo.cpp - Encapsulate target details -----------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -310,10 +309,9 @@ static Address emitVoidPtrDirectVAArg(CodeGenFunction &CGF,

   // Advance the pointer past the argument, then store that back.
   CharUnits FullDirectSize = DirectSize.alignTo(SlotSize);
-  llvm::Value *NextPtr =
-      CGF.Builder.CreateConstInBoundsByteGEP(Addr.getPointer(), FullDirectSize,
-                                             "argp.next");
-  CGF.Builder.CreateStore(NextPtr, VAListAddr);
+  Address NextPtr =
+      CGF.Builder.CreateConstInBoundsByteGEP(Addr, FullDirectSize, "argp.next");
+  CGF.Builder.CreateStore(NextPtr.getPointer(), VAListAddr);

   // If the argument is smaller than a slot, and this is a big-endian
   // target, the argument will be right-adjusted in its slot.
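The hunk above switches the va_arg slot bump to Address-based GEPs, but the underlying computation is unchanged. Roughly, in C-like pseudocode (illustrative, not from the patch):

    // *va_list_addr holds the current argument pointer ("argp.cur").
    char *argp = *(char **)va_list_addr;
    // Round the argument size up to a whole slot and advance ("argp.next").
    char *next = argp + alignTo(direct_size, slot_size);
    *(char **)va_list_addr = next;
    // The (possibly re-adjusted) old pointer is where the argument lives.
    return argp;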
@@ -451,7 +449,9 @@ llvm::Value *TargetCodeGenInfo::performAddrSpaceCast(
   // space, an address space conversion may end up as a bitcast.
   if (auto *C = dyn_cast<llvm::Constant>(Src))
     return performAddrSpaceCast(CGF.CGM, C, SrcAddr, DestAddr, DestTy);
-  return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Src, DestTy);
+  // Try to preserve the source's name to make IR more readable.
+  return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+      Src, DestTy, Src->hasName() ? Src->getName() + ".ascast" : "");
 }

 llvm::Constant *
@@ -464,8 +464,11 @@ TargetCodeGenInfo::performAddrSpaceCast(CodeGenModule &CGM, llvm::Constant *Src,
 }

 llvm::SyncScope::ID
-TargetCodeGenInfo::getLLVMSyncScopeID(SyncScope S, llvm::LLVMContext &C) const {
-  return C.getOrInsertSyncScopeID(""); /* default sync scope */
+TargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
+                                      SyncScope Scope,
+                                      llvm::AtomicOrdering Ordering,
+                                      llvm::LLVMContext &Ctx) const {
+  return Ctx.getOrInsertSyncScopeID(""); /* default sync scope */
 }

 static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays);
@@ -761,6 +764,22 @@ public:

   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                            CodeGen::CodeGenModule &CGM) const override {
+    TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
+    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(D)) {
+      if (const auto *Attr = FD->getAttr<WebAssemblyImportModuleAttr>()) {
+        llvm::Function *Fn = cast<llvm::Function>(GV);
+        llvm::AttrBuilder B;
+        B.addAttribute("wasm-import-module", Attr->getImportModule());
+        Fn->addAttributes(llvm::AttributeList::FunctionIndex, B);
+      }
+      if (const auto *Attr = FD->getAttr<WebAssemblyImportNameAttr>()) {
+        llvm::Function *Fn = cast<llvm::Function>(GV);
+        llvm::AttrBuilder B;
+        B.addAttribute("wasm-import-name", Attr->getImportName());
+        Fn->addAttributes(llvm::AttributeList::FunctionIndex, B);
+      }
+    }
+
     if (auto *FD = dyn_cast_or_null<FunctionDecl>(D)) {
       llvm::Function *Fn = cast<llvm::Function>(GV);
       if (!FD->doesThisDeclarationHaveABody() && !FD->hasPrototype())
@@ -814,7 +833,7 @@ ABIArgInfo WebAssemblyABIInfo::classifyReturnType(QualType RetTy) const {

 Address WebAssemblyABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                       QualType Ty) const {
-  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect=*/ false,
+  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*IsIndirect=*/ false,
                           getContext().getTypeInfoInChars(Ty),
                           CharUnits::fromQuantity(4),
                           /*AllowHigherAlign=*/ true);
@@ -2205,8 +2224,8 @@ public:
 /// WinX86_64ABIInfo - The Windows X86_64 ABI information.
 class WinX86_64ABIInfo : public SwiftABIInfo {
 public:
-  WinX86_64ABIInfo(CodeGen::CodeGenTypes &CGT)
-      : SwiftABIInfo(CGT),
+  WinX86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
+      : SwiftABIInfo(CGT), AVXLevel(AVXLevel),
        IsMingw64(getTarget().getTriple().isWindowsGNUEnvironment()) {}

   void computeInfo(CGFunctionInfo &FI) const override;
@@ -2242,7 +2261,9 @@ private:
   void computeVectorCallArgs(CGFunctionInfo &FI, unsigned FreeSSERegs,
                              bool IsVectorCall, bool IsRegCall) const;

-  bool IsMingw64;
+  X86AVXABILevel AVXLevel;
+
+  bool IsMingw64;
 };

 class X86_64TargetCodeGenInfo : public TargetCodeGenInfo {
@@ -2254,6 +2275,12 @@ public:
     return static_cast<const X86_64ABIInfo&>(TargetCodeGenInfo::getABIInfo());
   }

+  /// Disable tail call on x86-64. The epilogue code before the tail jump blocks
+  /// the autoreleaseRV/retainRV optimization.
+  bool shouldSuppressTailCallsOfRetainAutoreleasedReturnValue() const override {
+    return true;
+  }
+
   int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
     return 7;
   }
@@ -2325,22 +2352,6 @@ public:
   }
 };

-class PS4TargetCodeGenInfo : public X86_64TargetCodeGenInfo {
-public:
-  PS4TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
-      : X86_64TargetCodeGenInfo(CGT, AVXLevel) {}
-
-  void getDependentLibraryOption(llvm::StringRef Lib,
-                                 llvm::SmallString<24> &Opt) const override {
-    Opt = "\01";
-    // If the argument contains a space, enclose it in quotes.
-    if (Lib.find(" ") != StringRef::npos)
-      Opt += "\"" + Lib.str() + "\"";
-    else
-      Opt += Lib;
-  }
-};
-
 static std::string qualifyWindowsLibrary(llvm::StringRef Lib) {
   // If the argument does not end in .lib, automatically add the suffix.
   // If the argument contains a space, enclose it in quotes.
@@ -2402,7 +2413,7 @@ class WinX86_64TargetCodeGenInfo : public TargetCodeGenInfo {
 public:
   WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT,
                              X86AVXABILevel AVXLevel)
-      : TargetCodeGenInfo(new WinX86_64ABIInfo(CGT)) {}
+      : TargetCodeGenInfo(new WinX86_64ABIInfo(CGT, AVXLevel)) {}

   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                            CodeGen::CodeGenModule &CGM) const override;
@@ -3555,7 +3566,7 @@ void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
   // using __attribute__((ms_abi)). In such case to correctly emit Win64
   // compatible code delegate this call to WinX86_64ABIInfo::computeInfo.
   if (CallingConv == llvm::CallingConv::Win64) {
-    WinX86_64ABIInfo Win64ABIInfo(CGT);
+    WinX86_64ABIInfo Win64ABIInfo(CGT, AVXLevel);
     Win64ABIInfo.computeInfo(FI);
     return;
   }
@@ -3627,8 +3638,8 @@ void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {

 static Address EmitX86_64VAArgFromMemory(CodeGenFunction &CGF,
                                          Address VAListAddr, QualType Ty) {
-  Address overflow_arg_area_p = CGF.Builder.CreateStructGEP(
-      VAListAddr, 2, CharUnits::fromQuantity(8), "overflow_arg_area_p");
+  Address overflow_arg_area_p =
+      CGF.Builder.CreateStructGEP(VAListAddr, 2, "overflow_arg_area_p");
   llvm::Value *overflow_arg_area =
       CGF.Builder.CreateLoad(overflow_arg_area_p, "overflow_arg_area");
@@ -3699,18 +3710,14 @@ Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
   Address gp_offset_p = Address::invalid(), fp_offset_p = Address::invalid();
   llvm::Value *gp_offset = nullptr, *fp_offset = nullptr;
   if (neededInt) {
-    gp_offset_p =
-        CGF.Builder.CreateStructGEP(VAListAddr, 0, CharUnits::Zero(),
-                                    "gp_offset_p");
+    gp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "gp_offset_p");
     gp_offset = CGF.Builder.CreateLoad(gp_offset_p, "gp_offset");
     InRegs = llvm::ConstantInt::get(CGF.Int32Ty, 48 - neededInt * 8);
     InRegs = CGF.Builder.CreateICmpULE(gp_offset, InRegs, "fits_in_gp");
   }

   if (neededSSE) {
-    fp_offset_p =
-        CGF.Builder.CreateStructGEP(VAListAddr, 1, CharUnits::fromQuantity(4),
-                                    "fp_offset_p");
+    fp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 1, "fp_offset_p");
     fp_offset = CGF.Builder.CreateLoad(fp_offset_p, "fp_offset");
     llvm::Value *FitsInFP =
         llvm::ConstantInt::get(CGF.Int32Ty, 176 - neededSSE * 16);
@@ -3739,8 +3746,7 @@ Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
   // loads than necessary. Can we clean this up?
   llvm::Type *LTy = CGF.ConvertTypeForMem(Ty);
   llvm::Value *RegSaveArea = CGF.Builder.CreateLoad(
-      CGF.Builder.CreateStructGEP(VAListAddr, 3, CharUnits::fromQuantity(16)),
-      "reg_save_area");
+      CGF.Builder.CreateStructGEP(VAListAddr, 3), "reg_save_area");

   Address RegAddr = Address::invalid();
   if (neededInt && neededSSE) {
@@ -3766,16 +3772,13 @@ Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
     llvm::Value *V = CGF.Builder.CreateAlignedLoad(
         TyLo, CGF.Builder.CreateBitCast(RegLoAddr, PTyLo),
         CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(TyLo)));
-    CGF.Builder.CreateStore(V,
-                            CGF.Builder.CreateStructGEP(Tmp, 0, CharUnits::Zero()));
+    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0));

     // Copy the second element.
     V = CGF.Builder.CreateAlignedLoad(
         TyHi, CGF.Builder.CreateBitCast(RegHiAddr, PTyHi),
         CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(TyHi)));
-    CharUnits Offset = CharUnits::fromQuantity(
-                   getDataLayout().getStructLayout(ST)->getElementOffset(1));
-    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1, Offset));
+    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1));

     RegAddr = CGF.Builder.CreateElementBitCast(Tmp, LTy);
   } else if (neededInt) {
@@ -3822,12 +3825,10 @@ Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
     Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST);
     V = CGF.Builder.CreateLoad(CGF.Builder.CreateElementBitCast(
         RegAddrLo, ST->getStructElementType(0)));
-    CGF.Builder.CreateStore(V,
-                            CGF.Builder.CreateStructGEP(Tmp, 0, CharUnits::Zero()));
+    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0));
     V = CGF.Builder.CreateLoad(CGF.Builder.CreateElementBitCast(
         RegAddrHi, ST->getStructElementType(1)));
-    CGF.Builder.CreateStore(V,
-                            CGF.Builder.CreateStructGEP(Tmp, 1, CharUnits::fromQuantity(8)));
+    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1));

     RegAddr = CGF.Builder.CreateElementBitCast(Tmp, LTy);
   }
@@ -4019,9 +4020,17 @@ void WinX86_64ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI,
 }

 void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
-  bool IsVectorCall =
-      FI.getCallingConvention() == llvm::CallingConv::X86_VectorCall;
-  bool IsRegCall = FI.getCallingConvention() == llvm::CallingConv::X86_RegCall;
+  const unsigned CC = FI.getCallingConvention();
+  bool IsVectorCall = CC == llvm::CallingConv::X86_VectorCall;
+  bool IsRegCall = CC == llvm::CallingConv::X86_RegCall;
+
+  // If __attribute__((sysv_abi)) is in use, use the SysV argument
+  // classification rules.
+  if (CC == llvm::CallingConv::X86_64_SysV) {
+    X86_64ABIInfo SysVABIInfo(CGT, AVXLevel);
+    SysVABIInfo.computeInfo(FI);
+    return;
+  }

   unsigned FreeSSERegs = 0;
   if (IsVectorCall) {
@@ -4169,9 +4178,9 @@ Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList,
   // The calling convention either uses 1-2 GPRs or 1 FPR.
   Address NumRegsAddr = Address::invalid();
   if (isInt || IsSoftFloatABI) {
-    NumRegsAddr = Builder.CreateStructGEP(VAList, 0, CharUnits::Zero(), "gpr");
+    NumRegsAddr = Builder.CreateStructGEP(VAList, 0, "gpr");
   } else {
-    NumRegsAddr = Builder.CreateStructGEP(VAList, 1, CharUnits::One(), "fpr");
+    NumRegsAddr = Builder.CreateStructGEP(VAList, 1, "fpr");
   }

   llvm::Value *NumRegs = Builder.CreateLoad(NumRegsAddr, "numUsedRegs");
@@ -4199,8 +4208,7 @@ Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList,
   {
     CGF.EmitBlock(UsingRegs);

-    Address RegSaveAreaPtr =
-        Builder.CreateStructGEP(VAList, 4, CharUnits::fromQuantity(8));
+    Address RegSaveAreaPtr = Builder.CreateStructGEP(VAList, 4);
     RegAddr = Address(Builder.CreateLoad(RegSaveAreaPtr),
                       CharUnits::fromQuantity(8));
     assert(RegAddr.getElementType() == CGF.Int8Ty);
@@ -4248,8 +4256,7 @@ Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList,
       Size = CGF.getPointerSize();
     }

-    Address OverflowAreaAddr =
-        Builder.CreateStructGEP(VAList, 3, CharUnits::fromQuantity(4));
+    Address OverflowAreaAddr = Builder.CreateStructGEP(VAList, 3);
     Address OverflowArea(Builder.CreateLoad(OverflowAreaAddr, "argp.cur"),
                          OverflowAreaAlign);
     // Round up address of argument to alignment
@@ -5283,31 +5290,24 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr,
   llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack");
   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");

-  auto TyInfo = getContext().getTypeInfoInChars(Ty);
-  CharUnits TyAlign = TyInfo.second;
+  CharUnits TySize = getContext().getTypeSizeInChars(Ty);
+  CharUnits TyAlign = getContext().getTypeUnadjustedAlignInChars(Ty);

   Address reg_offs_p = Address::invalid();
   llvm::Value *reg_offs = nullptr;
   int reg_top_index;
-  CharUnits reg_top_offset;
-  int RegSize = IsIndirect ? 8 : TyInfo.first.getQuantity();
+  int RegSize = IsIndirect ? 8 : TySize.getQuantity();
   if (!IsFPR) {
     // 3 is the field number of __gr_offs
-    reg_offs_p =
-        CGF.Builder.CreateStructGEP(VAListAddr, 3, CharUnits::fromQuantity(24),
-                                    "gr_offs_p");
+    reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, "gr_offs_p");
     reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs");
     reg_top_index = 1; // field number for __gr_top
-    reg_top_offset = CharUnits::fromQuantity(8);
     RegSize = llvm::alignTo(RegSize, 8);
   } else {
     // 4 is the field number of __vr_offs.
-    reg_offs_p =
-        CGF.Builder.CreateStructGEP(VAListAddr, 4, CharUnits::fromQuantity(28),
-                                    "vr_offs_p");
+    reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 4, "vr_offs_p");
     reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs");
     reg_top_index = 2; // field number for __vr_top
-    reg_top_offset = CharUnits::fromQuantity(16);
     RegSize = 16 * NumRegs;
   }
@@ -5369,8 +5369,8 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr,
   CGF.EmitBlock(InRegBlock);

   llvm::Value *reg_top = nullptr;
-  Address reg_top_p = CGF.Builder.CreateStructGEP(VAListAddr, reg_top_index,
-                                                  reg_top_offset, "reg_top_p");
+  Address reg_top_p =
+      CGF.Builder.CreateStructGEP(VAListAddr, reg_top_index, "reg_top_p");
   reg_top = CGF.Builder.CreateLoad(reg_top_p, "reg_top");
   Address BaseAddr(CGF.Builder.CreateInBoundsGEP(reg_top, reg_offs),
                    CharUnits::fromQuantity(IsFPR ? 16 : 8));
@@ -5410,8 +5410,7 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr,
         CGF.Builder.CreateConstInBoundsByteGEP(BaseAddr, BaseOffset);
     LoadAddr = CGF.Builder.CreateElementBitCast(LoadAddr, BaseTy);

-    Address StoreAddr =
-        CGF.Builder.CreateConstArrayGEP(Tmp, i, BaseTyInfo.first);
+    Address StoreAddr = CGF.Builder.CreateConstArrayGEP(Tmp, i);

     llvm::Value *Elem = CGF.Builder.CreateLoad(LoadAddr);
     CGF.Builder.CreateStore(Elem, StoreAddr);
@@ -5425,8 +5424,8 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr,
   CharUnits SlotSize = BaseAddr.getAlignment();
   if (CGF.CGM.getDataLayout().isBigEndian() && !IsIndirect &&
       (IsHFA || !isAggregateTypeForABI(Ty)) &&
-      TyInfo.first < SlotSize) {
-    CharUnits Offset = SlotSize - TyInfo.first;
+      TySize < SlotSize) {
+    CharUnits Offset = SlotSize - TySize;
     BaseAddr = CGF.Builder.CreateConstInBoundsByteGEP(BaseAddr, Offset);
   }
@@ -5440,8 +5439,7 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr,
   //=======================================
   CGF.EmitBlock(OnStackBlock);

-  Address stack_p = CGF.Builder.CreateStructGEP(VAListAddr, 0,
-                                                CharUnits::Zero(), "stack_p");
+  Address stack_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "stack_p");
   llvm::Value *OnStackPtr = CGF.Builder.CreateLoad(stack_p, "stack");

   // Again, stack arguments may need realignment. In this case both integer and
@@ -5469,7 +5467,7 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr,
   if (IsIndirect)
     StackSize = StackSlotSize;
   else
-    StackSize = TyInfo.first.alignTo(StackSlotSize);
+    StackSize = TySize.alignTo(StackSlotSize);

   llvm::Value *StackSizeC = CGF.Builder.getSize(StackSize);
   llvm::Value *NewStack =
@@ -5479,8 +5477,8 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr,
   CGF.Builder.CreateStore(NewStack, stack_p);

   if (CGF.CGM.getDataLayout().isBigEndian() && !isAggregateTypeForABI(Ty) &&
-      TyInfo.first < StackSlotSize) {
-    CharUnits Offset = StackSlotSize - TyInfo.first;
+      TySize < StackSlotSize) {
+    CharUnits Offset = StackSlotSize - TySize;
     OnStackAddr = CGF.Builder.CreateConstInBoundsByteGEP(OnStackAddr, Offset);
   }
@@ -5498,7 +5496,7 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr,
   if (IsIndirect)
     return Address(CGF.Builder.CreateLoad(ResAddr, "vaarg.addr"),
-                   TyInfo.second);
+                   TyAlign);

   return ResAddr;
 }
@@ -5598,17 +5596,22 @@ public:
   ABIKind getABIKind() const { return Kind; }

 private:
-  ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic) const;
-  ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic) const;
+  ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic,
+                                unsigned functionCallConv) const;
+  ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic,
+                                  unsigned functionCallConv) const;
   ABIArgInfo classifyHomogeneousAggregate(QualType Ty, const Type *Base,
                                           uint64_t Members) const;
   ABIArgInfo coerceIllegalVector(QualType Ty) const;
   bool isIllegalVectorType(QualType Ty) const;
+  bool containsAnyFP16Vectors(QualType Ty) const;

   bool isHomogeneousAggregateBaseType(QualType Ty) const override;
   bool isHomogeneousAggregateSmallEnough(const Type *Ty,
                                          uint64_t Members) const override;

+  bool isEffectivelyAAPCS_VFP(unsigned callConvention, bool acceptHalf) const;
+
   void computeInfo(CGFunctionInfo &FI) const override;

   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
@@ -5729,11 +5732,13 @@ void WindowsARMTargetCodeGenInfo::setTargetAttributes(
 }

 void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const {
   if (!::classifyReturnType(getCXXABI(), FI, *this))
-    FI.getReturnInfo() =
-        classifyReturnType(FI.getReturnType(), FI.isVariadic());
+    FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), FI.isVariadic(),
+                                            FI.getCallingConvention());
   for (auto &I : FI.arguments())
-    I.info = classifyArgumentType(I.type, FI.isVariadic());
+    I.info = classifyArgumentType(I.type, FI.isVariadic(),
+                                  FI.getCallingConvention());
+
   // Always honor user-specified calling convention.
   if (FI.getCallingConvention() != llvm::CallingConv::C)
@@ -5799,9 +5804,7 @@ ABIArgInfo ARMABIInfo::classifyHomogeneousAggregate(QualType Ty,
   // Base can be a floating-point or a vector.
   if (const VectorType *VT = Base->getAs<VectorType>()) {
     // FP16 vectors should be converted to integer vectors
-    if (!getTarget().hasLegalHalfType() &&
-        (VT->getElementType()->isFloat16Type() ||
-         VT->getElementType()->isHalfType())) {
+    if (!getTarget().hasLegalHalfType() && containsAnyFP16Vectors(Ty)) {
       uint64_t Size = getContext().getTypeSize(VT);
       llvm::Type *NewVecTy = llvm::VectorType::get(
           llvm::Type::getInt32Ty(getVMContext()), Size / 32);
@@ -5812,8 +5815,8 @@ ABIArgInfo ARMABIInfo::classifyHomogeneousAggregate(QualType Ty,
   return ABIArgInfo::getDirect(nullptr, 0, nullptr, false);
 }

-ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty,
-                                            bool isVariadic) const {
+ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
+                                            unsigned functionCallConv) const {
   // 6.1.2.1 The following argument types are VFP CPRCs:
   //   A single-precision floating-point type (including promoted
   //   half-precision types); A double-precision floating-point type;
@@ -5821,7 +5824,9 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty,
   //   with a Base Type of a single- or double-precision floating-point type,
   //   64-bit containerized vectors or 128-bit containerized vectors with one
   //   to four Elements.
-  bool IsEffectivelyAAPCS_VFP = getABIKind() == AAPCS_VFP && !isVariadic;
+  // Variadic functions should always marshal to the base standard.
+  bool IsAAPCS_VFP =
+      !isVariadic && isEffectivelyAAPCS_VFP(functionCallConv, /* AAPCS16 */ false);

   Ty = useFirstFieldIfTransparentUnion(Ty);
@@ -5834,7 +5839,7 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty,
   // half type natively, and does not need to interwork with AAPCS code.
   if ((Ty->isFloat16Type() || Ty->isHalfType()) &&
       !getContext().getLangOpts().NativeHalfArgsAndReturns) {
-    llvm::Type *ResType = IsEffectivelyAAPCS_VFP ?
+    llvm::Type *ResType = IsAAPCS_VFP ?
       llvm::Type::getFloatTy(getVMContext()) :
       llvm::Type::getInt32Ty(getVMContext());
     return ABIArgInfo::getDirect(ResType);
@@ -5858,7 +5863,7 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty,
   if (isEmptyRecord(getContext(), Ty, true))
     return ABIArgInfo::getIgnore();

-  if (IsEffectivelyAAPCS_VFP) {
+  if (IsAAPCS_VFP) {
     // Homogeneous Aggregates need to be expanded when we can fit the aggregate
     // into VFP registers.
     const Type *Base = nullptr;
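Threading functionCallConv through classification lets a per-function calling-convention attribute override the target's default ABI kind. A hedged illustration of a declaration that now selects VFP marshalling explicitly (the function name is invented):

    // Classified as AAPCS-VFP even when the translation unit's default is
    // plain AAPCS, because pcs("aapcs-vfp") lowers to
    // llvm::CallingConv::ARM_AAPCS_VFP, which isEffectivelyAAPCS_VFP honors.
    __attribute__((pcs("aapcs-vfp"))) double scale(double x, float f);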
@@ -6015,10 +6020,12 @@ static bool isIntegerLikeType(QualType Ty, ASTContext &Context,
   return true;
 }

-ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy,
-                                          bool isVariadic) const {
-  bool IsEffectivelyAAPCS_VFP =
-      (getABIKind() == AAPCS_VFP || getABIKind() == AAPCS16_VFP) && !isVariadic;
+ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, bool isVariadic,
+                                          unsigned functionCallConv) const {
+
+  // Variadic functions should always marshal to the base standard.
+  bool IsAAPCS_VFP =
+      !isVariadic && isEffectivelyAAPCS_VFP(functionCallConv, /* AAPCS16 */ true);

   if (RetTy->isVoidType())
     return ABIArgInfo::getIgnore();
@@ -6039,7 +6046,7 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy,
   // half type natively, and does not need to interwork with AAPCS code.
   if ((RetTy->isFloat16Type() || RetTy->isHalfType()) &&
       !getContext().getLangOpts().NativeHalfArgsAndReturns) {
-    llvm::Type *ResType = IsEffectivelyAAPCS_VFP ?
+    llvm::Type *ResType = IsAAPCS_VFP ?
       llvm::Type::getFloatTy(getVMContext()) :
       llvm::Type::getInt32Ty(getVMContext());
     return ABIArgInfo::getDirect(ResType);
@@ -6088,7 +6095,7 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy,
     return ABIArgInfo::getIgnore();

   // Check for homogeneous aggregates with AAPCS-VFP.
-  if (IsEffectivelyAAPCS_VFP) {
+  if (IsAAPCS_VFP) {
     const Type *Base = nullptr;
     uint64_t Members = 0;
     if (isHomogeneousAggregate(RetTy, Base, Members))
@@ -6158,6 +6165,37 @@ bool ARMABIInfo::isIllegalVectorType(QualType Ty) const {
   return false;
 }

+/// Return true if a type contains any 16-bit floating point vectors
+bool ARMABIInfo::containsAnyFP16Vectors(QualType Ty) const {
+  if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
+    uint64_t NElements = AT->getSize().getZExtValue();
+    if (NElements == 0)
+      return false;
+    return containsAnyFP16Vectors(AT->getElementType());
+  } else if (const RecordType *RT = Ty->getAs<RecordType>()) {
+    const RecordDecl *RD = RT->getDecl();
+
+    // If this is a C++ record, check the bases first.
+    if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
+      if (llvm::any_of(CXXRD->bases(), [this](const CXXBaseSpecifier &B) {
+            return containsAnyFP16Vectors(B.getType());
+          }))
+        return true;
+
+    if (llvm::any_of(RD->fields(), [this](FieldDecl *FD) {
+          return FD && containsAnyFP16Vectors(FD->getType());
+        }))
+      return true;
+
+    return false;
+  } else {
+    if (const VectorType *VT = Ty->getAs<VectorType>())
+      return (VT->getElementType()->isFloat16Type() ||
+              VT->getElementType()->isHalfType());
+    return false;
+  }
+}
+
 bool ARMABIInfo::isLegalVectorTypeForSwift(CharUnits vectorSize,
                                            llvm::Type *eltTy,
                                            unsigned numElts) const {
@@ -6193,6 +6231,16 @@ bool ARMABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
   return Members <= 4;
 }

+bool ARMABIInfo::isEffectivelyAAPCS_VFP(unsigned callConvention,
+                                        bool acceptHalf) const {
+  // Give precedence to user-specified calling conventions.
+  if (callConvention != llvm::CallingConv::C)
+    return (callConvention == llvm::CallingConv::ARM_AAPCS_VFP);
+  else
+    return (getABIKind() == AAPCS_VFP) ||
+           (acceptHalf && (getABIKind() == AAPCS16_VFP));
+}
+
 Address ARMABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                               QualType Ty) const {
   CharUnits SlotSize = CharUnits::fromQuantity(4);
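The recursion in containsAnyFP16Vectors matters because a homogeneous aggregate can bury an FP16 vector arbitrarily deep. Illustrative types only (names invented; assumes a target that accepts __fp16 with vector_size):

    typedef __fp16 half4 __attribute__((vector_size(8)));

    struct HalfPair { half4 lo, hi; };        // FP16 vectors as direct fields
    struct Nested { struct HalfPair p[2]; };  // found via array + field recursion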
@@ -6204,19 +6252,19 @@ Address ARMABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
     return Addr;
   }

-  auto TyInfo = getContext().getTypeInfoInChars(Ty);
-  CharUnits TyAlignForABI = TyInfo.second;
+  CharUnits TySize = getContext().getTypeSizeInChars(Ty);
+  CharUnits TyAlignForABI = getContext().getTypeUnadjustedAlignInChars(Ty);

   // Use indirect if size of the illegal vector is bigger than 16 bytes.
   bool IsIndirect = false;
   const Type *Base = nullptr;
   uint64_t Members = 0;
-  if (TyInfo.first > CharUnits::fromQuantity(16) && isIllegalVectorType(Ty)) {
+  if (TySize > CharUnits::fromQuantity(16) && isIllegalVectorType(Ty)) {
     IsIndirect = true;

   // ARMv7k passes structs bigger than 16 bytes indirectly, in space
   // allocated by the caller.
-  } else if (TyInfo.first > CharUnits::fromQuantity(16) &&
+  } else if (TySize > CharUnits::fromQuantity(16) &&
              getABIKind() == ARMABIInfo::AAPCS16_VFP &&
              !isHomogeneousAggregate(Ty, Base, Members)) {
     IsIndirect = true;
@@ -6236,8 +6284,8 @@ Address ARMABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
   } else {
     TyAlignForABI = CharUnits::fromQuantity(4);
   }
-  TyInfo.second = TyAlignForABI;

+  std::pair<CharUnits, CharUnits> TyInfo = { TySize, TyAlignForABI };
   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TyInfo, SlotSize,
                           /*AllowHigherAlign*/ true);
 }
@@ -6275,10 +6323,58 @@ private:
   static void addNVVMMetadata(llvm::Function *F, StringRef Name, int Operand);
 };

+/// Checks if the type is unsupported directly by the current target.
+static bool isUnsupportedType(ASTContext &Context, QualType T) {
+  if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type())
+    return true;
+  if (!Context.getTargetInfo().hasFloat128Type() &&
+      (T->isFloat128Type() ||
+       (T->isRealFloatingType() && Context.getTypeSize(T) == 128)))
+    return true;
+  if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() &&
+      Context.getTypeSize(T) > 64)
+    return true;
+  if (const auto *AT = T->getAsArrayTypeUnsafe())
+    return isUnsupportedType(Context, AT->getElementType());
+  const auto *RT = T->getAs<RecordType>();
+  if (!RT)
+    return false;
+  const RecordDecl *RD = RT->getDecl();
+
+  // If this is a C++ record, check the bases first.
+  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
+    for (const CXXBaseSpecifier &I : CXXRD->bases())
+      if (isUnsupportedType(Context, I.getType()))
+        return true;
+
+  for (const FieldDecl *I : RD->fields())
+    if (isUnsupportedType(Context, I->getType()))
+      return true;
+  return false;
+}
+
+/// Coerce the given type into an array with maximum allowed size of elements.
+static ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, ASTContext &Context,
+                                            llvm::LLVMContext &LLVMContext,
+                                            unsigned MaxSize) {
+  // Alignment and Size are measured in bits.
+  const uint64_t Size = Context.getTypeSize(Ty);
+  const uint64_t Alignment = Context.getTypeAlign(Ty);
+  const unsigned Div = std::min<unsigned>(MaxSize, Alignment);
+  llvm::Type *IntType = llvm::Type::getIntNTy(LLVMContext, Div);
+  const uint64_t NumElements = (Size + Div - 1) / Div;
+  return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
+}
+
 ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
   if (RetTy->isVoidType())
     return ABIArgInfo::getIgnore();

+  if (getContext().getLangOpts().OpenMP &&
+      getContext().getLangOpts().OpenMPIsDevice &&
+      isUnsupportedType(getContext(), RetTy))
+    return coerceToIntArrayWithLimit(RetTy, getContext(), getVMContext(), 64);
+
   // note: this is different from default ABI
   if (!RetTy->isScalarType())
     return ABIArgInfo::getDirect();
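The arithmetic in coerceToIntArrayWithLimit is easiest to see on a concrete case. A worked example, assuming a 128-bit floating-point return type on an OpenMP NVPTX device target where MaxSize is 64:

    // Size = 128 (bits), Alignment = 128, MaxSize = 64
    // Div = min(64, 128) = 64             -> element type i64
    // NumElements = (128 + 64 - 1) / 64 = 2
    // Result: ABIArgInfo::getDirect([2 x i64])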
@@ -6584,8 +6680,7 @@ Address SystemZABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
     // Vector arguments are always passed in the high bits of a
     // single (8 byte) or double (16 byte) stack slot.
     Address OverflowArgAreaPtr =
-        CGF.Builder.CreateStructGEP(VAListAddr, 2, CharUnits::fromQuantity(16),
-                                    "overflow_arg_area_ptr");
+        CGF.Builder.CreateStructGEP(VAListAddr, 2, "overflow_arg_area_ptr");
     Address OverflowArgArea =
         Address(CGF.Builder.CreateLoad(OverflowArgAreaPtr, "overflow_arg_area"),
                 TyInfo.second);
@@ -6617,9 +6712,8 @@ Address SystemZABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
     RegPadding = Padding; // values are passed in the low bits of a GPR
   }

-  Address RegCountPtr = CGF.Builder.CreateStructGEP(
-      VAListAddr, RegCountField, RegCountField * CharUnits::fromQuantity(8),
-      "reg_count_ptr");
+  Address RegCountPtr =
+      CGF.Builder.CreateStructGEP(VAListAddr, RegCountField, "reg_count_ptr");
   llvm::Value *RegCount = CGF.Builder.CreateLoad(RegCountPtr, "reg_count");
   llvm::Value *MaxRegsV = llvm::ConstantInt::get(IndexTy, MaxRegs);
   llvm::Value *InRegs = CGF.Builder.CreateICmpULT(RegCount, MaxRegsV,
@@ -6642,8 +6736,7 @@ Address SystemZABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
   llvm::Value *RegOffset =
       CGF.Builder.CreateAdd(ScaledRegCount, RegBase, "reg_offset");
   Address RegSaveAreaPtr =
-      CGF.Builder.CreateStructGEP(VAListAddr, 3, CharUnits::fromQuantity(24),
-                                  "reg_save_area_ptr");
+      CGF.Builder.CreateStructGEP(VAListAddr, 3, "reg_save_area_ptr");
   llvm::Value *RegSaveArea =
       CGF.Builder.CreateLoad(RegSaveAreaPtr, "reg_save_area");
   Address RawRegAddr(CGF.Builder.CreateGEP(RegSaveArea, RegOffset,
@@ -6663,8 +6756,8 @@ Address SystemZABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
   CGF.EmitBlock(InMemBlock);

   // Work out the address of a stack argument.
-  Address OverflowArgAreaPtr = CGF.Builder.CreateStructGEP(
-      VAListAddr, 2, CharUnits::fromQuantity(16), "overflow_arg_area_ptr");
+  Address OverflowArgAreaPtr =
+      CGF.Builder.CreateStructGEP(VAListAddr, 2, "overflow_arg_area_ptr");
   Address OverflowArgArea =
       Address(CGF.Builder.CreateLoad(OverflowArgAreaPtr, "overflow_arg_area"),
              PaddedSize);
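As with the AArch64 change, the removed byte offsets (16 and 24, and RegCountField * 8) restated the SystemZ va_list layout that the field indices already imply:

    // s390x va_list; field numbers used by the StructGEPs above:
    // 0 = __gpr, 1 = __fpr, 2 = __overflow_arg_area, 3 = __reg_save_area.
    typedef struct {
      long  __gpr;                // GP registers consumed so far (offset  0)
      long  __fpr;                // FP registers consumed so far (offset  8)
      void *__overflow_arg_area;  // next stacked argument        (offset 16)
      void *__reg_save_area;      // caller's register save area  (offset 24)
    } va_list;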
@@ -6774,21 +6867,19 @@ void MSP430TargetCodeGenInfo::setTargetAttributes(
   if (GV->isDeclaration())
     return;
   if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
-    if (const MSP430InterruptAttr *attr = FD->getAttr<MSP430InterruptAttr>()) {
-      // Handle 'interrupt' attribute:
-      llvm::Function *F = cast<llvm::Function>(GV);
+    const auto *InterruptAttr = FD->getAttr<MSP430InterruptAttr>();
+    if (!InterruptAttr)
+      return;

-      // Step 1: Set ISR calling convention.
-      F->setCallingConv(llvm::CallingConv::MSP430_INTR);
+    // Handle 'interrupt' attribute:
+    llvm::Function *F = cast<llvm::Function>(GV);

-      // Step 2: Add attributes goodness.
-      F->addFnAttr(llvm::Attribute::NoInline);
+    // Step 1: Set ISR calling convention.
+    F->setCallingConv(llvm::CallingConv::MSP430_INTR);

-      // Step 3: Emit ISR vector alias.
-      unsigned Num = attr->getNumber() / 2;
-      llvm::GlobalAlias::create(llvm::Function::ExternalLinkage,
-                                "__isr_" + Twine(Num), F);
-    }
+    // Step 2: Add attributes goodness.
+    F->addFnAttr(llvm::Attribute::NoInline);
+    F->addFnAttr("interrupt", llvm::utostr(InterruptAttr->getNumber()));
   }
 }
@@ -7764,8 +7855,10 @@ public:
   }
   LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
                                   const VarDecl *D) const override;
-  llvm::SyncScope::ID getLLVMSyncScopeID(SyncScope S,
-                                         llvm::LLVMContext &C) const override;
+  llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts,
+                                         SyncScope Scope,
+                                         llvm::AtomicOrdering Ordering,
+                                         llvm::LLVMContext &Ctx) const override;
   llvm::Function *
   createEnqueuedBlockKernel(CodeGenFunction &CGF,
                             llvm::Function *BlockInvokeFunc,
@@ -7775,8 +7868,36 @@ public:
 };
 }

+static bool requiresAMDGPUProtectedVisibility(const Decl *D,
+                                              llvm::GlobalValue *GV) {
+  if (GV->getVisibility() != llvm::GlobalValue::HiddenVisibility)
+    return false;
+
+  return D->hasAttr<OpenCLKernelAttr>() ||
+         (isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) ||
+         (isa<VarDecl>(D) &&
+          (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
+           D->hasAttr<HIPPinnedShadowAttr>()));
+}
+
+static bool requiresAMDGPUDefaultVisibility(const Decl *D,
+                                            llvm::GlobalValue *GV) {
+  if (GV->getVisibility() != llvm::GlobalValue::HiddenVisibility)
+    return false;
+
+  return isa<VarDecl>(D) && D->hasAttr<HIPPinnedShadowAttr>();
+}
+
 void AMDGPUTargetCodeGenInfo::setTargetAttributes(
     const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
+  if (requiresAMDGPUDefaultVisibility(D, GV)) {
+    GV->setVisibility(llvm::GlobalValue::DefaultVisibility);
+    GV->setDSOLocal(false);
+  } else if (requiresAMDGPUProtectedVisibility(D, GV)) {
+    GV->setVisibility(llvm::GlobalValue::ProtectedVisibility);
+    GV->setDSOLocal(true);
+  }
+
   if (GV->isDeclaration())
     return;
   const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
@@ -7788,14 +7909,23 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
   const auto *ReqdWGS = M.getLangOpts().OpenCL ?
     FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;

-  if (M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>() &&
+  if (((M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>()) ||
+       (M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>())) &&
       (M.getTriple().getOS() == llvm::Triple::AMDHSA))
-    F->addFnAttr("amdgpu-implicitarg-num-bytes", "48");
+    F->addFnAttr("amdgpu-implicitarg-num-bytes", "56");

   const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
   if (ReqdWGS || FlatWGS) {
-    unsigned Min = FlatWGS ? FlatWGS->getMin() : 0;
-    unsigned Max = FlatWGS ? FlatWGS->getMax() : 0;
+    unsigned Min = 0;
+    unsigned Max = 0;
+    if (FlatWGS) {
+      Min = FlatWGS->getMin()
+                ->EvaluateKnownConstInt(M.getContext())
+                .getExtValue();
+      Max = FlatWGS->getMax()
+                ->EvaluateKnownConstInt(M.getContext())
+                .getExtValue();
+    }
     if (ReqdWGS && Min == 0 && Max == 0)
       Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim();
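getMin()/getMax() now return expressions rather than plain unsigned values, which is why the code evaluates them with EvaluateKnownConstInt: the attribute arguments may be any integer constant expression. A hedged, HIP-flavoured sketch (kernel name invented):

    constexpr unsigned WaveSize = 64;

    // Both bounds are constant expressions, evaluated at CodeGen time via
    // EvaluateKnownConstInt rather than read as bare integer literals.
    __attribute__((amdgpu_flat_work_group_size(WaveSize, WaveSize * 4)))
    __global__ void fill(float *out, float v);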
@@ -7809,8 +7939,12 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
   }

   if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>()) {
-    unsigned Min = Attr->getMin();
-    unsigned Max = Attr->getMax();
+    unsigned Min =
+        Attr->getMin()->EvaluateKnownConstInt(M.getContext()).getExtValue();
+    unsigned Max = Attr->getMax() ? Attr->getMax()
+                                        ->EvaluateKnownConstInt(M.getContext())
+                                        .getExtValue()
+                                  : 0;

     if (Min != 0) {
       assert((Max == 0 || Min <= Max) && "Min must be less than or equal Max");
@@ -7884,10 +8018,12 @@ AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
 }

 llvm::SyncScope::ID
-AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(SyncScope S,
-                                            llvm::LLVMContext &C) const {
-  StringRef Name;
-  switch (S) {
+AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
+                                            SyncScope Scope,
+                                            llvm::AtomicOrdering Ordering,
+                                            llvm::LLVMContext &Ctx) const {
+  std::string Name;
+  switch (Scope) {
   case SyncScope::OpenCLWorkGroup:
     Name = "workgroup";
     break;
@@ -7898,9 +8034,17 @@ AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(SyncScope S,
     Name = "";
     break;
   case SyncScope::OpenCLSubGroup:
-    Name = "subgroup";
+    Name = "wavefront";
   }
-  return C.getOrInsertSyncScopeID(Name);
+
+  if (Ordering != llvm::AtomicOrdering::SequentiallyConsistent) {
+    if (!Name.empty())
+      Name = Twine(Twine(Name) + Twine("-")).str();
+
+    Name = Twine(Twine(Name) + Twine("one-as")).str();
+  }
+
+  return Ctx.getOrInsertSyncScopeID(Name);
 }

 bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
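The effect of the new ordering parameter on the final scope string, derived from the switch above: any ordering weaker than seq_cst appends a "one-as" suffix, narrowing the scope to a single address space. For the cases visible here:

    // OpenCLWorkGroup, seq_cst         -> "workgroup"
    // OpenCLWorkGroup, weaker ordering -> "workgroup-one-as"
    // OpenCLSubGroup,  weaker ordering -> "wavefront-one-as"  (was "subgroup")
    // empty scope (all SVM devices), weaker ordering -> "one-as"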
@@ -8198,9 +8342,8 @@ Address SparcV9ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
   }

   // Update VAList.
-  llvm::Value *NextPtr =
-      Builder.CreateConstInBoundsByteGEP(Addr.getPointer(), Stride, "ap.next");
-  Builder.CreateStore(NextPtr, VAListAddr);
+  Address NextPtr = Builder.CreateConstInBoundsByteGEP(Addr, Stride, "ap.next");
+  Builder.CreateStore(NextPtr.getPointer(), VAListAddr);

   return Builder.CreateBitCast(ArgAddr, ArgPtrTy, "arg.addr");
 }
@@ -8553,9 +8696,8 @@ Address XCoreABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,

   // Increment the VAList.
   if (!ArgSize.isZero()) {
-    llvm::Value *APN =
-        Builder.CreateConstInBoundsByteGEP(AP.getPointer(), ArgSize);
-    Builder.CreateStore(APN, VAListAddr);
+    Address APN = Builder.CreateConstInBoundsByteGEP(AP, ArgSize);
+    Builder.CreateStore(APN.getPointer(), VAListAddr);
   }

   return Val;
@@ -9392,8 +9534,6 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
     switch (Triple.getOS()) {
     case llvm::Triple::Win32:
       return SetCGInfo(new WinX86_64TargetCodeGenInfo(Types, AVXLevel));
-    case llvm::Triple::PS4:
-      return SetCGInfo(new PS4TargetCodeGenInfo(Types, AVXLevel));
    default:
       return SetCGInfo(new X86_64TargetCodeGenInfo(Types, AVXLevel));
    }
diff --git a/lib/CodeGen/TargetInfo.h b/lib/CodeGen/TargetInfo.h
index b530260ea48f..e1e90e73cb58 100644
--- a/lib/CodeGen/TargetInfo.h
+++ b/lib/CodeGen/TargetInfo.h
@@ -1,9 +1,8 @@
 //===---- TargetInfo.h - Encapsulate target details -------------*- C++ -*-===//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -157,6 +156,12 @@ public:
     return "";
   }

+  /// Determine whether a call to objc_retainAutoreleasedReturnValue should be
+  /// marked as 'notail'.
+  virtual bool shouldSuppressTailCallsOfRetainAutoreleasedReturnValue() const {
+    return false;
+  }
+
   /// Return a constant used by UBSan as a signature to identify functions
   /// possessing type information, or 0 if the platform is unsupported.
   virtual llvm::Constant *
@@ -262,9 +267,16 @@ public:
                                     LangAS SrcAddr, LangAS DestAddr,
                                     llvm::Type *DestTy) const;

+  /// Get address space of pointer parameter for __cxa_atexit.
+  virtual LangAS getAddrSpaceOfCxaAtexitPtrParam() const {
+    return LangAS::Default;
+  }
+
   /// Get the syncscope used in LLVM IR.
-  virtual llvm::SyncScope::ID getLLVMSyncScopeID(SyncScope S,
-                                                 llvm::LLVMContext &C) const;
+  virtual llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts,
+                                                 SyncScope Scope,
+                                                 llvm::AtomicOrdering Ordering,
+                                                 llvm::LLVMContext &Ctx) const;

   /// Interface class for filling custom fields of a block literal for OpenCL.
   class TargetOpenCLBlockHelper {
diff --git a/lib/CodeGen/VarBypassDetector.cpp b/lib/CodeGen/VarBypassDetector.cpp
index 859cdd4282cc..f3a172e91c4f 100644
--- a/lib/CodeGen/VarBypassDetector.cpp
+++ b/lib/CodeGen/VarBypassDetector.cpp
@@ -1,9 +1,8 @@
 //===--- VarBypassDetector.h - Bypass jumps detector --------------*- C++ -*-=//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/VarBypassDetector.h b/lib/CodeGen/VarBypassDetector.h
index 47fe13cfacd6..8a2e388eae3f 100644
--- a/lib/CodeGen/VarBypassDetector.h
+++ b/lib/CodeGen/VarBypassDetector.h
@@ -1,9 +1,8 @@
 //===--- VarBypassDetector.cpp - Bypass jumps detector ------------*- C++ -*-=//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //