author     Dimitry Andric <dim@FreeBSD.org>   2020-07-26 19:36:28 +0000
committer  Dimitry Andric <dim@FreeBSD.org>   2020-07-26 19:36:28 +0000
commit     cfca06d7963fa0909f90483b42a6d7d194d01e08 (patch)
tree       209fb2a2d68f8f277793fc8df46c753d31bc853b /clang/lib/CodeGen
parent     706b4fc47bbc608932d3b491ae19a3b9cde9497b (diff)
download   src-cfca06d7963fa0909f90483b42a6d7d194d01e08.tar.gz
           src-cfca06d7963fa0909f90483b42a6d7d194d01e08.zip
Diffstat (limited to 'clang/lib/CodeGen')
74 files changed, 12472 insertions, 5251 deletions
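
Much of the BackendUtil.cpp churn below moves sanitizer scheduling in LLVM's new pass manager from pipeline-start to optimizer-last extension points, with module-level adaptors wrapping the per-function passes. A minimal sketch of that registration pattern, assuming LLVM 11-era headers; NoOpCheckPass is a hypothetical stand-in, not something this commit adds:

// Sketch only: the optimizer-last extension point the patch below uses
// for MSan/TSan/ASan under the new pass manager. "NoOpCheckPass" is a
// hypothetical stand-in pass, not part of this commit.
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"

using namespace llvm;

namespace {
struct NoOpCheckPass : PassInfoMixin<NoOpCheckPass> {
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &) {
    // A real sanitizer pass would instrument M here.
    return PreservedAnalyses::all();
  }
};
} // namespace

ModulePassManager buildO2Pipeline(PassBuilder &PB) {
  // Callbacks must be registered before the default pipeline is built;
  // note the module-level callback signature this commit switches to.
  PB.registerOptimizerLastEPCallback(
      [](ModulePassManager &MPM, PassBuilder::OptimizationLevel Level) {
        MPM.addPass(NoOpCheckPass());
      });
  return PB.buildPerModuleDefaultPipeline(PassBuilder::OptimizationLevel::O2);
}

Running instrumentation at the optimizer-last point means the sanitizer sees (and checks) the code as it will actually be emitted, rather than code that later passes still rewrite.
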
diff --git a/clang/lib/CodeGen/ABIInfo.h b/clang/lib/CodeGen/ABIInfo.h index 0c3a076da0b5..bb40dace8a84 100644 --- a/clang/lib/CodeGen/ABIInfo.h +++ b/clang/lib/CodeGen/ABIInfo.h @@ -60,6 +60,8 @@ namespace swiftcall { virtual bool supportsSwift() const { return false; } + virtual bool allowBFloatArgsAndRet() const { return false; } + CodeGen::CGCXXABI &getCXXABI() const; ASTContext &getContext() const; llvm::LLVMContext &getVMContext() const; @@ -102,6 +104,10 @@ namespace swiftcall { bool isHomogeneousAggregate(QualType Ty, const Type *&Base, uint64_t &Members) const; + // Implement the Type::IsPromotableIntegerType for ABI specific needs. The + // only difference is that this considers _ExtInt as well. + bool isPromotableIntegerTypeForABI(QualType Ty) const; + /// A convenience method to return an indirect ABIArgInfo with an /// expected alignment equal to the ABI alignment of the given type. CodeGen::ABIArgInfo diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 0bfcab88a3a9..dce0940670a2 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" +#include "llvm/Analysis/StackSafetyAnalysis.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Bitcode/BitcodeReader.h" @@ -31,6 +32,7 @@ #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSummaryIndex.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/LTO/LTOBackend.h" #include "llvm/MC/MCAsmInfo.h" @@ -45,12 +47,18 @@ #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/TimeProfiler.h" #include "llvm/Support/Timer.h" +#include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Coroutines.h" +#include "llvm/Transforms/Coroutines/CoroCleanup.h" +#include "llvm/Transforms/Coroutines/CoroEarly.h" +#include "llvm/Transforms/Coroutines/CoroElide.h" +#include "llvm/Transforms/Coroutines/CoroSplit.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" +#include "llvm/Transforms/IPO/LowerTypeTests.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h" #include "llvm/Transforms/InstCombine/InstCombine.h" @@ -71,6 +79,7 @@ #include "llvm/Transforms/Utils/EntryExitInstrumenter.h" #include "llvm/Transforms/Utils/NameAnonGlobals.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" +#include "llvm/Transforms/Utils/UniqueInternalLinkageNames.h" #include <memory> using namespace clang; using namespace llvm; @@ -216,6 +225,7 @@ getSancovOptsFromCGOpts(const CodeGenOptions &CGOpts) { Opts.TracePCGuard = CGOpts.SanitizeCoverageTracePCGuard; Opts.NoPrune = CGOpts.SanitizeCoverageNoPrune; Opts.Inline8bitCounters = CGOpts.SanitizeCoverageInline8bitCounters; + Opts.InlineBoolFlag = CGOpts.SanitizeCoverageInlineBoolFlag; Opts.PCTable = CGOpts.SanitizeCoveragePCTable; Opts.StackDepth = CGOpts.SanitizeCoverageStackDepth; return Opts; @@ -227,7 +237,9 @@ static void addSanitizerCoveragePass(const PassManagerBuilder &Builder, static_cast<const PassManagerBuilderWrapper &>(Builder); const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts(); auto Opts = getSancovOptsFromCGOpts(CGOpts); - PM.add(createModuleSanitizerCoverageLegacyPassPass(Opts)); + 
PM.add(createModuleSanitizerCoverageLegacyPassPass( + Opts, CGOpts.SanitizeCoverageAllowlistFiles, + CGOpts.SanitizeCoverageBlocklistFiles)); } // Check if ASan should use GC-friendly instrumentation for globals. @@ -350,7 +362,7 @@ static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple, break; case CodeGenOptions::MASSV: TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::MASSV); - break; + break; case CodeGenOptions::SVML: TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SVML); break; @@ -413,7 +425,8 @@ static CodeGenFileType getCodeGenFileType(BackendAction Action) { } } -static void initTargetOptions(llvm::TargetOptions &Options, +static void initTargetOptions(DiagnosticsEngine &Diags, + llvm::TargetOptions &Options, const CodeGenOptions &CodeGenOpts, const clang::TargetOptions &TargetOpts, const LangOptions &LangOpts, @@ -436,15 +449,15 @@ static void initTargetOptions(llvm::TargetOptions &Options, // Set FP fusion mode. switch (LangOpts.getDefaultFPContractMode()) { - case LangOptions::FPC_Off: + case LangOptions::FPM_Off: // Preserve any contraction performed by the front-end. (Strict performs // splitting of the muladd intrinsic in the backend.) Options.AllowFPOpFusion = llvm::FPOpFusion::Standard; break; - case LangOptions::FPC_On: + case LangOptions::FPM_On: Options.AllowFPOpFusion = llvm::FPOpFusion::Standard; break; - case LangOptions::FPC_Fast: + case LangOptions::FPM_Fast: Options.AllowFPOpFusion = llvm::FPOpFusion::Fast; break; } @@ -466,22 +479,44 @@ static void initTargetOptions(llvm::TargetOptions &Options, if (LangOpts.WasmExceptions) Options.ExceptionModel = llvm::ExceptionHandling::Wasm; - Options.NoInfsFPMath = CodeGenOpts.NoInfsFPMath; - Options.NoNaNsFPMath = CodeGenOpts.NoNaNsFPMath; + Options.NoInfsFPMath = LangOpts.NoHonorInfs; + Options.NoNaNsFPMath = LangOpts.NoHonorNaNs; Options.NoZerosInBSS = CodeGenOpts.NoZeroInitializedInBSS; - Options.UnsafeFPMath = CodeGenOpts.UnsafeFPMath; + Options.UnsafeFPMath = LangOpts.UnsafeFPMath; Options.StackAlignmentOverride = CodeGenOpts.StackAlignment; + + Options.BBSections = + llvm::StringSwitch<llvm::BasicBlockSection>(CodeGenOpts.BBSections) + .Case("all", llvm::BasicBlockSection::All) + .Case("labels", llvm::BasicBlockSection::Labels) + .StartsWith("list=", llvm::BasicBlockSection::List) + .Case("none", llvm::BasicBlockSection::None) + .Default(llvm::BasicBlockSection::None); + + if (Options.BBSections == llvm::BasicBlockSection::List) { + ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = + MemoryBuffer::getFile(CodeGenOpts.BBSections.substr(5)); + if (!MBOrErr) + Diags.Report(diag::err_fe_unable_to_load_basic_block_sections_file) + << MBOrErr.getError().message(); + else + Options.BBSectionsFuncListBuf = std::move(*MBOrErr); + } + Options.FunctionSections = CodeGenOpts.FunctionSections; Options.DataSections = CodeGenOpts.DataSections; Options.UniqueSectionNames = CodeGenOpts.UniqueSectionNames; + Options.UniqueBasicBlockSectionNames = + CodeGenOpts.UniqueBasicBlockSectionNames; Options.TLSSize = CodeGenOpts.TLSSize; Options.EmulatedTLS = CodeGenOpts.EmulatedTLS; Options.ExplicitEmulatedTLS = CodeGenOpts.ExplicitEmulatedTLS; Options.DebuggerTuning = CodeGenOpts.getDebuggerTuning(); Options.EmitStackSizeSection = CodeGenOpts.StackSizeSection; Options.EmitAddrsig = CodeGenOpts.Addrsig; - Options.EnableDebugEntryValues = CodeGenOpts.EnableDebugEntryValues; Options.ForceDwarfFrameSection = CodeGenOpts.ForceDwarfFrameSection; + Options.EmitCallSiteInfo = CodeGenOpts.EmitCallSiteInfo; + 
Options.XRayOmitFunctionIndex = CodeGenOpts.XRayOmitFunctionIndex; Options.MCOptions.SplitDwarfFile = CodeGenOpts.SplitDwarfFile; Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll; @@ -502,6 +537,8 @@ static void initTargetOptions(llvm::TargetOptions &Options, Entry.Group == frontend::IncludeDirGroup::System)) Options.MCOptions.IASSearchPaths.push_back( Entry.IgnoreSysRoot ? Entry.Path : HSOpts.Sysroot + Entry.Path); + Options.MCOptions.Argv0 = CodeGenOpts.Argv0; + Options.MCOptions.CommandLineArgs = CodeGenOpts.CommandLineArgs; } static Optional<GCOVOptions> getGCOVOptions(const CodeGenOptions &CodeGenOpts) { if (CodeGenOpts.DisableGCov) @@ -514,12 +551,9 @@ static Optional<GCOVOptions> getGCOVOptions(const CodeGenOptions &CodeGenOpts) { Options.EmitNotes = CodeGenOpts.EmitGcovNotes; Options.EmitData = CodeGenOpts.EmitGcovArcs; llvm::copy(CodeGenOpts.CoverageVersion, std::begin(Options.Version)); - Options.UseCfgChecksum = CodeGenOpts.CoverageExtraChecksum; Options.NoRedZone = CodeGenOpts.DisableRedZone; - Options.FunctionNamesInData = !CodeGenOpts.CoverageNoFunctionNamesInData; Options.Filter = CodeGenOpts.ProfileFilterFiles; Options.Exclude = CodeGenOpts.ProfileExcludeFiles; - Options.ExitBlockBeforeBody = CodeGenOpts.CoverageExitBlockBeforeBody; return Options; } @@ -553,13 +587,24 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, std::unique_ptr<TargetLibraryInfoImpl> TLII( createTLII(TargetTriple, CodeGenOpts)); + // If we reached here with a non-empty index file name, then the index file + // was empty and we are not performing ThinLTO backend compilation (used in + // testing in a distributed build environment). Drop any the type test + // assume sequences inserted for whole program vtables so that codegen doesn't + // complain. + if (!CodeGenOpts.ThinLTOIndexFile.empty()) + MPM.add(createLowerTypeTestsPass(/*ExportSummary=*/nullptr, + /*ImportSummary=*/nullptr, + /*DropTypeTests=*/true)); + PassManagerBuilderWrapper PMBuilder(TargetTriple, CodeGenOpts, LangOpts); // At O0 and O1 we only run the always inliner which is more efficient. At // higher optimization levels we run the normal inliner. if (CodeGenOpts.OptimizationLevel <= 1) { - bool InsertLifetimeIntrinsics = (CodeGenOpts.OptimizationLevel != 0 && - !CodeGenOpts.DisableLifetimeMarkers); + bool InsertLifetimeIntrinsics = ((CodeGenOpts.OptimizationLevel != 0 && + !CodeGenOpts.DisableLifetimeMarkers) || + LangOpts.Coroutines); PMBuilder.Inliner = createAlwaysInlinerLegacyPass(InsertLifetimeIntrinsics); } else { // We do not want to inline hot callsites for SamplePGO module-summary build @@ -575,6 +620,9 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, PMBuilder.SizeLevel = CodeGenOpts.OptimizeSize; PMBuilder.SLPVectorize = CodeGenOpts.VectorizeSLP; PMBuilder.LoopVectorize = CodeGenOpts.VectorizeLoop; + // Only enable CGProfilePass when using integrated assembler, since + // non-integrated assemblers don't recognize .cgprofile section. + PMBuilder.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS; PMBuilder.DisableUnrollLoops = !CodeGenOpts.UnrollLoops; // Loop interleaving in the loop vectorizer has historically been set to be @@ -689,6 +737,12 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, if (!CodeGenOpts.RewriteMapFiles.empty()) addSymbolRewriterPass(CodeGenOpts, &MPM); + // Add UniqueInternalLinkageNames Pass which renames internal linkage symbols + // with unique names. 
+ if (CodeGenOpts.UniqueInternalLinkageNames) { + MPM.add(createUniqueInternalLinkageNamesPass()); + } + if (Optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts)) { MPM.add(createGCOVProfilerPass(*Options)); if (CodeGenOpts.getDebugInfo() == codegenoptions::NoDebugInfo) @@ -718,7 +772,7 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, if (!CodeGenOpts.InstrProfileOutput.empty()) PMBuilder.PGOInstrGen = CodeGenOpts.InstrProfileOutput; else - PMBuilder.PGOInstrGen = DefaultProfileGenName; + PMBuilder.PGOInstrGen = std::string(DefaultProfileGenName); } if (CodeGenOpts.hasProfileIRUse()) { PMBuilder.PGOInstrUse = CodeGenOpts.ProfileInstrumentUsePath; @@ -766,7 +820,7 @@ void EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { CodeGenOpt::Level OptLevel = getCGOptLevel(CodeGenOpts); llvm::TargetOptions Options; - initTargetOptions(Options, CodeGenOpts, TargetOpts, LangOpts, HSOpts); + initTargetOptions(Diags, Options, CodeGenOpts, TargetOpts, LangOpts, HSOpts); TM.reset(TheTarget->createTargetMachine(Triple, TargetOpts.CPU, FeaturesStr, Options, RM, CM, OptLevel)); } @@ -924,7 +978,7 @@ static PassBuilder::OptimizationLevel mapToLevel(const CodeGenOptions &Opts) { llvm_unreachable("Invalid optimization level!"); case 1: - return PassBuilder::O1; + return PassBuilder::OptimizationLevel::O1; case 2: switch (Opts.OptimizeSize) { @@ -932,24 +986,49 @@ static PassBuilder::OptimizationLevel mapToLevel(const CodeGenOptions &Opts) { llvm_unreachable("Invalid optimization level for size!"); case 0: - return PassBuilder::O2; + return PassBuilder::OptimizationLevel::O2; case 1: - return PassBuilder::Os; + return PassBuilder::OptimizationLevel::Os; case 2: - return PassBuilder::Oz; + return PassBuilder::OptimizationLevel::Oz; } case 3: - return PassBuilder::O3; + return PassBuilder::OptimizationLevel::O3; } } +static void addCoroutinePassesAtO0(ModulePassManager &MPM, + const LangOptions &LangOpts, + const CodeGenOptions &CodeGenOpts) { + if (!LangOpts.Coroutines) + return; + + MPM.addPass(createModuleToFunctionPassAdaptor(CoroEarlyPass())); + + CGSCCPassManager CGPM(CodeGenOpts.DebugPassManager); + CGPM.addPass(CoroSplitPass()); + CGPM.addPass(createCGSCCToFunctionPassAdaptor(CoroElidePass())); + MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); + + MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass())); +} + static void addSanitizersAtO0(ModulePassManager &MPM, const Triple &TargetTriple, const LangOptions &LangOpts, const CodeGenOptions &CodeGenOpts) { + if (CodeGenOpts.SanitizeCoverageType || + CodeGenOpts.SanitizeCoverageIndirectCalls || + CodeGenOpts.SanitizeCoverageTraceCmp) { + auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts); + MPM.addPass(ModuleSanitizerCoveragePass( + SancovOpts, CodeGenOpts.SanitizeCoverageAllowlistFiles, + CodeGenOpts.SanitizeCoverageBlocklistFiles)); + } + auto ASanPass = [&](SanitizerMask Mask, bool CompileKernel) { MPM.addPass(RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>()); bool Recover = CodeGenOpts.SanitizeRecover.has(Mask); @@ -970,8 +1049,11 @@ static void addSanitizersAtO0(ModulePassManager &MPM, } if (LangOpts.Sanitize.has(SanitizerKind::Memory)) { - MPM.addPass(MemorySanitizerPass({})); - MPM.addPass(createModuleToFunctionPassAdaptor(MemorySanitizerPass({}))); + bool Recover = CodeGenOpts.SanitizeRecover.has(SanitizerKind::Memory); + int TrackOrigins = CodeGenOpts.SanitizeMemoryTrackOrigins; + MPM.addPass(MemorySanitizerPass({TrackOrigins, Recover, false})); + 
MPM.addPass(createModuleToFunctionPassAdaptor( + MemorySanitizerPass({TrackOrigins, Recover, false}))); } if (LangOpts.Sanitize.has(SanitizerKind::KernelMemory)) { @@ -1013,7 +1095,7 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( if (CodeGenOpts.hasProfileIRInstr()) // -fprofile-generate. PGOOpt = PGOOptions(CodeGenOpts.InstrProfileOutput.empty() - ? DefaultProfileGenName + ? std::string(DefaultProfileGenName) : CodeGenOpts.InstrProfileOutput, "", "", PGOOptions::IRInstr, PGOOptions::NoCSAction, CodeGenOpts.DebugInfoForProfiling); @@ -1046,13 +1128,13 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( "Cannot run CSProfileGen pass with ProfileGen or SampleUse " " pass"); PGOOpt->CSProfileGenFile = CodeGenOpts.InstrProfileOutput.empty() - ? DefaultProfileGenName + ? std::string(DefaultProfileGenName) : CodeGenOpts.InstrProfileOutput; PGOOpt->CSAction = PGOOptions::CSIRInstr; } else PGOOpt = PGOOptions("", CodeGenOpts.InstrProfileOutput.empty() - ? DefaultProfileGenName + ? std::string(DefaultProfileGenName) : CodeGenOpts.InstrProfileOutput, "", PGOOptions::NoAction, PGOOptions::CSIRInstr, CodeGenOpts.DebugInfoForProfiling); @@ -1065,6 +1147,10 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( PTO.LoopInterleaving = CodeGenOpts.UnrollLoops; PTO.LoopVectorization = CodeGenOpts.VectorizeLoop; PTO.SLPVectorization = CodeGenOpts.VectorizeSLP; + // Only enable CGProfilePass when using integrated assembler, since + // non-integrated assemblers don't recognize .cgprofile section. + PTO.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS; + PTO.Coroutines = LangOpts.Coroutines; PassInstrumentationCallbacks PIC; StandardInstrumentations SI; @@ -1114,6 +1200,15 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( bool IsLTO = CodeGenOpts.PrepareForLTO; if (CodeGenOpts.OptimizationLevel == 0) { + // If we reached here with a non-empty index file name, then the index + // file was empty and we are not performing ThinLTO backend compilation + // (used in testing in a distributed build environment). Drop any the type + // test assume sequences inserted for whole program vtables so that + // codegen doesn't complain. + if (!CodeGenOpts.ThinLTOIndexFile.empty()) + MPM.addPass(LowerTypeTestsPass(/*ExportSummary=*/nullptr, + /*ImportSummary=*/nullptr, + /*DropTypeTests=*/true)); if (Optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts)) MPM.addPass(GCOVProfilerPass(*Options)); if (Optional<InstrProfOptions> Options = @@ -1124,7 +1219,10 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( // which is just that always inlining occurs. Further, disable generating // lifetime intrinsics to avoid enabling further optimizations during // code generation. - MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/false)); + // However, we need to insert lifetime intrinsics to avoid invalid access + // caused by multithreaded coroutines. + MPM.addPass( + AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/LangOpts.Coroutines)); // At -O0, we can still do PGO. Add all the requested passes for // instrumentation PGO, if requested. @@ -1140,6 +1238,12 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds)) MPM.addPass(createModuleToFunctionPassAdaptor(BoundsCheckingPass())); + // Add UniqueInternalLinkageNames Pass which renames internal linkage + // symbols with unique names. 
+ if (CodeGenOpts.UniqueInternalLinkageNames) { + MPM.addPass(UniqueInternalLinkageNamesPass()); + } + // Lastly, add semantically necessary passes for LTO. if (IsLTO || IsThinLTO) { MPM.addPass(CanonicalizeAliasesPass()); @@ -1150,6 +1254,18 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( // configure the pipeline. PassBuilder::OptimizationLevel Level = mapToLevel(CodeGenOpts); + // If we reached here with a non-empty index file name, then the index + // file was empty and we are not performing ThinLTO backend compilation + // (used in testing in a distributed build environment). Drop any the type + // test assume sequences inserted for whole program vtables so that + // codegen doesn't complain. + if (!CodeGenOpts.ThinLTOIndexFile.empty()) + PB.registerPipelineStartEPCallback([](ModulePassManager &MPM) { + MPM.addPass(LowerTypeTestsPass(/*ExportSummary=*/nullptr, + /*ImportSummary=*/nullptr, + /*DropTypeTests=*/true)); + }); + PB.registerPipelineStartEPCallback([](ModulePassManager &MPM) { MPM.addPass(createModuleToFunctionPassAdaptor( EntryExitInstrumenterPass(/*PostInlining=*/false))); @@ -1157,50 +1273,60 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( // Register callbacks to schedule sanitizer passes at the appropriate part of // the pipeline. - // FIXME: either handle asan/the remaining sanitizers or error out if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds)) PB.registerScalarOptimizerLateEPCallback( [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) { FPM.addPass(BoundsCheckingPass()); }); + + if (CodeGenOpts.SanitizeCoverageType || + CodeGenOpts.SanitizeCoverageIndirectCalls || + CodeGenOpts.SanitizeCoverageTraceCmp) { + PB.registerOptimizerLastEPCallback( + [this](ModulePassManager &MPM, + PassBuilder::OptimizationLevel Level) { + auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts); + MPM.addPass(ModuleSanitizerCoveragePass( + SancovOpts, CodeGenOpts.SanitizeCoverageAllowlistFiles, + CodeGenOpts.SanitizeCoverageBlocklistFiles)); + }); + } + if (LangOpts.Sanitize.has(SanitizerKind::Memory)) { - PB.registerPipelineStartEPCallback([](ModulePassManager &MPM) { - MPM.addPass(MemorySanitizerPass({})); - }); + int TrackOrigins = CodeGenOpts.SanitizeMemoryTrackOrigins; + bool Recover = CodeGenOpts.SanitizeRecover.has(SanitizerKind::Memory); PB.registerOptimizerLastEPCallback( - [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) { - FPM.addPass(MemorySanitizerPass({})); + [TrackOrigins, Recover](ModulePassManager &MPM, + PassBuilder::OptimizationLevel Level) { + MPM.addPass(MemorySanitizerPass({TrackOrigins, Recover, false})); + MPM.addPass(createModuleToFunctionPassAdaptor( + MemorySanitizerPass({TrackOrigins, Recover, false}))); }); } if (LangOpts.Sanitize.has(SanitizerKind::Thread)) { - PB.registerPipelineStartEPCallback( - [](ModulePassManager &MPM) { MPM.addPass(ThreadSanitizerPass()); }); PB.registerOptimizerLastEPCallback( - [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) { - FPM.addPass(ThreadSanitizerPass()); + [](ModulePassManager &MPM, PassBuilder::OptimizationLevel Level) { + MPM.addPass(ThreadSanitizerPass()); + MPM.addPass( + createModuleToFunctionPassAdaptor(ThreadSanitizerPass())); }); } if (LangOpts.Sanitize.has(SanitizerKind::Address)) { - PB.registerPipelineStartEPCallback([&](ModulePassManager &MPM) { - MPM.addPass( - RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>()); - }); bool Recover = CodeGenOpts.SanitizeRecover.has(SanitizerKind::Address); bool UseAfterScope = 
CodeGenOpts.SanitizeAddressUseAfterScope; - PB.registerOptimizerLastEPCallback( - [Recover, UseAfterScope](FunctionPassManager &FPM, - PassBuilder::OptimizationLevel Level) { - FPM.addPass(AddressSanitizerPass( - /*CompileKernel=*/false, Recover, UseAfterScope)); - }); bool ModuleUseAfterScope = asanUseGlobalsGC(TargetTriple, CodeGenOpts); bool UseOdrIndicator = CodeGenOpts.SanitizeAddressUseOdrIndicator; - PB.registerPipelineStartEPCallback( - [Recover, ModuleUseAfterScope, - UseOdrIndicator](ModulePassManager &MPM) { + PB.registerOptimizerLastEPCallback( + [Recover, UseAfterScope, ModuleUseAfterScope, UseOdrIndicator]( + ModulePassManager &MPM, PassBuilder::OptimizationLevel Level) { + MPM.addPass( + RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>()); MPM.addPass(ModuleAddressSanitizerPass( /*CompileKernel=*/false, Recover, ModuleUseAfterScope, UseOdrIndicator)); + MPM.addPass( + createModuleToFunctionPassAdaptor(AddressSanitizerPass( + /*CompileKernel=*/false, Recover, UseAfterScope))); }); } if (Optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts)) @@ -1213,6 +1339,12 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( MPM.addPass(InstrProfiling(*Options, false)); }); + // Add UniqueInternalLinkageNames Pass which renames internal linkage + // symbols with unique names. + if (CodeGenOpts.UniqueInternalLinkageNames) { + MPM.addPass(UniqueInternalLinkageNamesPass()); + } + if (IsThinLTO) { MPM = PB.buildThinLTOPreLinkDefaultPipeline( Level, CodeGenOpts.DebugPassManager); @@ -1229,13 +1361,6 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( } } - if (CodeGenOpts.SanitizeCoverageType || - CodeGenOpts.SanitizeCoverageIndirectCalls || - CodeGenOpts.SanitizeCoverageTraceCmp) { - auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts); - MPM.addPass(ModuleSanitizerCoveragePass(SancovOpts)); - } - if (LangOpts.Sanitize.has(SanitizerKind::HWAddress)) { bool Recover = CodeGenOpts.SanitizeRecover.has(SanitizerKind::HWAddress); MPM.addPass(HWAddressSanitizerPass( @@ -1247,6 +1372,7 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( } if (CodeGenOpts.OptimizationLevel == 0) { + addCoroutinePassesAtO0(MPM, LangOpts, CodeGenOpts); addSanitizersAtO0(MPM, TargetTriple, LangOpts, CodeGenOpts); } } @@ -1358,15 +1484,12 @@ BitcodeModule *clang::FindThinLTOModule(MutableArrayRef<BitcodeModule> BMs) { return nullptr; } -static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M, - const HeaderSearchOptions &HeaderOpts, - const CodeGenOptions &CGOpts, - const clang::TargetOptions &TOpts, - const LangOptions &LOpts, - std::unique_ptr<raw_pwrite_stream> OS, - std::string SampleProfile, - std::string ProfileRemapping, - BackendAction Action) { +static void runThinLTOBackend( + DiagnosticsEngine &Diags, ModuleSummaryIndex *CombinedIndex, Module *M, + const HeaderSearchOptions &HeaderOpts, const CodeGenOptions &CGOpts, + const clang::TargetOptions &TOpts, const LangOptions &LOpts, + std::unique_ptr<raw_pwrite_stream> OS, std::string SampleProfile, + std::string ProfileRemapping, BackendAction Action) { StringMap<DenseMap<GlobalValue::GUID, GlobalValueSummary *>> ModuleToDefinedGVSummaries; CombinedIndex->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); @@ -1436,7 +1559,7 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M, Conf.RelocModel = CGOpts.RelocationModel; Conf.CGOptLevel = getCGOptLevel(CGOpts); Conf.OptLevel = CGOpts.OptimizationLevel; - initTargetOptions(Conf.Options, CGOpts, TOpts, LOpts, HeaderOpts); + 
initTargetOptions(Diags, Conf.Options, CGOpts, TOpts, LOpts, HeaderOpts); Conf.SampleProfile = std::move(SampleProfile); Conf.PTO.LoopUnrolling = CGOpts.UnrollLoops; // For historical reasons, loop interleaving is set to mirror setting for loop @@ -1444,6 +1567,9 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M, Conf.PTO.LoopInterleaving = CGOpts.UnrollLoops; Conf.PTO.LoopVectorization = CGOpts.VectorizeLoop; Conf.PTO.SLPVectorization = CGOpts.VectorizeSLP; + // Only enable CGProfilePass when using integrated assembler, since + // non-integrated assemblers don't recognize .cgprofile section. + Conf.PTO.CallGraphProfile = !CGOpts.DisableIntegratedAS; // Context sensitive profile. if (CGOpts.hasProfileCSIRInstr()) { @@ -1525,8 +1651,8 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags, // of an error). if (CombinedIndex) { if (!CombinedIndex->skipModuleByDistributedBackend()) { - runThinLTOBackend(CombinedIndex.get(), M, HeaderOpts, CGOpts, TOpts, - LOpts, std::move(OS), CGOpts.SampleProfileFile, + runThinLTOBackend(Diags, CombinedIndex.get(), M, HeaderOpts, CGOpts, + TOpts, LOpts, std::move(OS), CGOpts.SampleProfileFile, CGOpts.ProfileRemappingFile, Action); return; } diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp index 149982d82790..a58450ddd4c5 100644 --- a/clang/lib/CodeGen/CGAtomic.cpp +++ b/clang/lib/CodeGen/CGAtomic.cpp @@ -119,8 +119,9 @@ namespace { ValueTy = lvalue.getType(); ValueSizeInBits = C.getTypeSize(ValueTy); AtomicTy = ValueTy = CGF.getContext().getExtVectorType( - lvalue.getType(), lvalue.getExtVectorAddress() - .getElementType()->getVectorNumElements()); + lvalue.getType(), cast<llvm::VectorType>( + lvalue.getExtVectorAddress().getElementType()) + ->getNumElements()); AtomicSizeInBits = C.getTypeSize(AtomicTy); AtomicAlign = ValueAlign = lvalue.getAlignment(); LVal = lvalue; @@ -1826,7 +1827,7 @@ void AtomicInfo::EmitAtomicUpdateOp( auto Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO); // Do the atomic load. - auto *OldVal = EmitAtomicLoadOp(AO, IsVolatile); + auto *OldVal = EmitAtomicLoadOp(Failure, IsVolatile); // For non-simple lvalues perform compare-and-swap procedure. auto *ContBB = CGF.createBasicBlock("atomic_cont"); auto *ExitBB = CGF.createBasicBlock("atomic_exit"); @@ -1908,7 +1909,7 @@ void AtomicInfo::EmitAtomicUpdateOp(llvm::AtomicOrdering AO, RValue UpdateRVal, auto Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO); // Do the atomic load. - auto *OldVal = EmitAtomicLoadOp(AO, IsVolatile); + auto *OldVal = EmitAtomicLoadOp(Failure, IsVolatile); // For non-simple lvalues perform compare-and-swap procedure. auto *ContBB = CGF.createBasicBlock("atomic_cont"); auto *ExitBB = CGF.createBasicBlock("atomic_exit"); @@ -2018,6 +2019,10 @@ void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue dest, intValue, addr.getElementType(), /*isSigned=*/false); llvm::StoreInst *store = Builder.CreateStore(intValue, addr); + if (AO == llvm::AtomicOrdering::Acquire) + AO = llvm::AtomicOrdering::Monotonic; + else if (AO == llvm::AtomicOrdering::AcquireRelease) + AO = llvm::AtomicOrdering::Release; // Initializations don't need to be atomic. 
if (!isInit) store->setAtomic(AO); diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp index 11f54d1f7fb2..615b78235041 100644 --- a/clang/lib/CodeGen/CGBlocks.cpp +++ b/clang/lib/CodeGen/CGBlocks.cpp @@ -36,7 +36,7 @@ CGBlockInfo::CGBlockInfo(const BlockDecl *block, StringRef name) : Name(name), CXXThisIndex(0), CanBeGlobal(false), NeedsCopyDispose(false), HasCXXObject(false), UsesStret(false), HasCapturedVariableLayout(false), CapturesNonExternalType(false), LocalAddress(Address::invalid()), - StructureType(nullptr), Block(block), DominatingIP(nullptr) { + StructureType(nullptr), Block(block) { // Skip asm prefix, if any. 'name' is usually taken directly from // the mangled name of the enclosing function. @@ -775,151 +775,23 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, llvm::StructType::get(CGM.getLLVMContext(), elementTypes, true); } -/// Enter the scope of a block. This should be run at the entrance to -/// a full-expression so that the block's cleanups are pushed at the -/// right place in the stack. -static void enterBlockScope(CodeGenFunction &CGF, BlockDecl *block) { - assert(CGF.HaveInsertPoint()); - - // Allocate the block info and place it at the head of the list. - CGBlockInfo &blockInfo = - *new CGBlockInfo(block, CGF.CurFn->getName()); - blockInfo.NextBlockInfo = CGF.FirstBlockInfo; - CGF.FirstBlockInfo = &blockInfo; - - // Compute information about the layout, etc., of this block, - // pushing cleanups as necessary. - computeBlockInfo(CGF.CGM, &CGF, blockInfo); - - // Nothing else to do if it can be global. - if (blockInfo.CanBeGlobal) return; - - // Make the allocation for the block. - blockInfo.LocalAddress = CGF.CreateTempAlloca(blockInfo.StructureType, - blockInfo.BlockAlign, "block"); - - // If there are cleanups to emit, enter them (but inactive). - if (!blockInfo.NeedsCopyDispose) return; - - // Walk through the captures (in order) and find the ones not - // captured by constant. - for (const auto &CI : block->captures()) { - // Ignore __block captures; there's nothing special in the - // on-stack block that we need to do for them. - if (CI.isByRef()) continue; - - // Ignore variables that are constant-captured. - const VarDecl *variable = CI.getVariable(); - CGBlockInfo::Capture &capture = blockInfo.getCapture(variable); - if (capture.isConstant()) continue; - - // Ignore objects that aren't destructed. - QualType VT = getCaptureFieldType(CGF, CI); - QualType::DestructionKind dtorKind = VT.isDestructedType(); - if (dtorKind == QualType::DK_none) continue; - - CodeGenFunction::Destroyer *destroyer; - - // Block captures count as local values and have imprecise semantics. - // They also can't be arrays, so need to worry about that. - // - // For const-qualified captures, emit clang.arc.use to ensure the captured - // object doesn't get released while we are still depending on its validity - // within the block. - if (VT.isConstQualified() && - VT.getObjCLifetime() == Qualifiers::OCL_Strong && - CGF.CGM.getCodeGenOpts().OptimizationLevel != 0) { - assert(CGF.CGM.getLangOpts().ObjCAutoRefCount && - "expected ObjC ARC to be enabled"); - destroyer = CodeGenFunction::emitARCIntrinsicUse; - } else if (dtorKind == QualType::DK_objc_strong_lifetime) { - destroyer = CodeGenFunction::destroyARCStrongImprecise; - } else { - destroyer = CGF.getDestroyer(dtorKind); - } - - // GEP down to the address. 
- Address addr = - CGF.Builder.CreateStructGEP(blockInfo.LocalAddress, capture.getIndex()); - - // We can use that GEP as the dominating IP. - if (!blockInfo.DominatingIP) - blockInfo.DominatingIP = cast<llvm::Instruction>(addr.getPointer()); - - CleanupKind cleanupKind = InactiveNormalCleanup; - bool useArrayEHCleanup = CGF.needsEHCleanup(dtorKind); - if (useArrayEHCleanup) - cleanupKind = InactiveNormalAndEHCleanup; - - CGF.pushDestroy(cleanupKind, addr, VT, - destroyer, useArrayEHCleanup); - - // Remember where that cleanup was. - capture.setCleanup(CGF.EHStack.stable_begin()); - } -} - -/// Enter a full-expression with a non-trivial number of objects to -/// clean up. This is in this file because, at the moment, the only -/// kind of cleanup object is a BlockDecl*. -void CodeGenFunction::enterNonTrivialFullExpression(const FullExpr *E) { - if (const auto EWC = dyn_cast<ExprWithCleanups>(E)) { - assert(EWC->getNumObjects() != 0); - for (const ExprWithCleanups::CleanupObject &C : EWC->getObjects()) - enterBlockScope(*this, C); - } -} - -/// Find the layout for the given block in a linked list and remove it. -static CGBlockInfo *findAndRemoveBlockInfo(CGBlockInfo **head, - const BlockDecl *block) { - while (true) { - assert(head && *head); - CGBlockInfo *cur = *head; - - // If this is the block we're looking for, splice it out of the list. - if (cur->getBlockDecl() == block) { - *head = cur->NextBlockInfo; - return cur; - } - - head = &cur->NextBlockInfo; - } -} - -/// Destroy a chain of block layouts. -void CodeGenFunction::destroyBlockInfos(CGBlockInfo *head) { - assert(head && "destroying an empty chain"); - do { - CGBlockInfo *cur = head; - head = cur->NextBlockInfo; - delete cur; - } while (head != nullptr); -} - /// Emit a block literal expression in the current function. llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr) { // If the block has no captures, we won't have a pre-computed // layout for it. - if (!blockExpr->getBlockDecl()->hasCaptures()) { + if (!blockExpr->getBlockDecl()->hasCaptures()) // The block literal is emitted as a global variable, and the block invoke // function has to be extracted from its initializer. - if (llvm::Constant *Block = CGM.getAddrOfGlobalBlockIfEmitted(blockExpr)) { + if (llvm::Constant *Block = CGM.getAddrOfGlobalBlockIfEmitted(blockExpr)) return Block; - } - CGBlockInfo blockInfo(blockExpr->getBlockDecl(), CurFn->getName()); - computeBlockInfo(CGM, this, blockInfo); - blockInfo.BlockExpression = blockExpr; - return EmitBlockLiteral(blockInfo); - } - - // Find the block info for this block and take ownership of it. - std::unique_ptr<CGBlockInfo> blockInfo; - blockInfo.reset(findAndRemoveBlockInfo(&FirstBlockInfo, - blockExpr->getBlockDecl())); - blockInfo->BlockExpression = blockExpr; - return EmitBlockLiteral(*blockInfo); + CGBlockInfo blockInfo(blockExpr->getBlockDecl(), CurFn->getName()); + computeBlockInfo(CGM, this, blockInfo); + blockInfo.BlockExpression = blockExpr; + if (!blockInfo.CanBeGlobal) + blockInfo.LocalAddress = CreateTempAlloca(blockInfo.StructureType, + blockInfo.BlockAlign, "block"); + return EmitBlockLiteral(blockInfo); } llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { @@ -1161,12 +1033,64 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { /*captured by init*/ false); } - // Activate the cleanup if layout pushed one. 
- if (!CI.isByRef()) { - EHScopeStack::stable_iterator cleanup = capture.getCleanup(); - if (cleanup.isValid()) - ActivateCleanupBlock(cleanup, blockInfo.DominatingIP); + // Push a cleanup for the capture if necessary. + if (!blockInfo.NeedsCopyDispose) + continue; + + // Ignore __block captures; there's nothing special in the on-stack block + // that we need to do for them. + if (CI.isByRef()) + continue; + + // Ignore objects that aren't destructed. + QualType::DestructionKind dtorKind = type.isDestructedType(); + if (dtorKind == QualType::DK_none) + continue; + + CodeGenFunction::Destroyer *destroyer; + + // Block captures count as local values and have imprecise semantics. + // They also can't be arrays, so need to worry about that. + // + // For const-qualified captures, emit clang.arc.use to ensure the captured + // object doesn't get released while we are still depending on its validity + // within the block. + if (type.isConstQualified() && + type.getObjCLifetime() == Qualifiers::OCL_Strong && + CGM.getCodeGenOpts().OptimizationLevel != 0) { + assert(CGM.getLangOpts().ObjCAutoRefCount && + "expected ObjC ARC to be enabled"); + destroyer = emitARCIntrinsicUse; + } else if (dtorKind == QualType::DK_objc_strong_lifetime) { + destroyer = destroyARCStrongImprecise; + } else { + destroyer = getDestroyer(dtorKind); } + + CleanupKind cleanupKind = NormalCleanup; + bool useArrayEHCleanup = needsEHCleanup(dtorKind); + if (useArrayEHCleanup) + cleanupKind = NormalAndEHCleanup; + + // Extend the lifetime of the capture to the end of the scope enclosing the + // block expression except when the block decl is in the list of RetExpr's + // cleanup objects, in which case its lifetime ends after the full + // expression. + auto IsBlockDeclInRetExpr = [&]() { + auto *EWC = llvm::dyn_cast_or_null<ExprWithCleanups>(RetExpr); + if (EWC) + for (auto &C : EWC->getObjects()) + if (auto *BD = C.dyn_cast<BlockDecl *>()) + if (BD == blockDecl) + return true; + return false; + }; + + if (IsBlockDeclInRetExpr()) + pushDestroy(cleanupKind, blockField, type, destroyer, useArrayEHCleanup); + else + pushLifetimeExtendedDestroy(cleanupKind, blockField, type, destroyer, + useArrayEHCleanup); } // Cast to the converted block-pointer type, which happens (somewhat @@ -1449,7 +1373,8 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM, llvm::IRBuilder<> b(llvm::BasicBlock::Create(CGM.getLLVMContext(), "entry", Init)); b.CreateAlignedStore(CGM.getNSConcreteGlobalBlock(), - b.CreateStructGEP(literal, 0), CGM.getPointerAlign().getQuantity()); + b.CreateStructGEP(literal, 0), + CGM.getPointerAlign().getAsAlign()); b.CreateRetVoid(); // We can't use the normal LLVM global initialisation array, because we // need to specify that this runs early in library initialisation. @@ -2031,11 +1956,13 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { FunctionDecl *FD = FunctionDecl::Create( C, C.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II, FunctionTy, nullptr, SC_Static, false, false); - setBlockHelperAttributesVisibility(blockInfo.CapturesNonExternalType, Fn, FI, CGM); + // This is necessary to avoid inheriting the previous line number. 
+ FD->setImplicit(); StartFunction(FD, ReturnTy, Fn, FI, args); - ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getBeginLoc()}; + auto AL = ApplyDebugLocation::CreateArtificial(*this); + llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo(); Address src = GetAddrOfLocalVar(&SrcDecl); @@ -2226,10 +2153,12 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { setBlockHelperAttributesVisibility(blockInfo.CapturesNonExternalType, Fn, FI, CGM); + // This is necessary to avoid inheriting the previous line number. + FD->setImplicit(); StartFunction(FD, ReturnTy, Fn, FI, args); markAsIgnoreThreadCheckingAtRuntime(Fn); - ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getBeginLoc()}; + auto AL = ApplyDebugLocation::CreateArtificial(*this); llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo(); diff --git a/clang/lib/CodeGen/CGBlocks.h b/clang/lib/CodeGen/CGBlocks.h index c4bfde666154..698ecd3d926a 100644 --- a/clang/lib/CodeGen/CGBlocks.h +++ b/clang/lib/CodeGen/CGBlocks.h @@ -257,10 +257,6 @@ public: // This could be zero if no forced alignment is required. CharUnits BlockHeaderForcedGapSize; - /// An instruction which dominates the full-expression that the - /// block is inside. - llvm::Instruction *DominatingIP; - /// The next block in the block-info chain. Invalid if this block /// info is not part of the CGF's block-info chain, which is true /// if it corresponds to a global block or a block whose expression diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h index 107c9275431c..38e96c0f4ee6 100644 --- a/clang/lib/CodeGen/CGBuilder.h +++ b/clang/lib/CodeGen/CGBuilder.h @@ -22,16 +22,15 @@ class CodeGenFunction; /// This is an IRBuilder insertion helper that forwards to /// CodeGenFunction::InsertHelper, which adds necessary metadata to /// instructions. -class CGBuilderInserter : protected llvm::IRBuilderDefaultInserter { +class CGBuilderInserter final : public llvm::IRBuilderDefaultInserter { public: CGBuilderInserter() = default; explicit CGBuilderInserter(CodeGenFunction *CGF) : CGF(CGF) {} -protected: /// This forwards to CodeGenFunction::InsertHelper. void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name, llvm::BasicBlock *BB, - llvm::BasicBlock::iterator InsertPt) const; + llvm::BasicBlock::iterator InsertPt) const override; private: CodeGenFunction *CGF = nullptr; }; @@ -68,38 +67,34 @@ public: // take an alignment. llvm::LoadInst *CreateLoad(Address Addr, const llvm::Twine &Name = "") { return CreateAlignedLoad(Addr.getPointer(), - Addr.getAlignment().getQuantity(), - Name); + Addr.getAlignment().getAsAlign(), Name); } llvm::LoadInst *CreateLoad(Address Addr, const char *Name) { // This overload is required to prevent string literals from // ending up in the IsVolatile overload. 
return CreateAlignedLoad(Addr.getPointer(), - Addr.getAlignment().getQuantity(), - Name); + Addr.getAlignment().getAsAlign(), Name); } llvm::LoadInst *CreateLoad(Address Addr, bool IsVolatile, const llvm::Twine &Name = "") { - return CreateAlignedLoad(Addr.getPointer(), - Addr.getAlignment().getQuantity(), - IsVolatile, - Name); + return CreateAlignedLoad( + Addr.getPointer(), Addr.getAlignment().getAsAlign(), IsVolatile, Name); } using CGBuilderBaseTy::CreateAlignedLoad; llvm::LoadInst *CreateAlignedLoad(llvm::Value *Addr, CharUnits Align, const llvm::Twine &Name = "") { - return CreateAlignedLoad(Addr, Align.getQuantity(), Name); + return CreateAlignedLoad(Addr, Align.getAsAlign(), Name); } llvm::LoadInst *CreateAlignedLoad(llvm::Value *Addr, CharUnits Align, const char *Name) { - return CreateAlignedLoad(Addr, Align.getQuantity(), Name); + return CreateAlignedLoad(Addr, Align.getAsAlign(), Name); } llvm::LoadInst *CreateAlignedLoad(llvm::Type *Ty, llvm::Value *Addr, CharUnits Align, const llvm::Twine &Name = "") { assert(Addr->getType()->getPointerElementType() == Ty); - return CreateAlignedLoad(Addr, Align.getQuantity(), Name); + return CreateAlignedLoad(Addr, Align.getAsAlign(), Name); } // Note that we intentionally hide the CreateStore APIs that don't @@ -113,7 +108,7 @@ public: using CGBuilderBaseTy::CreateAlignedStore; llvm::StoreInst *CreateAlignedStore(llvm::Value *Val, llvm::Value *Addr, CharUnits Align, bool IsVolatile = false) { - return CreateAlignedStore(Val, Addr, Align.getQuantity(), IsVolatile); + return CreateAlignedStore(Val, Addr, Align.getAsAlign(), IsVolatile); } // FIXME: these "default-aligned" APIs should be removed, @@ -284,6 +279,13 @@ public: IsVolatile); } + using CGBuilderBaseTy::CreateMemCpyInline; + llvm::CallInst *CreateMemCpyInline(Address Dest, Address Src, uint64_t Size) { + return CreateMemCpyInline( + Dest.getPointer(), Dest.getAlignment().getAsAlign(), Src.getPointer(), + Src.getAlignment().getAsAlign(), getInt64(Size)); + } + using CGBuilderBaseTy::CreateMemMove; llvm::CallInst *CreateMemMove(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile = false) { diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 09fd3087b494..8994b939093e 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -28,6 +28,7 @@ #include "clang/CodeGen/CGFunctionInfo.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Intrinsics.h" @@ -43,9 +44,10 @@ #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/MatrixBuilder.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ScopedPrinter.h" -#include "llvm/Support/TargetParser.h" +#include "llvm/Support/X86TargetParser.h" #include <sstream> using namespace clang; @@ -74,6 +76,8 @@ static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, break; } } + if (CGF.CGM.stopAutoInit()) + return; CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes); } @@ -215,8 +219,9 @@ static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]); if (Invert) - Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result, - llvm::ConstantInt::get(IntType, -1)); + Result = + 
CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result, + llvm::ConstantInt::getAllOnesValue(IntType)); Result = EmitFromInt(CGF, Result, T, ValueType); return RValue::get(Result); } @@ -411,6 +416,25 @@ static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, } } +// Emit an intrinsic where all operands are of the same type as the result. +// Depending on mode, this may be a constrained floating-point intrinsic. +static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, + unsigned IntrinsicID, + unsigned ConstrainedIntrinsicID, + llvm::Type *Ty, + ArrayRef<Value *> Args) { + Function *F; + if (CGF.Builder.getIsFPConstrained()) + F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty); + else + F = CGF.CGM.getIntrinsic(IntrinsicID, Ty); + + if (CGF.Builder.getIsFPConstrained()) + return CGF.Builder.CreateConstrainedFPCall(F, Args); + else + return CGF.Builder.CreateCall(F, Args); +} + // Emit a simple mangled intrinsic that has 1 argument and a return type // matching the argument type. static Value *emitUnaryBuiltin(CodeGenFunction &CGF, @@ -566,7 +590,9 @@ static WidthAndSignedness getIntegerWidthAndSignedness(const clang::ASTContext &context, const clang::QualType Type) { assert(Type->isIntegerType() && "Given type is not an integer."); - unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width; + unsigned Width = Type->isBooleanType() ? 1 + : Type->isExtIntType() ? context.getIntWidth(Type) + : context.getTypeInfo(Type).Width; bool Signed = Type->isSignedIntegerType(); return {Width, Signed}; } @@ -1251,6 +1277,8 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( FunctionDecl *FD = FunctionDecl::Create( Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II, FuncionTy, nullptr, SC_PrivateExtern, false, false); + // Avoid generating debug location info for the function. + FD->setImplicit(); StartFunction(FD, ReturnTy, Fn, FI, Args); @@ -1320,14 +1348,42 @@ RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) { } else if (const Expr *TheExpr = Item.getExpr()) { ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false); - // Check if this is a retainable type. - if (TheExpr->getType()->isObjCRetainableType()) { + // If a temporary object that requires destruction after the full + // expression is passed, push a lifetime-extended cleanup to extend its + // lifetime to the end of the enclosing block scope. + auto LifetimeExtendObject = [&](const Expr *E) { + E = E->IgnoreParenCasts(); + // Extend lifetimes of objects returned by function calls and message + // sends. + + // FIXME: We should do this in other cases in which temporaries are + // created including arguments of non-ARC types (e.g., C++ + // temporaries). + if (isa<CallExpr>(E) || isa<ObjCMessageExpr>(E)) + return true; + return false; + }; + + if (TheExpr->getType()->isObjCRetainableType() && + getLangOpts().ObjCAutoRefCount && LifetimeExtendObject(TheExpr)) { assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar && "Only scalar can be a ObjC retainable type"); - // Check if the object is constant, if not, save it in - // RetainableOperands. 
- if (!isa<Constant>(ArgVal)) - RetainableOperands.push_back(ArgVal); + if (!isa<Constant>(ArgVal)) { + CleanupKind Cleanup = getARCCleanupKind(); + QualType Ty = TheExpr->getType(); + Address Alloca = Address::invalid(); + Address Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca); + ArgVal = EmitARCRetain(Ty, ArgVal); + Builder.CreateStore(ArgVal, Addr); + pushLifetimeExtendedDestroy(Cleanup, Alloca, Ty, + CodeGenFunction::destroyARCStrongPrecise, + Cleanup & EHCleanup); + + // Push a clang.arc.use call to ensure ARC optimizer knows that the + // argument has to be alive. + if (CGM.getCodeGenOpts().OptimizationLevel != 0) + pushCleanupAfterFullExpr<CallObjCArcUse>(Cleanup, ArgVal); + } } } else { ArgVal = Builder.getInt32(Item.getConstValue().getQuantity()); @@ -1349,18 +1405,6 @@ RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) { llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction( Layout, BufAddr.getAlignment()); EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args); - - // Push a clang.arc.use cleanup for each object in RetainableOperands. The - // cleanup will cause the use to appear after the final log call, keeping - // the object valid while it’s held in the log buffer. Note that if there’s - // a release cleanup on the object, it will already be active; since - // cleanups are emitted in reverse order, the use will occur before the - // object is released. - if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount && - CGM.getCodeGenOpts().OptimizationLevel != 0) - for (llvm::Value *Object : RetainableOperands) - pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), Object); - return RValue::get(BufAddr.getPointer()); } @@ -1521,8 +1565,7 @@ static llvm::Value *dumpRecord(CodeGenFunction &CGF, QualType RType, // We check whether we are in a recursive type if (CanonicalType->isRecordType()) { - Value *TmpRes = - dumpRecord(CGF, CanonicalType, FieldPtr, Align, Func, Lvl + 1); + TmpRes = dumpRecord(CGF, CanonicalType, FieldPtr, Align, Func, Lvl + 1); Res = CGF.Builder.CreateAdd(TmpRes, Res); continue; } @@ -1629,7 +1672,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_ceilf: case Builtin::BI__builtin_ceilf16: case Builtin::BI__builtin_ceill: - return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::ceil, Intrinsic::experimental_constrained_ceil)); @@ -1650,7 +1693,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_cosf: case Builtin::BI__builtin_cosf16: case Builtin::BI__builtin_cosl: - return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::cos, Intrinsic::experimental_constrained_cos)); @@ -1661,7 +1704,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_expf: case Builtin::BI__builtin_expf16: case Builtin::BI__builtin_expl: - return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::exp, Intrinsic::experimental_constrained_exp)); @@ -1672,7 +1715,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_exp2f: case Builtin::BI__builtin_exp2f16: case Builtin::BI__builtin_exp2l: - return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, 
+ return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::exp2, Intrinsic::experimental_constrained_exp2)); @@ -1693,7 +1736,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_floorf: case Builtin::BI__builtin_floorf16: case Builtin::BI__builtin_floorl: - return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::floor, Intrinsic::experimental_constrained_floor)); @@ -1704,7 +1747,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_fmaf: case Builtin::BI__builtin_fmaf16: case Builtin::BI__builtin_fmal: - return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E, + return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::fma, Intrinsic::experimental_constrained_fma)); @@ -1715,7 +1758,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_fmaxf: case Builtin::BI__builtin_fmaxf16: case Builtin::BI__builtin_fmaxl: - return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E, + return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::maxnum, Intrinsic::experimental_constrained_maxnum)); @@ -1726,7 +1769,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_fminf: case Builtin::BI__builtin_fminf16: case Builtin::BI__builtin_fminl: - return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E, + return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::minnum, Intrinsic::experimental_constrained_minnum)); @@ -1751,7 +1794,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_logf: case Builtin::BI__builtin_logf16: case Builtin::BI__builtin_logl: - return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::log, Intrinsic::experimental_constrained_log)); @@ -1762,7 +1805,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_log10f: case Builtin::BI__builtin_log10f16: case Builtin::BI__builtin_log10l: - return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::log10, Intrinsic::experimental_constrained_log10)); @@ -1773,7 +1816,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_log2f: case Builtin::BI__builtin_log2f16: case Builtin::BI__builtin_log2l: - return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::log2, Intrinsic::experimental_constrained_log2)); @@ -1783,7 +1826,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_nearbyint: case Builtin::BI__builtin_nearbyintf: case Builtin::BI__builtin_nearbyintl: - return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::nearbyint, Intrinsic::experimental_constrained_nearbyint)); @@ -1794,7 +1837,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_powf: case Builtin::BI__builtin_powf16: case Builtin::BI__builtin_powl: - return 
RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E, + return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::pow, Intrinsic::experimental_constrained_pow)); @@ -1805,7 +1848,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_rintf: case Builtin::BI__builtin_rintf16: case Builtin::BI__builtin_rintl: - return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::rint, Intrinsic::experimental_constrained_rint)); @@ -1816,7 +1859,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_roundf: case Builtin::BI__builtin_roundf16: case Builtin::BI__builtin_roundl: - return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::round, Intrinsic::experimental_constrained_round)); @@ -1827,7 +1870,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_sinf: case Builtin::BI__builtin_sinf16: case Builtin::BI__builtin_sinl: - return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::sin, Intrinsic::experimental_constrained_sin)); @@ -1838,7 +1881,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_sqrtf: case Builtin::BI__builtin_sqrtf16: case Builtin::BI__builtin_sqrtl: - return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt)); @@ -1849,7 +1892,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_truncf: case Builtin::BI__builtin_truncf16: case Builtin::BI__builtin_truncl: - return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::trunc, Intrinsic::experimental_constrained_trunc)); @@ -2152,6 +2195,33 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval"); return RValue::get(Result); } + case Builtin::BI__builtin_expect_with_probability: { + Value *ArgValue = EmitScalarExpr(E->getArg(0)); + llvm::Type *ArgType = ArgValue->getType(); + + Value *ExpectedValue = EmitScalarExpr(E->getArg(1)); + llvm::APFloat Probability(0.0); + const Expr *ProbArg = E->getArg(2); + bool EvalSucceed = ProbArg->EvaluateAsFloat(Probability, CGM.getContext()); + assert(EvalSucceed && "probability should be able to evaluate as float"); + (void)EvalSucceed; + bool LoseInfo = false; + Probability.convert(llvm::APFloat::IEEEdouble(), + llvm::RoundingMode::Dynamic, &LoseInfo); + llvm::Type *Ty = ConvertType(ProbArg->getType()); + Constant *Confidence = ConstantFP::get(Ty, Probability); + // Don't generate llvm.expect.with.probability on -O0 as the backend + // won't use it for anything. + // Note, we still IRGen ExpectedValue because it could have side-effects. 
+ if (CGM.getCodeGenOpts().OptimizationLevel == 0) + return RValue::get(ArgValue); + + Function *FnExpect = + CGM.getIntrinsic(Intrinsic::expect_with_probability, ArgType); + Value *Result = Builder.CreateCall( + FnExpect, {ArgValue, ExpectedValue, Confidence}, "expval"); + return RValue::get(Result); + } case Builtin::BI__builtin_assume_aligned: { const Expr *Ptr = E->getArg(0); Value *PtrValue = EmitScalarExpr(Ptr); @@ -2164,7 +2234,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, AlignmentCI = ConstantInt::get(AlignmentCI->getType(), llvm::Value::MaximumAlignment); - EmitAlignmentAssumption(PtrValue, Ptr, + emitAlignmentAssumption(PtrValue, Ptr, /*The expr loc is sufficient.*/ SourceLocation(), AlignmentCI, OffsetValue); return RValue::get(PtrValue); @@ -2336,6 +2406,53 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); } + case Builtin::BI__builtin_matrix_transpose: { + const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>(); + Value *MatValue = EmitScalarExpr(E->getArg(0)); + MatrixBuilder<CGBuilderTy> MB(Builder); + Value *Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(), + MatrixTy->getNumColumns()); + return RValue::get(Result); + } + + case Builtin::BI__builtin_matrix_column_major_load: { + MatrixBuilder<CGBuilderTy> MB(Builder); + // Emit everything that isn't dependent on the first parameter type + Value *Stride = EmitScalarExpr(E->getArg(3)); + const auto *ResultTy = E->getType()->getAs<ConstantMatrixType>(); + auto *PtrTy = E->getArg(0)->getType()->getAs<PointerType>(); + assert(PtrTy && "arg0 must be of pointer type"); + bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified(); + + Address Src = EmitPointerWithAlignment(E->getArg(0)); + EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(0)->getType(), + E->getArg(0)->getExprLoc(), FD, 0); + Value *Result = MB.CreateColumnMajorLoad( + Src.getPointer(), Align(Src.getAlignment().getQuantity()), Stride, + IsVolatile, ResultTy->getNumRows(), ResultTy->getNumColumns(), + "matrix"); + return RValue::get(Result); + } + + case Builtin::BI__builtin_matrix_column_major_store: { + MatrixBuilder<CGBuilderTy> MB(Builder); + Value *Matrix = EmitScalarExpr(E->getArg(0)); + Address Dst = EmitPointerWithAlignment(E->getArg(1)); + Value *Stride = EmitScalarExpr(E->getArg(2)); + + const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>(); + auto *PtrTy = E->getArg(1)->getType()->getAs<PointerType>(); + assert(PtrTy && "arg1 must be of pointer type"); + bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified(); + + EmitNonNullArgCheck(RValue::get(Dst.getPointer()), E->getArg(1)->getType(), + E->getArg(1)->getExprLoc(), FD, 0); + Value *Result = MB.CreateColumnMajorStore( + Matrix, Dst.getPointer(), Align(Dst.getAlignment().getQuantity()), + Stride, IsVolatile, MatrixTy->getNumRows(), MatrixTy->getNumColumns()); + return RValue::get(Result); + } + case Builtin::BIfinite: case Builtin::BI__finite: case Builtin::BIfinitef: @@ -2518,6 +2635,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(Dest.getPointer()); } + case Builtin::BI__builtin_memcpy_inline: { + Address Dest = EmitPointerWithAlignment(E->getArg(0)); + Address Src = EmitPointerWithAlignment(E->getArg(1)); + uint64_t Size = + E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue(); + 
EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), + E->getArg(0)->getExprLoc(), FD, 0); + EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(), + E->getArg(1)->getExprLoc(), FD, 1); + Builder.CreateMemCpyInline(Dest, Src, Size); + return RValue::get(nullptr); + } + case Builtin::BI__builtin_char_memchr: BuiltinID = Builtin::BI__builtin_memchr; break; @@ -3222,6 +3352,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))), ConvertType(E->getType()))); } + case Builtin::BI__warn_memset_zero_len: + return RValue::getIgnored(); case Builtin::BI__annotation: { // Re-encode each wide string to UTF8 and make an MDString. SmallVector<Metadata *, 1> Strings; @@ -3928,7 +4060,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, auto *V = Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy); Builder.CreateAlignedStore( - V, GEP, CGM.getDataLayout().getPrefTypeAlignment(SizeTy)); + V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy)); } return std::tie(ElemPtr, TmpSize, TmpPtr); }; @@ -3947,19 +4079,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // Create a vector of the arguments, as well as a constant value to // express to the runtime the number of variadic arguments. - std::vector<llvm::Value *> Args = { - Queue, Flags, Range, - Kernel, Block, ConstantInt::get(IntTy, NumArgs - 4), - ElemPtr}; - std::vector<llvm::Type *> ArgTys = { + llvm::Value *const Args[] = {Queue, Flags, + Range, Kernel, + Block, ConstantInt::get(IntTy, NumArgs - 4), + ElemPtr}; + llvm::Type *const ArgTys[] = { QueueTy, IntTy, RangeTy, GenericVoidPtrTy, GenericVoidPtrTy, IntTy, ElemPtr->getType()}; - llvm::FunctionType *FTy = llvm::FunctionType::get( - Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); - auto Call = - RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), - llvm::ArrayRef<llvm::Value *>(Args))); + llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false); + auto Call = RValue::get( + Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), Args)); if (TmpSize) EmitLifetimeEnd(TmpSize, TmpPtr); return Call; @@ -4115,6 +4245,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BIprintf: if (getTarget().getTriple().isNVPTX()) return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue); + if (getTarget().getTriple().getArch() == Triple::amdgcn && + getLangOpts().HIP) + return EmitAMDGPUDevicePrintfCallExpr(E, ReturnValue); break; case Builtin::BI__builtin_canonicalize: case Builtin::BI__builtin_canonicalizef: @@ -4427,35 +4560,41 @@ Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, static llvm::VectorType *GetNeonType(CodeGenFunction *CGF, NeonTypeFlags TypeFlags, - bool HasLegalHalfType=true, - bool V1Ty=false) { + bool HasLegalHalfType = true, + bool V1Ty = false, + bool AllowBFloatArgsAndRet = true) { int IsQuad = TypeFlags.isQuad(); switch (TypeFlags.getEltType()) { case NeonTypeFlags::Int8: case NeonTypeFlags::Poly8: - return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad)); + return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad)); case NeonTypeFlags::Int16: case NeonTypeFlags::Poly16: - return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad)); + return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 
1 : (4 << IsQuad)); + case NeonTypeFlags::BFloat16: + if (AllowBFloatArgsAndRet) + return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad)); + else + return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad)); case NeonTypeFlags::Float16: if (HasLegalHalfType) - return llvm::VectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad)); + return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad)); else - return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad)); + return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad)); case NeonTypeFlags::Int32: - return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad)); + return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad)); case NeonTypeFlags::Int64: case NeonTypeFlags::Poly64: - return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad)); + return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad)); case NeonTypeFlags::Poly128: // FIXME: i128 and f128 don't get full support in Clang and LLVM. // There is a lot of i128 and f128 API missing. // So we use v16i8 to represent poly128 and get pattern matched. - return llvm::VectorType::get(CGF->Int8Ty, 16); + return llvm::FixedVectorType::get(CGF->Int8Ty, 16); case NeonTypeFlags::Float32: - return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad)); + return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad)); case NeonTypeFlags::Float64: - return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad)); + return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad)); } llvm_unreachable("Unknown vector element type!"); } @@ -4465,34 +4604,46 @@ static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF, int IsQuad = IntTypeFlags.isQuad(); switch (IntTypeFlags.getEltType()) { case NeonTypeFlags::Int16: - return llvm::VectorType::get(CGF->HalfTy, (4 << IsQuad)); + return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad)); case NeonTypeFlags::Int32: - return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad)); + return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad)); case NeonTypeFlags::Int64: - return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad)); + return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad)); default: llvm_unreachable("Type can't be converted to floating-point!"); } } -Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) { - unsigned nElts = V->getType()->getVectorNumElements(); - Value* SV = llvm::ConstantVector::getSplat(nElts, C); +Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C, + const ElementCount &Count) { + Value *SV = llvm::ConstantVector::getSplat(Count, C); return Builder.CreateShuffleVector(V, V, SV, "lane"); } +Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) { + ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount(); + return EmitNeonSplat(V, C, EC); +} + Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops, const char *name, unsigned shift, bool rightshift) { unsigned j = 0; for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); - ai != ae; ++ai, ++j) + ai != ae; ++ai, ++j) { + if (F->isConstrainedFPIntrinsic()) + if (ai->getType()->isMetadataTy()) + continue; if (shift > 0 && shift == j) Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift); else Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name); + } - return Builder.CreateCall(F, Ops, name); + if 
(F->isConstrainedFPIntrinsic()) + return Builder.CreateConstrainedFPCall(F, Ops, name); + else + return Builder.CreateCall(F, Ops, name); } Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty, @@ -4556,17 +4707,17 @@ enum { }; namespace { -struct NeonIntrinsicInfo { +struct ARMVectorIntrinsicInfo { const char *NameHint; unsigned BuiltinID; unsigned LLVMIntrinsic; unsigned AltLLVMIntrinsic; - unsigned TypeModifier; + uint64_t TypeModifier; bool operator<(unsigned RHSBuiltinID) const { return BuiltinID < RHSBuiltinID; } - bool operator<(const NeonIntrinsicInfo &TE) const { + bool operator<(const ARMVectorIntrinsicInfo &TE) const { return BuiltinID < TE.BuiltinID; } }; @@ -4584,7 +4735,12 @@ struct NeonIntrinsicInfo { Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \ TypeModifier } -static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { +static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = { + NEONMAP1(__a32_vcvt_bf16_v, arm_neon_vcvtfp2bf, 0), + NEONMAP0(splat_lane_v), + NEONMAP0(splat_laneq_v), + NEONMAP0(splatq_lane_v), + NEONMAP0(splatq_laneq_v), NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), NEONMAP1(vabs_v, arm_neon_vabs, 0), @@ -4594,6 +4750,11 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP1(vaeseq_v, arm_neon_aese, 0), NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0), NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0), + NEONMAP1(vbfdot_v, arm_neon_bfdot, 0), + NEONMAP1(vbfdotq_v, arm_neon_bfdot, 0), + NEONMAP1(vbfmlalbq_v, arm_neon_bfmlalb, 0), + NEONMAP1(vbfmlaltq_v, arm_neon_bfmlalt, 0), + NEONMAP1(vbfmmlaq_v, arm_neon_bfmmla, 0), NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType), NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType), NEONMAP1(vcadd_rot270_v, arm_neon_vcadd_rot270, Add1ArgType), @@ -4654,6 +4815,7 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP1(vcvtaq_u16_v, arm_neon_vcvtau, 0), NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0), NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0), + NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0), NEONMAP1(vcvtm_s16_v, arm_neon_vcvtms, 0), NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0), NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0), @@ -4752,6 +4914,7 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType), NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType), NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), + NEONMAP2(vmmlaq_v, arm_neon_ummla, arm_neon_smmla, 0), NEONMAP0(vmovl_v), NEONMAP0(vmovn_v), NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType), @@ -4859,13 +5022,21 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP0(vtrnq_v), NEONMAP0(vtst_v), NEONMAP0(vtstq_v), + NEONMAP1(vusdot_v, arm_neon_usdot, 0), + NEONMAP1(vusdotq_v, arm_neon_usdot, 0), + NEONMAP1(vusmmlaq_v, arm_neon_usmmla, 0), NEONMAP0(vuzp_v), NEONMAP0(vuzpq_v), NEONMAP0(vzip_v), NEONMAP0(vzipq_v) }; -static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { +static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = { + NEONMAP1(__a64_vcvtq_low_bf16_v, aarch64_neon_bfcvtn, 0), + NEONMAP0(splat_lane_v), + NEONMAP0(splat_laneq_v), + NEONMAP0(splatq_lane_v), + NEONMAP0(splatq_laneq_v), NEONMAP1(vabs_v, aarch64_neon_abs, 0), NEONMAP1(vabsq_v, aarch64_neon_abs, 0), NEONMAP0(vaddhn_v), @@ -4873,6 +5044,11 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0), NEONMAP1(vaesimcq_v, 
aarch64_crypto_aesimc, 0), NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0), + NEONMAP1(vbfdot_v, aarch64_neon_bfdot, 0), + NEONMAP1(vbfdotq_v, aarch64_neon_bfdot, 0), + NEONMAP1(vbfmlalbq_v, aarch64_neon_bfmlalb, 0), + NEONMAP1(vbfmlaltq_v, aarch64_neon_bfmlalt, 0), + NEONMAP1(vbfmmlaq_v, aarch64_neon_bfmmla, 0), NEONMAP1(vcadd_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType), NEONMAP1(vcadd_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType), NEONMAP1(vcaddq_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType), @@ -4916,6 +5092,7 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), NEONMAP0(vcvtq_f16_v), NEONMAP0(vcvtq_f32_v), + NEONMAP1(vcvtq_high_bf16_v, aarch64_neon_bfcvtn2, 0), NEONMAP2(vcvtq_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), @@ -4950,6 +5127,7 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0), NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0), NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0), + NEONMAP2(vmmlaq_v, aarch64_neon_ummla, aarch64_neon_smmla, 0), NEONMAP0(vmovl_v), NEONMAP0(vmovn_v), NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType), @@ -4964,14 +5142,22 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts), NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0), NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0), + NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0), + NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0), NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType), + NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0), + NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0), NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType), NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType), NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts), NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType), NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType), NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType), + NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0), + NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0), NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType), + NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0), + NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0), NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType), NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts), NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts), @@ -5024,9 +5210,12 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP0(vsubhn_v), NEONMAP0(vtst_v), NEONMAP0(vtstq_v), + NEONMAP1(vusdot_v, aarch64_neon_usdot, 0), + NEONMAP1(vusdotq_v, aarch64_neon_usdot, 0), + NEONMAP1(vusmmlaq_v, aarch64_neon_usmmla, 0), }; -static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = { +static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = { NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType), NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType), NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType), @@ -5059,6 +5248,7 @@ static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = { NEONMAP1(vcvtd_n_f64_u64, 
aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), + NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0), NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType), NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType), @@ -5256,24 +5446,42 @@ static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = { #undef NEONMAP1 #undef NEONMAP2 +#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ + { \ + #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \ + TypeModifier \ + } + +#define SVEMAP2(NameBase, TypeModifier) \ + { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier } +static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = { +#define GET_SVE_LLVM_INTRINSIC_MAP +#include "clang/Basic/arm_sve_builtin_cg.inc" +#undef GET_SVE_LLVM_INTRINSIC_MAP +}; + +#undef SVEMAP1 +#undef SVEMAP2 + static bool NEONSIMDIntrinsicsProvenSorted = false; static bool AArch64SIMDIntrinsicsProvenSorted = false; static bool AArch64SISDIntrinsicsProvenSorted = false; +static bool AArch64SVEIntrinsicsProvenSorted = false; - -static const NeonIntrinsicInfo * -findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap, - unsigned BuiltinID, bool &MapProvenSorted) { +static const ARMVectorIntrinsicInfo * +findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap, + unsigned BuiltinID, bool &MapProvenSorted) { #ifndef NDEBUG if (!MapProvenSorted) { - assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap))); + assert(llvm::is_sorted(IntrinsicMap)); MapProvenSorted = true; } #endif - const NeonIntrinsicInfo *Builtin = llvm::lower_bound(IntrinsicMap, BuiltinID); + const ARMVectorIntrinsicInfo *Builtin = + llvm::lower_bound(IntrinsicMap, BuiltinID); if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID) return Builtin; @@ -5296,7 +5504,7 @@ Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID, if (Modifier & AddRetType) { llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext())); if (Modifier & VectorizeRetType) - Ty = llvm::VectorType::get( + Ty = llvm::FixedVectorType::get( Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1); Tys.push_back(Ty); @@ -5305,7 +5513,7 @@ Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID, // Arguments. if (Modifier & VectorizeArgTypes) { int Elts = VectorSize ? 
VectorSize / ArgType->getPrimitiveSizeInBits() : 1; - ArgType = llvm::VectorType::get(ArgType, Elts); + ArgType = llvm::FixedVectorType::get(ArgType, Elts); } if (Modifier & (Add1ArgType | Add2ArgTypes)) @@ -5320,10 +5528,9 @@ Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID, return CGM.getIntrinsic(IntrinsicID, Tys); } -static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, - const NeonIntrinsicInfo &SISDInfo, - SmallVectorImpl<Value *> &Ops, - const CallExpr *E) { +static Value *EmitCommonNeonSISDBuiltinExpr( + CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo, + SmallVectorImpl<Value *> &Ops, const CallExpr *E) { unsigned BuiltinID = SISDInfo.BuiltinID; unsigned int Int = SISDInfo.LLVMIntrinsic; unsigned Modifier = SISDInfo.TypeModifier; @@ -5368,8 +5575,8 @@ static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy()); // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate // it before inserting. - Ops[j] = - CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType()); + Ops[j] = CGF.Builder.CreateTruncOrBitCast( + Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType()); Ops[j] = CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0); } @@ -5399,8 +5606,11 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( bool Usgn = Type.isUnsigned(); bool Quad = Type.isQuad(); const bool HasLegalHalfType = getTarget().hasLegalHalfType(); + const bool AllowBFloatArgsAndRet = + getTargetHooks().getABIInfo().allowBFloatArgsAndRet(); - llvm::VectorType *VTy = GetNeonType(this, Type, HasLegalHalfType); + llvm::VectorType *VTy = GetNeonType(this, Type, HasLegalHalfType, false, + AllowBFloatArgsAndRet); llvm::Type *Ty = VTy; if (!Ty) return nullptr; @@ -5415,6 +5625,19 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( switch (BuiltinID) { default: break; + case NEON::BI__builtin_neon_splat_lane_v: + case NEON::BI__builtin_neon_splat_laneq_v: + case NEON::BI__builtin_neon_splatq_lane_v: + case NEON::BI__builtin_neon_splatq_laneq_v: { + auto NumElements = VTy->getElementCount(); + if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v) + NumElements = NumElements * 2; + if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v) + NumElements = NumElements / 2; + + Ops[0] = Builder.CreateBitCast(Ops[0], VTy); + return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements); + } case NEON::BI__builtin_neon_vpadd_v: case NEON::BI__builtin_neon_vpaddq_v: // We don't allow fp/int overloading of intrinsics. 
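// The splat_lane cases above reduce every lane-splat builtin to a single
// shufflevector whose mask repeats the selected lane. For example, for
// splatq_lane_v on a <2 x i32> input with lane index 1, EmitNeonSplat
// emits roughly (illustrative IR):
//
//   %lane = shufflevector <2 x i32> %v, <2 x i32> %v,
//                         <4 x i32> <i32 1, i32 1, i32 1, i32 1>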
@@ -5467,7 +5690,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Ty = HalfTy; break; } - llvm::Type *VecFlt = llvm::VectorType::get(Ty, VTy->getNumElements()); + auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements()); llvm::Type *Tys[] = { VTy, VecFlt }; Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); return EmitNeonCall(F, Ops, NameHint); @@ -5614,7 +5837,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vext_v: case NEON::BI__builtin_neon_vextq_v: { int CV = cast<ConstantInt>(Ops[2])->getSExtValue(); - SmallVector<uint32_t, 16> Indices; + SmallVector<int, 16> Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) Indices.push_back(i+CV); @@ -5624,13 +5847,14 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( } case NEON::BI__builtin_neon_vfma_v: case NEON::BI__builtin_neon_vfmaq_v: { - Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); // NEON intrinsic puts accumulator first, unlike the LLVM fma. - return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); + return emitCallMaybeConstrainedFPBuiltin( + *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty, + {Ops[1], Ops[2], Ops[0]}); } case NEON::BI__builtin_neon_vld1_v: case NEON::BI__builtin_neon_vld1q_v: { @@ -5644,7 +5868,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vld1q_x3_v: case NEON::BI__builtin_neon_vld1_x4_v: case NEON::BI__builtin_neon_vld1q_x4_v: { - llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); + llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getElementType()); Ops[1] = Builder.CreateBitCast(Ops[1], PTy); llvm::Type *Tys[2] = { VTy, PTy }; Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); @@ -5726,8 +5950,8 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2); - llvm::Type *NarrowTy = - llvm::VectorType::get(EltTy, VTy->getNumElements() * 2); + auto *NarrowTy = + llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2); llvm::Type *Tys[2] = { Ty, NarrowTy }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint); } @@ -5736,8 +5960,8 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( // The source operand type has twice as many elements of half the size. 
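// The VectorType -> FixedVectorType churn throughout these hunks tracks
// LLVM's type-hierarchy split: llvm::VectorType is now the abstract base
// of FixedVectorType and ScalableVectorType. A sketch of the new idiom,
// assuming an llvm::LLVMContext &Ctx:
//
//   auto *I32 = llvm::Type::getInt32Ty(Ctx);
//   auto *V4I32 = llvm::FixedVectorType::get(I32, 4);      // <4 x i32>
//   auto *NxV4I32 = llvm::ScalableVectorType::get(I32, 4); // <vscale x 4 x i32>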
unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2); - llvm::Type *NarrowTy = - llvm::VectorType::get(EltTy, VTy->getNumElements() * 2); + auto *NarrowTy = + llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2); llvm::Type *Tys[2] = { Ty, NarrowTy }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl"); } @@ -5749,6 +5973,29 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Ops.resize(2); return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint); } + case NEON::BI__builtin_neon_vqdmulhq_lane_v: + case NEON::BI__builtin_neon_vqdmulh_lane_v: + case NEON::BI__builtin_neon_vqrdmulhq_lane_v: + case NEON::BI__builtin_neon_vqrdmulh_lane_v: { + auto *RTy = cast<llvm::VectorType>(Ty); + if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v || + BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v) + RTy = llvm::FixedVectorType::get(RTy->getElementType(), + RTy->getNumElements() * 2); + llvm::Type *Tys[2] = { + RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false, + /*isQuad*/ false))}; + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint); + } + case NEON::BI__builtin_neon_vqdmulhq_laneq_v: + case NEON::BI__builtin_neon_vqdmulh_laneq_v: + case NEON::BI__builtin_neon_vqrdmulhq_laneq_v: + case NEON::BI__builtin_neon_vqrdmulh_laneq_v: { + llvm::Type *Tys[2] = { + Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false, + /*isQuad*/ true))}; + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint); + } case NEON::BI__builtin_neon_vqshl_n_v: case NEON::BI__builtin_neon_vqshlq_n_v: return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n", @@ -5765,7 +6012,9 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint); case NEON::BI__builtin_neon_vrndi_v: case NEON::BI__builtin_neon_vrndiq_v: - Int = Intrinsic::nearbyint; + Int = Builder.getIsFPConstrained() + ? Intrinsic::experimental_constrained_nearbyint + : Intrinsic::nearbyint; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint); case NEON::BI__builtin_neon_vrshr_n_v: case NEON::BI__builtin_neon_vrshrq_n_v: @@ -5823,7 +6072,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vst1q_x3_v: case NEON::BI__builtin_neon_vst1_x4_v: case NEON::BI__builtin_neon_vst1q_x4_v: { - llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); + llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getElementType()); // TODO: Currently in AArch32 mode the pointer operand comes first, whereas // in AArch64 it comes last. We may want to stick to one or another. 
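// The vrndi change above shows the recurring strict-FP pattern in this
// patch: when the IRBuilder is in constrained-FP mode (for example under
// -ffp-exception-behavior=strict), the experimental constrained intrinsic
// is selected so rounding and exception semantics are preserved:
//
//   Int = Builder.getIsFPConstrained()
//             ? Intrinsic::experimental_constrained_nearbyint
//             : Intrinsic::nearbyint;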
if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be || @@ -5860,7 +6109,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { - SmallVector<uint32_t, 16> Indices; + SmallVector<int, 16> Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { Indices.push_back(i+vi); Indices.push_back(i+e+vi); @@ -5888,7 +6137,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { - SmallVector<uint32_t, 16> Indices; + SmallVector<int, 16> Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) Indices.push_back(2*i+vi); @@ -5906,7 +6155,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { - SmallVector<uint32_t, 16> Indices; + SmallVector<int, 16> Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { Indices.push_back((i + vi*e) >> 1); Indices.push_back(((i + vi*e) >> 1)+e); @@ -5919,40 +6168,91 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( } case NEON::BI__builtin_neon_vdot_v: case NEON::BI__builtin_neon_vdotq_v: { - llvm::Type *InputTy = - llvm::VectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8); + auto *InputTy = + llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8); llvm::Type *Tys[2] = { Ty, InputTy }; Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot"); } case NEON::BI__builtin_neon_vfmlal_low_v: case NEON::BI__builtin_neon_vfmlalq_low_v: { - llvm::Type *InputTy = - llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); + auto *InputTy = + llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); llvm::Type *Tys[2] = { Ty, InputTy }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low"); } case NEON::BI__builtin_neon_vfmlsl_low_v: case NEON::BI__builtin_neon_vfmlslq_low_v: { - llvm::Type *InputTy = - llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); + auto *InputTy = + llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); llvm::Type *Tys[2] = { Ty, InputTy }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low"); } case NEON::BI__builtin_neon_vfmlal_high_v: case NEON::BI__builtin_neon_vfmlalq_high_v: { - llvm::Type *InputTy = - llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); + auto *InputTy = + llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); llvm::Type *Tys[2] = { Ty, InputTy }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high"); } case NEON::BI__builtin_neon_vfmlsl_high_v: case NEON::BI__builtin_neon_vfmlslq_high_v: { - llvm::Type *InputTy = - llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); + auto *InputTy = + llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); llvm::Type *Tys[2] = { Ty, InputTy }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high"); } + case NEON::BI__builtin_neon_vmmlaq_v: { + auto *InputTy = + llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8); + llvm::Type *Tys[2] = { Ty, InputTy }; + Int = Usgn ? 
LLVMIntrinsic : AltLLVMIntrinsic; + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmmla"); + } + case NEON::BI__builtin_neon_vusmmlaq_v: { + auto *InputTy = + llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8); + llvm::Type *Tys[2] = { Ty, InputTy }; + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla"); + } + case NEON::BI__builtin_neon_vusdot_v: + case NEON::BI__builtin_neon_vusdotq_v: { + auto *InputTy = + llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8); + llvm::Type *Tys[2] = { Ty, InputTy }; + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot"); + } + case NEON::BI__builtin_neon_vbfdot_v: + case NEON::BI__builtin_neon_vbfdotq_v: { + llvm::Type *InputTy = + llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8); + llvm::Type *Tys[2] = { Ty, InputTy }; + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot"); + } + case NEON::BI__builtin_neon_vbfmmlaq_v: { + llvm::Type *InputTy = + llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8); + llvm::Type *Tys[2] = { Ty, InputTy }; + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfmmla"); + } + case NEON::BI__builtin_neon_vbfmlalbq_v: { + llvm::Type *InputTy = + llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8); + llvm::Type *Tys[2] = { Ty, InputTy }; + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfmlalb"); + } + case NEON::BI__builtin_neon_vbfmlaltq_v: { + llvm::Type *InputTy = + llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8); + llvm::Type *Tys[2] = { Ty, InputTy }; + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfmlalt"); + } + case NEON::BI__builtin_neon___a32_vcvt_bf16_v: { + llvm::Type *Tys[1] = { Ty }; + Function *F = CGM.getIntrinsic(Int, Tys); + return EmitNeonCall(F, Ops, "vcvtfp2bf"); + } + } assert(Int && "Expected valid intrinsic number"); @@ -5997,7 +6297,7 @@ static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops, TblOps.push_back(ExtOp); // Build a vector containing sequential number like (0, 1, 2, ..., 15) - SmallVector<uint32_t, 16> Indices; + SmallVector<int, 16> Indices; llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType()); for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) { Indices.push_back(2*i); @@ -6061,6 +6361,12 @@ Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) { llvm::ConstantInt::get(Int32Ty, Value)); } +enum SpecialRegisterAccessKind { + NormalRead, + VolatileRead, + Write, +}; + // Generates the IR for the read/write special register builtin, // ValueType is the type of the value that is to be written or read, // RegisterType is the type of the register being written to or read from. @@ -6068,7 +6374,7 @@ static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, - bool IsRead, + SpecialRegisterAccessKind AccessKind, StringRef SysReg = "") { // write and register intrinsics only support 32 and 64 bit operations. 
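// With the AccessKind change below, special-register reads lower to
// llvm.read_volatile_register rather than llvm.read_register, while
// writes keep using llvm.write_register. At the source level the affected
// builtins look like this sketch (register names illustrative):
//
//   uint64_t t = __builtin_arm_rsr64("cntvct_el0"); // volatile read
//   __builtin_arm_wsr64("tpidr_el0", t);            // write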
assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64)) @@ -6093,8 +6399,12 @@ static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64)) && "Can't fit 64-bit value in 32-bit register"); - if (IsRead) { - llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); + if (AccessKind != Write) { + assert(AccessKind == NormalRead || AccessKind == VolatileRead); + llvm::Function *F = CGM.getIntrinsic( + AccessKind == VolatileRead ? llvm::Intrinsic::read_volatile_register + : llvm::Intrinsic::read_register, + Types); llvm::Value *Call = Builder.CreateCall(F, Metadata); if (MixedTypes) @@ -6132,21 +6442,27 @@ static bool HasExtraNeonArgument(unsigned BuiltinID) { default: break; case NEON::BI__builtin_neon_vget_lane_i8: case NEON::BI__builtin_neon_vget_lane_i16: + case NEON::BI__builtin_neon_vget_lane_bf16: case NEON::BI__builtin_neon_vget_lane_i32: case NEON::BI__builtin_neon_vget_lane_i64: case NEON::BI__builtin_neon_vget_lane_f32: case NEON::BI__builtin_neon_vgetq_lane_i8: case NEON::BI__builtin_neon_vgetq_lane_i16: + case NEON::BI__builtin_neon_vgetq_lane_bf16: case NEON::BI__builtin_neon_vgetq_lane_i32: case NEON::BI__builtin_neon_vgetq_lane_i64: case NEON::BI__builtin_neon_vgetq_lane_f32: + case NEON::BI__builtin_neon_vduph_lane_bf16: + case NEON::BI__builtin_neon_vduph_laneq_bf16: case NEON::BI__builtin_neon_vset_lane_i8: case NEON::BI__builtin_neon_vset_lane_i16: + case NEON::BI__builtin_neon_vset_lane_bf16: case NEON::BI__builtin_neon_vset_lane_i32: case NEON::BI__builtin_neon_vset_lane_i64: case NEON::BI__builtin_neon_vset_lane_f32: case NEON::BI__builtin_neon_vsetq_lane_i8: case NEON::BI__builtin_neon_vsetq_lane_i16: + case NEON::BI__builtin_neon_vsetq_lane_bf16: case NEON::BI__builtin_neon_vsetq_lane_i32: case NEON::BI__builtin_neon_vsetq_lane_i64: case NEON::BI__builtin_neon_vsetq_lane_f32: @@ -6154,6 +6470,7 @@ static bool HasExtraNeonArgument(unsigned BuiltinID) { case NEON::BI__builtin_neon_vsha1cq_u32: case NEON::BI__builtin_neon_vsha1pq_u32: case NEON::BI__builtin_neon_vsha1mq_u32: + case NEON::BI__builtin_neon_vcvth_bf16_f32: case clang::ARM::BI_MoveToCoprocessor: case clang::ARM::BI_MoveToCoprocessor2: return false; @@ -6466,9 +6783,11 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, BuiltinID == ARM::BI__builtin_arm_wsr64 || BuiltinID == ARM::BI__builtin_arm_wsrp) { - bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr || - BuiltinID == ARM::BI__builtin_arm_rsr64 || - BuiltinID == ARM::BI__builtin_arm_rsrp; + SpecialRegisterAccessKind AccessKind = Write; + if (BuiltinID == ARM::BI__builtin_arm_rsr || + BuiltinID == ARM::BI__builtin_arm_rsr64 || + BuiltinID == ARM::BI__builtin_arm_rsrp) + AccessKind = VolatileRead; bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp || BuiltinID == ARM::BI__builtin_arm_wsrp; @@ -6487,12 +6806,16 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, ValueType = RegisterType = Int32Ty; } - return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead); + return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, + AccessKind); } // Deal with MVE builtins if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch)) return Result; + // Handle CDE builtins + if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch)) + return Result; // Find out if any arguments are required to be integer constant // expressions. 
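// After the hunk above, EmitARMBuiltinExpr tries the ARM builtin families
// in sequence before falling back to the shared NEON tables; roughly:
//
//   if (Value *R = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
//     return R; // M-profile Vector Extension
//   if (Value *R = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
//     return R; // Custom Datapath Extension (new in this patch)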
@@ -6589,12 +6912,16 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vget_lane_i16: case NEON::BI__builtin_neon_vget_lane_i32: case NEON::BI__builtin_neon_vget_lane_i64: + case NEON::BI__builtin_neon_vget_lane_bf16: case NEON::BI__builtin_neon_vget_lane_f32: case NEON::BI__builtin_neon_vgetq_lane_i8: case NEON::BI__builtin_neon_vgetq_lane_i16: case NEON::BI__builtin_neon_vgetq_lane_i32: case NEON::BI__builtin_neon_vgetq_lane_i64: + case NEON::BI__builtin_neon_vgetq_lane_bf16: case NEON::BI__builtin_neon_vgetq_lane_f32: + case NEON::BI__builtin_neon_vduph_lane_bf16: + case NEON::BI__builtin_neon_vduph_laneq_bf16: return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane"); case NEON::BI__builtin_neon_vrndns_f32: { @@ -6607,11 +6934,13 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vset_lane_i16: case NEON::BI__builtin_neon_vset_lane_i32: case NEON::BI__builtin_neon_vset_lane_i64: + case NEON::BI__builtin_neon_vset_lane_bf16: case NEON::BI__builtin_neon_vset_lane_f32: case NEON::BI__builtin_neon_vsetq_lane_i8: case NEON::BI__builtin_neon_vsetq_lane_i16: case NEON::BI__builtin_neon_vsetq_lane_i32: case NEON::BI__builtin_neon_vsetq_lane_i64: + case NEON::BI__builtin_neon_vsetq_lane_bf16: case NEON::BI__builtin_neon_vsetq_lane_f32: return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); @@ -6628,6 +6957,11 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops, "vsha1h"); + case NEON::BI__builtin_neon_vcvth_bf16_f32: { + return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops, + "vcvtbfp2bf"); + } + // The ARM _MoveToCoprocessor builtins put the input register value as // the first argument, but the LLVM intrinsic expects it as the third one. case ARM::BI_MoveToCoprocessor: @@ -6807,7 +7141,9 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, bool rightShift = false; llvm::VectorType *VTy = GetNeonType(this, Type, - getTarget().hasLegalHalfType()); + getTarget().hasLegalHalfType(), + false, + getTarget().hasBFloat16Type()); llvm::Type *Ty = VTy; if (!Ty) return nullptr; @@ -6815,7 +7151,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, // Many NEON builtins have identical semantics and uses in ARM and // AArch64. Emit these in a single function. auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap); - const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( + const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap( IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted); if (Builtin) return EmitCommonNeonBuiltinExpr( @@ -6831,19 +7167,18 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, if (VTy->getElementType()->isIntegerTy(64)) { // Extract the other lane. Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue(); + int Lane = cast<ConstantInt>(Ops[2])->getZExtValue(); Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane)); Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); // Load the value as a one-element vector. - Ty = llvm::VectorType::get(VTy->getElementType(), 1); + Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1); llvm::Type *Tys[] = {Ty, Int8PtrTy}; Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys); Value *Align = getAlignmentValue32(PtrOp0); Value *Ld = Builder.CreateCall(F, {Ops[0], Align}); // Combine them. 
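// The 64-bit vld1q_lane path above proceeds in three steps: a shuffle
// keeps the untouched lane, llvm.arm.neon.vld1 loads the replacement
// lane as a one-element <1 x i64> vector, and the two-lane shuffle mask
// {1 - Lane, Lane} built just below merges the two back together.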
- uint32_t Indices[] = {1 - Lane, Lane}; - SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices); - return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane"); + int Indices[] = {1 - Lane, Lane}; + return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane"); } LLVM_FALLTHROUGH; case NEON::BI__builtin_neon_vld1_lane_v: { @@ -6966,8 +7301,9 @@ static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V, // equal to the lane size. In LLVM IR, an LShr with that parameter would be // undefined behavior, but in MVE it's legal, so we must convert it to code // that is not undefined in IR. - unsigned LaneBits = - V->getType()->getVectorElementType()->getPrimitiveSizeInBits(); + unsigned LaneBits = cast<llvm::VectorType>(V->getType()) + ->getElementType() + ->getPrimitiveSizeInBits(); if (Shift == LaneBits) { // An unsigned shift of the full lane size always generates zero, so we can // simply emit a zero vector. A signed shift of the full lane size does the @@ -6988,6 +7324,86 @@ static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) { return Builder.CreateVectorSplat(Elements, V); } +static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder, + CodeGenFunction *CGF, + llvm::Value *V, + llvm::Type *DestType) { + // Convert one MVE vector type into another by reinterpreting its in-register + // format. + // + // Little-endian, this is identical to a bitcast (which reinterprets the + // memory format). But big-endian, they're not necessarily the same, because + // the register and memory formats map to each other differently depending on + // the lane size. + // + // We generate a bitcast whenever we can (if we're little-endian, or if the + // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic + // that performs the different kind of reinterpretation. + if (CGF->getTarget().isBigEndian() && + V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) { + return Builder.CreateCall( + CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq, + {DestType, V->getType()}), + V); + } else { + return Builder.CreateBitCast(V, DestType); + } +} + +static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) { + // Make a shufflevector that extracts every other element of a vector (evens + // or odds, as desired). + SmallVector<int, 16> Indices; + unsigned InputElements = + cast<llvm::VectorType>(V->getType())->getNumElements(); + for (unsigned i = 0; i < InputElements; i += 2) + Indices.push_back(i + Odd); + return Builder.CreateShuffleVector(V, llvm::UndefValue::get(V->getType()), + Indices); +} + +static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0, + llvm::Value *V1) { + // Make a shufflevector that interleaves two vectors element by element. + assert(V0->getType() == V1->getType() && "Can't zip different vector types"); + SmallVector<int, 16> Indices; + unsigned InputElements = + cast<llvm::VectorType>(V0->getType())->getNumElements(); + for (unsigned i = 0; i < InputElements; i++) { + Indices.push_back(i); + Indices.push_back(i + InputElements); + } + return Builder.CreateShuffleVector(V0, V1, Indices); +} + +template<unsigned HighBit, unsigned OtherBits> +static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) { + // MVE-specific helper function to make a vector splat of a constant such as + // UINT_MAX or INT_MIN, in which all bits below the highest one are equal. 
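// A worked example for this helper, assuming 32-bit lanes:
// ARMMVEConstantSplat<1, 0> splats 0x80000000 (INT_MIN),
// ARMMVEConstantSplat<0, 1> splats 0x7fffffff (INT_MAX), and
// ARMMVEConstantSplat<1, 1> splats 0xffffffff (UINT_MAX).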
+ llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType(); + unsigned LaneBits = T->getPrimitiveSizeInBits(); + uint32_t Value = HighBit << (LaneBits - 1); + if (OtherBits) + Value |= (1UL << (LaneBits - 1)) - 1; + llvm::Value *Lane = llvm::ConstantInt::get(T, Value); + return ARMMVEVectorSplat(Builder, Lane); +} + +static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder, + llvm::Value *V, + unsigned ReverseWidth) { + // MVE-specific helper function which reverses the elements of a + // vector within every (ReverseWidth)-bit collection of lanes. + SmallVector<int, 16> Indices; + unsigned LaneSize = V->getType()->getScalarSizeInBits(); + unsigned Elements = 128 / LaneSize; + unsigned Mask = ReverseWidth / LaneSize - 1; + for (unsigned i = 0; i < Elements; i++) + Indices.push_back(i ^ Mask); + return Builder.CreateShuffleVector(V, llvm::UndefValue::get(V->getType()), + Indices); +} + Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, @@ -7089,6 +7505,17 @@ Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID, llvm_unreachable("unknown custom codegen type."); } +Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID, + const CallExpr *E, + ReturnValueSlot ReturnValue, + llvm::Triple::ArchType Arch) { + switch (BuiltinID) { + default: + return nullptr; +#include "clang/Basic/arm_cde_builtin_cg.inc" + } +} + static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops, @@ -7238,7 +7665,7 @@ static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID } Value *CodeGenFunction::vectorWrapScalar16(Value *Op) { - llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4); + auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4); Op = Builder.CreateBitCast(Op, Int16Ty); Value *V = UndefValue::get(VTy); llvm::Constant *CI = ConstantInt::get(SizeTy, 0); @@ -7246,9 +7673,840 @@ Value *CodeGenFunction::vectorWrapScalar16(Value *Op) { return Op; } +/// SVEBuiltinMemEltTy - Returns the memory element type for this memory +/// access builtin. Only required if it can't be inferred from the base pointer +/// operand. 
+llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(SVETypeFlags TypeFlags) { + switch (TypeFlags.getMemEltType()) { + case SVETypeFlags::MemEltTyDefault: + return getEltType(TypeFlags); + case SVETypeFlags::MemEltTyInt8: + return Builder.getInt8Ty(); + case SVETypeFlags::MemEltTyInt16: + return Builder.getInt16Ty(); + case SVETypeFlags::MemEltTyInt32: + return Builder.getInt32Ty(); + case SVETypeFlags::MemEltTyInt64: + return Builder.getInt64Ty(); + } + llvm_unreachable("Unknown MemEltType"); +} + +llvm::Type *CodeGenFunction::getEltType(SVETypeFlags TypeFlags) { + switch (TypeFlags.getEltType()) { + default: + llvm_unreachable("Invalid SVETypeFlag!"); + + case SVETypeFlags::EltTyInt8: + return Builder.getInt8Ty(); + case SVETypeFlags::EltTyInt16: + return Builder.getInt16Ty(); + case SVETypeFlags::EltTyInt32: + return Builder.getInt32Ty(); + case SVETypeFlags::EltTyInt64: + return Builder.getInt64Ty(); + + case SVETypeFlags::EltTyFloat16: + return Builder.getHalfTy(); + case SVETypeFlags::EltTyFloat32: + return Builder.getFloatTy(); + case SVETypeFlags::EltTyFloat64: + return Builder.getDoubleTy(); + + case SVETypeFlags::EltTyBFloat16: + return Builder.getBFloatTy(); + + case SVETypeFlags::EltTyBool8: + case SVETypeFlags::EltTyBool16: + case SVETypeFlags::EltTyBool32: + case SVETypeFlags::EltTyBool64: + return Builder.getInt1Ty(); + } +} + +// Return the llvm predicate vector type corresponding to the specified element +// TypeFlags. +llvm::ScalableVectorType * +CodeGenFunction::getSVEPredType(SVETypeFlags TypeFlags) { + switch (TypeFlags.getEltType()) { + default: llvm_unreachable("Unhandled SVETypeFlag!"); + + case SVETypeFlags::EltTyInt8: + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16); + case SVETypeFlags::EltTyInt16: + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8); + case SVETypeFlags::EltTyInt32: + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4); + case SVETypeFlags::EltTyInt64: + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2); + + case SVETypeFlags::EltTyBFloat16: + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8); + case SVETypeFlags::EltTyFloat16: + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8); + case SVETypeFlags::EltTyFloat32: + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4); + case SVETypeFlags::EltTyFloat64: + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2); + + case SVETypeFlags::EltTyBool8: + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16); + case SVETypeFlags::EltTyBool16: + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8); + case SVETypeFlags::EltTyBool32: + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4); + case SVETypeFlags::EltTyBool64: + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2); + } +} + +// Return the llvm vector type corresponding to the specified element TypeFlags. 
+llvm::ScalableVectorType * +CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) { + switch (TypeFlags.getEltType()) { + default: + llvm_unreachable("Invalid SVETypeFlag!"); + + case SVETypeFlags::EltTyInt8: + return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16); + case SVETypeFlags::EltTyInt16: + return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8); + case SVETypeFlags::EltTyInt32: + return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4); + case SVETypeFlags::EltTyInt64: + return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2); + + case SVETypeFlags::EltTyFloat16: + return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8); + case SVETypeFlags::EltTyBFloat16: + return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8); + case SVETypeFlags::EltTyFloat32: + return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4); + case SVETypeFlags::EltTyFloat64: + return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2); + + case SVETypeFlags::EltTyBool8: + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16); + case SVETypeFlags::EltTyBool16: + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8); + case SVETypeFlags::EltTyBool32: + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4); + case SVETypeFlags::EltTyBool64: + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2); + } +} + +llvm::Value *CodeGenFunction::EmitSVEAllTruePred(SVETypeFlags TypeFlags) { + Function *Ptrue = + CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags)); + return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)}); +} + +constexpr unsigned SVEBitsPerBlock = 128; + +static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) { + unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits(); + return llvm::ScalableVectorType::get(EltTy, NumElts); +} + +// Reinterpret the input predicate so that it can be used to correctly isolate +// the elements of the specified datatype. +Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred, + llvm::ScalableVectorType *VTy) { + auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy); + if (Pred->getType() == RTy) + return Pred; + + unsigned IntID; + llvm::Type *IntrinsicTy; + switch (VTy->getMinNumElements()) { + default: + llvm_unreachable("unsupported element count!"); + case 2: + case 4: + case 8: + IntID = Intrinsic::aarch64_sve_convert_from_svbool; + IntrinsicTy = RTy; + break; + case 16: + IntID = Intrinsic::aarch64_sve_convert_to_svbool; + IntrinsicTy = Pred->getType(); + break; + } + + Function *F = CGM.getIntrinsic(IntID, IntrinsicTy); + Value *C = Builder.CreateCall(F, Pred); + assert(C->getType() == RTy && "Unexpected return type!"); + return C; +} + +Value *CodeGenFunction::EmitSVEGatherLoad(SVETypeFlags TypeFlags, + SmallVectorImpl<Value *> &Ops, + unsigned IntID) { + auto *ResultTy = getSVEType(TypeFlags); + auto *OverloadedTy = + llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy); + + // At the ACLE level there's only one predicate type, svbool_t, which is + // mapped to <n x 16 x i1>. However, this might be incompatible with the + // actual type being loaded. For example, when loading doubles (i64) the + // predicate should be <n x 2 x i1> instead. At the IR level the type of + // the predicate and the data being loaded must match. Cast accordingly. 
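// The predicate cast below bridges from the single ACLE predicate type to
// the width the data requires: getSVEPredType above pairs 8-bit elements
// with <vscale x 16 x i1>, 16-bit with <vscale x 8 x i1>, 32-bit with
// <vscale x 4 x i1>, and 64-bit with <vscale x 2 x i1>, and
// EmitSVEPredicateCast converts via aarch64.sve.convert.{to,from}.svbool.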
+ Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy); + + Function *F = nullptr; + if (Ops[1]->getType()->isVectorTy()) + // This is the "vector base, scalar offset" case. In order to uniquely + // map this built-in to an LLVM IR intrinsic, we need both the return type + // and the type of the vector base. + F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()}); + else + // This is the "scalar base, vector offset" case. The type of the offset + // is encoded in the name of the intrinsic. We only need to specify the + // return type in order to uniquely map this built-in to an LLVM IR + // intrinsic. + F = CGM.getIntrinsic(IntID, OverloadedTy); + + // Pass 0 when the offset is missing. This can only be applied when using + // the "vector base" addressing mode for which ACLE allows no offset. The + // corresponding LLVM IR always requires an offset. + if (Ops.size() == 2) { + assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset"); + Ops.push_back(ConstantInt::get(Int64Ty, 0)); + } + + // For "vector base, scalar index" scale the index so that it becomes a + // scalar offset. + if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) { + unsigned BytesPerElt = + OverloadedTy->getElementType()->getScalarSizeInBits() / 8; + Value *Scale = ConstantInt::get(Int64Ty, BytesPerElt); + Ops[2] = Builder.CreateMul(Ops[2], Scale); + } + + Value *Call = Builder.CreateCall(F, Ops); + + // The following sext/zext is only needed when ResultTy != OverloadedTy. In + // other cases it's folded into a nop. + return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy) + : Builder.CreateSExt(Call, ResultTy); +} + +Value *CodeGenFunction::EmitSVEScatterStore(SVETypeFlags TypeFlags, + SmallVectorImpl<Value *> &Ops, + unsigned IntID) { + auto *SrcDataTy = getSVEType(TypeFlags); + auto *OverloadedTy = + llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy); + + // In ACLE the source data is passed in the last argument, whereas in LLVM IR + // it's the first argument. Move it accordingly. + Ops.insert(Ops.begin(), Ops.pop_back_val()); + + Function *F = nullptr; + if (Ops[2]->getType()->isVectorTy()) + // This is the "vector base, scalar offset" case. In order to uniquely + // map this built-in to an LLVM IR intrinsic, we need both the return type + // and the type of the vector base. + F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()}); + else + // This is the "scalar base, vector offset" case. The type of the offset + // is encoded in the name of the intrinsic. We only need to specify the + // return type in order to uniquely map this built-in to an LLVM IR + // intrinsic. + F = CGM.getIntrinsic(IntID, OverloadedTy); + + // Pass 0 when the offset is missing. This can only be applied when using + // the "vector base" addressing mode for which ACLE allows no offset. The + // corresponding LLVM IR always requires an offset. + if (Ops.size() == 3) { + assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset"); + Ops.push_back(ConstantInt::get(Int64Ty, 0)); + } + + // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's + // folded into a nop. + Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy); + + // At the ACLE level there's only one predicate type, svbool_t, which is + // mapped to <n x 16 x i1>. However, this might be incompatible with the + // actual type being stored. For example, when storing doubles (i64) the + // predicate should be <n x 2 x i1> instead. 
+  Ops[1] = EmitSVEPredicateCast(Ops[1], OverloadedTy);
+
+  // For "vector base, scalar index" scale the index so that it becomes a
+  // scalar offset.
+  if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
+    unsigned BytesPerElt =
+        OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
+    Value *Scale = ConstantInt::get(Int64Ty, BytesPerElt);
+    Ops[3] = Builder.CreateMul(Ops[3], Scale);
+  }
+
+  return Builder.CreateCall(F, Ops);
+}
+
+Value *CodeGenFunction::EmitSVEGatherPrefetch(SVETypeFlags TypeFlags,
+                                              SmallVectorImpl<Value *> &Ops,
+                                              unsigned IntID) {
+  // The gather prefetches are overloaded on the vector input - this can either
+  // be the vector of base addresses or vector of offsets.
+  auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
+  if (!OverloadedTy)
+    OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
+
+  // Cast the predicate from svbool_t to the right number of elements.
+  Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
+
+  // vector + imm addressing modes
+  if (Ops[1]->getType()->isVectorTy()) {
+    if (Ops.size() == 3) {
+      // Pass 0 for 'vector+imm' when the index is omitted.
+      Ops.push_back(ConstantInt::get(Int64Ty, 0));
+
+      // The sv_prfop is the last operand in the builtin and IR intrinsic.
+      std::swap(Ops[2], Ops[3]);
+    } else {
+      // Index needs to be passed as scaled offset.
+      llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
+      unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
+      Value *Scale = ConstantInt::get(Int64Ty, BytesPerElt);
+      Ops[2] = Builder.CreateMul(Ops[2], Scale);
+    }
+  }
+
+  Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
+  return Builder.CreateCall(F, Ops);
+}
+
+Value *CodeGenFunction::EmitSVEStructLoad(SVETypeFlags TypeFlags,
+                                          SmallVectorImpl<Value*> &Ops,
+                                          unsigned IntID) {
+  llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
+  auto VecPtrTy = llvm::PointerType::getUnqual(VTy);
+  auto EltPtrTy = llvm::PointerType::getUnqual(VTy->getElementType());
+
+  unsigned N;
+  switch (IntID) {
+  case Intrinsic::aarch64_sve_ld2:
+    N = 2;
+    break;
+  case Intrinsic::aarch64_sve_ld3:
+    N = 3;
+    break;
+  case Intrinsic::aarch64_sve_ld4:
+    N = 4;
+    break;
+  default:
+    llvm_unreachable("unknown intrinsic!");
+  }
+  auto RetTy = llvm::VectorType::get(VTy->getElementType(),
+                                     VTy->getElementCount() * N);
+
+  Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
+  Value *BasePtr = Builder.CreateBitCast(Ops[1], VecPtrTy);
+  Value *Offset = Ops.size() > 2 ?
Ops[2] : Builder.getInt32(0); + BasePtr = Builder.CreateGEP(VTy, BasePtr, Offset); + BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy); + + Function *F = CGM.getIntrinsic(IntID, {RetTy, Predicate->getType()}); + return Builder.CreateCall(F, { Predicate, BasePtr }); +} + +Value *CodeGenFunction::EmitSVEStructStore(SVETypeFlags TypeFlags, + SmallVectorImpl<Value*> &Ops, + unsigned IntID) { + llvm::ScalableVectorType *VTy = getSVEType(TypeFlags); + auto VecPtrTy = llvm::PointerType::getUnqual(VTy); + auto EltPtrTy = llvm::PointerType::getUnqual(VTy->getElementType()); + + unsigned N; + switch (IntID) { + case Intrinsic::aarch64_sve_st2: + N = 2; + break; + case Intrinsic::aarch64_sve_st3: + N = 3; + break; + case Intrinsic::aarch64_sve_st4: + N = 4; + break; + default: + llvm_unreachable("unknown intrinsic!"); + } + auto TupleTy = + llvm::VectorType::get(VTy->getElementType(), VTy->getElementCount() * N); + + Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy); + Value *BasePtr = Builder.CreateBitCast(Ops[1], VecPtrTy); + Value *Offset = Ops.size() > 3 ? Ops[2] : Builder.getInt32(0); + Value *Val = Ops.back(); + BasePtr = Builder.CreateGEP(VTy, BasePtr, Offset); + BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy); + + // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we + // need to break up the tuple vector. + SmallVector<llvm::Value*, 5> Operands; + Function *FExtr = + CGM.getIntrinsic(Intrinsic::aarch64_sve_tuple_get, {VTy, TupleTy}); + for (unsigned I = 0; I < N; ++I) + Operands.push_back(Builder.CreateCall(FExtr, {Val, Builder.getInt32(I)})); + Operands.append({Predicate, BasePtr}); + + Function *F = CGM.getIntrinsic(IntID, { VTy }); + return Builder.CreateCall(F, Operands); +} + +// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and +// svpmullt_pair intrinsics, with the exception that their results are bitcast +// to a wider type. +Value *CodeGenFunction::EmitSVEPMull(SVETypeFlags TypeFlags, + SmallVectorImpl<Value *> &Ops, + unsigned BuiltinID) { + // Splat scalar operand to vector (intrinsics with _n infix) + if (TypeFlags.hasSplatOperand()) { + unsigned OpNo = TypeFlags.getSplatOperand(); + Ops[OpNo] = EmitSVEDupX(Ops[OpNo]); + } + + // The pair-wise function has a narrower overloaded type. + Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType()); + Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]}); + + // Now bitcast to the wider result type. + llvm::ScalableVectorType *Ty = getSVEType(TypeFlags); + return EmitSVEReinterpret(Call, Ty); +} + +Value *CodeGenFunction::EmitSVEMovl(SVETypeFlags TypeFlags, + ArrayRef<Value *> Ops, unsigned BuiltinID) { + llvm::Type *OverloadedTy = getSVEType(TypeFlags); + Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy); + return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)}); +} + +Value *CodeGenFunction::EmitSVEPrefetchLoad(SVETypeFlags TypeFlags, + SmallVectorImpl<Value *> &Ops, + unsigned BuiltinID) { + auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags); + auto *VectorTy = getSVEVectorForElementType(MemEltTy); + auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy); + + Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy); + Value *BasePtr = Ops[1]; + + // Implement the index operand if not omitted. 
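+  // [Editor's note: sketch, not upstream text. For the vnum forms, e.g.
+  //   svprfb_vnum(pg, base, vnum, SV_PLDL1KEEP);
+  // the GEP below advances 'base' by 'vnum' whole vectors, i.e. by
+  // vnum * vscale * 16 bytes for an <vscale x 16 x i8> memory type.]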
+  if (Ops.size() > 3) {
+    BasePtr = Builder.CreateBitCast(BasePtr, MemoryTy->getPointerTo());
+    BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
+  }
+
+  // Prefetch intrinsics always expect an i8*
+  BasePtr = Builder.CreateBitCast(BasePtr, llvm::PointerType::getUnqual(Int8Ty));
+  Value *PrfOp = Ops.back();
+
+  Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
+  return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
+}
+
+Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
+                                          llvm::Type *ReturnTy,
+                                          SmallVectorImpl<Value *> &Ops,
+                                          unsigned BuiltinID,
+                                          bool IsZExtReturn) {
+  QualType LangPTy = E->getArg(1)->getType();
+  llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
+      LangPTy->getAs<PointerType>()->getPointeeType());
+
+  // The vector type that is returned may be different from the
+  // eventual type loaded from memory.
+  auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
+  auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
+
+  Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
+  Value *BasePtr = Builder.CreateBitCast(Ops[1], MemoryTy->getPointerTo());
+  Value *Offset = Ops.size() > 2 ? Ops[2] : Builder.getInt32(0);
+  BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Offset);
+
+  BasePtr = Builder.CreateBitCast(BasePtr, MemEltTy->getPointerTo());
+  Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy);
+  Value *Load = Builder.CreateCall(F, {Predicate, BasePtr});
+
+  return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
+                      : Builder.CreateSExt(Load, VectorTy);
+}
+
+Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
+                                           SmallVectorImpl<Value *> &Ops,
+                                           unsigned BuiltinID) {
+  QualType LangPTy = E->getArg(1)->getType();
+  llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
+      LangPTy->getAs<PointerType>()->getPointeeType());
+
+  // The vector type that is stored may be different from the
+  // eventual type stored to memory.
+  auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
+  auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
+
+  Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
+  Value *BasePtr = Builder.CreateBitCast(Ops[1], MemoryTy->getPointerTo());
+  Value *Offset = Ops.size() == 4 ? Ops[2] : Builder.getInt32(0);
+  BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Offset);
+
+  // Last value is always the data
+  llvm::Value *Val = Builder.CreateTrunc(Ops.back(), MemoryTy);
+
+  BasePtr = Builder.CreateBitCast(BasePtr, MemEltTy->getPointerTo());
+  Function *F = CGM.getIntrinsic(BuiltinID, MemoryTy);
+  return Builder.CreateCall(F, {Val, Predicate, BasePtr});
+}
+
+// Limit the usage of scalable LLVM IR generated by the ACLE by using the
+// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
+Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
+  auto F = CGM.getIntrinsic(Intrinsic::aarch64_sve_dup_x, Ty);
+  return Builder.CreateCall(F, Scalar);
+}
+
+Value *CodeGenFunction::EmitSVEDupX(Value* Scalar) {
+  return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
+}
+
+Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
+  // FIXME: For big endian this needs an additional REV, or needs a separate
+  // intrinsic that is code-generated as a no-op, because the LLVM bitcast
+  // instruction is defined as 'bitwise' equivalent from memory point of
+  // view (when storing/reloading), whereas the svreinterpret builtin
+  // implements bitwise equivalent cast from register point of view.
+  // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
+  return Builder.CreateBitCast(Val, Ty);
+}
+
+static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
+                                      SmallVectorImpl<Value *> &Ops) {
+  auto *SplatZero = Constant::getNullValue(Ty);
+  Ops.insert(Ops.begin(), SplatZero);
+}
+
+static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
+                                       SmallVectorImpl<Value *> &Ops) {
+  auto *SplatUndef = UndefValue::get(Ty);
+  Ops.insert(Ops.begin(), SplatUndef);
+}
+
+SmallVector<llvm::Type *, 2> CodeGenFunction::getSVEOverloadTypes(
+    SVETypeFlags TypeFlags, llvm::Type *ResultType, ArrayRef<Value *> Ops) {
+  if (TypeFlags.isOverloadNone())
+    return {};
+
+  llvm::Type *DefaultType = getSVEType(TypeFlags);
+
+  if (TypeFlags.isOverloadWhile())
+    return {DefaultType, Ops[1]->getType()};
+
+  if (TypeFlags.isOverloadWhileRW())
+    return {getSVEPredType(TypeFlags), Ops[0]->getType()};
+
+  if (TypeFlags.isOverloadCvt() || TypeFlags.isTupleSet())
+    return {Ops[0]->getType(), Ops.back()->getType()};
+
+  if (TypeFlags.isTupleCreate() || TypeFlags.isTupleGet())
+    return {ResultType, Ops[0]->getType()};
+
+  assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
+  return {DefaultType};
+}
+
+Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
+                                                  const CallExpr *E) {
+  // Find out if any arguments are required to be integer constant expressions.
+  unsigned ICEArguments = 0;
+  ASTContext::GetBuiltinTypeError Error;
+  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
+  assert(Error == ASTContext::GE_None && "Should not codegen an error");
+
+  llvm::Type *Ty = ConvertType(E->getType());
+  if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
+      BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64) {
+    Value *Val = EmitScalarExpr(E->getArg(0));
+    return EmitSVEReinterpret(Val, Ty);
+  }
+
+  llvm::SmallVector<Value *, 4> Ops;
+  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
+    if ((ICEArguments & (1 << i)) == 0)
+      Ops.push_back(EmitScalarExpr(E->getArg(i)));
+    else {
+      // If this is required to be a constant, constant fold it so that we know
+      // that the generated intrinsic gets a ConstantInt.
+      llvm::APSInt Result;
+      if (!E->getArg(i)->isIntegerConstantExpr(Result, getContext()))
+        llvm_unreachable("Expected argument to be a constant");
+
+      // Immediates for SVE LLVM intrinsics are always 32-bit. We can safely
+      // truncate because the immediate has been range checked and no valid
+      // immediate requires more than a handful of bits.
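+      // [Editor's note, not upstream text: e.g. predicate-pattern immediates
+      // lie in [0, 31] and shift immediates never exceed 64, so a 32-bit
+      // ConstantInt is always wide enough here.]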
+ Result = Result.extOrTrunc(32); + Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); + } + } + + auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID, + AArch64SVEIntrinsicsProvenSorted); + SVETypeFlags TypeFlags(Builtin->TypeModifier); + if (TypeFlags.isLoad()) + return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic, + TypeFlags.isZExtReturn()); + else if (TypeFlags.isStore()) + return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic); + else if (TypeFlags.isGatherLoad()) + return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic); + else if (TypeFlags.isScatterStore()) + return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic); + else if (TypeFlags.isPrefetch()) + return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic); + else if (TypeFlags.isGatherPrefetch()) + return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic); + else if (TypeFlags.isStructLoad()) + return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic); + else if (TypeFlags.isStructStore()) + return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic); + else if (TypeFlags.isUndef()) + return UndefValue::get(Ty); + else if (Builtin->LLVMIntrinsic != 0) { + if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp) + InsertExplicitZeroOperand(Builder, Ty, Ops); + + if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp) + InsertExplicitUndefOperand(Builder, Ty, Ops); + + // Some ACLE builtins leave out the argument to specify the predicate + // pattern, which is expected to be expanded to an SV_ALL pattern. + if (TypeFlags.isAppendSVALL()) + Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31)); + if (TypeFlags.isInsertOp1SVALL()) + Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31)); + + // Predicates must match the main datatype. + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType())) + if (PredTy->getElementType()->isIntegerTy(1)) + Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags)); + + // Splat scalar operand to vector (intrinsics with _n infix) + if (TypeFlags.hasSplatOperand()) { + unsigned OpNo = TypeFlags.getSplatOperand(); + Ops[OpNo] = EmitSVEDupX(Ops[OpNo]); + } + + if (TypeFlags.isReverseCompare()) + std::swap(Ops[1], Ops[2]); + + if (TypeFlags.isReverseUSDOT()) + std::swap(Ops[1], Ops[2]); + + // Predicated intrinsics with _z suffix need a select w/ zeroinitializer. + if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) { + llvm::Type *OpndTy = Ops[1]->getType(); + auto *SplatZero = Constant::getNullValue(OpndTy); + Function *Sel = CGM.getIntrinsic(Intrinsic::aarch64_sve_sel, OpndTy); + Ops[1] = Builder.CreateCall(Sel, {Ops[0], Ops[1], SplatZero}); + } + + Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic, + getSVEOverloadTypes(TypeFlags, Ty, Ops)); + Value *Call = Builder.CreateCall(F, Ops); + + // Predicate results must be converted to svbool_t. 
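+    // [Editor's note: illustration, not upstream text. E.g. svwhilelt_b32
+    // yields an <vscale x 4 x i1> from the IR-level WHILELT; the cast below
+    // widens it back to the generic <vscale x 16 x i1> svbool_t via
+    //   @llvm.aarch64.sve.convert.to.svbool.nxv4i1]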
+ if (auto PredTy = dyn_cast<llvm::VectorType>(Call->getType())) + if (PredTy->getScalarType()->isIntegerTy(1)) + Call = EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty)); + + return Call; + } + + switch (BuiltinID) { + default: + return nullptr; + + case SVE::BI__builtin_sve_svmov_b_z: { + // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op) + SVETypeFlags TypeFlags(Builtin->TypeModifier); + llvm::Type* OverloadedTy = getSVEType(TypeFlags); + Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy); + return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]}); + } + + case SVE::BI__builtin_sve_svnot_b_z: { + // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg) + SVETypeFlags TypeFlags(Builtin->TypeModifier); + llvm::Type* OverloadedTy = getSVEType(TypeFlags); + Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy); + return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]}); + } + + case SVE::BI__builtin_sve_svmovlb_u16: + case SVE::BI__builtin_sve_svmovlb_u32: + case SVE::BI__builtin_sve_svmovlb_u64: + return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb); + + case SVE::BI__builtin_sve_svmovlb_s16: + case SVE::BI__builtin_sve_svmovlb_s32: + case SVE::BI__builtin_sve_svmovlb_s64: + return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb); + + case SVE::BI__builtin_sve_svmovlt_u16: + case SVE::BI__builtin_sve_svmovlt_u32: + case SVE::BI__builtin_sve_svmovlt_u64: + return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt); + + case SVE::BI__builtin_sve_svmovlt_s16: + case SVE::BI__builtin_sve_svmovlt_s32: + case SVE::BI__builtin_sve_svmovlt_s64: + return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt); + + case SVE::BI__builtin_sve_svpmullt_u16: + case SVE::BI__builtin_sve_svpmullt_u64: + case SVE::BI__builtin_sve_svpmullt_n_u16: + case SVE::BI__builtin_sve_svpmullt_n_u64: + return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair); + + case SVE::BI__builtin_sve_svpmullb_u16: + case SVE::BI__builtin_sve_svpmullb_u64: + case SVE::BI__builtin_sve_svpmullb_n_u16: + case SVE::BI__builtin_sve_svpmullb_n_u64: + return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair); + + case SVE::BI__builtin_sve_svdup_n_b8: + case SVE::BI__builtin_sve_svdup_n_b16: + case SVE::BI__builtin_sve_svdup_n_b32: + case SVE::BI__builtin_sve_svdup_n_b64: { + Value *CmpNE = + Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType())); + llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags); + Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy); + return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty)); + } + + case SVE::BI__builtin_sve_svdupq_n_b8: + case SVE::BI__builtin_sve_svdupq_n_b16: + case SVE::BI__builtin_sve_svdupq_n_b32: + case SVE::BI__builtin_sve_svdupq_n_b64: + case SVE::BI__builtin_sve_svdupq_n_u8: + case SVE::BI__builtin_sve_svdupq_n_s8: + case SVE::BI__builtin_sve_svdupq_n_u64: + case SVE::BI__builtin_sve_svdupq_n_f64: + case SVE::BI__builtin_sve_svdupq_n_s64: + case SVE::BI__builtin_sve_svdupq_n_u16: + case SVE::BI__builtin_sve_svdupq_n_f16: + case SVE::BI__builtin_sve_svdupq_n_bf16: + case SVE::BI__builtin_sve_svdupq_n_s16: + case SVE::BI__builtin_sve_svdupq_n_u32: + case SVE::BI__builtin_sve_svdupq_n_f32: + case SVE::BI__builtin_sve_svdupq_n_s32: { + // These builtins are implemented by storing each element to an array and using + // ld1rq to materialize a vector. 
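+    // [Editor's sketch of the emitted IR, not upstream text. For
+    // svdupq_n_s32(a, b, c, d) this produces roughly:
+    //   %arr = alloca [4 x i32], align 16
+    //   ...four stores of a..d into %arr...
+    //   %v = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1rq.nxv4i32(
+    //            <vscale x 4 x i1> %ptrue, i32* %arr0)
+    // i.e. the 128-bit block is replicated to every quadword of the result.]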
+    unsigned NumOpnds = Ops.size();
+
+    bool IsBoolTy =
+        cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
+
+    // For svdupq_n_b* the element type is an integer of width 128/numelts,
+    // so that the compare can use the width that is natural for the expected
+    // number of predicate lanes.
+    llvm::Type *EltTy = Ops[0]->getType();
+    if (IsBoolTy)
+      EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
+
+    Address Alloca = CreateTempAlloca(llvm::ArrayType::get(EltTy, NumOpnds),
+                                      CharUnits::fromQuantity(16));
+    for (unsigned I = 0; I < NumOpnds; ++I)
+      Builder.CreateDefaultAlignedStore(
+          IsBoolTy ? Builder.CreateZExt(Ops[I], EltTy) : Ops[I],
+          Builder.CreateGEP(Alloca.getPointer(),
+                            {Builder.getInt64(0), Builder.getInt64(I)}));
+
+    SVETypeFlags TypeFlags(Builtin->TypeModifier);
+    Value *Pred = EmitSVEAllTruePred(TypeFlags);
+
+    llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
+    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_ld1rq, OverloadedTy);
+    Value *Alloca0 = Builder.CreateGEP(
+        Alloca.getPointer(), {Builder.getInt64(0), Builder.getInt64(0)});
+    Value *LD1RQ = Builder.CreateCall(F, {Pred, Alloca0});
+
+    if (!IsBoolTy)
+      return LD1RQ;
+
+    // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
+    F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
+                                       : Intrinsic::aarch64_sve_cmpne_wide,
+                         OverloadedTy);
+    Value *Call =
+        Builder.CreateCall(F, {Pred, LD1RQ, EmitSVEDupX(Builder.getInt64(0))});
+    return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
+  }
+
+  case SVE::BI__builtin_sve_svpfalse_b:
+    return ConstantInt::getFalse(Ty);
+
+  case SVE::BI__builtin_sve_svlen_bf16:
+  case SVE::BI__builtin_sve_svlen_f16:
+  case SVE::BI__builtin_sve_svlen_f32:
+  case SVE::BI__builtin_sve_svlen_f64:
+  case SVE::BI__builtin_sve_svlen_s8:
+  case SVE::BI__builtin_sve_svlen_s16:
+  case SVE::BI__builtin_sve_svlen_s32:
+  case SVE::BI__builtin_sve_svlen_s64:
+  case SVE::BI__builtin_sve_svlen_u8:
+  case SVE::BI__builtin_sve_svlen_u16:
+  case SVE::BI__builtin_sve_svlen_u32:
+  case SVE::BI__builtin_sve_svlen_u64: {
+    SVETypeFlags TF(Builtin->TypeModifier);
+    auto VTy = cast<llvm::VectorType>(getSVEType(TF));
+    auto NumEls = llvm::ConstantInt::get(Ty, VTy->getElementCount().Min);
+
+    Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty);
+    return Builder.CreateMul(NumEls, Builder.CreateCall(F));
+  }
+
+  case SVE::BI__builtin_sve_svtbl2_u8:
+  case SVE::BI__builtin_sve_svtbl2_s8:
+  case SVE::BI__builtin_sve_svtbl2_u16:
+  case SVE::BI__builtin_sve_svtbl2_s16:
+  case SVE::BI__builtin_sve_svtbl2_u32:
+  case SVE::BI__builtin_sve_svtbl2_s32:
+  case SVE::BI__builtin_sve_svtbl2_u64:
+  case SVE::BI__builtin_sve_svtbl2_s64:
+  case SVE::BI__builtin_sve_svtbl2_f16:
+  case SVE::BI__builtin_sve_svtbl2_bf16:
+  case SVE::BI__builtin_sve_svtbl2_f32:
+  case SVE::BI__builtin_sve_svtbl2_f64: {
+    SVETypeFlags TF(Builtin->TypeModifier);
+    auto VTy = cast<llvm::VectorType>(getSVEType(TF));
+    auto TupleTy = llvm::VectorType::get(VTy->getElementType(),
+                                         VTy->getElementCount() * 2);
+    Function *FExtr =
+        CGM.getIntrinsic(Intrinsic::aarch64_sve_tuple_get, {VTy, TupleTy});
+    Value *V0 = Builder.CreateCall(FExtr, {Ops[0], Builder.getInt32(0)});
+    Value *V1 = Builder.CreateCall(FExtr, {Ops[0], Builder.getInt32(1)});
+    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
+    return Builder.CreateCall(F, {V0, V1, Ops[1]});
+  }
+  }
+
+  // Should not happen
+  return nullptr;
+}
+
 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
const CallExpr *E, llvm::Triple::ArchType Arch) { + if (BuiltinID >= AArch64::FirstSVEBuiltin && + BuiltinID <= AArch64::LastSVEBuiltin) + return EmitAArch64SVEBuiltinExpr(BuiltinID, E); + unsigned HintID = static_cast<unsigned>(-1); switch (BuiltinID) { default: break; @@ -7589,9 +8847,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, BuiltinID == AArch64::BI__builtin_arm_wsr64 || BuiltinID == AArch64::BI__builtin_arm_wsrp) { - bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr || - BuiltinID == AArch64::BI__builtin_arm_rsr64 || - BuiltinID == AArch64::BI__builtin_arm_rsrp; + SpecialRegisterAccessKind AccessKind = Write; + if (BuiltinID == AArch64::BI__builtin_arm_rsr || + BuiltinID == AArch64::BI__builtin_arm_rsr64 || + BuiltinID == AArch64::BI__builtin_arm_rsrp) + AccessKind = VolatileRead; bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp || BuiltinID == AArch64::BI__builtin_arm_wsrp; @@ -7609,7 +8869,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, ValueType = Int32Ty; } - return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead); + return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, + AccessKind); } if (BuiltinID == AArch64::BI_ReadStatusReg || @@ -7665,7 +8926,27 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, assert(Error == ASTContext::GE_None && "Should not codegen an error"); llvm::SmallVector<Value*, 4> Ops; + Address PtrOp0 = Address::invalid(); for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { + if (i == 0) { + switch (BuiltinID) { + case NEON::BI__builtin_neon_vld1_v: + case NEON::BI__builtin_neon_vld1q_v: + case NEON::BI__builtin_neon_vld1_dup_v: + case NEON::BI__builtin_neon_vld1q_dup_v: + case NEON::BI__builtin_neon_vld1_lane_v: + case NEON::BI__builtin_neon_vld1q_lane_v: + case NEON::BI__builtin_neon_vst1_v: + case NEON::BI__builtin_neon_vst1q_v: + case NEON::BI__builtin_neon_vst1_lane_v: + case NEON::BI__builtin_neon_vst1q_lane_v: + // Get the alignment for the argument in addition to the value; + // we'll use it later. + PtrOp0 = EmitPointerWithAlignment(E->getArg(0)); + Ops.push_back(PtrOp0.getPointer()); + continue; + } + } if ((ICEArguments & (1 << i)) == 0) { Ops.push_back(EmitScalarExpr(E->getArg(i))); } else { @@ -7680,7 +8961,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap); - const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( + const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap( SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted); if (Builtin) { @@ -7896,7 +9177,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n"); } case NEON::BI__builtin_neon_vpaddd_s64: { - llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2); + auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2); Value *Vec = EmitScalarExpr(E->getArg(0)); // The vector is v2f64, so make sure it's bitcast to that. Vec = Builder.CreateBitCast(Vec, Ty, "v2i64"); @@ -7908,8 +9189,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateAdd(Op0, Op1, "vpaddd"); } case NEON::BI__builtin_neon_vpaddd_f64: { - llvm::Type *Ty = - llvm::VectorType::get(DoubleTy, 2); + auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2); Value *Vec = EmitScalarExpr(E->getArg(0)); // The vector is v2f64, so make sure it's bitcast to that. 
Vec = Builder.CreateBitCast(Vec, Ty, "v2f64"); @@ -7921,8 +9201,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateFAdd(Op0, Op1, "vpaddd"); } case NEON::BI__builtin_neon_vpadds_f32: { - llvm::Type *Ty = - llvm::VectorType::get(FloatTy, 2); + auto *Ty = llvm::FixedVectorType::get(FloatTy, 2); Value *Vec = EmitScalarExpr(E->getArg(0)); // The vector is v2f32, so make sure it's bitcast to that. Vec = Builder.CreateBitCast(Vec, Ty, "v2f32"); @@ -8085,97 +9364,107 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vset_lane_i16: case NEON::BI__builtin_neon_vset_lane_i32: case NEON::BI__builtin_neon_vset_lane_i64: + case NEON::BI__builtin_neon_vset_lane_bf16: case NEON::BI__builtin_neon_vset_lane_f32: case NEON::BI__builtin_neon_vsetq_lane_i8: case NEON::BI__builtin_neon_vsetq_lane_i16: case NEON::BI__builtin_neon_vsetq_lane_i32: case NEON::BI__builtin_neon_vsetq_lane_i64: + case NEON::BI__builtin_neon_vsetq_lane_bf16: case NEON::BI__builtin_neon_vsetq_lane_f32: Ops.push_back(EmitScalarExpr(E->getArg(2))); return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); case NEON::BI__builtin_neon_vset_lane_f64: // The vector type needs a cast for the v1f64 variant. - Ops[1] = Builder.CreateBitCast(Ops[1], - llvm::VectorType::get(DoubleTy, 1)); + Ops[1] = + Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1)); Ops.push_back(EmitScalarExpr(E->getArg(2))); return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); case NEON::BI__builtin_neon_vsetq_lane_f64: // The vector type needs a cast for the v2f64 variant. - Ops[1] = Builder.CreateBitCast(Ops[1], - llvm::VectorType::get(DoubleTy, 2)); + Ops[1] = + Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2)); Ops.push_back(EmitScalarExpr(E->getArg(2))); return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); case NEON::BI__builtin_neon_vget_lane_i8: case NEON::BI__builtin_neon_vdupb_lane_i8: - Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8)); + Ops[0] = + Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vget_lane"); case NEON::BI__builtin_neon_vgetq_lane_i8: case NEON::BI__builtin_neon_vdupb_laneq_i8: - Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16)); + Ops[0] = + Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vgetq_lane"); case NEON::BI__builtin_neon_vget_lane_i16: case NEON::BI__builtin_neon_vduph_lane_i16: - Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4)); + Ops[0] = + Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vget_lane"); case NEON::BI__builtin_neon_vgetq_lane_i16: case NEON::BI__builtin_neon_vduph_laneq_i16: - Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8)); + Ops[0] = + Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vgetq_lane"); case NEON::BI__builtin_neon_vget_lane_i32: case NEON::BI__builtin_neon_vdups_lane_i32: - Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2)); + Ops[0] = + Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2)); return 
Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vget_lane"); case NEON::BI__builtin_neon_vdups_lane_f32: - Ops[0] = Builder.CreateBitCast(Ops[0], - llvm::VectorType::get(FloatTy, 2)); + Ops[0] = + Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vdups_lane"); case NEON::BI__builtin_neon_vgetq_lane_i32: case NEON::BI__builtin_neon_vdups_laneq_i32: - Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4)); + Ops[0] = + Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vgetq_lane"); case NEON::BI__builtin_neon_vget_lane_i64: case NEON::BI__builtin_neon_vdupd_lane_i64: - Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1)); + Ops[0] = + Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vget_lane"); case NEON::BI__builtin_neon_vdupd_lane_f64: - Ops[0] = Builder.CreateBitCast(Ops[0], - llvm::VectorType::get(DoubleTy, 1)); + Ops[0] = + Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vdupd_lane"); case NEON::BI__builtin_neon_vgetq_lane_i64: case NEON::BI__builtin_neon_vdupd_laneq_i64: - Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); + Ops[0] = + Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vgetq_lane"); case NEON::BI__builtin_neon_vget_lane_f32: - Ops[0] = Builder.CreateBitCast(Ops[0], - llvm::VectorType::get(FloatTy, 2)); + Ops[0] = + Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vget_lane"); case NEON::BI__builtin_neon_vget_lane_f64: - Ops[0] = Builder.CreateBitCast(Ops[0], - llvm::VectorType::get(DoubleTy, 1)); + Ops[0] = + Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vget_lane"); case NEON::BI__builtin_neon_vgetq_lane_f32: case NEON::BI__builtin_neon_vdups_laneq_f32: - Ops[0] = Builder.CreateBitCast(Ops[0], - llvm::VectorType::get(FloatTy, 4)); + Ops[0] = + Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vgetq_lane"); case NEON::BI__builtin_neon_vgetq_lane_f64: case NEON::BI__builtin_neon_vdupd_laneq_f64: - Ops[0] = Builder.CreateBitCast(Ops[0], - llvm::VectorType::get(DoubleTy, 2)); + Ops[0] = + Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vgetq_lane"); case NEON::BI__builtin_neon_vaddh_f16: @@ -8190,18 +9479,20 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vdivh_f16: Ops.push_back(EmitScalarExpr(E->getArg(1))); return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh"); - case NEON::BI__builtin_neon_vfmah_f16: { - Function *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy); + case NEON::BI__builtin_neon_vfmah_f16: // NEON intrinsic puts accumulator first, unlike the LLVM fma. 
- return Builder.CreateCall(F, - {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]}); - } + return emitCallMaybeConstrainedFPBuiltin( + *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy, + {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]}); case NEON::BI__builtin_neon_vfmsh_f16: { - Function *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy); + // FIXME: This should be an fneg instruction: Value *Zero = llvm::ConstantFP::getZeroValueForNegation(HalfTy); Value* Sub = Builder.CreateFSub(Zero, EmitScalarExpr(E->getArg(1)), "vsubh"); + // NEON intrinsic puts accumulator first, unlike the LLVM fma. - return Builder.CreateCall(F, {Sub, EmitScalarExpr(E->getArg(2)), Ops[0]}); + return emitCallMaybeConstrainedFPBuiltin( + *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy, + {Sub, EmitScalarExpr(E->getArg(2)), Ops[0]}); } case NEON::BI__builtin_neon_vaddd_s64: case NEON::BI__builtin_neon_vaddd_u64: @@ -8214,7 +9505,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, SmallVector<Value *, 2> ProductOps; ProductOps.push_back(vectorWrapScalar16(Ops[1])); ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2)))); - llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); + auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4); Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), ProductOps, "vqdmlXl"); Constant *CI = ConstantInt::get(SizeTy, 0); @@ -8311,7 +9602,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, SmallVector<Value *, 2> ProductOps; ProductOps.push_back(vectorWrapScalar16(Ops[1])); ProductOps.push_back(vectorWrapScalar16(Ops[2])); - llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); + auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4); Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), ProductOps, "vqdmlXl"); Constant *CI = ConstantInt::get(SizeTy, 0); @@ -8358,10 +9649,14 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, : Intrinsic::aarch64_neon_sqsub; return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl"); } + case NEON::BI__builtin_neon_vget_lane_bf16: + case NEON::BI__builtin_neon_vduph_lane_bf16: case NEON::BI__builtin_neon_vduph_lane_f16: { return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vget_lane"); } + case NEON::BI__builtin_neon_vgetq_lane_bf16: + case NEON::BI__builtin_neon_vduph_laneq_bf16: case NEON::BI__builtin_neon_vduph_laneq_f16: { return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vgetq_lane"); @@ -8520,8 +9815,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, // Not all intrinsics handled by the common case work for AArch64 yet, so only // defer to common code if it's been added to our special map. - Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID, - AArch64SIMDIntrinsicsProvenSorted); + Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID, + AArch64SIMDIntrinsicsProvenSorted); if (Builtin) return EmitCommonNeonBuiltinExpr( @@ -8559,16 +9854,18 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[2] = Addend; // Now adjust things to handle the lane access. - llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ? - llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) : - VTy; + auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v + ? 
llvm::FixedVectorType::get(VTy->getElementType(), + VTy->getNumElements() / 2) + : VTy; llvm::Constant *cst = cast<Constant>(Ops[3]); - Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst); + Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst); Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy); Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane"); Ops.pop_back(); - Int = Intrinsic::fma; + Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma + : Intrinsic::fma; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla"); } case NEON::BI__builtin_neon_vfma_laneq_v: { @@ -8581,31 +9878,35 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, NeonTypeFlags(NeonTypeFlags::Float64, false, true)); Ops[2] = Builder.CreateBitCast(Ops[2], VTy); Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); - Function *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); - Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); + Value *Result; + Result = emitCallMaybeConstrainedFPBuiltin( + *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, + DoubleTy, {Ops[1], Ops[2], Ops[0]}); return Builder.CreateBitCast(Result, Ty); } - Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(), - VTy->getNumElements() * 2); + auto *STy = llvm::FixedVectorType::get(VTy->getElementType(), + VTy->getNumElements() * 2); Ops[2] = Builder.CreateBitCast(Ops[2], STy); - Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), + Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cast<ConstantInt>(Ops[3])); Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane"); - return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]}); + return emitCallMaybeConstrainedFPBuiltin( + *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty, + {Ops[2], Ops[1], Ops[0]}); } case NEON::BI__builtin_neon_vfmaq_laneq_v: { - Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3])); - return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]}); + return emitCallMaybeConstrainedFPBuiltin( + *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty, + {Ops[2], Ops[1], Ops[0]}); } case NEON::BI__builtin_neon_vfmah_lane_f16: case NEON::BI__builtin_neon_vfmas_lane_f32: @@ -8615,9 +9916,10 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vfmad_laneq_f64: { Ops.push_back(EmitScalarExpr(E->getArg(3))); llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext())); - Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty); Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); - return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); + return emitCallMaybeConstrainedFPBuiltin( + *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty, + {Ops[1], Ops[2], Ops[0]}); } case NEON::BI__builtin_neon_vmull_v: // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 
@@ -8657,8 +9959,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, unsigned ArgElts = VTy->getNumElements(); llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType()); unsigned BitWidth = EltTy->getBitWidth(); - llvm::Type *ArgTy = llvm::VectorType::get( - llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts); + auto *ArgTy = llvm::FixedVectorType::get( + llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts); llvm::Type* Tys[2] = { VTy, ArgTy }; Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp; SmallVector<llvm::Value*, 1> TmpOps; @@ -8726,27 +10028,37 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n"); case NEON::BI__builtin_neon_vrndah_f16: { Ops.push_back(EmitScalarExpr(E->getArg(0))); - Int = Intrinsic::round; + Int = Builder.getIsFPConstrained() + ? Intrinsic::experimental_constrained_round + : Intrinsic::round; return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda"); } case NEON::BI__builtin_neon_vrnda_v: case NEON::BI__builtin_neon_vrndaq_v: { - Int = Intrinsic::round; + Int = Builder.getIsFPConstrained() + ? Intrinsic::experimental_constrained_round + : Intrinsic::round; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda"); } case NEON::BI__builtin_neon_vrndih_f16: { Ops.push_back(EmitScalarExpr(E->getArg(0))); - Int = Intrinsic::nearbyint; + Int = Builder.getIsFPConstrained() + ? Intrinsic::experimental_constrained_nearbyint + : Intrinsic::nearbyint; return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi"); } case NEON::BI__builtin_neon_vrndmh_f16: { Ops.push_back(EmitScalarExpr(E->getArg(0))); - Int = Intrinsic::floor; + Int = Builder.getIsFPConstrained() + ? Intrinsic::experimental_constrained_floor + : Intrinsic::floor; return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm"); } case NEON::BI__builtin_neon_vrndm_v: case NEON::BI__builtin_neon_vrndmq_v: { - Int = Intrinsic::floor; + Int = Builder.getIsFPConstrained() + ? Intrinsic::experimental_constrained_floor + : Intrinsic::floor; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm"); } case NEON::BI__builtin_neon_vrndnh_f16: { @@ -8766,32 +10078,44 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case NEON::BI__builtin_neon_vrndph_f16: { Ops.push_back(EmitScalarExpr(E->getArg(0))); - Int = Intrinsic::ceil; + Int = Builder.getIsFPConstrained() + ? Intrinsic::experimental_constrained_ceil + : Intrinsic::ceil; return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp"); } case NEON::BI__builtin_neon_vrndp_v: case NEON::BI__builtin_neon_vrndpq_v: { - Int = Intrinsic::ceil; + Int = Builder.getIsFPConstrained() + ? Intrinsic::experimental_constrained_ceil + : Intrinsic::ceil; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp"); } case NEON::BI__builtin_neon_vrndxh_f16: { Ops.push_back(EmitScalarExpr(E->getArg(0))); - Int = Intrinsic::rint; + Int = Builder.getIsFPConstrained() + ? Intrinsic::experimental_constrained_rint + : Intrinsic::rint; return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx"); } case NEON::BI__builtin_neon_vrndx_v: case NEON::BI__builtin_neon_vrndxq_v: { - Int = Intrinsic::rint; + Int = Builder.getIsFPConstrained() + ? 
Intrinsic::experimental_constrained_rint + : Intrinsic::rint; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx"); } case NEON::BI__builtin_neon_vrndh_f16: { Ops.push_back(EmitScalarExpr(E->getArg(0))); - Int = Intrinsic::trunc; + Int = Builder.getIsFPConstrained() + ? Intrinsic::experimental_constrained_trunc + : Intrinsic::trunc; return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz"); } case NEON::BI__builtin_neon_vrnd_v: case NEON::BI__builtin_neon_vrndq_v: { - Int = Intrinsic::trunc; + Int = Builder.getIsFPConstrained() + ? Intrinsic::experimental_constrained_trunc + : Intrinsic::trunc; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz"); } case NEON::BI__builtin_neon_vcvt_f64_v: @@ -8942,12 +10266,16 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case NEON::BI__builtin_neon_vsqrth_f16: { Ops.push_back(EmitScalarExpr(E->getArg(0))); - Int = Intrinsic::sqrt; + Int = Builder.getIsFPConstrained() + ? Intrinsic::experimental_constrained_sqrt + : Intrinsic::sqrt; return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt"); } case NEON::BI__builtin_neon_vsqrt_v: case NEON::BI__builtin_neon_vsqrtq_v: { - Int = Intrinsic::sqrt; + Int = Builder.getIsFPConstrained() + ? Intrinsic::experimental_constrained_sqrt + : Intrinsic::sqrt; Ops[0] = Builder.CreateBitCast(Ops[0], Ty); return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt"); } @@ -8963,7 +10291,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vaddv_s8: { Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; Ty = Int32Ty; - VTy = llvm::VectorType::get(Int8Ty, 8); + VTy = llvm::FixedVectorType::get(Int8Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); @@ -8975,7 +10303,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vaddv_s16: { Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; Ty = Int32Ty; - VTy = llvm::VectorType::get(Int16Ty, 4); + VTy = llvm::FixedVectorType::get(Int16Ty, 4); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); @@ -8987,7 +10315,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vaddvq_s8: { Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; Ty = Int32Ty; - VTy = llvm::VectorType::get(Int8Ty, 16); + VTy = llvm::FixedVectorType::get(Int8Ty, 16); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); @@ -8999,7 +10327,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vaddvq_s16: { Int = usgn ? 
Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; Ty = Int32Ty; - VTy = llvm::VectorType::get(Int16Ty, 8); + VTy = llvm::FixedVectorType::get(Int16Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); @@ -9008,7 +10336,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vmaxv_u8: { Int = Intrinsic::aarch64_neon_umaxv; Ty = Int32Ty; - VTy = llvm::VectorType::get(Int8Ty, 8); + VTy = llvm::FixedVectorType::get(Int8Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); @@ -9017,7 +10345,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vmaxv_u16: { Int = Intrinsic::aarch64_neon_umaxv; Ty = Int32Ty; - VTy = llvm::VectorType::get(Int16Ty, 4); + VTy = llvm::FixedVectorType::get(Int16Ty, 4); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); @@ -9026,7 +10354,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vmaxvq_u8: { Int = Intrinsic::aarch64_neon_umaxv; Ty = Int32Ty; - VTy = llvm::VectorType::get(Int8Ty, 16); + VTy = llvm::FixedVectorType::get(Int8Ty, 16); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); @@ -9035,7 +10363,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vmaxvq_u16: { Int = Intrinsic::aarch64_neon_umaxv; Ty = Int32Ty; - VTy = llvm::VectorType::get(Int16Ty, 8); + VTy = llvm::FixedVectorType::get(Int16Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); @@ -9044,7 +10372,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vmaxv_s8: { Int = Intrinsic::aarch64_neon_smaxv; Ty = Int32Ty; - VTy = llvm::VectorType::get(Int8Ty, 8); + VTy = llvm::FixedVectorType::get(Int8Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); @@ -9053,7 +10381,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vmaxv_s16: { Int = Intrinsic::aarch64_neon_smaxv; Ty = Int32Ty; - VTy = llvm::VectorType::get(Int16Ty, 4); + VTy = llvm::FixedVectorType::get(Int16Ty, 4); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); @@ -9062,7 +10390,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vmaxvq_s8: { Int = Intrinsic::aarch64_neon_smaxv; Ty = Int32Ty; - VTy = llvm::VectorType::get(Int8Ty, 16); + VTy = llvm::FixedVectorType::get(Int8Ty, 16); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); @@ -9071,7 +10399,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vmaxvq_s16: { Int = Intrinsic::aarch64_neon_smaxv; Ty = Int32Ty; - VTy = llvm::VectorType::get(Int16Ty, 8); + VTy = llvm::FixedVectorType::get(Int16Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; 
Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); @@ -9080,7 +10408,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vmaxv_f16: { Int = Intrinsic::aarch64_neon_fmaxv; Ty = HalfTy; - VTy = llvm::VectorType::get(HalfTy, 4); + VTy = llvm::FixedVectorType::get(HalfTy, 4); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); @@ -9089,7 +10417,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vmaxvq_f16: { Int = Intrinsic::aarch64_neon_fmaxv; Ty = HalfTy; - VTy = llvm::VectorType::get(HalfTy, 8); + VTy = llvm::FixedVectorType::get(HalfTy, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); @@ -9098,7 +10426,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vminv_u8: { Int = Intrinsic::aarch64_neon_uminv; Ty = Int32Ty; - VTy = llvm::VectorType::get(Int8Ty, 8); + VTy = llvm::FixedVectorType::get(Int8Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); @@ -9107,7 +10435,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vminv_u16: { Int = Intrinsic::aarch64_neon_uminv; Ty = Int32Ty; - VTy = llvm::VectorType::get(Int16Ty, 4); + VTy = llvm::FixedVectorType::get(Int16Ty, 4); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); @@ -9116,7 +10444,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vminvq_u8: { Int = Intrinsic::aarch64_neon_uminv; Ty = Int32Ty; - VTy = llvm::VectorType::get(Int8Ty, 16); + VTy = llvm::FixedVectorType::get(Int8Ty, 16); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); @@ -9125,7 +10453,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vminvq_u16: { Int = Intrinsic::aarch64_neon_uminv; Ty = Int32Ty; - VTy = llvm::VectorType::get(Int16Ty, 8); + VTy = llvm::FixedVectorType::get(Int16Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); @@ -9134,7 +10462,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vminv_s8: { Int = Intrinsic::aarch64_neon_sminv; Ty = Int32Ty; - VTy = llvm::VectorType::get(Int8Ty, 8); + VTy = llvm::FixedVectorType::get(Int8Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); @@ -9143,7 +10471,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vminv_s16: { Int = Intrinsic::aarch64_neon_sminv; Ty = Int32Ty; - VTy = llvm::VectorType::get(Int16Ty, 4); + VTy = llvm::FixedVectorType::get(Int16Ty, 4); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); @@ -9152,7 +10480,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case 
NEON::BI__builtin_neon_vminvq_s8: { Int = Intrinsic::aarch64_neon_sminv; Ty = Int32Ty; - VTy = llvm::VectorType::get(Int8Ty, 16); + VTy = llvm::FixedVectorType::get(Int8Ty, 16); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); @@ -9161,7 +10489,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vminvq_s16: { Int = Intrinsic::aarch64_neon_sminv; Ty = Int32Ty; - VTy = llvm::VectorType::get(Int16Ty, 8); + VTy = llvm::FixedVectorType::get(Int16Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); @@ -9170,7 +10498,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vminv_f16: { Int = Intrinsic::aarch64_neon_fminv; Ty = HalfTy; - VTy = llvm::VectorType::get(HalfTy, 4); + VTy = llvm::FixedVectorType::get(HalfTy, 4); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); @@ -9179,7 +10507,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vminvq_f16: { Int = Intrinsic::aarch64_neon_fminv; Ty = HalfTy; - VTy = llvm::VectorType::get(HalfTy, 8); + VTy = llvm::FixedVectorType::get(HalfTy, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); @@ -9188,7 +10516,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vmaxnmv_f16: { Int = Intrinsic::aarch64_neon_fmaxnmv; Ty = HalfTy; - VTy = llvm::VectorType::get(HalfTy, 4); + VTy = llvm::FixedVectorType::get(HalfTy, 4); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv"); @@ -9197,7 +10525,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vmaxnmvq_f16: { Int = Intrinsic::aarch64_neon_fmaxnmv; Ty = HalfTy; - VTy = llvm::VectorType::get(HalfTy, 8); + VTy = llvm::FixedVectorType::get(HalfTy, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv"); @@ -9206,7 +10534,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vminnmv_f16: { Int = Intrinsic::aarch64_neon_fminnmv; Ty = HalfTy; - VTy = llvm::VectorType::get(HalfTy, 4); + VTy = llvm::FixedVectorType::get(HalfTy, 4); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv"); @@ -9215,7 +10543,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vminnmvq_f16: { Int = Intrinsic::aarch64_neon_fminnmv; Ty = HalfTy; - VTy = llvm::VectorType::get(HalfTy, 8); + VTy = llvm::FixedVectorType::get(HalfTy, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv"); @@ -9229,7 +10557,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vaddlv_u8: { Int = Intrinsic::aarch64_neon_uaddlv; Ty = Int32Ty; - VTy = llvm::VectorType::get(Int8Ty, 8); + VTy = llvm::FixedVectorType::get(Int8Ty, 8); llvm::Type *Tys[2] = { Ty, VTy 
}; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); @@ -9238,7 +10566,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vaddlv_u16: { Int = Intrinsic::aarch64_neon_uaddlv; Ty = Int32Ty; - VTy = llvm::VectorType::get(Int16Ty, 4); + VTy = llvm::FixedVectorType::get(Int16Ty, 4); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); @@ -9246,7 +10574,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vaddlvq_u8: { Int = Intrinsic::aarch64_neon_uaddlv; Ty = Int32Ty; - VTy = llvm::VectorType::get(Int8Ty, 16); + VTy = llvm::FixedVectorType::get(Int8Ty, 16); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); @@ -9255,7 +10583,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vaddlvq_u16: { Int = Intrinsic::aarch64_neon_uaddlv; Ty = Int32Ty; - VTy = llvm::VectorType::get(Int16Ty, 8); + VTy = llvm::FixedVectorType::get(Int16Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); @@ -9263,7 +10591,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vaddlv_s8: { Int = Intrinsic::aarch64_neon_saddlv; Ty = Int32Ty; - VTy = llvm::VectorType::get(Int8Ty, 8); + VTy = llvm::FixedVectorType::get(Int8Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); @@ -9272,7 +10600,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vaddlv_s16: { Int = Intrinsic::aarch64_neon_saddlv; Ty = Int32Ty; - VTy = llvm::VectorType::get(Int16Ty, 4); + VTy = llvm::FixedVectorType::get(Int16Ty, 4); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); @@ -9280,7 +10608,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vaddlvq_s8: { Int = Intrinsic::aarch64_neon_saddlv; Ty = Int32Ty; - VTy = llvm::VectorType::get(Int8Ty, 16); + VTy = llvm::FixedVectorType::get(Int8Ty, 16); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); @@ -9289,7 +10617,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vaddlvq_s16: { Int = Intrinsic::aarch64_neon_saddlv; Ty = Int32Ty; - VTy = llvm::VectorType::get(Int16Ty, 8); + VTy = llvm::FixedVectorType::get(Int16Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); @@ -9325,24 +10653,20 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vld1_v: case NEON::BI__builtin_neon_vld1q_v: { Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); - auto Alignment = CharUnits::fromQuantity( - BuiltinID == NEON::BI__builtin_neon_vld1_v ? 
8 : 16); - return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment); + return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment()); } case NEON::BI__builtin_neon_vst1_v: case NEON::BI__builtin_neon_vst1q_v: Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); Ops[1] = Builder.CreateBitCast(Ops[1], VTy); - return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); + return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment()); case NEON::BI__builtin_neon_vld1_lane_v: case NEON::BI__builtin_neon_vld1q_lane_v: { Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ty = llvm::PointerType::getUnqual(VTy->getElementType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - auto Alignment = CharUnits::fromQuantity( - BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16); - Ops[0] = - Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment); + Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], + PtrOp0.getAlignment()); return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane"); } case NEON::BI__builtin_neon_vld1_dup_v: @@ -9350,10 +10674,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Value *V = UndefValue::get(Ty); Ty = llvm::PointerType::getUnqual(VTy->getElementType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - auto Alignment = CharUnits::fromQuantity( - BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 8 : 16); - Ops[0] = - Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment); + Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], + PtrOp0.getAlignment()); llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI); return EmitNeonSplat(Ops[0], CI); @@ -9363,8 +10685,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); - return Builder.CreateDefaultAlignedStore(Ops[1], - Builder.CreateBitCast(Ops[0], Ty)); + return Builder.CreateAlignedStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty), + PtrOp0.getAlignment()); case NEON::BI__builtin_neon_vld2_v: case NEON::BI__builtin_neon_vld2q_v: { llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); @@ -9538,7 +10860,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { - SmallVector<uint32_t, 16> Indices; + SmallVector<int, 16> Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { Indices.push_back(i+vi); Indices.push_back(i+e+vi); @@ -9557,7 +10879,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { - SmallVector<uint32_t, 16> Indices; + SmallVector<int, 16> Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) Indices.push_back(2*i+vi); @@ -9575,7 +10897,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { - SmallVector<uint32_t, 16> Indices; + SmallVector<int, 16> Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { Indices.push_back((i + vi*e) >> 1); Indices.push_back(((i + vi*e) >> 1)+e); @@ -9633,33 +10955,103 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { - assert(BuiltinID == BPF::BI__builtin_preserve_field_info && - 
"unexpected ARM builtin"); + assert((BuiltinID == BPF::BI__builtin_preserve_field_info || + BuiltinID == BPF::BI__builtin_btf_type_id) && + "unexpected BPF builtin"); - const Expr *Arg = E->getArg(0); - bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField; + switch (BuiltinID) { + default: + llvm_unreachable("Unexpected BPF builtin"); + case BPF::BI__builtin_preserve_field_info: { + const Expr *Arg = E->getArg(0); + bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField; - if (!getDebugInfo()) { - CGM.Error(E->getExprLoc(), "using builtin_preserve_field_info() without -g"); - return IsBitField ? EmitLValue(Arg).getBitFieldPointer() - : EmitLValue(Arg).getPointer(*this); - } + if (!getDebugInfo()) { + CGM.Error(E->getExprLoc(), + "using __builtin_preserve_field_info() without -g"); + return IsBitField ? EmitLValue(Arg).getBitFieldPointer() + : EmitLValue(Arg).getPointer(*this); + } - // Enable underlying preserve_*_access_index() generation. - bool OldIsInPreservedAIRegion = IsInPreservedAIRegion; - IsInPreservedAIRegion = true; - Value *FieldAddr = IsBitField ? EmitLValue(Arg).getBitFieldPointer() - : EmitLValue(Arg).getPointer(*this); - IsInPreservedAIRegion = OldIsInPreservedAIRegion; + // Enable underlying preserve_*_access_index() generation. + bool OldIsInPreservedAIRegion = IsInPreservedAIRegion; + IsInPreservedAIRegion = true; + Value *FieldAddr = IsBitField ? EmitLValue(Arg).getBitFieldPointer() + : EmitLValue(Arg).getPointer(*this); + IsInPreservedAIRegion = OldIsInPreservedAIRegion; + + ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); + Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue()); + + // Built the IR for the preserve_field_info intrinsic. + llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getDeclaration( + &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info, + {FieldAddr->getType()}); + return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind}); + } + case BPF::BI__builtin_btf_type_id: { + Value *FieldVal = nullptr; + + // The LValue cannot be converted Value in order to be used as the function + // parameter. If it is a structure, it is the "alloca" result of the LValue + // (a pointer) is used in the parameter. If it is a simple type, + // the value will be loaded from its corresponding "alloca" and used as + // the parameter. In our case, let us just get a pointer of the LValue + // since we do not really use the parameter. The purpose of parameter + // is to prevent the generated IR llvm.bpf.btf.type.id intrinsic call, + // which carries metadata, from being changed. + bool IsLValue = E->getArg(0)->isLValue(); + if (IsLValue) + FieldVal = EmitLValue(E->getArg(0)).getPointer(*this); + else + FieldVal = EmitScalarExpr(E->getArg(0)); - ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); - Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue()); + if (!getDebugInfo()) { + CGM.Error(E->getExprLoc(), "using __builtin_btf_type_id() without -g"); + return nullptr; + } - // Built the IR for the preserve_field_info intrinsic. - llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getDeclaration( - &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info, - {FieldAddr->getType()}); - return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind}); + // Generate debuginfo type for the first argument. 
+ llvm::DIType *DbgInfo = + getDebugInfo()->getOrCreateStandaloneType(E->getArg(0)->getType(), + E->getArg(0)->getExprLoc()); + + ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); + Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue()); + + // Built the IR for the btf_type_id intrinsic. + // + // In the above, we converted LValue argument to a pointer to LValue. + // For example, the following + // int v; + // C1: __builtin_btf_type_id(v, flag); + // will be converted to + // L1: llvm.bpf.btf.type.id(&v, flag) + // This makes it hard to differentiate from + // C2: __builtin_btf_type_id(&v, flag); + // to + // L2: llvm.bpf.btf.type.id(&v, flag) + // + // If both C1 and C2 are present in the code, the llvm may later + // on do CSE on L1 and L2, which will result in incorrect tagged types. + // + // The C1->L1 transformation only happens if the argument of + // __builtin_btf_type_id() is a LValue. So Let us put whether + // the argument is an LValue or not into generated IR. This should + // prevent potential CSE from causing debuginfo type loss. + // + // The generated IR intrinsics will hence look like + // L1: llvm.bpf.btf.type.id(&v, 1, flag) !di_type_for_{v}; + // L2: llvm.bpf.btf.type.id(&v, 0, flag) !di_type_for_{&v}; + Constant *CV = ConstantInt::get(IntTy, IsLValue); + llvm::Function *FnBtfTypeId = llvm::Intrinsic::getDeclaration( + &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, + {FieldVal->getType(), CV->getType()}); + CallInst *Fn = Builder.CreateCall(FnBtfTypeId, {FieldVal, CV, FlagValue}); + Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo); + return Fn; + } + } } llvm::Value *CodeGenFunction:: @@ -9679,8 +11071,8 @@ BuildVector(ArrayRef<llvm::Value*> Ops) { } // Otherwise, insertelement the values to build the vector. - Value *Result = - llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size())); + Value *Result = llvm::UndefValue::get( + llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size())); for (unsigned i = 0, e = Ops.size(); i != e; ++i) Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i)); @@ -9692,14 +11084,15 @@ BuildVector(ArrayRef<llvm::Value*> Ops) { static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask, unsigned NumElts) { - llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(), - cast<IntegerType>(Mask->getType())->getBitWidth()); + auto *MaskTy = llvm::FixedVectorType::get( + CGF.Builder.getInt1Ty(), + cast<IntegerType>(Mask->getType())->getBitWidth()); Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy); // If we have less than 8 elements, then the starting mask was an i8 and // we need to extract down to the right number of elements. if (NumElts < 8) { - uint32_t Indices[4]; + int Indices[4]; for (unsigned i = 0; i != NumElts; ++i) Indices[i] = i; MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec, @@ -9709,42 +11102,40 @@ static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask, return MaskVec; } -static Value *EmitX86MaskedStore(CodeGenFunction &CGF, - ArrayRef<Value *> Ops, - unsigned Align) { +static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops, + Align Alignment) { // Cast the pointer to right type. 
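This helper is part of a commit-wide cleanup: raw unsigned alignment parameters become the type-safe llvm::Align, and lane counts are obtained through an explicit cast<llvm::VectorType> now that plain Type no longer answers vector queries. A minimal free-standing sketch of the same shape (function name assumed; CreateMaskedStore and Align are real LLVM APIs of this era):

#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/Alignment.h"

// Store Data through Ptr, writing only the lanes selected by MaskVec.
llvm::Value *emitUnalignedMaskedStore(llvm::IRBuilder<> &B, llvm::Value *Ptr,
                                      llvm::Value *Data, llvm::Value *MaskVec) {
  // Align(1) spells "no alignment guarantee"; the old interface took the
  // bare integer 1, which was easy to confuse with a log2 encoding.
  return B.CreateMaskedStore(Data, Ptr, llvm::Align(1), MaskVec);
}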
Value *Ptr = CGF.Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ops[1]->getType())); - Value *MaskVec = getMaskVecValue(CGF, Ops[2], - Ops[1]->getType()->getVectorNumElements()); + Value *MaskVec = getMaskVecValue( + CGF, Ops[2], cast<llvm::VectorType>(Ops[1]->getType())->getNumElements()); - return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Align, MaskVec); + return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec); } -static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, - ArrayRef<Value *> Ops, unsigned Align) { +static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops, + Align Alignment) { // Cast the pointer to right type. Value *Ptr = CGF.Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ops[1]->getType())); - Value *MaskVec = getMaskVecValue(CGF, Ops[2], - Ops[1]->getType()->getVectorNumElements()); + Value *MaskVec = getMaskVecValue( + CGF, Ops[2], cast<llvm::VectorType>(Ops[1]->getType())->getNumElements()); - return CGF.Builder.CreateMaskedLoad(Ptr, Align, MaskVec, Ops[1]); + return CGF.Builder.CreateMaskedLoad(Ptr, Alignment, MaskVec, Ops[1]); } static Value *EmitX86ExpandLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops) { - llvm::Type *ResultTy = Ops[1]->getType(); - llvm::Type *PtrTy = ResultTy->getVectorElementType(); + auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType()); + llvm::Type *PtrTy = ResultTy->getElementType(); // Cast the pointer to element type. Value *Ptr = CGF.Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(PtrTy)); - Value *MaskVec = getMaskVecValue(CGF, Ops[2], - ResultTy->getVectorNumElements()); + Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements()); llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload, ResultTy); @@ -9754,10 +11145,9 @@ static Value *EmitX86ExpandLoad(CodeGenFunction &CGF, static Value *EmitX86CompressExpand(CodeGenFunction &CGF, ArrayRef<Value *> Ops, bool IsCompress) { - llvm::Type *ResultTy = Ops[1]->getType(); + auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType()); - Value *MaskVec = getMaskVecValue(CGF, Ops[2], - ResultTy->getVectorNumElements()); + Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements()); Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress : Intrinsic::x86_avx512_mask_expand; @@ -9767,15 +11157,14 @@ static Value *EmitX86CompressExpand(CodeGenFunction &CGF, static Value *EmitX86CompressStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops) { - llvm::Type *ResultTy = Ops[1]->getType(); - llvm::Type *PtrTy = ResultTy->getVectorElementType(); + auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType()); + llvm::Type *PtrTy = ResultTy->getElementType(); // Cast the pointer to element type. Value *Ptr = CGF.Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(PtrTy)); - Value *MaskVec = getMaskVecValue(CGF, Ops[2], - ResultTy->getVectorNumElements()); + Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements()); llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore, ResultTy); @@ -9804,7 +11193,7 @@ static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1, // Funnel shifts amounts are treated as modulo and types are all power-of-2 so // we only care about the lowest log2 bits anyway. 
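When the shift amount is a scalar, the code above widens or truncates it to the element type and broadcasts it across the lanes before calling the funnel-shift intrinsic; fshl/fshr already take the amount modulo the element width, so no masking is needed. A sketch of that splat-and-call shape under assumed names:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

llvm::Value *emitFunnelShiftLeft(llvm::IRBuilder<> &B, llvm::Module &M,
                                 llvm::Value *Op0, llvm::Value *Op1,
                                 llvm::Value *Amt) {
  auto *Ty = llvm::cast<llvm::FixedVectorType>(Op0->getType());
  if (Amt->getType() != Ty) {
    // Broadcast the scalar amount to every lane; only the low log2(width)
    // bits matter, so a zero-extending cast is fine.
    Amt = B.CreateIntCast(Amt, Ty->getScalarType(), /*isSigned=*/false);
    Amt = B.CreateVectorSplat(Ty->getNumElements(), Amt);
  }
  llvm::Function *F =
      llvm::Intrinsic::getDeclaration(&M, llvm::Intrinsic::fshl, Ty);
  return B.CreateCall(F, {Op0, Op1, Amt});
}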
if (Amt->getType() != Ty) { - unsigned NumElts = Ty->getVectorNumElements(); + unsigned NumElts = cast<llvm::VectorType>(Ty)->getNumElements(); Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false); Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt); } @@ -9862,7 +11251,8 @@ static Value *EmitX86Select(CodeGenFunction &CGF, if (C->isAllOnesValue()) return Op0; - Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements()); + Mask = getMaskVecValue( + CGF, Mask, cast<llvm::VectorType>(Op0->getType())->getNumElements()); return CGF.Builder.CreateSelect(Mask, Op0, Op1); } @@ -9874,9 +11264,8 @@ static Value *EmitX86ScalarSelect(CodeGenFunction &CGF, if (C->isAllOnesValue()) return Op0; - llvm::VectorType *MaskTy = - llvm::VectorType::get(CGF.Builder.getInt1Ty(), - Mask->getType()->getIntegerBitWidth()); + auto *MaskTy = llvm::FixedVectorType::get( + CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth()); Mask = CGF.Builder.CreateBitCast(Mask, MaskTy); Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0); return CGF.Builder.CreateSelect(Mask, Op0, Op1); @@ -9891,7 +11280,7 @@ static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp, } if (NumElts < 8) { - uint32_t Indices[8]; + int Indices[8]; for (unsigned i = 0; i != NumElts; ++i) Indices[i] = i; for (unsigned i = NumElts; i != 8; ++i) @@ -9909,15 +11298,16 @@ static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, bool Signed, ArrayRef<Value *> Ops) { assert((Ops.size() == 2 || Ops.size() == 4) && "Unexpected number of arguments"); - unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); + unsigned NumElts = + cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); Value *Cmp; if (CC == 3) { Cmp = Constant::getNullValue( - llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts)); + llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts)); } else if (CC == 7) { Cmp = Constant::getAllOnesValue( - llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts)); + llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts)); } else { ICmpInst::Predicate Pred; switch (CC) { @@ -10033,24 +11423,19 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops, // Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding). if (IID != Intrinsic::not_intrinsic && - cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4) { + (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 || + IsAddSub)) { Function *Intr = CGF.CGM.getIntrinsic(IID); Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() }); } else { llvm::Type *Ty = A->getType(); - Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty); - Res = CGF.Builder.CreateCall(FMA, {A, B, C} ); - - if (IsAddSub) { - // Negate even elts in C using a mask. 
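The replacement code in the + lines that follow chooses between the plain and the constrained FMA intrinsic based on the builder's strict-FP flag, so functions compiled with FP-environment access keep their rounding and exception semantics. A reduced sketch of that branch (wrapper name assumed):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

llvm::Value *emitFMA(llvm::IRBuilder<> &B, llvm::Module &M, llvm::Value *A,
                     llvm::Value *X, llvm::Value *C) {
  llvm::Type *Ty = A->getType();
  if (B.getIsFPConstrained()) {
    // Strict FP: the constrained intrinsic carries rounding-mode and
    // exception-behavior operands that block unsafe reassociation.
    llvm::Function *F = llvm::Intrinsic::getDeclaration(
        &M, llvm::Intrinsic::experimental_constrained_fma, Ty);
    return B.CreateConstrainedFPCall(F, {A, X, C});
  }
  llvm::Function *F =
      llvm::Intrinsic::getDeclaration(&M, llvm::Intrinsic::fma, Ty);
  return B.CreateCall(F, {A, X, C});
}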
- unsigned NumElts = Ty->getVectorNumElements(); - SmallVector<uint32_t, 16> Indices(NumElts); - for (unsigned i = 0; i != NumElts; ++i) - Indices[i] = i + (i % 2) * NumElts; - - Value *NegC = CGF.Builder.CreateFNeg(C); - Value *FMSub = CGF.Builder.CreateCall(FMA, {A, B, NegC} ); - Res = CGF.Builder.CreateShuffleVector(FMSub, Res, Indices); + Function *FMA; + if (CGF.Builder.getIsFPConstrained()) { + FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty); + Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C}); + } else { + FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty); + Res = CGF.Builder.CreateCall(FMA, {A, B, C}); } } @@ -10108,6 +11493,10 @@ EmitScalarFMAExpr(CodeGenFunction &CGF, MutableArrayRef<Value *> Ops, Intrinsic::x86_avx512_vfmadd_f64; Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2], Ops[4]}); + } else if (CGF.Builder.getIsFPConstrained()) { + Function *FMA = CGF.CGM.getIntrinsic( + Intrinsic::experimental_constrained_fma, Ops[0]->getType()); + Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3)); } else { Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType()); Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3)); @@ -10132,8 +11521,8 @@ static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned, ArrayRef<Value *> Ops) { llvm::Type *Ty = Ops[0]->getType(); // Arguments have a vXi32 type so cast to vXi64. - Ty = llvm::VectorType::get(CGF.Int64Ty, - Ty->getPrimitiveSizeInBits() / 64); + Ty = llvm::FixedVectorType::get(CGF.Int64Ty, + Ty->getPrimitiveSizeInBits() / 64); Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty); Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty); @@ -10187,7 +11576,7 @@ static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask, static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, llvm::Type *DstTy) { - unsigned NumberOfElements = DstTy->getVectorNumElements(); + unsigned NumberOfElements = cast<llvm::VectorType>(DstTy)->getNumElements(); Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements); return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2"); } @@ -10209,6 +11598,43 @@ Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) { return EmitX86CpuIs(CPUStr); } +// Convert F16 halfs to floats. +static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF, + ArrayRef<Value *> Ops, + llvm::Type *DstTy) { + assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) && + "Unknown cvtph2ps intrinsic"); + + // If the SAE intrinsic doesn't use default rounding then we can't upgrade. + if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) { + Function *F = + CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512); + return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]}); + } + + unsigned NumDstElts = cast<llvm::VectorType>(DstTy)->getNumElements(); + Value *Src = Ops[0]; + + // Extract the subvector. + if (NumDstElts != cast<llvm::VectorType>(Src->getType())->getNumElements()) { + assert(NumDstElts == 4 && "Unexpected vector size"); + Src = CGF.Builder.CreateShuffleVector(Src, UndefValue::get(Src->getType()), + ArrayRef<int>{0, 1, 2, 3}); + } + + // Bitcast from vXi16 to vXf16. + auto *HalfTy = llvm::FixedVectorType::get( + llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts); + Src = CGF.Builder.CreateBitCast(Src, HalfTy); + + // Perform the fp-extension. 
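EmitX86CvtF16ToFloatExpr, added here, upgrades the F16C conversion builtins to generic IR whenever no SAE/rounding control is in play: narrow to the destination lane count if needed, bitcast the vXi16 payload to vXf16, then fp-extend. A condensed sketch of the core sequence (free function and parameter names assumed):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"

llvm::Value *cvtF16ToFloat(llvm::IRBuilder<> &B, llvm::Value *Src,
                           llvm::FixedVectorType *DstTy) {
  unsigned NumDstElts = DstTy->getNumElements();
  // Reinterpret the i16 lanes as IEEE half before extending to float.
  auto *HalfTy = llvm::FixedVectorType::get(
      llvm::Type::getHalfTy(B.getContext()), NumDstElts);
  Src = B.CreateBitCast(Src, HalfTy);
  return B.CreateFPExt(Src, DstTy, "cvtph2ps");
}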
+ Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps"); + + if (Ops.size() >= 3) + Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]); + return Res; +} + // Convert a BF16 to a float. static Value *EmitX86CvtBF16ToFloatExpr(CodeGenFunction &CGF, const CallExpr *E, @@ -10245,11 +11671,11 @@ Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) { std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr) #define X86_VENDOR(ENUM, STRING) \ .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)}) -#define X86_CPU_TYPE_COMPAT_WITH_ALIAS(ARCHNAME, ENUM, STR, ALIAS) \ - .Cases(STR, ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)}) -#define X86_CPU_TYPE_COMPAT(ARCHNAME, ENUM, STR) \ +#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) \ + .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)}) +#define X86_CPU_TYPE(ENUM, STR) \ .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)}) -#define X86_CPU_SUBTYPE_COMPAT(ARCHNAME, ENUM, STR) \ +#define X86_CPU_SUBTYPE(ENUM, STR) \ .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)}) #include "llvm/Support/X86TargetParser.def" .Default({0, 0}); @@ -10279,7 +11705,7 @@ CodeGenFunction::GetX86CpuSupportsMask(ArrayRef<StringRef> FeatureStrs) { for (const StringRef &FeatureStr : FeatureStrs) { unsigned Feature = StringSwitch<unsigned>(FeatureStr) -#define X86_FEATURE_COMPAT(VAL, ENUM, STR) .Case(STR, VAL) +#define X86_FEATURE_COMPAT(ENUM, STR) .Case(STR, llvm::X86::FEATURE_##ENUM) #include "llvm/Support/X86TargetParser.def" ; FeaturesMask |= (1ULL << Feature); @@ -10404,8 +11830,13 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // TODO: The builtins could be removed if the SSE header files used vector // extension comparisons directly (vector ordered/unordered may need // additional support via __builtin_isnan()). - auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) { - Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]); + auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred, + bool IsSignaling) { + Value *Cmp; + if (IsSignaling) + Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]); + else + Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]); llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType()); llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy); Value *Sext = Builder.CreateSExt(Cmp, IntVecTy); @@ -10484,7 +11915,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_vec_ext_v16hi: case X86::BI__builtin_ia32_vec_ext_v8si: case X86::BI__builtin_ia32_vec_ext_v4di: { - unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); + unsigned NumElts = + cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue(); Index &= NumElts - 1; // These builtins exist so we can ensure the index is an ICE and in range. @@ -10499,7 +11931,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_vec_set_v16hi: case X86::BI__builtin_ia32_vec_set_v8si: case X86::BI__builtin_ia32_vec_set_v4di: { - unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); + unsigned NumElts = + cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue(); Index &= NumElts - 1; // These builtins exist so we can ensure the index is an ICE and in range. 
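For the vec_ext/vec_set builtins the index is guaranteed to be an integer constant expression, and since the lane counts are powers of two, masking with NumElts - 1 clamps it into range before the element access. A tiny sketch of the extract side:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"

llvm::Value *extractLane(llvm::IRBuilder<> &B, llvm::Value *Vec,
                         uint64_t Index) {
  unsigned NumElts =
      llvm::cast<llvm::FixedVectorType>(Vec->getType())->getNumElements();
  Index &= NumElts - 1; // wrap the immediate into [0, NumElts)
  return B.CreateExtractElement(Vec, Index);
}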
@@ -10587,12 +12020,12 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_storedquqi512_mask: case X86::BI__builtin_ia32_storeupd512_mask: case X86::BI__builtin_ia32_storeups512_mask: - return EmitX86MaskedStore(*this, Ops, 1); + return EmitX86MaskedStore(*this, Ops, Align(1)); case X86::BI__builtin_ia32_storess128_mask: - case X86::BI__builtin_ia32_storesd128_mask: { - return EmitX86MaskedStore(*this, Ops, 1); - } + case X86::BI__builtin_ia32_storesd128_mask: + return EmitX86MaskedStore(*this, Ops, Align(1)); + case X86::BI__builtin_ia32_vpopcntb_128: case X86::BI__builtin_ia32_vpopcntd_128: case X86::BI__builtin_ia32_vpopcntq_128: @@ -10678,10 +12111,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_vfmaddpd512_mask3: case X86::BI__builtin_ia32_vfmsubpd512_mask3: return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/false); - case X86::BI__builtin_ia32_vfmaddsubps: - case X86::BI__builtin_ia32_vfmaddsubpd: - case X86::BI__builtin_ia32_vfmaddsubps256: - case X86::BI__builtin_ia32_vfmaddsubpd256: case X86::BI__builtin_ia32_vfmaddsubps512_mask: case X86::BI__builtin_ia32_vfmaddsubps512_maskz: case X86::BI__builtin_ia32_vfmaddsubps512_mask3: @@ -10703,11 +12132,11 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_movdqa32store512_mask: case X86::BI__builtin_ia32_movdqa64store512_mask: case X86::BI__builtin_ia32_storeaps512_mask: - case X86::BI__builtin_ia32_storeapd512_mask: { - unsigned Align = - getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity(); - return EmitX86MaskedStore(*this, Ops, Align); - } + case X86::BI__builtin_ia32_storeapd512_mask: + return EmitX86MaskedStore( + *this, Ops, + getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign()); + case X86::BI__builtin_ia32_loadups128_mask: case X86::BI__builtin_ia32_loadups256_mask: case X86::BI__builtin_ia32_loadups512_mask: @@ -10726,11 +12155,11 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_loaddqudi128_mask: case X86::BI__builtin_ia32_loaddqudi256_mask: case X86::BI__builtin_ia32_loaddqudi512_mask: - return EmitX86MaskedLoad(*this, Ops, 1); + return EmitX86MaskedLoad(*this, Ops, Align(1)); case X86::BI__builtin_ia32_loadss128_mask: case X86::BI__builtin_ia32_loadsd128_mask: - return EmitX86MaskedLoad(*this, Ops, 1); + return EmitX86MaskedLoad(*this, Ops, Align(1)); case X86::BI__builtin_ia32_loadaps128_mask: case X86::BI__builtin_ia32_loadaps256_mask: @@ -10743,11 +12172,10 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_movdqa32load512_mask: case X86::BI__builtin_ia32_movdqa64load128_mask: case X86::BI__builtin_ia32_movdqa64load256_mask: - case X86::BI__builtin_ia32_movdqa64load512_mask: { - unsigned Align = - getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity(); - return EmitX86MaskedLoad(*this, Ops, Align); - } + case X86::BI__builtin_ia32_movdqa64load512_mask: + return EmitX86MaskedLoad( + *this, Ops, + getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign()); case X86::BI__builtin_ia32_expandloaddf128_mask: case X86::BI__builtin_ia32_expandloaddf256_mask: @@ -10930,8 +12358,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, break; } - unsigned MinElts = std::min(Ops[0]->getType()->getVectorNumElements(), - Ops[2]->getType()->getVectorNumElements()); + unsigned MinElts = + 
std::min(cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(), + cast<llvm::VectorType>(Ops[2]->getType())->getNumElements()); Ops[3] = getMaskVecValue(*this, Ops[3], MinElts); Function *Intr = CGM.getIntrinsic(IID); return Builder.CreateCall(Intr, Ops); @@ -11038,8 +12467,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, break; } - unsigned MinElts = std::min(Ops[2]->getType()->getVectorNumElements(), - Ops[3]->getType()->getVectorNumElements()); + unsigned MinElts = + std::min(cast<llvm::VectorType>(Ops[2]->getType())->getNumElements(), + cast<llvm::VectorType>(Ops[3]->getType())->getNumElements()); Ops[1] = getMaskVecValue(*this, Ops[1], MinElts); Function *Intr = CGM.getIntrinsic(IID); return Builder.CreateCall(Intr, Ops); @@ -11061,16 +12491,17 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_extracti64x2_256_mask: case X86::BI__builtin_ia32_extractf64x2_512_mask: case X86::BI__builtin_ia32_extracti64x2_512_mask: { - llvm::Type *DstTy = ConvertType(E->getType()); - unsigned NumElts = DstTy->getVectorNumElements(); - unsigned SrcNumElts = Ops[0]->getType()->getVectorNumElements(); + auto *DstTy = cast<llvm::VectorType>(ConvertType(E->getType())); + unsigned NumElts = DstTy->getNumElements(); + unsigned SrcNumElts = + cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); unsigned SubVectors = SrcNumElts / NumElts; unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue(); assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors"); Index &= SubVectors - 1; // Remove any extra bits. Index *= NumElts; - uint32_t Indices[16]; + int Indices[16]; for (unsigned i = 0; i != NumElts; ++i) Indices[i] = i + Index; @@ -11100,15 +12531,17 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_inserti64x2_256: case X86::BI__builtin_ia32_insertf64x2_512: case X86::BI__builtin_ia32_inserti64x2_512: { - unsigned DstNumElts = Ops[0]->getType()->getVectorNumElements(); - unsigned SrcNumElts = Ops[1]->getType()->getVectorNumElements(); + unsigned DstNumElts = + cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); + unsigned SrcNumElts = + cast<llvm::VectorType>(Ops[1]->getType())->getNumElements(); unsigned SubVectors = DstNumElts / SrcNumElts; unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue(); assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors"); Index &= SubVectors - 1; // Remove any extra bits. Index *= SrcNumElts; - uint32_t Indices[16]; + int Indices[16]; for (unsigned i = 0; i != DstNumElts; ++i) Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i; @@ -11165,10 +12598,11 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_pblendw256: case X86::BI__builtin_ia32_pblendd128: case X86::BI__builtin_ia32_pblendd256: { - unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); + unsigned NumElts = + cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); - uint32_t Indices[16]; + int Indices[16]; // If there are more than 8 elements, the immediate is used twice so make // sure we handle that. 
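The pblend cases turn the immediate into a constant shuffle mask: bit i of the 8-bit immediate (reused cyclically for wider vectors, as the comment notes) picks lane i from the second source rather than the first. The assignment inside the loop falls outside this hunk's context lines; based on the surrounding code it plausibly looks like this sketch:

#include "llvm/ADT/SmallVector.h"

// Mask bit set -> take lane i from the second operand (indices NumElts..),
// mask bit clear -> keep lane i of the first operand.
llvm::SmallVector<int, 16> blendMask(unsigned Imm, unsigned NumElts) {
  llvm::SmallVector<int, 16> Indices(NumElts);
  for (unsigned i = 0; i != NumElts; ++i)
    Indices[i] = ((Imm >> (i % 8)) & 0x1) ? (int)(NumElts + i) : (int)i;
  return Indices;
}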
for (unsigned i = 0; i != NumElts; ++i) @@ -11182,13 +12616,13 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_pshuflw256: case X86::BI__builtin_ia32_pshuflw512: { uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); - llvm::Type *Ty = Ops[0]->getType(); - unsigned NumElts = Ty->getVectorNumElements(); + auto *Ty = cast<llvm::VectorType>(Ops[0]->getType()); + unsigned NumElts = Ty->getNumElements(); // Splat the 8-bits of immediate 4 times to help the loop wrap around. Imm = (Imm & 0xff) * 0x01010101; - uint32_t Indices[32]; + int Indices[32]; for (unsigned l = 0; l != NumElts; l += 8) { for (unsigned i = 0; i != 4; ++i) { Indices[l + i] = l + (Imm & 3); @@ -11206,13 +12640,13 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_pshufhw256: case X86::BI__builtin_ia32_pshufhw512: { uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); - llvm::Type *Ty = Ops[0]->getType(); - unsigned NumElts = Ty->getVectorNumElements(); + auto *Ty = cast<llvm::VectorType>(Ops[0]->getType()); + unsigned NumElts = Ty->getNumElements(); // Splat the 8-bits of immediate 4 times to help the loop wrap around. Imm = (Imm & 0xff) * 0x01010101; - uint32_t Indices[32]; + int Indices[32]; for (unsigned l = 0; l != NumElts; l += 8) { for (unsigned i = 0; i != 4; ++i) Indices[l + i] = l + i; @@ -11236,15 +12670,15 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_vpermilpd512: case X86::BI__builtin_ia32_vpermilps512: { uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); - llvm::Type *Ty = Ops[0]->getType(); - unsigned NumElts = Ty->getVectorNumElements(); + auto *Ty = cast<llvm::VectorType>(Ops[0]->getType()); + unsigned NumElts = Ty->getNumElements(); unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128; unsigned NumLaneElts = NumElts / NumLanes; // Splat the 8-bits of immediate 4 times to help the loop wrap around. Imm = (Imm & 0xff) * 0x01010101; - uint32_t Indices[16]; + int Indices[16]; for (unsigned l = 0; l != NumElts; l += NumLaneElts) { for (unsigned i = 0; i != NumLaneElts; ++i) { Indices[i + l] = (Imm % NumLaneElts) + l; @@ -11263,15 +12697,15 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_shufps256: case X86::BI__builtin_ia32_shufps512: { uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); - llvm::Type *Ty = Ops[0]->getType(); - unsigned NumElts = Ty->getVectorNumElements(); + auto *Ty = cast<llvm::VectorType>(Ops[0]->getType()); + unsigned NumElts = Ty->getNumElements(); unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128; unsigned NumLaneElts = NumElts / NumLanes; // Splat the 8-bits of immediate 4 times to help the loop wrap around. Imm = (Imm & 0xff) * 0x01010101; - uint32_t Indices[16]; + int Indices[16]; for (unsigned l = 0; l != NumElts; l += NumLaneElts) { for (unsigned i = 0; i != NumLaneElts; ++i) { unsigned Index = Imm % NumLaneElts; @@ -11291,11 +12725,11 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_permdi512: case X86::BI__builtin_ia32_permdf512: { unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); - llvm::Type *Ty = Ops[0]->getType(); - unsigned NumElts = Ty->getVectorNumElements(); + auto *Ty = cast<llvm::VectorType>(Ops[0]->getType()); + unsigned NumElts = Ty->getNumElements(); // These intrinsics operate on 256-bit lanes of four 64-bit elements. 
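vpermq/vpermpd consume two immediate bits per destination element, applied independently inside each group of four 64-bit lanes; the loop below decodes exactly that. Restated as a standalone sketch:

#include "llvm/ADT/SmallVector.h"

llvm::SmallVector<int, 8> permdiMask(unsigned Imm, unsigned NumElts) {
  llvm::SmallVector<int, 8> Indices(NumElts);
  for (unsigned l = 0; l != NumElts; l += 4)  // one 256-bit group at a time
    for (unsigned i = 0; i != 4; ++i)         // two immediate bits per lane
      Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
  return Indices;
}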
- uint32_t Indices[8]; + int Indices[8]; for (unsigned l = 0; l != NumElts; l += 4) for (unsigned i = 0; i != 4; ++i) Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3); @@ -11309,7 +12743,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_palignr512: { unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff; - unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); + unsigned NumElts = + cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); assert(NumElts % 16 == 0); // If palignr is shifting the pair of vectors more than the size of two @@ -11325,7 +12760,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType()); } - uint32_t Indices[64]; + int Indices[64]; // 256-bit palignr operates on 128-bit lanes so we need to handle that for (unsigned l = 0; l != NumElts; l += 16) { for (unsigned i = 0; i != 16; ++i) { @@ -11346,13 +12781,14 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_alignq128: case X86::BI__builtin_ia32_alignq256: case X86::BI__builtin_ia32_alignq512: { - unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); + unsigned NumElts = + cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff; // Mask the shift amount to width of two vectors. ShiftVal &= (2 * NumElts) - 1; - uint32_t Indices[16]; + int Indices[16]; for (unsigned i = 0; i != NumElts; ++i) Indices[i] = i + ShiftVal; @@ -11369,12 +12805,12 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_shuf_i32x4: case X86::BI__builtin_ia32_shuf_i64x2: { unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); - llvm::Type *Ty = Ops[0]->getType(); - unsigned NumElts = Ty->getVectorNumElements(); + auto *Ty = cast<llvm::VectorType>(Ops[0]->getType()); + unsigned NumElts = Ty->getNumElements(); unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2; unsigned NumLaneElts = NumElts / NumLanes; - uint32_t Indices[16]; + int Indices[16]; for (unsigned l = 0; l != NumElts; l += NumLaneElts) { unsigned Index = (Imm % NumLanes) * NumLaneElts; Imm /= NumLanes; // Discard the bits we just used. @@ -11395,7 +12831,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_vperm2f128_si256: case X86::BI__builtin_ia32_permti256: { unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); - unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); + unsigned NumElts = + cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); // This takes a very simple approach since there are two lanes and a // shuffle can have 2 inputs. So we reserve the first input for the first @@ -11403,7 +12840,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // duplicate sources, but this can be dealt with in the backend. Value *OutOps[2]; - uint32_t Indices[8]; + int Indices[8]; for (unsigned l = 0; l != 2; ++l) { // Determine the source for this lane. 
if (Imm & (1 << ((l * 4) + 3))) @@ -11433,15 +12870,15 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_pslldqi256_byteshift: case X86::BI__builtin_ia32_pslldqi512_byteshift: { unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff; - llvm::Type *ResultType = Ops[0]->getType(); + auto *ResultType = cast<llvm::VectorType>(Ops[0]->getType()); // Builtin type is vXi64 so multiply by 8 to get bytes. - unsigned NumElts = ResultType->getVectorNumElements() * 8; + unsigned NumElts = ResultType->getNumElements() * 8; // If pslldq is shifting the vector more than 15 bytes, emit zero. if (ShiftVal >= 16) return llvm::Constant::getNullValue(ResultType); - uint32_t Indices[64]; + int Indices[64]; // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that for (unsigned l = 0; l != NumElts; l += 16) { for (unsigned i = 0; i != 16; ++i) { @@ -11451,7 +12888,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, } } - llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, NumElts); + auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts); Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast"); Value *Zero = llvm::Constant::getNullValue(VecTy); Value *SV = Builder.CreateShuffleVector(Zero, Cast, @@ -11463,15 +12900,15 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_psrldqi256_byteshift: case X86::BI__builtin_ia32_psrldqi512_byteshift: { unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff; - llvm::Type *ResultType = Ops[0]->getType(); + auto *ResultType = cast<llvm::VectorType>(Ops[0]->getType()); // Builtin type is vXi64 so multiply by 8 to get bytes. - unsigned NumElts = ResultType->getVectorNumElements() * 8; + unsigned NumElts = ResultType->getNumElements() * 8; // If psrldq is shifting the vector more than 15 bytes, emit zero. if (ShiftVal >= 16) return llvm::Constant::getNullValue(ResultType); - uint32_t Indices[64]; + int Indices[64]; // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that for (unsigned l = 0; l != NumElts; l += 16) { for (unsigned i = 0; i != 16; ++i) { @@ -11481,7 +12918,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, } } - llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, NumElts); + auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts); Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast"); Value *Zero = llvm::Constant::getNullValue(VecTy); Value *SV = Builder.CreateShuffleVector(Cast, Zero, @@ -11501,7 +12938,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Value *In = getMaskVecValue(*this, Ops[0], NumElts); - uint32_t Indices[64]; + int Indices[64]; for (unsigned i = 0; i != NumElts; ++i) Indices[i] = NumElts + i - ShiftVal; @@ -11523,7 +12960,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Value *In = getMaskVecValue(*this, Ops[0], NumElts); - uint32_t Indices[64]; + int Indices[64]; for (unsigned i = 0; i != NumElts; ++i) Indices[i] = i + ShiftVal; @@ -11555,7 +12992,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // Unaligned nontemporal store of the scalar value. StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC); SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); - SI->setAlignment(llvm::Align::None()); + SI->setAlignment(llvm::Align(1)); return SI; } // Rotate is a special case of funnel shift - 1st 2 args are the same. 
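As that comment says, a rotate is the degenerate funnel shift whose two data operands coincide, so the rotate builtins reuse the fshl/fshr lowering. A one-function sketch under the same assumptions as the funnel-shift example above:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

// rotl(x, amt) == fshl(x, x, amt): both halves of the funnel are x.
llvm::Value *emitRotateLeft(llvm::IRBuilder<> &B, llvm::Module &M,
                            llvm::Value *X, llvm::Value *Amt) {
  llvm::Function *F = llvm::Intrinsic::getDeclaration(
      &M, llvm::Intrinsic::fshl, X->getType());
  return B.CreateCall(F, {X, X, Amt});
}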
@@ -11803,7 +13240,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); Value *LHS = getMaskVecValue(*this, Ops[0], NumElts); Value *RHS = getMaskVecValue(*this, Ops[1], NumElts); - uint32_t Indices[64]; + int Indices[64]; for (unsigned i = 0; i != NumElts; ++i) Indices[i] = i; @@ -11832,8 +13269,15 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_sqrtss: case X86::BI__builtin_ia32_sqrtsd: { Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0); - Function *F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType()); - A = Builder.CreateCall(F, {A}); + Function *F; + if (Builder.getIsFPConstrained()) { + F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, + A->getType()); + A = Builder.CreateConstrainedFPCall(F, {A}); + } else { + F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType()); + A = Builder.CreateCall(F, {A}); + } return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0); } case X86::BI__builtin_ia32_sqrtsd_round_mask: @@ -11848,8 +13292,15 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); } Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0); - Function *F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType()); - A = Builder.CreateCall(F, A); + Function *F; + if (Builder.getIsFPConstrained()) { + F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, + A->getType()); + A = Builder.CreateConstrainedFPCall(F, A); + } else { + F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType()); + A = Builder.CreateCall(F, A); + } Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0); A = EmitX86ScalarSelect(*this, Ops[3], A, Src); return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0); @@ -11871,8 +13322,14 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); } } - Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType()); - return Builder.CreateCall(F, Ops[0]); + if (Builder.getIsFPConstrained()) { + Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, + Ops[0]->getType()); + return Builder.CreateConstrainedFPCall(F, Ops[0]); + } else { + Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType()); + return Builder.CreateCall(F, Ops[0]); + } } case X86::BI__builtin_ia32_pabsb128: case X86::BI__builtin_ia32_pabsw128: @@ -12089,7 +13546,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_fpclasspd128_mask: case X86::BI__builtin_ia32_fpclasspd256_mask: case X86::BI__builtin_ia32_fpclasspd512_mask: { - unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); + unsigned NumElts = + cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); Value *MaskIn = Ops[2]; Ops.erase(&Ops[2]); @@ -12126,7 +13584,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_vp2intersect_d_512: case X86::BI__builtin_ia32_vp2intersect_d_256: case X86::BI__builtin_ia32_vp2intersect_d_128: { - unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); + unsigned NumElts = + cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); Intrinsic::ID ID; switch (BuiltinID) { @@ -12184,7 +13643,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_vpshufbitqmb128_mask: case X86::BI__builtin_ia32_vpshufbitqmb256_mask: case X86::BI__builtin_ia32_vpshufbitqmb512_mask: { - 
unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); + unsigned NumElts = + cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); Value *MaskIn = Ops[2]; Ops.erase(&Ops[2]); @@ -12209,28 +13669,28 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // packed comparison intrinsics case X86::BI__builtin_ia32_cmpeqps: case X86::BI__builtin_ia32_cmpeqpd: - return getVectorFCmpIR(CmpInst::FCMP_OEQ); + return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false); case X86::BI__builtin_ia32_cmpltps: case X86::BI__builtin_ia32_cmpltpd: - return getVectorFCmpIR(CmpInst::FCMP_OLT); + return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true); case X86::BI__builtin_ia32_cmpleps: case X86::BI__builtin_ia32_cmplepd: - return getVectorFCmpIR(CmpInst::FCMP_OLE); + return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true); case X86::BI__builtin_ia32_cmpunordps: case X86::BI__builtin_ia32_cmpunordpd: - return getVectorFCmpIR(CmpInst::FCMP_UNO); + return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false); case X86::BI__builtin_ia32_cmpneqps: case X86::BI__builtin_ia32_cmpneqpd: - return getVectorFCmpIR(CmpInst::FCMP_UNE); + return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false); case X86::BI__builtin_ia32_cmpnltps: case X86::BI__builtin_ia32_cmpnltpd: - return getVectorFCmpIR(CmpInst::FCMP_UGE); + return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true); case X86::BI__builtin_ia32_cmpnleps: case X86::BI__builtin_ia32_cmpnlepd: - return getVectorFCmpIR(CmpInst::FCMP_UGT); + return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true); case X86::BI__builtin_ia32_cmpordps: case X86::BI__builtin_ia32_cmpordpd: - return getVectorFCmpIR(CmpInst::FCMP_ORD); + return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false); case X86::BI__builtin_ia32_cmpps: case X86::BI__builtin_ia32_cmpps256: case X86::BI__builtin_ia32_cmppd: @@ -12255,42 +13715,90 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // Ignoring requested signaling behaviour, // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT. 
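The old table collapsed each signaling/quiet predicate pair (_OS/_OQ) onto one quiet compare; the rewritten table below tracks an IsSignaling bit and emits the signaling fcmp variant where the ISA requires quiet NaNs to raise Invalid. The dispatch reduces to this sketch (helper name assumed; CreateFCmpS is the real IRBuilder entry point the patch uses):

#include "llvm/IR/IRBuilder.h"

llvm::Value *emitVectorFCmp(llvm::IRBuilder<> &B,
                            llvm::CmpInst::Predicate Pred, bool IsSignaling,
                            llvm::Value *LHS, llvm::Value *RHS) {
  // FCmpS raises the FP Invalid exception on quiet NaN operands, matching
  // the _OS-suffixed x86 comparison predicates.
  return IsSignaling ? B.CreateFCmpS(Pred, LHS, RHS)
                     : B.CreateFCmp(Pred, LHS, RHS);
}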
FCmpInst::Predicate Pred; - switch (CC) { - case 0x00: Pred = FCmpInst::FCMP_OEQ; break; - case 0x01: Pred = FCmpInst::FCMP_OLT; break; - case 0x02: Pred = FCmpInst::FCMP_OLE; break; - case 0x03: Pred = FCmpInst::FCMP_UNO; break; - case 0x04: Pred = FCmpInst::FCMP_UNE; break; - case 0x05: Pred = FCmpInst::FCMP_UGE; break; - case 0x06: Pred = FCmpInst::FCMP_UGT; break; - case 0x07: Pred = FCmpInst::FCMP_ORD; break; - case 0x08: Pred = FCmpInst::FCMP_UEQ; break; - case 0x09: Pred = FCmpInst::FCMP_ULT; break; - case 0x0a: Pred = FCmpInst::FCMP_ULE; break; - case 0x0b: Pred = FCmpInst::FCMP_FALSE; break; - case 0x0c: Pred = FCmpInst::FCMP_ONE; break; - case 0x0d: Pred = FCmpInst::FCMP_OGE; break; - case 0x0e: Pred = FCmpInst::FCMP_OGT; break; - case 0x0f: Pred = FCmpInst::FCMP_TRUE; break; - case 0x10: Pred = FCmpInst::FCMP_OEQ; break; - case 0x11: Pred = FCmpInst::FCMP_OLT; break; - case 0x12: Pred = FCmpInst::FCMP_OLE; break; - case 0x13: Pred = FCmpInst::FCMP_UNO; break; - case 0x14: Pred = FCmpInst::FCMP_UNE; break; - case 0x15: Pred = FCmpInst::FCMP_UGE; break; - case 0x16: Pred = FCmpInst::FCMP_UGT; break; - case 0x17: Pred = FCmpInst::FCMP_ORD; break; - case 0x18: Pred = FCmpInst::FCMP_UEQ; break; - case 0x19: Pred = FCmpInst::FCMP_ULT; break; - case 0x1a: Pred = FCmpInst::FCMP_ULE; break; - case 0x1b: Pred = FCmpInst::FCMP_FALSE; break; - case 0x1c: Pred = FCmpInst::FCMP_ONE; break; - case 0x1d: Pred = FCmpInst::FCMP_OGE; break; - case 0x1e: Pred = FCmpInst::FCMP_OGT; break; - case 0x1f: Pred = FCmpInst::FCMP_TRUE; break; + bool IsSignaling; + // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling + // behavior is inverted. We'll handle that after the switch. + switch (CC & 0xf) { + case 0x00: Pred = FCmpInst::FCMP_OEQ; IsSignaling = false; break; + case 0x01: Pred = FCmpInst::FCMP_OLT; IsSignaling = true; break; + case 0x02: Pred = FCmpInst::FCMP_OLE; IsSignaling = true; break; + case 0x03: Pred = FCmpInst::FCMP_UNO; IsSignaling = false; break; + case 0x04: Pred = FCmpInst::FCMP_UNE; IsSignaling = false; break; + case 0x05: Pred = FCmpInst::FCMP_UGE; IsSignaling = true; break; + case 0x06: Pred = FCmpInst::FCMP_UGT; IsSignaling = true; break; + case 0x07: Pred = FCmpInst::FCMP_ORD; IsSignaling = false; break; + case 0x08: Pred = FCmpInst::FCMP_UEQ; IsSignaling = false; break; + case 0x09: Pred = FCmpInst::FCMP_ULT; IsSignaling = true; break; + case 0x0a: Pred = FCmpInst::FCMP_ULE; IsSignaling = true; break; + case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break; + case 0x0c: Pred = FCmpInst::FCMP_ONE; IsSignaling = false; break; + case 0x0d: Pred = FCmpInst::FCMP_OGE; IsSignaling = true; break; + case 0x0e: Pred = FCmpInst::FCMP_OGT; IsSignaling = true; break; + case 0x0f: Pred = FCmpInst::FCMP_TRUE; IsSignaling = false; break; default: llvm_unreachable("Unhandled CC"); } + // Invert the signalling behavior for 16-31. + if (CC & 0x10) + IsSignaling = !IsSignaling; + + // If the predicate is true or false and we're using constrained intrinsics, + // we don't have a compare intrinsic we can use. Just use the legacy X86 + // specific intrinsic. 
+ if ((Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE) && + Builder.getIsFPConstrained()) { + + Intrinsic::ID IID; + switch (BuiltinID) { + default: llvm_unreachable("Unexpected builtin"); + case X86::BI__builtin_ia32_cmpps: + IID = Intrinsic::x86_sse_cmp_ps; + break; + case X86::BI__builtin_ia32_cmpps256: + IID = Intrinsic::x86_avx_cmp_ps_256; + break; + case X86::BI__builtin_ia32_cmppd: + IID = Intrinsic::x86_sse2_cmp_pd; + break; + case X86::BI__builtin_ia32_cmppd256: + IID = Intrinsic::x86_avx_cmp_pd_256; + break; + case X86::BI__builtin_ia32_cmpps512_mask: + IID = Intrinsic::x86_avx512_cmp_ps_512; + break; + case X86::BI__builtin_ia32_cmppd512_mask: + IID = Intrinsic::x86_avx512_cmp_pd_512; + break; + case X86::BI__builtin_ia32_cmpps128_mask: + IID = Intrinsic::x86_avx512_cmp_ps_128; + break; + case X86::BI__builtin_ia32_cmpps256_mask: + IID = Intrinsic::x86_avx512_cmp_ps_256; + break; + case X86::BI__builtin_ia32_cmppd128_mask: + IID = Intrinsic::x86_avx512_cmp_pd_128; + break; + case X86::BI__builtin_ia32_cmppd256_mask: + IID = Intrinsic::x86_avx512_cmp_pd_256; + break; + } + + Function *Intr = CGM.getIntrinsic(IID); + if (cast<llvm::VectorType>(Intr->getReturnType()) + ->getElementType() + ->isIntegerTy(1)) { + unsigned NumElts = + cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); + Value *MaskIn = Ops[3]; + Ops.erase(&Ops[3]); + + Value *Cmp = Builder.CreateCall(Intr, Ops); + return EmitX86MaskedCompareResult(*this, Cmp, NumElts, MaskIn); + } + + return Builder.CreateCall(Intr, Ops); + } + // Builtins without the _mask suffix return a vector of integers // of the same width as the input vectors switch (BuiltinID) { @@ -12300,12 +13808,18 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_cmpps256_mask: case X86::BI__builtin_ia32_cmppd128_mask: case X86::BI__builtin_ia32_cmppd256_mask: { - unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); - Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]); + // FIXME: Support SAE. 
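The masked variants of these compares return an integer bitmask rather than a vector, which is what EmitX86MaskedCompareResult (seen earlier in this diff) produces: pad the <N x i1> compare result to eight lanes with zeros when N < 8, then bitcast to the mask integer type. A reduced sketch of that tail end, assuming no input mask to AND in:

#include <algorithm>
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"

llvm::Value *compareToMask(llvm::IRBuilder<> &B, llvm::Value *Cmp,
                           unsigned NumElts) {
  if (NumElts < 8) {
    // Indices >= NumElts select from the second shuffle operand, here an
    // all-zero vector, so the top lanes of the mask read as 0.
    llvm::SmallVector<int, 8> Indices;
    for (unsigned i = 0; i != 8; ++i)
      Indices.push_back(i < NumElts ? (int)i : (int)NumElts);
    Cmp = B.CreateShuffleVector(
        Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
  }
  return B.CreateBitCast(
      Cmp, llvm::IntegerType::get(B.getContext(), std::max(NumElts, 8u)));
}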
+ unsigned NumElts = + cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); + Value *Cmp; + if (IsSignaling) + Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]); + else + Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]); return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]); } default: - return getVectorFCmpIR(Pred); + return getVectorFCmpIR(Pred, IsSignaling); } } @@ -12343,10 +13857,19 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_cmpordsd: return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7); + // f16c half2float intrinsics + case X86::BI__builtin_ia32_vcvtph2ps: + case X86::BI__builtin_ia32_vcvtph2ps256: + case X86::BI__builtin_ia32_vcvtph2ps_mask: + case X86::BI__builtin_ia32_vcvtph2ps256_mask: + case X86::BI__builtin_ia32_vcvtph2ps512_mask: + return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType())); + // AVX512 bf16 intrinsics case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: { - Ops[2] = getMaskVecValue(*this, Ops[2], - Ops[0]->getType()->getVectorNumElements()); + Ops[2] = getMaskVecValue( + *this, Ops[2], + cast<llvm::VectorType>(Ops[0]->getType())->getNumElements()); Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128; return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); } @@ -12506,7 +14029,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__stosb: { // We treat __stosb as a volatile memset - it may not generate "rep stosb" // instruction, but it will create a memset that won't be optimized away. - return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align::None(), true); + return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true); } case X86::BI__ud2: // llvm.trap makes a ud2a instruction on x86. @@ -12731,9 +14254,14 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, case PPC::BI__builtin_vsx_xvsqrtdp: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); - ID = Intrinsic::sqrt; - llvm::Function *F = CGM.getIntrinsic(ID, ResultType); - return Builder.CreateCall(F, X); + if (Builder.getIsFPConstrained()) { + llvm::Function *F = CGM.getIntrinsic( + Intrinsic::experimental_constrained_sqrt, ResultType); + return Builder.CreateConstrainedFPCall(F, X); + } else { + llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType); + return Builder.CreateCall(F, X); + } } // Count leading zeros case PPC::BI__builtin_altivec_vclzb: @@ -12790,21 +14318,32 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, Value *X = EmitScalarExpr(E->getArg(0)); if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim || BuiltinID == PPC::BI__builtin_vsx_xvrspim) - ID = Intrinsic::floor; + ID = Builder.getIsFPConstrained() + ? Intrinsic::experimental_constrained_floor + : Intrinsic::floor; else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi || BuiltinID == PPC::BI__builtin_vsx_xvrspi) - ID = Intrinsic::round; + ID = Builder.getIsFPConstrained() + ? Intrinsic::experimental_constrained_round + : Intrinsic::round; else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic || BuiltinID == PPC::BI__builtin_vsx_xvrspic) - ID = Intrinsic::nearbyint; + ID = Builder.getIsFPConstrained() + ? Intrinsic::experimental_constrained_nearbyint + : Intrinsic::nearbyint; else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip || BuiltinID == PPC::BI__builtin_vsx_xvrspip) - ID = Intrinsic::ceil; + ID = Builder.getIsFPConstrained() + ? 
Intrinsic::experimental_constrained_ceil + : Intrinsic::ceil; else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz || BuiltinID == PPC::BI__builtin_vsx_xvrspiz) - ID = Intrinsic::trunc; + ID = Builder.getIsFPConstrained() + ? Intrinsic::experimental_constrained_trunc + : Intrinsic::trunc; llvm::Function *F = CGM.getIntrinsic(ID, ResultType); - return Builder.CreateCall(F, X); + return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(F, X) + : Builder.CreateCall(F, X); } // Absolute value @@ -12829,25 +14368,43 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, Value *X = EmitScalarExpr(E->getArg(0)); Value *Y = EmitScalarExpr(E->getArg(1)); Value *Z = EmitScalarExpr(E->getArg(2)); - Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); - llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); + llvm::Function *F; + if (Builder.getIsFPConstrained()) + F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType); + else + F = CGM.getIntrinsic(Intrinsic::fma, ResultType); switch (BuiltinID) { case PPC::BI__builtin_vsx_xvmaddadp: case PPC::BI__builtin_vsx_xvmaddasp: - return Builder.CreateCall(F, {X, Y, Z}); + if (Builder.getIsFPConstrained()) + return Builder.CreateConstrainedFPCall(F, {X, Y, Z}); + else + return Builder.CreateCall(F, {X, Y, Z}); case PPC::BI__builtin_vsx_xvnmaddadp: case PPC::BI__builtin_vsx_xvnmaddasp: - return Builder.CreateFSub(Zero, - Builder.CreateCall(F, {X, Y, Z}), "sub"); + if (Builder.getIsFPConstrained()) + return Builder.CreateFNeg( + Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg"); + else + return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg"); case PPC::BI__builtin_vsx_xvmsubadp: case PPC::BI__builtin_vsx_xvmsubasp: - return Builder.CreateCall(F, - {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); + if (Builder.getIsFPConstrained()) + return Builder.CreateConstrainedFPCall( + F, {X, Y, Builder.CreateFNeg(Z, "neg")}); + else + return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")}); case PPC::BI__builtin_vsx_xvnmsubadp: case PPC::BI__builtin_vsx_xvnmsubasp: - Value *FsubRes = - Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); - return Builder.CreateFSub(Zero, FsubRes, "sub"); + if (Builder.getIsFPConstrained()) + return Builder.CreateFNeg( + Builder.CreateConstrainedFPCall( + F, {X, Y, Builder.CreateFNeg(Z, "neg")}), + "neg"); + else + return Builder.CreateFNeg( + Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")}), + "neg"); } llvm_unreachable("Unknown FMA operation"); return nullptr; // Suppress no-return warning @@ -12873,25 +14430,22 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, // Need to cast the second argument from a vector of unsigned int to a // vector of long long. - Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2)); + Ops[1] = + Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(Int64Ty, 2)); if (getTarget().isLittleEndian()) { - // Create a shuffle mask of (1, 0) - Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1), - ConstantInt::get(Int32Ty, 0) - }; - Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); - // Reverse the double words in the vector we will extract from. 
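This PPC hunk also shows the commit-wide shuffle-mask migration: instead of assembling a ConstantVector of i32 constants, CreateShuffleVector now accepts a plain ArrayRef<int>, so the little-endian doubleword swap in the lines below shrinks to a literal {1, 0}. As a sketch over an assumed <2 x i64> input:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/IRBuilder.h"

// Swap the two 64-bit halves of a 128-bit vector (little-endian fix-up).
llvm::Value *swapDoublewords(llvm::IRBuilder<> &B, llvm::Value *V) {
  return B.CreateShuffleVector(V, V, llvm::ArrayRef<int>{1, 0});
}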
- Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); - Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask); + Ops[0] = + Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2)); + Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ArrayRef<int>{1, 0}); // Reverse the index. Index = MaxIndex - Index; } // Intrinsic expects the first arg to be a vector of int. - Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4)); + Ops[0] = + Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4)); Ops[2] = ConstantInt::getSigned(Int32Ty, Index); return Builder.CreateCall(F, Ops); } @@ -12900,7 +14454,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw); // Intrinsic expects the first argument to be a vector of doublewords. - Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); + Ops[0] = + Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2)); // The second argument is a compile time constant int that needs to // be clamped to the range [0, 12]. @@ -12918,13 +14473,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, // Emit the call, then reverse the double words of the results vector. Value *Call = Builder.CreateCall(F, Ops); - // Create a shuffle mask of (1, 0) - Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1), - ConstantInt::get(Int32Ty, 0) - }; - Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); - - Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask); + Value *ShuffleCall = + Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0}); return ShuffleCall; } else { Ops[1] = ConstantInt::getSigned(Int32Ty, Index); @@ -12937,21 +14487,20 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, assert(ArgCI && "Third arg must be constant integer!"); unsigned Index = ArgCI->getZExtValue(); - Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); - Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2)); + Ops[0] = + Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2)); + Ops[1] = + Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(Int64Ty, 2)); // Account for endianness by treating this as just a shuffle. So we use the // same indices for both LE and BE in order to produce expected results in // both cases. 
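Two mechanical API migrations account for most of the churn in these hunks: fixed-width vector types are now spelled llvm::FixedVectorType::get, and CreateShuffleVector accepts a plain ArrayRef<int> mask instead of a ConstantVector of i32 constants. A small sketch of the doubleword-swap idiom above in the new style; reverseDoublewords is an illustrative name, not a clang helper:

```cpp
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"

// Reinterpret a 128-bit value as <2 x i64> and swap the lanes, as the
// little-endian paths above do before extracting a word.
static llvm::Value *reverseDoublewords(llvm::IRBuilderBase &Builder,
                                       llvm::Value *V) {
  llvm::LLVMContext &Ctx = Builder.getContext();
  auto *V2I64 = llvm::FixedVectorType::get(llvm::Type::getInt64Ty(Ctx), 2);
  llvm::Value *Cast = Builder.CreateBitCast(V, V2I64);
  // Mask {1, 0}: emit lane 1 first, then lane 0.
  return Builder.CreateShuffleVector(Cast, Cast, llvm::ArrayRef<int>{1, 0});
}
```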
- unsigned ElemIdx0 = (Index & 2) >> 1; - unsigned ElemIdx1 = 2 + (Index & 1); - - Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0), - ConstantInt::get(Int32Ty, ElemIdx1)}; - Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); + int ElemIdx0 = (Index & 2) >> 1; + int ElemIdx1 = 2 + (Index & 1); + int ShuffleElts[2] = {ElemIdx0, ElemIdx1}; Value *ShuffleCall = - Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask); + Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleElts); QualType BIRetType = E->getType(); auto RetTy = ConvertType(BIRetType); return Builder.CreateBitCast(ShuffleCall, RetTy); @@ -12961,14 +14510,16 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]); assert(ArgCI && "Third argument must be a compile time constant"); unsigned Index = ArgCI->getZExtValue() & 0x3; - Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4)); - Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4)); + Ops[0] = + Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4)); + Ops[1] = + Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(Int32Ty, 4)); // Create a shuffle mask - unsigned ElemIdx0; - unsigned ElemIdx1; - unsigned ElemIdx2; - unsigned ElemIdx3; + int ElemIdx0; + int ElemIdx1; + int ElemIdx2; + int ElemIdx3; if (getTarget().isLittleEndian()) { // Little endian element N comes from element 8+N-Index of the // concatenated wide vector (of course, using modulo arithmetic on @@ -12985,14 +14536,9 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, ElemIdx3 = Index + 3; } - Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0), - ConstantInt::get(Int32Ty, ElemIdx1), - ConstantInt::get(Int32Ty, ElemIdx2), - ConstantInt::get(Int32Ty, ElemIdx3)}; - - Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); + int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3}; Value *ShuffleCall = - Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask); + Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleElts); QualType BIRetType = E->getType(); auto RetTy = ConvertType(BIRetType); return Builder.CreateBitCast(ShuffleCall, RetTy); @@ -13001,7 +14547,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, case PPC::BI__builtin_pack_vector_int128: { bool isLittleEndian = getTarget().isLittleEndian(); Value *UndefValue = - llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), 2)); + llvm::UndefValue::get(llvm::FixedVectorType::get(Ops[0]->getType(), 2)); Value *Res = Builder.CreateInsertElement( UndefValue, Ops[0], (uint64_t)(isLittleEndian ? 1 : 0)); Res = Builder.CreateInsertElement(Res, Ops[1], @@ -13012,7 +14558,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, case PPC::BI__builtin_unpack_vector_int128: { ConstantInt *Index = cast<ConstantInt>(Ops[1]); Value *Unpacked = Builder.CreateBitCast( - Ops[0], llvm::VectorType::get(ConvertType(E->getType()), 2)); + Ops[0], llvm::FixedVectorType::get(ConvertType(E->getType()), 2)); if (getTarget().isLittleEndian()) Index = ConstantInt::get(Index->getType(), 1 - Index->getZExtValue()); @@ -13022,8 +14568,91 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, } } +namespace { +// If \p E is not null pointer, insert address space cast to match return +// type of \p E if necessary. 
+Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF, + const CallExpr *E = nullptr) { + auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr); + auto *Call = CGF.Builder.CreateCall(F); + Call->addAttribute( + AttributeList::ReturnIndex, + Attribute::getWithDereferenceableBytes(Call->getContext(), 64)); + Call->addAttribute(AttributeList::ReturnIndex, + Attribute::getWithAlignment(Call->getContext(), Align(4))); + if (!E) + return Call; + QualType BuiltinRetType = E->getType(); + auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType)); + if (RetTy == Call->getType()) + return Call; + return CGF.Builder.CreateAddrSpaceCast(Call, RetTy); +} + +// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively. +Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) { + const unsigned XOffset = 4; + auto *DP = EmitAMDGPUDispatchPtr(CGF); + // Indexing the HSA kernel_dispatch_packet struct. + auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 2); + auto *GEP = CGF.Builder.CreateGEP(DP, Offset); + auto *DstTy = + CGF.Int16Ty->getPointerTo(GEP->getType()->getPointerAddressSpace()); + auto *Cast = CGF.Builder.CreateBitCast(GEP, DstTy); + auto *LD = CGF.Builder.CreateLoad(Address(Cast, CharUnits::fromQuantity(2))); + llvm::MDBuilder MDHelper(CGF.getLLVMContext()); + llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1), + APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1)); + LD->setMetadata(llvm::LLVMContext::MD_range, RNode); + LD->setMetadata(llvm::LLVMContext::MD_invariant_load, + llvm::MDNode::get(CGF.getLLVMContext(), None)); + return LD; +} +} // namespace + +// For processing memory ordering and memory scope arguments of various +// amdgcn builtins. +// \p Order takes a C++11 compatible memory-ordering specifier and converts +// it into LLVM's memory ordering specifier using atomic C ABI, and writes +// to \p AO. \p Scope takes a const char * and converts it into AMDGCN +// specific SyncScopeID and writes it to \p SSID.
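EmitAMDGPUWorkGroupSize above demonstrates a load-annotation idiom worth noting: the i16 field read from the HSA dispatch packet gets a !range node bounding its value and an !invariant.load node marking it immutable for the kernel's lifetime, which lets later passes fold comparisons and hoist the load. Just the annotation step, as a standalone sketch with an illustrative name:

```cpp
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/None.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/MDBuilder.h"

// Attach !range [1, MaxSize] and !invariant.load to a 16-bit load, the way
// the workgroup-size lowering above annotates dispatch-packet fields.
static void annotateDispatchLoad(llvm::LoadInst *LD, uint64_t MaxSize) {
  llvm::LLVMContext &Ctx = LD->getContext();
  llvm::MDBuilder MDHelper(Ctx);
  // createRange takes a half-open interval, hence the + 1.
  LD->setMetadata(llvm::LLVMContext::MD_range,
                  MDHelper.createRange(llvm::APInt(16, 1),
                                       llvm::APInt(16, MaxSize + 1)));
  LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
                  llvm::MDNode::get(Ctx, llvm::None));
}
```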
+bool CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope, + llvm::AtomicOrdering &AO, + llvm::SyncScope::ID &SSID) { + if (isa<llvm::ConstantInt>(Order)) { + int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); + + // Map C11/C++11 memory ordering to LLVM memory ordering + switch (static_cast<llvm::AtomicOrderingCABI>(ord)) { + case llvm::AtomicOrderingCABI::acquire: + AO = llvm::AtomicOrdering::Acquire; + break; + case llvm::AtomicOrderingCABI::release: + AO = llvm::AtomicOrdering::Release; + break; + case llvm::AtomicOrderingCABI::acq_rel: + AO = llvm::AtomicOrdering::AcquireRelease; + break; + case llvm::AtomicOrderingCABI::seq_cst: + AO = llvm::AtomicOrdering::SequentiallyConsistent; + break; + case llvm::AtomicOrderingCABI::consume: + case llvm::AtomicOrderingCABI::relaxed: + break; + } + + StringRef scp; + llvm::getConstantStringInfo(Scope, scp); + SSID = getLLVMContext().getOrInsertSyncScopeID(scp); + return true; + } + return false; +} + Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { + llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent; + llvm::SyncScope::ID SSID; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_div_scale: case AMDGPU::BI__builtin_amdgcn_div_scalef: { @@ -13091,6 +14720,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_rcpf: case AMDGPU::BI__builtin_amdgcn_rcph: return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp); + case AMDGPU::BI__builtin_amdgcn_sqrt: + case AMDGPU::BI__builtin_amdgcn_sqrtf: + case AMDGPU::BI__builtin_amdgcn_sqrth: + return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sqrt); case AMDGPU::BI__builtin_amdgcn_rsq: case AMDGPU::BI__builtin_amdgcn_rsqf: case AMDGPU::BI__builtin_amdgcn_rsqh: @@ -13104,6 +14737,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_cosf: case AMDGPU::BI__builtin_amdgcn_cosh: return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos); + case AMDGPU::BI__builtin_amdgcn_dispatch_ptr: + return EmitAMDGPUDispatchPtr(*this, E); case AMDGPU::BI__builtin_amdgcn_log_clampf: return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp); case AMDGPU::BI__builtin_amdgcn_ldexp: @@ -13146,7 +14781,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); // FIXME-GFX10: How should 32 bit mask be handled? - Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp, + Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp, { Builder.getInt64Ty(), Src0->getType() }); return Builder.CreateCall(F, { Src0, Src1, Src2 }); } @@ -13157,7 +14792,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); // FIXME-GFX10: How should 32 bit mask be handled? 
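ProcessOrderScopeAMDGCN above only rewrites AO for the four orderings the amdgcn builtins distinguish; consume and relaxed fall through, leaving the caller's default (sequentially consistent in EmitAMDGPUBuiltinExpr) in place. The same mapping as a self-contained table function; the free function is an illustrative restatement, not clang API:

```cpp
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/ErrorHandling.h"

// Map a C11/C++11 ABI ordering value onto LLVM's AtomicOrdering; orderings
// the builtins do not distinguish collapse to the caller-provided default.
static llvm::AtomicOrdering toLLVMOrdering(llvm::AtomicOrderingCABI Ord,
                                           llvm::AtomicOrdering Default) {
  switch (Ord) {
  case llvm::AtomicOrderingCABI::acquire:
    return llvm::AtomicOrdering::Acquire;
  case llvm::AtomicOrderingCABI::release:
    return llvm::AtomicOrdering::Release;
  case llvm::AtomicOrderingCABI::acq_rel:
    return llvm::AtomicOrdering::AcquireRelease;
  case llvm::AtomicOrderingCABI::seq_cst:
    return llvm::AtomicOrdering::SequentiallyConsistent;
  case llvm::AtomicOrderingCABI::consume:
  case llvm::AtomicOrderingCABI::relaxed:
    return Default; // left untouched by the code above
  }
  llvm_unreachable("unknown AtomicOrderingCABI value");
}
```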
- Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp, + Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp, { Builder.getInt64Ty(), Src0->getType() }); return Builder.CreateCall(F, { Src0, Src1, Src2 }); } @@ -13178,7 +14813,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, } case AMDGPU::BI__builtin_amdgcn_read_exec: { CallInst *CI = cast<CallInst>( - EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec")); + EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, NormalRead, "exec")); CI->setConvergent(); return CI; } @@ -13187,7 +14822,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, StringRef RegName = BuiltinID == AMDGPU::BI__builtin_amdgcn_read_exec_lo ? "exec_lo" : "exec_hi"; CallInst *CI = cast<CallInst>( - EmitSpecialRegisterBuiltin(*this, E, Int32Ty, Int32Ty, true, RegName)); + EmitSpecialRegisterBuiltin(*this, E, Int32Ty, Int32Ty, NormalRead, RegName)); CI->setConvergent(); return CI; } @@ -13199,6 +14834,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_workitem_id_z: return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024); + // amdgcn workgroup size + case AMDGPU::BI__builtin_amdgcn_workgroup_size_x: + return EmitAMDGPUWorkGroupSize(*this, 0); + case AMDGPU::BI__builtin_amdgcn_workgroup_size_y: + return EmitAMDGPUWorkGroupSize(*this, 1); + case AMDGPU::BI__builtin_amdgcn_workgroup_size_z: + return EmitAMDGPUWorkGroupSize(*this, 2); + // r600 intrinsics case AMDGPU::BI__builtin_r600_recipsqrt_ieee: case AMDGPU::BI__builtin_r600_recipsqrt_ieeef: @@ -13209,6 +14852,61 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024); case AMDGPU::BI__builtin_r600_read_tidig_z: return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024); + case AMDGPU::BI__builtin_amdgcn_alignbit: { + llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); + llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); + llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); + Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType()); + return Builder.CreateCall(F, { Src0, Src1, Src2 }); + } + + case AMDGPU::BI__builtin_amdgcn_fence: { + if (ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)), + EmitScalarExpr(E->getArg(1)), AO, SSID)) + return Builder.CreateFence(AO, SSID); + LLVM_FALLTHROUGH; + } + case AMDGPU::BI__builtin_amdgcn_atomic_inc32: + case AMDGPU::BI__builtin_amdgcn_atomic_inc64: + case AMDGPU::BI__builtin_amdgcn_atomic_dec32: + case AMDGPU::BI__builtin_amdgcn_atomic_dec64: { + unsigned BuiltinAtomicOp; + llvm::Type *ResultType = ConvertType(E->getType()); + + switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_atomic_inc32: + case AMDGPU::BI__builtin_amdgcn_atomic_inc64: + BuiltinAtomicOp = Intrinsic::amdgcn_atomic_inc; + break; + case AMDGPU::BI__builtin_amdgcn_atomic_dec32: + case AMDGPU::BI__builtin_amdgcn_atomic_dec64: + BuiltinAtomicOp = Intrinsic::amdgcn_atomic_dec; + break; + } + + Value *Ptr = EmitScalarExpr(E->getArg(0)); + Value *Val = EmitScalarExpr(E->getArg(1)); + + llvm::Function *F = + CGM.getIntrinsic(BuiltinAtomicOp, {ResultType, Ptr->getType()}); + + if (ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)), + EmitScalarExpr(E->getArg(3)), AO, SSID)) { + + // llvm.amdgcn.atomic.inc and llvm.amdgcn.atomic.dec expects ordering and + // scope as unsigned values + Value *MemOrder = Builder.getInt32(static_cast<int>(AO)); + Value *MemScope = 
Builder.getInt32(static_cast<int>(SSID)); + + QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); + bool Volatile = + PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); + Value *IsVolatile = Builder.getInt1(static_cast<bool>(Volatile)); + + return Builder.CreateCall(F, {Ptr, Val, MemOrder, MemScope, IsVolatile}); + } + LLVM_FALLTHROUGH; + } default: return nullptr; } @@ -13306,8 +15004,13 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, case SystemZ::BI__builtin_s390_vfsqdb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); - Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType); - return Builder.CreateCall(F, X); + if (Builder.getIsFPConstrained()) { + Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, ResultType); + return Builder.CreateConstrainedFPCall(F, { X }); + } else { + Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType); + return Builder.CreateCall(F, X); + } } case SystemZ::BI__builtin_s390_vfmasb: case SystemZ::BI__builtin_s390_vfmadb: { @@ -13315,8 +15018,13 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, Value *X = EmitScalarExpr(E->getArg(0)); Value *Y = EmitScalarExpr(E->getArg(1)); Value *Z = EmitScalarExpr(E->getArg(2)); - Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); - return Builder.CreateCall(F, {X, Y, Z}); + if (Builder.getIsFPConstrained()) { + Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType); + return Builder.CreateConstrainedFPCall(F, {X, Y, Z}); + } else { + Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); + return Builder.CreateCall(F, {X, Y, Z}); + } } case SystemZ::BI__builtin_s390_vfmssb: case SystemZ::BI__builtin_s390_vfmsdb: { @@ -13324,8 +15032,13 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, Value *X = EmitScalarExpr(E->getArg(0)); Value *Y = EmitScalarExpr(E->getArg(1)); Value *Z = EmitScalarExpr(E->getArg(2)); - Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); - return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")}); + if (Builder.getIsFPConstrained()) { + Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType); + return Builder.CreateConstrainedFPCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")}); + } else { + Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); + return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")}); + } } case SystemZ::BI__builtin_s390_vfnmasb: case SystemZ::BI__builtin_s390_vfnmadb: { @@ -13333,8 +15046,13 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, Value *X = EmitScalarExpr(E->getArg(0)); Value *Y = EmitScalarExpr(E->getArg(1)); Value *Z = EmitScalarExpr(E->getArg(2)); - Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); - return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg"); + if (Builder.getIsFPConstrained()) { + Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType); + return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg"); + } else { + Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); + return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg"); + } } case SystemZ::BI__builtin_s390_vfnmssb: case SystemZ::BI__builtin_s390_vfnmsdb: { @@ -13342,9 +15060,15 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, Value *X = EmitScalarExpr(E->getArg(0)); Value *Y = EmitScalarExpr(E->getArg(1)); Value *Z = 
EmitScalarExpr(E->getArg(2)); - Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); - Value *NegZ = Builder.CreateFNeg(Z, "neg"); - return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ})); + if (Builder.getIsFPConstrained()) { + Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType); + Value *NegZ = Builder.CreateFNeg(Z, "sub"); + return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, NegZ})); + } else { + Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); + Value *NegZ = Builder.CreateFNeg(Z, "neg"); + return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ})); + } } case SystemZ::BI__builtin_s390_vflpsb: case SystemZ::BI__builtin_s390_vflpdb: { @@ -13373,30 +15097,42 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, // Check whether this instance can be represented via a LLVM standard // intrinsic. We only support some combinations of M4 and M5. Intrinsic::ID ID = Intrinsic::not_intrinsic; + Intrinsic::ID CI; switch (M4.getZExtValue()) { default: break; case 0: // IEEE-inexact exception allowed switch (M5.getZExtValue()) { default: break; - case 0: ID = Intrinsic::rint; break; + case 0: ID = Intrinsic::rint; + CI = Intrinsic::experimental_constrained_rint; break; } break; case 4: // IEEE-inexact exception suppressed switch (M5.getZExtValue()) { default: break; - case 0: ID = Intrinsic::nearbyint; break; - case 1: ID = Intrinsic::round; break; - case 5: ID = Intrinsic::trunc; break; - case 6: ID = Intrinsic::ceil; break; - case 7: ID = Intrinsic::floor; break; + case 0: ID = Intrinsic::nearbyint; + CI = Intrinsic::experimental_constrained_nearbyint; break; + case 1: ID = Intrinsic::round; + CI = Intrinsic::experimental_constrained_round; break; + case 5: ID = Intrinsic::trunc; + CI = Intrinsic::experimental_constrained_trunc; break; + case 6: ID = Intrinsic::ceil; + CI = Intrinsic::experimental_constrained_ceil; break; + case 7: ID = Intrinsic::floor; + CI = Intrinsic::experimental_constrained_floor; break; } break; } if (ID != Intrinsic::not_intrinsic) { - Function *F = CGM.getIntrinsic(ID, ResultType); - return Builder.CreateCall(F, X); + if (Builder.getIsFPConstrained()) { + Function *F = CGM.getIntrinsic(CI, ResultType); + return Builder.CreateConstrainedFPCall(F, X); + } else { + Function *F = CGM.getIntrinsic(ID, ResultType); + return Builder.CreateCall(F, X); + } } - switch (BuiltinID) { + switch (BuiltinID) { // FIXME: constrained version? case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break; case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break; default: llvm_unreachable("Unknown BuiltinID"); @@ -13419,13 +15155,20 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, // Check whether this instance can be represented via a LLVM standard // intrinsic. We only support some values of M4. 
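The vfisb/vfidb handling above decodes the (M4, M5) modifier pair into a generic rounding intrinsic, and this change tracks a strict counterpart alongside each one. For reference, the supported combinations collected into a single table; the struct is illustrative, the values are exactly those of the switch above:

```cpp
#include "llvm/IR/Intrinsics.h"

// M4 = 0 permits the IEEE-inexact exception, M4 = 4 suppresses it; M5 picks
// the rounding mode. Any other combination falls back to the s390 intrinsics.
struct VfiMapping {
  unsigned M4, M5;
  llvm::Intrinsic::ID ID; // default FP environment
  llvm::Intrinsic::ID CI; // strict (constrained) counterpart
};

static const VfiMapping VfiMap[] = {
    {0, 0, llvm::Intrinsic::rint, llvm::Intrinsic::experimental_constrained_rint},
    {4, 0, llvm::Intrinsic::nearbyint, llvm::Intrinsic::experimental_constrained_nearbyint},
    {4, 1, llvm::Intrinsic::round, llvm::Intrinsic::experimental_constrained_round},
    {4, 5, llvm::Intrinsic::trunc, llvm::Intrinsic::experimental_constrained_trunc},
    {4, 6, llvm::Intrinsic::ceil, llvm::Intrinsic::experimental_constrained_ceil},
    {4, 7, llvm::Intrinsic::floor, llvm::Intrinsic::experimental_constrained_floor},
};
```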
Intrinsic::ID ID = Intrinsic::not_intrinsic; + Intrinsic::ID CI; switch (M4.getZExtValue()) { default: break; - case 4: ID = Intrinsic::maxnum; break; + case 4: ID = Intrinsic::maxnum; + CI = Intrinsic::experimental_constrained_maxnum; break; } if (ID != Intrinsic::not_intrinsic) { - Function *F = CGM.getIntrinsic(ID, ResultType); - return Builder.CreateCall(F, {X, Y}); + if (Builder.getIsFPConstrained()) { + Function *F = CGM.getIntrinsic(CI, ResultType); + return Builder.CreateConstrainedFPCall(F, {X, Y}); + } else { + Function *F = CGM.getIntrinsic(ID, ResultType); + return Builder.CreateCall(F, {X, Y}); + } } switch (BuiltinID) { case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break; @@ -13449,13 +15192,20 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, // Check whether this instance can be represented via a LLVM standard // intrinsic. We only support some values of M4. Intrinsic::ID ID = Intrinsic::not_intrinsic; + Intrinsic::ID CI; switch (M4.getZExtValue()) { default: break; - case 4: ID = Intrinsic::minnum; break; + case 4: ID = Intrinsic::minnum; + CI = Intrinsic::experimental_constrained_minnum; break; } if (ID != Intrinsic::not_intrinsic) { - Function *F = CGM.getIntrinsic(ID, ResultType); - return Builder.CreateCall(F, {X, Y}); + if (Builder.getIsFPConstrained()) { + Function *F = CGM.getIntrinsic(CI, ResultType); + return Builder.CreateConstrainedFPCall(F, {X, Y}); + } else { + Function *F = CGM.getIntrinsic(ID, ResultType); + return Builder.CreateCall(F, {X, Y}); + } } switch (BuiltinID) { case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break; @@ -13815,7 +15565,7 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { auto MakeLdg = [&](unsigned IntrinsicID) { Value *Ptr = EmitScalarExpr(E->getArg(0)); clang::CharUnits Align = - getNaturalPointeeTypeAlignment(E->getArg(0)->getType()); + CGM.getNaturalPointeeTypeAlignment(E->getArg(0)->getType()); return Builder.CreateCall( CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(), Ptr->getType()}), @@ -14344,7 +16094,7 @@ RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) { Result = Builder.CreatePointerCast(Result, Args.SrcType); // Emit an alignment assumption to ensure that the new alignment is // propagated to loads/stores, etc. 
- EmitAlignmentAssumption(Result, E, E->getExprLoc(), Args.Alignment); + emitAlignmentAssumption(Result, E, E->getExprLoc(), Args.Alignment); } assert(Result->getType() == Args.SrcType); return RValue::get(Result); @@ -14368,30 +16118,6 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType); return Builder.CreateCall(Callee, Args); } - case WebAssembly::BI__builtin_wasm_memory_init: { - llvm::APSInt SegConst; - if (!E->getArg(0)->isIntegerConstantExpr(SegConst, getContext())) - llvm_unreachable("Constant arg isn't actually constant?"); - llvm::APSInt MemConst; - if (!E->getArg(1)->isIntegerConstantExpr(MemConst, getContext())) - llvm_unreachable("Constant arg isn't actually constant?"); - if (!MemConst.isNullValue()) - ErrorUnsupported(E, "non-zero memory index"); - Value *Args[] = {llvm::ConstantInt::get(getLLVMContext(), SegConst), - llvm::ConstantInt::get(getLLVMContext(), MemConst), - EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)), - EmitScalarExpr(E->getArg(4))}; - Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_init); - return Builder.CreateCall(Callee, Args); - } - case WebAssembly::BI__builtin_wasm_data_drop: { - llvm::APSInt SegConst; - if (!E->getArg(0)->isIntegerConstantExpr(SegConst, getContext())) - llvm_unreachable("Constant arg isn't actually constant?"); - Value *Arg = llvm::ConstantInt::get(getLLVMContext(), SegConst); - Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_data_drop); - return Builder.CreateCall(Callee, {Arg}); - } case WebAssembly::BI__builtin_wasm_tls_size: { llvm::Type *ResultType = ConvertType(E->getType()); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType); @@ -14460,8 +16186,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64: case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32: case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64: - case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: - case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64x2_f64x2: { + case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: { Value *Src = EmitScalarExpr(E->getArg(0)); llvm::Type *ResT = ConvertType(E->getType()); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_signed, @@ -14472,8 +16197,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64: case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32: case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64: - case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: - case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64x2_f64x2: { + case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: { Value *Src = EmitScalarExpr(E->getArg(0)); llvm::Type *ResT = ConvertType(E->getType()); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_unsigned, @@ -14500,6 +16224,55 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } + case WebAssembly::BI__builtin_wasm_pmin_f32x4: + case WebAssembly::BI__builtin_wasm_pmin_f64x2: { + Value *LHS = EmitScalarExpr(E->getArg(0)); + Value *RHS = EmitScalarExpr(E->getArg(1)); + Function *Callee = + CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType())); + return Builder.CreateCall(Callee, 
{LHS, RHS}); + } + case WebAssembly::BI__builtin_wasm_pmax_f32x4: + case WebAssembly::BI__builtin_wasm_pmax_f64x2: { + Value *LHS = EmitScalarExpr(E->getArg(0)); + Value *RHS = EmitScalarExpr(E->getArg(1)); + Function *Callee = + CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType())); + return Builder.CreateCall(Callee, {LHS, RHS}); + } + case WebAssembly::BI__builtin_wasm_ceil_f32x4: + case WebAssembly::BI__builtin_wasm_floor_f32x4: + case WebAssembly::BI__builtin_wasm_trunc_f32x4: + case WebAssembly::BI__builtin_wasm_nearest_f32x4: + case WebAssembly::BI__builtin_wasm_ceil_f64x2: + case WebAssembly::BI__builtin_wasm_floor_f64x2: + case WebAssembly::BI__builtin_wasm_trunc_f64x2: + case WebAssembly::BI__builtin_wasm_nearest_f64x2: { + unsigned IntNo; + switch (BuiltinID) { + case WebAssembly::BI__builtin_wasm_ceil_f32x4: + case WebAssembly::BI__builtin_wasm_ceil_f64x2: + IntNo = Intrinsic::wasm_ceil; + break; + case WebAssembly::BI__builtin_wasm_floor_f32x4: + case WebAssembly::BI__builtin_wasm_floor_f64x2: + IntNo = Intrinsic::wasm_floor; + break; + case WebAssembly::BI__builtin_wasm_trunc_f32x4: + case WebAssembly::BI__builtin_wasm_trunc_f64x2: + IntNo = Intrinsic::wasm_trunc; + break; + case WebAssembly::BI__builtin_wasm_nearest_f32x4: + case WebAssembly::BI__builtin_wasm_nearest_f64x2: + IntNo = Intrinsic::wasm_nearest; + break; + default: + llvm_unreachable("unexpected builtin ID"); + } + Value *Value = EmitScalarExpr(E->getArg(0)); + Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType())); + return Builder.CreateCall(Callee, Value); + } case WebAssembly::BI__builtin_wasm_swizzle_v8x16: { Value *Src = EmitScalarExpr(E->getArg(0)); Value *Indices = EmitScalarExpr(E->getArg(1)); @@ -14551,7 +16324,8 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, switch (BuiltinID) { case WebAssembly::BI__builtin_wasm_replace_lane_i8x16: case WebAssembly::BI__builtin_wasm_replace_lane_i16x8: { - llvm::Type *ElemType = ConvertType(E->getType())->getVectorElementType(); + llvm::Type *ElemType = + cast<llvm::VectorType>(ConvertType(E->getType()))->getElementType(); Value *Trunc = Builder.CreateTrunc(Val, ElemType); return Builder.CreateInsertElement(Vec, Trunc, Lane); } @@ -14598,6 +16372,56 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } + case WebAssembly::BI__builtin_wasm_abs_i8x16: + case WebAssembly::BI__builtin_wasm_abs_i16x8: + case WebAssembly::BI__builtin_wasm_abs_i32x4: { + Value *Vec = EmitScalarExpr(E->getArg(0)); + Value *Neg = Builder.CreateNeg(Vec, "neg"); + Constant *Zero = llvm::Constant::getNullValue(Vec->getType()); + Value *ICmp = Builder.CreateICmpSLT(Vec, Zero, "abscond"); + return Builder.CreateSelect(ICmp, Neg, Vec, "abs"); + } + case WebAssembly::BI__builtin_wasm_min_s_i8x16: + case WebAssembly::BI__builtin_wasm_min_u_i8x16: + case WebAssembly::BI__builtin_wasm_max_s_i8x16: + case WebAssembly::BI__builtin_wasm_max_u_i8x16: + case WebAssembly::BI__builtin_wasm_min_s_i16x8: + case WebAssembly::BI__builtin_wasm_min_u_i16x8: + case WebAssembly::BI__builtin_wasm_max_s_i16x8: + case WebAssembly::BI__builtin_wasm_max_u_i16x8: + case WebAssembly::BI__builtin_wasm_min_s_i32x4: + case WebAssembly::BI__builtin_wasm_min_u_i32x4: + case WebAssembly::BI__builtin_wasm_max_s_i32x4: + case WebAssembly::BI__builtin_wasm_max_u_i32x4: { + Value *LHS = EmitScalarExpr(E->getArg(0)); + Value *RHS = 
EmitScalarExpr(E->getArg(1)); + Value *ICmp; + switch (BuiltinID) { + case WebAssembly::BI__builtin_wasm_min_s_i8x16: + case WebAssembly::BI__builtin_wasm_min_s_i16x8: + case WebAssembly::BI__builtin_wasm_min_s_i32x4: + ICmp = Builder.CreateICmpSLT(LHS, RHS); + break; + case WebAssembly::BI__builtin_wasm_min_u_i8x16: + case WebAssembly::BI__builtin_wasm_min_u_i16x8: + case WebAssembly::BI__builtin_wasm_min_u_i32x4: + ICmp = Builder.CreateICmpULT(LHS, RHS); + break; + case WebAssembly::BI__builtin_wasm_max_s_i8x16: + case WebAssembly::BI__builtin_wasm_max_s_i16x8: + case WebAssembly::BI__builtin_wasm_max_s_i32x4: + ICmp = Builder.CreateICmpSGT(LHS, RHS); + break; + case WebAssembly::BI__builtin_wasm_max_u_i8x16: + case WebAssembly::BI__builtin_wasm_max_u_i16x8: + case WebAssembly::BI__builtin_wasm_max_u_i32x4: + ICmp = Builder.CreateICmpUGT(LHS, RHS); + break; + default: + llvm_unreachable("unexpected builtin ID"); + } + return Builder.CreateSelect(ICmp, LHS, RHS); + } case WebAssembly::BI__builtin_wasm_avgr_u_i8x16: case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: { Value *LHS = EmitScalarExpr(E->getArg(0)); @@ -14649,6 +16473,14 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType()); return Builder.CreateCall(Callee, {Vec}); } + case WebAssembly::BI__builtin_wasm_bitmask_i8x16: + case WebAssembly::BI__builtin_wasm_bitmask_i16x8: + case WebAssembly::BI__builtin_wasm_bitmask_i32x4: { + Value *Vec = EmitScalarExpr(E->getArg(0)); + Function *Callee = + CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType()); + return Builder.CreateCall(Callee, {Vec}); + } case WebAssembly::BI__builtin_wasm_abs_f32x4: case WebAssembly::BI__builtin_wasm_abs_f64x2: { Value *Vec = EmitScalarExpr(E->getArg(0)); @@ -14741,68 +16573,124 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Vec->getType()}); return Builder.CreateCall(Callee, Vec); } + case WebAssembly::BI__builtin_wasm_shuffle_v8x16: { + Value *Ops[18]; + size_t OpIdx = 0; + Ops[OpIdx++] = EmitScalarExpr(E->getArg(0)); + Ops[OpIdx++] = EmitScalarExpr(E->getArg(1)); + while (OpIdx < 18) { + llvm::APSInt LaneConst; + if (!E->getArg(OpIdx)->isIntegerConstantExpr(LaneConst, getContext())) + llvm_unreachable("Constant arg isn't actually constant?"); + Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), LaneConst); + } + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle); + return Builder.CreateCall(Callee, Ops); + } default: return nullptr; } } +static std::pair<Intrinsic::ID, unsigned> +getIntrinsicForHexagonNonGCCBuiltin(unsigned BuiltinID) { + struct Info { + unsigned BuiltinID; + Intrinsic::ID IntrinsicID; + unsigned VecLen; + }; + Info Infos[] = { +#define CUSTOM_BUILTIN_MAPPING(x,s) \ + { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s }, + CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0) + CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci, 0) + CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci, 0) + CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci, 0) + CUSTOM_BUILTIN_MAPPING(L2_loadri_pci, 0) + CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci, 0) + CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr, 0) + CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr, 0) + CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr, 0) + CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr, 0) + CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr, 0) + CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr, 0) + CUSTOM_BUILTIN_MAPPING(S2_storerb_pci, 0) + CUSTOM_BUILTIN_MAPPING(S2_storerh_pci, 0) + CUSTOM_BUILTIN_MAPPING(S2_storerf_pci, 
0) + CUSTOM_BUILTIN_MAPPING(S2_storeri_pci, 0) + CUSTOM_BUILTIN_MAPPING(S2_storerd_pci, 0) + CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr, 0) + CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr, 0) + CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0) + CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0) + CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0) + CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64) + CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64) + CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64) + CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64) + CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128) + CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128) + CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128) + CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128) +#include "clang/Basic/BuiltinsHexagonMapCustomDep.def" +#undef CUSTOM_BUILTIN_MAPPING + }; + + auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; }; + static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true); + (void)SortOnce; + + const Info *F = std::lower_bound(std::begin(Infos), std::end(Infos), + Info{BuiltinID, 0, 0}, CmpInfo); + if (F == std::end(Infos) || F->BuiltinID != BuiltinID) + return {Intrinsic::not_intrinsic, 0}; + + return {F->IntrinsicID, F->VecLen}; +} + Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { - SmallVector<llvm::Value *, 4> Ops; - Intrinsic::ID ID = Intrinsic::not_intrinsic; + Intrinsic::ID ID; + unsigned VecLen; + std::tie(ID, VecLen) = getIntrinsicForHexagonNonGCCBuiltin(BuiltinID); - auto MakeCircLd = [&](unsigned IntID, bool HasImm) { + auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) { // The base pointer is passed by address, so it needs to be loaded. - Address BP = EmitPointerWithAlignment(E->getArg(0)); - BP = Address(Builder.CreateBitCast(BP.getPointer(), Int8PtrPtrTy), - BP.getAlignment()); + Address A = EmitPointerWithAlignment(E->getArg(0)); + Address BP = Address( + Builder.CreateBitCast(A.getPointer(), Int8PtrPtrTy), A.getAlignment()); llvm::Value *Base = Builder.CreateLoad(BP); - // Operands are Base, Increment, Modifier, Start. - if (HasImm) - Ops = { Base, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), - EmitScalarExpr(E->getArg(3)) }; - else - Ops = { Base, EmitScalarExpr(E->getArg(1)), - EmitScalarExpr(E->getArg(2)) }; + // The treatment of both loads and stores is the same: the arguments for + // the builtin are the same as the arguments for the intrinsic. + // Load: + // builtin(Base, Inc, Mod, Start) -> intr(Base, Inc, Mod, Start) + // builtin(Base, Mod, Start) -> intr(Base, Mod, Start) + // Store: + // builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start) + // builtin(Base, Mod, Val, Start) -> intr(Base, Mod, Val, Start) + SmallVector<llvm::Value*,5> Ops = { Base }; + for (unsigned i = 1, e = E->getNumArgs(); i != e; ++i) + Ops.push_back(EmitScalarExpr(E->getArg(i))); llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops); - llvm::Value *NewBase = Builder.CreateExtractValue(Result, 1); - llvm::Value *LV = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), - NewBase->getType()->getPointerTo()); + // The load intrinsics generate two results (Value, NewBase), stores + // generate one (NewBase). The new base address needs to be stored. + llvm::Value *NewBase = IsLoad ? 
Builder.CreateExtractValue(Result, 1) + : Result; + llvm::Value *LV = Builder.CreateBitCast( + EmitScalarExpr(E->getArg(0)), NewBase->getType()->getPointerTo()); Address Dest = EmitPointerWithAlignment(E->getArg(0)); - // The intrinsic generates two results. The new value for the base pointer - // needs to be stored. - Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment()); - return Builder.CreateExtractValue(Result, 0); - }; - - auto MakeCircSt = [&](unsigned IntID, bool HasImm) { - // The base pointer is passed by address, so it needs to be loaded. - Address BP = EmitPointerWithAlignment(E->getArg(0)); - BP = Address(Builder.CreateBitCast(BP.getPointer(), Int8PtrPtrTy), - BP.getAlignment()); - llvm::Value *Base = Builder.CreateLoad(BP); - // Operands are Base, Increment, Modifier, Value, Start. - if (HasImm) - Ops = { Base, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), - EmitScalarExpr(E->getArg(3)), EmitScalarExpr(E->getArg(4)) }; - else - Ops = { Base, EmitScalarExpr(E->getArg(1)), - EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)) }; - - llvm::Value *NewBase = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops); - llvm::Value *LV = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), - NewBase->getType()->getPointerTo()); - Address Dest = EmitPointerWithAlignment(E->getArg(0)); - // The intrinsic generates one result, which is the new value for the base - // pointer. It needs to be stored. - return Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment()); + llvm::Value *RetVal = + Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment()); + if (IsLoad) + RetVal = Builder.CreateExtractValue(Result, 0); + return RetVal; }; // Handle the conversion of bit-reverse load intrinsics to bit code. // The intrinsic call after this function only reads from memory and the // write to memory is dealt by the store instruction. - auto MakeBrevLd = [&](unsigned IntID, llvm::Type *DestTy) { + auto MakeBrevLd = [this, E](unsigned IntID, llvm::Type *DestTy) { // The intrinsic generates one result, which is the new value for the base // pointer. It needs to be returned. The result of the load instruction is // passed to intrinsic by address, so the value needs to be stored. @@ -14820,9 +16708,9 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID, // Operands are Base, Dest, Modifier. // The intrinsic format in LLVM IR is defined as // { ValueType, i8* } (i8*, i32). - Ops = {BaseAddress, EmitScalarExpr(E->getArg(2))}; + llvm::Value *Result = Builder.CreateCall( + CGM.getIntrinsic(IntID), {BaseAddress, EmitScalarExpr(E->getArg(2))}); - llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops); // The value needs to be stored as the variable is passed by reference. llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0); @@ -14838,95 +16726,65 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID, return Builder.CreateExtractValue(Result, 1); }; + auto V2Q = [this, VecLen] (llvm::Value *Vec) { + Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B + : Intrinsic::hexagon_V6_vandvrt; + return Builder.CreateCall(CGM.getIntrinsic(ID), + {Vec, Builder.getInt32(-1)}); + }; + auto Q2V = [this, VecLen] (llvm::Value *Pred) { + Intrinsic::ID ID = VecLen == 128 ? 
Intrinsic::hexagon_V6_vandqrt_128B + : Intrinsic::hexagon_V6_vandqrt; + return Builder.CreateCall(CGM.getIntrinsic(ID), + {Pred, Builder.getInt32(-1)}); + }; + switch (BuiltinID) { + // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR, + // and the corresponding C/C++ builtins use loads/stores to update + // the predicate. case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry: - case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B: { - Address Dest = EmitPointerWithAlignment(E->getArg(2)); - unsigned Size; - if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vaddcarry) { - Size = 512; - ID = Intrinsic::hexagon_V6_vaddcarry; - } else { - Size = 1024; - ID = Intrinsic::hexagon_V6_vaddcarry_128B; - } - Dest = Builder.CreateBitCast(Dest, - llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0)); - LoadInst *QLd = Builder.CreateLoad(Dest); - Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd }; - llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops); - llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1); - llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)), - Vprd->getType()->getPointerTo(0)); - Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment()); - return Builder.CreateExtractValue(Result, 0); - } + case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B: case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry: case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: { - Address Dest = EmitPointerWithAlignment(E->getArg(2)); - unsigned Size; - if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vsubcarry) { - Size = 512; - ID = Intrinsic::hexagon_V6_vsubcarry; - } else { - Size = 1024; - ID = Intrinsic::hexagon_V6_vsubcarry_128B; - } - Dest = Builder.CreateBitCast(Dest, - llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0)); - LoadInst *QLd = Builder.CreateLoad(Dest); - Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd }; - llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops); - llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1); - llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)), - Vprd->getType()->getPointerTo(0)); - Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment()); + // Get the type from the 0-th argument. 
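The V2Q/Q2V lambdas above encode a Hexagon HVX convention: predicates are materialized as byte vectors on the C side, so the lowering round-trips them through hexagon_V6_vandvrt (vector to predicate) and hexagon_V6_vandqrt (predicate to vector) with an all-ones mask, selecting the _128B variants by vector length. The same pair as standalone sketches, assuming the clang/lib/CodeGen context; the free-function names are made up:

```cpp
// Vector -> predicate, mirroring the V2Q lambda above.
static llvm::Value *hvxVecToPred(clang::CodeGen::CodeGenFunction &CGF,
                                 llvm::Value *Vec, unsigned VecLen) {
  llvm::Intrinsic::ID ID = VecLen == 128
                               ? llvm::Intrinsic::hexagon_V6_vandvrt_128B
                               : llvm::Intrinsic::hexagon_V6_vandvrt;
  return CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(ID),
                                {Vec, CGF.Builder.getInt32(-1)});
}

// Predicate -> vector, mirroring the Q2V lambda above.
static llvm::Value *hvxPredToVec(clang::CodeGen::CodeGenFunction &CGF,
                                 llvm::Value *Pred, unsigned VecLen) {
  llvm::Intrinsic::ID ID = VecLen == 128
                               ? llvm::Intrinsic::hexagon_V6_vandqrt_128B
                               : llvm::Intrinsic::hexagon_V6_vandqrt;
  return CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(ID),
                                {Pred, CGF.Builder.getInt32(-1)});
}
```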
+ llvm::Type *VecType = ConvertType(E->getArg(0)->getType()); + Address PredAddr = Builder.CreateBitCast( + EmitPointerWithAlignment(E->getArg(2)), VecType->getPointerTo(0)); + llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr)); + llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), + {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn}); + + llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1); + Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.getPointer(), + PredAddr.getAlignment()); return Builder.CreateExtractValue(Result, 0); } + case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci: - return MakeCircLd(Intrinsic::hexagon_L2_loadrub_pci, /*HasImm*/true); case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci: - return MakeCircLd(Intrinsic::hexagon_L2_loadrb_pci, /*HasImm*/true); case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci: - return MakeCircLd(Intrinsic::hexagon_L2_loadruh_pci, /*HasImm*/true); case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci: - return MakeCircLd(Intrinsic::hexagon_L2_loadrh_pci, /*HasImm*/true); case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci: - return MakeCircLd(Intrinsic::hexagon_L2_loadri_pci, /*HasImm*/true); case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci: - return MakeCircLd(Intrinsic::hexagon_L2_loadrd_pci, /*HasImm*/true); case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr: - return MakeCircLd(Intrinsic::hexagon_L2_loadrub_pcr, /*HasImm*/false); case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr: - return MakeCircLd(Intrinsic::hexagon_L2_loadrb_pcr, /*HasImm*/false); case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr: - return MakeCircLd(Intrinsic::hexagon_L2_loadruh_pcr, /*HasImm*/false); case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr: - return MakeCircLd(Intrinsic::hexagon_L2_loadrh_pcr, /*HasImm*/false); case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr: - return MakeCircLd(Intrinsic::hexagon_L2_loadri_pcr, /*HasImm*/false); case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr: - return MakeCircLd(Intrinsic::hexagon_L2_loadrd_pcr, /*HasImm*/false); + return MakeCircOp(ID, /*IsLoad=*/true); case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci: - return MakeCircSt(Intrinsic::hexagon_S2_storerb_pci, /*HasImm*/true); case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci: - return MakeCircSt(Intrinsic::hexagon_S2_storerh_pci, /*HasImm*/true); case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci: - return MakeCircSt(Intrinsic::hexagon_S2_storerf_pci, /*HasImm*/true); case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci: - return MakeCircSt(Intrinsic::hexagon_S2_storeri_pci, /*HasImm*/true); case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci: - return MakeCircSt(Intrinsic::hexagon_S2_storerd_pci, /*HasImm*/true); case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr: - return MakeCircSt(Intrinsic::hexagon_S2_storerb_pcr, /*HasImm*/false); case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr: - return MakeCircSt(Intrinsic::hexagon_S2_storerh_pcr, /*HasImm*/false); case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr: - return MakeCircSt(Intrinsic::hexagon_S2_storerf_pcr, /*HasImm*/false); case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr: - return MakeCircSt(Intrinsic::hexagon_S2_storeri_pcr, /*HasImm*/false); case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr: - return MakeCircSt(Intrinsic::hexagon_S2_storerd_pcr, /*HasImm*/false); + return MakeCircOp(ID, /*IsLoad=*/false); case Hexagon::BI__builtin_brev_ldub: return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty); case Hexagon::BI__builtin_brev_ldb: @@ -14939,8 +16797,40 @@ Value 
*CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID, return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty); case Hexagon::BI__builtin_brev_ldd: return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty); - default: - break; + + default: { + if (ID == Intrinsic::not_intrinsic) + return nullptr; + + auto IsVectorPredTy = [](llvm::Type *T) { + return T->isVectorTy() && + cast<llvm::VectorType>(T)->getElementType()->isIntegerTy(1); + }; + + llvm::Function *IntrFn = CGM.getIntrinsic(ID); + llvm::FunctionType *IntrTy = IntrFn->getFunctionType(); + SmallVector<llvm::Value*,4> Ops; + for (unsigned i = 0, e = IntrTy->getNumParams(); i != e; ++i) { + llvm::Type *T = IntrTy->getParamType(i); + const Expr *A = E->getArg(i); + if (IsVectorPredTy(T)) { + // There will be an implicit cast to a boolean vector. Strip it. + if (auto *Cast = dyn_cast<ImplicitCastExpr>(A)) { + if (Cast->getCastKind() == CK_BitCast) + A = Cast->getSubExpr(); + } + Ops.push_back(V2Q(EmitScalarExpr(A))); + } else { + Ops.push_back(EmitScalarExpr(A)); + } + } + + llvm::Value *Call = Builder.CreateCall(IntrFn, Ops); + if (IsVectorPredTy(IntrTy->getReturnType())) + Call = Q2V(Call); + + return Call; + } // default } // switch return nullptr; diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp index 5c5cbaff0252..baf2c79cc2b6 100644 --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -50,7 +50,7 @@ private: struct VarInfo { llvm::GlobalVariable *Var; const VarDecl *D; - unsigned Flag; + DeviceVarFlags Flags; }; llvm::SmallVector<VarInfo, 16> DeviceVars; /// Keeps track of variable containing handle of GPU binary. Populated by @@ -117,23 +117,38 @@ private: void emitDeviceStubBodyLegacy(CodeGenFunction &CGF, FunctionArgList &Args); void emitDeviceStubBodyNew(CodeGenFunction &CGF, FunctionArgList &Args); - std::string getDeviceSideName(const Decl *ND); + std::string getDeviceSideName(const NamedDecl *ND) override; public: CGNVCUDARuntime(CodeGenModule &CGM); void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override; void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var, - unsigned Flags) override { - DeviceVars.push_back({&Var, VD, Flags}); + bool Extern, bool Constant) override { + DeviceVars.push_back({&Var, + VD, + {DeviceVarFlags::Variable, Extern, Constant, + /*Normalized*/ false, /*Type*/ 0}}); + } + void registerDeviceSurf(const VarDecl *VD, llvm::GlobalVariable &Var, + bool Extern, int Type) override { + DeviceVars.push_back({&Var, + VD, + {DeviceVarFlags::Surface, Extern, /*Constant*/ false, + /*Normalized*/ false, Type}}); + } + void registerDeviceTex(const VarDecl *VD, llvm::GlobalVariable &Var, + bool Extern, int Type, bool Normalized) override { + DeviceVars.push_back({&Var, + VD, + {DeviceVarFlags::Texture, Extern, /*Constant*/ false, + Normalized, Type}}); } /// Creates module constructor function llvm::Function *makeModuleCtorFunction() override; /// Creates module destructor function llvm::Function *makeModuleDtorFunction() override; - /// Construct and return the stub name of a kernel. 
- std::string getDeviceStubName(llvm::StringRef Name) const override; }; } @@ -204,40 +219,30 @@ llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy() const { return llvm::FunctionType::get(VoidTy, Params, false); } -std::string CGNVCUDARuntime::getDeviceSideName(const Decl *D) { - auto *ND = cast<const NamedDecl>(D); +std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) { + GlobalDecl GD; + // D could be either a kernel or a variable. + if (auto *FD = dyn_cast<FunctionDecl>(ND)) + GD = GlobalDecl(FD, KernelReferenceKind::Kernel); + else + GD = GlobalDecl(ND); std::string DeviceSideName; if (DeviceMC->shouldMangleDeclName(ND)) { SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); - DeviceMC->mangleName(ND, Out); - DeviceSideName = Out.str(); + DeviceMC->mangleName(GD, Out); + DeviceSideName = std::string(Out.str()); } else - DeviceSideName = ND->getIdentifier()->getName(); + DeviceSideName = std::string(ND->getIdentifier()->getName()); return DeviceSideName; } void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) { - // Ensure either we have different ABIs between host and device compilations, - // says host compilation following MSVC ABI but device compilation follows - // Itanium C++ ABI or, if they follow the same ABI, kernel names after - // mangling should be the same after name stubbing. The later checking is - // very important as the device kernel name being mangled in host-compilation - // is used to resolve the device binaries to be executed. Inconsistent naming - // result in undefined behavior. Even though we cannot check that naming - // directly between host- and device-compilations, the host- and - // device-mangling in host compilation could help catching certain ones. - assert((CGF.CGM.getContext().getAuxTargetInfo() && - (CGF.CGM.getContext().getAuxTargetInfo()->getCXXABI() != - CGF.CGM.getContext().getTargetInfo().getCXXABI())) || - getDeviceStubName(getDeviceSideName(CGF.CurFuncDecl)) == - CGF.CurFn->getName()); - EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl}); if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(), CudaFeature::CUDA_USES_NEW_LAUNCH) || - CGF.getLangOpts().HIPUseNewLaunchAPI) + (CGF.getLangOpts().HIP && CGF.getLangOpts().HIPUseNewLaunchAPI)) emitDeviceStubBodyNew(CGF, Args); else emitDeviceStubBodyLegacy(CGF, Args); @@ -418,7 +423,8 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { // each emitted kernel. llvm::Argument &GpuBinaryHandlePtr = *RegisterKernelsFunc->arg_begin(); for (auto &&I : EmittedKernels) { - llvm::Constant *KernelName = makeConstantString(getDeviceSideName(I.D)); + llvm::Constant *KernelName = + makeConstantString(getDeviceSideName(cast<NamedDecl>(I.D))); llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy); llvm::Value *Args[] = { &GpuBinaryHandlePtr, @@ -434,30 +440,70 @@ llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { Builder.CreateCall(RegisterFunc, Args); } + llvm::Type *VarSizeTy = IntTy; + // For HIP or CUDA 9.0+, device variable size is type of `size_t`. 
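The comment above flags a quiet ABI switch: older CUDA toolchains register a device variable's size as int, while HIP and CUDA 9.0+ use size_t. Distilled into a predicate (a hypothetical helper; the code that follows inlines the same condition when picking VarSizeTy):

```cpp
#include "CodeGenModule.h" // internal clang/lib/CodeGen header (assumed context)
#include "clang/Basic/Cuda.h"

// True when device-variable sizes are registered as size_t rather than int.
static bool useSizeTForVarSize(const clang::CodeGen::CodeGenModule &CGM) {
  return CGM.getLangOpts().HIP ||
         clang::ToCudaVersion(CGM.getTarget().getSDKVersion()) >=
             clang::CudaVersion::CUDA_90;
}
```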
+ if (CGM.getLangOpts().HIP || + ToCudaVersion(CGM.getTarget().getSDKVersion()) >= CudaVersion::CUDA_90) + VarSizeTy = SizeTy; + // void __cudaRegisterVar(void **, char *, char *, const char *, // int, int, int, int) llvm::Type *RegisterVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy, - CharPtrTy, IntTy, IntTy, + CharPtrTy, IntTy, VarSizeTy, IntTy, IntTy}; llvm::FunctionCallee RegisterVar = CGM.CreateRuntimeFunction( - llvm::FunctionType::get(IntTy, RegisterVarParams, false), + llvm::FunctionType::get(VoidTy, RegisterVarParams, false), addUnderscoredPrefixToName("RegisterVar")); + // void __cudaRegisterSurface(void **, const struct surfaceReference *, + // const void **, const char *, int, int); + llvm::FunctionCallee RegisterSurf = CGM.CreateRuntimeFunction( + llvm::FunctionType::get( + VoidTy, {VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy}, + false), + addUnderscoredPrefixToName("RegisterSurface")); + // void __cudaRegisterTexture(void **, const struct textureReference *, + // const void **, const char *, int, int, int) + llvm::FunctionCallee RegisterTex = CGM.CreateRuntimeFunction( + llvm::FunctionType::get( + VoidTy, + {VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy, IntTy}, + false), + addUnderscoredPrefixToName("RegisterTexture")); for (auto &&Info : DeviceVars) { llvm::GlobalVariable *Var = Info.Var; - unsigned Flags = Info.Flag; llvm::Constant *VarName = makeConstantString(getDeviceSideName(Info.D)); - uint64_t VarSize = - CGM.getDataLayout().getTypeAllocSize(Var->getValueType()); - llvm::Value *Args[] = { - &GpuBinaryHandlePtr, - Builder.CreateBitCast(Var, VoidPtrTy), - VarName, - VarName, - llvm::ConstantInt::get(IntTy, (Flags & ExternDeviceVar) ? 1 : 0), - llvm::ConstantInt::get(IntTy, VarSize), - llvm::ConstantInt::get(IntTy, (Flags & ConstantDeviceVar) ? 1 : 0), - llvm::ConstantInt::get(IntTy, 0)}; - Builder.CreateCall(RegisterVar, Args); + switch (Info.Flags.getKind()) { + case DeviceVarFlags::Variable: { + uint64_t VarSize = + CGM.getDataLayout().getTypeAllocSize(Var->getValueType()); + llvm::Value *Args[] = { + &GpuBinaryHandlePtr, + Builder.CreateBitCast(Var, VoidPtrTy), + VarName, + VarName, + llvm::ConstantInt::get(IntTy, Info.Flags.isExtern()), + llvm::ConstantInt::get(VarSizeTy, VarSize), + llvm::ConstantInt::get(IntTy, Info.Flags.isConstant()), + llvm::ConstantInt::get(IntTy, 0)}; + Builder.CreateCall(RegisterVar, Args); + break; + } + case DeviceVarFlags::Surface: + Builder.CreateCall( + RegisterSurf, + {&GpuBinaryHandlePtr, Builder.CreateBitCast(Var, VoidPtrTy), VarName, + VarName, llvm::ConstantInt::get(IntTy, Info.Flags.getSurfTexType()), + llvm::ConstantInt::get(IntTy, Info.Flags.isExtern())}); + break; + case DeviceVarFlags::Texture: + Builder.CreateCall( + RegisterTex, + {&GpuBinaryHandlePtr, Builder.CreateBitCast(Var, VoidPtrTy), VarName, + VarName, llvm::ConstantInt::get(IntTy, Info.Flags.getSurfTexType()), + llvm::ConstantInt::get(IntTy, Info.Flags.isNormalized()), + llvm::ConstantInt::get(IntTy, Info.Flags.isExtern())}); + break; + } } Builder.CreateRetVoid(); @@ -551,8 +597,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { if (CudaGpuBinary) { // If fatbin is available from early finalization, create a string // literal containing the fat binary loaded from the given file. 
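Stepping back to the registration calls above: assembled from the prototype comments and the FunctionType argument lists, the host-runtime entry points now targeted look roughly like the following C declarations. Parameter names are guesses for readability, not taken from any header, and under HIP the functions carry a __hip prefix instead:

```cpp
#include <cstddef>

extern "C" void __cudaRegisterVar(void **fatbinHandle, char *hostVar,
                                  char *deviceAddress, const char *deviceName,
                                  int isExtern,
                                  size_t size, // plain int before CUDA 9.0
                                  int isConstant, int global);
extern "C" void __cudaRegisterSurface(void **fatbinHandle, void *surfRef,
                                      char *deviceAddress,
                                      const char *deviceName, int surfType,
                                      int isExtern);
extern "C" void __cudaRegisterTexture(void **fatbinHandle, void *texRef,
                                      char *deviceAddress,
                                      const char *deviceName, int texType,
                                      int isNormalized, int isExtern);
```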
- FatBinStr = makeConstantString(CudaGpuBinary->getBuffer(), "", - FatbinConstantName, 8); + FatBinStr = makeConstantString(std::string(CudaGpuBinary->getBuffer()), + "", FatbinConstantName, 8); } else { // If fatbin is not available, create an external symbol // __hip_fatbin in section .hip_fatbin. The external symbol is supposed @@ -586,7 +632,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { // For CUDA, create a string literal containing the fat binary loaded from // the given file. - FatBinStr = makeConstantString(CudaGpuBinary->getBuffer(), "", + FatBinStr = makeConstantString(std::string(CudaGpuBinary->getBuffer()), "", FatbinConstantName, 8); FatMagic = CudaFatMagic; } @@ -691,8 +737,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { SmallString<64> ModuleID; llvm::raw_svector_ostream OS(ModuleID); OS << ModuleIDPrefix << llvm::format("%" PRIx64, FatbinWrapper->getGUID()); - llvm::Constant *ModuleIDConstant = - makeConstantString(ModuleID.str(), "", ModuleIDSectionName, 32); + llvm::Constant *ModuleIDConstant = makeConstantString( + std::string(ModuleID.str()), "", ModuleIDSectionName, 32); // Create an alias for the FatbinWrapper that nvcc will look for. llvm::GlobalAlias::create(llvm::GlobalValue::ExternalLinkage, @@ -797,12 +843,6 @@ llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() { return ModuleDtorFunc; } -std::string CGNVCUDARuntime::getDeviceStubName(llvm::StringRef Name) const { - if (!CGM.getLangOpts().HIP) - return Name; - return (Name + ".stub").str(); -} - CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) { return new CGNVCUDARuntime(CGM); } diff --git a/clang/lib/CodeGen/CGCUDARuntime.h b/clang/lib/CodeGen/CGCUDARuntime.h index e548a3a546d4..19e70a2022a5 100644 --- a/clang/lib/CodeGen/CGCUDARuntime.h +++ b/clang/lib/CodeGen/CGCUDARuntime.h @@ -25,6 +25,7 @@ class GlobalVariable; namespace clang { class CUDAKernelCallExpr; +class NamedDecl; class VarDecl; namespace CodeGen { @@ -41,9 +42,30 @@ protected: public: // Global variable properties that must be passed to CUDA runtime. - enum DeviceVarFlags { - ExternDeviceVar = 0x01, // extern - ConstantDeviceVar = 0x02, // __constant__ + class DeviceVarFlags { + public: + enum DeviceVarKind { + Variable, // Variable + Surface, // Builtin surface + Texture, // Builtin texture + }; + + private: + unsigned Kind : 2; + unsigned Extern : 1; + unsigned Constant : 1; // Constant variable. + unsigned Normalized : 1; // Normalized texture. + int SurfTexType; // Type of surface/texture. + + public: + DeviceVarFlags(DeviceVarKind K, bool E, bool C, bool N, int T) + : Kind(K), Extern(E), Constant(C), Normalized(N), SurfTexType(T) {} + + DeviceVarKind getKind() const { return static_cast<DeviceVarKind>(Kind); } + bool isExtern() const { return Extern; } + bool isConstant() const { return Constant; } + bool isNormalized() const { return Normalized; } + int getSurfTexType() const { return SurfTexType; } }; CGCUDARuntime(CodeGenModule &CGM) : CGM(CGM) {} @@ -56,7 +78,11 @@ public: /// Emits a kernel launch stub.
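The old two-bit DeviceVarFlags enum grows into the small class above so that surfaces and textures can carry a kind, a surface/texture type code, and a normalized flag alongside extern/constant. A usage sketch; the concrete values are made up for illustration:

```cpp
#include "CGCUDARuntime.h" // internal clang/lib/CodeGen header (assumed context)

void deviceVarFlagsExample() {
  using DVF = clang::CodeGen::CGCUDARuntime::DeviceVarFlags;
  // An extern __constant__ device variable:
  DVF VarFlags(DVF::Variable, /*Extern=*/true, /*Constant=*/true,
               /*Normalized=*/false, /*Type=*/0);
  // A normalized texture reference; Type encodes the texture's element kind:
  DVF TexFlags(DVF::Texture, /*Extern=*/false, /*Constant=*/false,
               /*Normalized=*/true, /*Type=*/2);
  (void)VarFlags.isConstant();    // accessors replace the old bit tests
  (void)TexFlags.getSurfTexType();
}
```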
virtual void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) = 0; virtual void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var, - unsigned Flags) = 0; + bool Extern, bool Constant) = 0; + virtual void registerDeviceSurf(const VarDecl *VD, llvm::GlobalVariable &Var, + bool Extern, int Type) = 0; + virtual void registerDeviceTex(const VarDecl *VD, llvm::GlobalVariable &Var, + bool Extern, int Type, bool Normalized) = 0; /// Constructs and returns a module initialization function or nullptr if it's /// not needed. Must be called after all kernels have been emitted. @@ -66,8 +92,9 @@ public: /// Must be called after ModuleCtorFunction virtual llvm::Function *makeModuleDtorFunction() = 0; - /// Construct and return the stub name of a kernel. - virtual std::string getDeviceStubName(llvm::StringRef Name) const = 0; + /// Returns function or variable name on device side even if the current + /// compilation is for host. + virtual std::string getDeviceSideName(const NamedDecl *ND) = 0; }; /// Creates an instance of a CUDA runtime class. diff --git a/clang/lib/CodeGen/CGCXX.cpp b/clang/lib/CodeGen/CGCXX.cpp index 1928e0df3809..a4bd2c6d5da0 100644 --- a/clang/lib/CodeGen/CGCXX.cpp +++ b/clang/lib/CodeGen/CGCXX.cpp @@ -263,8 +263,8 @@ static CGCallee BuildAppleKextVirtualCall(CodeGenFunction &CGF, AddressPoint.AddressPointIndex; llvm::Value *VFuncPtr = CGF.Builder.CreateConstInBoundsGEP1_64(VTable, VTableIndex, "vfnkxt"); - llvm::Value *VFunc = - CGF.Builder.CreateAlignedLoad(VFuncPtr, CGF.PointerAlignInBytes); + llvm::Value *VFunc = CGF.Builder.CreateAlignedLoad( + VFuncPtr, llvm::Align(CGF.PointerAlignInBytes)); CGCallee Callee(GD, VFunc); return Callee; } diff --git a/clang/lib/CodeGen/CGCXXABI.cpp b/clang/lib/CodeGen/CGCXXABI.cpp index 7ada4032b3ee..65327a2435b5 100644 --- a/clang/lib/CodeGen/CGCXXABI.cpp +++ b/clang/lib/CodeGen/CGCXXABI.cpp @@ -156,6 +156,8 @@ void CGCXXABI::setCXXABIThisValue(CodeGenFunction &CGF, llvm::Value *ThisPtr) { void CGCXXABI::EmitReturnFromThunk(CodeGenFunction &CGF, RValue RV, QualType ResultType) { + assert(!CGF.hasAggregateEvaluationKind(ResultType) && + "cannot handle aggregates"); CGF.EmitReturnOfRValue(RV, ResultType); } @@ -313,3 +315,20 @@ CatchTypeInfo CGCXXABI::getCatchAllTypeInfo() { std::vector<CharUnits> CGCXXABI::getVBPtrOffsets(const CXXRecordDecl *RD) { return std::vector<CharUnits>(); } + +CGCXXABI::AddedStructorArgCounts CGCXXABI::addImplicitConstructorArgs( + CodeGenFunction &CGF, const CXXConstructorDecl *D, CXXCtorType Type, + bool ForVirtualBase, bool Delegating, CallArgList &Args) { + AddedStructorArgs AddedArgs = + getImplicitConstructorArgs(CGF, D, Type, ForVirtualBase, Delegating); + for (size_t i = 0; i < AddedArgs.Prefix.size(); ++i) { + Args.insert(Args.begin() + 1 + i, + CallArg(RValue::get(AddedArgs.Prefix[i].Value), + AddedArgs.Prefix[i].Type)); + } + for (const auto &arg : AddedArgs.Suffix) { + Args.add(RValue::get(arg.Value), arg.Type); + } + return AddedStructorArgCounts(AddedArgs.Prefix.size(), + AddedArgs.Suffix.size()); +} diff --git a/clang/lib/CodeGen/CGCXXABI.h b/clang/lib/CodeGen/CGCXXABI.h index bff49be7a3c4..f5b3fc13bbbd 100644 --- a/clang/lib/CodeGen/CGCXXABI.h +++ b/clang/lib/CodeGen/CGCXXABI.h @@ -16,6 +16,7 @@ #include "CodeGenFunction.h" #include "clang/Basic/LLVM.h" +#include "clang/CodeGen/CodeGenABITypes.h" namespace llvm { class Constant; @@ -107,6 +108,8 @@ public: virtual bool hasMostDerivedReturn(GlobalDecl GD) const { return false; } + virtual bool useSinitAndSterm() const { return 
false; } + /// Returns true if the target allows calling a function through a pointer /// with a different signature than the actual function (or equivalently, /// bitcasting a function or function pointer to a different function type). @@ -287,24 +290,44 @@ public: /// Emit constructor variants required by this ABI. virtual void EmitCXXConstructors(const CXXConstructorDecl *D) = 0; - /// Notes how many arguments were added to the beginning (Prefix) and ending - /// (Suffix) of an arg list. + /// Additional implicit arguments to add to the beginning (Prefix) and end + /// (Suffix) of a constructor / destructor arg list. /// - /// Note that Prefix actually refers to the number of args *after* the first - /// one: `this` arguments always come first. + /// Note that Prefix should actually be inserted *after* the first existing + /// arg; `this` arguments always come first. struct AddedStructorArgs { + struct Arg { + llvm::Value *Value; + QualType Type; + }; + SmallVector<Arg, 1> Prefix; + SmallVector<Arg, 1> Suffix; + AddedStructorArgs() = default; + AddedStructorArgs(SmallVector<Arg, 1> P, SmallVector<Arg, 1> S) + : Prefix(std::move(P)), Suffix(std::move(S)) {} + static AddedStructorArgs prefix(SmallVector<Arg, 1> Args) { + return {std::move(Args), {}}; + } + static AddedStructorArgs suffix(SmallVector<Arg, 1> Args) { + return {{}, std::move(Args)}; + } + }; + + /// Similar to AddedStructorArgs, but only notes the number of additional + /// arguments. + struct AddedStructorArgCounts { unsigned Prefix = 0; unsigned Suffix = 0; - AddedStructorArgs() = default; - AddedStructorArgs(unsigned P, unsigned S) : Prefix(P), Suffix(S) {} - static AddedStructorArgs prefix(unsigned N) { return {N, 0}; } - static AddedStructorArgs suffix(unsigned N) { return {0, N}; } + AddedStructorArgCounts() = default; + AddedStructorArgCounts(unsigned P, unsigned S) : Prefix(P), Suffix(S) {} + static AddedStructorArgCounts prefix(unsigned N) { return {N, 0}; } + static AddedStructorArgCounts suffix(unsigned N) { return {0, N}; } }; /// Build the signature of the given constructor or destructor variant by /// adding any required parameters. For convenience, ArgTys has been /// initialized with the type of 'this'. - virtual AddedStructorArgs + virtual AddedStructorArgCounts buildStructorSignature(GlobalDecl GD, SmallVectorImpl<CanQualType> &ArgTys) = 0; @@ -365,14 +388,26 @@ public: /// Emit the ABI-specific prolog for the function. virtual void EmitInstanceFunctionProlog(CodeGenFunction &CGF) = 0; + virtual AddedStructorArgs + getImplicitConstructorArgs(CodeGenFunction &CGF, const CXXConstructorDecl *D, + CXXCtorType Type, bool ForVirtualBase, + bool Delegating) = 0; + /// Add any ABI-specific implicit arguments needed to call a constructor. /// /// \return The number of arguments added at the beginning and end of the /// call, which is typically zero or one. - virtual AddedStructorArgs + AddedStructorArgCounts addImplicitConstructorArgs(CodeGenFunction &CGF, const CXXConstructorDecl *D, CXXCtorType Type, bool ForVirtualBase, - bool Delegating, CallArgList &Args) = 0; + bool Delegating, CallArgList &Args); + + /// Get the implicit (second) parameter that comes after the "this" pointer, + /// or nullptr if there isn't one. + virtual llvm::Value * + getCXXDestructorImplicitParam(CodeGenFunction &CGF, + const CXXDestructorDecl *DD, CXXDtorType Type, + bool ForVirtualBase, bool Delegating) = 0; /// Emit the destructor call.
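/// (Illustrative note: under the Itanium C++ ABI, the implicit parameter returned by getCXXDestructorImplicitParam above is the VTT pointer used when destroying base subobjects of classes with virtual bases; other ABIs may supply a different value, or none.)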
virtual void EmitDestructorCall(CodeGenFunction &CGF, diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index e4803fde230f..e8235c775d8f 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -16,6 +16,7 @@ #include "CGBlocks.h" #include "CGCXXABI.h" #include "CGCleanup.h" +#include "CGRecordLayout.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "TargetInfo.h" @@ -325,7 +326,7 @@ CodeGenTypes::arrangeCXXStructorDeclaration(GlobalDecl GD) { if (PassParams) appendParameterTypes(*this, argTypes, paramInfos, FTP); - CGCXXABI::AddedStructorArgs AddedArgs = + CGCXXABI::AddedStructorArgCounts AddedArgs = TheCXXABI.buildStructorSignature(GD, argTypes); if (!paramInfos.empty()) { // Note: prefix implies after the first param. @@ -815,6 +816,7 @@ CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC, FI->ASTCallingConvention = info.getCC(); FI->InstanceMethod = instanceMethod; FI->ChainCall = chainCall; + FI->CmseNSCall = info.getCmseNSCall(); FI->NoReturn = info.getNoReturn(); FI->ReturnsRetained = info.getProducesResult(); FI->NoCallerSavedRegs = info.getNoCallerSavedRegs(); @@ -1014,8 +1016,8 @@ static void forConstantArrayExpansion(CodeGenFunction &CGF, } } -void CodeGenFunction::ExpandTypeFromArgs( - QualType Ty, LValue LV, SmallVectorImpl<llvm::Value *>::iterator &AI) { +void CodeGenFunction::ExpandTypeFromArgs(QualType Ty, LValue LV, + llvm::Function::arg_iterator &AI) { assert(LV.isSimple() && "Unexpected non-simple lvalue during struct expansion."); @@ -1044,17 +1046,17 @@ void CodeGenFunction::ExpandTypeFromArgs( ExpandTypeFromArgs(FD->getType(), SubLV, AI); } } else if (isa<ComplexExpansion>(Exp.get())) { - auto realValue = *AI++; - auto imagValue = *AI++; + auto realValue = &*AI++; + auto imagValue = &*AI++; EmitStoreOfComplex(ComplexPairTy(realValue, imagValue), LV, /*init*/ true); } else { // Call EmitStoreOfScalar except when the lvalue is a bitfield to emit a // primitive store. assert(isa<NoExpansion>(Exp.get())); if (LV.isBitField()) - EmitStoreThroughLValue(RValue::get(*AI++), LV); + EmitStoreThroughLValue(RValue::get(&*AI++), LV); else - EmitStoreOfScalar(*AI++, LV); + EmitStoreOfScalar(&*AI++, LV); } } @@ -1232,7 +1234,7 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty, if (llvm::StructType *SrcSTy = dyn_cast<llvm::StructType>(SrcTy)) { Src = EnterStructPointerForCoercedAccess(Src, SrcSTy, DstSize, CGF); - SrcTy = Src.getType()->getElementType(); + SrcTy = Src.getElementType(); } uint64_t SrcSize = CGF.CGM.getDataLayout().getTypeAllocSize(SrcTy); @@ -1260,11 +1262,9 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty, // Otherwise do coercion through memory. This is stupid, but simple. Address Tmp = CreateTempAllocaForCoercion(CGF, Ty, Src.getAlignment()); - Address Casted = CGF.Builder.CreateElementBitCast(Tmp,CGF.Int8Ty); - Address SrcCasted = CGF.Builder.CreateElementBitCast(Src,CGF.Int8Ty); - CGF.Builder.CreateMemCpy(Casted, SrcCasted, - llvm::ConstantInt::get(CGF.IntPtrTy, SrcSize), - false); + CGF.Builder.CreateMemCpy(Tmp.getPointer(), Tmp.getAlignment().getAsAlign(), + Src.getPointer(), Src.getAlignment().getAsAlign(), + llvm::ConstantInt::get(CGF.IntPtrTy, SrcSize)); return CGF.Builder.CreateLoad(Tmp); } @@ -1272,18 +1272,17 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty, // store the elements rather than the aggregate to be more friendly to // fast-isel. // FIXME: Do we need to recurse here? 
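// (Illustrative example: a { i32, i64 } value is emitted as two scalar stores through struct GEPs rather than one first-class-aggregate store.)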
-static void BuildAggStore(CodeGenFunction &CGF, llvm::Value *Val, - Address Dest, bool DestIsVolatile) { +void CodeGenFunction::EmitAggregateStore(llvm::Value *Val, Address Dest, + bool DestIsVolatile) { // Prefer scalar stores to first-class aggregate stores. - if (llvm::StructType *STy = - dyn_cast<llvm::StructType>(Val->getType())) { + if (llvm::StructType *STy = dyn_cast<llvm::StructType>(Val->getType())) { for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - Address EltPtr = CGF.Builder.CreateStructGEP(Dest, i); - llvm::Value *Elt = CGF.Builder.CreateExtractValue(Val, i); - CGF.Builder.CreateStore(Elt, EltPtr, DestIsVolatile); + Address EltPtr = Builder.CreateStructGEP(Dest, i); + llvm::Value *Elt = Builder.CreateExtractValue(Val, i); + Builder.CreateStore(Elt, EltPtr, DestIsVolatile); } } else { - CGF.Builder.CreateStore(Val, Dest, DestIsVolatile); + Builder.CreateStore(Val, Dest, DestIsVolatile); } } @@ -1298,7 +1297,7 @@ static void CreateCoercedStore(llvm::Value *Src, bool DstIsVolatile, CodeGenFunction &CGF) { llvm::Type *SrcTy = Src->getType(); - llvm::Type *DstTy = Dst.getType()->getElementType(); + llvm::Type *DstTy = Dst.getElementType(); if (SrcTy == DstTy) { CGF.Builder.CreateStore(Src, Dst, DstIsVolatile); return; @@ -1308,7 +1307,7 @@ static void CreateCoercedStore(llvm::Value *Src, if (llvm::StructType *DstSTy = dyn_cast<llvm::StructType>(DstTy)) { Dst = EnterStructPointerForCoercedAccess(Dst, DstSTy, SrcSize, CGF); - DstTy = Dst.getType()->getElementType(); + DstTy = Dst.getElementType(); } llvm::PointerType *SrcPtrTy = llvm::dyn_cast<llvm::PointerType>(SrcTy); @@ -1334,7 +1333,7 @@ static void CreateCoercedStore(llvm::Value *Src, // If store is legal, just bitcast the src pointer. if (SrcSize <= DstSize) { Dst = CGF.Builder.CreateElementBitCast(Dst, SrcTy); - BuildAggStore(CGF, Src, Dst, DstIsVolatile); + CGF.EmitAggregateStore(Src, Dst, DstIsVolatile); } else { // Otherwise do coercion through memory. This is stupid, but // simple. @@ -1347,11 +1346,9 @@ static void CreateCoercedStore(llvm::Value *Src, // to that information. Address Tmp = CreateTempAllocaForCoercion(CGF, SrcTy, Dst.getAlignment()); CGF.Builder.CreateStore(Src, Tmp); - Address Casted = CGF.Builder.CreateElementBitCast(Tmp,CGF.Int8Ty); - Address DstCasted = CGF.Builder.CreateElementBitCast(Dst,CGF.Int8Ty); - CGF.Builder.CreateMemCpy(DstCasted, Casted, - llvm::ConstantInt::get(CGF.IntPtrTy, DstSize), - false); + CGF.Builder.CreateMemCpy(Dst.getPointer(), Dst.getAlignment().getAsAlign(), + Tmp.getPointer(), Tmp.getAlignment().getAsAlign(), + llvm::ConstantInt::get(CGF.IntPtrTy, DstSize)); } } @@ -1702,8 +1699,9 @@ static void AddAttributesFromFunctionProtoType(ASTContext &Ctx, FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); } -void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, - bool AttrOnCallSite, +void CodeGenModule::getDefaultFunctionAttributes(StringRef Name, + bool HasOptnone, + bool AttrOnCallSite, llvm::AttrBuilder &FuncAttrs) { // OptimizeNoneAttr takes precedence over -Os or -Oz. No warning needed. 
if (!HasOptnone) { @@ -1746,13 +1744,20 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, llvm::toStringRef(CodeGenOpts.LessPreciseFPMAD)); if (CodeGenOpts.NullPointerIsValid) - FuncAttrs.addAttribute("null-pointer-is-valid", "true"); - if (CodeGenOpts.FPDenormalMode != llvm::DenormalMode::Invalid) + FuncAttrs.addAttribute(llvm::Attribute::NullPointerIsValid); + + if (CodeGenOpts.FPDenormalMode != llvm::DenormalMode::getIEEE()) FuncAttrs.addAttribute("denormal-fp-math", - llvm::denormalModeName(CodeGenOpts.FPDenormalMode)); + CodeGenOpts.FPDenormalMode.str()); + if (CodeGenOpts.FP32DenormalMode != CodeGenOpts.FPDenormalMode) { + FuncAttrs.addAttribute( + "denormal-fp-math-f32", + CodeGenOpts.FP32DenormalMode.str()); + } FuncAttrs.addAttribute("no-trapping-math", - llvm::toStringRef(CodeGenOpts.NoTrappingMath)); + llvm::toStringRef(LangOpts.getFPExceptionMode() == + LangOptions::FPE_Ignore)); // Strict (compliant) code is the default, so only add this attribute to // indicate that we are trying to workaround a problem case. @@ -1762,25 +1767,21 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, // TODO: Are these all needed? // unsafe/inf/nan/nsz are handled by instruction-level FastMathFlags. FuncAttrs.addAttribute("no-infs-fp-math", - llvm::toStringRef(CodeGenOpts.NoInfsFPMath)); + llvm::toStringRef(LangOpts.NoHonorInfs)); FuncAttrs.addAttribute("no-nans-fp-math", - llvm::toStringRef(CodeGenOpts.NoNaNsFPMath)); + llvm::toStringRef(LangOpts.NoHonorNaNs)); FuncAttrs.addAttribute("unsafe-fp-math", - llvm::toStringRef(CodeGenOpts.UnsafeFPMath)); + llvm::toStringRef(LangOpts.UnsafeFPMath)); FuncAttrs.addAttribute("use-soft-float", llvm::toStringRef(CodeGenOpts.SoftFloat)); FuncAttrs.addAttribute("stack-protector-buffer-size", llvm::utostr(CodeGenOpts.SSPBufferSize)); FuncAttrs.addAttribute("no-signed-zeros-fp-math", - llvm::toStringRef(CodeGenOpts.NoSignedZeros)); + llvm::toStringRef(LangOpts.NoSignedZero)); FuncAttrs.addAttribute( "correctly-rounded-divide-sqrt-fp-math", llvm::toStringRef(CodeGenOpts.CorrectlyRoundedDivSqrt)); - if (getLangOpts().OpenCL) - FuncAttrs.addAttribute("denorms-are-zero", - llvm::toStringRef(CodeGenOpts.FlushDenorm)); - // TODO: Reciprocal estimate codegen options should apply to instructions? const std::vector<std::string> &Recips = CodeGenOpts.Reciprocals; if (!Recips.empty()) @@ -1796,6 +1797,8 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, FuncAttrs.addAttribute("stackrealign"); if (CodeGenOpts.Backchain) FuncAttrs.addAttribute("backchain"); + if (CodeGenOpts.EnableSegmentedStacks) + FuncAttrs.addAttribute("split-stack"); if (CodeGenOpts.SpeculativeLoadHardening) FuncAttrs.addAttribute(llvm::Attribute::SpeculativeLoadHardening); @@ -1813,10 +1816,6 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { // Exceptions aren't supported in CUDA device code. FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); - - // Respect -fcuda-flush-denormals-to-zero. 
- if (CodeGenOpts.FlushDenorm) - FuncAttrs.addAttribute("nvptx-f32ftz", "true"); } for (StringRef Attr : CodeGenOpts.DefaultFunctionAttrs) { @@ -1826,31 +1825,100 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, } } -void CodeGenModule::AddDefaultFnAttrs(llvm::Function &F) { +void CodeGenModule::addDefaultFunctionDefinitionAttributes(llvm::Function &F) { llvm::AttrBuilder FuncAttrs; - ConstructDefaultFnAttrList(F.getName(), F.hasOptNone(), - /* AttrOnCallSite = */ false, FuncAttrs); + getDefaultFunctionAttributes(F.getName(), F.hasOptNone(), + /* AttrOnCallSite = */ false, FuncAttrs); + // TODO: call GetCPUAndFeaturesAttributes? F.addAttributes(llvm::AttributeList::FunctionIndex, FuncAttrs); } +void CodeGenModule::addDefaultFunctionDefinitionAttributes( + llvm::AttrBuilder &attrs) { + getDefaultFunctionAttributes(/*function name*/ "", /*optnone*/ false, + /*for call*/ false, attrs); + GetCPUAndFeaturesAttributes(GlobalDecl(), attrs); +} + +static void addNoBuiltinAttributes(llvm::AttrBuilder &FuncAttrs, + const LangOptions &LangOpts, + const NoBuiltinAttr *NBA = nullptr) { + auto AddNoBuiltinAttr = [&FuncAttrs](StringRef BuiltinName) { + SmallString<32> AttributeName; + AttributeName += "no-builtin-"; + AttributeName += BuiltinName; + FuncAttrs.addAttribute(AttributeName); + }; + + // First, handle the language options passed through -fno-builtin. + if (LangOpts.NoBuiltin) { + // -fno-builtin disables them all. + FuncAttrs.addAttribute("no-builtins"); + return; + } + + // Then, add attributes for builtins specified through -fno-builtin-<name>. + llvm::for_each(LangOpts.NoBuiltinFuncs, AddNoBuiltinAttr); + + // Now, let's check the __attribute__((no_builtin("..."))) attribute added to + // the source. + if (!NBA) + return; + + // If there is a wildcard in the builtin names specified through the + // attribute, disable them all. + if (llvm::is_contained(NBA->builtinNames(), "*")) { + FuncAttrs.addAttribute("no-builtins"); + return; + } + + // And last, add the rest of the builtin names. + llvm::for_each(NBA->builtinNames(), AddNoBuiltinAttr); +} + +/// Construct the IR attribute list of a function or call. +/// +/// When adding an attribute, please consider where it should be handled: +/// +/// - getDefaultFunctionAttributes is for attributes that are essentially +/// part of the global target configuration (but perhaps can be +/// overridden on a per-function basis). Adding attributes there +/// will cause them to also be set in frontends that build on Clang's +/// target-configuration logic, as well as for code defined in library +/// modules such as CUDA's libdevice. +/// +/// - ConstructAttributeList builds on top of getDefaultFunctionAttributes +/// and adds declaration-specific, convention-specific, and +/// frontend-specific logic. The last is of particular importance: +/// attributes that restrict how the frontend generates code must be +/// added here rather than getDefaultFunctionAttributes. +/// void CodeGenModule::ConstructAttributeList( StringRef Name, const CGFunctionInfo &FI, CGCalleeInfo CalleeInfo, llvm::AttributeList &AttrList, unsigned &CallingConv, bool AttrOnCallSite) { llvm::AttrBuilder FuncAttrs; llvm::AttrBuilder RetAttrs; + // Collect function IR attributes from the CC lowering. + // We'll collect the parameter and result attributes later.
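// (Illustrative tie-in for addNoBuiltinAttributes above: `-fno-builtin-memcpy` yields a "no-builtin-memcpy" string attribute, while `-fno-builtin` or a wildcard __attribute__((no_builtin("*"))) collapses to a single "no-builtins" attribute.)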
CallingConv = FI.getEffectiveCallingConvention(); if (FI.isNoReturn()) FuncAttrs.addAttribute(llvm::Attribute::NoReturn); + if (FI.isCmseNSCall()) + FuncAttrs.addAttribute("cmse_nonsecure_call"); - // If we have information about the function prototype, we can learn - // attributes from there. + // Collect function IR attributes from the callee prototype if we have one. AddAttributesFromFunctionProtoType(getContext(), FuncAttrs, CalleeInfo.getCalleeFunctionProtoType()); const Decl *TargetDecl = CalleeInfo.getCalleeDecl().getDecl(); bool HasOptnone = false; + // The NoBuiltinAttr attached to the target FunctionDecl. + const NoBuiltinAttr *NBA = nullptr; + + // Collect function IR attributes based on declaration-specific + // information. // FIXME: handle sseregparm someday... if (TargetDecl) { if (TargetDecl->hasAttr<ReturnsTwiceAttr>()) @@ -1869,6 +1937,13 @@ void CodeGenModule::ConstructAttributeList( if (const FunctionDecl *Fn = dyn_cast<FunctionDecl>(TargetDecl)) { AddAttributesFromFunctionProtoType( getContext(), FuncAttrs, Fn->getType()->getAs<FunctionProtoType>()); + if (AttrOnCallSite && Fn->isReplaceableGlobalAllocationFunction()) { + // A sane operator new returns a non-aliasing pointer. + auto Kind = Fn->getDeclName().getCXXOverloadedOperator(); + if (getCodeGenOpts().AssumeSaneOperatorNew && + (Kind == OO_New || Kind == OO_Array_New)) + RetAttrs.addAttribute(llvm::Attribute::NoAlias); + } const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(Fn); const bool IsVirtualCall = MD && MD->isVirtual(); // Don't use [[noreturn]], _Noreturn or [[no_builtin]] for a call to a // virtual function. if (!(AttrOnCallSite && IsVirtualCall)) { if (Fn->isNoReturn()) FuncAttrs.addAttribute(llvm::Attribute::NoReturn); - - const auto *NBA = Fn->getAttr<NoBuiltinAttr>(); - bool HasWildcard = NBA && llvm::is_contained(NBA->builtinNames(), "*"); - if (getLangOpts().NoBuiltin || HasWildcard) - FuncAttrs.addAttribute("no-builtins"); - else { - auto AddNoBuiltinAttr = [&FuncAttrs](StringRef BuiltinName) { - SmallString<32> AttributeName; - AttributeName += "no-builtin-"; - AttributeName += BuiltinName; - FuncAttrs.addAttribute(AttributeName); - }; - llvm::for_each(getLangOpts().NoBuiltinFuncs, AddNoBuiltinAttr); - if (NBA) - llvm::for_each(NBA->builtinNames(), AddNoBuiltinAttr); - } + NBA = Fn->getAttr<NoBuiltinAttr>(); } } @@ -1924,70 +1984,93 @@ void CodeGenModule::ConstructAttributeList( FuncAttrs.addAllocSizeAttr(AllocSize->getElemSizeParam().getLLVMIndex(), NumElemsParam); } + + if (TargetDecl->hasAttr<OpenCLKernelAttr>()) { + if (getLangOpts().OpenCLVersion <= 120) { + // OpenCL v1.2 Work groups are always uniform + FuncAttrs.addAttribute("uniform-work-group-size", "true"); + } else { + // OpenCL v2.0 Work groups may or may not be uniform. + // '-cl-uniform-work-group-size' compile option gets a hint + // to the compiler that the global work-size be a multiple of + // the work-group size specified to clEnqueueNDRangeKernel + // (i.e. work groups are uniform). + FuncAttrs.addAttribute("uniform-work-group-size", + llvm::toStringRef(CodeGenOpts.UniformWGSize)); + } + } } - ConstructDefaultFnAttrList(Name, HasOptnone, AttrOnCallSite, FuncAttrs); + // Attach "no-builtins" attributes to: + // * call sites: both `nobuiltin` and "no-builtins" or "no-builtin-<name>". + // * definitions: "no-builtins" or "no-builtin-<name>" only.
+ // The attributes can come from: + // * LangOpts: -ffreestanding, -fno-builtin, -fno-builtin-<name> + // * FunctionDecl attributes: __attribute__((no_builtin(...))) + addNoBuiltinAttributes(FuncAttrs, getLangOpts(), NBA); + + // Collect function IR attributes based on global settings. + getDefaultFunctionAttributes(Name, HasOptnone, AttrOnCallSite, FuncAttrs); - // This must run after constructing the default function attribute list - // to ensure that the speculative load hardening attribute is removed - // in the case where the -mspeculative-load-hardening flag was passed. + // Override some default IR attributes based on declaration-specific + // information. if (TargetDecl) { if (TargetDecl->hasAttr<NoSpeculativeLoadHardeningAttr>()) FuncAttrs.removeAttribute(llvm::Attribute::SpeculativeLoadHardening); if (TargetDecl->hasAttr<SpeculativeLoadHardeningAttr>()) FuncAttrs.addAttribute(llvm::Attribute::SpeculativeLoadHardening); - } - - if (CodeGenOpts.EnableSegmentedStacks && - !(TargetDecl && TargetDecl->hasAttr<NoSplitStackAttr>())) - FuncAttrs.addAttribute("split-stack"); - - // Add NonLazyBind attribute to function declarations when -fno-plt - // is used. - if (TargetDecl && CodeGenOpts.NoPLT) { - if (auto *Fn = dyn_cast<FunctionDecl>(TargetDecl)) { - if (!Fn->isDefined() && !AttrOnCallSite) { - FuncAttrs.addAttribute(llvm::Attribute::NonLazyBind); + if (TargetDecl->hasAttr<NoSplitStackAttr>()) + FuncAttrs.removeAttribute("split-stack"); + + // Add NonLazyBind attribute to function declarations when -fno-plt + // is used. + // FIXME: what if we just haven't processed the function definition + // yet, or if it's an external definition like C99 inline? + if (CodeGenOpts.NoPLT) { + if (auto *Fn = dyn_cast<FunctionDecl>(TargetDecl)) { + if (!Fn->isDefined() && !AttrOnCallSite) { + FuncAttrs.addAttribute(llvm::Attribute::NonLazyBind); + } } } } - if (TargetDecl && TargetDecl->hasAttr<OpenCLKernelAttr>()) { - if (getLangOpts().OpenCLVersion <= 120) { - // OpenCL v1.2 Work groups are always uniform - FuncAttrs.addAttribute("uniform-work-group-size", "true"); - } else { - // OpenCL v2.0 Work groups may be whether uniform or not. - // '-cl-uniform-work-group-size' compile option gets a hint - // to the compiler that the global work-size be a multiple of - // the work-group size specified to clEnqueueNDRangeKernel - // (i.e. work groups are uniform). - FuncAttrs.addAttribute("uniform-work-group-size", - llvm::toStringRef(CodeGenOpts.UniformWGSize)); - } - } - + // Collect non-call-site function IR attributes from declaration-specific + // information. if (!AttrOnCallSite) { - bool DisableTailCalls = false; + if (TargetDecl && TargetDecl->hasAttr<CmseNSEntryAttr>()) + FuncAttrs.addAttribute("cmse_nonsecure_entry"); + + // Decide whether tail calls should be disabled. + auto shouldDisableTailCalls = [&] { + // Should this be honored in getDefaultFunctionAttributes?
+ if (CodeGenOpts.DisableTailCalls) + return true; + + if (!TargetDecl) + return false; - if (CodeGenOpts.DisableTailCalls) - DisableTailCalls = true; - else if (TargetDecl) { if (TargetDecl->hasAttr<DisableTailCallsAttr>() || TargetDecl->hasAttr<AnyX86InterruptAttr>()) - DisableTailCalls = true; - else if (CodeGenOpts.NoEscapingBlockTailCalls) { + return true; + + if (CodeGenOpts.NoEscapingBlockTailCalls) { if (const auto *BD = dyn_cast<BlockDecl>(TargetDecl)) if (!BD->doesNotEscape()) - DisableTailCalls = true; + return true; } - } + return false; + }; FuncAttrs.addAttribute("disable-tail-calls", - llvm::toStringRef(DisableTailCalls)); + llvm::toStringRef(shouldDisableTailCalls())); + + // CPU/feature overrides. addDefaultFunctionDefinitionAttributes + // handles these separately to set them based on the global defaults. GetCPUAndFeaturesAttributes(CalleeInfo.getCalleeDecl(), FuncAttrs); } + // Collect attributes from arguments and return values. ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI); QualType RetTy = FI.getReturnType(); @@ -2024,11 +2107,16 @@ void CodeGenModule::ConstructAttributeList( if (const auto *RefTy = RetTy->getAs<ReferenceType>()) { QualType PTy = RefTy->getPointeeType(); if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) - RetAttrs.addDereferenceableAttr(getContext().getTypeSizeInChars(PTy) - .getQuantity()); - else if (getContext().getTargetAddressSpace(PTy) == 0 && - !CodeGenOpts.NullPointerIsValid) + RetAttrs.addDereferenceableAttr( + getMinimumObjectSize(PTy).getQuantity()); + if (getContext().getTargetAddressSpace(PTy) == 0 && + !CodeGenOpts.NullPointerIsValid) RetAttrs.addAttribute(llvm::Attribute::NonNull); + if (PTy->isObjectType()) { + llvm::Align Alignment = + getNaturalPointeeTypeAlignment(RetTy).getAsAlign(); + RetAttrs.addAlignmentAttr(Alignment); + } } bool hasUsedSRet = false; @@ -2041,6 +2129,7 @@ void CodeGenModule::ConstructAttributeList( hasUsedSRet = true; if (RetAI.getInReg()) SRETAttrs.addAttribute(llvm::Attribute::InReg); + SRETAttrs.addAlignmentAttr(RetAI.getIndirectAlign().getQuantity()); ArgAttrs[IRFunctionArgs.getSRetArgNo()] = llvm::AttributeSet::get(getLLVMContext(), SRETAttrs); } @@ -2134,11 +2223,16 @@ void CodeGenModule::ConstructAttributeList( if (const auto *RefTy = ParamType->getAs<ReferenceType>()) { QualType PTy = RefTy->getPointeeType(); if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) - Attrs.addDereferenceableAttr(getContext().getTypeSizeInChars(PTy) - .getQuantity()); - else if (getContext().getTargetAddressSpace(PTy) == 0 && - !CodeGenOpts.NullPointerIsValid) + Attrs.addDereferenceableAttr( + getMinimumObjectSize(PTy).getQuantity()); + if (getContext().getTargetAddressSpace(PTy) == 0 && + !CodeGenOpts.NullPointerIsValid) Attrs.addAttribute(llvm::Attribute::NonNull); + if (PTy->isObjectType()) { + llvm::Align Alignment = + getNaturalPointeeTypeAlignment(ParamType).getAsAlign(); + Attrs.addAlignmentAttr(Alignment); + } } switch (FI.getExtParameterInfo(ArgNo).getABI()) { @@ -2161,8 +2255,7 @@ void CodeGenModule::ConstructAttributeList( if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) { auto info = getContext().getTypeInfoInChars(PTy); Attrs.addDereferenceableAttr(info.first.getQuantity()); - Attrs.addAttribute(llvm::Attribute::getWithAlignment( - getLLVMContext(), info.second.getAsAlign())); + Attrs.addAlignmentAttr(info.second.getAsAlign()); } break; } @@ -2278,19 +2371,13 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // simplify. 
ClangToLLVMArgMapping IRFunctionArgs(CGM.getContext(), FI); - // Flattened function arguments. - SmallVector<llvm::Value *, 16> FnArgs; - FnArgs.reserve(IRFunctionArgs.totalIRArgs()); - for (auto &Arg : Fn->args()) { - FnArgs.push_back(&Arg); - } - assert(FnArgs.size() == IRFunctionArgs.totalIRArgs()); + assert(Fn->arg_size() == IRFunctionArgs.totalIRArgs()); // If we're using inalloca, all the memory arguments are GEPs off of the last // parameter, which is a pointer to the complete memory area. Address ArgStruct = Address::invalid(); if (IRFunctionArgs.hasInallocaArg()) { - ArgStruct = Address(FnArgs[IRFunctionArgs.getInallocaArgNo()], + ArgStruct = Address(Fn->getArg(IRFunctionArgs.getInallocaArgNo()), FI.getArgStructAlignment()); assert(ArgStruct.getType() == FI.getArgStruct()->getPointerTo()); @@ -2298,7 +2385,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // Name the struct return parameter. if (IRFunctionArgs.hasSRetArg()) { - auto AI = cast<llvm::Argument>(FnArgs[IRFunctionArgs.getSRetArgNo()]); + auto AI = Fn->getArg(IRFunctionArgs.getSRetArgNo()); AI->setName("agg.result"); AI->addAttr(llvm::Attribute::NoAlias); } @@ -2340,13 +2427,17 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, auto FieldIndex = ArgI.getInAllocaFieldIndex(); Address V = Builder.CreateStructGEP(ArgStruct, FieldIndex, Arg->getName()); + if (ArgI.getInAllocaIndirect()) + V = Address(Builder.CreateLoad(V), + getContext().getTypeAlignInChars(Ty)); ArgVals.push_back(ParamValue::forIndirect(V)); break; } case ABIArgInfo::Indirect: { assert(NumIRArgs == 1); - Address ParamAddr = Address(FnArgs[FirstIRArg], ArgI.getIndirectAlign()); + Address ParamAddr = + Address(Fn->getArg(FirstIRArg), ArgI.getIndirectAlign()); if (!hasScalarEvaluationKind(Ty)) { // Aggregates and complex variables are accessed by reference. All we @@ -2361,10 +2452,10 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // FIXME: We should have a common utility for generating an aggregate // copy. CharUnits Size = getContext().getTypeSizeInChars(Ty); - auto SizeVal = llvm::ConstantInt::get(IntPtrTy, Size.getQuantity()); - Address Dst = Builder.CreateBitCast(AlignedTemp, Int8PtrTy); - Address Src = Builder.CreateBitCast(ParamAddr, Int8PtrTy); - Builder.CreateMemCpy(Dst, Src, SizeVal, false); + Builder.CreateMemCpy( + AlignedTemp.getPointer(), AlignedTemp.getAlignment().getAsAlign(), + ParamAddr.getPointer(), ParamAddr.getAlignment().getAsAlign(), + llvm::ConstantInt::get(IntPtrTy, Size.getQuantity())); V = AlignedTemp; } ArgVals.push_back(ParamValue::forIndirect(V)); @@ -2382,16 +2473,18 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, case ABIArgInfo::Extend: case ABIArgInfo::Direct: { - - // If we have the trivial case, handle it with no muss and fuss. - if (!isa<llvm::StructType>(ArgI.getCoerceToType()) && - ArgI.getCoerceToType() == ConvertType(Ty) && - ArgI.getDirectOffset() == 0) { + auto AI = Fn->getArg(FirstIRArg); + llvm::Type *LTy = ConvertType(Arg->getType()); + + // Prepare parameter attributes. So far, only attributes for pointer + // parameters are prepared. See + // http://llvm.org/docs/LangRef.html#paramattrs. + if (ArgI.getDirectOffset() == 0 && LTy->isPointerTy() && + ArgI.getCoerceToType()->isPointerTy()) { assert(NumIRArgs == 1); - llvm::Value *V = FnArgs[FirstIRArg]; - auto AI = cast<llvm::Argument>(V); if (const ParmVarDecl *PVD = dyn_cast<ParmVarDecl>(Arg)) { + // Set `nonnull` attribute if any. 
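// (That is, when the parameter is covered by the function's __attribute__((nonnull(...))) or is itself declared nonnull, and null pointers are not treated as valid; an informal paraphrase of the check below.)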
if (getNonNullAttr(CurCodeDecl, PVD, PVD->getType(), PVD->getFunctionScopeIndex()) && !CGM.getCodeGenOpts().NullPointerIsValid) @@ -2411,9 +2504,11 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, ArrSize) { llvm::AttrBuilder Attrs; Attrs.addDereferenceableAttr( - getContext().getTypeSizeInChars(ETy).getQuantity()*ArrSize); + getContext().getTypeSizeInChars(ETy).getQuantity() * + ArrSize); AI->addAttrs(Attrs); - } else if (getContext().getTargetAddressSpace(ETy) == 0 && + } else if (getContext().getTargetInfo().getNullPointerValue( + ETy.getAddressSpace()) == 0 && !CGM.getCodeGenOpts().NullPointerIsValid) { AI->addAttr(llvm::Attribute::NonNull); } @@ -2429,6 +2524,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, AI->addAttr(llvm::Attribute::NonNull); } + // Set `align` attribute if any. const auto *AVAttr = PVD->getAttr<AlignValueAttr>(); if (!AVAttr) if (const auto *TOTy = dyn_cast<TypedefType>(OTy)) @@ -2437,21 +2533,33 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // If alignment-assumption sanitizer is enabled, we do *not* add // alignment attribute here, but emit normal alignment assumption, // so the UBSAN check could function. - llvm::Value *AlignmentValue = - EmitScalarExpr(AVAttr->getAlignment()); llvm::ConstantInt *AlignmentCI = - cast<llvm::ConstantInt>(AlignmentValue); - unsigned Alignment = std::min((unsigned)AlignmentCI->getZExtValue(), - +llvm::Value::MaximumAlignment); - AI->addAttrs(llvm::AttrBuilder().addAlignmentAttr(Alignment)); + cast<llvm::ConstantInt>(EmitScalarExpr(AVAttr->getAlignment())); + unsigned AlignmentInt = + AlignmentCI->getLimitedValue(llvm::Value::MaximumAlignment); + if (AI->getParamAlign().valueOrOne() < AlignmentInt) { + AI->removeAttr(llvm::Attribute::AttrKind::Alignment); + AI->addAttrs(llvm::AttrBuilder().addAlignmentAttr( + llvm::Align(AlignmentInt))); + } } } + // Set 'noalias' if an argument type has the `restrict` qualifier. if (Arg->getType().isRestrictQualified()) AI->addAttr(llvm::Attribute::NoAlias); + } + + // Prepare the argument value. If we have the trivial case, handle it + // with no muss and fuss. + if (!isa<llvm::StructType>(ArgI.getCoerceToType()) && + ArgI.getCoerceToType() == ConvertType(Ty) && + ArgI.getDirectOffset() == 0) { + assert(NumIRArgs == 1); // LLVM expects swifterror parameters to be used in very restricted // ways. Copy the value into a less-restricted temporary. + llvm::Value *V = AI; if (FI.getExtParameterInfo(ArgNo).getABI() == ParameterABI::SwiftErrorResult) { QualType pointeeTy = Ty->getPointeeType(); @@ -2513,7 +2621,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, assert(STy->getNumElements() == NumIRArgs); for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - auto AI = FnArgs[FirstIRArg + i]; + auto AI = Fn->getArg(FirstIRArg + i); AI->setName(Arg->getName() + ".coerce" + Twine(i)); Address EltPtr = Builder.CreateStructGEP(AddrToStoreInto, i); Builder.CreateStore(AI, EltPtr); @@ -2526,7 +2634,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, } else { // Simple case, just do a coerced store of the argument into the alloca. 
assert(NumIRArgs == 1); - auto AI = FnArgs[FirstIRArg]; + auto AI = Fn->getArg(FirstIRArg); AI->setName(Arg->getName() + ".coerce"); CreateCoercedStore(AI, Ptr, /*DstIsVolatile=*/false, *this); } @@ -2559,7 +2667,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, continue; auto eltAddr = Builder.CreateStructGEP(alloca, i); - auto elt = FnArgs[argIndex++]; + auto elt = Fn->getArg(argIndex++); Builder.CreateStore(elt, eltAddr); } assert(argIndex == FirstIRArg + NumIRArgs); @@ -2574,11 +2682,11 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, LValue LV = MakeAddrLValue(Alloca, Ty); ArgVals.push_back(ParamValue::forIndirect(Alloca)); - auto FnArgIter = FnArgs.begin() + FirstIRArg; + auto FnArgIter = Fn->arg_begin() + FirstIRArg; ExpandTypeFromArgs(Ty, LV, FnArgIter); - assert(FnArgIter == FnArgs.begin() + FirstIRArg + NumIRArgs); + assert(FnArgIter == Fn->arg_begin() + FirstIRArg + NumIRArgs); for (unsigned i = 0, e = NumIRArgs; i != e; ++i) { - auto AI = FnArgs[FirstIRArg + i]; + auto AI = Fn->getArg(FirstIRArg + i); AI->setName(Arg->getName() + "." + Twine(i)); } break; @@ -2655,10 +2763,10 @@ static llvm::Value *tryEmitFusedAutoreleaseOfResult(CodeGenFunction &CGF, bool doRetainAutorelease; - if (call->getCalledValue() == CGF.CGM.getObjCEntrypoints().objc_retain) { + if (call->getCalledOperand() == CGF.CGM.getObjCEntrypoints().objc_retain) { doRetainAutorelease = true; - } else if (call->getCalledValue() == CGF.CGM.getObjCEntrypoints() - .objc_retainAutoreleasedReturnValue) { + } else if (call->getCalledOperand() == + CGF.CGM.getObjCEntrypoints().objc_retainAutoreleasedReturnValue) { doRetainAutorelease = false; // If we emitted an assembly marker for this call (and the @@ -2674,8 +2782,8 @@ static llvm::Value *tryEmitFusedAutoreleaseOfResult(CodeGenFunction &CGF, assert(prev); } assert(isa<llvm::CallInst>(prev)); - assert(cast<llvm::CallInst>(prev)->getCalledValue() == - CGF.CGM.getObjCEntrypoints().retainAutoreleasedReturnValueMarker); + assert(cast<llvm::CallInst>(prev)->getCalledOperand() == + CGF.CGM.getObjCEntrypoints().retainAutoreleasedReturnValueMarker); InstsToKill.push_back(prev); } } else { @@ -2718,8 +2826,8 @@ static llvm::Value *tryRemoveRetainOfSelf(CodeGenFunction &CGF, // Look for a retain call. llvm::CallInst *retainCall = dyn_cast<llvm::CallInst>(result->stripPointerCasts()); - if (!retainCall || - retainCall->getCalledValue() != CGF.CGM.getObjCEntrypoints().objc_retain) + if (!retainCall || retainCall->getCalledOperand() != + CGF.CGM.getObjCEntrypoints().objc_retain) return nullptr; // Look for an ordinary load of 'self'. @@ -2825,6 +2933,199 @@ static llvm::StoreInst *findDominatingStoreToReturnValue(CodeGenFunction &CGF) { return store; } +// Helper functions for EmitCMSEClearRecord + +// Set the bits corresponding to a field having width `BitWidth` and located at +// offset `BitOffset` (from the least significant bit) within a storage unit of +// `Bits.size()` bytes. Each element of `Bits` corresponds to one target byte. +// Use little-endian layout, i.e.`Bits[0]` is the LSB. 
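// (Worked example, assuming CharWidth == 8: BitOffset == 4 and BitWidth == 12 over a 3-byte storage unit set bits 4..15, giving Bits == {0xF0, 0xFF, 0x00}.)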
+static void setBitRange(SmallVectorImpl<uint64_t> &Bits, int BitOffset, + int BitWidth, int CharWidth) { + assert(CharWidth <= 64); + assert(static_cast<unsigned>(BitWidth) <= Bits.size() * CharWidth); + + int Pos = 0; + if (BitOffset >= CharWidth) { + Pos += BitOffset / CharWidth; + BitOffset = BitOffset % CharWidth; + } + + const uint64_t Used = (uint64_t(1) << CharWidth) - 1; + if (BitOffset + BitWidth >= CharWidth) { + Bits[Pos++] |= (Used << BitOffset) & Used; + BitWidth -= CharWidth - BitOffset; + BitOffset = 0; + } + + while (BitWidth >= CharWidth) { + Bits[Pos++] = Used; + BitWidth -= CharWidth; + } + + if (BitWidth > 0) + Bits[Pos++] |= (Used >> (CharWidth - BitWidth)) << BitOffset; +} + +// Set the bits corresponding to a field having width `BitWidth` and located at +// offset `BitOffset` (from the least significant bit) within a storage unit of +// `StorageSize` bytes, located at `StorageOffset` in `Bits`. Each element of +// `Bits` corresponds to one target byte. Use target endian layout. +static void setBitRange(SmallVectorImpl<uint64_t> &Bits, int StorageOffset, + int StorageSize, int BitOffset, int BitWidth, + int CharWidth, bool BigEndian) { + + SmallVector<uint64_t, 8> TmpBits(StorageSize); + setBitRange(TmpBits, BitOffset, BitWidth, CharWidth); + + if (BigEndian) + std::reverse(TmpBits.begin(), TmpBits.end()); + + for (uint64_t V : TmpBits) + Bits[StorageOffset++] |= V; +} + +static void setUsedBits(CodeGenModule &, QualType, int, + SmallVectorImpl<uint64_t> &); + +// Set the bits in `Bits`, which correspond to the value representations of +// the actual members of the record type `RTy`. Note that this function does +// not handle base classes, virtual tables, etc., since they cannot happen in +// CMSE function arguments or return values. The bit mask corresponds to the +// target memory layout, i.e. it's endian dependent. +static void setUsedBits(CodeGenModule &CGM, const RecordType *RTy, int Offset, + SmallVectorImpl<uint64_t> &Bits) { + ASTContext &Context = CGM.getContext(); + int CharWidth = Context.getCharWidth(); + const RecordDecl *RD = RTy->getDecl()->getDefinition(); + const ASTRecordLayout &ASTLayout = Context.getASTRecordLayout(RD); + const CGRecordLayout &Layout = CGM.getTypes().getCGRecordLayout(RD); + + int Idx = 0; + for (auto I = RD->field_begin(), E = RD->field_end(); I != E; ++I, ++Idx) { + const FieldDecl *F = *I; + + if (F->isUnnamedBitfield() || F->isZeroLengthBitField(Context) || + F->getType()->isIncompleteArrayType()) + continue; + + if (F->isBitField()) { + const CGBitFieldInfo &BFI = Layout.getBitFieldInfo(F); + setBitRange(Bits, Offset + BFI.StorageOffset.getQuantity(), + BFI.StorageSize / CharWidth, BFI.Offset, + BFI.Size, CharWidth, + CGM.getDataLayout().isBigEndian()); + continue; + } + + setUsedBits(CGM, F->getType(), + Offset + ASTLayout.getFieldOffset(Idx) / CharWidth, Bits); + } +} + +// Set the bits in `Bits`, which correspond to the value representations of +// the elements of an array type `ATy`.
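// (Illustrative: for `struct { char c; short s; } a[2]`, which has one byte of padding after `c`, the per-element byte mask {0xFF, 0x00, 0xFF, 0xFF} is computed once and OR-ed into `Bits` at each element's offset.)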
+static void setUsedBits(CodeGenModule &CGM, const ConstantArrayType *ATy, + int Offset, SmallVectorImpl<uint64_t> &Bits) { + const ASTContext &Context = CGM.getContext(); + + QualType ETy = Context.getBaseElementType(ATy); + int Size = Context.getTypeSizeInChars(ETy).getQuantity(); + SmallVector<uint64_t, 4> TmpBits(Size); + setUsedBits(CGM, ETy, 0, TmpBits); + + for (int I = 0, N = Context.getConstantArrayElementCount(ATy); I < N; ++I) { + auto Src = TmpBits.begin(); + auto Dst = Bits.begin() + Offset + I * Size; + for (int J = 0; J < Size; ++J) + *Dst++ |= *Src++; + } +} + +// Set the bits in `Bits`, which correspond to the value representations of +// the type `QTy`. +static void setUsedBits(CodeGenModule &CGM, QualType QTy, int Offset, + SmallVectorImpl<uint64_t> &Bits) { + if (const auto *RTy = QTy->getAs<RecordType>()) + return setUsedBits(CGM, RTy, Offset, Bits); + + ASTContext &Context = CGM.getContext(); + if (const auto *ATy = Context.getAsConstantArrayType(QTy)) + return setUsedBits(CGM, ATy, Offset, Bits); + + int Size = Context.getTypeSizeInChars(QTy).getQuantity(); + if (Size <= 0) + return; + + std::fill_n(Bits.begin() + Offset, Size, + (uint64_t(1) << Context.getCharWidth()) - 1); +} + +static uint64_t buildMultiCharMask(const SmallVectorImpl<uint64_t> &Bits, + int Pos, int Size, int CharWidth, + bool BigEndian) { + assert(Size > 0); + uint64_t Mask = 0; + if (BigEndian) { + for (auto P = Bits.begin() + Pos, E = Bits.begin() + Pos + Size; P != E; + ++P) + Mask = (Mask << CharWidth) | *P; + } else { + auto P = Bits.begin() + Pos + Size, End = Bits.begin() + Pos; + do + Mask = (Mask << CharWidth) | *--P; + while (P != End); + } + return Mask; +} + +// Emit code to clear the bits in a record that are not part of any +// user-declared member, when the record is returned from a function. +llvm::Value *CodeGenFunction::EmitCMSEClearRecord(llvm::Value *Src, + llvm::IntegerType *ITy, + QualType QTy) { + assert(Src->getType() == ITy); + assert(ITy->getScalarSizeInBits() <= 64); + + const llvm::DataLayout &DataLayout = CGM.getDataLayout(); + int Size = DataLayout.getTypeStoreSize(ITy); + SmallVector<uint64_t, 4> Bits(Size); + setUsedBits(CGM, QTy->getAs<RecordType>(), 0, Bits); + + int CharWidth = CGM.getContext().getCharWidth(); + uint64_t Mask = + buildMultiCharMask(Bits, 0, Size, CharWidth, DataLayout.isBigEndian()); + + return Builder.CreateAnd(Src, Mask, "cmse.clear"); +} + +// Emit code to clear the bits in a record that are not part of any +// user-declared member, when the record is passed as a function argument. +llvm::Value *CodeGenFunction::EmitCMSEClearRecord(llvm::Value *Src, + llvm::ArrayType *ATy, + QualType QTy) { + const llvm::DataLayout &DataLayout = CGM.getDataLayout(); + int Size = DataLayout.getTypeStoreSize(ATy); + SmallVector<uint64_t, 16> Bits(Size); + setUsedBits(CGM, QTy->getAs<RecordType>(), 0, Bits); + + // Clear each element of the LLVM array.
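// (For instance, a record lowered to [2 x i32] whose first word contains one padding byte would be cleared with masks 0xFFFF00FF and 0xFFFFFFFF on a little-endian target; values are hypothetical.)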
+ int CharWidth = CGM.getContext().getCharWidth(); + int CharsPerElt = + ATy->getArrayElementType()->getScalarSizeInBits() / CharWidth; + int MaskIndex = 0; + llvm::Value *R = llvm::UndefValue::get(ATy); + for (int I = 0, N = ATy->getArrayNumElements(); I != N; ++I) { + uint64_t Mask = buildMultiCharMask(Bits, MaskIndex, CharsPerElt, CharWidth, + DataLayout.isBigEndian()); + MaskIndex += CharsPerElt; + llvm::Value *T0 = Builder.CreateExtractValue(Src, I); + llvm::Value *T1 = Builder.CreateAnd(T0, Mask, "cmse.clear"); + R = Builder.CreateInsertValue(R, T1, I); + } + + return R; +} + void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, bool EmitRetDbgLoc, SourceLocation EndLoc) { @@ -2991,6 +3292,14 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, llvm::Instruction *Ret; if (RV) { + if (CurFuncDecl && CurFuncDecl->hasAttr<CmseNSEntryAttr>()) { + // For certain return types, clear padding bits, as they may reveal + // sensitive information. + // Small struct/union types are passed as integers. + auto *ITy = dyn_cast<llvm::IntegerType>(RV->getType()); + if (ITy != nullptr && isa<RecordType>(RetTy.getCanonicalType())) + RV = EmitCMSEClearRecord(RV, ITy, RetTy); + } EmitReturnValueCheck(RV); Ret = Builder.CreateRet(RV); } else { @@ -3006,6 +3315,11 @@ void CodeGenFunction::EmitReturnValueCheck(llvm::Value *RV) { if (!CurCodeDecl) return; + // If the return block isn't reachable, neither is this check, so don't emit + // it. + if (ReturnBlock.isValid() && ReturnBlock.getBlock()->use_empty()) + return; + ReturnsNonNullAttr *RetNNAttr = nullptr; if (SanOpts.has(SanitizerKind::ReturnsNonnullAttribute)) RetNNAttr = CurCodeDecl->getAttr<ReturnsNonNullAttr>(); @@ -3026,7 +3340,7 @@ void CodeGenFunction::EmitReturnValueCheck(llvm::Value *RV) { } else { if (auto *DD = dyn_cast<DeclaratorDecl>(CurCodeDecl)) if (auto *TSI = DD->getTypeSourceInfo()) - if (auto FTL = TSI->getTypeLoc().castAs<FunctionTypeLoc>()) + if (auto FTL = TSI->getTypeLoc().getAsAdjusted<FunctionTypeLoc>()) AttrLoc = FTL.getReturnLoc().findNullabilityLoc(); CheckKind = SanitizerKind::NullabilityReturn; Handler = SanitizerHandler::NullabilityReturn; @@ -3811,6 +4125,110 @@ void CodeGenFunction::deferPlaceholderReplacement(llvm::Instruction *Old, DeferredReplacements.push_back(std::make_pair(Old, New)); } +namespace { + +/// Apply the given \p NewAlign as the alignment of the return value attribute. +/// If such an attribute already exists, reset it to the larger of the two +/// alignments. LLVM_NODISCARD llvm::AttributeList +maybeRaiseRetAlignmentAttribute(llvm::LLVMContext &Ctx, + const llvm::AttributeList &Attrs, + llvm::Align NewAlign) { + llvm::Align CurAlign = Attrs.getRetAlignment().valueOrOne(); + if (CurAlign >= NewAlign) + return Attrs; + llvm::Attribute AlignAttr = llvm::Attribute::getWithAlignment(Ctx, NewAlign); + return Attrs + .removeAttribute(Ctx, llvm::AttributeList::ReturnIndex, + llvm::Attribute::AttrKind::Alignment) + .addAttribute(Ctx, llvm::AttributeList::ReturnIndex, AlignAttr); +} + +template <typename AlignedAttrTy> class AbstractAssumeAlignedAttrEmitter { +protected: + CodeGenFunction &CGF; + + /// We do nothing if this is, or becomes, nullptr. + const AlignedAttrTy *AA = nullptr; + + llvm::Value *Alignment = nullptr; // May or may not be a constant. + llvm::ConstantInt *OffsetCI = nullptr; // Constant, hopefully zero.
+ + AbstractAssumeAlignedAttrEmitter(CodeGenFunction &CGF_, const Decl *FuncDecl) + : CGF(CGF_) { + if (!FuncDecl) + return; + AA = FuncDecl->getAttr<AlignedAttrTy>(); + } + +public: + /// If we can, materialize the alignment as an attribute on return value. + LLVM_NODISCARD llvm::AttributeList + TryEmitAsCallSiteAttribute(const llvm::AttributeList &Attrs) { + if (!AA || OffsetCI || CGF.SanOpts.has(SanitizerKind::Alignment)) + return Attrs; + const auto *AlignmentCI = dyn_cast<llvm::ConstantInt>(Alignment); + if (!AlignmentCI) + return Attrs; + // We may legitimately have non-power-of-2 alignment here. + // If so, this is UB land, emit it via `@llvm.assume` instead. + if (!AlignmentCI->getValue().isPowerOf2()) + return Attrs; + llvm::AttributeList NewAttrs = maybeRaiseRetAlignmentAttribute( + CGF.getLLVMContext(), Attrs, + llvm::Align( + AlignmentCI->getLimitedValue(llvm::Value::MaximumAlignment))); + AA = nullptr; // We're done. Disallow doing anything else. + return NewAttrs; + } + + /// Emit alignment assumption. + /// This is a general fallback that we take if either there is an offset, + /// or the alignment is variable or we are sanitizing for alignment. + void EmitAsAnAssumption(SourceLocation Loc, QualType RetTy, RValue &Ret) { + if (!AA) + return; + CGF.emitAlignmentAssumption(Ret.getScalarVal(), RetTy, Loc, + AA->getLocation(), Alignment, OffsetCI); + AA = nullptr; // We're done. Disallow doing anything else. + } +}; + +/// Helper data structure to emit `AssumeAlignedAttr`. +class AssumeAlignedAttrEmitter final + : public AbstractAssumeAlignedAttrEmitter<AssumeAlignedAttr> { +public: + AssumeAlignedAttrEmitter(CodeGenFunction &CGF_, const Decl *FuncDecl) + : AbstractAssumeAlignedAttrEmitter(CGF_, FuncDecl) { + if (!AA) + return; + // It is guaranteed that the alignment/offset are constants. + Alignment = cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AA->getAlignment())); + if (Expr *Offset = AA->getOffset()) { + OffsetCI = cast<llvm::ConstantInt>(CGF.EmitScalarExpr(Offset)); + if (OffsetCI->isNullValue()) // Canonicalize zero offset to no offset. + OffsetCI = nullptr; + } + } +}; + +/// Helper data structure to emit `AllocAlignAttr`. +class AllocAlignAttrEmitter final + : public AbstractAssumeAlignedAttrEmitter<AllocAlignAttr> { +public: + AllocAlignAttrEmitter(CodeGenFunction &CGF_, const Decl *FuncDecl, + const CallArgList &CallArgs) + : AbstractAssumeAlignedAttrEmitter(CGF_, FuncDecl) { + if (!AA) + return; + // Alignment may or may not be a constant, and that is okay. 
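// (Hypothetical example: for `void *my_alloc(size_t n, size_t align) __attribute__((alloc_align(2)))`, the alignment loaded below is whatever value the caller passed for `align`, constant or not.)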
+ Alignment = CallArgs[AA->getParamIndex().getLLVMIndex()] + .getRValue(CGF) + .getScalarVal(); + } +}; + +} // namespace + RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, @@ -3829,7 +4247,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, llvm::FunctionType *IRFuncTy = getTypes().GetFunctionType(CallInfo); const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl().getDecl(); - if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl)) + if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl)) { // We can only guarantee that a function is called from the correct // context/function based on the appropriate target attributes, // so only check in the case where we have both always_inline and target @@ -3840,6 +4258,12 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, TargetDecl->hasAttr<TargetAttr>()) checkTargetFeatures(Loc, FD); + // Some architectures (such as x86-64) have the ABI changed based on + // attribute-target/features. Give them a chance to diagnose. + CGM.getTargetCodeGenInfo().checkFunctionCallABI( + CGM, Loc, dyn_cast_or_null<FunctionDecl>(CurCodeDecl), FD, CallArgs); + } + #ifndef NDEBUG if (!(CallInfo.isVariadic() && CallInfo.getArgStruct())) { // For an inalloca varargs function, we don't expect CallInfo to match the @@ -3940,18 +4364,39 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, assert(NumIRArgs == 0); assert(getTarget().getTriple().getArch() == llvm::Triple::x86); if (I->isAggregate()) { - // Replace the placeholder with the appropriate argument slot GEP. Address Addr = I->hasLValue() ? I->getKnownLValue().getAddress(*this) : I->getKnownRValue().getAggregateAddress(); llvm::Instruction *Placeholder = cast<llvm::Instruction>(Addr.getPointer()); - CGBuilderTy::InsertPoint IP = Builder.saveIP(); - Builder.SetInsertPoint(Placeholder); - Addr = - Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex()); - Builder.restoreIP(IP); + + if (!ArgInfo.getInAllocaIndirect()) { + // Replace the placeholder with the appropriate argument slot GEP. + CGBuilderTy::InsertPoint IP = Builder.saveIP(); + Builder.SetInsertPoint(Placeholder); + Addr = Builder.CreateStructGEP(ArgMemory, + ArgInfo.getInAllocaFieldIndex()); + Builder.restoreIP(IP); + } else { + // For indirect things such as overaligned structs, replace the + // placeholder with a regular aggregate temporary alloca. Store the + // address of this alloca into the struct. + Addr = CreateMemTemp(info_it->type, "inalloca.indirect.tmp"); + Address ArgSlot = Builder.CreateStructGEP( + ArgMemory, ArgInfo.getInAllocaFieldIndex()); + Builder.CreateStore(Addr.getPointer(), ArgSlot); + } deferPlaceholderReplacement(Placeholder, Addr.getPointer()); + } else if (ArgInfo.getInAllocaIndirect()) { + // Make a temporary alloca and store the address of it into the argument + // struct. + Address Addr = CreateMemTempWithoutCast( + I->Ty, getContext().getTypeAlignInChars(I->Ty), + "indirect-arg-temp"); + I->copyInto(*this, Addr); + Address ArgSlot = + Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex()); + Builder.CreateStore(Addr.getPointer(), ArgSlot); } else { // Store the RValue into the argument struct. 
Address Addr = @@ -4001,8 +4446,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, bool NeedCopy = false; if (Addr.getAlignment() < Align && - llvm::getOrEnforceKnownAlignment(V, Align.getQuantity(), *TD) < - Align.getQuantity()) { + llvm::getOrEnforceKnownAlignment(V, Align.getAsAlign(), *TD) < + Align.getAsAlign()) { NeedCopy = true; } else if (I->hasLValue()) { auto LV = I->getKnownLValue(); @@ -4128,7 +4573,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, llvm::StructType *STy = dyn_cast<llvm::StructType>(ArgInfo.getCoerceToType()); if (STy && ArgInfo.isDirect() && ArgInfo.getCanBeFlattened()) { - llvm::Type *SrcTy = Src.getType()->getElementType(); + llvm::Type *SrcTy = Src.getElementType(); uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(SrcTy); uint64_t DstSize = CGM.getDataLayout().getTypeAllocSize(STy); @@ -4156,8 +4601,18 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } else { // In the simple case, just pass the coerced loaded value. assert(NumIRArgs == 1); - IRCallArgs[FirstIRArg] = - CreateCoercedLoad(Src, ArgInfo.getCoerceToType(), *this); + llvm::Value *Load = + CreateCoercedLoad(Src, ArgInfo.getCoerceToType(), *this); + + if (CallInfo.isCmseNSCall()) { + // For certain parameter types, clear padding bits, as they may reveal + // sensitive information. + // Small struct/union types are passed as integer arrays. + auto *ATy = dyn_cast<llvm::ArrayType>(Load->getType()); + if (ATy != nullptr && isa<RecordType>(I->Ty.getCanonicalType())) + Load = EmitCMSEClearRecord(Load, ATy, I->Ty); + } + IRCallArgs[FirstIRArg] = Load; } break; @@ -4328,8 +4783,9 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Update the largest vector width if any arguments have vector types. for (unsigned i = 0; i < IRCallArgs.size(); ++i) { if (auto *VT = dyn_cast<llvm::VectorType>(IRCallArgs[i]->getType())) - LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, - VT->getPrimitiveSizeInBits().getFixedSize()); + LargestVectorWidth = + std::max((uint64_t)LargestVectorWidth, + VT->getPrimitiveSizeInBits().getKnownMinSize()); } // Compute the calling convention and attributes. @@ -4346,6 +4802,12 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::StrictFP); + // Add call-site nomerge attribute if present. + if (InNoMergeAttributedStmt) + Attrs = + Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoMerge); + // Apply some call-site-specific attributes. // TODO: work this into building the attribute set. @@ -4378,8 +4840,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, CannotThrow = true; } else { // Otherwise, nounwind call sites will never throw. - CannotThrow = Attrs.hasAttribute(llvm::AttributeList::FunctionIndex, - llvm::Attribute::NoUnwind); + CannotThrow = Attrs.hasFnAttribute(llvm::Attribute::NoUnwind); } // If we made a temporary, be sure to clean up after ourselves.
Note that we @@ -4402,6 +4863,12 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::StrictFP); + AssumeAlignedAttrEmitter AssumeAlignedAttrEmitter(*this, TargetDecl); + Attrs = AssumeAlignedAttrEmitter.TryEmitAsCallSiteAttribute(Attrs); + + AllocAlignAttrEmitter AllocAlignAttrEmitter(*this, TargetDecl, CallArgs); + Attrs = AllocAlignAttrEmitter.TryEmitAsCallSiteAttribute(Attrs); + // Emit the actual call/invoke instruction. llvm::CallBase *CI; if (!InvokeDest) { @@ -4437,8 +4904,9 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Update largest vector width from the return type. if (auto *VT = dyn_cast<llvm::VectorType>(CI->getType())) - LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, - VT->getPrimitiveSizeInBits().getFixedSize()); + LargestVectorWidth = + std::max((uint64_t)LargestVectorWidth, + VT->getPrimitiveSizeInBits().getKnownMinSize()); // Insert instrumentation or attach profile metadata at indirect call sites. // For more details, see the comment before the definition of @@ -4461,7 +4929,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Add metadata for calls to MSAllocator functions if (getDebugInfo() && TargetDecl && TargetDecl->hasAttr<MSAllocatorAttr>()) - getDebugInfo()->addHeapAllocSiteMetadata(CI, RetTy, Loc); + getDebugInfo()->addHeapAllocSiteMetadata(CI, RetTy->getPointeeType(), Loc); // 4. Finish the call. @@ -4581,7 +5049,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, DestPtr = CreateMemTemp(RetTy, "agg.tmp"); DestIsVolatile = false; } - BuildAggStore(*this, CI, DestPtr, DestIsVolatile); + EmitAggregateStore(CI, DestPtr, DestIsVolatile); return RValue::getAggregate(DestPtr); } case TEK_Scalar: { @@ -4620,22 +5088,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Emit the assume_aligned check on the return value. 
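The AssumeAlignedAttrEmitter/AllocAlignAttrEmitter pair introduced above centralizes the handling of alignment attributes: where legal they become call-site attributes, otherwise they degrade to an alignment assumption on the returned pointer (the check emitted just below). At the source level, the attributes this machinery serves look roughly like the following sketch; the aligned_alloc usage assumes a C++17 hosted environment:

    #include <cstddef>
    #include <cstdlib>

    // Callers may assume the result is 64-byte aligned.
    __attribute__((assume_aligned(64)))
    void *alloc64(std::size_t n) { return std::aligned_alloc(64, n); }

    // The returned pointer's alignment is given by the first parameter.
    __attribute__((alloc_align(1)))
    void *allocN(std::size_t align) { return std::aligned_alloc(align, align); }

    int main() {
      void *p = alloc64(128); // optimizer may assume (uintptr_t)p % 64 == 0
      void *q = allocN(32);
      std::free(q);
      std::free(p);
    }
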
if (Ret.isScalar() && TargetDecl) { - if (const auto *AA = TargetDecl->getAttr<AssumeAlignedAttr>()) { - llvm::Value *OffsetValue = nullptr; - if (const auto *Offset = AA->getOffset()) - OffsetValue = EmitScalarExpr(Offset); - - llvm::Value *Alignment = EmitScalarExpr(AA->getAlignment()); - llvm::ConstantInt *AlignmentCI = cast<llvm::ConstantInt>(Alignment); - EmitAlignmentAssumption(Ret.getScalarVal(), RetTy, Loc, AA->getLocation(), - AlignmentCI, OffsetValue); - } else if (const auto *AA = TargetDecl->getAttr<AllocAlignAttr>()) { - llvm::Value *AlignmentVal = CallArgs[AA->getParamIndex().getLLVMIndex()] - .getRValue(*this) - .getScalarVal(); - EmitAlignmentAssumption(Ret.getScalarVal(), RetTy, Loc, AA->getLocation(), - AlignmentVal); - } + AssumeAlignedAttrEmitter.EmitAsAnAssumption(Loc, RetTy, Ret); + AllocAlignAttrEmitter.EmitAsAnAssumption(Loc, RetTy, Ret); } // Explicitly call CallLifetimeEnd::Emit just to re-use the code even though @@ -4643,6 +5097,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, for (CallLifetimeEnd &LifetimeEnd : CallLifetimeEndAfterCall) LifetimeEnd.Emit(*this, /*Flags=*/{}); + if (!ReturnValue.isExternallyDestructed() && + RetTy.isDestructedType() == QualType::DK_nontrivial_c_struct) + pushDestroy(QualType::DK_nontrivial_c_struct, Ret.getAggregateAddress(), + RetTy); + return Ret; } diff --git a/clang/lib/CodeGen/CGCall.h b/clang/lib/CodeGen/CGCall.h index 34558be5adb1..509ca43a9784 100644 --- a/clang/lib/CodeGen/CGCall.h +++ b/clang/lib/CodeGen/CGCall.h @@ -16,6 +16,7 @@ #include "CGValue.h" #include "EHScopeStack.h" +#include "clang/AST/ASTFwd.h" #include "clang/AST/CanonicalType.h" #include "clang/AST/GlobalDecl.h" #include "clang/AST/Type.h" @@ -357,27 +358,26 @@ class FunctionArgList : public SmallVector<const VarDecl *, 16> {}; /// ReturnValueSlot - Contains the address where the return value of a /// function can be stored, and whether the address is volatile or not. class ReturnValueSlot { - llvm::PointerIntPair<llvm::Value *, 2, unsigned int> Value; - CharUnits Alignment; + Address Addr = Address::invalid(); // Return value slot flags - enum Flags { - IS_VOLATILE = 0x1, - IS_UNUSED = 0x2, - }; + unsigned IsVolatile : 1; + unsigned IsUnused : 1; + unsigned IsExternallyDestructed : 1; public: - ReturnValueSlot() {} - ReturnValueSlot(Address Addr, bool IsVolatile, bool IsUnused = false) - : Value(Addr.isValid() ? Addr.getPointer() : nullptr, - (IsVolatile ? IS_VOLATILE : 0) | (IsUnused ? IS_UNUSED : 0)), - Alignment(Addr.isValid() ? 
Addr.getAlignment() : CharUnits::Zero()) {} - - bool isNull() const { return !getValue().isValid(); } - - bool isVolatile() const { return Value.getInt() & IS_VOLATILE; } - Address getValue() const { return Address(Value.getPointer(), Alignment); } - bool isUnused() const { return Value.getInt() & IS_UNUSED; } + ReturnValueSlot() + : IsVolatile(false), IsUnused(false), IsExternallyDestructed(false) {} + ReturnValueSlot(Address Addr, bool IsVolatile, bool IsUnused = false, + bool IsExternallyDestructed = false) + : Addr(Addr), IsVolatile(IsVolatile), IsUnused(IsUnused), + IsExternallyDestructed(IsExternallyDestructed) {} + + bool isNull() const { return !Addr.isValid(); } + bool isVolatile() const { return IsVolatile; } + Address getValue() const { return Addr; } + bool isUnused() const { return IsUnused; } + bool isExternallyDestructed() const { return IsExternallyDestructed; } }; } // end namespace CodeGen diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp index 3f3825b76275..4d143e3e1bdf 100644 --- a/clang/lib/CodeGen/CGClass.cpp +++ b/clang/lib/CodeGen/CGClass.cpp @@ -35,20 +35,37 @@ using namespace CodeGen; /// Return the best known alignment for an unknown pointer to a /// particular class. CharUnits CodeGenModule::getClassPointerAlignment(const CXXRecordDecl *RD) { - if (!RD->isCompleteDefinition()) + if (!RD->hasDefinition()) return CharUnits::One(); // Hopefully won't be used anywhere. auto &layout = getContext().getASTRecordLayout(RD); // If the class is final, then we know that the pointer points to an // object of that type and can use the full alignment. - if (RD->hasAttr<FinalAttr>()) { + if (RD->isEffectivelyFinal()) return layout.getAlignment(); // Otherwise, we have to assume it could be a subclass. - } else { - return layout.getNonVirtualAlignment(); - } + return layout.getNonVirtualAlignment(); +} + +/// Return the smallest possible amount of storage that might be allocated +/// starting from the beginning of an object of a particular class. +/// +/// This may be smaller than sizeof(RD) if RD has virtual base classes. +CharUnits CodeGenModule::getMinimumClassObjectSize(const CXXRecordDecl *RD) { + if (!RD->hasDefinition()) + return CharUnits::One(); + + auto &layout = getContext().getASTRecordLayout(RD); + + // If the class is final, then we know that the pointer points to an + // object of that type and can use the full alignment. + if (RD->isEffectivelyFinal()) + return layout.getSize(); + + // Otherwise, we have to assume it could be a subclass. + return std::max(layout.getNonVirtualSize(), CharUnits::One()); } /// Return the best known alignment for a pointer to a virtual base, @@ -138,8 +155,8 @@ CodeGenFunction::EmitCXXMemberDataPointerAddress(const Expr *E, Address base, memberPtr, memberPtrType); QualType memberType = memberPtrType->getPointeeType(); - CharUnits memberAlign = getNaturalTypeAlignment(memberType, BaseInfo, - TBAAInfo); + CharUnits memberAlign = + CGM.getNaturalTypeAlignment(memberType, BaseInfo, TBAAInfo); memberAlign = CGM.getDynamicOffsetAlignment(base.getAlignment(), memberPtrType->getClass()->getAsCXXRecordDecl(), @@ -236,8 +253,13 @@ ApplyNonVirtualAndVirtualOffset(CodeGenFunction &CGF, Address addr, // Compute the offset from the static and dynamic components. 
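The ReturnValueSlot rewrite above (an Address plus three one-bit flags instead of a PointerIntPair and a separate alignment) also threads a new ownership bit: EmitCall now pushes a destructor cleanup for a returned non-trivial C struct only when the slot is not externally destructed. The shape of the class in miniature, with a stand-in Address type rather than Clang's:

    #include <cassert>

    struct Address {                 // stand-in for CodeGen::Address
      void *Ptr = nullptr;
      bool isValid() const { return Ptr != nullptr; }
    };

    class ReturnValueSlot {
      Address Addr;
      unsigned IsVolatile : 1;
      unsigned IsUnused : 1;
      unsigned IsExternallyDestructed : 1;

    public:
      ReturnValueSlot()
          : IsVolatile(false), IsUnused(false), IsExternallyDestructed(false) {}
      ReturnValueSlot(Address A, bool Volatile, bool Unused = false,
                      bool ExternallyDestructed = false)
          : Addr(A), IsVolatile(Volatile), IsUnused(Unused),
            IsExternallyDestructed(ExternallyDestructed) {}

      bool isNull() const { return !Addr.isValid(); }
      bool isExternallyDestructed() const { return IsExternallyDestructed; }
    };

    int main() {
      int x = 0;
      ReturnValueSlot s(Address{&x}, /*Volatile=*/false,
                        /*Unused=*/false, /*ExternallyDestructed=*/true);
      assert(!s.isNull() && s.isExternallyDestructed());
    }
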
llvm::Value *baseOffset; if (!nonVirtualOffset.isZero()) { - baseOffset = llvm::ConstantInt::get(CGF.PtrDiffTy, - nonVirtualOffset.getQuantity()); + llvm::Type *OffsetType = + (CGF.CGM.getTarget().getCXXABI().isItaniumFamily() && + CGF.CGM.getItaniumVTableContext().isRelativeLayout()) + ? CGF.Int32Ty + : CGF.PtrDiffTy; + baseOffset = + llvm::ConstantInt::get(OffsetType, nonVirtualOffset.getQuantity()); if (virtualOffset) { baseOffset = CGF.Builder.CreateAdd(virtualOffset, baseOffset); } @@ -730,7 +752,7 @@ bool CodeGenFunction::IsConstructorDelegationValid( // parameters // - etc. // If we ever add any of the above cases, remember that: - // - function-try-blocks will always blacklist this optimization + // - function-try-blocks will always exclude this optimization // - we need to perform the constructor prologue and cleanup in // EmitConstructorBody. @@ -2128,7 +2150,7 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, QualType SrcTy = D->getParamDecl(0)->getType().getNonReferenceType(); Address Src(Args[1].getRValue(*this).getScalarVal(), - getNaturalTypeAlignment(SrcTy)); + CGM.getNaturalTypeAlignment(SrcTy)); LValue SrcLVal = MakeAddrLValue(Src, SrcTy); QualType DestTy = getContext().getTypeDeclType(ClassDecl); LValue DestLVal = MakeAddrLValue(This, DestTy); @@ -2148,7 +2170,7 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, } // Insert any ABI-specific implicit constructor arguments. - CGCXXABI::AddedStructorArgs ExtraArgs = + CGCXXABI::AddedStructorArgCounts ExtraArgs = CGM.getCXXABI().addImplicitConstructorArgs(*this, D, Type, ForVirtualBase, Delegating, Args); @@ -2157,7 +2179,7 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, const CGFunctionInfo &Info = CGM.getTypes().arrangeCXXConstructorCall( Args, D, Type, ExtraArgs.Prefix, ExtraArgs.Suffix, PassPrototypeArgs); CGCallee Callee = CGCallee::forDirect(CalleePtr, GlobalDecl(D, Type)); - EmitCall(Info, Callee, ReturnValueSlot(), Args); + EmitCall(Info, Callee, ReturnValueSlot(), Args, nullptr, Loc); // Generate vtable assumptions if we're constructing a complete object // with a vtable. We don't do this for base subobjects for two reasons: @@ -2641,7 +2663,9 @@ void CodeGenFunction::EmitTypeMetadataCodeForVCall(const CXXRecordDecl *RD, if (SanOpts.has(SanitizerKind::CFIVCall)) EmitVTablePtrCheckForCall(RD, VTable, CodeGenFunction::CFITCK_VCall, Loc); else if (CGM.getCodeGenOpts().WholeProgramVTables && - CGM.HasHiddenLTOVisibility(RD)) { + // Don't insert type test assumes if we are forcing public std + // visibility. 
+ !CGM.HasLTOVisibilityPublicStd(RD)) { llvm::Metadata *MD = CGM.CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0)); llvm::Value *TypeId = @@ -2850,7 +2874,9 @@ void CodeGenFunction::EmitForwardingCallToLambda( if (!resultType->isVoidType() && calleeFnInfo.getReturnInfo().getKind() == ABIArgInfo::Indirect && !hasScalarEvaluationKind(calleeFnInfo.getReturnType())) - returnSlot = ReturnValueSlot(ReturnValue, resultType.isVolatileQualified()); + returnSlot = + ReturnValueSlot(ReturnValue, resultType.isVolatileQualified(), + /*IsUnused=*/false, /*IsExternallyDestructed=*/true); // We don't need to separately arrange the call arguments because // the call can't be variadic anyway --- it's impossible to forward diff --git a/clang/lib/CodeGen/CGCleanup.cpp b/clang/lib/CodeGen/CGCleanup.cpp index c117dd5c25c1..ad543ef86c1a 100644 --- a/clang/lib/CodeGen/CGCleanup.cpp +++ b/clang/lib/CodeGen/CGCleanup.cpp @@ -179,12 +179,10 @@ void *EHScopeStack::pushCleanup(CleanupKind Kind, size_t Size) { char *Buffer = allocate(EHCleanupScope::getSizeForCleanupSize(Size)); bool IsNormalCleanup = Kind & NormalCleanup; bool IsEHCleanup = Kind & EHCleanup; - bool IsActive = !(Kind & InactiveCleanup); bool IsLifetimeMarker = Kind & LifetimeMarker; EHCleanupScope *Scope = new (Buffer) EHCleanupScope(IsNormalCleanup, IsEHCleanup, - IsActive, Size, BranchFixups.size(), InnermostNormalCleanup, @@ -309,9 +307,9 @@ static void createStoreInstBefore(llvm::Value *value, Address addr, static llvm::LoadInst *createLoadInstBefore(Address addr, const Twine &name, llvm::Instruction *beforeInst) { - auto load = new llvm::LoadInst(addr.getPointer(), name, beforeInst); - load->setAlignment(addr.getAlignment().getAsAlign()); - return load; + return new llvm::LoadInst(addr.getElementType(), addr.getPointer(), name, + false, addr.getAlignment().getAsAlign(), + beforeInst); } /// All the branch fixups on the EH stack have propagated out past the @@ -859,6 +857,9 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) { // TODO: base this on the number of branch-afters and fixups const unsigned SwitchCapacity = 10; + // pass the abnormal exit flag to Fn (SEH cleanup) + cleanupFlags.setHasExitSwitch(); + llvm::LoadInst *Load = createLoadInstBefore(getNormalCleanupDestSlot(), "cleanup.dest", nullptr); diff --git a/clang/lib/CodeGen/CGCleanup.h b/clang/lib/CodeGen/CGCleanup.h index ffe0f9d9dd20..ef4f6b9ec133 100644 --- a/clang/lib/CodeGen/CGCleanup.h +++ b/clang/lib/CodeGen/CGCleanup.h @@ -102,7 +102,7 @@ protected: }; public: - enum Kind { Cleanup, Catch, Terminate, Filter, PadEnd }; + enum Kind { Cleanup, Catch, Terminate, Filter }; EHScope(Kind kind, EHScopeStack::stable_iterator enclosingEHScope) : CachedLandingPad(nullptr), CachedEHDispatchBlock(nullptr), @@ -284,8 +284,8 @@ public: return sizeof(EHCleanupScope) + CleanupBits.CleanupSize; } - EHCleanupScope(bool isNormal, bool isEH, bool isActive, - unsigned cleanupSize, unsigned fixupDepth, + EHCleanupScope(bool isNormal, bool isEH, unsigned cleanupSize, + unsigned fixupDepth, EHScopeStack::stable_iterator enclosingNormal, EHScopeStack::stable_iterator enclosingEH) : EHScope(EHScope::Cleanup, enclosingEH), @@ -293,7 +293,7 @@ public: ActiveFlag(nullptr), ExtInfo(nullptr), FixupDepth(fixupDepth) { CleanupBits.IsNormalCleanup = isNormal; CleanupBits.IsEHCleanup = isEH; - CleanupBits.IsActive = isActive; + CleanupBits.IsActive = true; CleanupBits.IsLifetimeMarker = false; CleanupBits.TestFlagInNormalCleanup = false; CleanupBits.TestFlagInEHCleanup = 
false; @@ -487,17 +487,6 @@ public: } }; -class EHPadEndScope : public EHScope { -public: - EHPadEndScope(EHScopeStack::stable_iterator enclosingEHScope) - : EHScope(PadEnd, enclosingEHScope) {} - static size_t getSize() { return sizeof(EHPadEndScope); } - - static bool classof(const EHScope *scope) { - return scope->getKind() == PadEnd; - } -}; - /// A non-stable pointer into the scope stack. class EHScopeStack::iterator { char *Ptr; @@ -535,10 +524,6 @@ public: case EHScope::Terminate: Size = EHTerminateScope::getSize(); break; - - case EHScope::PadEnd: - Size = EHPadEndScope::getSize(); - break; } Ptr += llvm::alignTo(Size, ScopeStackAlignment); return *this; diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp index aee5a927a055..5c57ad0685d5 100644 --- a/clang/lib/CodeGen/CGCoroutine.cpp +++ b/clang/lib/CodeGen/CGCoroutine.cpp @@ -275,9 +275,9 @@ RValue CodeGenFunction::EmitCoyieldExpr(const CoyieldExpr &E, void CodeGenFunction::EmitCoreturnStmt(CoreturnStmt const &S) { ++CurCoro.Data->CoreturnCount; const Expr *RV = S.getOperand(); - if (RV && RV->getType()->isVoidType()) { - // Make sure to evaluate the expression of a co_return with a void - // expression for side effects. + if (RV && RV->getType()->isVoidType() && !isa<InitListExpr>(RV)) { + // Make sure to evaluate the non initlist expression of a co_return + // with a void expression for side effects. RunCleanupsScope cleanupScope(*this); EmitIgnoredExpr(RV); } diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index cbd524eda9d0..6965c4a1209c 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -231,9 +231,16 @@ PrintingPolicy CGDebugInfo::getPrintingPolicy() const { // If we're emitting codeview, it's important to try to match MSVC's naming so // that visualizers written for MSVC will trigger for our class names. In // particular, we can't have spaces between arguments of standard templates - // like basic_string and vector. - if (CGM.getCodeGenOpts().EmitCodeView) + // like basic_string and vector, but we must have spaces between consecutive + // angle brackets that close nested template argument lists. + if (CGM.getCodeGenOpts().EmitCodeView) { PP.MSVCFormatting = true; + PP.SplitTemplateClosers = true; + } else { + // For DWARF, printing rules are underspecified. + // SplitTemplateClosers yields better interop with GCC and GDB (PR46052). + PP.SplitTemplateClosers = true; + } // Apply -fdebug-prefix-map. PP.Callbacks = &PrintCB; @@ -470,10 +477,14 @@ CGDebugInfo::createFile(StringRef FileName, } std::string CGDebugInfo::remapDIPath(StringRef Path) const { + if (DebugPrefixMap.empty()) + return Path.str(); + + SmallString<256> P = Path; for (const auto &Entry : DebugPrefixMap) - if (Path.startswith(Entry.first)) - return (Twine(Entry.second) + Path.substr(Entry.first.size())).str(); - return Path.str(); + if (llvm::sys::path::replace_path_prefix(P, Entry.first, Entry.second)) + break; + return P.str().str(); } unsigned CGDebugInfo::getLineNumber(SourceLocation Loc) { @@ -532,11 +543,12 @@ void CGDebugInfo::CreateCompileUnit() { // file to determine the real absolute path for the file. 
std::string MainFileDir; if (const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID())) { - MainFileDir = MainFile->getDir()->getName(); + MainFileDir = std::string(MainFile->getDir()->getName()); if (!llvm::sys::path::is_absolute(MainFileName)) { llvm::SmallString<1024> MainFileDirSS(MainFileDir); llvm::sys::path::append(MainFileDirSS, MainFileName); - MainFileName = llvm::sys::path::remove_leading_dotslash(MainFileDirSS); + MainFileName = + std::string(llvm::sys::path::remove_leading_dotslash(MainFileDirSS)); } // If the main file name provided is identical to the input file name, and // if the input file is a preprocessed source, use the module name for @@ -610,6 +622,16 @@ void CGDebugInfo::CreateCompileUnit() { remapDIPath(MainFileName), remapDIPath(getCurrentDirname()), CSInfo, getSource(SM, SM.getMainFileID())); + StringRef Sysroot, SDK; + if (CGM.getCodeGenOpts().getDebuggerTuning() == llvm::DebuggerKind::LLDB) { + Sysroot = CGM.getHeaderSearchOpts().Sysroot; + auto B = llvm::sys::path::rbegin(Sysroot); + auto E = llvm::sys::path::rend(Sysroot); + auto It = std::find_if(B, E, [](auto SDK) { return SDK.endswith(".sdk"); }); + if (It != E) + SDK = *It; + } + // Create new compile unit. TheCU = DBuilder.createCompileUnit( LangTag, CUFile, CGOpts.EmitVersionIdentMetadata ? Producer : "", @@ -620,7 +642,7 @@ void CGDebugInfo::CreateCompileUnit() { ? llvm::DICompileUnit::DebugNameTableKind::None : static_cast<llvm::DICompileUnit::DebugNameTableKind>( CGOpts.DebugNameTable), - CGOpts.DebugRangesBaseAddress); + CGOpts.DebugRangesBaseAddress, remapDIPath(Sysroot), SDK); } llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { @@ -750,6 +772,7 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { case BuiltinType::Float: case BuiltinType::LongDouble: case BuiltinType::Float16: + case BuiltinType::BFloat16: case BuiltinType::Float128: case BuiltinType::Double: // FIXME: For targets where long double and __float128 have the same size, @@ -811,6 +834,21 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { return DBuilder.createBasicType(BTName, Size, Encoding); } +llvm::DIType *CGDebugInfo::CreateType(const AutoType *Ty) { + return DBuilder.createUnspecifiedType("auto"); +} + +llvm::DIType *CGDebugInfo::CreateType(const ExtIntType *Ty) { + + StringRef Name = Ty->isUnsigned() ? "unsigned _ExtInt" : "_ExtInt"; + llvm::dwarf::TypeKind Encoding = Ty->isUnsigned() + ? llvm::dwarf::DW_ATE_unsigned + : llvm::dwarf::DW_ATE_signed; + + return DBuilder.createBasicType(Name, CGM.getContext().getTypeSize(Ty), + Encoding); +} + llvm::DIType *CGDebugInfo::CreateType(const ComplexType *Ty) { // Bit size and offset of the type. llvm::dwarf::TypeKind Encoding = llvm::dwarf::DW_ATE_complex_float; @@ -976,11 +1014,21 @@ CGDebugInfo::getOrCreateRecordFwdDecl(const RecordType *Ty, uint64_t Size = 0; uint32_t Align = 0; + llvm::DINode::DIFlags Flags = llvm::DINode::FlagFwdDecl; + + // Add flag to nontrivial forward declarations. To be consistent with MSVC, + // add the flag if a record has no definition because we don't know whether + // it will be trivial or not. + if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) + if (!CXXRD->hasDefinition() || + (CXXRD->hasDefinition() && !CXXRD->isTrivial())) + Flags |= llvm::DINode::FlagNonTrivial; + // Create the type. 
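CreateType(const ExtIntType *) above maps Clang's arbitrary-width integers to plain DWARF base types named after their source spelling, with the bit width taken from getTypeSize. The source forms that reach this path, as a sketch (_ExtInt was the extension's spelling at the time; C23 later standardized it as _BitInt):

    _ExtInt(7) tiny = 3;              // DW_ATE_signed, name "_ExtInt"
    unsigned _ExtInt(128) wide = 42;  // DW_ATE_unsigned, name "unsigned _ExtInt"

    int main() { return (int)tiny; }
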
SmallString<256> Identifier = getTypeIdentifier(Ty, CGM, TheCU); llvm::DICompositeType *RetTy = DBuilder.createReplaceableCompositeType( - getTagForRecord(RD), RDName, Ctx, DefUnit, Line, 0, Size, Align, - llvm::DINode::FlagFwdDecl, Identifier); + getTagForRecord(RD), RDName, Ctx, DefUnit, Line, 0, Size, Align, Flags, + Identifier); if (CGM.getCodeGenOpts().DebugFwdTemplateParams) if (auto *TSpecial = dyn_cast<ClassTemplateSpecializationDecl>(RD)) DBuilder.replaceArrays(RetTy, llvm::DINodeArray(), @@ -1458,16 +1506,18 @@ void CGDebugInfo::CollectRecordFields( llvm::DISubroutineType * CGDebugInfo::getOrCreateMethodType(const CXXMethodDecl *Method, - llvm::DIFile *Unit) { + llvm::DIFile *Unit, bool decl) { const FunctionProtoType *Func = Method->getType()->getAs<FunctionProtoType>(); if (Method->isStatic()) return cast_or_null<llvm::DISubroutineType>( getOrCreateType(QualType(Func, 0), Unit)); - return getOrCreateInstanceMethodType(Method->getThisType(), Func, Unit); + return getOrCreateInstanceMethodType(Method->getThisType(), Func, Unit, decl); } -llvm::DISubroutineType *CGDebugInfo::getOrCreateInstanceMethodType( - QualType ThisPtr, const FunctionProtoType *Func, llvm::DIFile *Unit) { +llvm::DISubroutineType * +CGDebugInfo::getOrCreateInstanceMethodType(QualType ThisPtr, + const FunctionProtoType *Func, + llvm::DIFile *Unit, bool decl) { // Add "this" pointer. llvm::DITypeRefArray Args( cast<llvm::DISubroutineType>(getOrCreateType(QualType(Func, 0), Unit)) @@ -1475,9 +1525,12 @@ llvm::DISubroutineType *CGDebugInfo::getOrCreateInstanceMethodType( assert(Args.size() && "Invalid number of arguments!"); SmallVector<llvm::Metadata *, 16> Elts; - // First element is always return type. For 'void' functions it is NULL. - Elts.push_back(Args[0]); + QualType temp = Func->getReturnType(); + if (temp->getTypeClass() == Type::Auto && decl) + Elts.push_back(CreateType(cast<AutoType>(temp))); + else + Elts.push_back(Args[0]); // "this" pointer is always first argument. const CXXRecordDecl *RD = ThisPtr->getPointeeCXXRecordDecl(); @@ -1536,7 +1589,7 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction( isa<CXXConstructorDecl>(Method) || isa<CXXDestructorDecl>(Method); StringRef MethodName = getFunctionName(Method); - llvm::DISubroutineType *MethodTy = getOrCreateMethodType(Method, Unit); + llvm::DISubroutineType *MethodTy = getOrCreateMethodType(Method, Unit, true); // Since a single ctor/dtor corresponds to multiple functions, it doesn't // make sense to give a single ctor/dtor a linkage name. 
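The new decl flag on getOrCreateMethodType distinguishes a method's declaration from its definition: at the declaration, an undeduced auto return type is represented by the unspecified "auto" type from CreateType(const AutoType *) instead of a deduced type that is not known yet. The source pattern this covers:

    struct S {
      auto get() const;                 // declaration: return type still 'auto'
    };
    auto S::get() const { return 42; }  // definition: deduced as int

    int main() { return S{}.get() == 42 ? 0 : 1; }
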
@@ -1773,18 +1826,38 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList, for (unsigned i = 0, e = TAList.size(); i != e; ++i) { const TemplateArgument &TA = TAList[i]; StringRef Name; + bool defaultParameter = false; if (TPList) Name = TPList->getParam(i)->getName(); switch (TA.getKind()) { case TemplateArgument::Type: { llvm::DIType *TTy = getOrCreateType(TA.getAsType(), Unit); - TemplateParams.push_back( - DBuilder.createTemplateTypeParameter(TheCU, Name, TTy)); + + if (TPList) + if (auto *templateType = + dyn_cast_or_null<TemplateTypeParmDecl>(TPList->getParam(i))) + if (templateType->hasDefaultArgument()) + defaultParameter = + templateType->getDefaultArgument() == TA.getAsType(); + + TemplateParams.push_back(DBuilder.createTemplateTypeParameter( + TheCU, Name, TTy, defaultParameter)); + } break; case TemplateArgument::Integral: { llvm::DIType *TTy = getOrCreateType(TA.getIntegralType(), Unit); + if (TPList && CGM.getCodeGenOpts().DwarfVersion >= 5) + if (auto *templateType = + dyn_cast_or_null<NonTypeTemplateParmDecl>(TPList->getParam(i))) + if (templateType->hasDefaultArgument() && + !templateType->getDefaultArgument()->isValueDependent()) + defaultParameter = llvm::APSInt::isSameValue( + templateType->getDefaultArgument()->EvaluateKnownConstInt( + CGM.getContext()), + TA.getAsIntegral()); + TemplateParams.push_back(DBuilder.createTemplateValueParameter( - TheCU, Name, TTy, + TheCU, Name, TTy, defaultParameter, llvm::ConstantInt::get(CGM.getLLVMContext(), TA.getAsIntegral()))); } break; case TemplateArgument::Declaration: { @@ -1818,12 +1891,14 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList, CharUnits chars = CGM.getContext().toCharUnitsFromBits((int64_t)fieldOffset); V = CGM.getCXXABI().EmitMemberDataPointer(MPT, chars); + } else if (const auto *GD = dyn_cast<MSGuidDecl>(D)) { + V = CGM.GetAddrOfMSGuidDecl(GD).getPointer(); } assert(V && "Failed to find template parameter pointer"); V = V->stripPointerCasts(); } TemplateParams.push_back(DBuilder.createTemplateValueParameter( - TheCU, Name, TTy, cast_or_null<llvm::Constant>(V))); + TheCU, Name, TTy, defaultParameter, cast_or_null<llvm::Constant>(V))); } break; case TemplateArgument::NullPtr: { QualType T = TA.getNullPtrType(); @@ -1841,8 +1916,8 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList, V = CGM.getCXXABI().EmitNullMemberPointer(MPT); if (!V) V = llvm::ConstantInt::get(CGM.Int8Ty, 0); - TemplateParams.push_back( - DBuilder.createTemplateValueParameter(TheCU, Name, TTy, V)); + TemplateParams.push_back(DBuilder.createTemplateValueParameter( + TheCU, Name, TTy, defaultParameter, V)); } break; case TemplateArgument::Template: TemplateParams.push_back(DBuilder.createTemplateTemplateParameter( @@ -1863,7 +1938,7 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList, assert(V && "Expression in template argument isn't constant"); llvm::DIType *TTy = getOrCreateType(T, Unit); TemplateParams.push_back(DBuilder.createTemplateValueParameter( - TheCU, Name, TTy, V->stripPointerCasts())); + TheCU, Name, TTy, defaultParameter, V->stripPointerCasts())); } break; // And the following should never occur: case TemplateArgument::TemplateExpansion: @@ -2071,16 +2146,17 @@ llvm::DIType *CGDebugInfo::getOrCreateStandaloneType(QualType D, return T; } -void CGDebugInfo::addHeapAllocSiteMetadata(llvm::Instruction *CI, - QualType D, +void CGDebugInfo::addHeapAllocSiteMetadata(llvm::CallBase *CI, + QualType AllocatedTy, SourceLocation Loc) { + if 
(CGM.getCodeGenOpts().getDebugInfo() <= + codegenoptions::DebugLineTablesOnly) + return; llvm::MDNode *node; - if (D.getTypePtr()->isVoidPointerType()) { + if (AllocatedTy->isVoidType()) node = llvm::MDNode::get(CGM.getLLVMContext(), None); - } else { - QualType PointeeTy = D.getTypePtr()->getPointeeType(); - node = getOrCreateType(PointeeTy, getOrCreateFile(Loc)); - } + else + node = getOrCreateType(AllocatedTy, getOrCreateFile(Loc)); CI->setMetadata("heapallocsite", node); } @@ -2221,12 +2297,11 @@ static bool shouldOmitDefinition(codegenoptions::DebugInfoKind DebugKind, // constructor is emitted. Skip this optimization if the class or any of // its methods are marked dllimport. if (DebugKind == codegenoptions::DebugInfoConstructor && - !CXXDecl->isLambda() && !isClassOrMethodDLLImport(CXXDecl)) { - for (const auto *Ctor : CXXDecl->ctors()) { + !CXXDecl->isLambda() && !CXXDecl->hasConstexprNonCopyMoveConstructor() && + !isClassOrMethodDLLImport(CXXDecl)) + for (const auto *Ctor : CXXDecl->ctors()) if (Ctor->isUserProvided()) return true; - } - } TemplateSpecializationKind Spec = TSK_Undeclared; if (const auto *SD = dyn_cast<ClassTemplateSpecializationDecl>(RD)) @@ -2399,9 +2474,8 @@ llvm::DIType *CGDebugInfo::CreateType(const ObjCInterfaceType *Ty, return CreateTypeDefinition(Ty, Unit); } -llvm::DIModule * -CGDebugInfo::getOrCreateModuleRef(ExternalASTSource::ASTSourceDescriptor Mod, - bool CreateSkeletonCU) { +llvm::DIModule *CGDebugInfo::getOrCreateModuleRef(ASTSourceDescriptor Mod, + bool CreateSkeletonCU) { // Use the Module pointer as the key into the cache. This is a // nullptr if the "Module" is a PCH, which is safe because we don't // support chained PCH debug info, so there can only be a single PCH. @@ -2446,32 +2520,51 @@ CGDebugInfo::getOrCreateModuleRef(ExternalASTSource::ASTSourceDescriptor Mod, assert(StringRef(M->Name).startswith(CGM.getLangOpts().ModuleName) && "clang module without ASTFile must be specified by -fmodule-name"); + // Return a StringRef to the remapped Path. + auto RemapPath = [this](StringRef Path) -> std::string { + std::string Remapped = remapDIPath(Path); + StringRef Relative(Remapped); + StringRef CompDir = TheCU->getDirectory(); + if (Relative.consume_front(CompDir)) + Relative.consume_front(llvm::sys::path::get_separator()); + + return Relative.str(); + }; + if (CreateSkeletonCU && IsRootModule && !Mod.getASTFile().empty()) { // PCH files don't have a signature field in the control block, // but LLVM detects skeleton CUs by looking for a non-zero DWO id. // We use the lower 64 bits for debug info. - uint64_t Signature = - Mod.getSignature() - ? (uint64_t)Mod.getSignature()[1] << 32 | Mod.getSignature()[0] - : ~1ULL; + + uint64_t Signature = 0; + if (const auto &ModSig = Mod.getSignature()) { + for (unsigned I = 0; I != sizeof(Signature); ++I) + Signature |= (uint64_t)ModSig[I] << (I * 8); + } else { + Signature = ~1ULL; + } llvm::DIBuilder DIB(CGM.getModule()); - DIB.createCompileUnit(TheCU->getSourceLanguage(), - // TODO: Support "Source" from external AST providers? - DIB.createFile(Mod.getModuleName(), Mod.getPath()), - TheCU->getProducer(), true, StringRef(), 0, - Mod.getASTFile(), llvm::DICompileUnit::FullDebug, - Signature); + SmallString<0> PCM; + if (!llvm::sys::path::is_absolute(Mod.getASTFile())) + PCM = Mod.getPath(); + llvm::sys::path::append(PCM, Mod.getASTFile()); + DIB.createCompileUnit( + TheCU->getSourceLanguage(), + // TODO: Support "Source" from external AST providers? 
+ DIB.createFile(Mod.getModuleName(), TheCU->getDirectory()), + TheCU->getProducer(), false, StringRef(), 0, RemapPath(PCM), + llvm::DICompileUnit::FullDebug, Signature); DIB.finalize(); } llvm::DIModule *Parent = IsRootModule ? nullptr - : getOrCreateModuleRef( - ExternalASTSource::ASTSourceDescriptor(*M->Parent), - CreateSkeletonCU); + : getOrCreateModuleRef(ASTSourceDescriptor(*M->Parent), + CreateSkeletonCU); + std::string IncludePath = Mod.getPath().str(); llvm::DIModule *DIMod = DBuilder.createModule(Parent, Mod.getModuleName(), ConfigMacros, - Mod.getPath(), CGM.getHeaderSearchOpts().Sysroot); + RemapPath(IncludePath)); ModuleCache[M].reset(DIMod); return DIMod; } @@ -2649,9 +2742,17 @@ llvm::DIType *CGDebugInfo::CreateType(const VectorType *Ty, QualType QTy(Ty, 0); auto SizeExpr = SizeExprCache.find(QTy); if (SizeExpr != SizeExprCache.end()) - Subscript = DBuilder.getOrCreateSubrange(0, SizeExpr->getSecond()); - else - Subscript = DBuilder.getOrCreateSubrange(0, Count ? Count : -1); + Subscript = DBuilder.getOrCreateSubrange( + SizeExpr->getSecond() /*count*/, nullptr /*lowerBound*/, + nullptr /*upperBound*/, nullptr /*stride*/); + else { + auto *CountNode = + llvm::ConstantAsMetadata::get(llvm::ConstantInt::getSigned( + llvm::Type::getInt64Ty(CGM.getLLVMContext()), Count ? Count : -1)); + Subscript = DBuilder.getOrCreateSubrange( + CountNode /*count*/, nullptr /*lowerBound*/, nullptr /*upperBound*/, + nullptr /*stride*/); + } llvm::DINodeArray SubscriptArray = DBuilder.getOrCreateArray(Subscript); uint64_t Size = CGM.getContext().getTypeSize(Ty); @@ -2660,6 +2761,33 @@ llvm::DIType *CGDebugInfo::CreateType(const VectorType *Ty, return DBuilder.createVectorType(Size, Align, ElementTy, SubscriptArray); } +llvm::DIType *CGDebugInfo::CreateType(const ConstantMatrixType *Ty, + llvm::DIFile *Unit) { + // FIXME: Create another debug type for matrices + // For the time being, it treats it like a nested ArrayType. + + llvm::DIType *ElementTy = getOrCreateType(Ty->getElementType(), Unit); + uint64_t Size = CGM.getContext().getTypeSize(Ty); + uint32_t Align = getTypeAlignIfRequired(Ty, CGM.getContext()); + + // Create ranges for both dimensions. 
+ llvm::SmallVector<llvm::Metadata *, 2> Subscripts; + auto *ColumnCountNode = + llvm::ConstantAsMetadata::get(llvm::ConstantInt::getSigned( + llvm::Type::getInt64Ty(CGM.getLLVMContext()), Ty->getNumColumns())); + auto *RowCountNode = + llvm::ConstantAsMetadata::get(llvm::ConstantInt::getSigned( + llvm::Type::getInt64Ty(CGM.getLLVMContext()), Ty->getNumRows())); + Subscripts.push_back(DBuilder.getOrCreateSubrange( + ColumnCountNode /*count*/, nullptr /*lowerBound*/, nullptr /*upperBound*/, + nullptr /*stride*/)); + Subscripts.push_back(DBuilder.getOrCreateSubrange( + RowCountNode /*count*/, nullptr /*lowerBound*/, nullptr /*upperBound*/, + nullptr /*stride*/)); + llvm::DINodeArray SubscriptArray = DBuilder.getOrCreateArray(Subscripts); + return DBuilder.createArrayType(Size, Align, ElementTy, SubscriptArray); +} + llvm::DIType *CGDebugInfo::CreateType(const ArrayType *Ty, llvm::DIFile *Unit) { uint64_t Size; uint32_t Align; @@ -2710,10 +2838,17 @@ llvm::DIType *CGDebugInfo::CreateType(const ArrayType *Ty, llvm::DIFile *Unit) { auto SizeNode = SizeExprCache.find(EltTy); if (SizeNode != SizeExprCache.end()) - Subscripts.push_back( - DBuilder.getOrCreateSubrange(0, SizeNode->getSecond())); - else - Subscripts.push_back(DBuilder.getOrCreateSubrange(0, Count)); + Subscripts.push_back(DBuilder.getOrCreateSubrange( + SizeNode->getSecond() /*count*/, nullptr /*lowerBound*/, + nullptr /*upperBound*/, nullptr /*stride*/)); + else { + auto *CountNode = + llvm::ConstantAsMetadata::get(llvm::ConstantInt::getSigned( + llvm::Type::getInt64Ty(CGM.getLLVMContext()), Count)); + Subscripts.push_back(DBuilder.getOrCreateSubrange( + CountNode /*count*/, nullptr /*lowerBound*/, nullptr /*upperBound*/, + nullptr /*stride*/)); + } EltTy = Ty->getElementType(); } @@ -2772,7 +2907,7 @@ llvm::DIType *CGDebugInfo::CreateType(const MemberPointerType *Ty, return DBuilder.createMemberPointerType( getOrCreateInstanceMethodType( CXXMethodDecl::getThisType(FPT, Ty->getMostRecentCXXRecordDecl()), - FPT, U), + FPT, U, false), ClassType, Size, /*Align=*/0, Flags); } @@ -3025,7 +3160,7 @@ llvm::DIModule *CGDebugInfo::getParentModuleOrNull(const Decl *D) { // option. if (Module *M = D->getOwningModule()) { // This is a (sub-)module. - auto Info = ExternalASTSource::ASTSourceDescriptor(*M); + auto Info = ASTSourceDescriptor(*M); return getOrCreateModuleRef(Info, /*SkeletonCU=*/false); } else { // This the precompiled header being built. @@ -3053,6 +3188,8 @@ llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) { case Type::ExtVector: case Type::Vector: return CreateType(cast<VectorType>(Ty), Unit); + case Type::ConstantMatrix: + return CreateType(cast<ConstantMatrixType>(Ty), Unit); case Type::ObjCObjectPointer: return CreateType(cast<ObjCObjectPointerType>(Ty), Unit); case Type::ObjCObject: @@ -3094,6 +3231,8 @@ llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) { case Type::Atomic: return CreateType(cast<AtomicType>(Ty), Unit); + case Type::ExtInt: + return CreateType(cast<ExtIntType>(Ty)); case Type::Pipe: return CreateType(cast<PipeType>(Ty), Unit); @@ -3547,7 +3686,7 @@ llvm::DISubroutineType *CGDebugInfo::getOrCreateFunctionType(const Decl *D, return DBuilder.createSubroutineType(DBuilder.getOrCreateTypeArray(None)); if (const auto *Method = dyn_cast<CXXMethodDecl>(D)) - return getOrCreateMethodType(Method, F); + return getOrCreateMethodType(Method, F, false); const auto *FTy = FnType->getAs<FunctionType>(); CallingConv CC = FTy ? 
FTy->getCallConv() : CallingConv::CC_C; @@ -3651,8 +3790,11 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, Name = getDynamicInitializerName(cast<VarDecl>(D), GD.getDynamicInitKind(), Fn); } else { - // Use llvm function name. Name = Fn->getName(); + + if (isa<BlockDecl>(D)) + LinkageName = Name; + Flags |= llvm::DINode::FlagPrototyped; } if (Name.startswith("\01")) @@ -3764,7 +3906,7 @@ void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, if (IsDeclForCallSite) Fn->setSubprogram(SP); - DBuilder.retainType(SP); + DBuilder.finalizeSubprogram(SP); } void CGDebugInfo::EmitFuncDeclForCallSite(llvm::CallBase *CallOrInvoke, @@ -3778,12 +3920,12 @@ void CGDebugInfo::EmitFuncDeclForCallSite(llvm::CallBase *CallOrInvoke, if (Func->getSubprogram()) return; - // Do not emit a declaration subprogram for a builtin or if call site info - // isn't required. Also, elide declarations for functions with reserved names, - // as call site-related features aren't interesting in this case (& also, the - // compiler may emit calls to these functions without debug locations, which - // makes the verifier complain). - if (CalleeDecl->getBuiltinID() != 0 || + // Do not emit a declaration subprogram for a builtin, a function with nodebug + // attribute, or if call site info isn't required. Also, elide declarations + // for functions with reserved names, as call site-related features aren't + // interesting in this case (& also, the compiler may emit calls to these + // functions without debug locations, which makes the verifier complain). + if (CalleeDecl->getBuiltinID() != 0 || CalleeDecl->hasAttr<NoDebugAttr>() || getCallSiteRelatedAttrs() == llvm::DINode::FlagZero) return; if (const auto *Id = CalleeDecl->getIdentifier()) @@ -4680,7 +4822,7 @@ void CGDebugInfo::EmitImportDecl(const ImportDecl &ID) { if (CGM.getCodeGenOpts().getDebuggerTuning() != llvm::DebuggerKind::LLDB) return; if (Module *M = ID.getImportedModule()) { - auto Info = ExternalASTSource::ASTSourceDescriptor(*M); + auto Info = ASTSourceDescriptor(*M); auto Loc = ID.getLocation(); DBuilder.createImportedDeclaration( getCurrentContextDescriptor(cast<Decl>(ID.getDeclContext())), @@ -4844,8 +4986,7 @@ llvm::DINode::DIFlags CGDebugInfo::getCallSiteRelatedAttrs() const { (CGM.getCodeGenOpts().getDebuggerTuning() == llvm::DebuggerKind::LLDB || CGM.getCodeGenOpts().getDebuggerTuning() == llvm::DebuggerKind::GDB); - if (!SupportsDWARFv4Ext && CGM.getCodeGenOpts().DwarfVersion < 5 && - !CGM.getCodeGenOpts().EnableDebugEntryValues) + if (!SupportsDWARFv4Ext && CGM.getCodeGenOpts().DwarfVersion < 5) return llvm::DINode::FlagZero; return llvm::DINode::FlagAllCallsDescribed; diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h index 90e9a61ebe96..96ef6c7c1d27 100644 --- a/clang/lib/CodeGen/CGDebugInfo.h +++ b/clang/lib/CodeGen/CGDebugInfo.h @@ -17,9 +17,11 @@ #include "clang/AST/DeclCXX.h" #include "clang/AST/Expr.h" #include "clang/AST/ExternalASTSource.h" +#include "clang/AST/PrettyPrinter.h" #include "clang/AST/Type.h" #include "clang/AST/TypeOrdering.h" #include "clang/Basic/CodeGenOptions.h" +#include "clang/Basic/Module.h" #include "clang/Basic/SourceLocation.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" @@ -60,7 +62,7 @@ class CGDebugInfo { llvm::DIBuilder DBuilder; llvm::DICompileUnit *TheCU = nullptr; ModuleMap *ClangModuleMap = nullptr; - ExternalASTSource::ASTSourceDescriptor PCHDescriptor; + ASTSourceDescriptor PCHDescriptor; SourceLocation CurLoc; llvm::MDNode 
*CurInlinedAt = nullptr; llvm::DIType *VTablePtrType = nullptr; @@ -165,6 +167,8 @@ class CGDebugInfo { /// ivars and property accessors. llvm::DIType *CreateType(const BuiltinType *Ty); llvm::DIType *CreateType(const ComplexType *Ty); + llvm::DIType *CreateType(const AutoType *Ty); + llvm::DIType *CreateType(const ExtIntType *Ty); llvm::DIType *CreateQualifiedType(QualType Ty, llvm::DIFile *Fg); llvm::DIType *CreateType(const TypedefType *Ty, llvm::DIFile *Fg); llvm::DIType *CreateType(const TemplateSpecializationType *Ty, @@ -188,6 +192,7 @@ class CGDebugInfo { llvm::DIType *CreateType(const ObjCTypeParamType *Ty, llvm::DIFile *Unit); llvm::DIType *CreateType(const VectorType *Ty, llvm::DIFile *F); + llvm::DIType *CreateType(const ConstantMatrixType *Ty, llvm::DIFile *F); llvm::DIType *CreateType(const ArrayType *Ty, llvm::DIFile *F); llvm::DIType *CreateType(const LValueReferenceType *Ty, llvm::DIFile *F); llvm::DIType *CreateType(const RValueReferenceType *Ty, llvm::DIFile *Unit); @@ -214,10 +219,10 @@ class CGDebugInfo { /// not updated to include implicit \c this pointer. Use this routine /// to get a method type which includes \c this pointer. llvm::DISubroutineType *getOrCreateMethodType(const CXXMethodDecl *Method, - llvm::DIFile *F); + llvm::DIFile *F, bool decl); llvm::DISubroutineType * getOrCreateInstanceMethodType(QualType ThisPtr, const FunctionProtoType *Func, - llvm::DIFile *Unit); + llvm::DIFile *Unit, bool decl); llvm::DISubroutineType * getOrCreateFunctionType(const Decl *D, QualType FnType, llvm::DIFile *F); /// \return debug info descriptor for vtable. @@ -378,9 +383,7 @@ public: /// When generating debug information for a clang module or /// precompiled header, this module map will be used to determine /// the module of origin of each Decl. - void setPCHDescriptor(ExternalASTSource::ASTSourceDescriptor PCH) { - PCHDescriptor = PCH; - } + void setPCHDescriptor(ASTSourceDescriptor PCH) { PCHDescriptor = PCH; } /// @} /// Update the current source location. If \arg loc is invalid it is @@ -506,7 +509,7 @@ public: llvm::DIType *getOrCreateStandaloneType(QualType Ty, SourceLocation Loc); /// Add heapallocsite metadata for MSAllocator calls. - void addHeapAllocSiteMetadata(llvm::Instruction *CallSite, QualType Ty, + void addHeapAllocSiteMetadata(llvm::CallBase *CallSite, QualType AllocatedTy, SourceLocation Loc); void completeType(const EnumDecl *ED); @@ -589,9 +592,8 @@ private: /// Get a reference to a clang module. If \p CreateSkeletonCU is true, /// this also creates a split dwarf skeleton compile unit. - llvm::DIModule * - getOrCreateModuleRef(ExternalASTSource::ASTSourceDescriptor Mod, - bool CreateSkeletonCU); + llvm::DIModule *getOrCreateModuleRef(ASTSourceDescriptor Mod, + bool CreateSkeletonCU); /// DebugTypeExtRefs: If \p D originated in a clang module, return it. 
llvm::DIModule *getParentModuleOrNull(const Decl *D); diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp index 5aac7a8d54c7..1729c7ed3c31 100644 --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -31,6 +31,7 @@ #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/CGFunctionInfo.h" +#include "clang/Sema/Sema.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalVariable.h" @@ -40,6 +41,9 @@ using namespace clang; using namespace CodeGen; +static_assert(clang::Sema::MaximumAlignment <= llvm::Value::MaximumAlignment, + "Clang max alignment greater than what LLVM supports?"); + void CodeGenFunction::EmitDecl(const Decl &D) { switch (D.getKind()) { case Decl::BuiltinTemplate: @@ -104,6 +108,7 @@ void CodeGenFunction::EmitDecl(const Decl &D) { case Decl::StaticAssert: // static_assert(X, ""); [C++0x] case Decl::Label: // __label__ x; case Decl::Import: + case Decl::MSGuid: // __declspec(uuid("...")) case Decl::OMPThreadPrivate: case Decl::OMPAllocate: case Decl::OMPCapturedExpr: @@ -111,6 +116,7 @@ void CodeGenFunction::EmitDecl(const Decl &D) { case Decl::Empty: case Decl::Concept: case Decl::LifetimeExtendedTemporary: + case Decl::RequiresExprBody: // None of these decls require codegen support. return; @@ -205,9 +211,9 @@ static std::string getStaticDeclName(CodeGenModule &CGM, const VarDecl &D) { if (auto *CD = dyn_cast<CapturedDecl>(DC)) DC = cast<DeclContext>(CD->getNonClosureContext()); if (const auto *FD = dyn_cast<FunctionDecl>(DC)) - ContextName = CGM.getMangledName(FD); + ContextName = std::string(CGM.getMangledName(FD)); else if (const auto *BD = dyn_cast<BlockDecl>(DC)) - ContextName = CGM.getBlockMangledName(GlobalDecl(), BD); + ContextName = std::string(CGM.getBlockMangledName(GlobalDecl(), BD)); else if (const auto *OMD = dyn_cast<ObjCMethodDecl>(DC)) ContextName = OMD->getSelector().getAsString(); else @@ -232,7 +238,7 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl( // Use the label if the variable is renamed with the asm-label extension. std::string Name; if (D.hasAttr<AsmLabelAttr>()) - Name = getMangledName(&D); + Name = std::string(getMangledName(&D)); else Name = getStaticDeclName(*this, D); @@ -244,7 +250,7 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl( // variables cannot have an initializer. llvm::Constant *Init = nullptr; if (Ty.getAddressSpace() == LangAS::opencl_local || - D.hasAttr<CUDASharedAttr>()) + D.hasAttr<CUDASharedAttr>() || D.hasAttr<LoaderUninitializedAttr>()) Init = llvm::UndefValue::get(LTy); else Init = EmitNullConstant(Ty); @@ -336,7 +342,7 @@ CodeGenFunction::AddInitializerToStaticVarDecl(const VarDecl &D, // the global to match the initializer. (We have to do this // because some types, like unions, can't be completely represented // in the LLVM type system.) - if (GV->getType()->getElementType() != Init->getType()) { + if (GV->getValueType() != Init->getType()) { llvm::GlobalVariable *OldGV = GV; GV = new llvm::GlobalVariable(CGM.getModule(), Init->getType(), @@ -756,10 +762,8 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D, // If we're emitting a value with lifetime, we have to do the // initialization *before* we leave the cleanup scopes. 
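getOrCreateStaticVarDecl above now emits an undef initializer for variables carrying LoaderUninitializedAttr, alongside the existing OpenCL local and CUDA shared cases, so the storage is deliberately left uninitialized instead of zero-filled. In source form, using the Clang-specific attribute spelling:

    // Global whose storage the loader leaves uninitialized; it receives an
    // 'undef' initializer rather than a zero one.
    [[clang::loader_uninitialized]] int scratch[256];

    int main() {
      scratch[0] = 1;  // must be written before it is read
      return scratch[0] - 1;
    }
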
- if (const FullExpr *fe = dyn_cast<FullExpr>(init)) { - enterFullExpression(fe); - init = fe->getSubExpr(); - } + if (const ExprWithCleanups *EWC = dyn_cast<ExprWithCleanups>(init)) + init = EWC->getSubExpr(); CodeGenFunction::RunCleanupsScope Scope(*this); // We have to maintain the illusion that the variable is @@ -1045,13 +1049,13 @@ static llvm::Constant *constWithPadding(CodeGenModule &CGM, IsPattern isPattern, llvm::Type *OrigTy = constant->getType(); if (const auto STy = dyn_cast<llvm::StructType>(OrigTy)) return constStructWithPadding(CGM, isPattern, STy, constant); - if (auto *STy = dyn_cast<llvm::SequentialType>(OrigTy)) { + if (auto *ArrayTy = dyn_cast<llvm::ArrayType>(OrigTy)) { llvm::SmallVector<llvm::Constant *, 8> Values; - unsigned Size = STy->getNumElements(); + uint64_t Size = ArrayTy->getNumElements(); if (!Size) return constant; - llvm::Type *ElemTy = STy->getElementType(); - bool ZeroInitializer = constant->isZeroValue(); + llvm::Type *ElemTy = ArrayTy->getElementType(); + bool ZeroInitializer = constant->isNullValue(); llvm::Constant *OpValue, *PaddedOp; if (ZeroInitializer) { OpValue = llvm::Constant::getNullValue(ElemTy); @@ -1067,13 +1071,12 @@ static llvm::Constant *constWithPadding(CodeGenModule &CGM, IsPattern isPattern, auto *NewElemTy = Values[0]->getType(); if (NewElemTy == ElemTy) return constant; - if (OrigTy->isArrayTy()) { - auto *ArrayTy = llvm::ArrayType::get(NewElemTy, Size); - return llvm::ConstantArray::get(ArrayTy, Values); - } else { - return llvm::ConstantVector::get(Values); - } + auto *NewArrayTy = llvm::ArrayType::get(NewElemTy, Size); + return llvm::ConstantArray::get(NewArrayTy, Values); } + // FIXME: Add handling for tail padding in vectors. Vectors don't + // have padding between or inside elements, but the total amount of + // data can be less than the allocated size. return constant; } @@ -1086,7 +1089,7 @@ Address CodeGenModule::createUnnamedGlobalFrom(const VarDecl &D, return CC->getNameAsString(); if (const auto *CD = dyn_cast<CXXDestructorDecl>(FD)) return CD->getNameAsString(); - return getMangledName(FD); + return std::string(getMangledName(FD)); } else if (const auto *OM = dyn_cast<ObjCMethodDecl>(DC)) { return OM->getNameAsString(); } else if (isa<BlockDecl>(DC)) { @@ -1397,10 +1400,15 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { Address address = Address::invalid(); Address AllocaAddr = Address::invalid(); - Address OpenMPLocalAddr = - getLangOpts().OpenMP - ? CGM.getOpenMPRuntime().getAddressOfLocalVariable(*this, &D) - : Address::invalid(); + Address OpenMPLocalAddr = Address::invalid(); + if (CGM.getLangOpts().OpenMPIRBuilder) + OpenMPLocalAddr = OMPBuilderCBHelpers::getAddressOfLocalVariable(*this, &D); + else + OpenMPLocalAddr = + getLangOpts().OpenMP + ? CGM.getOpenMPRuntime().getAddressOfLocalVariable(*this, &D) + : Address::invalid(); + bool NRVO = getLangOpts().ElideConstructors && D.isNRVOVariable(); if (getLangOpts().OpenMP && OpenMPLocalAddr.isValid()) { @@ -1512,9 +1520,12 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // is rare. if (!Bypasses.IsBypassed(&D) && !(!getLangOpts().CPlusPlus && hasLabelBeenSeenInCurrentScope())) { - uint64_t size = CGM.getDataLayout().getTypeAllocSize(allocaTy); + llvm::TypeSize size = + CGM.getDataLayout().getTypeAllocSize(allocaTy); emission.SizeForLifetimeMarkers = - EmitLifetimeStart(size, AllocaAddr.getPointer()); + size.isScalable() ? 
EmitLifetimeStart(-1, AllocaAddr.getPointer()) + : EmitLifetimeStart(size.getFixedSize(), + AllocaAddr.getPointer()); } } else { assert(!emission.useLifetimeMarkers()); @@ -1671,9 +1682,13 @@ void CodeGenFunction::emitZeroOrPatternForAutoVarInit(QualType type, case LangOptions::TrivialAutoVarInitKind::Uninitialized: llvm_unreachable("Uninitialized handled by caller"); case LangOptions::TrivialAutoVarInitKind::Zero: + if (CGM.stopAutoInit()) + return; emitStoresForZeroInit(CGM, D, Loc, isVolatile, Builder); break; case LangOptions::TrivialAutoVarInitKind::Pattern: + if (CGM.stopAutoInit()) + return; emitStoresForPatternInit(CGM, D, Loc, isVolatile, Builder); break; } @@ -1696,6 +1711,8 @@ void CodeGenFunction::emitZeroOrPatternForAutoVarInit(QualType type, llvm_unreachable("Uninitialized handled by caller"); case LangOptions::TrivialAutoVarInitKind::Zero: + if (CGM.stopAutoInit()) + return; if (!EltSize.isOne()) SizeVal = Builder.CreateNUWMul(SizeVal, CGM.getSize(EltSize)); Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, 0), SizeVal, @@ -1703,6 +1720,8 @@ void CodeGenFunction::emitZeroOrPatternForAutoVarInit(QualType type, break; case LangOptions::TrivialAutoVarInitKind::Pattern: { + if (CGM.stopAutoInit()) + return; llvm::Type *ElTy = Loc.getElementType(); llvm::Constant *Constant = constWithPadding( CGM, IsPattern::Yes, initializationPatternFor(CGM, ElTy)); @@ -1861,9 +1880,7 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { /// /// \param init the initializing expression /// \param D the object to act as if we're initializing -/// \param loc the address to initialize; its type is a pointer -/// to the LLVM mapping of the object's type -/// \param alignment the alignment of the address +/// \param lvalue the lvalue to initialize /// \param capturedByInit true if \p D is a __block variable /// whose address is potentially changed by the initializer void CodeGenFunction::EmitExprAsInit(const Expr *init, const ValueDecl *D, @@ -2532,5 +2549,5 @@ void CodeGenModule::EmitOMPDeclareMapper(const OMPDeclareMapperDecl *D, } void CodeGenModule::EmitOMPRequiresDecl(const OMPRequiresDecl *D) { - getOpenMPRuntime().checkArchForUnifiedAddressing(D); + getOpenMPRuntime().processRequiresDirective(D); } diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp index 3baa0a080f5d..5a8500364295 100644 --- a/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/clang/lib/CodeGen/CGDeclCXX.cpp @@ -16,11 +16,12 @@ #include "CodeGenFunction.h" #include "TargetInfo.h" #include "clang/AST/Attr.h" -#include "clang/Basic/CodeGenOptions.h" +#include "clang/Basic/LangOptions.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" #include "llvm/Support/Path.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" using namespace clang; using namespace CodeGen; @@ -239,7 +240,7 @@ llvm::Function *CodeGenFunction::createAtExitStub(const VarDecl &VD, } const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); - llvm::Function *fn = CGM.CreateGlobalInitOrDestructFunction( + llvm::Function *fn = CGM.CreateGlobalInitOrCleanUpFunction( ty, FnName.str(), FI, VD.getLocation()); CodeGenFunction CGF(CGM); @@ -249,7 +250,7 @@ llvm::Function *CodeGenFunction::createAtExitStub(const VarDecl &VD, llvm::CallInst *call = CGF.Builder.CreateCall(dtor, addr); - // Make sure the call and the callee agree on calling convention. + // Make sure the call and the callee agree on calling convention. 
if (auto *dtorFn = dyn_cast<llvm::Function>( dtor.getCallee()->stripPointerCastsAndAliases())) call->setCallingConv(dtorFn->getCallingConv()); @@ -270,8 +271,12 @@ void CodeGenFunction::registerGlobalDtorWithAtExit(const VarDecl &VD, void CodeGenFunction::registerGlobalDtorWithAtExit(llvm::Constant *dtorStub) { // extern "C" int atexit(void (*f)(void)); + assert(cast<llvm::Function>(dtorStub)->getFunctionType() == + llvm::FunctionType::get(CGM.VoidTy, false) && + "Argument to atexit has a wrong type."); + llvm::FunctionType *atexitTy = - llvm::FunctionType::get(IntTy, dtorStub->getType(), false); + llvm::FunctionType::get(IntTy, dtorStub->getType(), false); llvm::FunctionCallee atexit = CGM.CreateRuntimeFunction(atexitTy, "atexit", llvm::AttributeList(), @@ -282,6 +287,30 @@ void CodeGenFunction::registerGlobalDtorWithAtExit(llvm::Constant *dtorStub) { EmitNounwindRuntimeCall(atexit, dtorStub); } +llvm::Value * +CodeGenFunction::unregisterGlobalDtorWithUnAtExit(llvm::Function *dtorStub) { + // The unatexit subroutine unregisters __dtor functions that were previously + // registered by the atexit subroutine. If the referenced function is found, + // it is removed from the list of functions that are called at normal program + // termination and the unatexit returns a value of 0, otherwise a non-zero + // value is returned. + // + // extern "C" int unatexit(void (*f)(void)); + assert(dtorStub->getFunctionType() == + llvm::FunctionType::get(CGM.VoidTy, false) && + "Argument to unatexit has a wrong type."); + + llvm::FunctionType *unatexitTy = + llvm::FunctionType::get(IntTy, {dtorStub->getType()}, /*isVarArg=*/false); + + llvm::FunctionCallee unatexit = + CGM.CreateRuntimeFunction(unatexitTy, "unatexit", llvm::AttributeList()); + + cast<llvm::Function>(unatexit.getCallee())->setDoesNotThrow(); + + return EmitNounwindRuntimeCall(unatexit, dtorStub); +} + void CodeGenFunction::EmitCXXGuardedInit(const VarDecl &D, llvm::GlobalVariable *DeclPtr, bool PerformInit) { @@ -333,19 +362,23 @@ void CodeGenFunction::EmitCXXGuardedInitBranch(llvm::Value *NeedsInit, Builder.CreateCondBr(NeedsInit, InitBlock, NoInitBlock, Weights); } -llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction( +llvm::Function *CodeGenModule::CreateGlobalInitOrCleanUpFunction( llvm::FunctionType *FTy, const Twine &Name, const CGFunctionInfo &FI, - SourceLocation Loc, bool TLS) { - llvm::Function *Fn = - llvm::Function::Create(FTy, llvm::GlobalValue::InternalLinkage, - Name, &getModule()); + SourceLocation Loc, bool TLS, bool IsExternalLinkage) { + llvm::Function *Fn = llvm::Function::Create( + FTy, + IsExternalLinkage ? llvm::GlobalValue::ExternalLinkage + : llvm::GlobalValue::InternalLinkage, + Name, &getModule()); + if (!getLangOpts().AppleKext && !TLS) { // Set the section if needed. 
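registerGlobalDtorWithAtExit and the new unregisterGlobalDtorWithUnAtExit above rely on the contracts quoted in their comments: atexit registers a void() stub to run at normal termination, and unatexit (a platform extension, e.g. on AIX, not ISO C) removes a previously registered stub, returning 0 when it was found. The runtime behavior in miniature, restricted to portable atexit:

    #include <cstdio>
    #include <cstdlib>

    static void dtorStub() { std::puts("global cleanup runs at exit"); }

    int main() {
      // Equivalent of registering the emitted __dtor stub.
      if (std::atexit(&dtorStub) != 0)
        return 1;  // registration failed
      // On targets providing it, unatexit(&dtorStub) would deregister the
      // stub (returning 0 if it was found); omitted here for portability.
      return 0;
    }
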
if (const char *Section = getTarget().getStaticInitSectionSpecifier()) Fn->setSection(Section); } - SetInternalFunctionAttributes(GlobalDecl(), Fn, FI); + if (Fn->hasInternalLinkage()) + SetInternalFunctionAttributes(GlobalDecl(), Fn, FI); Fn->setCallingConv(getRuntimeCC()); @@ -392,20 +425,20 @@ llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction( !isInSanitizerBlacklist(SanitizerKind::ShadowCallStack, Fn, Loc)) Fn->addFnAttr(llvm::Attribute::ShadowCallStack); - auto RASignKind = getCodeGenOpts().getSignReturnAddress(); - if (RASignKind != CodeGenOptions::SignReturnAddressScope::None) { + auto RASignKind = getLangOpts().getSignReturnAddressScope(); + if (RASignKind != LangOptions::SignReturnAddressScopeKind::None) { Fn->addFnAttr("sign-return-address", - RASignKind == CodeGenOptions::SignReturnAddressScope::All + RASignKind == LangOptions::SignReturnAddressScopeKind::All ? "all" : "non-leaf"); - auto RASignKey = getCodeGenOpts().getSignReturnAddressKey(); + auto RASignKey = getLangOpts().getSignReturnAddressKey(); Fn->addFnAttr("sign-return-address-key", - RASignKey == CodeGenOptions::SignReturnAddressKeyValue::AKey + RASignKey == LangOptions::SignReturnAddressKeyKind::AKey ? "a_key" : "b_key"); } - if (getCodeGenOpts().BranchTargetEnforcement) + if (getLangOpts().BranchTargetEnforcement) Fn->addFnAttr("branch-target-enforcement"); return Fn; @@ -461,10 +494,8 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, } // Create a variable initialization function. - llvm::Function *Fn = - CreateGlobalInitOrDestructFunction(FTy, FnName.str(), - getTypes().arrangeNullaryFunction(), - D->getLocation()); + llvm::Function *Fn = CreateGlobalInitOrCleanUpFunction( + FTy, FnName.str(), getTypes().arrangeNullaryFunction(), D->getLocation()); auto *ISA = D->getAttr<InitSegAttr>(); CodeGenFunction(*this).GenerateCXXGlobalVarDeclInitFunc(Fn, D, Addr, @@ -533,6 +564,22 @@ void CodeGenModule::EmitCXXThreadLocalInitFunc() { CXXThreadLocals.clear(); } +static SmallString<128> getTransformedFileName(llvm::Module &M) { + SmallString<128> FileName = llvm::sys::path::filename(M.getName()); + + if (FileName.empty()) + FileName = "<null>"; + + for (size_t i = 0; i < FileName.size(); ++i) { + // Replace everything that's not [a-zA-Z0-9._] with a _. This set happens + // to be the set of C preprocessing numbers. + if (!isPreprocessingNumberBody(FileName[i])) + FileName[i] = '_'; + } + + return FileName; +} + void CodeGenModule::EmitCXXGlobalInitFunc() { while (!CXXGlobalInits.empty() && !CXXGlobalInits.back()) @@ -541,11 +588,27 @@ CodeGenModule::EmitCXXGlobalInitFunc() { if (CXXGlobalInits.empty() && PrioritizedCXXGlobalInits.empty()) return; + const bool UseSinitAndSterm = getCXXABI().useSinitAndSterm(); + if (UseSinitAndSterm) { + GlobalUniqueModuleId = getUniqueModuleId(&getModule()); + + // FIXME: We need to figure out what to hash on or encode into the unique ID + // we need. + if (GlobalUniqueModuleId.compare("") == 0) + llvm::report_fatal_error( + "cannot produce a unique identifier for this module" + " based on strong external symbols"); + GlobalUniqueModuleId = GlobalUniqueModuleId.substr(1); + } + llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); const CGFunctionInfo &FI = getTypes().arrangeNullaryFunction(); - // Create our global initialization function. + // Create our global prioritized initialization function. 
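getTransformedFileName above factors out the sanitization previously done inline in EmitCXXGlobalInitFunc: every character outside the C preprocessing-number set is replaced with '_' so the file name can be spliced into a symbol such as _GLOBAL__sub_I_<name>. A standalone analogue, approximating the accepted set as [a-zA-Z0-9._]:

    #include <cctype>
    #include <string>

    static std::string sanitizeForSymbol(std::string Name) {
      if (Name.empty())
        Name = "<null>";
      for (char &C : Name)
        if (!(std::isalnum(static_cast<unsigned char>(C)) || C == '.' ||
              C == '_'))
          C = '_';  // e.g. "my-file.cpp" -> "my_file.cpp"
      return Name;
    }

    int main() {
      return sanitizeForSymbol("my-file.cpp") == "my_file.cpp" ? 0 : 1;
    }
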
if (!PrioritizedCXXGlobalInits.empty()) { + assert(!UseSinitAndSterm && "Prioritized sinit and sterm functions are not" + " supported yet."); + SmallVector<llvm::Function *, 8> LocalCXXGlobalInits; llvm::array_pod_sort(PrioritizedCXXGlobalInits.begin(), PrioritizedCXXGlobalInits.end()); @@ -565,7 +628,7 @@ CodeGenModule::EmitCXXGlobalInitFunc() { std::string PrioritySuffix = llvm::utostr(Priority); // Priority is always <= 65535 (enforced by sema). PrioritySuffix = std::string(6-PrioritySuffix.size(), '0')+PrioritySuffix; - llvm::Function *Fn = CreateGlobalInitOrDestructFunction( + llvm::Function *Fn = CreateGlobalInitOrCleanUpFunction( FTy, "_GLOBAL__I_" + PrioritySuffix, FI); for (; I < PrioE; ++I) @@ -577,22 +640,27 @@ CodeGenModule::EmitCXXGlobalInitFunc() { PrioritizedCXXGlobalInits.clear(); } - // Include the filename in the symbol name. Including "sub_" matches gcc and - // makes sure these symbols appear lexicographically behind the symbols with - // priority emitted above. - SmallString<128> FileName = llvm::sys::path::filename(getModule().getName()); - if (FileName.empty()) - FileName = "<null>"; + if (UseSinitAndSterm && CXXGlobalInits.empty()) + return; - for (size_t i = 0; i < FileName.size(); ++i) { - // Replace everything that's not [a-zA-Z0-9._] with a _. This set happens - // to be the set of C preprocessing numbers. - if (!isPreprocessingNumberBody(FileName[i])) - FileName[i] = '_'; + // Create our global initialization function. + SmallString<128> FuncName; + bool IsExternalLinkage = false; + if (UseSinitAndSterm) { + llvm::Twine("__sinit80000000_clang_", GlobalUniqueModuleId) + .toVector(FuncName); + IsExternalLinkage = true; + } else { + // Include the filename in the symbol name. Including "sub_" matches gcc + // and makes sure these symbols appear lexicographically behind the symbols + // with priority emitted above. + llvm::Twine("_GLOBAL__sub_I_", getTransformedFileName(getModule())) + .toVector(FuncName); } - llvm::Function *Fn = CreateGlobalInitOrDestructFunction( - FTy, llvm::Twine("_GLOBAL__sub_I_", FileName), FI); + llvm::Function *Fn = CreateGlobalInitOrCleanUpFunction( + FTy, FuncName, FI, SourceLocation(), false /* TLS */, + IsExternalLinkage); CodeGenFunction(*this).GenerateCXXGlobalInitFunc(Fn, CXXGlobalInits); AddGlobalCtor(Fn); @@ -618,19 +686,38 @@ CodeGenModule::EmitCXXGlobalInitFunc() { CXXGlobalInits.clear(); } -void CodeGenModule::EmitCXXGlobalDtorFunc() { - if (CXXGlobalDtors.empty()) +void CodeGenModule::EmitCXXGlobalCleanUpFunc() { + if (CXXGlobalDtorsOrStermFinalizers.empty()) return; llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); - - // Create our global destructor function. const CGFunctionInfo &FI = getTypes().arrangeNullaryFunction(); - llvm::Function *Fn = - CreateGlobalInitOrDestructFunction(FTy, "_GLOBAL__D_a", FI); - CodeGenFunction(*this).GenerateCXXGlobalDtorsFunc(Fn, CXXGlobalDtors); + // Create our global cleanup function. + llvm::Function *Fn = nullptr; + if (getCXXABI().useSinitAndSterm()) { + if (GlobalUniqueModuleId.empty()) { + GlobalUniqueModuleId = getUniqueModuleId(&getModule()); + // FIXME: We need to figure out what to hash on or encode into the unique + // ID we need. 
+ if (GlobalUniqueModuleId.compare("") == 0) + llvm::report_fatal_error( + "cannot produce a unique identifier for this module" + " based on strong external symbols"); + GlobalUniqueModuleId = GlobalUniqueModuleId.substr(1); + } + + Fn = CreateGlobalInitOrCleanUpFunction( + FTy, llvm::Twine("__sterm80000000_clang_", GlobalUniqueModuleId), FI, + SourceLocation(), false /* TLS */, true /* IsExternalLinkage */); + } else { + Fn = CreateGlobalInitOrCleanUpFunction(FTy, "_GLOBAL__D_a", FI); + } + + CodeGenFunction(*this).GenerateCXXGlobalCleanUpFunc( + Fn, CXXGlobalDtorsOrStermFinalizers); AddGlobalDtor(Fn); + CXXGlobalDtorsOrStermFinalizers.clear(); } /// Emit the code necessary to initialize the given global variable. @@ -726,10 +813,10 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn, FinishFunction(); } -void CodeGenFunction::GenerateCXXGlobalDtorsFunc( +void CodeGenFunction::GenerateCXXGlobalCleanUpFunc( llvm::Function *Fn, const std::vector<std::tuple<llvm::FunctionType *, llvm::WeakTrackingVH, - llvm::Constant *>> &DtorsAndObjects) { + llvm::Constant *>> &DtorsOrStermFinalizers) { { auto NL = ApplyDebugLocation::CreateEmpty(*this); StartFunction(GlobalDecl(), getContext().VoidTy, Fn, @@ -737,13 +824,22 @@ void CodeGenFunction::GenerateCXXGlobalDtorsFunc( // Emit an artificial location for this function. auto AL = ApplyDebugLocation::CreateArtificial(*this); - // Emit the dtors, in reverse order from construction. - for (unsigned i = 0, e = DtorsAndObjects.size(); i != e; ++i) { + // Emit the cleanups, in reverse order from construction. + for (unsigned i = 0, e = DtorsOrStermFinalizers.size(); i != e; ++i) { llvm::FunctionType *CalleeTy; llvm::Value *Callee; llvm::Constant *Arg; - std::tie(CalleeTy, Callee, Arg) = DtorsAndObjects[e - i - 1]; - llvm::CallInst *CI = Builder.CreateCall(CalleeTy, Callee, Arg); + std::tie(CalleeTy, Callee, Arg) = DtorsOrStermFinalizers[e - i - 1]; + + llvm::CallInst *CI = nullptr; + if (Arg == nullptr) { + assert( + CGM.getCXXABI().useSinitAndSterm() && + "Arg could not be nullptr unless using sinit and sterm functions."); + CI = Builder.CreateCall(CalleeTy, Callee); + } else + CI = Builder.CreateCall(CalleeTy, Callee, Arg); + // Make sure the call and the callee agree on calling convention. if (llvm::Function *F = dyn_cast<llvm::Function>(Callee)) CI->setCallingConv(F->getCallingConv()); @@ -767,7 +863,7 @@ llvm::Function *CodeGenFunction::generateDestroyHelper( const CGFunctionInfo &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(getContext().VoidTy, args); llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); - llvm::Function *fn = CGM.CreateGlobalInitOrDestructFunction( + llvm::Function *fn = CGM.CreateGlobalInitOrCleanUpFunction( FTy, "__cxx_global_array_dtor", FI, VD->getLocation()); CurEHLocation = VD->getBeginLoc(); diff --git a/clang/lib/CodeGen/CGException.cpp b/clang/lib/CodeGen/CGException.cpp index 53fafab3e0e6..bdf70252b5ad 100644 --- a/clang/lib/CodeGen/CGException.cpp +++ b/clang/lib/CodeGen/CGException.cpp @@ -20,6 +20,7 @@ #include "clang/AST/StmtCXX.h" #include "clang/AST/StmtObjC.h" #include "clang/AST/StmtVisitor.h" +#include "clang/Basic/DiagnosticSema.h" #include "clang/Basic/TargetBuiltins.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" @@ -468,6 +469,18 @@ void CodeGenFunction::EmitStartEHSpec(const Decl *D) { // encode these in an object file but MSVC doesn't do anything with it. 
if (getTarget().getCXXABI().isMicrosoft()) return; + // In wasm we currently treat 'throw()' in the same way as 'noexcept'. In + // case of throw with types, we ignore it and print a warning for now. + // TODO Correctly handle exception specification in wasm + if (CGM.getLangOpts().WasmExceptions) { + if (EST == EST_DynamicNone) + EHStack.pushTerminate(); + else + CGM.getDiags().Report(D->getLocation(), + diag::warn_wasm_dynamic_exception_spec_ignored) + << FD->getExceptionSpecSourceRange(); + return; + } unsigned NumExceptions = Proto->getNumExceptions(); EHFilterScope *Filter = EHStack.pushFilter(NumExceptions); @@ -544,6 +557,14 @@ void CodeGenFunction::EmitEndEHSpec(const Decl *D) { // encode these in an object file but MSVC doesn't do anything with it. if (getTarget().getCXXABI().isMicrosoft()) return; + // In wasm we currently treat 'throw()' in the same way as 'noexcept'. In + // case of throw with types, we ignore it and print a warning for now. + // TODO Correctly handle exception specification in wasm + if (CGM.getLangOpts().WasmExceptions) { + if (EST == EST_DynamicNone) + EHStack.popTerminate(); + return; + } EHFilterScope &filterScope = cast<EHFilterScope>(*EHStack.begin()); emitFilterDispatchBlock(*this, filterScope); EHStack.popFilter(); @@ -630,9 +651,6 @@ CodeGenFunction::getEHDispatchBlock(EHScopeStack::stable_iterator si) { case EHScope::Terminate: dispatchBlock = getTerminateHandler(); break; - - case EHScope::PadEnd: - llvm_unreachable("PadEnd unnecessary for Itanium!"); } scope.setCachedEHDispatchBlock(dispatchBlock); } @@ -674,9 +692,6 @@ CodeGenFunction::getFuncletEHDispatchBlock(EHScopeStack::stable_iterator SI) { case EHScope::Terminate: DispatchBlock->setName("terminate"); break; - - case EHScope::PadEnd: - llvm_unreachable("PadEnd dispatch block missing!"); } EHS.setCachedEHDispatchBlock(DispatchBlock); return DispatchBlock; @@ -692,7 +707,6 @@ static bool isNonEHScope(const EHScope &S) { case EHScope::Filter: case EHScope::Catch: case EHScope::Terminate: - case EHScope::PadEnd: return false; } @@ -703,12 +717,12 @@ llvm::BasicBlock *CodeGenFunction::getInvokeDestImpl() { assert(EHStack.requiresLandingPad()); assert(!EHStack.empty()); - // If exceptions are disabled and SEH is not in use, then there is no invoke - // destination. SEH "works" even if exceptions are off. In practice, this - // means that C++ destructors and other EH cleanups don't run, which is + // If exceptions are disabled/ignored and SEH is not in use, then there is no + // invoke destination. SEH "works" even if exceptions are off. In practice, + // this means that C++ destructors and other EH cleanups don't run, which is // consistent with MSVC's behavior. 
 const LangOptions &LO = CGM.getLangOpts();
- if (!LO.Exceptions) {
+ if (!LO.Exceptions || LO.IgnoreExceptions) {
 if (!LO.Borland && !LO.MicrosoftExt)
 return nullptr;
 if (!currentFunctionUsesSEHTry())
@@ -751,15 +765,14 @@ llvm::BasicBlock *CodeGenFunction::EmitLandingPad() {
 assert(EHStack.requiresLandingPad());
-
+ assert(!CGM.getLangOpts().IgnoreExceptions &&
+ "LandingPad should not be emitted when -fignore-exceptions is in "
+ "effect.");
 EHScope &innermostEHScope = *EHStack.find(EHStack.getInnermostEHScope());
 switch (innermostEHScope.getKind()) {
 case EHScope::Terminate:
 return getTerminateLandingPad();
- case EHScope::PadEnd:
- llvm_unreachable("PadEnd unnecessary for Itanium!");
-
 case EHScope::Catch:
 case EHScope::Cleanup:
 case EHScope::Filter:
@@ -825,9 +838,6 @@ llvm::BasicBlock *CodeGenFunction::EmitLandingPad() {
 case EHScope::Catch:
 break;
-
- case EHScope::PadEnd:
- llvm_unreachable("PadEnd unnecessary for Itanium!");
 }
 EHCatchScope &catchScope = cast<EHCatchScope>(*I);
@@ -1637,6 +1647,19 @@ struct PerformSEHFinally final : EHScopeStack::Cleanup {
 llvm::Value *IsForEH =
 llvm::ConstantInt::get(CGF.ConvertType(ArgTys[0]), F.isForEHCleanup());
+
+ // Except for _leave and fall-through at the end, all other exits from a
+ // _try (return/goto/continue/break) are considered abnormal terminations.
+ // Since _leave/fall-through is always indexed 0 and goto/return/... get
+ // indices >= 1, we load NormalCleanupDestSlot and pass "dest != 0" as the
+ // first argument to indicate an abnormal termination.
+ if (!F.isForEHCleanup() && F.hasExitSwitch()) {
+ Address Addr = CGF.getNormalCleanupDestSlot();
+ llvm::Value *Load = CGF.Builder.CreateLoad(Addr, "cleanup.dest");
+ llvm::Value *Zero = llvm::Constant::getNullValue(CGM.Int32Ty);
+ IsForEH = CGF.Builder.CreateICmpNE(Load, Zero);
+ }
+
 Args.add(RValue::get(IsForEH), ArgTys[0]);
 Args.add(RValue::get(FP), ArgTys[1]);
@@ -1792,6 +1815,48 @@ void CodeGenFunction::EmitCapturedLocals(CodeGenFunction &ParentCGF,
 llvm::Constant *ParentI8Fn =
 llvm::ConstantExpr::getBitCast(ParentCGF.CurFn, Int8PtrTy);
 ParentFP = Builder.CreateCall(RecoverFPIntrin, {ParentI8Fn, EntryFP});
+
+ // If the parent is a _finally, the passed-in ParentFP is the FP of the
+ // parent _finally, not the establisher's FP (the FP of the outermost
+ // function). The establisher FP is the second parameter passed into the
+ // parent _finally. Fortunately, it's always saved in the parent's frame.
+ // The following code retrieves it, and escapes it so that the spill
+ // instruction won't be optimized away.
+ if (ParentCGF.ParentCGF != nullptr) {
+ // Locate and escape the parent's frame_pointer.addr alloca. Depending on
+ // the target, it should be the 1st/2nd one in LocalDeclMap;
+ // let's just scan for an ImplicitParamDecl with VoidPtrTy.
+ llvm::AllocaInst *FramePtrAddrAlloca = nullptr;
+ for (auto &I : ParentCGF.LocalDeclMap) {
+ const VarDecl *D = cast<VarDecl>(I.first);
+ if (isa<ImplicitParamDecl>(D) &&
+ D->getType() == getContext().VoidPtrTy) {
+ assert(D->getName().startswith("frame_pointer"));
+ FramePtrAddrAlloca = cast<llvm::AllocaInst>(I.second.getPointer());
+ break;
+ }
+ }
+ assert(FramePtrAddrAlloca);
+ auto InsertPair = ParentCGF.EscapedLocals.insert(
+ std::make_pair(FramePtrAddrAlloca, ParentCGF.EscapedLocals.size()));
+ int FrameEscapeIdx = InsertPair.first->second;
+
+ // An example of a filter's prolog:
+ // %0 = call i8* @llvm.eh.recoverfp(bitcast(@"?fin$0@0@main@@"),..)
+ // %1 = call i8* @llvm.localrecover(bitcast(@"?fin$0@0@main@@"),..)
+ // %2 = bitcast i8* %1 to i8** + // %3 = load i8*, i8* *%2, align 8 + // ==> %3 is the frame-pointer of outermost host function + llvm::Function *FrameRecoverFn = llvm::Intrinsic::getDeclaration( + &CGM.getModule(), llvm::Intrinsic::localrecover); + llvm::Constant *ParentI8Fn = + llvm::ConstantExpr::getBitCast(ParentCGF.CurFn, Int8PtrTy); + ParentFP = Builder.CreateCall( + FrameRecoverFn, {ParentI8Fn, ParentFP, + llvm::ConstantInt::get(Int32Ty, FrameEscapeIdx)}); + ParentFP = Builder.CreateBitCast(ParentFP, CGM.VoidPtrPtrTy); + ParentFP = Builder.CreateLoad(Address(ParentFP, getPointerAlign())); + } } // Create llvm.localrecover calls for all captures. @@ -1885,7 +1950,7 @@ void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF, OutlinedStmt->getBeginLoc(), OutlinedStmt->getBeginLoc()); CurSEHParent = ParentCGF.CurSEHParent; - CGM.SetLLVMFunctionAttributes(GlobalDecl(), FnInfo, CurFn); + CGM.SetInternalFunctionAttributes(GlobalDecl(), CurFn, FnInfo); EmitCapturedLocals(ParentCGF, OutlinedStmt, IsFilter); } @@ -1990,6 +2055,7 @@ void CodeGenFunction::pushSEHCleanup(CleanupKind Kind, void CodeGenFunction::EnterSEHTryStmt(const SEHTryStmt &S) { CodeGenFunction HelperCGF(CGM, /*suppressNewContext=*/true); + HelperCGF.ParentCGF = this; if (const SEHFinallyStmt *Finally = S.getFinallyHandler()) { // Outline the finally block. llvm::Function *FinallyFunc = diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 8e0604181fb1..9e8770573d70 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -27,6 +27,7 @@ #include "clang/AST/NSAPI.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/CodeGenOptions.h" +#include "clang/Basic/SourceManager.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/DataLayout.h" @@ -125,8 +126,8 @@ Address CodeGenFunction::CreateDefaultAlignTempAlloca(llvm::Type *Ty, void CodeGenFunction::InitTempAlloca(Address Var, llvm::Value *Init) { assert(isa<llvm::AllocaInst>(Var.getPointer())); - auto *Store = new llvm::StoreInst(Init, Var.getPointer()); - Store->setAlignment(Var.getAlignment().getAsAlign()); + auto *Store = new llvm::StoreInst(Init, Var.getPointer(), /*volatile*/ false, + Var.getAlignment().getAsAlign()); llvm::BasicBlock *Block = AllocaInsertPt->getParent(); Block->getInstList().insertAfter(AllocaInsertPt->getIterator(), Store); } @@ -144,8 +145,19 @@ Address CodeGenFunction::CreateMemTemp(QualType Ty, const Twine &Name, Address CodeGenFunction::CreateMemTemp(QualType Ty, CharUnits Align, const Twine &Name, Address *Alloca) { - return CreateTempAlloca(ConvertTypeForMem(Ty), Align, Name, - /*ArraySize=*/nullptr, Alloca); + Address Result = CreateTempAlloca(ConvertTypeForMem(Ty), Align, Name, + /*ArraySize=*/nullptr, Alloca); + + if (Ty->isConstantMatrixType()) { + auto *ArrayTy = cast<llvm::ArrayType>(Result.getType()->getElementType()); + auto *VectorTy = llvm::FixedVectorType::get(ArrayTy->getElementType(), + ArrayTy->getNumElements()); + + Result = Address( + Builder.CreateBitCast(Result.getPointer(), VectorTy->getPointerTo()), + Result.getAlignment()); + } + return Result; } Address CodeGenFunction::CreateMemTempWithoutCast(QualType Ty, CharUnits Align, @@ -415,6 +427,11 @@ static Address createReferenceTemporary(CodeGenFunction &CGF, llvm_unreachable("unknown storage duration"); } +/// Helper method to check if the underlying ABI is AAPCS +static bool isAAPCS(const TargetInfo &TargetInfo) { + return TargetInfo.getABI().startswith("aapcs"); +} + LValue 
CodeGenFunction:: EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { const Expr *E = M->getSubExpr(); @@ -711,7 +728,7 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, if (SanOpts.has(SanitizerKind::ObjectSize) && !SkippedChecks.has(SanitizerKind::ObjectSize) && !Ty->isIncompleteType()) { - uint64_t TySize = getContext().getTypeSizeInChars(Ty).getQuantity(); + uint64_t TySize = CGM.getMinimumObjectSize(Ty).getQuantity(); llvm::Value *Size = llvm::ConstantInt::get(IntPtrTy, TySize); if (ArraySize) Size = Builder.CreateMul(Size, ArraySize); @@ -742,7 +759,9 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, !SkippedChecks.has(SanitizerKind::Alignment)) { AlignVal = Alignment.getQuantity(); if (!Ty->isIncompleteType() && !AlignVal) - AlignVal = getContext().getTypeAlignInChars(Ty).getQuantity(); + AlignVal = CGM.getNaturalTypeAlignment(Ty, nullptr, nullptr, + /*ForPointeeType=*/true) + .getQuantity(); // The glvalue must be suitably aligned. if (AlignVal > 1 && @@ -858,8 +877,12 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, static bool isFlexibleArrayMemberExpr(const Expr *E) { // For compatibility with existing code, we treat arrays of length 0 or // 1 as flexible array members. + // FIXME: This is inconsistent with the warning code in SemaChecking. Unify + // the two mechanisms. const ArrayType *AT = E->getType()->castAsArrayTypeUnsafe(); if (const auto *CAT = dyn_cast<ConstantArrayType>(AT)) { + // FIXME: Sema doesn't treat [1] as a flexible array member if the bound + // was produced by macro expansion. if (CAT->getSize().ugt(1)) return false; } else if (!isa<IncompleteArrayType>(AT)) @@ -872,6 +895,10 @@ static bool isFlexibleArrayMemberExpr(const Expr *E) { // FIXME: If the base type of the member expr is not FD->getParent(), // this should not be treated as a flexible array member access. if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) { + // FIXME: Sema doesn't treat a T[1] union member as a flexible array + // member, only a T[0] or T[] member gets that treatment. + if (FD->getParent()->isUnion()) + return true; RecordDecl::field_iterator FI( DeclContext::decl_iterator(const_cast<FieldDecl *>(FD))); return ++FI == FD->getParent()->field_end(); @@ -1069,9 +1096,8 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E, if (isa<ExplicitCastExpr>(CE)) { LValueBaseInfo TargetTypeBaseInfo; TBAAAccessInfo TargetTypeTBAAInfo; - CharUnits Align = getNaturalPointeeTypeAlignment(E->getType(), - &TargetTypeBaseInfo, - &TargetTypeTBAAInfo); + CharUnits Align = CGM.getNaturalPointeeTypeAlignment( + E->getType(), &TargetTypeBaseInfo, &TargetTypeTBAAInfo); if (TBAAInfo) *TBAAInfo = CGM.mergeTBAAInfoForCast(*TBAAInfo, TargetTypeTBAAInfo); @@ -1139,8 +1165,8 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E, // TODO: conditional operators, comma. // Otherwise, use the alignment of the type. 
- CharUnits Align = getNaturalPointeeTypeAlignment(E->getType(), BaseInfo,
- TBAAInfo);
+ CharUnits Align =
+ CGM.getNaturalPointeeTypeAlignment(E->getType(), BaseInfo, TBAAInfo);
 return Address(EmitScalarExpr(E), Align);
 }
@@ -1276,8 +1302,15 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) {
 return EmitVAArgExprLValue(cast<VAArgExpr>(E));
 case Expr::DeclRefExprClass:
 return EmitDeclRefLValue(cast<DeclRefExpr>(E));
- case Expr::ConstantExprClass:
+ case Expr::ConstantExprClass: {
+ const ConstantExpr *CE = cast<ConstantExpr>(E);
+ if (llvm::Value *Result = ConstantEmitter(*this).tryEmitConstantExpr(CE)) {
+ QualType RetType = cast<CallExpr>(CE->getSubExpr()->IgnoreImplicit())
+ ->getCallReturnType(getContext());
+ return MakeNaturalAlignAddrLValue(Result, RetType);
+ }
 return EmitLValue(cast<ConstantExpr>(E)->getSubExpr());
+ }
 case Expr::ParenExprClass:
 return EmitLValue(cast<ParenExpr>(E)->getSubExpr());
 case Expr::GenericSelectionExprClass:
@@ -1304,7 +1337,6 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) {
 case Expr::ExprWithCleanupsClass: {
 const auto *cleanups = cast<ExprWithCleanups>(E);
- enterFullExpression(cleanups);
 RunCleanupsScope Scope(*this);
 LValue LV = EmitLValue(cleanups->getSubExpr());
 if (LV.isSimple()) {
@@ -1343,6 +1375,8 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) {
 return EmitUnaryOpLValue(cast<UnaryOperator>(E));
 case Expr::ArraySubscriptExprClass:
 return EmitArraySubscriptExpr(cast<ArraySubscriptExpr>(E));
+ case Expr::MatrixSubscriptExprClass:
+ return EmitMatrixSubscriptExpr(cast<MatrixSubscriptExpr>(E));
 case Expr::OMPArraySectionExprClass:
 return EmitOMPArraySectionExpr(cast<OMPArraySectionExpr>(E));
 case Expr::ExtVectorElementExprClass:
@@ -1368,6 +1402,7 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) {
 case Expr::CXXDynamicCastExprClass:
 case Expr::CXXReinterpretCastExprClass:
 case Expr::CXXConstCastExprClass:
+ case Expr::CXXAddrspaceCastExprClass:
 case Expr::ObjCBridgedCastExprClass:
 return EmitCastLValue(cast<CastExpr>(E));
@@ -1651,15 +1686,14 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
 if (VTy->getNumElements() == 3) {
 // Bitcast to vec4 type.
- llvm::VectorType *vec4Ty =
- llvm::VectorType::get(VTy->getElementType(), 4);
+ auto *vec4Ty = llvm::FixedVectorType::get(VTy->getElementType(), 4);
 Address Cast = Builder.CreateElementBitCast(Addr, vec4Ty, "castToVec4");
 // Now load value.
 llvm::Value *V = Builder.CreateLoad(Cast, Volatile, "loadVec4");
 // Shuffle vector to get vec3.
 V = Builder.CreateShuffleVector(V, llvm::UndefValue::get(vec4Ty),
- {0, 1, 2}, "extractVec");
+ ArrayRef<int>{0, 1, 2}, "extractVec");
 return EmitFromMemory(V, Ty);
 }
 }
@@ -1716,6 +1750,42 @@ llvm::Value *CodeGenFunction::EmitFromMemory(llvm::Value *Value, QualType Ty) {
 return Value;
 }
+// Convert the pointer of \p Addr to a pointer to a vector (the value type of
+// MatrixType), if it points to an array (the memory type of MatrixType).
+static Address MaybeConvertMatrixAddress(Address Addr, CodeGenFunction &CGF, + bool IsVector = true) { + auto *ArrayTy = dyn_cast<llvm::ArrayType>( + cast<llvm::PointerType>(Addr.getPointer()->getType())->getElementType()); + if (ArrayTy && IsVector) { + auto *VectorTy = llvm::FixedVectorType::get(ArrayTy->getElementType(), + ArrayTy->getNumElements()); + + return Address(CGF.Builder.CreateElementBitCast(Addr, VectorTy)); + } + auto *VectorTy = dyn_cast<llvm::VectorType>( + cast<llvm::PointerType>(Addr.getPointer()->getType())->getElementType()); + if (VectorTy && !IsVector) { + auto *ArrayTy = llvm::ArrayType::get(VectorTy->getElementType(), + VectorTy->getNumElements()); + + return Address(CGF.Builder.CreateElementBitCast(Addr, ArrayTy)); + } + + return Addr; +} + +// Emit a store of a matrix LValue. This may require casting the original +// pointer to memory address (ArrayType) to a pointer to the value type +// (VectorType). +static void EmitStoreOfMatrixScalar(llvm::Value *value, LValue lvalue, + bool isInit, CodeGenFunction &CGF) { + Address Addr = MaybeConvertMatrixAddress(lvalue.getAddress(CGF), CGF, + value->getType()->isVectorTy()); + CGF.EmitStoreOfScalar(value, Addr, lvalue.isVolatile(), lvalue.getType(), + lvalue.getBaseInfo(), lvalue.getTBAAInfo(), isInit, + lvalue.isNontemporal()); +} + void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, LValueBaseInfo BaseInfo, @@ -1729,13 +1799,10 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr, // Handle vec3 special. if (VecTy && VecTy->getNumElements() == 3) { // Our source is a vec3, do a shuffle vector to make it a vec4. - llvm::Constant *Mask[] = {Builder.getInt32(0), Builder.getInt32(1), - Builder.getInt32(2), - llvm::UndefValue::get(Builder.getInt32Ty())}; - llvm::Value *MaskV = llvm::ConstantVector::get(Mask); Value = Builder.CreateShuffleVector(Value, llvm::UndefValue::get(VecTy), - MaskV, "extractVec"); - SrcTy = llvm::VectorType::get(VecTy->getElementType(), 4); + ArrayRef<int>{0, 1, 2, -1}, + "extractVec"); + SrcTy = llvm::FixedVectorType::get(VecTy->getElementType(), 4); } if (Addr.getElementType() != SrcTy) { Addr = Builder.CreateElementBitCast(Addr, SrcTy, "storetmp"); @@ -1766,11 +1833,26 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr, void CodeGenFunction::EmitStoreOfScalar(llvm::Value *value, LValue lvalue, bool isInit) { + if (lvalue.getType()->isConstantMatrixType()) { + EmitStoreOfMatrixScalar(value, lvalue, isInit, *this); + return; + } + EmitStoreOfScalar(value, lvalue.getAddress(*this), lvalue.isVolatile(), lvalue.getType(), lvalue.getBaseInfo(), lvalue.getTBAAInfo(), isInit, lvalue.isNontemporal()); } +// Emit a load of a LValue of matrix type. This may require casting the pointer +// to memory address (ArrayType) to a pointer to the value type (VectorType). +static RValue EmitLoadOfMatrixLValue(LValue LV, SourceLocation Loc, + CodeGenFunction &CGF) { + assert(LV.getType()->isConstantMatrixType()); + Address Addr = MaybeConvertMatrixAddress(LV.getAddress(CGF), CGF); + LV.setAddress(Addr); + return RValue::get(CGF.EmitLoadOfScalar(LV, Loc)); +} + /// EmitLoadOfLValue - Given an expression that represents a value lvalue, this /// method emits the address of the lvalue, then loads the result as an rvalue, /// returning the rvalue. 
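// Aside: a minimal sketch (not part of the patch) of the memory-type /
// value-type round trip that MaybeConvertMatrixAddress performs above. The
// helper name below is invented for illustration and assumes the LLVM 10/11
// C++ API: a constant matrix lives in memory as [N x T] but is operated on
// as <N x T>, so loads and stores bitcast the pointer between the two views.
#include "llvm/IR/IRBuilder.h"

static llvm::Value *viewMatrixAsVector(llvm::IRBuilder<> &B,
                                       llvm::Value *ArrayPtr) {
  // [N x T]* -> <N x T>*: same bytes, different element view.
  auto *ArrTy = llvm::cast<llvm::ArrayType>(
      ArrayPtr->getType()->getPointerElementType());
  auto *VecTy = llvm::FixedVectorType::get(ArrTy->getElementType(),
                                           ArrTy->getNumElements());
  return B.CreateBitCast(ArrayPtr, VecTy->getPointerTo());
}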
@@ -1796,6 +1878,9 @@ RValue CodeGenFunction::EmitLoadOfLValue(LValue LV, SourceLocation Loc) {
 if (LV.isSimple()) {
 assert(!LV.getType()->isFunctionType());
+ if (LV.getType()->isConstantMatrixType())
+ return EmitLoadOfMatrixLValue(LV, Loc, *this);
+
 // Everything needs a load.
 return RValue::get(EmitLoadOfScalar(LV, Loc));
 }
@@ -1809,13 +1894,21 @@ RValue CodeGenFunction::EmitLoadOfLValue(LValue LV, SourceLocation Loc) {
 // If this is a reference to a subset of the elements of a vector, either
 // shuffle the input or extract/insert them as appropriate.
- if (LV.isExtVectorElt())
+ if (LV.isExtVectorElt()) {
 return EmitLoadOfExtVectorElementLValue(LV);
+ }
 // Global Register variables always invoke intrinsics
 if (LV.isGlobalReg())
 return EmitLoadOfGlobalRegLValue(LV);
+ if (LV.isMatrixElt()) {
+ llvm::LoadInst *Load =
+ Builder.CreateLoad(LV.getMatrixAddress(), LV.isVolatileQualified());
+ return RValue::get(
+ Builder.CreateExtractElement(Load, LV.getMatrixIdx(), "matrixext"));
+ }
+
 assert(LV.isBitField() && "Unknown LValue type!");
 return EmitLoadOfBitfieldLValue(LV, Loc);
 }
@@ -1870,13 +1963,12 @@ RValue CodeGenFunction::EmitLoadOfExtVectorElementLValue(LValue LV) {
 // Always use shuffle vector to try to retain the original program structure
 unsigned NumResultElts = ExprVT->getNumElements();
- SmallVector<llvm::Constant*, 4> Mask;
+ SmallVector<int, 4> Mask;
 for (unsigned i = 0; i != NumResultElts; ++i)
- Mask.push_back(Builder.getInt32(getAccessedFieldNo(i, Elts)));
+ Mask.push_back(getAccessedFieldNo(i, Elts));
- llvm::Value *MaskV = llvm::ConstantVector::get(Mask);
 Vec = Builder.CreateShuffleVector(Vec, llvm::UndefValue::get(Vec->getType()),
- MaskV);
+ Mask);
 return RValue::get(Vec);
 }
@@ -1922,7 +2014,6 @@ RValue CodeGenFunction::EmitLoadOfGlobalRegLValue(LValue LV) {
 return RValue::get(Call);
 }
-
 /// EmitStoreThroughLValue - Store the specified rvalue into the specified
 /// lvalue, where both are guaranteed to have the same type, and that type
 /// is 'Ty'.
@@ -1948,6 +2039,15 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst,
 if (Dst.isGlobalReg())
 return EmitStoreThroughGlobalRegLValue(Src, Dst);
+ if (Dst.isMatrixElt()) {
+ llvm::Value *Vec = Builder.CreateLoad(Dst.getMatrixAddress());
+ Vec = Builder.CreateInsertElement(Vec, Src.getScalarVal(),
+ Dst.getMatrixIdx(), "matins");
+ Builder.CreateStore(Vec, Dst.getMatrixAddress(),
+ Dst.isVolatileQualified());
+ return;
+ }
+
 assert(Dst.isBitField() && "Unknown LValue type");
 return EmitStoreThroughBitfieldLValue(Src, Dst);
 }
@@ -2066,6 +2166,14 @@ void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst,
 SrcVal = Builder.CreateOr(Val, SrcVal, "bf.set");
 } else {
 assert(Info.Offset == 0);
+ // According to the AAPCS:
+ // When a volatile bit-field is written, and its container does not overlap
+ // with any non-bit-field member, its container must be read exactly once and
+ // written exactly once using the access width appropriate to the type of the
+ // container. The two accesses are not atomic.
+ if (Dst.isVolatileQualified() && isAAPCS(CGM.getTarget()) &&
+ CGM.getCodeGenOpts().ForceAAPCSBitfieldLoad)
+ Builder.CreateLoad(Ptr, true, "bf.load");
 }
 // Write the new value back out.
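// Aside: a hedged sketch (illustrative, not from the patch) of what the new
// isMatrixElt load/store paths above boil down to. MatrixPtr and Idx stand
// in for LV.getMatrixAddress() / LV.getMatrixIdx(); both helper names are
// invented for this example.
static llvm::Value *loadMatrixElt(llvm::IRBuilder<> &B,
                                  llvm::Value *MatrixPtr, llvm::Value *Idx) {
  // Load the whole matrix as one vector, then extract a single lane.
  llvm::Value *Vec = B.CreateLoad(MatrixPtr, "mat");
  return B.CreateExtractElement(Vec, Idx, "matrixext");
}

static void storeMatrixElt(llvm::IRBuilder<> &B, llvm::Value *MatrixPtr,
                           llvm::Value *Idx, llvm::Value *Elt) {
  // Read-modify-write: insert the new lane, then store the vector back.
  llvm::Value *Vec = B.CreateLoad(MatrixPtr, "mat");
  Vec = B.CreateInsertElement(Vec, Elt, Idx, "matins");
  B.CreateStore(Vec, MatrixPtr);
}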
@@ -2103,37 +2211,33 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
 if (const VectorType *VTy = Dst.getType()->getAs<VectorType>()) {
 unsigned NumSrcElts = VTy->getNumElements();
- unsigned NumDstElts = Vec->getType()->getVectorNumElements();
+ unsigned NumDstElts =
+ cast<llvm::VectorType>(Vec->getType())->getNumElements();
 if (NumDstElts == NumSrcElts) {
 // Use shuffle vector if the src and destination are the same number of
 // elements and restore the vector mask since it is on the side it will be
 // stored.
- SmallVector<llvm::Constant*, 4> Mask(NumDstElts);
+ SmallVector<int, 4> Mask(NumDstElts);
 for (unsigned i = 0; i != NumSrcElts; ++i)
- Mask[getAccessedFieldNo(i, Elts)] = Builder.getInt32(i);
+ Mask[getAccessedFieldNo(i, Elts)] = i;
- llvm::Value *MaskV = llvm::ConstantVector::get(Mask);
- Vec = Builder.CreateShuffleVector(SrcVal,
- llvm::UndefValue::get(Vec->getType()),
- MaskV);
+ Vec = Builder.CreateShuffleVector(
+ SrcVal, llvm::UndefValue::get(Vec->getType()), Mask);
 } else if (NumDstElts > NumSrcElts) {
 // Extend the source vector to the same length and then shuffle it
 // into the destination.
 // FIXME: since we're shuffling with undef, can we just use the indices
 // into that? This could be simpler.
- SmallVector<llvm::Constant*, 4> ExtMask;
+ SmallVector<int, 4> ExtMask;
 for (unsigned i = 0; i != NumSrcElts; ++i)
- ExtMask.push_back(Builder.getInt32(i));
- ExtMask.resize(NumDstElts, llvm::UndefValue::get(Int32Ty));
- llvm::Value *ExtMaskV = llvm::ConstantVector::get(ExtMask);
- llvm::Value *ExtSrcVal =
- Builder.CreateShuffleVector(SrcVal,
- llvm::UndefValue::get(SrcVal->getType()),
- ExtMaskV);
+ ExtMask.push_back(i);
+ ExtMask.resize(NumDstElts, -1);
+ llvm::Value *ExtSrcVal = Builder.CreateShuffleVector(
+ SrcVal, llvm::UndefValue::get(SrcVal->getType()), ExtMask);
 // build identity
- SmallVector<llvm::Constant*, 4> Mask;
+ SmallVector<int, 4> Mask;
 for (unsigned i = 0; i != NumDstElts; ++i)
- Mask.push_back(Builder.getInt32(i));
+ Mask.push_back(i);
 // When the vector size is odd and .odd or .hi is used, the last element
 // of the Elts constant array will be one past the size of the vector.
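// Aside: the mask rewrites in this hunk track an LLVM API migration rather
// than a behavior change. CreateShuffleVector now accepts a plain
// ArrayRef<int> mask, where -1 marks an undef lane, instead of a
// ConstantVector of i32s. A minimal sketch under that assumption (the
// function name is invented for illustration):
static llvm::Value *widenVec3ToVec4(llvm::IRBuilder<> &B, llvm::Value *Vec3) {
  // Lanes 0..2 come from Vec3; lane 3 (-1) is left undef.
  int Mask[] = {0, 1, 2, -1};
  return B.CreateShuffleVector(Vec3, llvm::UndefValue::get(Vec3->getType()),
                               Mask, "widen");
}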
@@ -2143,9 +2247,8 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src, // modify when what gets shuffled in for (unsigned i = 0; i != NumSrcElts; ++i) - Mask[getAccessedFieldNo(i, Elts)] = Builder.getInt32(i+NumDstElts); - llvm::Value *MaskV = llvm::ConstantVector::get(Mask); - Vec = Builder.CreateShuffleVector(Vec, ExtSrcVal, MaskV); + Mask[getAccessedFieldNo(i, Elts)] = i + NumDstElts; + Vec = Builder.CreateShuffleVector(Vec, ExtSrcVal, Mask); } else { // We should never shorten the vector llvm_unreachable("unexpected shorten vector length"); @@ -2295,7 +2398,13 @@ EmitBitCastOfLValueToProperType(CodeGenFunction &CGF, static LValue EmitThreadPrivateVarDeclLValue( CodeGenFunction &CGF, const VarDecl *VD, QualType T, Address Addr, llvm::Type *RealVarTy, SourceLocation Loc) { - Addr = CGF.CGM.getOpenMPRuntime().getAddrOfThreadPrivate(CGF, VD, Addr, Loc); + if (CGF.CGM.getLangOpts().OpenMPIRBuilder) + Addr = CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate( + CGF, VD, Addr, Loc); + else + Addr = + CGF.CGM.getOpenMPRuntime().getAddrOfThreadPrivate(CGF, VD, Addr, Loc); + Addr = CGF.Builder.CreateElementBitCast(Addr, RealVarTy); return CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl); } @@ -2327,9 +2436,9 @@ CodeGenFunction::EmitLoadOfReference(LValue RefLVal, Builder.CreateLoad(RefLVal.getAddress(*this), RefLVal.isVolatile()); CGM.DecorateInstructionWithTBAA(Load, RefLVal.getTBAAInfo()); - CharUnits Align = getNaturalTypeAlignment(RefLVal.getType()->getPointeeType(), - PointeeBaseInfo, PointeeTBAAInfo, - /* forPointeeType= */ true); + CharUnits Align = CGM.getNaturalTypeAlignment( + RefLVal.getType()->getPointeeType(), PointeeBaseInfo, PointeeTBAAInfo, + /* forPointeeType= */ true); return Address(Load, Align); } @@ -2347,9 +2456,9 @@ Address CodeGenFunction::EmitLoadOfPointer(Address Ptr, LValueBaseInfo *BaseInfo, TBAAAccessInfo *TBAAInfo) { llvm::Value *Addr = Builder.CreateLoad(Ptr); - return Address(Addr, getNaturalTypeAlignment(PtrTy->getPointeeType(), - BaseInfo, TBAAInfo, - /*forPointeeType=*/true)); + return Address(Addr, CGM.getNaturalTypeAlignment(PtrTy->getPointeeType(), + BaseInfo, TBAAInfo, + /*forPointeeType=*/true)); } LValue CodeGenFunction::EmitLoadOfPointerLValue(Address PtrAddr, @@ -2397,13 +2506,14 @@ static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF, } static llvm::Constant *EmitFunctionDeclPointer(CodeGenModule &CGM, - const FunctionDecl *FD) { + GlobalDecl GD) { + const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl()); if (FD->hasAttr<WeakRefAttr>()) { ConstantAddress aliasee = CGM.GetWeakRefReference(FD); return aliasee.getPointer(); } - llvm::Constant *V = CGM.GetAddrOfFunction(FD); + llvm::Constant *V = CGM.GetAddrOfFunction(GD); if (!FD->hasPrototype()) { if (const FunctionProtoType *Proto = FD->getType()->getAs<FunctionProtoType>()) { @@ -2420,9 +2530,10 @@ static llvm::Constant *EmitFunctionDeclPointer(CodeGenModule &CGM, return V; } -static LValue EmitFunctionDeclLValue(CodeGenFunction &CGF, - const Expr *E, const FunctionDecl *FD) { - llvm::Value *V = EmitFunctionDeclPointer(CGF.CGM, FD); +static LValue EmitFunctionDeclLValue(CodeGenFunction &CGF, const Expr *E, + GlobalDecl GD) { + const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl()); + llvm::Value *V = EmitFunctionDeclPointer(CGF.CGM, GD); CharUnits Alignment = CGF.getContext().getDeclAlign(FD); return CGF.MakeAddrLValue(V, E->getType(), Alignment, AlignmentSource::Decl); @@ -2552,10 +2663,10 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr 
*E) { } else { // Should we be using the alignment of the constant pointer we emitted? CharUnits Alignment = - getNaturalTypeAlignment(E->getType(), - /* BaseInfo= */ nullptr, - /* TBAAInfo= */ nullptr, - /* forPointeeType= */ true); + CGM.getNaturalTypeAlignment(E->getType(), + /* BaseInfo= */ nullptr, + /* TBAAInfo= */ nullptr, + /* forPointeeType= */ true); Addr = Address(Val, Alignment); } return MakeAddrLValue(Addr, T, AlignmentSource::Decl); @@ -2689,6 +2800,12 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { if (const auto *BD = dyn_cast<BindingDecl>(ND)) return EmitLValue(BD->getBinding()); + // We can form DeclRefExprs naming GUID declarations when reconstituting + // non-type template parameters into expressions. + if (const auto *GD = dyn_cast<MSGuidDecl>(ND)) + return MakeAddrLValue(CGM.GetAddrOfMSGuidDecl(GD), T, + AlignmentSource::Decl); + llvm_unreachable("Unhandled DeclRefExpr"); } @@ -2779,7 +2896,7 @@ LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) { PredefinedExpr::getIdentKindName(E->getIdentKind()), FnName}; std::string GVName = llvm::join(NameItems, NameItems + 2, "."); if (auto *BD = dyn_cast_or_null<BlockDecl>(CurCodeDecl)) { - std::string Name = SL->getString(); + std::string Name = std::string(SL->getString()); if (!Name.empty()) { unsigned Discriminator = CGM.getCXXABI().getMangleContext().getBlockId(BD, true); @@ -2788,7 +2905,8 @@ LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) { auto C = CGM.GetAddrOfConstantCString(Name, GVName.c_str()); return MakeAddrLValue(C, E->getType(), AlignmentSource::Decl); } else { - auto C = CGM.GetAddrOfConstantCString(FnName, GVName.c_str()); + auto C = + CGM.GetAddrOfConstantCString(std::string(FnName), GVName.c_str()); return MakeAddrLValue(C, E->getType(), AlignmentSource::Decl); } } @@ -2918,7 +3036,8 @@ llvm::Constant *CodeGenFunction::EmitCheckSourceLocation(SourceLocation Loc) { FilenameString = llvm::sys::path::filename(FilenameString); } - auto FilenameGV = CGM.GetAddrOfConstantCString(FilenameString, ".src"); + auto FilenameGV = + CGM.GetAddrOfConstantCString(std::string(FilenameString), ".src"); CGM.getSanitizerMetadata()->disableSanitizerForGlobal( cast<llvm::GlobalVariable>(FilenameGV.getPointer())); Filename = FilenameGV.getPointer(); @@ -3665,6 +3784,23 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, return LV; } +LValue CodeGenFunction::EmitMatrixSubscriptExpr(const MatrixSubscriptExpr *E) { + assert( + !E->isIncomplete() && + "incomplete matrix subscript expressions should be rejected during Sema"); + LValue Base = EmitLValue(E->getBase()); + llvm::Value *RowIdx = EmitScalarExpr(E->getRowIdx()); + llvm::Value *ColIdx = EmitScalarExpr(E->getColumnIdx()); + llvm::Value *NumRows = Builder.getIntN( + RowIdx->getType()->getScalarSizeInBits(), + E->getBase()->getType()->getAs<ConstantMatrixType>()->getNumRows()); + llvm::Value *FinalIdx = + Builder.CreateAdd(Builder.CreateMul(ColIdx, NumRows), RowIdx); + return LValue::MakeMatrixElt( + MaybeConvertMatrixAddress(Base.getAddress(*this), *this), FinalIdx, + E->getBase()->getType(), Base.getBaseInfo(), TBAAAccessInfo()); +} + static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base, LValueBaseInfo &BaseInfo, TBAAAccessInfo &TBAAInfo, @@ -3695,8 +3831,8 @@ static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base, } LValueBaseInfo TypeBaseInfo; TBAAAccessInfo TypeTBAAInfo; - CharUnits Align = CGF.getNaturalTypeAlignment(ElTy, &TypeBaseInfo, 
- &TypeTBAAInfo); + CharUnits Align = + CGF.CGM.getNaturalTypeAlignment(ElTy, &TypeBaseInfo, &TypeTBAAInfo); BaseInfo.mergeForCast(TypeBaseInfo); TBAAInfo = CGF.CGM.mergeTBAAInfoForCast(TBAAInfo, TypeTBAAInfo); return Address(CGF.Builder.CreateLoad(BaseLVal.getAddress(CGF)), Align); @@ -3713,7 +3849,7 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, else ResultExprTy = BaseTy->getPointeeType(); llvm::Value *Idx = nullptr; - if (IsLowerBound || E->getColonLoc().isInvalid()) { + if (IsLowerBound || E->getColonLocFirst().isInvalid()) { // Requesting lower bound or upper bound, but without provided length and // without ':' symbol for the default length -> length = 1. // Idx = LowerBound ?: 0; @@ -4020,17 +4156,17 @@ static Address emitAddrOfFieldStorage(CodeGenFunction &CGF, Address base, return CGF.Builder.CreateStructGEP(base, idx, field->getName()); } -static Address emitPreserveStructAccess(CodeGenFunction &CGF, Address base, - const FieldDecl *field) { +static Address emitPreserveStructAccess(CodeGenFunction &CGF, LValue base, + Address addr, const FieldDecl *field) { const RecordDecl *rec = field->getParent(); - llvm::DIType *DbgInfo = CGF.getDebugInfo()->getOrCreateRecordType( - CGF.getContext().getRecordType(rec), rec->getLocation()); + llvm::DIType *DbgInfo = CGF.getDebugInfo()->getOrCreateStandaloneType( + base.getType(), rec->getLocation()); unsigned idx = CGF.CGM.getTypes().getCGRecordLayout(rec).getLLVMFieldNo(field); return CGF.Builder.CreatePreserveStructAccessIndex( - base, idx, CGF.getDebugInfoFIndex(rec, field->getFieldIndex()), DbgInfo); + addr, idx, CGF.getDebugInfoFIndex(rec, field->getFieldIndex()), DbgInfo); } static bool hasAnyVptr(const QualType Type, const ASTContext &Context) { @@ -4154,8 +4290,8 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, if (IsInPreservedAIRegion || (getDebugInfo() && rec->hasAttr<BPFPreserveAccessIndexAttr>())) { // Remember the original union field index - llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateRecordType( - getContext().getRecordType(rec), rec->getLocation()); + llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(base.getType(), + rec->getLocation()); addr = Address( Builder.CreatePreserveUnionAccessIndex( addr.getPointer(), getDebugInfoFIndex(rec, field->getFieldIndex()), DbgInfo), @@ -4172,7 +4308,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, addr = emitAddrOfFieldStorage(*this, addr, field); else // Remember the original struct field index - addr = emitPreserveStructAccess(*this, addr, field); + addr = emitPreserveStructAccess(*this, base, addr, field); } // If this is a reference field, load the reference right now. @@ -4248,6 +4384,14 @@ LValue CodeGenFunction::EmitCompoundLiteralLValue(const CompoundLiteralExpr *E){ EmitAnyExprToMem(InitExpr, DeclPtr, E->getType().getQualifiers(), /*Init*/ true); + // Block-scope compound literals are destroyed at the end of the enclosing + // scope in C. + if (!getLangOpts().CPlusPlus) + if (QualType::DestructionKind DtorKind = E->getType().isDestructedType()) + pushLifetimeExtendedDestroy(getCleanupKind(DtorKind), DeclPtr, + E->getType(), getDestroyer(DtorKind), + DtorKind & EHCleanup); + return Result; } @@ -4295,6 +4439,16 @@ EmitConditionalOperatorLValue(const AbstractConditionalOperator *expr) { // If the true case is live, we need to track its region. if (CondExprBool) incrementProfileCounter(expr); + // If a throw expression we emit it and return an undefined lvalue + // because it can't be used. 
+ if (auto *ThrowExpr = dyn_cast<CXXThrowExpr>(live->IgnoreParens())) { + EmitCXXThrowExpr(ThrowExpr); + llvm::Type *Ty = + llvm::PointerType::getUnqual(ConvertType(dead->getType())); + return MakeAddrLValue( + Address(llvm::UndefValue::get(Ty), CharUnits::One()), + dead->getType()); + } return EmitLValue(live); } } @@ -4620,7 +4774,8 @@ RValue CodeGenFunction::EmitSimpleCallExpr(const CallExpr *E, return EmitCall(E->getCallee()->getType(), Callee, E, ReturnValue); } -static CGCallee EmitDirectCallee(CodeGenFunction &CGF, const FunctionDecl *FD) { +static CGCallee EmitDirectCallee(CodeGenFunction &CGF, GlobalDecl GD) { + const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl()); if (auto builtinID = FD->getBuiltinID()) { // Replaceable builtin provide their own implementation of a builtin. Unless @@ -4632,8 +4787,8 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, const FunctionDecl *FD) { return CGCallee::forBuiltin(builtinID, FD); } - llvm::Constant *calleePtr = EmitFunctionDeclPointer(CGF.CGM, FD); - return CGCallee::forDirect(calleePtr, GlobalDecl(FD)); + llvm::Constant *calleePtr = EmitFunctionDeclPointer(CGF.CGM, GD); + return CGCallee::forDirect(calleePtr, GD); } CGCallee CodeGenFunction::EmitCallee(const Expr *E) { @@ -4774,7 +4929,7 @@ CodeGenFunction::EmitCXXTypeidLValue(const CXXTypeidExpr *E) { } Address CodeGenFunction::EmitCXXUuidofExpr(const CXXUuidofExpr *E) { - return Builder.CreateElementBitCast(CGM.GetAddrOfUuidDescriptor(E), + return Builder.CreateElementBitCast(CGM.GetAddrOfMSGuidDecl(E->getGuidDecl()), ConvertType(E->getType())); } @@ -5019,7 +5174,8 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee // to the function type. if (isa<FunctionNoProtoType>(FnType) || Chain) { llvm::Type *CalleeTy = getTypes().GetFunctionType(FnInfo); - CalleeTy = CalleeTy->getPointerTo(); + int AS = Callee.getFunctionPointer()->getType()->getPointerAddressSpace(); + CalleeTy = CalleeTy->getPointerTo(AS); llvm::Value *CalleePtr = Callee.getFunctionPointer(); CalleePtr = Builder.CreateBitCast(CalleePtr, CalleeTy, "callee.knr.cast"); diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index 8de609a2ccd9..fb96d70732e8 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -15,6 +15,7 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" +#include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" #include "clang/AST/DeclCXX.h" @@ -126,6 +127,11 @@ public: } void VisitConstantExpr(ConstantExpr *E) { + if (llvm::Value *Result = ConstantEmitter(CGF).tryEmitConstantExpr(E)) { + CGF.EmitAggregateStore(Result, Dest.getAddress(), + E->getType().isVolatileQualified()); + return; + } return Visit(E->getSubExpr()); } @@ -249,7 +255,7 @@ void AggExprEmitter::withReturnValueSlot( const Expr *E, llvm::function_ref<RValue(ReturnValueSlot)> EmitCall) { QualType RetTy = E->getType(); bool RequiresDestruction = - Dest.isIgnored() && + !Dest.isExternallyDestructed() && RetTy.isDestructedType() == QualType::DK_nontrivial_c_struct; // If it makes no observable difference, save a memcpy + temporary. 
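// Aside: a source-level illustration (hypothetical, not from the patch) of
// the slot-reuse trade-off mentioned above: the call may write straight into
// the named variable, skipping the temporary and the memcpy, as long as the
// early writes are not observable. The externally-destructed flag threaded
// through ReturnValueSlot in this hunk additionally records who destroys the
// result, so a non-trivial C struct is destroyed exactly once.
struct S { int a[16]; };
struct S make_s(void);

void use(void) {
  struct S s = make_s(); // the indirect (sret) return slot can be &s itself
  (void)s;
}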
@@ -287,10 +293,8 @@ void AggExprEmitter::withReturnValueSlot( } RValue Src = - EmitCall(ReturnValueSlot(RetAddr, Dest.isVolatile(), IsResultUnused)); - - if (RequiresDestruction) - CGF.pushDestroy(RetTy.isDestructedType(), Src.getAggregateAddress(), RetTy); + EmitCall(ReturnValueSlot(RetAddr, Dest.isVolatile(), IsResultUnused, + Dest.isExternallyDestructed())); if (!UseTemp) return; @@ -659,22 +663,32 @@ AggExprEmitter::VisitCompoundLiteralExpr(CompoundLiteralExpr *E) { } AggValueSlot Slot = EnsureSlot(E->getType()); + + // Block-scope compound literals are destroyed at the end of the enclosing + // scope in C. + bool Destruct = + !CGF.getLangOpts().CPlusPlus && !Slot.isExternallyDestructed(); + if (Destruct) + Slot.setExternallyDestructed(); + CGF.EmitAggExpr(E->getInitializer(), Slot); + + if (Destruct) + if (QualType::DestructionKind DtorKind = E->getType().isDestructedType()) + CGF.pushLifetimeExtendedDestroy( + CGF.getCleanupKind(DtorKind), Slot.getAddress(), E->getType(), + CGF.getDestroyer(DtorKind), DtorKind & EHCleanup); } /// Attempt to look through various unimportant expressions to find a /// cast of the given kind. -static Expr *findPeephole(Expr *op, CastKind kind) { - while (true) { - op = op->IgnoreParens(); - if (CastExpr *castE = dyn_cast<CastExpr>(op)) { - if (castE->getCastKind() == kind) - return castE->getSubExpr(); - if (castE->getCastKind() == CK_NoOp) - continue; - } - return nullptr; +static Expr *findPeephole(Expr *op, CastKind kind, const ASTContext &ctx) { + op = op->IgnoreParenNoopCasts(ctx); + if (auto castE = dyn_cast<CastExpr>(op)) { + if (castE->getCastKind() == kind) + return castE->getSubExpr(); } + return nullptr; } void AggExprEmitter::VisitCastExpr(CastExpr *E) { @@ -763,7 +777,8 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { (isToAtomic ? CK_AtomicToNonAtomic : CK_NonAtomicToAtomic); // These two cases are reverses of each other; try to peephole them. - if (Expr *op = findPeephole(E->getSubExpr(), peepholeTarget)) { + if (Expr *op = + findPeephole(E->getSubExpr(), peepholeTarget, CGF.getContext())) { assert(CGF.getContext().hasSameUnqualifiedType(op->getType(), E->getType()) && "peephole significantly changed types?"); @@ -813,8 +828,19 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { // If we're loading from a volatile type, force the destination // into existence. if (E->getSubExpr()->getType().isVolatileQualified()) { + bool Destruct = + !Dest.isExternallyDestructed() && + E->getType().isDestructedType() == QualType::DK_nontrivial_c_struct; + if (Destruct) + Dest.setExternallyDestructed(); EnsureDest(E->getType()); - return Visit(E->getSubExpr()); + Visit(E->getSubExpr()); + + if (Destruct) + CGF.pushDestroy(QualType::DK_nontrivial_c_struct, Dest.getAddress(), + E->getType()); + + return; } LLVM_FALLTHROUGH; @@ -1328,7 +1354,6 @@ AggExprEmitter::VisitLambdaExpr(LambdaExpr *E) { } void AggExprEmitter::VisitExprWithCleanups(ExprWithCleanups *E) { - CGF.enterFullExpression(E); CodeGenFunction::RunCleanupsScope cleanups(CGF); Visit(E->getSubExpr()); } @@ -1923,6 +1948,18 @@ void CodeGenFunction::EmitAggregateCopy(LValue Dest, LValue Src, QualType Ty, } } + if (getLangOpts().CUDAIsDevice) { + if (Ty->isCUDADeviceBuiltinSurfaceType()) { + if (getTargetHooks().emitCUDADeviceBuiltinSurfaceDeviceCopy(*this, Dest, + Src)) + return; + } else if (Ty->isCUDADeviceBuiltinTextureType()) { + if (getTargetHooks().emitCUDADeviceBuiltinTextureDeviceCopy(*this, Dest, + Src)) + return; + } + } + // Aggregate assignment turns into llvm.memcpy. 
This is almost valid per
 // C99 6.5.16.1p3, which states "If the value being stored in an object is
 // read from another object that overlaps in any way the storage of the first
diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp
index 42c1c34c57ad..d59aa6ce0fb9 100644
--- a/clang/lib/CodeGen/CGExprCXX.cpp
+++ b/clang/lib/CodeGen/CGExprCXX.cpp
@@ -112,7 +112,8 @@ RValue CodeGenFunction::EmitCXXDestructorCall(
 commonEmitCXXMemberOrOperatorCall(*this, DtorDecl, This, ImplicitParam,
 ImplicitParamTy, CE, Args, nullptr);
 return EmitCall(CGM.getTypes().arrangeCXXStructorDeclaration(Dtor), Callee,
- ReturnValueSlot(), Args);
+ ReturnValueSlot(), Args, nullptr,
+ CE ? CE->getExprLoc() : SourceLocation{});
 }
 RValue CodeGenFunction::EmitCXXPseudoDestructorExpr(
@@ -380,7 +381,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
 IsArrow ? Base->getType()->getPointeeType() : Base->getType();
 EmitCXXDestructorCall(GD, Callee, This.getPointer(*this), ThisTy,
 /*ImplicitParam=*/nullptr,
- /*ImplicitParamTy=*/QualType(), nullptr);
+ /*ImplicitParamTy=*/QualType(), CE);
 }
 return RValue::get(nullptr);
 }
@@ -1637,6 +1638,12 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) {
 RValue RV =
 EmitNewDeleteCall(*this, allocator, allocatorType, allocatorArgs);
+ // Set !heapallocsite metadata on the call to operator new.
+ if (getDebugInfo())
+ if (auto *newCall = dyn_cast<llvm::CallBase>(RV.getScalarVal()))
+ getDebugInfo()->addHeapAllocSiteMetadata(newCall, allocType,
+ E->getExprLoc());
+
 // If this was a call to a global replaceable allocation function that does
 // not take an alignment argument, the allocator is known to produce
 // storage that's suitably aligned for any object that fits, up to a known
@@ -1866,10 +1873,13 @@ static void EmitDestroyingObjectDelete(CodeGenFunction &CGF,
 }
 /// Emit the code for deleting a single object.
-static void EmitObjectDelete(CodeGenFunction &CGF,
+/// \return \c true if we started emitting UnconditionalDeleteBlock, \c false
+/// if not.
+static bool EmitObjectDelete(CodeGenFunction &CGF,
 const CXXDeleteExpr *DE,
 Address Ptr,
- QualType ElementType) {
+ QualType ElementType,
+ llvm::BasicBlock *UnconditionalDeleteBlock) {
 // C++11 [expr.delete]p3:
 // If the static type of the object to be deleted is different from its
 // dynamic type, the static type shall be a base class of the dynamic type
@@ -1916,7 +1926,7 @@ static void EmitObjectDelete(CodeGenFunction &CGF,
 if (UseVirtualCall) {
 CGF.CGM.getCXXABI().emitVirtualObjectDelete(CGF, DE, Ptr, ElementType,
 Dtor);
- return;
+ return false;
 }
 }
 }
@@ -1951,7 +1961,15 @@ static void EmitObjectDelete(CodeGenFunction &CGF,
 }
 }
+ // When optimizing for size, call 'operator delete' unconditionally.
+ if (CGF.CGM.getCodeGenOpts().OptimizeSize > 1) {
+ CGF.EmitBlock(UnconditionalDeleteBlock);
+ CGF.PopCleanupBlock();
+ return true;
+ }
+
 CGF.PopCleanupBlock();
+ return false;
 }
 namespace {
@@ -2028,6 +2046,12 @@ void CodeGenFunction::EmitCXXDeleteExpr(const CXXDeleteExpr *E) {
 Address Ptr = EmitPointerWithAlignment(Arg);
 // Null check the pointer.
+ //
+ // We could avoid this null check if we can determine that the object
+ // destruction is trivial and doesn't require an array cookie; we can
+ // unconditionally perform the operator delete call in that case. For now, we
+ // assume that deleted pointers are null rarely enough that it's better to
+ // keep the branch. This might be worth revisiting for a -O0 code size win.
llvm::BasicBlock *DeleteNotNull = createBasicBlock("delete.notnull"); llvm::BasicBlock *DeleteEnd = createBasicBlock("delete.end"); @@ -2073,11 +2097,11 @@ void CodeGenFunction::EmitCXXDeleteExpr(const CXXDeleteExpr *E) { if (E->isArrayForm()) { EmitArrayDelete(*this, E, Ptr, DeleteTy); + EmitBlock(DeleteEnd); } else { - EmitObjectDelete(*this, E, Ptr, DeleteTy); + if (!EmitObjectDelete(*this, E, Ptr, DeleteTy, DeleteEnd)) + EmitBlock(DeleteEnd); } - - EmitBlock(DeleteEnd); } static bool isGLValueFromPointerDeref(const Expr *E) { diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp index f7a4e9e94712..a49817898ae3 100644 --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -13,6 +13,7 @@ #include "CGOpenMPRuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" +#include "ConstantEmitter.h" #include "clang/AST/StmtVisitor.h" #include "llvm/ADT/STLExtras.h" #include "llvm/IR/Constants.h" @@ -97,11 +98,14 @@ public: } ComplexPairTy VisitStmt(Stmt *S) { - S->dump(CGF.getContext().getSourceManager()); + S->dump(llvm::errs(), CGF.getContext()); llvm_unreachable("Stmt can't have complex result type!"); } ComplexPairTy VisitExpr(Expr *S); ComplexPairTy VisitConstantExpr(ConstantExpr *E) { + if (llvm::Constant *Result = ConstantEmitter(CGF).tryEmitConstantExpr(E)) + return ComplexPairTy(Result->getAggregateElement(0U), + Result->getAggregateElement(1U)); return Visit(E->getSubExpr()); } ComplexPairTy VisitParenExpr(ParenExpr *PE) { return Visit(PE->getSubExpr());} @@ -222,7 +226,6 @@ public: return Visit(DIE->getExpr()); } ComplexPairTy VisitExprWithCleanups(ExprWithCleanups *E) { - CGF.enterFullExpression(E); CodeGenFunction::RunCleanupsScope Scope(CGF); ComplexPairTy Vals = Visit(E->getSubExpr()); // Defend against dominance problems caused by jumps out of expression @@ -431,8 +434,10 @@ ComplexPairTy ComplexExprEmitter::EmitComplexToComplexCast(ComplexPairTy Val, // C99 6.3.1.6: When a value of complex type is converted to another // complex type, both the real and imaginary parts follow the conversion // rules for the corresponding real types. - Val.first = CGF.EmitScalarConversion(Val.first, SrcType, DestType, Loc); - Val.second = CGF.EmitScalarConversion(Val.second, SrcType, DestType, Loc); + if (Val.first) + Val.first = CGF.EmitScalarConversion(Val.first, SrcType, DestType, Loc); + if (Val.second) + Val.second = CGF.EmitScalarConversion(Val.second, SrcType, DestType, Loc); return Val; } diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp index 46ed90a20264..c6b2930faece 100644 --- a/clang/lib/CodeGen/CGExprConstant.cpp +++ b/clang/lib/CodeGen/CGExprConstant.cpp @@ -318,12 +318,17 @@ bool ConstantAggregateBuilder::split(size_t Index, CharUnits Hint) { CharUnits Offset = Offsets[Index]; if (auto *CA = dyn_cast<llvm::ConstantAggregate>(C)) { + // Expand the sequence into its contained elements. + // FIXME: This assumes vector elements are byte-sized. replace(Elems, Index, Index + 1, llvm::map_range(llvm::seq(0u, CA->getNumOperands()), [&](unsigned Op) { return CA->getOperand(Op); })); - if (auto *Seq = dyn_cast<llvm::SequentialType>(CA->getType())) { + if (isa<llvm::ArrayType>(CA->getType()) || + isa<llvm::VectorType>(CA->getType())) { // Array or vector. 
- CharUnits ElemSize = getSize(Seq->getElementType()); + llvm::Type *ElemTy = + llvm::GetElementPtrInst::getTypeAtIndex(CA->getType(), (uint64_t)0); + CharUnits ElemSize = getSize(ElemTy); replace( Offsets, Index, Index + 1, llvm::map_range(llvm::seq(0u, CA->getNumOperands()), @@ -344,6 +349,8 @@ bool ConstantAggregateBuilder::split(size_t Index, CharUnits Hint) { } if (auto *CDS = dyn_cast<llvm::ConstantDataSequential>(C)) { + // Expand the sequence into its contained elements. + // FIXME: This assumes vector elements are byte-sized. // FIXME: If possible, split into two ConstantDataSequentials at Hint. CharUnits ElemSize = getSize(CDS->getElementType()); replace(Elems, Index, Index + 1, @@ -359,6 +366,7 @@ bool ConstantAggregateBuilder::split(size_t Index, CharUnits Hint) { } if (isa<llvm::ConstantAggregateZero>(C)) { + // Split into two zeros at the hinted offset. CharUnits ElemSize = getSize(C); assert(Hint > Offset && Hint < Offset + ElemSize && "nothing to split"); replace(Elems, Index, Index + 1, @@ -368,6 +376,7 @@ bool ConstantAggregateBuilder::split(size_t Index, CharUnits Hint) { } if (isa<llvm::UndefValue>(C)) { + // Drop undef; it doesn't contribute to the final layout. replace(Elems, Index, Index + 1, {}); replace(Offsets, Index, Index + 1, {}); return true; @@ -589,19 +598,21 @@ bool ConstStructBuilder::AppendBytes(CharUnits FieldOffsetInChars, bool ConstStructBuilder::AppendBitField( const FieldDecl *Field, uint64_t FieldOffset, llvm::ConstantInt *CI, bool AllowOverwrite) { - uint64_t FieldSize = Field->getBitWidthValue(CGM.getContext()); + const CGRecordLayout &RL = + CGM.getTypes().getCGRecordLayout(Field->getParent()); + const CGBitFieldInfo &Info = RL.getBitFieldInfo(Field); llvm::APInt FieldValue = CI->getValue(); // Promote the size of FieldValue if necessary // FIXME: This should never occur, but currently it can because initializer // constants are cast to bool, and because clang is not enforcing bitfield // width limits. - if (FieldSize > FieldValue.getBitWidth()) - FieldValue = FieldValue.zext(FieldSize); + if (Info.Size > FieldValue.getBitWidth()) + FieldValue = FieldValue.zext(Info.Size); // Truncate the size of FieldValue to the bit field size. - if (FieldSize < FieldValue.getBitWidth()) - FieldValue = FieldValue.trunc(FieldSize); + if (Info.Size < FieldValue.getBitWidth()) + FieldValue = FieldValue.trunc(Info.Size); return Builder.addBits(FieldValue, CGM.getContext().toBits(StartOffset) + FieldOffset, @@ -766,7 +777,7 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, if (const CXXRecordDecl *CD = dyn_cast<CXXRecordDecl>(RD)) { // Add a vtable pointer, if we need one and it hasn't already been added. 
- if (CD->isDynamicClass() && !IsPrimaryBase) { + if (Layout.hasOwnVFPtr()) { llvm::Constant *VTableAddressPoint = CGM.getCXXABI().getVTableAddressPointForConstExpr( BaseSubobject(CD, Offset), VTableClass); @@ -1000,6 +1011,8 @@ public: } llvm::Constant *VisitConstantExpr(ConstantExpr *CE, QualType T) { + if (llvm::Constant *Result = Emitter.tryEmitConstantExpr(CE)) + return Result; return Visit(CE->getSubExpr(), T); } @@ -1167,9 +1180,7 @@ public: } llvm::Constant *VisitExprWithCleanups(ExprWithCleanups *E, QualType T) { - if (!E->cleanupsHaveSideEffects()) - return Visit(E->getSubExpr(), T); - return nullptr; + return Visit(E->getSubExpr(), T); } llvm::Constant *VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *E, @@ -1269,19 +1280,7 @@ public: if (!E->getConstructor()->isTrivial()) return nullptr; - // FIXME: We should not have to call getBaseElementType here. - const auto *RT = - CGM.getContext().getBaseElementType(Ty)->castAs<RecordType>(); - const CXXRecordDecl *RD = cast<CXXRecordDecl>(RT->getDecl()); - - // If the class doesn't have a trivial destructor, we can't emit it as a - // constant expr. - if (!RD->hasTrivialDestructor()) - return nullptr; - - // Only copy and default constructors can be trivial. - - + // Only default and copy/move constructors can be trivial. if (E->getNumArgs()) { assert(E->getNumArgs() == 1 && "trivial ctor with > 1 argument"); assert(E->getConstructor()->isCopyOrMoveConstructor() && @@ -1361,6 +1360,20 @@ ConstantEmitter::tryEmitAbstract(const APValue &value, QualType destType) { return validateAndPopAbstract(C, state); } +llvm::Constant *ConstantEmitter::tryEmitConstantExpr(const ConstantExpr *CE) { + if (!CE->hasAPValueResult()) + return nullptr; + const Expr *Inner = CE->getSubExpr()->IgnoreImplicit(); + QualType RetType; + if (auto *Call = dyn_cast<CallExpr>(Inner)) + RetType = Call->getCallReturnType(CGF->getContext()); + else if (auto *Ctor = dyn_cast<CXXConstructExpr>(Inner)) + RetType = Ctor->getType(); + llvm::Constant *Res = + emitAbstract(CE->getBeginLoc(), CE->getAPValueResult(), RetType); + return Res; +} + llvm::Constant * ConstantEmitter::emitAbstract(const Expr *E, QualType destType) { auto state = pushAbstract(); @@ -1769,7 +1782,6 @@ private: ConstantLValue VisitCallExpr(const CallExpr *E); ConstantLValue VisitBlockExpr(const BlockExpr *E); ConstantLValue VisitCXXTypeidExpr(const CXXTypeidExpr *E); - ConstantLValue VisitCXXUuidofExpr(const CXXUuidofExpr *E); ConstantLValue VisitMaterializeTemporaryExpr( const MaterializeTemporaryExpr *E); @@ -1884,6 +1896,9 @@ ConstantLValueEmitter::tryEmitBase(const APValue::LValueBase &base) { } } + if (auto *GD = dyn_cast<MSGuidDecl>(D)) + return CGM.GetAddrOfMSGuidDecl(GD); + return nullptr; } @@ -1904,6 +1919,8 @@ ConstantLValueEmitter::tryEmitBase(const APValue::LValueBase &base) { ConstantLValue ConstantLValueEmitter::VisitConstantExpr(const ConstantExpr *E) { + if (llvm::Constant *Result = Emitter.tryEmitConstantExpr(E)) + return Result; return Visit(E->getSubExpr()); } @@ -1994,11 +2011,6 @@ ConstantLValueEmitter::VisitCXXTypeidExpr(const CXXTypeidExpr *E) { } ConstantLValue -ConstantLValueEmitter::VisitCXXUuidofExpr(const CXXUuidofExpr *E) { - return CGM.GetAddrOfUuidDescriptor(E); -} - -ConstantLValue ConstantLValueEmitter::VisitMaterializeTemporaryExpr( const MaterializeTemporaryExpr *E) { assert(E->getStorageDuration() == SD_Static); diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 3f23fe11e4f5..6131f97995dc 100644 --- 
a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -37,6 +37,7 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsPowerPC.h" +#include "llvm/IR/MatrixBuilder.h" #include "llvm/IR/Module.h" #include <cstdarg> @@ -129,11 +130,10 @@ struct BinOpInfo { return true; } - /// Check if either operand is a fixed point type or integer type, with at - /// least one being a fixed point type. In any case, this - /// operation did not follow usual arithmetic conversion and both operands may - /// not be the same. - bool isFixedPointBinOp() const { + /// Check if at least one operand is a fixed point type. In such cases, this + /// operation did not follow usual arithmetic conversion and both operands + /// might not be of the same type. + bool isFixedPointOp() const { // We cannot simply check the result type since comparison operations return // an int. if (const auto *BinOp = dyn_cast<BinaryOperator>(E)) { @@ -141,6 +141,8 @@ struct BinOpInfo { QualType RHSType = BinOp->getRHS()->getType(); return LHSType->isFixedPointType() || RHSType->isFixedPointType(); } + if (const auto *UnOp = dyn_cast<UnaryOperator>(E)) + return UnOp->getSubExpr()->getType()->isFixedPointType(); return false; } }; @@ -213,22 +215,6 @@ static bool CanElideOverflowCheck(const ASTContext &Ctx, const BinOpInfo &Op) { (2 * Ctx.getTypeSize(RHSTy)) < PromotedSize; } -/// Update the FastMathFlags of LLVM IR from the FPOptions in LangOptions. -static void updateFastMathFlags(llvm::FastMathFlags &FMF, - FPOptions FPFeatures) { - FMF.setAllowContract(FPFeatures.allowFPContractAcrossStatement()); -} - -/// Propagate fast-math flags from \p Op to the instruction in \p V. -static Value *propagateFMFlags(Value *V, const BinOpInfo &Op) { - if (auto *I = dyn_cast<llvm::Instruction>(V)) { - llvm::FastMathFlags FMF = I->getFastMathFlags(); - updateFastMathFlags(FMF, Op.FPFeatures); - I->setFastMathFlags(FMF); - } - return V; -} - class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, Value*> { CodeGenFunction &CGF; @@ -297,7 +283,7 @@ public: Value *AlignmentValue = CGF.EmitScalarExpr(AVAttr->getAlignment()); llvm::ConstantInt *AlignmentCI = cast<llvm::ConstantInt>(AlignmentValue); - CGF.EmitAlignmentAssumption(V, E, AVAttr->getLocation(), AlignmentCI); + CGF.emitAlignmentAssumption(V, E, AVAttr->getLocation(), AlignmentCI); } /// EmitLoadOfLValue - Given an expression with complex type that represents a @@ -427,12 +413,18 @@ public: } Value *VisitStmt(Stmt *S) { - S->dump(CGF.getContext().getSourceManager()); + S->dump(llvm::errs(), CGF.getContext()); llvm_unreachable("Stmt can't have complex result type!"); } Value *VisitExpr(Expr *S); Value *VisitConstantExpr(ConstantExpr *E) { + if (Value *Result = ConstantEmitter(CGF).tryEmitConstantExpr(E)) { + if (E->isGLValue()) + return CGF.Builder.CreateLoad(Address( + Result, CGF.getContext().getTypeAlignInChars(E->getType()))); + return Result; + } return Visit(E->getSubExpr()); } Value *VisitParenExpr(ParenExpr *PE) { @@ -551,11 +543,17 @@ public: } Value *VisitArraySubscriptExpr(ArraySubscriptExpr *E); + Value *VisitMatrixSubscriptExpr(MatrixSubscriptExpr *E); Value *VisitShuffleVectorExpr(ShuffleVectorExpr *E); Value *VisitConvertVectorExpr(ConvertVectorExpr *E); Value *VisitMemberExpr(MemberExpr *E); Value *VisitExtVectorElementExpr(Expr *E) { return EmitLoadOfLValue(E); } Value *VisitCompoundLiteralExpr(CompoundLiteralExpr *E) { + // Strictly speaking, we shouldn't be calling EmitLoadOfLValue, which + // 
transitively calls EmitCompoundLiteralLValue, here in C++ since compound + // literals aren't l-values in C++. We do so simply because that's the + // cleanest way to handle compound literals in C++. + // See the discussion here: https://reviews.llvm.org/D64464 return EmitLoadOfLValue(E); } @@ -680,6 +678,10 @@ public: return Builder.getInt1(E->isSatisfied()); } + Value *VisitRequiresExpr(const RequiresExpr *E) { + return Builder.getInt1(E->isSatisfied()); + } + Value *VisitArrayTypeTraitExpr(const ArrayTypeTraitExpr *E) { return llvm::ConstantInt::get(Builder.getInt32Ty(), E->getValue()); } @@ -728,15 +730,34 @@ public: } } + if (Ops.Ty->isConstantMatrixType()) { + llvm::MatrixBuilder<CGBuilderTy> MB(Builder); + // We need to check the types of the operands of the operator to get the + // correct matrix dimensions. + auto *BO = cast<BinaryOperator>(Ops.E); + auto *LHSMatTy = dyn_cast<ConstantMatrixType>( + BO->getLHS()->getType().getCanonicalType()); + auto *RHSMatTy = dyn_cast<ConstantMatrixType>( + BO->getRHS()->getType().getCanonicalType()); + if (LHSMatTy && RHSMatTy) + return MB.CreateMatrixMultiply(Ops.LHS, Ops.RHS, LHSMatTy->getNumRows(), + LHSMatTy->getNumColumns(), + RHSMatTy->getNumColumns()); + return MB.CreateScalarMultiply(Ops.LHS, Ops.RHS); + } + if (Ops.Ty->isUnsignedIntegerType() && CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow) && !CanElideOverflowCheck(CGF.getContext(), Ops)) return EmitOverflowCheckedBinOp(Ops); if (Ops.LHS->getType()->isFPOrFPVectorTy()) { - Value *V = Builder.CreateFMul(Ops.LHS, Ops.RHS, "mul"); - return propagateFMFlags(V, Ops); + // Preserve the old values + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Ops.FPFeatures); + return Builder.CreateFMul(Ops.LHS, Ops.RHS, "mul"); } + if (Ops.isFixedPointOp()) + return EmitFixedPointBinOp(Ops); return Builder.CreateMul(Ops.LHS, Ops.RHS, "mul"); } /// Create a binary op that checks for overflow. @@ -748,6 +769,11 @@ public: llvm::Value *Zero,bool isDiv); // Common helper for getting how wide LHS of shift is. static Value *GetWidthMinusOneValue(Value* LHS,Value* RHS); + + // Used for shifting constraints for OpenCL, do mask for powers of 2, URem for + // non powers of two. + Value *ConstrainShiftValue(Value *LHS, Value *RHS, const Twine &Name); + Value *EmitDiv(const BinOpInfo &Ops); Value *EmitRem(const BinOpInfo &Ops); Value *EmitAdd(const BinOpInfo &Ops); @@ -1297,7 +1323,7 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, "Splatted expr doesn't match with vector element type?"); // Splat the element across to all elements - unsigned NumElements = DstTy->getVectorNumElements(); + unsigned NumElements = cast<llvm::VectorType>(DstTy)->getNumElements(); return Builder.CreateVectorSplat(NumElements, Src, "splat"); } @@ -1315,8 +1341,8 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, // short or half vector. // Source and destination are both expected to be vectors. 
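The constant-matrix hunk above inspects both operand types because the result shape is LHS-rows by RHS-columns. A minimal sketch of the emitted call, assuming in-scope Values LHS and RHS holding flattened matrix operands and illustrative 2x3 and 3x4 shapes:

    // Multiply a 2x3 matrix by a 3x4 matrix; matrix values travel as
    // flattened vectors, so only the shapes are passed separately.
    llvm::MatrixBuilder<CGBuilderTy> MB(Builder);
    llvm::Value *Res = MB.CreateMatrixMultiply(LHS, RHS,
                                               /*LHSRows=*/2,
                                               /*LHSColumns=*/3,
                                               /*RHSColumns=*/4);
    // Res is the flattened 2x4 result (8 elements).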
- llvm::Type *SrcElementTy = SrcTy->getVectorElementType(); - llvm::Type *DstElementTy = DstTy->getVectorElementType(); + llvm::Type *SrcElementTy = cast<llvm::VectorType>(SrcTy)->getElementType(); + llvm::Type *DstElementTy = cast<llvm::VectorType>(DstTy)->getElementType(); (void)DstElementTy; assert(((SrcElementTy->isIntegerTy() && @@ -1622,8 +1648,8 @@ Value *ScalarExprEmitter::VisitShuffleVectorExpr(ShuffleVectorExpr *E) { // n = extract mask i // x = extract val n // newv = insert newv, x, i - llvm::VectorType *RTy = llvm::VectorType::get(LTy->getElementType(), - MTy->getNumElements()); + auto *RTy = llvm::FixedVectorType::get(LTy->getElementType(), + MTy->getNumElements()); Value* NewV = llvm::UndefValue::get(RTy); for (unsigned i = 0, e = MTy->getNumElements(); i != e; ++i) { Value *IIndx = llvm::ConstantInt::get(CGF.SizeTy, i); @@ -1638,18 +1664,17 @@ Value *ScalarExprEmitter::VisitShuffleVectorExpr(ShuffleVectorExpr *E) { Value* V1 = CGF.EmitScalarExpr(E->getExpr(0)); Value* V2 = CGF.EmitScalarExpr(E->getExpr(1)); - SmallVector<llvm::Constant*, 32> indices; + SmallVector<int, 32> Indices; for (unsigned i = 2; i < E->getNumSubExprs(); ++i) { llvm::APSInt Idx = E->getShuffleMaskIdx(CGF.getContext(), i-2); // Check for -1 and output it as undef in the IR. if (Idx.isSigned() && Idx.isAllOnesValue()) - indices.push_back(llvm::UndefValue::get(CGF.Int32Ty)); + Indices.push_back(-1); else - indices.push_back(Builder.getInt32(Idx.getZExtValue())); + Indices.push_back(Idx.getZExtValue()); } - Value *SV = llvm::ConstantVector::get(indices); - return Builder.CreateShuffleVector(V1, V2, SV, "shuffle"); + return Builder.CreateShuffleVector(V1, V2, Indices, "shuffle"); } Value *ScalarExprEmitter::VisitConvertVectorExpr(ConvertVectorExpr *E) { @@ -1682,8 +1707,8 @@ Value *ScalarExprEmitter::VisitConvertVectorExpr(ConvertVectorExpr *E) { assert(DstTy->isVectorTy() && "ConvertVector destination IR type must be a vector"); - llvm::Type *SrcEltTy = SrcTy->getVectorElementType(), - *DstEltTy = DstTy->getVectorElementType(); + llvm::Type *SrcEltTy = cast<llvm::VectorType>(SrcTy)->getElementType(), + *DstEltTy = cast<llvm::VectorType>(DstTy)->getElementType(); if (DstEltType->isBooleanType()) { assert((SrcEltTy->isFloatingPointTy() || @@ -1764,22 +1789,34 @@ Value *ScalarExprEmitter::VisitArraySubscriptExpr(ArraySubscriptExpr *E) { return Builder.CreateExtractElement(Base, Idx, "vecext"); } -static llvm::Constant *getMaskElt(llvm::ShuffleVectorInst *SVI, unsigned Idx, - unsigned Off, llvm::Type *I32Ty) { +Value *ScalarExprEmitter::VisitMatrixSubscriptExpr(MatrixSubscriptExpr *E) { + TestAndClearIgnoreResultAssign(); + + // Handle the vector case. The base must be a vector, the index must be an + // integer value. + Value *RowIdx = Visit(E->getRowIdx()); + Value *ColumnIdx = Visit(E->getColumnIdx()); + Value *Matrix = Visit(E->getBase()); + + // TODO: Should we emit bounds checks with SanitizerKind::ArrayBounds? 
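At the source level, a MatrixSubscriptExpr comes from Clang's matrix extension (enabled with -fenable-matrix). A small illustrative example of code that reaches this visitor; the typedef name is an assumption:

    typedef float m4x4_t __attribute__((matrix_type(4, 4)));

    float elem(m4x4_t M, int r, int c) {
      return M[r][c]; // lowered via MatrixBuilder::CreateExtractElement below
    }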
+ llvm::MatrixBuilder<CGBuilderTy> MB(Builder); + return MB.CreateExtractElement( + Matrix, RowIdx, ColumnIdx, + E->getBase()->getType()->getAs<ConstantMatrixType>()->getNumRows()); +} + +static int getMaskElt(llvm::ShuffleVectorInst *SVI, unsigned Idx, + unsigned Off) { int MV = SVI->getMaskValue(Idx); if (MV == -1) - return llvm::UndefValue::get(I32Ty); - return llvm::ConstantInt::get(I32Ty, Off+MV); + return -1; + return Off + MV; } -static llvm::Constant *getAsInt32(llvm::ConstantInt *C, llvm::Type *I32Ty) { - if (C->getBitWidth() != 32) { - assert(llvm::ConstantInt::isValueValidForType(I32Ty, - C->getZExtValue()) && - "Index operand too large for shufflevector mask!"); - return llvm::ConstantInt::get(I32Ty, C->getZExtValue()); - } - return C; +static int getAsInt32(llvm::ConstantInt *C, llvm::Type *I32Ty) { + assert(llvm::ConstantInt::isValueValidForType(I32Ty, C->getZExtValue()) && + "Index operand too large for shufflevector mask!"); + return C->getZExtValue(); } Value *ScalarExprEmitter::VisitInitListExpr(InitListExpr *E) { @@ -1816,7 +1853,7 @@ Value *ScalarExprEmitter::VisitInitListExpr(InitListExpr *E) { for (unsigned i = 0; i != NumInitElements; ++i) { Expr *IE = E->getInit(i); Value *Init = Visit(IE); - SmallVector<llvm::Constant*, 16> Args; + SmallVector<int, 16> Args; llvm::VectorType *VVT = dyn_cast<llvm::VectorType>(Init->getType()); @@ -1834,7 +1871,7 @@ Value *ScalarExprEmitter::VisitInitListExpr(InitListExpr *E) { // insert into undef -> shuffle (src, undef) // shufflemask must use an i32 Args.push_back(getAsInt32(C, CGF.Int32Ty)); - Args.resize(ResElts, llvm::UndefValue::get(CGF.Int32Ty)); + Args.resize(ResElts, -1); LHS = EI->getVectorOperand(); RHS = V; @@ -1843,17 +1880,16 @@ Value *ScalarExprEmitter::VisitInitListExpr(InitListExpr *E) { // insert into undefshuffle && size match -> shuffle (v, src) llvm::ShuffleVectorInst *SVV = cast<llvm::ShuffleVectorInst>(V); for (unsigned j = 0; j != CurIdx; ++j) - Args.push_back(getMaskElt(SVV, j, 0, CGF.Int32Ty)); - Args.push_back(Builder.getInt32(ResElts + C->getZExtValue())); - Args.resize(ResElts, llvm::UndefValue::get(CGF.Int32Ty)); + Args.push_back(getMaskElt(SVV, j, 0)); + Args.push_back(ResElts + C->getZExtValue()); + Args.resize(ResElts, -1); LHS = cast<llvm::ShuffleVectorInst>(V)->getOperand(0); RHS = EI->getVectorOperand(); VIsUndefShuffle = false; } if (!Args.empty()) { - llvm::Constant *Mask = llvm::ConstantVector::get(Args); - V = Builder.CreateShuffleVector(LHS, RHS, Mask); + V = Builder.CreateShuffleVector(LHS, RHS, Args); ++CurIdx; continue; } @@ -1882,15 +1918,14 @@ Value *ScalarExprEmitter::VisitInitListExpr(InitListExpr *E) { // If the current vector initializer is a shuffle with undef, merge // this shuffle directly into it. if (VIsUndefShuffle) { - Args.push_back(getMaskElt(cast<llvm::ShuffleVectorInst>(V), j, 0, - CGF.Int32Ty)); + Args.push_back(getMaskElt(cast<llvm::ShuffleVectorInst>(V), j, 0)); } else { - Args.push_back(Builder.getInt32(j)); + Args.push_back(j); } } for (unsigned j = 0, je = InitElts; j != je; ++j) - Args.push_back(getMaskElt(SVI, j, Offset, CGF.Int32Ty)); - Args.resize(ResElts, llvm::UndefValue::get(CGF.Int32Ty)); + Args.push_back(getMaskElt(SVI, j, Offset)); + Args.resize(ResElts, -1); if (VIsUndefShuffle) V = cast<llvm::ShuffleVectorInst>(V)->getOperand(0); @@ -1903,26 +1938,24 @@ Value *ScalarExprEmitter::VisitInitListExpr(InitListExpr *E) { // to the vector initializer into V. 
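Several hunks in this file migrate shuffle masks from vectors of i32 constants to plain integer arrays, using the ArrayRef<int> overload of CreateShuffleVector in which -1 marks an undef lane. A minimal sketch, with V1 and V2 as assumed in-scope vector Values:

    // Keep lanes 0..2 of V1 and leave the fourth lane undef.
    llvm::SmallVector<int, 4> Mask = {0, 1, 2, -1};
    llvm::Value *Shuf = Builder.CreateShuffleVector(V1, V2, Mask, "shuffle");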
if (Args.empty()) { for (unsigned j = 0; j != InitElts; ++j) - Args.push_back(Builder.getInt32(j)); - Args.resize(ResElts, llvm::UndefValue::get(CGF.Int32Ty)); - llvm::Constant *Mask = llvm::ConstantVector::get(Args); - Init = Builder.CreateShuffleVector(Init, llvm::UndefValue::get(VVT), - Mask, "vext"); + Args.push_back(j); + Args.resize(ResElts, -1); + Init = Builder.CreateShuffleVector(Init, llvm::UndefValue::get(VVT), Args, + "vext"); Args.clear(); for (unsigned j = 0; j != CurIdx; ++j) - Args.push_back(Builder.getInt32(j)); + Args.push_back(j); for (unsigned j = 0; j != InitElts; ++j) - Args.push_back(Builder.getInt32(j+Offset)); - Args.resize(ResElts, llvm::UndefValue::get(CGF.Int32Ty)); + Args.push_back(j + Offset); + Args.resize(ResElts, -1); } // If V is undef, make sure it ends up on the RHS of the shuffle to aid // merging subsequent shuffles into this one. if (CurIdx == 0) std::swap(V, Init); - llvm::Constant *Mask = llvm::ConstantVector::get(Args); - V = Builder.CreateShuffleVector(V, Init, Mask, "vecinit"); + V = Builder.CreateShuffleVector(V, Init, Args, "vecinit"); VIsUndefShuffle = isa<llvm::UndefValue>(Init); CurIdx += InitElts; } @@ -2036,11 +2069,15 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { } } - // Update heapallocsite metadata when there is an explicit cast. - if (llvm::CallInst *CI = dyn_cast<llvm::CallInst>(Src)) - if (CI->getMetadata("heapallocsite") && isa<ExplicitCastExpr>(CE)) - CGF.getDebugInfo()-> - addHeapAllocSiteMetadata(CI, CE->getType(), CE->getExprLoc()); + // Update heapallocsite metadata when there is an explicit pointer cast. + if (auto *CI = dyn_cast<llvm::CallBase>(Src)) { + if (CI->getMetadata("heapallocsite") && isa<ExplicitCastExpr>(CE)) { + QualType PointeeType = DestTy->getPointeeType(); + if (!PointeeType.isNull()) + CGF.getDebugInfo()->addHeapAllocSiteMetadata(CI, PointeeType, + CE->getExprLoc()); + } + } return Builder.CreateBitCast(Src, DstTy); } @@ -2210,7 +2247,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { llvm::Type *DstTy = ConvertType(DestTy); Value *Elt = Visit(const_cast<Expr*>(E)); // Splat the element across to all elements - unsigned NumElements = DstTy->getVectorNumElements(); + unsigned NumElements = cast<llvm::VectorType>(DstTy)->getNumElements(); return Builder.CreateVectorSplat(NumElements, Elt, "splat"); } @@ -2311,7 +2348,6 @@ Value *ScalarExprEmitter::VisitStmtExpr(const StmtExpr *E) { } Value *ScalarExprEmitter::VisitExprWithCleanups(ExprWithCleanups *E) { - CGF.enterFullExpression(E); CodeGenFunction::RunCleanupsScope Scope(CGF); Value *V = Visit(E->getSubExpr()); // Defend against dominance problems caused by jumps out of expression @@ -2325,13 +2361,14 @@ Value *ScalarExprEmitter::VisitExprWithCleanups(ExprWithCleanups *E) { //===----------------------------------------------------------------------===// static BinOpInfo createBinOpInfoFromIncDec(const UnaryOperator *E, - llvm::Value *InVal, bool IsInc) { + llvm::Value *InVal, bool IsInc, + FPOptions FPFeatures) { BinOpInfo BinOp; BinOp.LHS = InVal; BinOp.RHS = llvm::ConstantInt::get(InVal->getType(), 1, false); BinOp.Ty = E->getType(); BinOp.Opcode = IsInc ? BO_Add : BO_Sub; - // FIXME: once UnaryOperator carries FPFeatures, copy it here. 
+ BinOp.FPFeatures = FPFeatures; BinOp.E = E; return BinOp; } @@ -2351,7 +2388,8 @@ llvm::Value *ScalarExprEmitter::EmitIncDecConsiderOverflowBehavior( case LangOptions::SOB_Trapping: if (!E->canOverflow()) return Builder.CreateNSWAdd(InVal, Amount, Name); - return EmitOverflowCheckedBinOp(createBinOpInfoFromIncDec(E, InVal, IsInc)); + return EmitOverflowCheckedBinOp(createBinOpInfoFromIncDec( + E, InVal, IsInc, E->getFPFeaturesInEffect(CGF.getLangOpts()))); } llvm_unreachable("Unknown SignedOverflowBehaviorTy"); } @@ -2497,8 +2535,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, value = EmitIncDecConsiderOverflowBehavior(E, value, isInc); } else if (E->canOverflow() && type->isUnsignedIntegerType() && CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow)) { - value = - EmitOverflowCheckedBinOp(createBinOpInfoFromIncDec(E, value, isInc)); + value = EmitOverflowCheckedBinOp(createBinOpInfoFromIncDec( + E, value, isInc, E->getFPFeaturesInEffect(CGF.getLangOpts()))); } else { llvm::Value *amt = llvm::ConstantInt::get(value->getType(), amount, true); value = Builder.CreateAdd(value, amt, isInc ? "inc" : "dec"); @@ -2609,6 +2647,36 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, } } + // Fixed-point types. + } else if (type->isFixedPointType()) { + // Fixed-point types are tricky. In some cases, it isn't possible to + // represent a 1 or a -1 in the type at all. Piggyback off of + // EmitFixedPointBinOp to avoid having to reimplement saturation. + BinOpInfo Info; + Info.E = E; + Info.Ty = E->getType(); + Info.Opcode = isInc ? BO_Add : BO_Sub; + Info.LHS = value; + Info.RHS = llvm::ConstantInt::get(value->getType(), 1, false); + // If the type is signed, it's better to represent this as +(-1) or -(-1), + // since -1 is guaranteed to be representable. + if (type->isSignedFixedPointType()) { + Info.Opcode = isInc ? BO_Sub : BO_Add; + Info.RHS = Builder.CreateNeg(Info.RHS); + } + // Now, convert from our invented integer literal to the type of the unary + // op. This will upscale and saturate if necessary. This value can become + // undef in some cases. + FixedPointSemantics SrcSema = + FixedPointSemantics::GetIntegerSemantics(value->getType() + ->getScalarSizeInBits(), + /*IsSigned=*/true); + FixedPointSemantics DstSema = + CGF.getContext().getFixedPointSemantics(Info.Ty); + Info.RHS = EmitFixedPointConversion(Info.RHS, SrcSema, DstSema, + E->getExprLoc()); + value = EmitFixedPointBinOp(Info); + // Objective-C pointer types. } else { const ObjCObjectPointerType *OPT = type->castAs<ObjCObjectPointerType>(); @@ -2668,7 +2736,7 @@ Value *ScalarExprEmitter::VisitUnaryMinus(const UnaryOperator *E) { BinOp.LHS = llvm::Constant::getNullValue(BinOp.RHS->getType()); BinOp.Ty = E->getType(); BinOp.Opcode = BO_Sub; - // FIXME: once UnaryOperator carries FPFeatures, copy it here. + BinOp.FPFeatures = E->getFPFeaturesInEffect(CGF.getLangOpts()); BinOp.E = E; return EmitSub(BinOp); } @@ -2681,13 +2749,17 @@ Value *ScalarExprEmitter::VisitUnaryNot(const UnaryOperator *E) { Value *ScalarExprEmitter::VisitUnaryLNot(const UnaryOperator *E) { // Perform vector logical not on comparison with zero vector. 
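The signed branch above rests on a representability fact worth making explicit: a signed fractional type such as _Fract spans [-1, 1), so +1 does not exist in the type while -1 always does; hence x++ becomes x - (-1). A condensed mirror of that rewrite, assuming a BinOpInfo Info populated as in the code above:

    // Signed fixed-point increment: subtract -1 instead of adding +1.
    Info.Opcode = isInc ? BO_Sub : BO_Add;           // flip add <-> sub
    Info.RHS = Builder.CreateNeg(Info.RHS);          // literal becomes -1
    llvm::Value *NewVal = EmitFixedPointBinOp(Info); // saturates if _Sat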
- if (E->getType()->isExtVectorType()) { + if (E->getType()->isVectorType() && + E->getType()->castAs<VectorType>()->getVectorKind() == + VectorType::GenericVector) { Value *Oper = Visit(E->getSubExpr()); Value *Zero = llvm::Constant::getNullValue(Oper->getType()); Value *Result; - if (Oper->getType()->isFPOrFPVectorTy()) + if (Oper->getType()->isFPOrFPVectorTy()) { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII( + CGF, E->getFPFeaturesInEffect(CGF.getLangOpts())); Result = Builder.CreateFCmp(llvm::CmpInst::FCMP_OEQ, Oper, Zero, "cmp"); - else + } else Result = Builder.CreateICmp(llvm::CmpInst::ICMP_EQ, Oper, Zero, "cmp"); return Builder.CreateSExt(Result, ConvertType(E->getType()), "sext"); } @@ -2888,7 +2960,7 @@ BinOpInfo ScalarExprEmitter::EmitBinOps(const BinaryOperator *E) { Result.RHS = Visit(E->getRHS()); Result.Ty = E->getType(); Result.Opcode = E->getOpcode(); - Result.FPFeatures = E->getFPFeatures(); + Result.FPFeatures = E->getFPFeaturesInEffect(CGF.getLangOpts()); Result.E = E; return Result; } @@ -2908,7 +2980,7 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( OpInfo.RHS = Visit(E->getRHS()); OpInfo.Ty = E->getComputationResultType(); OpInfo.Opcode = E->getOpcode(); - OpInfo.FPFeatures = E->getFPFeatures(); + OpInfo.FPFeatures = E->getFPFeaturesInEffect(CGF.getLangOpts()); OpInfo.E = E; // Load/convert the LHS. LValue LHSLV = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store); @@ -3096,7 +3168,9 @@ Value *ScalarExprEmitter::EmitDiv(const BinOpInfo &Ops) { } if (Ops.LHS->getType()->isFPOrFPVectorTy()) { - llvm::Value *Val = Builder.CreateFDiv(Ops.LHS, Ops.RHS, "div"); + llvm::Value *Val; + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Ops.FPFeatures); + Val = Builder.CreateFDiv(Ops.LHS, Ops.RHS, "div"); if (CGF.getLangOpts().OpenCL && !CGF.CGM.getCodeGenOpts().CorrectlyRoundedDivSqrt) { // OpenCL v1.1 s7.4: minimum accuracy of single precision / is 2.5ulp @@ -3112,6 +3186,8 @@ Value *ScalarExprEmitter::EmitDiv(const BinOpInfo &Ops) { } return Val; } + else if (Ops.isFixedPointOp()) + return EmitFixedPointBinOp(Ops); else if (Ops.Ty->hasUnsignedIntegerRepresentation()) return Builder.CreateUDiv(Ops.LHS, Ops.RHS, "div"); else @@ -3361,7 +3437,7 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, // the add operand respectively. This allows fmuladd to represent a*b-c, or // c-a*b. Patterns in LLVM should catch the negated forms and translate them to // efficient operations. 
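Concretely, when contraction is allowed a*b+c collapses to one intrinsic call with a single rounding step. A minimal sketch, assuming float Values A, B and C and an in-scope CGF:

    // llvm.fmuladd(a, b, c): contracted multiply-add, rounded once.
    llvm::Function *FMA =
        CGF.CGM.getIntrinsic(llvm::Intrinsic::fmuladd, A->getType());
    llvm::Value *Res = Builder.CreateCall(FMA, {A, B, C});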
-static Value* buildFMulAdd(llvm::BinaryOperator *MulOp, Value *Addend, +static Value* buildFMulAdd(llvm::Instruction *MulOp, Value *Addend, const CodeGenFunction &CGF, CGBuilderTy &Builder, bool negMul, bool negAdd) { assert(!(negMul && negAdd) && "Only one of negMul and negAdd should be set."); @@ -3373,12 +3449,23 @@ static Value* buildFMulAdd(llvm::BinaryOperator *MulOp, Value *Addend, if (negAdd) Addend = Builder.CreateFNeg(Addend, "neg"); - Value *FMulAdd = Builder.CreateCall( - CGF.CGM.getIntrinsic(llvm::Intrinsic::fmuladd, Addend->getType()), - {MulOp0, MulOp1, Addend}); - MulOp->eraseFromParent(); + Value *FMulAdd = nullptr; + if (Builder.getIsFPConstrained()) { + assert(isa<llvm::ConstrainedFPIntrinsic>(MulOp) && + "Only constrained operation should be created when Builder is in FP " + "constrained mode"); + FMulAdd = Builder.CreateConstrainedFPCall( + CGF.CGM.getIntrinsic(llvm::Intrinsic::experimental_constrained_fmuladd, + Addend->getType()), + {MulOp0, MulOp1, Addend}); + } else { + FMulAdd = Builder.CreateCall( + CGF.CGM.getIntrinsic(llvm::Intrinsic::fmuladd, Addend->getType()), + {MulOp0, MulOp1, Addend}); + } + MulOp->eraseFromParent(); - return FMulAdd; + return FMulAdd; } // Check whether it would be legal to emit an fmuladd intrinsic call to @@ -3413,6 +3500,19 @@ static Value* tryEmitFMulAdd(const BinOpInfo &op, return buildFMulAdd(RHSBinOp, op.LHS, CGF, Builder, isSub, false); } + if (auto *LHSBinOp = dyn_cast<llvm::CallBase>(op.LHS)) { + if (LHSBinOp->getIntrinsicID() == + llvm::Intrinsic::experimental_constrained_fmul && + LHSBinOp->use_empty()) + return buildFMulAdd(LHSBinOp, op.RHS, CGF, Builder, false, isSub); + } + if (auto *RHSBinOp = dyn_cast<llvm::CallBase>(op.RHS)) { + if (RHSBinOp->getIntrinsicID() == + llvm::Intrinsic::experimental_constrained_fmul && + RHSBinOp->use_empty()) + return buildFMulAdd(RHSBinOp, op.LHS, CGF, Builder, isSub, false); + } + return nullptr; } @@ -3436,21 +3536,26 @@ Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) { } } + if (op.Ty->isConstantMatrixType()) { + llvm::MatrixBuilder<CGBuilderTy> MB(Builder); + return MB.CreateAdd(op.LHS, op.RHS); + } + if (op.Ty->isUnsignedIntegerType() && CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow) && !CanElideOverflowCheck(CGF.getContext(), op)) return EmitOverflowCheckedBinOp(op); if (op.LHS->getType()->isFPOrFPVectorTy()) { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures); // Try to form an fmuladd. if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder)) return FMulAdd; - Value *V = Builder.CreateFAdd(op.LHS, op.RHS, "add"); - return propagateFMFlags(V, op); + return Builder.CreateFAdd(op.LHS, op.RHS, "add"); } - if (op.isFixedPointBinOp()) + if (op.isFixedPointOp()) return EmitFixedPointBinOp(op); return Builder.CreateAdd(op.LHS, op.RHS, "add"); @@ -3462,14 +3567,27 @@ Value *ScalarExprEmitter::EmitFixedPointBinOp(const BinOpInfo &op) { using llvm::APSInt; using llvm::ConstantInt; - const auto *BinOp = cast<BinaryOperator>(op.E); - - // The result is a fixed point type and at least one of the operands is fixed - // point while the other is either fixed point or an int. This resulting type - // should be determined by Sema::handleFixedPointConversions(). + // This is either a binary operation where at least one of the operands is + // a fixed-point type, or a unary operation where the operand is a fixed-point + // type. The result type of a binary operation is determined by + // Sema::handleFixedPointConversions(). 
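Once both operands have been converted to the common semantics, the saturating forms map directly onto LLVM intrinsics: the plain *_sat intrinsics for add/sub, and the scale-aware fixed-point intrinsics for mul/div. A condensed sketch of the signed saturated cases, assuming FullLHS and FullRHS share a type and Scale is the common scale:

    llvm::Value *Sum = Builder.CreateBinaryIntrinsic(
        llvm::Intrinsic::sadd_sat, FullLHS, FullRHS);
    llvm::Value *Prod = Builder.CreateIntrinsic(
        llvm::Intrinsic::smul_fix_sat, {FullLHS->getType()},
        {FullLHS, FullRHS, Builder.getInt32(Scale)});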
QualType ResultTy = op.Ty; - QualType LHSTy = BinOp->getLHS()->getType(); - QualType RHSTy = BinOp->getRHS()->getType(); + QualType LHSTy, RHSTy; + if (const auto *BinOp = dyn_cast<BinaryOperator>(op.E)) { + RHSTy = BinOp->getRHS()->getType(); + if (const auto *CAO = dyn_cast<CompoundAssignOperator>(BinOp)) { + // For compound assignment, the effective type of the LHS at this point + // is the computation LHS type, not the actual LHS type, and the final + // result type is not the type of the expression but rather the + // computation result type. + LHSTy = CAO->getComputationLHSType(); + ResultTy = CAO->getComputationResultType(); + } else + LHSTy = BinOp->getLHS()->getType(); + } else if (const auto *UnOp = dyn_cast<UnaryOperator>(op.E)) { + LHSTy = UnOp->getSubExpr()->getType(); + RHSTy = UnOp->getSubExpr()->getType(); + } ASTContext &Ctx = CGF.getContext(); Value *LHS = op.LHS; Value *RHS = op.RHS; @@ -3481,16 +3599,17 @@ Value *ScalarExprEmitter::EmitFixedPointBinOp(const BinOpInfo &op) { // Convert the operands to the full precision type. Value *FullLHS = EmitFixedPointConversion(LHS, LHSFixedSema, CommonFixedSema, - BinOp->getExprLoc()); + op.E->getExprLoc()); Value *FullRHS = EmitFixedPointConversion(RHS, RHSFixedSema, CommonFixedSema, - BinOp->getExprLoc()); + op.E->getExprLoc()); - // Perform the actual addition. + // Perform the actual operation. Value *Result; - switch (BinOp->getOpcode()) { + switch (op.Opcode) { + case BO_AddAssign: case BO_Add: { - if (ResultFixedSema.isSaturated()) { - llvm::Intrinsic::ID IID = ResultFixedSema.isSigned() + if (CommonFixedSema.isSaturated()) { + llvm::Intrinsic::ID IID = CommonFixedSema.isSigned() ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat; Result = Builder.CreateBinaryIntrinsic(IID, FullLHS, FullRHS); @@ -3499,9 +3618,10 @@ Value *ScalarExprEmitter::EmitFixedPointBinOp(const BinOpInfo &op) { } break; } + case BO_SubAssign: case BO_Sub: { - if (ResultFixedSema.isSaturated()) { - llvm::Intrinsic::ID IID = ResultFixedSema.isSigned() + if (CommonFixedSema.isSaturated()) { + llvm::Intrinsic::ID IID = CommonFixedSema.isSigned() ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat; Result = Builder.CreateBinaryIntrinsic(IID, FullLHS, FullRHS); @@ -3510,6 +3630,32 @@ Value *ScalarExprEmitter::EmitFixedPointBinOp(const BinOpInfo &op) { } break; } + case BO_MulAssign: + case BO_Mul: { + llvm::Intrinsic::ID IID; + if (CommonFixedSema.isSaturated()) + IID = CommonFixedSema.isSigned() ? llvm::Intrinsic::smul_fix_sat + : llvm::Intrinsic::umul_fix_sat; + else + IID = CommonFixedSema.isSigned() ? llvm::Intrinsic::smul_fix + : llvm::Intrinsic::umul_fix; + Result = Builder.CreateIntrinsic(IID, {FullLHS->getType()}, + {FullLHS, FullRHS, Builder.getInt32(CommonFixedSema.getScale())}); + break; + } + case BO_DivAssign: + case BO_Div: { + llvm::Intrinsic::ID IID; + if (CommonFixedSema.isSaturated()) + IID = CommonFixedSema.isSigned() ? llvm::Intrinsic::sdiv_fix_sat + : llvm::Intrinsic::udiv_fix_sat; + else + IID = CommonFixedSema.isSigned() ? llvm::Intrinsic::sdiv_fix + : llvm::Intrinsic::udiv_fix; + Result = Builder.CreateIntrinsic(IID, {FullLHS->getType()}, + {FullLHS, FullRHS, Builder.getInt32(CommonFixedSema.getScale())}); + break; + } case BO_LT: return CommonFixedSema.isSigned() ? 
Builder.CreateICmpSLT(FullLHS, FullRHS) : Builder.CreateICmpULT(FullLHS, FullRHS); @@ -3529,17 +3675,11 @@ Value *ScalarExprEmitter::EmitFixedPointBinOp(const BinOpInfo &op) { return Builder.CreateICmpEQ(FullLHS, FullRHS); case BO_NE: return Builder.CreateICmpNE(FullLHS, FullRHS); - case BO_Mul: - case BO_Div: case BO_Shl: case BO_Shr: case BO_Cmp: case BO_LAnd: case BO_LOr: - case BO_MulAssign: - case BO_DivAssign: - case BO_AddAssign: - case BO_SubAssign: case BO_ShlAssign: case BO_ShrAssign: llvm_unreachable("Found unimplemented fixed point binary operation"); @@ -3560,7 +3700,7 @@ Value *ScalarExprEmitter::EmitFixedPointBinOp(const BinOpInfo &op) { // Convert to the result type. return EmitFixedPointConversion(Result, CommonFixedSema, ResultFixedSema, - BinOp->getExprLoc()); + op.E->getExprLoc()); } Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) { @@ -3581,20 +3721,25 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) { } } + if (op.Ty->isConstantMatrixType()) { + llvm::MatrixBuilder<CGBuilderTy> MB(Builder); + return MB.CreateSub(op.LHS, op.RHS); + } + if (op.Ty->isUnsignedIntegerType() && CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow) && !CanElideOverflowCheck(CGF.getContext(), op)) return EmitOverflowCheckedBinOp(op); if (op.LHS->getType()->isFPOrFPVectorTy()) { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, op.FPFeatures); // Try to form an fmuladd. if (Value *FMulAdd = tryEmitFMulAdd(op, CGF, Builder, true)) return FMulAdd; - Value *V = Builder.CreateFSub(op.LHS, op.RHS, "sub"); - return propagateFMFlags(V, op); + return Builder.CreateFSub(op.LHS, op.RHS, "sub"); } - if (op.isFixedPointBinOp()) + if (op.isFixedPointOp()) return EmitFixedPointBinOp(op); return Builder.CreateSub(op.LHS, op.RHS, "sub"); @@ -3666,6 +3811,21 @@ Value *ScalarExprEmitter::GetWidthMinusOneValue(Value* LHS,Value* RHS) { return llvm::ConstantInt::get(RHS->getType(), Ty->getBitWidth() - 1); } +Value *ScalarExprEmitter::ConstrainShiftValue(Value *LHS, Value *RHS, + const Twine &Name) { + llvm::IntegerType *Ty; + if (auto *VT = dyn_cast<llvm::VectorType>(LHS->getType())) + Ty = cast<llvm::IntegerType>(VT->getElementType()); + else + Ty = cast<llvm::IntegerType>(LHS->getType()); + + if (llvm::isPowerOf2_64(Ty->getBitWidth())) + return Builder.CreateAnd(RHS, GetWidthMinusOneValue(LHS, RHS), Name); + + return Builder.CreateURem( + RHS, llvm::ConstantInt::get(RHS->getType(), Ty->getBitWidth()), Name); +} + Value *ScalarExprEmitter::EmitShl(const BinOpInfo &Ops) { // LLVM requires the LHS and RHS to be the same type: promote or truncate the // RHS to the same size as the LHS. @@ -3676,12 +3836,11 @@ Value *ScalarExprEmitter::EmitShl(const BinOpInfo &Ops) { bool SanitizeBase = CGF.SanOpts.has(SanitizerKind::ShiftBase) && Ops.Ty->hasSignedIntegerRepresentation() && !CGF.getLangOpts().isSignedOverflowDefined() && - !CGF.getLangOpts().CPlusPlus2a; + !CGF.getLangOpts().CPlusPlus20; bool SanitizeExponent = CGF.SanOpts.has(SanitizerKind::ShiftExponent); // OpenCL 6.3j: shift values are effectively % word size of LHS. if (CGF.getLangOpts().OpenCL) - RHS = - Builder.CreateAnd(RHS, GetWidthMinusOneValue(Ops.LHS, RHS), "shl.mask"); + RHS = ConstrainShiftValue(Ops.LHS, RHS, "shl.mask"); else if ((SanitizeBase || SanitizeExponent) && isa<llvm::IntegerType>(Ops.LHS->getType())) { CodeGenFunction::SanitizerScope SanScope(&CGF); @@ -3743,8 +3902,7 @@ Value *ScalarExprEmitter::EmitShr(const BinOpInfo &Ops) { // OpenCL 6.3j: shift values are effectively % word size of LHS. 
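ConstrainShiftValue (defined earlier in this hunk) implements that rule without a division whenever it can: for a power-of-two bit width, masking and taking the remainder agree, so a single 'and' suffices; the URem fallback presumably covers non-power-of-two widths such as those _ExtInt permits. A sketch, assuming Width is the LHS element bit width and RHS already has the LHS type:

    // shift-amount % Width, cheap when Width is a power of two.
    llvm::Value *Amt =
        llvm::isPowerOf2_64(Width)
            ? Builder.CreateAnd(RHS, Width - 1, "shl.mask")
            : Builder.CreateURem(
                  RHS, llvm::ConstantInt::get(RHS->getType(), Width),
                  "shl.mask");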
if (CGF.getLangOpts().OpenCL) - RHS = - Builder.CreateAnd(RHS, GetWidthMinusOneValue(Ops.LHS, RHS), "shr.mask"); + RHS = ConstrainShiftValue(Ops.LHS, RHS, "shr.mask"); else if (CGF.SanOpts.has(SanitizerKind::ShiftExponent) && isa<llvm::IntegerType>(Ops.LHS->getType())) { CodeGenFunction::SanitizerScope SanScope(&CGF); @@ -3897,9 +4055,10 @@ Value *ScalarExprEmitter::EmitCompare(const BinaryOperator *E, E->getExprLoc()); } - if (BOInfo.isFixedPointBinOp()) { + if (BOInfo.isFixedPointOp()) { Result = EmitFixedPointBinOp(BOInfo); } else if (LHS->getType()->isFPOrFPVectorTy()) { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, BOInfo.FPFeatures); if (!IsSignaling) Result = Builder.CreateFCmp(FCmpOpc, LHS, RHS, "cmp"); else @@ -4052,6 +4211,8 @@ Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) { Value *RHS = Visit(E->getRHS()); Value *Zero = llvm::ConstantAggregateZero::get(LHS->getType()); if (LHS->getType()->isFPOrFPVectorTy()) { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII( + CGF, E->getFPFeaturesInEffect(CGF.getLangOpts())); LHS = Builder.CreateFCmp(llvm::CmpInst::FCMP_UNE, LHS, Zero, "cmp"); RHS = Builder.CreateFCmp(llvm::CmpInst::FCMP_UNE, RHS, Zero, "cmp"); } else { @@ -4136,6 +4297,8 @@ Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) { Value *RHS = Visit(E->getRHS()); Value *Zero = llvm::ConstantAggregateZero::get(LHS->getType()); if (LHS->getType()->isFPOrFPVectorTy()) { + CodeGenFunction::CGFPOptionsRAII FPOptsRAII( + CGF, E->getFPFeaturesInEffect(CGF.getLangOpts())); LHS = Builder.CreateFCmp(llvm::CmpInst::FCMP_UNE, LHS, Zero, "cmp"); RHS = Builder.CreateFCmp(llvm::CmpInst::FCMP_UNE, RHS, Zero, "cmp"); } else { @@ -4269,8 +4432,8 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) { // OpenCL: If the condition is a vector, we can treat this condition like // the select function. - if (CGF.getLangOpts().OpenCL - && condExpr->getType()->isVectorType()) { + if ((CGF.getLangOpts().OpenCL && condExpr->getType()->isVectorType()) || + condExpr->getType()->isExtVectorType()) { CGF.incrementProfileCounter(E); llvm::Value *CondV = CGF.EmitScalarExpr(condExpr); @@ -4285,10 +4448,8 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) { llvm::Value *zeroVec = llvm::Constant::getNullValue(vecTy); llvm::Value *TestMSB = Builder.CreateICmpSLT(CondV, zeroVec); - llvm::Value *tmp = Builder.CreateSExt(TestMSB, - llvm::VectorType::get(elemType, - numElem), - "sext"); + llvm::Value *tmp = Builder.CreateSExt( + TestMSB, llvm::FixedVectorType::get(elemType, numElem), "sext"); llvm::Value *tmp2 = Builder.CreateNot(tmp); // Cast float to int to perform ANDs if necessary. 
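The branchless select above works because sign-extending a lane-wise comparison yields an all-ones or all-zeros mask per lane, which can then blend the two arms with bitwise ops. Spelled out for integer lanes, with illustrative names (the And/Or tail is the standard continuation of the pattern shown above):

    // cond < 0 ? lhs : rhs, per lane, no control flow:
    llvm::Value *MSB  = Builder.CreateICmpSLT(CondV, ZeroVec);  // i1 per lane
    llvm::Value *Mask = Builder.CreateSExt(MSB, VecTy, "sext"); // 0 or ~0
    llvm::Value *Sel  = Builder.CreateOr(
        Builder.CreateAnd(Mask, LHSV),
        Builder.CreateAnd(Builder.CreateNot(Mask), RHSV));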
@@ -4427,14 +4588,9 @@ Value *ScalarExprEmitter::VisitBlockExpr(const BlockExpr *block) { static Value *ConvertVec3AndVec4(CGBuilderTy &Builder, CodeGenFunction &CGF, Value *Src, unsigned NumElementsDst) { llvm::Value *UnV = llvm::UndefValue::get(Src->getType()); - SmallVector<llvm::Constant*, 4> Args; - Args.push_back(Builder.getInt32(0)); - Args.push_back(Builder.getInt32(1)); - Args.push_back(Builder.getInt32(2)); - if (NumElementsDst == 4) - Args.push_back(llvm::UndefValue::get(CGF.Int32Ty)); - llvm::Constant *Mask = llvm::ConstantVector::get(Args); - return Builder.CreateShuffleVector(Src, UnV, Mask); + static constexpr int Mask[] = {0, 1, 2, -1}; + return Builder.CreateShuffleVector(Src, UnV, + llvm::makeArrayRef(Mask, NumElementsDst)); } // Create cast instructions for converting LLVM value \p Src to LLVM type \p @@ -4512,7 +4668,8 @@ Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) { // get a vec3. if (NumElementsSrc != 3 && NumElementsDst == 3) { if (!CGF.CGM.getCodeGenOpts().PreserveVec3Type) { - auto Vec4Ty = llvm::VectorType::get(DstTy->getVectorElementType(), 4); + auto *Vec4Ty = llvm::FixedVectorType::get( + cast<llvm::VectorType>(DstTy)->getElementType(), 4); Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src, Vec4Ty); } @@ -4655,7 +4812,7 @@ struct GEPOffsetAndOverflow { static GEPOffsetAndOverflow EmitGEPOffsetInBytes(Value *BasePtr, Value *GEPVal, llvm::LLVMContext &VMContext, CodeGenModule &CGM, - CGBuilderTy Builder) { + CGBuilderTy &Builder) { const auto &DL = CGM.getDataLayout(); // The total (signed) byte offset for the GEP. diff --git a/clang/lib/CodeGen/CGGPUBuiltin.cpp b/clang/lib/CodeGen/CGGPUBuiltin.cpp index d7e267630762..f860623e2bc3 100644 --- a/clang/lib/CodeGen/CGGPUBuiltin.cpp +++ b/clang/lib/CodeGen/CGGPUBuiltin.cpp @@ -16,6 +16,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instruction.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h" using namespace clang; using namespace CodeGen; @@ -110,7 +111,7 @@ CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E, for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) { llvm::Value *P = Builder.CreateStructGEP(AllocaTy, Alloca, I - 1); llvm::Value *Arg = Args[I].getRValue(*this).getScalarVal(); - Builder.CreateAlignedStore(Arg, P, DL.getPrefTypeAlignment(Arg->getType())); + Builder.CreateAlignedStore(Arg, P, DL.getPrefTypeAlign(Arg->getType())); } BufferPtr = Builder.CreatePointerCast(Alloca, llvm::Type::getInt8PtrTy(Ctx)); } @@ -120,3 +121,36 @@ CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E, return RValue::get(Builder.CreateCall( VprintfFunc, {Args[0].getRValue(*this).getScalarVal(), BufferPtr})); } + +RValue +CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E, + ReturnValueSlot ReturnValue) { + assert(getTarget().getTriple().getArch() == llvm::Triple::amdgcn); + assert(E->getBuiltinCallee() == Builtin::BIprintf || + E->getBuiltinCallee() == Builtin::BI__builtin_printf); + assert(E->getNumArgs() >= 1); // printf always has at least one arg. + + CallArgList CallArgs; + EmitCallArgs(CallArgs, + E->getDirectCallee()->getType()->getAs<FunctionProtoType>(), + E->arguments(), E->getDirectCallee(), + /* ParamsToSkip = */ 0); + + SmallVector<llvm::Value *, 8> Args; + for (auto A : CallArgs) { + // We don't know how to emit non-scalar varargs. 
+ if (!A.getRValue(*this).isScalar()) { + CGM.ErrorUnsupported(E, "non-scalar arg to printf"); + return RValue::get(llvm::ConstantInt::get(IntTy, -1)); + } + + llvm::Value *Arg = A.getRValue(*this).getScalarVal(); + Args.push_back(Arg); + } + + llvm::IRBuilder<> IRB(Builder.GetInsertBlock(), Builder.GetInsertPoint()); + IRB.SetCurrentDebugLocation(Builder.getCurrentDebugLocation()); + auto Printf = llvm::emitAMDGPUPrintfCall(IRB, Args); + Builder.SetInsertPoint(IRB.GetInsertBlock(), IRB.GetInsertPoint()); + return RValue::get(Printf); +} diff --git a/clang/lib/CodeGen/CGLoopInfo.cpp b/clang/lib/CodeGen/CGLoopInfo.cpp index e4b184eb8798..78da72eda0cf 100644 --- a/clang/lib/CodeGen/CGLoopInfo.cpp +++ b/clang/lib/CodeGen/CGLoopInfo.cpp @@ -9,6 +9,8 @@ #include "CGLoopInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" +#include "clang/AST/Expr.h" +#include "clang/Basic/CodeGenOptions.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" @@ -572,6 +574,7 @@ void LoopInfoStack::push(BasicBlock *Header, const llvm::DebugLoc &StartLoc, } void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, + const clang::CodeGenOptions &CGOpts, ArrayRef<const clang::Attr *> Attrs, const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc) { @@ -752,6 +755,14 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, } } + if (CGOpts.OptimizationLevel > 0) + // Disable unrolling for the loop, if unrolling is disabled (via + // -fno-unroll-loops) and no pragmas override the decision. + if (!CGOpts.UnrollLoops && + (StagedAttrs.UnrollEnable == LoopAttributes::Unspecified && + StagedAttrs.UnrollCount == 0)) + setUnrollState(LoopAttributes::Disable); + /// Stage the attributes. push(Header, StartLoc, EndLoc); } diff --git a/clang/lib/CodeGen/CGLoopInfo.h b/clang/lib/CodeGen/CGLoopInfo.h index 5abcf37c5433..e379c64c99a8 100644 --- a/clang/lib/CodeGen/CGLoopInfo.h +++ b/clang/lib/CodeGen/CGLoopInfo.h @@ -29,6 +29,7 @@ class MDNode; namespace clang { class Attr; class ASTContext; +class CodeGenOptions; namespace CodeGen { /// Attributes that may be specified on loops. @@ -202,6 +203,7 @@ public: /// Begin a new structured loop. Stage attributes from the Attrs list. /// The staged attributes are applied to the loop and then cleared. void push(llvm::BasicBlock *Header, clang::ASTContext &Ctx, + const clang::CodeGenOptions &CGOpts, llvm::ArrayRef<const Attr *> Attrs, const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc); diff --git a/clang/lib/CodeGen/CGNonTrivialStruct.cpp b/clang/lib/CodeGen/CGNonTrivialStruct.cpp index d5f378c52232..d134be83a9dc 100644 --- a/clang/lib/CodeGen/CGNonTrivialStruct.cpp +++ b/clang/lib/CodeGen/CGNonTrivialStruct.cpp @@ -254,6 +254,10 @@ struct GenBinaryFuncName : CopyStructVisitor<GenBinaryFuncName<IsMove>, IsMove>, void visitVolatileTrivial(QualType FT, const FieldDecl *FD, CharUnits CurStructOffset) { + // Zero-length bit-fields don't need to be copied/assigned. + if (FD && FD->isZeroLengthBitField(this->Ctx)) + return; + // Because volatile fields can be bit-fields and are individually copied, // their offset and width are in bits. uint64_t OffsetInBits = @@ -317,6 +321,16 @@ static const CGFunctionInfo &getFunctionInfo(CodeGenModule &CGM, return CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args); } +template <size_t N, size_t... 
Ints> +static std::array<Address, N> getParamAddrs(std::index_sequence<Ints...> IntSeq, + std::array<CharUnits, N> Alignments, + FunctionArgList Args, + CodeGenFunction *CGF) { + return std::array<Address, N>{{ + Address(CGF->Builder.CreateLoad(CGF->GetAddrOfLocalVar(Args[Ints])), + Alignments[Ints])...}}; +} + // Template classes that are used as bases for classes that emit special // functions. template <class Derived> struct GenFuncBase { @@ -424,9 +438,9 @@ template <class Derived> struct GenFuncBase { } template <size_t N> - llvm::Function * - getFunction(StringRef FuncName, QualType QT, std::array<Address, N> Addrs, - std::array<CharUnits, N> Alignments, CodeGenModule &CGM) { + llvm::Function *getFunction(StringRef FuncName, QualType QT, + std::array<CharUnits, N> Alignments, + CodeGenModule &CGM) { // If the special function already exists in the module, return it. if (llvm::Function *F = CGM.getModule().getFunction(FuncName)) { bool WrongType = false; @@ -439,7 +453,7 @@ template <class Derived> struct GenFuncBase { } if (WrongType) { - std::string FuncName = F->getName(); + std::string FuncName = std::string(F->getName()); SourceLocation Loc = QT->castAs<RecordType>()->getDecl()->getLocation(); CGM.Error(Loc, "special function " + FuncName + " for non-trivial C struct has incorrect type"); @@ -466,12 +480,8 @@ template <class Derived> struct GenFuncBase { CodeGenFunction NewCGF(CGM); setCGF(&NewCGF); CGF->StartFunction(FD, Ctx.VoidTy, F, FI, Args); - - for (unsigned I = 0; I < N; ++I) { - llvm::Value *V = CGF->Builder.CreateLoad(CGF->GetAddrOfLocalVar(Args[I])); - Addrs[I] = Address(V, Alignments[I]); - } - + std::array<Address, N> Addrs = + getParamAddrs<N>(std::make_index_sequence<N>{}, Alignments, Args, CGF); asDerived().visitStructFields(QT, CharUnits::Zero(), Addrs); CGF->FinishFunction(); return F; @@ -491,7 +501,7 @@ template <class Derived> struct GenFuncBase { } if (llvm::Function *F = - getFunction(FuncName, QT, Addrs, Alignments, CallerCGF.CGM)) + getFunction(FuncName, QT, Alignments, CallerCGF.CGM)) CallerCGF.EmitNounwindRuntimeCall(F, Ptrs); } @@ -543,6 +553,10 @@ struct GenBinaryFunc : CopyStructVisitor<Derived, IsMove>, std::array<Address, 2> Addrs) { LValue DstLV, SrcLV; if (FD) { + // No need to copy zero-length bit-fields. + if (FD->isZeroLengthBitField(this->CGF->getContext())) + return; + QualType RT = QualType(FD->getParent()->getTypeForDecl(), 0); llvm::PointerType *PtrTy = this->CGF->ConvertType(RT)->getPointerTo(); Address DstAddr = this->getAddrWithOffset(Addrs[DstIdx], Offset); @@ -825,17 +839,6 @@ static void callSpecialFunction(G &&Gen, StringRef FuncName, QualType QT, Gen.callFunc(FuncName, QT, Addrs, CGF); } -template <size_t N> static std::array<Address, N> createNullAddressArray(); - -template <> std::array<Address, 1> createNullAddressArray() { - return std::array<Address, 1>({{Address(nullptr, CharUnits::Zero())}}); -} - -template <> std::array<Address, 2> createNullAddressArray() { - return std::array<Address, 2>({{Address(nullptr, CharUnits::Zero()), - Address(nullptr, CharUnits::Zero())}}); -} - template <class G, size_t N> static llvm::Function * getSpecialFunction(G &&Gen, StringRef FuncName, QualType QT, bool IsVolatile, @@ -844,8 +847,7 @@ getSpecialFunction(G &&Gen, StringRef FuncName, QualType QT, bool IsVolatile, // The following call requires an array of addresses as arguments, but doesn't // actually use them (it overwrites them with the addresses of the arguments // of the created function). 
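getParamAddrs (added above) builds the whole std::array in one braced initialization via a std::index_sequence pack expansion, instead of filling it element-by-element in a loop. A self-contained illustration of the same idiom:

    #include <array>
    #include <cstddef>
    #include <utility>

    // Apply f to 0..N-1 in a single pack expansion; this mirrors how
    // getParamAddrs pairs each loaded parameter with its alignment.
    template <typename F, std::size_t... Is>
    auto applyToIndices(F f, std::index_sequence<Is...>) {
      return std::array<int, sizeof...(Is)>{{f(Is)...}};
    }

    // e.g. applyToIndices([](std::size_t i) { return int(i * i); },
    //                     std::make_index_sequence<3>{});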
- return Gen.getFunction(FuncName, QT, createNullAddressArray<N>(), Alignments, - CGM); + return Gen.getFunction(FuncName, QT, Alignments, CGM); } // Functions to emit calls to the special functions of a non-trivial C struct. diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp index 90fca2836d99..cd2b84f5dd20 100644 --- a/clang/lib/CodeGen/CGObjC.cpp +++ b/clang/lib/CodeGen/CGObjC.cpp @@ -1491,11 +1491,10 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, argLoad.getType())) finalArg = &argCast; - - BinaryOperator assign(&ivarRef, finalArg, BO_Assign, - ivarRef.getType(), VK_RValue, OK_Ordinary, - SourceLocation(), FPOptions()); - EmitStmt(&assign); + BinaryOperator *assign = BinaryOperator::Create( + getContext(), &ivarRef, finalArg, BO_Assign, ivarRef.getType(), VK_RValue, + OK_Ordinary, SourceLocation(), FPOptionsOverride()); + EmitStmt(assign); } /// Generate an Objective-C property setter function. @@ -1837,6 +1836,40 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ llvm::Value *CurrentItem = Builder.CreateAlignedLoad(CurrentItemPtr, getPointerAlign()); + if (SanOpts.has(SanitizerKind::ObjCCast)) { + // Before using an item from the collection, check that the implicit cast + // from id to the element type is valid. This is done with instrumentation + // roughly corresponding to: + // + // if (![item isKindOfClass:expectedCls]) { /* emit diagnostic */ } + const ObjCObjectPointerType *ObjPtrTy = + elementType->getAsObjCInterfacePointerType(); + const ObjCInterfaceType *InterfaceTy = + ObjPtrTy ? ObjPtrTy->getInterfaceType() : nullptr; + if (InterfaceTy) { + SanitizerScope SanScope(this); + auto &C = CGM.getContext(); + assert(InterfaceTy->getDecl() && "No decl for ObjC interface type"); + Selector IsKindOfClassSel = GetUnarySelector("isKindOfClass", C); + CallArgList IsKindOfClassArgs; + llvm::Value *Cls = + CGM.getObjCRuntime().GetClass(*this, InterfaceTy->getDecl()); + IsKindOfClassArgs.add(RValue::get(Cls), C.getObjCClassType()); + llvm::Value *IsClass = + CGM.getObjCRuntime() + .GenerateMessageSend(*this, ReturnValueSlot(), C.BoolTy, + IsKindOfClassSel, CurrentItem, + IsKindOfClassArgs) + .getScalarVal(); + llvm::Constant *StaticData[] = { + EmitCheckSourceLocation(S.getBeginLoc()), + EmitCheckTypeDescriptor(QualType(InterfaceTy, 0))}; + EmitCheck({{IsClass, SanitizerKind::ObjCCast}}, + SanitizerHandler::InvalidObjCCast, + ArrayRef<llvm::Constant *>(StaticData), CurrentItem); + } + } + // Cast that value to the right type. CurrentItem = Builder.CreateBitCast(CurrentItem, convertedElementType, "currentitem"); @@ -2160,7 +2193,8 @@ llvm::Value *CodeGenFunction::EmitARCRetainBlock(llvm::Value *value, if (!mandatory && isa<llvm::Instruction>(result)) { llvm::CallInst *call = cast<llvm::CallInst>(result->stripPointerCasts()); - assert(call->getCalledValue() == CGM.getObjCEntrypoints().objc_retainBlock); + assert(call->getCalledOperand() == + CGM.getObjCEntrypoints().objc_retainBlock); call->setMetadata("clang.arc.copy_on_escape", llvm::MDNode::get(Builder.getContext(), None)); @@ -3255,7 +3289,6 @@ static llvm::Value *emitARCRetainLoadOfScalar(CodeGenFunction &CGF, llvm::Value *CodeGenFunction::EmitARCRetainScalarExpr(const Expr *e) { // The retain needs to happen within the full-expression. 
if (const ExprWithCleanups *cleanups = dyn_cast<ExprWithCleanups>(e)) { - enterFullExpression(cleanups); RunCleanupsScope scope(*this); return EmitARCRetainScalarExpr(cleanups->getSubExpr()); } @@ -3271,7 +3304,6 @@ llvm::Value * CodeGenFunction::EmitARCRetainAutoreleaseScalarExpr(const Expr *e) { // The retain needs to happen within the full-expression. if (const ExprWithCleanups *cleanups = dyn_cast<ExprWithCleanups>(e)) { - enterFullExpression(cleanups); RunCleanupsScope scope(*this); return EmitARCRetainAutoreleaseScalarExpr(cleanups->getSubExpr()); } @@ -3382,7 +3414,6 @@ static llvm::Value *emitARCUnsafeUnretainedScalarExpr(CodeGenFunction &CGF, llvm::Value *CodeGenFunction::EmitARCUnsafeUnretainedScalarExpr(const Expr *e) { // Look through full-expressions. if (const ExprWithCleanups *cleanups = dyn_cast<ExprWithCleanups>(e)) { - enterFullExpression(cleanups); RunCleanupsScope scope(*this); return emitARCUnsafeUnretainedScalarExpr(*this, cleanups->getSubExpr()); } @@ -3505,7 +3536,7 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( if (!Ty->isRecordType()) return nullptr; const ObjCPropertyDecl *PD = PID->getPropertyDecl(); - if ((!(PD->getPropertyAttributes() & ObjCPropertyDecl::OBJC_PR_atomic))) + if ((!(PD->getPropertyAttributes() & ObjCPropertyAttribute::kind_atomic))) return nullptr; llvm::Constant *HelperFn = nullptr; if (hasTrivialSetExpr(PID)) @@ -3555,21 +3586,21 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( StartFunction(FD, ReturnTy, Fn, FI, args); - DeclRefExpr DstExpr(getContext(), &DstDecl, false, DestTy, VK_RValue, - SourceLocation()); - UnaryOperator DST(&DstExpr, UO_Deref, DestTy->getPointeeType(), - VK_LValue, OK_Ordinary, SourceLocation(), false); + DeclRefExpr DstExpr(C, &DstDecl, false, DestTy, VK_RValue, SourceLocation()); + UnaryOperator *DST = UnaryOperator::Create( + C, &DstExpr, UO_Deref, DestTy->getPointeeType(), VK_LValue, OK_Ordinary, + SourceLocation(), false, FPOptionsOverride()); - DeclRefExpr SrcExpr(getContext(), &SrcDecl, false, SrcTy, VK_RValue, - SourceLocation()); - UnaryOperator SRC(&SrcExpr, UO_Deref, SrcTy->getPointeeType(), - VK_LValue, OK_Ordinary, SourceLocation(), false); + DeclRefExpr SrcExpr(C, &SrcDecl, false, SrcTy, VK_RValue, SourceLocation()); + UnaryOperator *SRC = UnaryOperator::Create( + C, &SrcExpr, UO_Deref, SrcTy->getPointeeType(), VK_LValue, OK_Ordinary, + SourceLocation(), false, FPOptionsOverride()); - Expr *Args[2] = { &DST, &SRC }; + Expr *Args[2] = {DST, SRC}; CallExpr *CalleeExp = cast<CallExpr>(PID->getSetterCXXAssignment()); CXXOperatorCallExpr *TheCall = CXXOperatorCallExpr::Create( C, OO_Equal, CalleeExp->getCallee(), Args, DestTy->getPointeeType(), - VK_LValue, SourceLocation(), FPOptions()); + VK_LValue, SourceLocation(), FPOptionsOverride()); EmitStmt(TheCall); @@ -3589,7 +3620,7 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( QualType Ty = PD->getType(); if (!Ty->isRecordType()) return nullptr; - if ((!(PD->getPropertyAttributes() & ObjCPropertyDecl::OBJC_PR_atomic))) + if ((!(PD->getPropertyAttributes() & ObjCPropertyAttribute::kind_atomic))) return nullptr; llvm::Constant *HelperFn = nullptr; if (hasTrivialGetExpr(PID)) @@ -3641,14 +3672,15 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( DeclRefExpr SrcExpr(getContext(), &SrcDecl, false, SrcTy, VK_RValue, SourceLocation()); - UnaryOperator SRC(&SrcExpr, UO_Deref, SrcTy->getPointeeType(), - VK_LValue, OK_Ordinary, SourceLocation(), false); + UnaryOperator *SRC = UnaryOperator::Create( + C, 
&SrcExpr, UO_Deref, SrcTy->getPointeeType(), VK_LValue, OK_Ordinary, + SourceLocation(), false, FPOptionsOverride()); CXXConstructExpr *CXXConstExpr = cast<CXXConstructExpr>(PID->getGetterCXXConstructor()); SmallVector<Expr*, 4> ConstructorArgs; - ConstructorArgs.push_back(&SRC); + ConstructorArgs.push_back(SRC); ConstructorArgs.append(std::next(CXXConstExpr->arg_begin()), CXXConstExpr->arg_end()); diff --git a/clang/lib/CodeGen/CGObjCGNU.cpp b/clang/lib/CodeGen/CGObjCGNU.cpp index a27b6d4ed637..bb9c494ae68e 100644 --- a/clang/lib/CodeGen/CGObjCGNU.cpp +++ b/clang/lib/CodeGen/CGObjCGNU.cpp @@ -203,7 +203,8 @@ protected: /// the start of the string. The result of this function can be used anywhere /// where the C code specifies const char*. llvm::Constant *MakeConstantString(StringRef Str, const char *Name = "") { - ConstantAddress Array = CGM.GetAddrOfConstantCString(Str, Name); + ConstantAddress Array = + CGM.GetAddrOfConstantCString(std::string(Str), Name); return llvm::ConstantExpr::getGetElementPtr(Array.getElementType(), Array.getPointer(), Zeros); } @@ -254,11 +255,11 @@ protected: isDynamic=true) { int attrs = property->getPropertyAttributes(); // For read-only properties, clear the copy and retain flags - if (attrs & ObjCPropertyDecl::OBJC_PR_readonly) { - attrs &= ~ObjCPropertyDecl::OBJC_PR_copy; - attrs &= ~ObjCPropertyDecl::OBJC_PR_retain; - attrs &= ~ObjCPropertyDecl::OBJC_PR_weak; - attrs &= ~ObjCPropertyDecl::OBJC_PR_strong; + if (attrs & ObjCPropertyAttribute::kind_readonly) { + attrs &= ~ObjCPropertyAttribute::kind_copy; + attrs &= ~ObjCPropertyAttribute::kind_retain; + attrs &= ~ObjCPropertyAttribute::kind_weak; + attrs &= ~ObjCPropertyAttribute::kind_strong; } // The first flags field has the same attribute values as clang uses internally Fields.addInt(Int8Ty, attrs & 0xff); @@ -616,6 +617,13 @@ public: llvm::Value *GenerateProtocolRef(CodeGenFunction &CGF, const ObjCProtocolDecl *PD) override; void GenerateProtocol(const ObjCProtocolDecl *PD) override; + + virtual llvm::Constant *GenerateProtocolRef(const ObjCProtocolDecl *PD); + + llvm::Constant *GetOrEmitProtocol(const ObjCProtocolDecl *PD) override { + return GenerateProtocolRef(PD); + } + llvm::Function *ModuleInitFunction() override; llvm::FunctionCallee GetPropertyGetFunction() override; llvm::FunctionCallee GetPropertySetFunction() override; @@ -820,7 +828,7 @@ class CGObjCGNUstep : public CGObjCGNU { // Slot_t objc_slot_lookup_super(struct objc_super*, SEL); SlotLookupSuperFn.init(&CGM, "objc_slot_lookup_super", SlotTy, PtrToObjCSuperTy, SelectorTy); - // If we're in ObjC++ mode, then we want to make + // If we're in ObjC++ mode, then we want to make if (usesSEHExceptions) { llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext); // void objc_exception_rethrow(void) @@ -1347,7 +1355,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { void GenerateProtocol(const ObjCProtocolDecl *PD) override { // Do nothing - we only emit referenced protocols. 
} - llvm::Constant *GenerateProtocolRef(const ObjCProtocolDecl *PD) { + llvm::Constant *GenerateProtocolRef(const ObjCProtocolDecl *PD) override { std::string ProtocolName = PD->getNameAsString(); auto *&Protocol = ExistingProtocols[ProtocolName]; if (Protocol) @@ -1433,7 +1441,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { llvm::Constant *GetTypeString(llvm::StringRef TypeEncoding) { if (TypeEncoding.empty()) return NULLPtr; - std::string MangledTypes = TypeEncoding; + std::string MangledTypes = std::string(TypeEncoding); std::replace(MangledTypes.begin(), MangledTypes.end(), '@', '\1'); std::string TypesVarName = ".objc_sel_types_" + MangledTypes; @@ -1556,7 +1564,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { // We have to do this by hand, rather than with @llvm.ctors, so that the // linker can remove the duplicate invocations. auto *InitVar = new llvm::GlobalVariable(TheModule, LoadFunction->getType(), - /*isConstant*/true, llvm::GlobalValue::LinkOnceAnyLinkage, + /*isConstant*/false, llvm::GlobalValue::LinkOnceAnyLinkage, LoadFunction, ".objc_ctor"); // Check that this hasn't been renamed. This shouldn't happen, because // this function should be called precisely once. @@ -1647,14 +1655,16 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { for (const auto &lateInit : EarlyInitList) { auto *global = TheModule.getGlobalVariable(lateInit.first); if (global) { - b.CreateAlignedStore(global, - b.CreateStructGEP(lateInit.second.first, lateInit.second.second), CGM.getPointerAlign().getQuantity()); + b.CreateAlignedStore( + global, + b.CreateStructGEP(lateInit.second.first, lateInit.second.second), + CGM.getPointerAlign().getAsAlign()); } } b.CreateRetVoid(); // We can't use the normal LLVM global initialisation array, because we // need to specify that this runs early in library initialisation. 
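// The ".objc_ctor" record just above is deliberately emitted as a
// linkonce_any global rather than through @llvm.global_ctors: every
// translation unit produces an identically named copy, and linkonce_any lets
// the linker keep exactly one, per the comment in the hunk. This revision
// also flips the global from constant to non-constant. A minimal sketch of
// the emission pattern (helper name is illustrative):
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"

llvm::GlobalVariable *emitCtorRecord(llvm::Module &M, llvm::Function *Load) {
  // The global's type is pointer-to-function, initialized with the init
  // function itself, mirroring the ".objc_ctor" emission above.
  return new llvm::GlobalVariable(M, Load->getType(), /*isConstant=*/false,
                                  llvm::GlobalValue::LinkOnceAnyLinkage,
                                  Load, ".objc_ctor");
}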
- auto *InitVar = new llvm::GlobalVariable(CGM.getModule(), Init->getType(), + auto *InitVar = new llvm::GlobalVariable(CGM.getModule(), Init->getType(), /*isConstant*/true, llvm::GlobalValue::InternalLinkage, Init, ".objc_early_init_ptr"); InitVar->setSection(".CRT$XCLb"); @@ -1943,7 +1953,8 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { if (SuperClass) { std::pair<llvm::Constant*, int> v{classStruct, 1}; - EarlyInitList.emplace_back(SuperClass->getName(), std::move(v)); + EarlyInitList.emplace_back(std::string(SuperClass->getName()), + std::move(v)); } } @@ -2410,7 +2421,8 @@ llvm::Constant *CGObjCGNUstep::GetEHType(QualType T) { assert(PT && "Invalid @catch type."); const ObjCInterfaceType *IT = PT->getInterfaceType(); assert(IT && "Invalid @catch type."); - std::string className = IT->getDecl()->getIdentifier()->getName(); + std::string className = + std::string(IT->getDecl()->getIdentifier()->getName()); std::string typeinfoName = "__objc_eh_typeinfo_" + className; @@ -3034,13 +3046,18 @@ CGObjCGNU::GenerateProtocolList(ArrayRef<std::string> Protocols) { llvm::Value *CGObjCGNU::GenerateProtocolRef(CodeGenFunction &CGF, const ObjCProtocolDecl *PD) { + auto protocol = GenerateProtocolRef(PD); + llvm::Type *T = + CGM.getTypes().ConvertType(CGM.getContext().getObjCProtoType()); + return CGF.Builder.CreateBitCast(protocol, llvm::PointerType::getUnqual(T)); +} + +llvm::Constant *CGObjCGNU::GenerateProtocolRef(const ObjCProtocolDecl *PD) { llvm::Constant *&protocol = ExistingProtocols[PD->getNameAsString()]; if (!protocol) GenerateProtocol(PD); assert(protocol && "Unknown protocol"); - llvm::Type *T = - CGM.getTypes().ConvertType(CGM.getContext().getObjCProtoType()); - return CGF.Builder.CreateBitCast(protocol, llvm::PointerType::getUnqual(T)); + return protocol; } llvm::Constant * diff --git a/clang/lib/CodeGen/CGObjCMac.cpp b/clang/lib/CodeGen/CGObjCMac.cpp index f36c28a85a68..1d0379afb4b5 100644 --- a/clang/lib/CodeGen/CGObjCMac.cpp +++ b/clang/lib/CodeGen/CGObjCMac.cpp @@ -1107,11 +1107,6 @@ public: void GenerateProtocol(const ObjCProtocolDecl *PD) override; - /// GetOrEmitProtocol - Get the protocol object for the given - /// declaration, emitting it if necessary. The return value has type - /// ProtocolPtrTy. - virtual llvm::Constant *GetOrEmitProtocol(const ObjCProtocolDecl *PD)=0; - /// GetOrEmitProtocolRef - Get a forward reference to the protocol /// object for the given declaration, emitting it if needed. These /// forward references will be filled in with empty bodies if no @@ -2035,7 +2030,7 @@ CGObjCCommonMac::GenerateConstantNSString(const StringLiteral *Literal) { GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); // Don't enforce the target's minimum global alignment, since the only use // of the string is via this class initializer. - GV->setAlignment(llvm::Align::None()); + GV->setAlignment(llvm::Align(1)); Fields.addBitCast(GV, CGM.Int8PtrTy); // String length. @@ -2558,9 +2553,8 @@ void CGObjCCommonMac::BuildRCRecordLayout(const llvm::StructLayout *RecLayout, } if (FQT->isRecordType() && ElCount) { int OldIndex = RunSkipBlockVars.size() - 1; - const RecordType *RT = FQT->getAs<RecordType>(); - BuildRCBlockVarRecordLayout(RT, BytePos + FieldOffset, - HasUnion); + auto *RT = FQT->castAs<RecordType>(); + BuildRCBlockVarRecordLayout(RT, BytePos + FieldOffset, HasUnion); // Replicate layout information for each array element. Note that // one element is already done. 
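The alignment rewrites above (CGM.getPointerAlign().getAsAlign() in the
CGObjCGNU.cpp early-init store, and llvm::Align(1) replacing
llvm::Align::None() in GenerateConstantNSString) track LLVM's migration from
raw unsigned byte counts to the typed llvm::Align, where Align(1) is the
explicit "no particular alignment" value. A minimal sketch, assuming an
IRBuilder b, values Val and Ptr, a clang::CharUnits PtrAlign, and a
GlobalVariable *GV are in hand:

// The old API took a plain integer, e.g.
// b.CreateAlignedStore(Val, Ptr, PtrAlign.getQuantity());
// newer IRBuilder overloads want the typed llvm::Align instead:
llvm::StoreInst *SI = b.CreateAlignedStore(Val, Ptr, PtrAlign.getAsAlign());
GV->setAlignment(llvm::Align(1)); // previously spelled llvm::Align::None()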
@@ -3047,9 +3041,10 @@ llvm::Value *CGObjCCommonMac::EmitClassRefViaRuntime( ObjCCommonTypesHelper &ObjCTypes) { llvm::FunctionCallee lookUpClassFn = ObjCTypes.getLookUpClassFn(); - llvm::Value *className = - CGF.CGM.GetAddrOfConstantCString(ID->getObjCRuntimeNameAsString()) - .getPointer(); + llvm::Value *className = CGF.CGM + .GetAddrOfConstantCString(std::string( + ID->getObjCRuntimeNameAsString())) + .getPointer(); ASTContext &ctx = CGF.CGM.getContext(); className = CGF.Builder.CreateBitCast(className, @@ -3291,6 +3286,8 @@ llvm::Constant *CGObjCCommonMac::EmitPropertyList(Twine Name, for (auto *PD : ClassExt->properties()) { if (IsClassProperty != PD->isClassProperty()) continue; + if (PD->isDirectProperty()) + continue; PropertySet.insert(PD->getIdentifier()); Properties.push_back(PD); } @@ -3302,6 +3299,8 @@ llvm::Constant *CGObjCCommonMac::EmitPropertyList(Twine Name, // class extension. if (!PropertySet.insert(PD->getIdentifier()).second) continue; + if (PD->isDirectProperty()) + continue; Properties.push_back(PD); } @@ -3327,8 +3326,6 @@ llvm::Constant *CGObjCCommonMac::EmitPropertyList(Twine Name, values.addInt(ObjCTypes.IntTy, Properties.size()); auto propertiesArray = values.beginArray(ObjCTypes.PropertyTy); for (auto PD : Properties) { - if (PD->isDirectProperty()) - continue; auto property = propertiesArray.beginStruct(ObjCTypes.PropertyTy); property.add(GetPropertyName(PD->getIdentifier())); property.add(GetPropertyTypeString(PD, Container)); @@ -3637,7 +3634,7 @@ void CGObjCMac::GenerateClass(const ObjCImplementationDecl *ID) { // Check for a forward reference. llvm::GlobalVariable *GV = CGM.getModule().getGlobalVariable(Name, true); if (GV) { - assert(GV->getType()->getElementType() == ObjCTypes.ClassTy && + assert(GV->getValueType() == ObjCTypes.ClassTy && "Forward metaclass reference has incorrect type."); values.finishAndSetAsInitializer(GV); GV->setSection(Section); @@ -3700,7 +3697,7 @@ llvm::Constant *CGObjCMac::EmitMetaClass(const ObjCImplementationDecl *ID, // Check for a forward reference. 
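// The assert changes in this and the next few hunks replace
// GV->getType()->getElementType() with GV->getValueType(). Both name the
// type the global holds, but the new spelling does not look through the
// pointer type, a step toward LLVM's opaque-pointer future where
// getElementType() on a pointer goes away. The check as a standalone helper
// (name is illustrative):
#include "llvm/IR/GlobalVariable.h"
#include <cassert>

void checkForwardRef(llvm::GlobalVariable *GV, llvm::Type *ExpectedTy) {
  assert(GV->getValueType() == ExpectedTy &&
         "Forward metaclass reference has incorrect type.");
  (void)GV;          // keep release builds (asserts compiled out)
  (void)ExpectedTy;  // free of unused-parameter warnings
}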
llvm::GlobalVariable *GV = CGM.getModule().getGlobalVariable(Name, true); if (GV) { - assert(GV->getType()->getElementType() == ObjCTypes.ClassTy && + assert(GV->getValueType() == ObjCTypes.ClassTy && "Forward metaclass reference has incorrect type."); values.finishAndSetAsInitializer(GV); } else { @@ -3731,7 +3728,7 @@ llvm::Constant *CGObjCMac::EmitMetaClassRef(const ObjCInterfaceDecl *ID) { llvm::GlobalValue::PrivateLinkage, nullptr, Name); - assert(GV->getType()->getElementType() == ObjCTypes.ClassTy && + assert(GV->getValueType() == ObjCTypes.ClassTy && "Forward metaclass reference has incorrect type."); return GV; } @@ -3745,7 +3742,7 @@ llvm::Value *CGObjCMac::EmitSuperClassRef(const ObjCInterfaceDecl *ID) { llvm::GlobalValue::PrivateLinkage, nullptr, Name); - assert(GV->getType()->getElementType() == ObjCTypes.ClassTy && + assert(GV->getValueType() == ObjCTypes.ClassTy && "Forward class metadata reference has incorrect type."); return GV; } @@ -4029,22 +4026,49 @@ llvm::Function *CGObjCCommonMac::GenerateMethod(const ObjCMethodDecl *OMD, llvm::Function * CGObjCCommonMac::GenerateDirectMethod(const ObjCMethodDecl *OMD, const ObjCContainerDecl *CD) { - auto I = DirectMethodDefinitions.find(OMD->getCanonicalDecl()); - if (I != DirectMethodDefinitions.end()) - return I->second; + auto *COMD = OMD->getCanonicalDecl(); + auto I = DirectMethodDefinitions.find(COMD); + llvm::Function *OldFn = nullptr, *Fn = nullptr; - SmallString<256> Name; - GetNameForMethod(OMD, CD, Name, /*ignoreCategoryNamespace*/true); + if (I != DirectMethodDefinitions.end()) { + // Objective-C allows for the declaration and implementation types + // to differ slightly. + // + // If we're being asked for the Function associated for a method + // implementation, a previous value might have been cached + // based on the type of the canonical declaration. + // + // If these do not match, then we'll replace this function with + // a new one that has the proper type below. + if (!OMD->getBody() || COMD->getReturnType() == OMD->getReturnType()) + return I->second; + OldFn = I->second; + } CodeGenTypes &Types = CGM.getTypes(); llvm::FunctionType *MethodTy = Types.GetFunctionType(Types.arrangeObjCMethodDeclaration(OMD)); - llvm::Function *Method = - llvm::Function::Create(MethodTy, llvm::GlobalValue::ExternalLinkage, - Name.str(), &CGM.getModule()); - DirectMethodDefinitions.insert(std::make_pair(OMD->getCanonicalDecl(), Method)); - return Method; + if (OldFn) { + Fn = llvm::Function::Create(MethodTy, llvm::GlobalValue::ExternalLinkage, + "", &CGM.getModule()); + Fn->takeName(OldFn); + OldFn->replaceAllUsesWith( + llvm::ConstantExpr::getBitCast(Fn, OldFn->getType())); + OldFn->eraseFromParent(); + + // Replace the cached function in the map. + I->second = Fn; + } else { + SmallString<256> Name; + GetNameForMethod(OMD, CD, Name, /*ignoreCategoryNamespace*/ true); + + Fn = llvm::Function::Create(MethodTy, llvm::GlobalValue::ExternalLinkage, + Name.str(), &CGM.getModule()); + DirectMethodDefinitions.insert(std::make_pair(COMD, Fn)); + } + + return Fn; } void CGObjCCommonMac::GenerateDirectMethodPrologue( @@ -4195,7 +4219,8 @@ CGObjCCommonMac::CreateCStringLiteral(StringRef Name, ObjCLabelType Type, : "__TEXT,__cstring,cstring_literals"; break; case ObjCLabelType::PropertyName: - Section = "__TEXT,__cstring,cstring_literals"; + Section = NonFragile ? 
"__TEXT,__objc_methname,cstring_literals" + : "__TEXT,__cstring,cstring_literals"; break; } @@ -5128,15 +5153,18 @@ void CGObjCCommonMac::EmitImageInfo() { Mod.addModuleFlag(llvm::Module::Error, "Objective-C Image Info Section", llvm::MDString::get(VMContext, Section)); + auto Int8Ty = llvm::Type::getInt8Ty(VMContext); if (CGM.getLangOpts().getGC() == LangOptions::NonGC) { // Non-GC overrides those files which specify GC. - Mod.addModuleFlag(llvm::Module::Override, - "Objective-C Garbage Collection", (uint32_t)0); + Mod.addModuleFlag(llvm::Module::Error, + "Objective-C Garbage Collection", + llvm::ConstantInt::get(Int8Ty,0)); } else { // Add the ObjC garbage collection value. Mod.addModuleFlag(llvm::Module::Error, "Objective-C Garbage Collection", - eImageInfo_GarbageCollected); + llvm::ConstantInt::get(Int8Ty, + (uint8_t)eImageInfo_GarbageCollected)); if (CGM.getLangOpts().getGC() == LangOptions::GCOnly) { // Add the ObjC GC Only value. @@ -5147,7 +5175,7 @@ void CGObjCCommonMac::EmitImageInfo() { llvm::Metadata *Ops[2] = { llvm::MDString::get(VMContext, "Objective-C Garbage Collection"), llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( - llvm::Type::getInt32Ty(VMContext), eImageInfo_GarbageCollected))}; + Int8Ty, eImageInfo_GarbageCollected))}; Mod.addModuleFlag(llvm::Module::Require, "Objective-C GC Only", llvm::MDNode::get(VMContext, Ops)); } @@ -5423,7 +5451,7 @@ llvm::Constant *IvarLayoutBuilder::buildBitmap(CGObjCCommonMac &CGObjC, // This isn't a stable sort, but our algorithm should handle it fine. llvm::array_pod_sort(IvarsInfo.begin(), IvarsInfo.end()); } else { - assert(std::is_sorted(IvarsInfo.begin(), IvarsInfo.end())); + assert(llvm::is_sorted(IvarsInfo)); } assert(IvarsInfo.back().Offset < InstanceEnd); @@ -6217,11 +6245,9 @@ void CGObjCNonFragileABIMac::AddModuleClassList( assert((!CGM.getTriple().isOSBinFormatMachO() || SectionName.startswith("__DATA")) && "SectionName expected to start with __DATA on MachO"); - llvm::GlobalValue::LinkageTypes LT = - getLinkageTypeForObjCMetadata(CGM, SectionName); - llvm::GlobalVariable *GV = - new llvm::GlobalVariable(CGM.getModule(), Init->getType(), false, LT, Init, - SymbolName); + llvm::GlobalVariable *GV = new llvm::GlobalVariable( + CGM.getModule(), Init->getType(), false, + llvm::GlobalValue::PrivateLinkage, Init, SymbolName); GV->setAlignment( llvm::Align(CGM.getDataLayout().getABITypeAlignment(Init->getType()))); GV->setSection(SectionName); @@ -6350,7 +6376,7 @@ llvm::GlobalVariable * CGObjCNonFragileABIMac::BuildClassRoTInitializer( unsigned InstanceStart, unsigned InstanceSize, const ObjCImplementationDecl *ID) { - std::string ClassName = ID->getObjCRuntimeNameAsString(); + std::string ClassName = std::string(ID->getObjCRuntimeNameAsString()); CharUnits beginInstance = CharUnits::fromQuantity(InstanceStart); CharUnits endInstance = CharUnits::fromQuantity(InstanceSize); @@ -7509,10 +7535,9 @@ CGObjCNonFragileABIMac::EmitSuperClassRef(CodeGenFunction &CGF, llvm::Constant *ClassGV = GetClassGlobalForClassRef(ID); std::string SectionName = GetSectionName("__objc_superrefs", "regular,no_dead_strip"); - Entry = new llvm::GlobalVariable( - CGM.getModule(), ClassGV->getType(), false, - getLinkageTypeForObjCMetadata(CGM, SectionName), ClassGV, - "OBJC_CLASSLIST_SUP_REFS_$_"); + Entry = new llvm::GlobalVariable(CGM.getModule(), ClassGV->getType(), false, + llvm::GlobalValue::PrivateLinkage, ClassGV, + "OBJC_CLASSLIST_SUP_REFS_$_"); Entry->setAlignment(CGF.getPointerAlign().getAsAlign()); Entry->setSection(SectionName); 
CGM.addCompilerUsedGlobal(Entry); @@ -7533,10 +7558,9 @@ llvm::Value *CGObjCNonFragileABIMac::EmitMetaClassRef(CodeGenFunction &CGF, auto MetaClassGV = GetClassGlobal(ID, /*metaclass*/ true, NotForDefinition); std::string SectionName = GetSectionName("__objc_superrefs", "regular,no_dead_strip"); - Entry = new llvm::GlobalVariable( - CGM.getModule(), ObjCTypes.ClassnfABIPtrTy, false, - getLinkageTypeForObjCMetadata(CGM, SectionName), MetaClassGV, - "OBJC_CLASSLIST_SUP_REFS_$_"); + Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassnfABIPtrTy, + false, llvm::GlobalValue::PrivateLinkage, + MetaClassGV, "OBJC_CLASSLIST_SUP_REFS_$_"); Entry->setAlignment(Align.getAsAlign()); Entry->setSection(SectionName); CGM.addCompilerUsedGlobal(Entry); diff --git a/clang/lib/CodeGen/CGObjCRuntime.cpp b/clang/lib/CodeGen/CGObjCRuntime.cpp index f8b831d0e9be..39efe040302d 100644 --- a/clang/lib/CodeGen/CGObjCRuntime.cpp +++ b/clang/lib/CodeGen/CGObjCRuntime.cpp @@ -13,14 +13,15 @@ //===----------------------------------------------------------------------===// #include "CGObjCRuntime.h" -#include "CGCleanup.h" #include "CGCXXABI.h" +#include "CGCleanup.h" #include "CGRecordLayout.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/StmtObjC.h" #include "clang/CodeGen/CGFunctionInfo.h" +#include "clang/CodeGen/CodeGenABITypes.h" #include "llvm/Support/SaveAndRestore.h" using namespace clang; @@ -211,7 +212,7 @@ void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF, CGF.pushSEHCleanup(NormalAndEHCleanup, FinallyFunc); } - + // Emit the try body. CGF.EmitStmt(S.getTryBody()); @@ -271,7 +272,7 @@ void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF, cleanups.ForceCleanup(); CGF.EmitBranchThroughCleanup(Cont); - } + } // Go back to the try-statement fallthrough. CGF.Builder.restoreIP(SavedIP); @@ -383,3 +384,9 @@ CGObjCRuntime::getMessageSendInfo(const ObjCMethodDecl *method, CGM.getTypes().GetFunctionType(argsInfo)->getPointerTo(); return MessageSendInfo(argsInfo, signatureType); } + +llvm::Constant * +clang::CodeGen::emitObjCProtocolObject(CodeGenModule &CGM, + const ObjCProtocolDecl *protocol) { + return CGM.getObjCRuntime().GetOrEmitProtocol(protocol); +} diff --git a/clang/lib/CodeGen/CGObjCRuntime.h b/clang/lib/CodeGen/CGObjCRuntime.h index f0b3525cfde2..a2c189585f7b 100644 --- a/clang/lib/CodeGen/CGObjCRuntime.h +++ b/clang/lib/CodeGen/CGObjCRuntime.h @@ -211,6 +211,11 @@ public: /// implementations. virtual void GenerateProtocol(const ObjCProtocolDecl *OPD) = 0; + /// GetOrEmitProtocol - Get the protocol object for the given + /// declaration, emitting it if necessary. The return value has type + /// ProtocolPtrTy. + virtual llvm::Constant *GetOrEmitProtocol(const ObjCProtocolDecl *PD) = 0; + /// Generate a function preamble for a method with the specified /// types. 
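The CGObjCRuntime.h/.cpp hunks above turn GetOrEmitProtocol into a pure
virtual on the common CGObjCRuntime interface (the Mac-only declaration is
deleted from CGObjCMac.cpp, and the GNU runtimes override it) and then
surface it through a free function declared in clang's public
clang/CodeGen/CodeGenABITypes.h. That gives clients outside lib/CodeGen a
supported way to force emission of a protocol object. A caller's-eye sketch,
assuming a CodeGenModule and a protocol declaration are already in hand:

#include "clang/CodeGen/CodeGenABITypes.h"

llvm::Constant *forceEmitProtocol(clang::CodeGen::CodeGenModule &CGM,
                                  const clang::ObjCProtocolDecl *PD) {
  // Forwards to CGM.getObjCRuntime().GetOrEmitProtocol(PD), per the
  // definition added in the CGObjCRuntime.cpp hunk above.
  return clang::CodeGen::emitObjCProtocolObject(CGM, PD);
}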
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 97b17799a03e..43cbe9c720ea 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -21,17 +21,24 @@ #include "clang/AST/StmtOpenMP.h" #include "clang/AST/StmtVisitor.h" #include "clang/Basic/BitmaskEnum.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/OpenMPKinds.h" +#include "clang/Basic/SourceManager.h" #include "clang/CodeGen/ConstantInitBuilder.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Value.h" +#include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include <cassert> +#include <numeric> using namespace clang; using namespace CodeGen; @@ -562,205 +569,6 @@ enum OpenMPSchedType { OMP_sch_modifier_nonmonotonic = (1 << 30), }; -enum OpenMPRTLFunction { - /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, - /// kmpc_micro microtask, ...); - OMPRTL__kmpc_fork_call, - /// Call to void *__kmpc_threadprivate_cached(ident_t *loc, - /// kmp_int32 global_tid, void *data, size_t size, void ***cache); - OMPRTL__kmpc_threadprivate_cached, - /// Call to void __kmpc_threadprivate_register( ident_t *, - /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); - OMPRTL__kmpc_threadprivate_register, - // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc); - OMPRTL__kmpc_global_thread_num, - // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, - // kmp_critical_name *crit); - OMPRTL__kmpc_critical, - // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 - // global_tid, kmp_critical_name *crit, uintptr_t hint); - OMPRTL__kmpc_critical_with_hint, - // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, - // kmp_critical_name *crit); - OMPRTL__kmpc_end_critical, - // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 - // global_tid); - OMPRTL__kmpc_cancel_barrier, - // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); - OMPRTL__kmpc_barrier, - // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); - OMPRTL__kmpc_for_static_fini, - // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 - // global_tid); - OMPRTL__kmpc_serialized_parallel, - // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 - // global_tid); - OMPRTL__kmpc_end_serialized_parallel, - // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, - // kmp_int32 num_threads); - OMPRTL__kmpc_push_num_threads, - // Call to void __kmpc_flush(ident_t *loc); - OMPRTL__kmpc_flush, - // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid); - OMPRTL__kmpc_master, - // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid); - OMPRTL__kmpc_end_master, - // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, - // int end_part); - OMPRTL__kmpc_omp_taskyield, - // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid); - OMPRTL__kmpc_single, - // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid); - OMPRTL__kmpc_end_single, - // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, - // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t 
sizeof_shareds, - // kmp_routine_entry_t *task_entry); - OMPRTL__kmpc_omp_task_alloc, - // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *, - // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, - // size_t sizeof_shareds, kmp_routine_entry_t *task_entry, - // kmp_int64 device_id); - OMPRTL__kmpc_omp_target_task_alloc, - // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t * - // new_task); - OMPRTL__kmpc_omp_task, - // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, - // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), - // kmp_int32 didit); - OMPRTL__kmpc_copyprivate, - // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, - // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void - // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); - OMPRTL__kmpc_reduce, - // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 - // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, - // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name - // *lck); - OMPRTL__kmpc_reduce_nowait, - // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, - // kmp_critical_name *lck); - OMPRTL__kmpc_end_reduce, - // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, - // kmp_critical_name *lck); - OMPRTL__kmpc_end_reduce_nowait, - // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, - // kmp_task_t * new_task); - OMPRTL__kmpc_omp_task_begin_if0, - // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, - // kmp_task_t * new_task); - OMPRTL__kmpc_omp_task_complete_if0, - // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); - OMPRTL__kmpc_ordered, - // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); - OMPRTL__kmpc_end_ordered, - // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 - // global_tid); - OMPRTL__kmpc_omp_taskwait, - // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); - OMPRTL__kmpc_taskgroup, - // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); - OMPRTL__kmpc_end_taskgroup, - // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, - // int proc_bind); - OMPRTL__kmpc_push_proc_bind, - // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 - // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t - // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); - OMPRTL__kmpc_omp_task_with_deps, - // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 - // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 - // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); - OMPRTL__kmpc_omp_wait_deps, - // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 - // global_tid, kmp_int32 cncl_kind); - OMPRTL__kmpc_cancellationpoint, - // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, - // kmp_int32 cncl_kind); - OMPRTL__kmpc_cancel, - // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, - // kmp_int32 num_teams, kmp_int32 thread_limit); - OMPRTL__kmpc_push_num_teams, - // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro - // microtask, ...); - OMPRTL__kmpc_fork_teams, - // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int - // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int - // sched, 
kmp_uint64 grainsize, void *task_dup); - OMPRTL__kmpc_taskloop, - // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 - // num_dims, struct kmp_dim *dims); - OMPRTL__kmpc_doacross_init, - // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); - OMPRTL__kmpc_doacross_fini, - // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 - // *vec); - OMPRTL__kmpc_doacross_post, - // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 - // *vec); - OMPRTL__kmpc_doacross_wait, - // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void - // *data); - OMPRTL__kmpc_task_reduction_init, - // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void - // *d); - OMPRTL__kmpc_task_reduction_get_th_data, - // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al); - OMPRTL__kmpc_alloc, - // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al); - OMPRTL__kmpc_free, - - // - // Offloading related calls - // - // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 - // size); - OMPRTL__kmpc_push_target_tripcount, - // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t - // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t - // *arg_types); - OMPRTL__tgt_target, - // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, - // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t - // *arg_types); - OMPRTL__tgt_target_nowait, - // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, - // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t - // *arg_types, int32_t num_teams, int32_t thread_limit); - OMPRTL__tgt_target_teams, - // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void - // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t - // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); - OMPRTL__tgt_target_teams_nowait, - // Call to void __tgt_register_requires(int64_t flags); - OMPRTL__tgt_register_requires, - // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, - // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); - OMPRTL__tgt_target_data_begin, - // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t - // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t - // *arg_types); - OMPRTL__tgt_target_data_begin_nowait, - // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num, - // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); - OMPRTL__tgt_target_data_end, - // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t - // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t - // *arg_types); - OMPRTL__tgt_target_data_end_nowait, - // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num, - // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); - OMPRTL__tgt_target_data_update, - // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t - // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t - // *arg_types); - OMPRTL__tgt_target_data_update_nowait, - // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle); - OMPRTL__tgt_mapper_num_components, - // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void - // *base, void 
*begin, int64_t size, int64_t type); - OMPRTL__tgt_push_mapper_component, -}; - /// A basic class for pre|post-action for advanced codegen sequence for OpenMP /// region. class CleanupTy final : public EHScopeStack::Cleanup { @@ -971,27 +779,37 @@ void ReductionCodeGen::emitAggregateInitialization( } ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, + ArrayRef<const Expr *> Origs, ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> ReductionOps) { ClausesData.reserve(Shareds.size()); SharedAddresses.reserve(Shareds.size()); Sizes.reserve(Shareds.size()); BaseDecls.reserve(Shareds.size()); - auto IPriv = Privates.begin(); - auto IRed = ReductionOps.begin(); + const auto *IOrig = Origs.begin(); + const auto *IPriv = Privates.begin(); + const auto *IRed = ReductionOps.begin(); for (const Expr *Ref : Shareds) { - ClausesData.emplace_back(Ref, *IPriv, *IRed); + ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); + std::advance(IOrig, 1); std::advance(IPriv, 1); std::advance(IRed, 1); } } -void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) { - assert(SharedAddresses.size() == N && +void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { + assert(SharedAddresses.size() == N && OrigAddresses.size() == N && "Number of generated lvalues must be exactly N."); - LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); - LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); + LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); + LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); SharedAddresses.emplace_back(First, Second); + if (ClausesData[N].Shared == ClausesData[N].Ref) { + OrigAddresses.emplace_back(First, Second); + } else { + LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); + LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); + OrigAddresses.emplace_back(First, Second); + } } void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { @@ -1001,26 +819,25 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); if (!PrivateType->isVariablyModifiedType()) { Sizes.emplace_back( - CGF.getTypeSize( - SharedAddresses[N].first.getType().getNonReferenceType()), + CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), nullptr); return; } llvm::Value *Size; llvm::Value *SizeInChars; - auto *ElemType = cast<llvm::PointerType>( - SharedAddresses[N].first.getPointer(CGF)->getType()) - ->getElementType(); + auto *ElemType = + cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType()) + ->getElementType(); auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); if (AsArraySection) { - Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF), - SharedAddresses[N].first.getPointer(CGF)); + Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF), + OrigAddresses[N].first.getPointer(CGF)); Size = CGF.Builder.CreateNUWAdd( Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); } else { - SizeInChars = CGF.getTypeSize( - SharedAddresses[N].first.getType().getNonReferenceType()); + SizeInChars = + CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()); Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); } Sizes.emplace_back(SizeInChars, Size); @@ -1243,7 +1060,7 @@ static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, StringRef Separator) : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), - OffloadEntriesInfoManager(CGM) { + OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) { ASTContext &C = CGM.getContext(); RecordDecl *RD = C.buildImplicitRecord("ident_t"); QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); @@ -1263,55 +1080,11 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, IdentTy = CGM.getTypes().ConvertRecordDeclType(RD); KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); + // Initialize Types used in OpenMPIRBuilder from OMPKinds.def + OMPBuilder.initialize(); loadOffloadInfoMetadata(); } -bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD, - const GlobalDecl &OldGD, - llvm::GlobalValue *OrigAddr, - bool IsForDefinition) { - // Emit at least a definition for the aliasee if the the address of the - // original function is requested. - if (IsForDefinition || OrigAddr) - (void)CGM.GetAddrOfGlobal(NewGD); - StringRef NewMangledName = CGM.getMangledName(NewGD); - llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName); - if (Addr && !Addr->isDeclaration()) { - const auto *D = cast<FunctionDecl>(OldGD.getDecl()); - const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(NewGD); - llvm::Type *DeclTy = CGM.getTypes().GetFunctionType(FI); - - // Create a reference to the named value. This ensures that it is emitted - // if a deferred decl. - llvm::GlobalValue::LinkageTypes LT = CGM.getFunctionLinkage(OldGD); - - // Create the new alias itself, but don't set a name yet. - auto *GA = - llvm::GlobalAlias::create(DeclTy, 0, LT, "", Addr, &CGM.getModule()); - - if (OrigAddr) { - assert(OrigAddr->isDeclaration() && "Expected declaration"); - - GA->takeName(OrigAddr); - OrigAddr->replaceAllUsesWith( - llvm::ConstantExpr::getBitCast(GA, OrigAddr->getType())); - OrigAddr->eraseFromParent(); - } else { - GA->setName(CGM.getMangledName(OldGD)); - } - - // Set attributes which are particular to an alias; this is a - // specialization of the attributes which may be set on a global function. - if (D->hasAttr<WeakAttr>() || D->hasAttr<WeakRefAttr>() || - D->isWeakImported()) - GA->setLinkage(llvm::Function::WeakAnyLinkage); - - CGM.SetCommonAttributes(OldGD, GA); - return true; - } - return false; -} - void CGOpenMPRuntime::clear() { InternalVars.clear(); // Clean non-target variable declarations possibly used only in debug info. @@ -1325,14 +1098,6 @@ void CGOpenMPRuntime::clear() { continue; GV->eraseFromParent(); } - // Emit aliases for the deferred aliasees. - for (const auto &Pair : DeferredVariantFunction) { - StringRef MangledName = CGM.getMangledName(Pair.second.second); - llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName); - // If not able to emit alias, just emit original declaration. 
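// Two threads run through the CGOpenMPRuntime changes here. First, the
// constructor above now owns an llvm::OpenMPIRBuilder, constructed with the
// module and primed via OMPBuilder.initialize(), which pulls runtime
// function and type definitions from OMPKinds.def. Second, the hand-written
// declare-variant aliasing (tryEmitDeclareVariant and the
// DeferredVariantFunction bookkeeping being deleted in this hunk) is gone.
// With the builder in place, call sites request runtime entry points the
// way the hunks that follow do, instead of via the removed
// createRuntimeFunction/OpenMPRTLFunction enum:
llvm::FunctionCallee RTFn = OMPBuilder.getOrCreateRuntimeFunction(
    CGM.getModule(), OMPRTL___kmpc_omp_task);
CGF.EmitRuntimeCall(RTFn, TaskArgs);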
- (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr, - /*IsForDefinition=*/false); - } } std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { @@ -1343,7 +1108,7 @@ std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { OS << Sep << Part; Sep = Separator; } - return OS.str(); + return std::string(OS.str()); } static llvm::Function * @@ -1494,6 +1259,8 @@ static llvm::Function *emitParallelOrTeamsOutlinedFunction( bool HasCancel = false; if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) HasCancel = OPD->hasCancel(); + else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D)) + HasCancel = OPD->hasCancel(); else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) HasCancel = OPSD->hasCancel(); else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) @@ -1511,12 +1278,12 @@ static llvm::Function *emitParallelOrTeamsOutlinedFunction( // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new // parallel region to make cancellation barriers work properly. - llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder(); - PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel); + llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); + PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel); CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, HasCancel, OutlinedHelperName); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - return CGF.GenerateOpenMPCapturedStmtFunction(*CS); + return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); } llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( @@ -1549,7 +1316,9 @@ llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), TaskTVar->getType()->castAs<PointerType>()) .getPointer(CGF)}; - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_omp_task), + TaskArgs); }; CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, UntiedCodeGen); @@ -1560,11 +1329,19 @@ llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop : OMPD_task; const CapturedStmt *CS = D.getCapturedStmt(Region); - const auto *TD = dyn_cast<OMPTaskDirective>(&D); + bool HasCancel = false; + if (const auto *TD = dyn_cast<OMPTaskDirective>(&D)) + HasCancel = TD->hasCancel(); + else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D)) + HasCancel = TD->hasCancel(); + else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D)) + HasCancel = TD->hasCancel(); + else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D)) + HasCancel = TD->hasCancel(); + CodeGenFunction CGF(CGM, true); CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, - InnermostKind, - TD ? 
TD->hasCancel() : false, Action); + InnermostKind, HasCancel, Action); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); if (!Tied) @@ -1786,7 +1563,8 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, CGBuilderTy::InsertPointGuard IPG(CGF.Builder); CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); llvm::CallInst *Call = CGF.Builder.CreateCall( - createRuntimeFunction(OMPRTL__kmpc_global_thread_num), + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_global_thread_num), emitUpdateLocation(CGF, Loc)); Call->setCallingConv(CGF.getRuntimeCC()); Elem.second.ThreadID = Call; @@ -1800,16 +1578,17 @@ void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { OpenMPLocThreadIDMap.erase(CGF.CurFn); } if (FunctionUDRMap.count(CGF.CurFn) > 0) { - for(auto *D : FunctionUDRMap[CGF.CurFn]) + for(const auto *D : FunctionUDRMap[CGF.CurFn]) UDRMap.erase(D); FunctionUDRMap.erase(CGF.CurFn); } auto I = FunctionUDMMap.find(CGF.CurFn); if (I != FunctionUDMMap.end()) { - for(auto *D : I->second) + for(const auto *D : I->second) UDMMap.erase(D); FunctionUDMMap.erase(I); } + LastprivateConditionalToTypes.erase(CGF.CurFn); } llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { @@ -1826,766 +1605,6 @@ llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { return llvm::PointerType::getUnqual(Kmpc_MicroTy); } -llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { - llvm::FunctionCallee RTLFn = nullptr; - switch (static_cast<OpenMPRTLFunction>(Function)) { - case OMPRTL__kmpc_fork_call: { - // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro - // microtask, ...); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, - getKmpc_MicroPointerTy()}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); - if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { - if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { - llvm::LLVMContext &Ctx = F->getContext(); - llvm::MDBuilder MDB(Ctx); - // Annotate the callback behavior of the __kmpc_fork_call: - // - The callback callee is argument number 2 (microtask). - // - The first two arguments of the callback callee are unknown (-1). - // - All variadic arguments to the __kmpc_fork_call are passed to the - // callback callee. 
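// The metadata block being deleted here is what taught interprocedural
// passes about __kmpc_fork_call's convention: argument 2 (the microtask) is
// invoked as a callback, its first two parameters are unknown (-1), and the
// fork call's variadic arguments are forwarded to it. After this migration
// the equivalent annotation is expected to come from the OpenMPIRBuilder
// when it materializes the declaration. As standalone code, the encoding
// looks like this (helper name is illustrative):
#include "llvm/IR/Function.h"
#include "llvm/IR/MDBuilder.h"

void annotateForkCall(llvm::Function &F) {
  llvm::LLVMContext &Ctx = F.getContext();
  llvm::MDBuilder MDB(Ctx);
  F.addMetadata(llvm::LLVMContext::MD_callback,
                *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
                                            2, {-1, -1},
                                            /*VarArgsArePassed=*/true)}));
}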
- F->addMetadata( - llvm::LLVMContext::MD_callback, - *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( - 2, {-1, -1}, - /* VarArgsArePassed */ true)})); - } - } - break; - } - case OMPRTL__kmpc_global_thread_num: { - // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); - llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); - break; - } - case OMPRTL__kmpc_threadprivate_cached: { - // Build void *__kmpc_threadprivate_cached(ident_t *loc, - // kmp_int32 global_tid, void *data, size_t size, void ***cache); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, - CGM.VoidPtrTy, CGM.SizeTy, - CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); - break; - } - case OMPRTL__kmpc_critical: { - // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, - // kmp_critical_name *crit); - llvm::Type *TypeParams[] = { - getIdentTyPointerTy(), CGM.Int32Ty, - llvm::PointerType::getUnqual(KmpCriticalNameTy)}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); - break; - } - case OMPRTL__kmpc_critical_with_hint: { - // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, - // kmp_critical_name *crit, uintptr_t hint); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, - llvm::PointerType::getUnqual(KmpCriticalNameTy), - CGM.IntPtrTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint"); - break; - } - case OMPRTL__kmpc_threadprivate_register: { - // Build void __kmpc_threadprivate_register(ident_t *, void *data, - // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); - // typedef void *(*kmpc_ctor)(void *); - auto *KmpcCtorTy = - llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, - /*isVarArg*/ false)->getPointerTo(); - // typedef void *(*kmpc_cctor)(void *, void *); - llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; - auto *KmpcCopyCtorTy = - llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, - /*isVarArg*/ false) - ->getPointerTo(); - // typedef void (*kmpc_dtor)(void *); - auto *KmpcDtorTy = - llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) - ->getPointerTo(); - llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, - KmpcCopyCtorTy, KmpcDtorTy}; - auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, - /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); - break; - } - case OMPRTL__kmpc_end_critical: { - // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, - // kmp_critical_name *crit); - llvm::Type *TypeParams[] = { - getIdentTyPointerTy(), CGM.Int32Ty, - llvm::PointerType::getUnqual(KmpCriticalNameTy)}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); - break; - } - case OMPRTL__kmpc_cancel_barrier: { - // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 - // global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); - break; - } - case OMPRTL__kmpc_barrier: { - // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); - break; - } - case OMPRTL__kmpc_for_static_fini: { - // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); - break; - } - case OMPRTL__kmpc_push_num_threads: { - // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, - // kmp_int32 num_threads) - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, - CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); - break; - } - case OMPRTL__kmpc_serialized_parallel: { - // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 - // global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); - break; - } - case OMPRTL__kmpc_end_serialized_parallel: { - // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 - // global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); - break; - } - case OMPRTL__kmpc_flush: { - // Build void __kmpc_flush(ident_t *loc); - llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); - break; - } - case OMPRTL__kmpc_master: { - // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); - break; - } - case OMPRTL__kmpc_end_master: { - // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); - break; - } - case OMPRTL__kmpc_omp_taskyield: { - // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, - // int end_part); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); - break; - } - case OMPRTL__kmpc_single: { - // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); - break; - } - case OMPRTL__kmpc_end_single: { - // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); - break; - } - case OMPRTL__kmpc_omp_task_alloc: { - // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, - // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, - // kmp_routine_entry_t *task_entry); - assert(KmpRoutineEntryPtrTy != nullptr && - "Type kmp_routine_entry_t must be created."); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, - CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; - // Return void * and then cast to particular kmp_task_t type. - auto *FnTy = - llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); - break; - } - case OMPRTL__kmpc_omp_target_task_alloc: { - // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid, - // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, - // kmp_routine_entry_t *task_entry, kmp_int64 device_id); - assert(KmpRoutineEntryPtrTy != nullptr && - "Type kmp_routine_entry_t must be created."); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, - CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy, - CGM.Int64Ty}; - // Return void * and then cast to particular kmp_task_t type. - auto *FnTy = - llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc"); - break; - } - case OMPRTL__kmpc_omp_task: { - // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t - // *new_task); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, - CGM.VoidPtrTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); - break; - } - case OMPRTL__kmpc_copyprivate: { - // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, - // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), - // kmp_int32 didit); - llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; - auto *CpyFnTy = - llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, - CGM.VoidPtrTy, CpyFnTy->getPointerTo(), - CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); - break; - } - case OMPRTL__kmpc_reduce: { - // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, - // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void - // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); - llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; - auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, - /*isVarArg=*/false); - llvm::Type *TypeParams[] = { - getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, - CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), - 
llvm::PointerType::getUnqual(KmpCriticalNameTy)}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); - break; - } - case OMPRTL__kmpc_reduce_nowait: { - // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 - // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, - // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name - // *lck); - llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; - auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, - /*isVarArg=*/false); - llvm::Type *TypeParams[] = { - getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, - CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), - llvm::PointerType::getUnqual(KmpCriticalNameTy)}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); - break; - } - case OMPRTL__kmpc_end_reduce: { - // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, - // kmp_critical_name *lck); - llvm::Type *TypeParams[] = { - getIdentTyPointerTy(), CGM.Int32Ty, - llvm::PointerType::getUnqual(KmpCriticalNameTy)}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); - break; - } - case OMPRTL__kmpc_end_reduce_nowait: { - // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, - // kmp_critical_name *lck); - llvm::Type *TypeParams[] = { - getIdentTyPointerTy(), CGM.Int32Ty, - llvm::PointerType::getUnqual(KmpCriticalNameTy)}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = - CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); - break; - } - case OMPRTL__kmpc_omp_task_begin_if0: { - // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t - // *new_task); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, - CGM.VoidPtrTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = - CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); - break; - } - case OMPRTL__kmpc_omp_task_complete_if0: { - // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t - // *new_task); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, - CGM.VoidPtrTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, - /*Name=*/"__kmpc_omp_task_complete_if0"); - break; - } - case OMPRTL__kmpc_ordered: { - // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); - break; - } - case OMPRTL__kmpc_end_ordered: { - // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); - break; - } - case OMPRTL__kmpc_omp_taskwait: { - // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); - break; - } - case OMPRTL__kmpc_taskgroup: { - // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); - break; - } - case OMPRTL__kmpc_end_taskgroup: { - // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); - break; - } - case OMPRTL__kmpc_push_proc_bind: { - // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, - // int proc_bind) - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); - break; - } - case OMPRTL__kmpc_omp_task_with_deps: { - // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, - // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, - // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); - llvm::Type *TypeParams[] = { - getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, - CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); - RTLFn = - CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); - break; - } - case OMPRTL__kmpc_omp_wait_deps: { - // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, - // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, - // kmp_depend_info_t *noalias_dep_list); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, - CGM.Int32Ty, CGM.VoidPtrTy, - CGM.Int32Ty, CGM.VoidPtrTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); - break; - } - case OMPRTL__kmpc_cancellationpoint: { - // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 - // global_tid, kmp_int32 cncl_kind) - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); - break; - } - case OMPRTL__kmpc_cancel: { - // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, - // kmp_int32 cncl_kind) - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); - break; - } - case OMPRTL__kmpc_push_num_teams: { - // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, - // kmp_int32 num_teams, kmp_int32 num_threads) - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, - CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); - break; - } - case OMPRTL__kmpc_fork_teams: { - // Build void 
__kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro - // microtask, ...); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, - getKmpc_MicroPointerTy()}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); - if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { - if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { - llvm::LLVMContext &Ctx = F->getContext(); - llvm::MDBuilder MDB(Ctx); - // Annotate the callback behavior of the __kmpc_fork_teams: - // - The callback callee is argument number 2 (microtask). - // - The first two arguments of the callback callee are unknown (-1). - // - All variadic arguments to the __kmpc_fork_teams are passed to the - // callback callee. - F->addMetadata( - llvm::LLVMContext::MD_callback, - *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( - 2, {-1, -1}, - /* VarArgsArePassed */ true)})); - } - } - break; - } - case OMPRTL__kmpc_taskloop: { - // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int - // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int - // sched, kmp_uint64 grainsize, void *task_dup); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), - CGM.IntTy, - CGM.VoidPtrTy, - CGM.IntTy, - CGM.Int64Ty->getPointerTo(), - CGM.Int64Ty->getPointerTo(), - CGM.Int64Ty, - CGM.IntTy, - CGM.IntTy, - CGM.Int64Ty, - CGM.VoidPtrTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); - break; - } - case OMPRTL__kmpc_doacross_init: { - // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 - // num_dims, struct kmp_dim *dims); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), - CGM.Int32Ty, - CGM.Int32Ty, - CGM.VoidPtrTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); - break; - } - case OMPRTL__kmpc_doacross_fini: { - // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); - break; - } - case OMPRTL__kmpc_doacross_post: { - // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 - // *vec); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, - CGM.Int64Ty->getPointerTo()}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); - break; - } - case OMPRTL__kmpc_doacross_wait: { - // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 - // *vec); - llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, - CGM.Int64Ty->getPointerTo()}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); - break; - } - case OMPRTL__kmpc_task_reduction_init: { - // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void - // *data); - llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); - RTLFn = - CGM.CreateRuntimeFunction(FnTy, 
/*Name=*/"__kmpc_task_reduction_init"); - break; - } - case OMPRTL__kmpc_task_reduction_get_th_data: { - // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void - // *d); - llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); - break; - } - case OMPRTL__kmpc_alloc: { - // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t - // al); omp_allocator_handle_t type is void *. - llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc"); - break; - } - case OMPRTL__kmpc_free: { - // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t - // al); omp_allocator_handle_t type is void *. - llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free"); - break; - } - case OMPRTL__kmpc_push_target_tripcount: { - // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 - // size); - llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty}; - llvm::FunctionType *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount"); - break; - } - case OMPRTL__tgt_target: { - // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t - // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t - // *arg_types); - llvm::Type *TypeParams[] = {CGM.Int64Ty, - CGM.VoidPtrTy, - CGM.Int32Ty, - CGM.VoidPtrPtrTy, - CGM.VoidPtrPtrTy, - CGM.Int64Ty->getPointerTo(), - CGM.Int64Ty->getPointerTo()}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); - break; - } - case OMPRTL__tgt_target_nowait: { - // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, - // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, - // int64_t *arg_types); - llvm::Type *TypeParams[] = {CGM.Int64Ty, - CGM.VoidPtrTy, - CGM.Int32Ty, - CGM.VoidPtrPtrTy, - CGM.VoidPtrPtrTy, - CGM.Int64Ty->getPointerTo(), - CGM.Int64Ty->getPointerTo()}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait"); - break; - } - case OMPRTL__tgt_target_teams: { - // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, - // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, - // int64_t *arg_types, int32_t num_teams, int32_t thread_limit); - llvm::Type *TypeParams[] = {CGM.Int64Ty, - CGM.VoidPtrTy, - CGM.Int32Ty, - CGM.VoidPtrPtrTy, - CGM.VoidPtrPtrTy, - CGM.Int64Ty->getPointerTo(), - CGM.Int64Ty->getPointerTo(), - CGM.Int32Ty, - CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); - break; - } - case OMPRTL__tgt_target_teams_nowait: { - // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void - // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t - // *arg_sizes, int64_t *arg_types, int32_t 
num_teams, int32_t thread_limit); - llvm::Type *TypeParams[] = {CGM.Int64Ty, - CGM.VoidPtrTy, - CGM.Int32Ty, - CGM.VoidPtrPtrTy, - CGM.VoidPtrPtrTy, - CGM.Int64Ty->getPointerTo(), - CGM.Int64Ty->getPointerTo(), - CGM.Int32Ty, - CGM.Int32Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); - break; - } - case OMPRTL__tgt_register_requires: { - // Build void __tgt_register_requires(int64_t flags); - llvm::Type *TypeParams[] = {CGM.Int64Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires"); - break; - } - case OMPRTL__tgt_target_data_begin: { - // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, - // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); - llvm::Type *TypeParams[] = {CGM.Int64Ty, - CGM.Int32Ty, - CGM.VoidPtrPtrTy, - CGM.VoidPtrPtrTy, - CGM.Int64Ty->getPointerTo(), - CGM.Int64Ty->getPointerTo()}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); - break; - } - case OMPRTL__tgt_target_data_begin_nowait: { - // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t - // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t - // *arg_types); - llvm::Type *TypeParams[] = {CGM.Int64Ty, - CGM.Int32Ty, - CGM.VoidPtrPtrTy, - CGM.VoidPtrPtrTy, - CGM.Int64Ty->getPointerTo(), - CGM.Int64Ty->getPointerTo()}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait"); - break; - } - case OMPRTL__tgt_target_data_end: { - // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num, - // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); - llvm::Type *TypeParams[] = {CGM.Int64Ty, - CGM.Int32Ty, - CGM.VoidPtrPtrTy, - CGM.VoidPtrPtrTy, - CGM.Int64Ty->getPointerTo(), - CGM.Int64Ty->getPointerTo()}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); - break; - } - case OMPRTL__tgt_target_data_end_nowait: { - // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t - // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t - // *arg_types); - llvm::Type *TypeParams[] = {CGM.Int64Ty, - CGM.Int32Ty, - CGM.VoidPtrPtrTy, - CGM.VoidPtrPtrTy, - CGM.Int64Ty->getPointerTo(), - CGM.Int64Ty->getPointerTo()}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait"); - break; - } - case OMPRTL__tgt_target_data_update: { - // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num, - // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); - llvm::Type *TypeParams[] = {CGM.Int64Ty, - CGM.Int32Ty, - CGM.VoidPtrPtrTy, - CGM.VoidPtrPtrTy, - CGM.Int64Ty->getPointerTo(), - CGM.Int64Ty->getPointerTo()}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); - break; - } - case OMPRTL__tgt_target_data_update_nowait: { - // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t - // arg_num, void** args_base, void **args, 
int64_t *arg_sizes, int64_t - // *arg_types); - llvm::Type *TypeParams[] = {CGM.Int64Ty, - CGM.Int32Ty, - CGM.VoidPtrPtrTy, - CGM.VoidPtrPtrTy, - CGM.Int64Ty->getPointerTo(), - CGM.Int64Ty->getPointerTo()}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); - break; - } - case OMPRTL__tgt_mapper_num_components: { - // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle); - llvm::Type *TypeParams[] = {CGM.VoidPtrTy}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components"); - break; - } - case OMPRTL__tgt_push_mapper_component: { - // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void - // *base, void *begin, int64_t size, int64_t type); - llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy, - CGM.Int64Ty, CGM.Int64Ty}; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component"); - break; - } - } - assert(RTLFn && "Unable to find OpenMP runtime function"); - return RTLFn; -} - llvm::FunctionCallee CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { assert((IVSize == 32 || IVSize == 64) && @@ -2764,7 +1783,9 @@ Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), getOrCreateThreadPrivateCache(VD)}; return Address(CGF.EmitRuntimeCall( - createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), + Args), VDAddr.getAlignment()); } @@ -2774,7 +1795,8 @@ void CGOpenMPRuntime::emitThreadPrivateVarInit( // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime // library. llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_global_thread_num), OMPLoc); // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) // to register constructor/destructor for variable. 
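The threadprivate hunks that follow register a per-thread copy with the runtime: __kmpc_global_thread_num fetches the calling thread's id and __kmpc_threadprivate_register installs the ctor/cctor/dtor triple for the variable. A minimal source-level trigger for this path is sketched below; the variable and helper names are invented for illustration, and it is the dynamic initializer that forces the registration call:

    int make_seed() { return 42; }   // stand-in for a real runtime value
    int seed = make_seed();          // dynamic init forces __kmpc_threadprivate_register
    #pragma omp threadprivate(seed)

    int main() {
    #pragma omp parallel
      { seed += 1; }                 // each thread mutates its own copy
      return 0;
    }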
@@ -2782,7 +1804,9 @@ void CGOpenMPRuntime::emitThreadPrivateVarInit( OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), Ctor, CopyCtor, Dtor}; CGF.EmitRuntimeCall( - createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_threadprivate_register), + Args); } llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( @@ -2813,7 +1837,7 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); std::string Name = getName({"__kmpc_global_ctor_", ""}); llvm::Function *Fn = - CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); + CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, Args, Loc, Loc); llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( @@ -2846,7 +1870,7 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); std::string Name = getName({"__kmpc_global_dtor_", ""}); llvm::Function *Fn = - CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); + CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, Loc, Loc); @@ -2889,7 +1913,7 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( auto *InitFunctionTy = llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); std::string Name = getName({"__omp_threadprivate_init_", ""}); - llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction( + llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction( InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); CodeGenFunction InitCGF(CGM); FunctionArgList ArgList; @@ -2918,12 +1942,14 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, HasRequiresUnifiedSharedMemory)) return CGM.getLangOpts().OpenMPIsDevice; VD = VD->getDefinition(CGM.getContext()); - if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) + assert(VD && "Unknown VarDecl"); + + if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) return CGM.getLangOpts().OpenMPIsDevice; QualType ASTTy = VD->getType(); - SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); + // Produce the unique prefix to identify the new target regions. We use // the source location of the variable declaration which we know to not // conflict with any target region. 
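The hunks below rename CreateGlobalInitOrDestructFunction to CreateGlobalInitOrCleanUpFunction for the synthesized "<prefix>_ctor"/"<prefix>_dtor" helpers of a declare-target variable. A hedged sketch of the kind of declaration that needs them (names invented; any declare-target global with a dynamic initializer qualifies):

    #pragma omp declare target
    int fill() { return 7; }         // stand-in dynamic initializer
    struct Table { Table() : seed(fill()) {} int seed; };
    Table lookup_table;              // needs a device-side "_ctor" helper
    #pragma omp end declare target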
@@ -2949,7 +1975,7 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); - llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( + llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( FTy, Twine(Buffer, "_ctor"), FI, Loc); auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, @@ -2987,7 +2013,7 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); - llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( + llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( FTy, Twine(Buffer, "_dtor"), FI, Loc); auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, @@ -3042,7 +2068,9 @@ Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, return Address( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( CGF.EmitRuntimeCall( - createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), + Args), VarLVType->getPointerTo(/*AddrSpace=*/0)), CGM.getContext().getTypeAlignInChars(VarType)); } @@ -3093,8 +2121,9 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, if (!CGF.HaveInsertPoint()) return; llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); - auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF, - PrePostActionTy &) { + auto &M = CGM.getModule(); + auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc, + this](CodeGenFunction &CGF, PrePostActionTy &) { // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); llvm::Value *Args[] = { @@ -3106,18 +2135,19 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, RealArgs.append(CapturedVars.begin(), CapturedVars.end()); llvm::FunctionCallee RTLFn = - RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); + OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call); CGF.EmitRuntimeCall(RTLFn, RealArgs); }; - auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF, - PrePostActionTy &) { + auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc, + this](CodeGenFunction &CGF, PrePostActionTy &) { CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); // Build calls: // __kmpc_serialized_parallel(&Loc, GTid); llvm::Value *Args[] = {RTLoc, ThreadID}; - CGF.EmitRuntimeCall( - RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + M, OMPRTL___kmpc_serialized_parallel), + Args); // OutlinedFn(&gtid, &zero_bound, CapturedStruct); Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); @@ -3134,9 +2164,9 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, // __kmpc_end_serialized_parallel(&Loc, GTid); llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; - CGF.EmitRuntimeCall( - RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), - EndArgs); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + M, OMPRTL___kmpc_end_serialized_parallel), +
EndArgs); }; if (IfCond) { emitIfClause(CGF, IfCond, ThenGen, ElseGen); @@ -3250,12 +2280,16 @@ void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, std::end(Args)); if (Hint) { EnterArgs.push_back(CGF.Builder.CreateIntCast( - CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false)); + CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false)); } CommonActionTy Action( - createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint - : OMPRTL__kmpc_critical), - EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args); + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), + Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical), + EnterArgs, + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_end_critical), + Args); CriticalOpGen.setAction(Action); emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); } @@ -3271,8 +2305,12 @@ void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, // } // Prepare arguments and build a call to __kmpc_master llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; - CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args, - createRuntimeFunction(OMPRTL__kmpc_end_master), Args, + CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_master), + Args, + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_end_master), + Args, /*Conditional=*/true); MasterOpGen.setAction(Action); emitInlinedDirective(CGF, OMPD_master, MasterOpGen); @@ -3283,11 +2321,18 @@ void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) { if (!CGF.HaveInsertPoint()) return; - // Build call __kmpc_omp_taskyield(loc, thread_id, 0); - llvm::Value *Args[] = { - emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), - llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); + if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { + OMPBuilder.CreateTaskyield(CGF.Builder); + } else { + // Build call __kmpc_omp_taskyield(loc, thread_id, 0); + llvm::Value *Args[] = { + emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), + llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_omp_taskyield), + Args); + } + if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) Region->emitUntiedSwitch(CGF); } @@ -3302,8 +2347,11 @@ void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, // __kmpc_end_taskgroup(ident_t *, gtid); // Prepare arguments and build a call to __kmpc_taskgroup llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; - CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args, - createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), + CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_taskgroup), + Args, + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_end_taskgroup), Args); TaskgroupOpGen.setAction(Action); emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); @@ -3409,8 +2457,12 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, } // Prepare arguments and build a call to __kmpc_single llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; - CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args, - 
createRuntimeFunction(OMPRTL__kmpc_end_single), Args, + CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_single), + Args, + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_end_single), + Args, /*Conditional=*/true); SingleOpGen.setAction(Action); emitInlinedDirective(CGF, OMPD_single, SingleOpGen); @@ -3455,7 +2507,9 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, CpyFn, // void (*) (void *, void *) <copy_func> DidItVal // i32 did_it }; - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_copyprivate), + Args); } } @@ -3470,8 +2524,11 @@ void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, // Prepare arguments and build a call to __kmpc_ordered if (IsThreads) { llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; - CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args, - createRuntimeFunction(OMPRTL__kmpc_end_ordered), + CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_ordered), + Args, + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_end_ordered), Args); OrderedOpGen.setAction(Action); emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); @@ -3519,9 +2576,8 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, // Check if we should use the OMPBuilder auto *OMPRegionInfo = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); - llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder(); - if (OMPBuilder) { - CGF.Builder.restoreIP(OMPBuilder->CreateBarrier( + if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { + CGF.Builder.restoreIP(OMPBuilder.CreateBarrier( CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); return; } @@ -3538,7 +2594,9 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, if (OMPRegionInfo) { if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { llvm::Value *Result = CGF.EmitRuntimeCall( - createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_cancel_barrier), + Args); if (EmitChecks) { // if (__kmpc_cancel_barrier()) { // exit from construct; @@ -3557,7 +2615,9 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, return; } } - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_barrier), + Args); } /// Map the OpenMP loop schedule to the runtime enumeration. @@ -3771,6 +2831,7 @@ void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, llvm::Value *ThreadId = getThreadID(CGF, Loc); llvm::FunctionCallee StaticInitFunction = createForStaticInitFunction(Values.IVSize, Values.IVSigned); + auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); } @@ -3805,7 +2866,9 @@ void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, ? 
OMP_IDENT_WORK_LOOP : OMP_IDENT_WORK_SECTIONS), getThreadID(CGF, Loc)}; - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), + auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_for_static_fini), Args); } @@ -3853,7 +2916,8 @@ void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_push_num_threads), Args); } @@ -3867,16 +2931,23 @@ void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_push_proc_bind), + Args); } void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, - SourceLocation Loc) { - if (!CGF.HaveInsertPoint()) - return; - // Build call void __kmpc_flush(ident_t *loc) - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), - emitUpdateLocation(CGF, Loc)); + SourceLocation Loc, llvm::AtomicOrdering AO) { + if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { + OMPBuilder.CreateFlush(CGF.Builder); + } else { + if (!CGF.HaveInsertPoint()) + return; + // Build call void __kmpc_flush(ident_t *loc) + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_flush), + emitUpdateLocation(CGF, Loc)); + } } namespace { @@ -4358,13 +3429,14 @@ QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { namespace { struct PrivateHelpersTy { - PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, - const VarDecl *PrivateElemInit) - : Original(Original), PrivateCopy(PrivateCopy), + PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, + const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) + : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), PrivateElemInit(PrivateElemInit) {} - const VarDecl *Original; - const VarDecl *PrivateCopy; - const VarDecl *PrivateElemInit; + const Expr *OriginalRef = nullptr; + const VarDecl *Original = nullptr; + const VarDecl *PrivateCopy = nullptr; + const VarDecl *PrivateElemInit = nullptr; }; typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; } // anonymous namespace @@ -4744,7 +3816,7 @@ static void emitPrivatesInit(CodeGenFunction &CGF, // For target-based directives skip 3 firstprivate arrays BasePointersArray, // PointersArray and SizesArray. The original variables for these arrays are // not captured and we get their addresses explicitly. 
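Nearly every hunk in this stretch is the same mechanical migration: the hand-written createRuntimeFunction(OMPRTL__kmpc_*) calls, whose defining switch is deleted at the top of this file, become table-driven OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), OMPRTL___kmpc_*) lookups, and barrier/taskyield/flush additionally defer to the OpenMPIRBuilder when LangOpts.OpenMPIRBuilder is set. A condensed sketch of the shape of the change, assembled from the hunks above rather than copied verbatim:

    // Before: every runtime entry point hand-built in a large switch.
    llvm::FunctionCallee Old = createRuntimeFunction(OMPRTL__kmpc_flush);

    // After: declarations generated from the shared OpenMP runtime
    // definitions and cached per module.
    llvm::FunctionCallee New = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_flush);

    // Constructs the IRBuilder can already emit are delegated outright.
    if (CGF.CGM.getLangOpts().OpenMPIRBuilder)
      OMPBuilder.CreateFlush(CGF.Builder);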
- if ((!IsTargetTask && !Data.FirstprivateVars.empty()) || + if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || (IsTargetTask && KmpTaskSharedsPtr.isValid())) { SrcBase = CGF.MakeAddrLValue( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( @@ -4776,13 +3848,23 @@ static void emitPrivatesInit(CodeGenFunction &CGF, "Expected artificial target data variable."); SharedRefLValue = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); - } else { + } else if (ForDup) { SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); SharedRefLValue = CGF.MakeAddrLValue( Address(SharedRefLValue.getPointer(CGF), C.getDeclAlign(OriginalVD)), SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), SharedRefLValue.getTBAAInfo()); + } else if (CGF.LambdaCaptureFields.count( + Pair.second.Original->getCanonicalDecl()) > 0 || + dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) { + SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); + } else { + // Processing for implicitly captured variables. + InlinedOpenMPRegionRAII Region( + CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, + /*HasCancel=*/false); + SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); } if (Type->isArrayType()) { // Initialize firstprivate array. @@ -4915,7 +3997,7 @@ emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, Base, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)), Loc), - CGF.getNaturalTypeAlignment(SharedsTy)); + CGM.getNaturalTypeAlignment(SharedsTy)); } emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); @@ -4938,6 +4020,135 @@ checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { return NeedsCleanup; } +namespace { +/// Loop generator for OpenMP iterator expression. 
+class OMPIteratorGeneratorScope final + : public CodeGenFunction::OMPPrivateScope { + CodeGenFunction &CGF; + const OMPIteratorExpr *E = nullptr; + SmallVector<CodeGenFunction::JumpDest, 4> ContDests; + SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; + OMPIteratorGeneratorScope() = delete; + OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; + +public: + OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) + : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { + if (!E) + return; + SmallVector<llvm::Value *, 4> Uppers; + for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { + Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); + const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); + addPrivate(VD, [&CGF, VD]() { + return CGF.CreateMemTemp(VD->getType(), VD->getName()); + }); + const OMPIteratorHelperData &HelperData = E->getHelper(I); + addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() { + return CGF.CreateMemTemp(HelperData.CounterVD->getType(), + "counter.addr"); + }); + } + Privatize(); + + for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { + const OMPIteratorHelperData &HelperData = E->getHelper(I); + LValue CLVal = + CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), + HelperData.CounterVD->getType()); + // Counter = 0; + CGF.EmitStoreOfScalar( + llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), + CLVal); + CodeGenFunction::JumpDest &ContDest = + ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); + CodeGenFunction::JumpDest &ExitDest = + ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); + // N = <number-of_iterations>; + llvm::Value *N = Uppers[I]; + // cont: + // if (Counter < N) goto body; else goto exit; + CGF.EmitBlock(ContDest.getBlock()); + auto *CVal = + CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); + llvm::Value *Cmp = + HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() + ? 
CGF.Builder.CreateICmpSLT(CVal, N) + : CGF.Builder.CreateICmpULT(CVal, N); + llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body"); + CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock()); + // body: + CGF.EmitBlock(BodyBB); + // Iteri = Begini + Counter * Stepi; + CGF.EmitIgnoredExpr(HelperData.Update); + } + } + ~OMPIteratorGeneratorScope() { + if (!E) + return; + for (unsigned I = E->numOfIterators(); I > 0; --I) { + // Counter = Counter + 1; + const OMPIteratorHelperData &HelperData = E->getHelper(I - 1); + CGF.EmitIgnoredExpr(HelperData.CounterUpdate); + // goto cont; + CGF.EmitBranchThroughCleanup(ContDests[I - 1]); + // exit: + CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1); + } + } +}; +} // namespace + +static std::pair<llvm::Value *, llvm::Value *> +getPointerAndSize(CodeGenFunction &CGF, const Expr *E) { + const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E); + llvm::Value *Addr; + if (OASE) { + const Expr *Base = OASE->getBase(); + Addr = CGF.EmitScalarExpr(Base); + } else { + Addr = CGF.EmitLValue(E).getPointer(CGF); + } + llvm::Value *SizeVal; + QualType Ty = E->getType(); + if (OASE) { + SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType()); + for (const Expr *SE : OASE->getDimensions()) { + llvm::Value *Sz = CGF.EmitScalarExpr(SE); + Sz = CGF.EmitScalarConversion( + Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc()); + SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz); + } + } else if (const auto *ASE = + dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { + LValue UpAddrLVal = + CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); + llvm::Value *UpAddr = + CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1); + llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy); + llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy); + SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); + } else { + SizeVal = CGF.getTypeSize(Ty); + } + return std::make_pair(Addr, SizeVal); +} + +/// Builds kmp_depend_info, if it is not built yet, and builds flags type. +static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) { + QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false); + if (KmpTaskAffinityInfoTy.isNull()) { + RecordDecl *KmpAffinityInfoRD = + C.buildImplicitRecord("kmp_task_affinity_info_t"); + KmpAffinityInfoRD->startDefinition(); + addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType()); + addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType()); + addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy); + KmpAffinityInfoRD->completeDefinition(); + KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD); + } +} + CGOpenMPRuntime::TaskResultTy CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, @@ -4946,23 +4157,23 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, ASTContext &C = CGM.getContext(); llvm::SmallVector<PrivateDataTy, 4> Privates; // Aggregate privates and sort them by the alignment. 
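OMPIteratorGeneratorScope above materializes one counter-driven loop per iterator (zero the counter, test counter < upper, run the body, bump the counter in the destructor), and getPointerAndSize folds array-shaping and array-section expressions down to a base pointer plus a byte size. The source construct this serves is the OpenMP 5.0 iterator modifier; an illustrative use with invented names:

    // Each of the n generated dependences evaluates v[i] once per
    // iteration of the loop emitted by OMPIteratorGeneratorScope.
    void producer(int *v, int n) {
    #pragma omp task depend(iterator(i = 0 : n), in : v[i])
      { /* consumes v[0..n-1] */ }
    }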
- auto I = Data.PrivateCopies.begin(); + const auto *I = Data.PrivateCopies.begin(); for (const Expr *E : Data.PrivateVars) { const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); Privates.emplace_back( C.getDeclAlign(VD), - PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), + PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), /*PrivateElemInit=*/nullptr)); ++I; } I = Data.FirstprivateCopies.begin(); - auto IElemInitRef = Data.FirstprivateInits.begin(); + const auto *IElemInitRef = Data.FirstprivateInits.begin(); for (const Expr *E : Data.FirstprivateVars) { const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); Privates.emplace_back( C.getDeclAlign(VD), PrivateHelpersTy( - VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), + E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); ++I; ++IElemInitRef; @@ -4972,7 +4183,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); Privates.emplace_back( C.getDeclAlign(VD), - PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), + PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), /*PrivateElemInit=*/nullptr)); ++I; } @@ -5046,7 +4257,8 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, TiedFlag = 0x1, FinalFlag = 0x2, DestructorsFlag = 0x8, - PriorityFlag = 0x20 + PriorityFlag = 0x20, + DetachableFlag = 0x40, }; unsigned Flags = Data.Tied ? TiedFlag : 0; bool NeedsCleanup = false; @@ -5057,6 +4269,8 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, } if (Data.Priority.getInt()) Flags = Flags | PriorityFlag; + if (D.hasClausesOfKind<OMPDetachClause>()) + Flags = Flags | DetachableFlag; llvm::Value *TaskFlags = Data.Final.getPointer() ? CGF.Builder.CreateSelect(Data.Final.getPointer(), @@ -5084,10 +4298,170 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); AllocArgs.push_back(DeviceID); NewTask = CGF.EmitRuntimeCall( - createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs); + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc), + AllocArgs); } else { - NewTask = CGF.EmitRuntimeCall( - createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); + NewTask = + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_omp_task_alloc), + AllocArgs); + } + // Emit detach clause initialization. + // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid, + // task_descriptor); + if (const auto *DC = D.getSingleClause<OMPDetachClause>()) { + const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts(); + LValue EvtLVal = CGF.EmitLValue(Evt); + + // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, + // int gtid, kmp_task_t *task); + llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc()); + llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc()); + Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false); + llvm::Value *EvtVal = CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event), + {Loc, Tid, NewTask}); + EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(), + Evt->getExprLoc()); + CGF.EmitStoreOfScalar(EvtVal, EvtLVal); + } + // Process affinity clauses. 
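The detach support just added calls __kmpc_task_allow_completion_event and stores the returned event through the clause's lvalue, so a task carrying DetachableFlag stays incomplete until the event is fulfilled. At the source level that corresponds to a sketch like this, with omp_fulfill_event being the OpenMP 5.0 completion call:

    #include <omp.h>

    void f() {
      omp_event_handle_t evt;
    #pragma omp task detach(evt)
      {
        // Real code would typically hand evt to another agent;
        // fulfilling it is what marks the detachable task complete.
        omp_fulfill_event(evt);
      }
    }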
+ if (D.hasClausesOfKind<OMPAffinityClause>()) { + // Process list of affinity data. + ASTContext &C = CGM.getContext(); + Address AffinitiesArray = Address::invalid(); + // Calculate number of elements to form the array of affinity data. + llvm::Value *NumOfElements = nullptr; + unsigned NumAffinities = 0; + for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { + if (const Expr *Modifier = C->getModifier()) { + const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()); + for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { + llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); + Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); + NumOfElements = + NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz; + } + } else { + NumAffinities += C->varlist_size(); + } + } + getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy); + // Fields ids in kmp_task_affinity_info record. + enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags }; + + QualType KmpTaskAffinityInfoArrayTy; + if (NumOfElements) { + NumOfElements = CGF.Builder.CreateNUWAdd( + llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements); + OpaqueValueExpr OVE( + Loc, + C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0), + VK_RValue); + CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, + RValue::get(NumOfElements)); + KmpTaskAffinityInfoArrayTy = + C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal, + /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); + // Properly emit variable-sized array. + auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy, + ImplicitParamDecl::Other); + CGF.EmitVarDecl(*PD); + AffinitiesArray = CGF.GetAddrOfLocalVar(PD); + NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, + /*isSigned=*/false); + } else { + KmpTaskAffinityInfoArrayTy = C.getConstantArrayType( + KmpTaskAffinityInfoTy, + llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr, + ArrayType::Normal, /*IndexTypeQuals=*/0); + AffinitiesArray = + CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr"); + AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0); + NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities, + /*isSigned=*/false); + } + + const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl(); + // Fill array by elements without iterators. 
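getKmpAffinityType above builds kmp_task_affinity_info_t implicitly, one addFieldToRecordDecl per member, and the surrounding code then sizes either a VLA (iterator case) or a constant array of it. A plain C++ mirror of what that record amounts to, inferred from those three fields rather than taken from a runtime header:

    #include <cstddef>
    #include <cstdint>

    // Field order matches the BaseAddr/Len/Flags enum used below.
    struct kmp_task_affinity_info_t {
      intptr_t base_addr; // start of the range named in the clause
      size_t len;         // range size in bytes
      uint32_t flags;     // 32-bit unsigned flags word
    };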
+ unsigned Pos = 0; + bool HasIterator = false; + for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { + if (C->getModifier()) { + HasIterator = true; + continue; + } + for (const Expr *E : C->varlists()) { + llvm::Value *Addr; + llvm::Value *Size; + std::tie(Addr, Size) = getPointerAndSize(CGF, E); + LValue Base = + CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos), + KmpTaskAffinityInfoTy); + // affs[i].base_addr = &<Affinities[i].second>; + LValue BaseAddrLVal = CGF.EmitLValueForField( + Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); + CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), + BaseAddrLVal); + // affs[i].len = sizeof(<Affinities[i].second>); + LValue LenLVal = CGF.EmitLValueForField( + Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); + CGF.EmitStoreOfScalar(Size, LenLVal); + ++Pos; + } + } + LValue PosLVal; + if (HasIterator) { + PosLVal = CGF.MakeAddrLValue( + CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"), + C.getSizeType()); + CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); + } + // Process elements with iterators. + for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { + const Expr *Modifier = C->getModifier(); + if (!Modifier) + continue; + OMPIteratorGeneratorScope IteratorScope( + CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts())); + for (const Expr *E : C->varlists()) { + llvm::Value *Addr; + llvm::Value *Size; + std::tie(Addr, Size) = getPointerAndSize(CGF, E); + llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); + LValue Base = CGF.MakeAddrLValue( + Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx), + AffinitiesArray.getAlignment()), + KmpTaskAffinityInfoTy); + // affs[i].base_addr = &<Affinities[i].second>; + LValue BaseAddrLVal = CGF.EmitLValueForField( + Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); + CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), + BaseAddrLVal); + // affs[i].len = sizeof(<Affinities[i].second>); + LValue LenLVal = CGF.EmitLValueForField( + Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); + CGF.EmitStoreOfScalar(Size, LenLVal); + Idx = CGF.Builder.CreateNUWAdd( + Idx, llvm::ConstantInt::get(Idx->getType(), 1)); + CGF.EmitStoreOfScalar(Idx, PosLVal); + } + } + // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, + // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 + // naffins, kmp_task_affinity_info_t *affin_list); + llvm::Value *LocRef = emitUpdateLocation(CGF, Loc); + llvm::Value *GTid = getThreadID(CGF, Loc); + llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + AffinitiesArray.getPointer(), CGM.VoidPtrTy); + // FIXME: Emit the function and ignore its result for now unless the + // runtime function is properly implemented. 
+ (void)CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity), + {LocRef, GTid, NewTask, NumOfElements, AffinListPtr}); } llvm::Value *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( @@ -5106,7 +4480,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)), Loc), - CGF.getNaturalTypeAlignment(SharedsTy)); + CGM.getNaturalTypeAlignment(SharedsTy)); LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); @@ -5158,6 +4532,540 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, return Result; } +namespace { +/// Dependence kind for RTL. +enum RTLDependenceKindTy { + DepIn = 0x01, + DepInOut = 0x3, + DepMutexInOutSet = 0x4 +}; +/// Fields ids in kmp_depend_info record. +enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; +} // namespace + +/// Translates internal dependency kind into the runtime kind. +static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { + RTLDependenceKindTy DepKind; + switch (K) { + case OMPC_DEPEND_in: + DepKind = DepIn; + break; + // Out and InOut dependencies must use the same code. + case OMPC_DEPEND_out: + case OMPC_DEPEND_inout: + DepKind = DepInOut; + break; + case OMPC_DEPEND_mutexinoutset: + DepKind = DepMutexInOutSet; + break; + case OMPC_DEPEND_source: + case OMPC_DEPEND_sink: + case OMPC_DEPEND_depobj: + case OMPC_DEPEND_unknown: + llvm_unreachable("Unknown task dependence type"); + } + return DepKind; +} + +/// Builds kmp_depend_info, if it is not built yet, and builds flags type. 
+static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, + QualType &FlagsTy) { + FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); + if (KmpDependInfoTy.isNull()) { + RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); + KmpDependInfoRD->startDefinition(); + addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); + addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); + addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); + KmpDependInfoRD->completeDefinition(); + KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); + } +} + +std::pair<llvm::Value *, LValue> +CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, + SourceLocation Loc) { + ASTContext &C = CGM.getContext(); + QualType FlagsTy; + getDependTypes(C, KmpDependInfoTy, FlagsTy); + RecordDecl *KmpDependInfoRD = + cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); + LValue Base = CGF.EmitLoadOfPointerLValue( + DepobjLVal.getAddress(CGF), + C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); + QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); + Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); + Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), + Base.getTBAAInfo()); + llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( + Addr.getPointer(), + llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); + LValue NumDepsBase = CGF.MakeAddrLValue( + Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, + Base.getBaseInfo(), Base.getTBAAInfo()); + // NumDeps = deps[i].base_addr; + LValue BaseAddrLVal = CGF.EmitLValueForField( + NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); + llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); + return std::make_pair(NumDeps, Base); +} + +static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, + llvm::PointerUnion<unsigned *, LValue *> Pos, + const OMPTaskDataTy::DependData &Data, + Address DependenciesArray) { + CodeGenModule &CGM = CGF.CGM; + ASTContext &C = CGM.getContext(); + QualType FlagsTy; + getDependTypes(C, KmpDependInfoTy, FlagsTy); + RecordDecl *KmpDependInfoRD = + cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); + llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); + + OMPIteratorGeneratorScope IteratorScope( + CGF, cast_or_null<OMPIteratorExpr>( + Data.IteratorExpr ? 
Data.IteratorExpr->IgnoreParenImpCasts() + : nullptr)); + for (const Expr *E : Data.DepExprs) { + llvm::Value *Addr; + llvm::Value *Size; + std::tie(Addr, Size) = getPointerAndSize(CGF, E); + LValue Base; + if (unsigned *P = Pos.dyn_cast<unsigned *>()) { + Base = CGF.MakeAddrLValue( + CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy); + } else { + LValue &PosLVal = *Pos.get<LValue *>(); + llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); + Base = CGF.MakeAddrLValue( + Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx), + DependenciesArray.getAlignment()), + KmpDependInfoTy); + } + // deps[i].base_addr = &<Dependencies[i].second>; + LValue BaseAddrLVal = CGF.EmitLValueForField( + Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); + CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), + BaseAddrLVal); + // deps[i].len = sizeof(<Dependencies[i].second>); + LValue LenLVal = CGF.EmitLValueForField( + Base, *std::next(KmpDependInfoRD->field_begin(), Len)); + CGF.EmitStoreOfScalar(Size, LenLVal); + // deps[i].flags = <Dependencies[i].first>; + RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind); + LValue FlagsLVal = CGF.EmitLValueForField( + Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); + CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), + FlagsLVal); + if (unsigned *P = Pos.dyn_cast<unsigned *>()) { + ++(*P); + } else { + LValue &PosLVal = *Pos.get<LValue *>(); + llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); + Idx = CGF.Builder.CreateNUWAdd(Idx, + llvm::ConstantInt::get(Idx->getType(), 1)); + CGF.EmitStoreOfScalar(Idx, PosLVal); + } + } +} + +static SmallVector<llvm::Value *, 4> +emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, + const OMPTaskDataTy::DependData &Data) { + assert(Data.DepKind == OMPC_DEPEND_depobj && + "Expected depobj dependecy kind."); + SmallVector<llvm::Value *, 4> Sizes; + SmallVector<LValue, 4> SizeLVals; + ASTContext &C = CGF.getContext(); + QualType FlagsTy; + getDependTypes(C, KmpDependInfoTy, FlagsTy); + RecordDecl *KmpDependInfoRD = + cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); + QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); + llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); + { + OMPIteratorGeneratorScope IteratorScope( + CGF, cast_or_null<OMPIteratorExpr>( + Data.IteratorExpr ? 
Data.IteratorExpr->IgnoreParenImpCasts() + : nullptr)); + for (const Expr *E : Data.DepExprs) { + LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); + LValue Base = CGF.EmitLoadOfPointerLValue( + DepobjLVal.getAddress(CGF), + C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); + Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + Base.getAddress(CGF), KmpDependInfoPtrT); + Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), + Base.getTBAAInfo()); + llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( + Addr.getPointer(), + llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); + LValue NumDepsBase = CGF.MakeAddrLValue( + Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, + Base.getBaseInfo(), Base.getTBAAInfo()); + // NumDeps = deps[i].base_addr; + LValue BaseAddrLVal = CGF.EmitLValueForField( + NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); + llvm::Value *NumDeps = + CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); + LValue NumLVal = CGF.MakeAddrLValue( + CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"), + C.getUIntPtrType()); + CGF.InitTempAlloca(NumLVal.getAddress(CGF), + llvm::ConstantInt::get(CGF.IntPtrTy, 0)); + llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc()); + llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps); + CGF.EmitStoreOfScalar(Add, NumLVal); + SizeLVals.push_back(NumLVal); + } + } + for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) { + llvm::Value *Size = + CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc()); + Sizes.push_back(Size); + } + return Sizes; +} + +static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, + LValue PosLVal, + const OMPTaskDataTy::DependData &Data, + Address DependenciesArray) { + assert(Data.DepKind == OMPC_DEPEND_depobj && + "Expected depobj dependecy kind."); + ASTContext &C = CGF.getContext(); + QualType FlagsTy; + getDependTypes(C, KmpDependInfoTy, FlagsTy); + RecordDecl *KmpDependInfoRD = + cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); + QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); + llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); + llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy); + { + OMPIteratorGeneratorScope IteratorScope( + CGF, cast_or_null<OMPIteratorExpr>( + Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() + : nullptr)); + for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) { + const Expr *E = Data.DepExprs[I]; + LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); + LValue Base = CGF.EmitLoadOfPointerLValue( + DepobjLVal.getAddress(CGF), + C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); + Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + Base.getAddress(CGF), KmpDependInfoPtrT); + Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), + Base.getTBAAInfo()); + + // Get number of elements in a single depobj. + llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( + Addr.getPointer(), + llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); + LValue NumDepsBase = CGF.MakeAddrLValue( + Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, + Base.getBaseInfo(), Base.getTBAAInfo()); + // NumDeps = deps[i].base_addr; + LValue BaseAddrLVal = CGF.EmitLValueForField( + NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); + llvm::Value *NumDeps = + CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); + + // memcopy dependency data. 
+ llvm::Value *Size = CGF.Builder.CreateNUWMul( + ElSize, + CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); + llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); + Address DepAddr = + Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos), + DependenciesArray.getAlignment()); + CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size); + + // Increase pos. + // pos += size; + llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps); + CGF.EmitStoreOfScalar(Add, PosLVal); + } + } +} + +std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( + CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies, + SourceLocation Loc) { + if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) { + return D.DepExprs.empty(); + })) + return std::make_pair(nullptr, Address::invalid()); + // Process list of dependencies. + ASTContext &C = CGM.getContext(); + Address DependenciesArray = Address::invalid(); + llvm::Value *NumOfElements = nullptr; + unsigned NumDependencies = std::accumulate( + Dependencies.begin(), Dependencies.end(), 0, + [](unsigned V, const OMPTaskDataTy::DependData &D) { + return D.DepKind == OMPC_DEPEND_depobj + ? V + : (V + (D.IteratorExpr ? 0 : D.DepExprs.size())); + }); + QualType FlagsTy; + getDependTypes(C, KmpDependInfoTy, FlagsTy); + bool HasDepobjDeps = false; + bool HasRegularWithIterators = false; + llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); + llvm::Value *NumOfRegularWithIterators = + llvm::ConstantInt::get(CGF.IntPtrTy, 1); + // Calculate number of depobj dependecies and regular deps with the iterators. + for (const OMPTaskDataTy::DependData &D : Dependencies) { + if (D.DepKind == OMPC_DEPEND_depobj) { + SmallVector<llvm::Value *, 4> Sizes = + emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); + for (llvm::Value *Size : Sizes) { + NumOfDepobjElements = + CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); + } + HasDepobjDeps = true; + continue; + } + // Include number of iterations, if any. + if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { + for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { + llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); + Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); + NumOfRegularWithIterators = + CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz); + } + HasRegularWithIterators = true; + continue; + } + } + + QualType KmpDependInfoArrayTy; + if (HasDepobjDeps || HasRegularWithIterators) { + NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, + /*isSigned=*/false); + if (HasDepobjDeps) { + NumOfElements = + CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); + } + if (HasRegularWithIterators) { + NumOfElements = + CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); + } + OpaqueValueExpr OVE(Loc, + C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), + VK_RValue); + CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, + RValue::get(NumOfElements)); + KmpDependInfoArrayTy = + C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal, + /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); + // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); + // Properly emit variable-sized array. 
+    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
+                                         ImplicitParamDecl::Other);
+    CGF.EmitVarDecl(*PD);
+    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
+    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
+                                              /*isSigned=*/false);
+  } else {
+    KmpDependInfoArrayTy = C.getConstantArrayType(
+        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
+        ArrayType::Normal, /*IndexTypeQuals=*/0);
+    DependenciesArray =
+        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
+    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
+    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
+                                           /*isSigned=*/false);
+  }
+  unsigned Pos = 0;
+  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
+    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
+        Dependencies[I].IteratorExpr)
+      continue;
+    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
+                   DependenciesArray);
+  }
+  // Copy regular dependencies with iterators.
+  LValue PosLVal = CGF.MakeAddrLValue(
+      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
+  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
+  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
+    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
+        !Dependencies[I].IteratorExpr)
+      continue;
+    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
+                   DependenciesArray);
+  }
+  // Copy final depobj arrays without iterators.
+  if (HasDepobjDeps) {
+    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
+      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
+        continue;
+      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
+                         DependenciesArray);
+    }
+  }
+  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+      DependenciesArray, CGF.VoidPtrTy);
+  return std::make_pair(NumOfElements, DependenciesArray);
+}
+
+Address CGOpenMPRuntime::emitDepobjDependClause(
+    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
+    SourceLocation Loc) {
+  if (Dependencies.DepExprs.empty())
+    return Address::invalid();
+  // Process list of dependencies.
+  ASTContext &C = CGM.getContext();
+  Address DependenciesArray = Address::invalid();
+  unsigned NumDependencies = Dependencies.DepExprs.size();
+  QualType FlagsTy;
+  getDependTypes(C, KmpDependInfoTy, FlagsTy);
+  RecordDecl *KmpDependInfoRD =
+      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
+
+  llvm::Value *Size;
+  // Define type kmp_depend_info[<Dependencies.size()>];
+  // For depobj reserve one extra element to store the number of elements.
+  // It is required to handle depobj(x) update(in) construct.
+ // kmp_depend_info[<Dependencies.size()>] deps; + llvm::Value *NumDepsVal; + CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); + if (const auto *IE = + cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { + NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); + for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { + llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); + Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); + NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); + } + Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), + NumDepsVal); + CharUnits SizeInBytes = + C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); + llvm::Value *RecSize = CGM.getSize(SizeInBytes); + Size = CGF.Builder.CreateNUWMul(Size, RecSize); + NumDepsVal = + CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); + } else { + QualType KmpDependInfoArrayTy = C.getConstantArrayType( + KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), + nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); + CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); + Size = CGM.getSize(Sz.alignTo(Align)); + NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); + } + // Need to allocate on the dynamic memory. + llvm::Value *ThreadID = getThreadID(CGF, Loc); + // Use default allocator. + llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); + llvm::Value *Args[] = {ThreadID, Size, Allocator}; + + llvm::Value *Addr = + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_alloc), + Args, ".dep.arr.addr"); + Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo()); + DependenciesArray = Address(Addr, Align); + // Write number of elements in the first element of array for depobj. 
+ LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); + // deps[i].base_addr = NumDependencies; + LValue BaseAddrLVal = CGF.EmitLValueForField( + Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); + CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); + llvm::PointerUnion<unsigned *, LValue *> Pos; + unsigned Idx = 1; + LValue PosLVal; + if (Dependencies.IteratorExpr) { + PosLVal = CGF.MakeAddrLValue( + CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), + C.getSizeType()); + CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, + /*IsInit=*/true); + Pos = &PosLVal; + } else { + Pos = &Idx; + } + emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); + DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy); + return DependenciesArray; +} + +void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, + SourceLocation Loc) { + ASTContext &C = CGM.getContext(); + QualType FlagsTy; + getDependTypes(C, KmpDependInfoTy, FlagsTy); + LValue Base = CGF.EmitLoadOfPointerLValue( + DepobjLVal.getAddress(CGF), + C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); + QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); + Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); + llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( + Addr.getPointer(), + llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); + DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, + CGF.VoidPtrTy); + llvm::Value *ThreadID = getThreadID(CGF, Loc); + // Use default allocator. + llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); + llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator}; + + // _kmpc_free(gtid, addr, nullptr); + (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_free), + Args); +} + +void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, + OpenMPDependClauseKind NewDepKind, + SourceLocation Loc) { + ASTContext &C = CGM.getContext(); + QualType FlagsTy; + getDependTypes(C, KmpDependInfoTy, FlagsTy); + RecordDecl *KmpDependInfoRD = + cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); + llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); + llvm::Value *NumDeps; + LValue Base; + std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); + + Address Begin = Base.getAddress(CGF); + // Cast from pointer to array type to pointer to single element. + llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps); + // The basic structure here is a while-do loop. 
+ llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); + llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); + llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); + CGF.EmitBlock(BodyBB); + llvm::PHINode *ElementPHI = + CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); + ElementPHI->addIncoming(Begin.getPointer(), EntryBB); + Begin = Address(ElementPHI, Begin.getAlignment()); + Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), + Base.getTBAAInfo()); + // deps[i].flags = NewDepKind; + RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); + LValue FlagsLVal = CGF.EmitLValueForField( + Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); + CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), + FlagsLVal); + + // Shift the address forward by one element. + Address ElementNext = + CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); + ElementPHI->addIncoming(ElementNext.getPointer(), + CGF.Builder.GetInsertBlock()); + llvm::Value *IsEmpty = + CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); + CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); + // Done. + CGF.EmitBlock(DoneBB, /*IsFinished=*/true); +} + void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, @@ -5174,94 +5082,11 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; LValue TDBase = Result.TDBase; const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; - ASTContext &C = CGM.getContext(); // Process list of dependences. Address DependenciesArray = Address::invalid(); - unsigned NumDependencies = Data.Dependences.size(); - if (NumDependencies) { - // Dependence kind for RTL. 
- enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 }; - enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; - RecordDecl *KmpDependInfoRD; - QualType FlagsTy = - C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); - llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); - if (KmpDependInfoTy.isNull()) { - KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); - KmpDependInfoRD->startDefinition(); - addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); - addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); - addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); - KmpDependInfoRD->completeDefinition(); - KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); - } else { - KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); - } - // Define type kmp_depend_info[<Dependences.size()>]; - QualType KmpDependInfoArrayTy = C.getConstantArrayType( - KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), - nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); - // kmp_depend_info[<Dependences.size()>] deps; - DependenciesArray = - CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); - for (unsigned I = 0; I < NumDependencies; ++I) { - const Expr *E = Data.Dependences[I].second; - LValue Addr = CGF.EmitLValue(E); - llvm::Value *Size; - QualType Ty = E->getType(); - if (const auto *ASE = - dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { - LValue UpAddrLVal = - CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); - llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32( - UpAddrLVal.getPointer(CGF), /*Idx0=*/1); - llvm::Value *LowIntPtr = - CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGM.SizeTy); - llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); - Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); - } else { - Size = CGF.getTypeSize(Ty); - } - LValue Base = CGF.MakeAddrLValue( - CGF.Builder.CreateConstArrayGEP(DependenciesArray, I), - KmpDependInfoTy); - // deps[i].base_addr = &<Dependences[i].second>; - LValue BaseAddrLVal = CGF.EmitLValueForField( - Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); - CGF.EmitStoreOfScalar( - CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGF.IntPtrTy), - BaseAddrLVal); - // deps[i].len = sizeof(<Dependences[i].second>); - LValue LenLVal = CGF.EmitLValueForField( - Base, *std::next(KmpDependInfoRD->field_begin(), Len)); - CGF.EmitStoreOfScalar(Size, LenLVal); - // deps[i].flags = <Dependences[i].first>; - RTLDependenceKindTy DepKind; - switch (Data.Dependences[I].first) { - case OMPC_DEPEND_in: - DepKind = DepIn; - break; - // Out and InOut dependencies must use the same code. - case OMPC_DEPEND_out: - case OMPC_DEPEND_inout: - DepKind = DepInOut; - break; - case OMPC_DEPEND_mutexinoutset: - DepKind = DepMutexInOutSet; - break; - case OMPC_DEPEND_source: - case OMPC_DEPEND_sink: - case OMPC_DEPEND_unknown: - llvm_unreachable("Unknown task dependence type"); - } - LValue FlagsLVal = CGF.EmitLValueForField( - Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); - CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), - FlagsLVal); - } - DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy); - } + llvm::Value *NumOfElements; + std::tie(NumOfElements, DependenciesArray) = + emitDependClause(CGF, Data.Dependences, Loc); // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() // libcall. 
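For context, here is a hedged sketch (illustrative only, not part of this patch) of the OpenMP 5.0 source forms that the reworked dependence lowering above handles. Both depobj dependencies and iterator dependencies make the number of kmp_depend_info records a runtime quantity, which is why __kmpc_omp_task_with_deps now receives the NumOfElements value computed by emitDependClause instead of a compile-time constant:

#include <omp.h>

void example(int *a, int n) {
  omp_depend_t d;
  // Lowered by emitDepobjDependClause: the record array is allocated with
  // __kmpc_alloc, with one extra kmp_depend_info slot holding the count.
  #pragma omp depobj(d) depend(inout : a[0])
  // Lowered by emitDependClause: the depobj and iterator parts make the
  // total number of dependence records known only at run time.
  #pragma omp task depend(depobj : d) depend(iterator(i = 0 : n), in : a[i])
  a[0] += 1;
  #pragma omp taskwait
  #pragma omp depobj(d) update(in) // emitUpdateClause rewrites deps[i].flags
  #pragma omp depobj(d) destroy    // emitDestroyClause frees via __kmpc_free
}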
@@ -5273,28 +5098,30 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; llvm::Value *DepTaskArgs[7]; - if (NumDependencies) { + if (!Data.Dependences.empty()) { DepTaskArgs[0] = UpLoc; DepTaskArgs[1] = ThreadID; DepTaskArgs[2] = NewTask; - DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies); + DepTaskArgs[3] = NumOfElements; DepTaskArgs[4] = DependenciesArray.getPointer(); DepTaskArgs[5] = CGF.Builder.getInt32(0); DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } - auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies, - &TaskArgs, + auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { if (!Data.Tied) { auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); } - if (NumDependencies) { + if (!Data.Dependences.empty()) { CGF.EmitRuntimeCall( - createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs); + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps), + DepTaskArgs); } else { - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_omp_task), TaskArgs); } // Check if parent region is untied and build return for untied task; @@ -5304,26 +5131,27 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, }; llvm::Value *DepWaitTaskArgs[6]; - if (NumDependencies) { + if (!Data.Dependences.empty()) { DepWaitTaskArgs[0] = UpLoc; DepWaitTaskArgs[1] = ThreadID; - DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies); + DepWaitTaskArgs[2] = NumOfElements; DepWaitTaskArgs[3] = DependenciesArray.getPointer(); DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } - auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, - NumDependencies, &DepWaitTaskArgs, + auto &M = CGM.getModule(); + auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, + TaskEntry, &Data, &DepWaitTaskArgs, Loc](CodeGenFunction &CGF, PrePostActionTy &) { - CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); CodeGenFunction::RunCleanupsScope LocalScope(CGF); // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info // is specified. 
- if (NumDependencies) - CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), - DepWaitTaskArgs); + if (!Data.Dependences.empty()) + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), + DepWaitTaskArgs); // Call proxy_task_entry(gtid, new_task); auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { @@ -5338,9 +5166,12 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, // kmp_task_t *new_task); RegionCodeGenTy RCG(CodeGen); - CommonActionTy Action( - RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs, - RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs); + CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( + M, OMPRTL___kmpc_omp_task_begin_if0), + TaskArgs, + OMPBuilder.getOrCreateRuntimeFunction( + M, OMPRTL___kmpc_omp_task_complete_if0), + TaskArgs); RCG.setAction(Action); RCG(CGF); }; @@ -5434,7 +5265,9 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( Result.TaskDupFn, CGF.VoidPtrTy) : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_taskloop), + TaskArgs); } /// Emit reduction operation for each element of array (required for @@ -5776,8 +5609,9 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, Lock // kmp_critical_name *&<lock> }; llvm::Value *Res = CGF.EmitRuntimeCall( - createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait - : OMPRTL__kmpc_reduce), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), + WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), Args); // 5. Build switch(res) @@ -5818,8 +5652,9 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, RegionCodeGenTy RCG(CodeGen); CommonActionTy Action( nullptr, llvm::None, - createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait - : OMPRTL__kmpc_end_reduce), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait + : OMPRTL___kmpc_end_reduce), EndArgs); RCG.setAction(Action); RCG(CGF); @@ -5942,7 +5777,8 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, Lock // kmp_critical_name *&<lock> }; CommonActionTy Action(nullptr, llvm::None, - createRuntimeFunction(OMPRTL__kmpc_end_reduce), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_end_reduce), EndArgs); AtomicRCG.setAction(Action); AtomicRCG(CGF); @@ -5969,12 +5805,12 @@ static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, {D->isLocalVarDeclOrParm() ? 
D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
-  return Out.str();
+  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
-/// void @.red_init(void* %arg) {
+/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
@@ -5984,10 +5820,15 @@ static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            ReductionCodeGen &RCG, unsigned N) {
   ASTContext &C = CGM.getContext();
+  QualType VoidPtrTy = C.VoidPtrTy;
+  VoidPtrTy.addRestrict();
   FunctionArgList Args;
-  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
+  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                           ImplicitParamDecl::Other);
+  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
+                              ImplicitParamDecl::Other);
   Args.emplace_back(&Param);
+  Args.emplace_back(&ParamOrig);
   const auto &FnInfo =
       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
@@ -6012,28 +5853,25 @@ static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                           CGM.getContext().getSizeType(), Loc);
   }
   RCG.emitAggregateType(CGF, N, Size);
-  LValue SharedLVal;
+  LValue OrigLVal;
   // If initializer uses initializer from declare reduction construct, emit a
   // pointer to the address of the original reduction item (required by
   // reduction initializer).
   if (RCG.usesReductionInitializer(N)) {
-    Address SharedAddr =
-        CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
-            CGF, CGM.getContext().VoidPtrTy,
-            generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
+    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
     SharedAddr = CGF.EmitLoadOfPointer(
         SharedAddr,
         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
-    SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
+    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
   } else {
-    SharedLVal = CGF.MakeNaturalAlignAddrLValue(
+    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
   }
   // Emit the initializer:
   // %0 = bitcast void* %arg to <type>*
   // store <type> <init>, <type>* %0
-  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
+  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                          [](CodeGenFunction &) { return false; });
   CGF.FinishFunction();
   return Fn;
@@ -6173,18 +6011,20 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
     return nullptr;

   // Build typedef struct:
-  // kmp_task_red_input {
+  // kmp_taskred_input {
   //   void *reduce_shar; // shared reduction item
+  //   void *reduce_orig; // original reduction item used for initialization
   //   size_t reduce_size; // size of data item
   //   void *reduce_init; // data initialization routine
   //   void *reduce_fini; // data finalization routine
   //   void *reduce_comb; // data combiner routine
   //   kmp_task_red_flags_t flags; // flags for additional info from compiler
-  // } kmp_task_red_input_t;
+  // } kmp_taskred_input_t;
   ASTContext &C = CGM.getContext();
-  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
+  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
   RD->startDefinition();
   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
+  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
+  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD,
C.getSizeType()); const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); @@ -6199,8 +6039,8 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); // kmp_task_red_input_t .rd_input.[Size]; Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); - ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies, - Data.ReductionOps); + ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, + Data.ReductionCopies, Data.ReductionOps); for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), @@ -6212,20 +6052,24 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); // ElemLVal.reduce_shar = &Shareds[Cnt]; LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); - RCG.emitSharedLValue(CGF, Cnt); + RCG.emitSharedOrigLValue(CGF, Cnt); llvm::Value *CastedShared = CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); CGF.EmitStoreOfScalar(CastedShared, SharedLVal); + // ElemLVal.reduce_orig = &Origs[Cnt]; + LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD); + llvm::Value *CastedOrig = + CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF)); + CGF.EmitStoreOfScalar(CastedOrig, OrigLVal); RCG.emitAggregateType(CGF, Cnt); llvm::Value *SizeValInChars; llvm::Value *SizeVal; std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); - // We use delayed creation/initialization for VLAs, array sections and - // custom reduction initializations. It is required because runtime does not - // provide the way to pass the sizes of VLAs/array sections to - // initializer/combiner/finalizer functions and does not pass the pointer to - // original reduction item to the initializer. Instead threadprivate global - // variables are used to store these values and use them in the functions. + // We use delayed creation/initialization for VLAs and array sections. It is + // required because runtime does not provide the way to pass the sizes of + // VLAs/array sections to initializer/combiner/finalizer functions. Instead + // threadprivate global variables are used to store these values and use + // them in the functions. 
bool DelayedCreation = !!SizeVal; SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, /*isSigned=*/false); @@ -6236,7 +6080,6 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( llvm::Value *InitAddr = CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); CGF.EmitStoreOfScalar(InitAddr, InitLVal); - DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt); // ElemLVal.reduce_fini = fini; LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); @@ -6260,16 +6103,52 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF), FlagsLVal.getType()); } - // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void - // *data); + if (Data.IsReductionWithTaskMod) { + // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int + // is_ws, int num, void *data); + llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); + llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), + CGM.IntTy, /*isSigned=*/true); + llvm::Value *Args[] = { + IdentTLoc, GTid, + llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0, + /*isSigned=*/true), + llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + TaskRedInput.getPointer(), CGM.VoidPtrTy)}; + return CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init), + Args); + } + // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data); llvm::Value *Args[] = { CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, /*isSigned=*/true), llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), CGM.VoidPtrTy)}; - return CGF.EmitRuntimeCall( - createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args); + return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_taskred_init), + Args); +} + +void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF, + SourceLocation Loc, + bool IsWorksharingReduction) { + // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int + // is_ws, int num, void *data); + llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); + llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), + CGM.IntTy, /*isSigned=*/true); + llvm::Value *Args[] = {IdentTLoc, GTid, + llvm::ConstantInt::get(CGM.IntTy, + IsWorksharingReduction ? 1 : 0, + /*isSigned=*/true)}; + (void)CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini), + Args); } void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, @@ -6287,16 +6166,6 @@ void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); } - // Store address of the original reduction item if custom initializer is used. 
- if (RCG.usesReductionInitializer(N)) { - Address SharedAddr = getAddrOfArtificialThreadPrivate( - CGF, CGM.getContext().VoidPtrTy, - generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); - CGF.Builder.CreateStore( - CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - RCG.getSharedLValue(N).getPointer(CGF), CGM.VoidPtrTy), - SharedAddr, /*IsVolatile=*/false); - } } Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, @@ -6313,7 +6182,9 @@ Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, SharedLVal.getPointer(CGF), CGM.VoidPtrTy)}; return Address( CGF.EmitRuntimeCall( - createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data), + Args), SharedLVal.getAlignment()); } @@ -6321,11 +6192,19 @@ void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc) { if (!CGF.HaveInsertPoint()) return; - // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 - // global_tid); - llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; - // Ignore return result until untied tasks are supported. - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); + + if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { + OMPBuilder.CreateTaskwait(CGF.Builder); + } else { + // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 + // global_tid); + llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; + // Ignore return result until untied tasks are supported. + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_omp_taskwait), + Args); + } + if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) Region->emitUntiedSwitch(CGF); } @@ -6382,7 +6261,9 @@ void CGOpenMPRuntime::emitCancellationPointCall( CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; // Ignore return result until untied tasks are supported. llvm::Value *Result = CGF.EmitRuntimeCall( - createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args); + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_cancellationpoint), + Args); // if (__kmpc_cancellationpoint()) { // exit from construct; // } @@ -6407,17 +6288,18 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, return; // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, // kmp_int32 cncl_kind); + auto &M = CGM.getModule(); if (auto *OMPRegionInfo = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { - auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF, - PrePostActionTy &) { + auto &&ThenGen = [this, &M, Loc, CancelRegion, + OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) { CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); llvm::Value *Args[] = { RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; // Ignore return result until untied tasks are supported. llvm::Value *Result = CGF.EmitRuntimeCall( - RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args); + OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args); // if (__kmpc_cancel()) { // exit from construct; // } @@ -6442,16 +6324,106 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, } } +namespace { +/// Cleanup action for uses_allocators support. 
+class OMPUsesAllocatorsActionTy final : public PrePostActionTy { + ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; + +public: + OMPUsesAllocatorsActionTy( + ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) + : Allocators(Allocators) {} + void Enter(CodeGenFunction &CGF) override { + if (!CGF.HaveInsertPoint()) + return; + for (const auto &AllocatorData : Allocators) { + CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( + CGF, AllocatorData.first, AllocatorData.second); + } + } + void Exit(CodeGenFunction &CGF) override { + if (!CGF.HaveInsertPoint()) + return; + for (const auto &AllocatorData : Allocators) { + CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, + AllocatorData.first); + } + } +}; +} // namespace + void CGOpenMPRuntime::emitTargetOutlinedFunction( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { assert(!ParentName.empty() && "Invalid target region parent name!"); HasEmittedTargetRegion = true; + SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; + for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { + for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { + const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); + if (!D.AllocatorTraits) + continue; + Allocators.emplace_back(D.Allocator, D.AllocatorTraits); + } + } + OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); + CodeGen.setAction(UsesAllocatorAction); emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); } +void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, + const Expr *Allocator, + const Expr *AllocatorTraits) { + llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); + ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); + // Use default memspace handle. + llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); + llvm::Value *NumTraits = llvm::ConstantInt::get( + CGF.IntTy, cast<ConstantArrayType>( + AllocatorTraits->getType()->getAsArrayTypeUnsafe()) + ->getSize() + .getLimitedValue()); + LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); + Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy); + AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, + AllocatorTraitsLVal.getBaseInfo(), + AllocatorTraitsLVal.getTBAAInfo()); + llvm::Value *Traits = + CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); + + llvm::Value *AllocatorVal = + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_init_allocator), + {ThreadId, MemSpaceHandle, NumTraits, Traits}); + // Store to allocator. 
+ CGF.EmitVarDecl(*cast<VarDecl>( + cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl())); + LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); + AllocatorVal = + CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy, + Allocator->getType(), Allocator->getExprLoc()); + CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal); +} + +void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF, + const Expr *Allocator) { + llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); + ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); + LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); + llvm::Value *AllocatorVal = + CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc()); + AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(), + CGF.getContext().VoidPtrTy, + Allocator->getExprLoc()); + (void)CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_destroy_allocator), + {ThreadId, AllocatorVal}); +} + void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, @@ -6483,7 +6455,7 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS); + OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); // If this target outline function is not an offload entry, we don't need to // register it. @@ -6669,6 +6641,8 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF, case OMPD_taskgroup: case OMPD_atomic: case OMPD_flush: + case OMPD_depobj: + case OMPD_scan: case OMPD_teams: case OMPD_target_data: case OMPD_target_exit_data: @@ -6684,6 +6658,8 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF, case OMPD_target_update: case OMPD_declare_simd: case OMPD_declare_variant: + case OMPD_begin_declare_variant: + case OMPD_end_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: @@ -6697,6 +6673,8 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF, case OMPD_requires: case OMPD_unknown: break; + default: + break; } llvm_unreachable("Unexpected directive kind."); } @@ -6980,6 +6958,8 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF, case OMPD_taskgroup: case OMPD_atomic: case OMPD_flush: + case OMPD_depobj: + case OMPD_scan: case OMPD_teams: case OMPD_target_data: case OMPD_target_exit_data: @@ -6995,6 +6975,8 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF, case OMPD_target_update: case OMPD_declare_simd: case OMPD_declare_variant: + case OMPD_begin_declare_variant: + case OMPD_end_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: @@ -7008,6 +6990,8 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF, case OMPD_requires: case OMPD_unknown: break; + default: + break; } llvm_unreachable("Unsupported directive kind."); } @@ -7044,7 +7028,7 @@ public: OMP_MAP_TARGET_PARAM = 0x20, /// Signal that the runtime library has to return the device pointer /// in the current position for the data being mapped. Used when we have the - /// use_device_ptr clause. + /// use_device_ptr or use_device_addr clause. OMP_MAP_RETURN_PARAM = 0x40, /// This flag signals that the reference being passed is a pointer to /// private data. 
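As a usage sketch for the uses_allocators support added above (an assumed example, not taken from the patch): the traits array becomes an implicit firstprivate of the target region, and the allocator handle is initialized on entry and destroyed on exit by the __kmpc_init_allocator / __kmpc_destroy_allocator calls that OMPUsesAllocatorsActionTy wraps around the region body:

#include <omp.h>

void example(void) {
  // The constant array size feeds the NumTraits argument of
  // __kmpc_init_allocator; the array itself is captured firstprivate.
  omp_alloctrait_t traits[1] = {{omp_atk_alignment, 64}};
  omp_allocator_handle_t my_alloc;
  #pragma omp target uses_allocators(my_alloc(traits))
  {
    // my_alloc is valid only inside the region: Enter initializes it,
    // Exit releases it.
    int *p = (int *)omp_alloc(64 * sizeof(int), my_alloc);
    p[0] = 0;
    omp_free(p, my_alloc);
  }
}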
@@ -7112,26 +7096,30 @@ private: ArrayRef<OpenMPMapModifierKind> MapModifiers; bool ReturnDevicePointer = false; bool IsImplicit = false; + bool ForDeviceAddr = false; MapInfo() = default; MapInfo( OMPClauseMappableExprCommon::MappableExprComponentListRef Components, OpenMPMapClauseKind MapType, - ArrayRef<OpenMPMapModifierKind> MapModifiers, - bool ReturnDevicePointer, bool IsImplicit) + ArrayRef<OpenMPMapModifierKind> MapModifiers, bool ReturnDevicePointer, + bool IsImplicit, bool ForDeviceAddr = false) : Components(Components), MapType(MapType), MapModifiers(MapModifiers), - ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {} + ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit), + ForDeviceAddr(ForDeviceAddr) {} }; - /// If use_device_ptr is used on a pointer which is a struct member and there - /// is no map information about it, then emission of that entry is deferred - /// until the whole struct has been processed. + /// If use_device_ptr or use_device_addr is used on a decl which is a struct + /// member and there is no map information about it, then emission of that + /// entry is deferred until the whole struct has been processed. struct DeferredDevicePtrEntryTy { const Expr *IE = nullptr; const ValueDecl *VD = nullptr; + bool ForDeviceAddr = false; - DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD) - : IE(IE), VD(VD) {} + DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD, + bool ForDeviceAddr) + : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {} }; /// The target directive from where the mappable clauses were extracted. It @@ -7158,6 +7146,20 @@ private: llvm::Value *getExprTypeSize(const Expr *E) const { QualType ExprTy = E->getType().getCanonicalType(); + // Calculate the size for array shaping expression. + if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) { + llvm::Value *Size = + CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType()); + for (const Expr *SE : OAE->getDimensions()) { + llvm::Value *Sz = CGF.EmitScalarExpr(SE); + Sz = CGF.EmitScalarConversion(Sz, SE->getType(), + CGF.getContext().getSizeType(), + SE->getExprLoc()); + Size = CGF.Builder.CreateNUWMul(Size, Sz); + } + return Size; + } + // Reference types are ignored for mapping purposes. if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) ExprTy = RefTy->getPointeeType().getCanonicalType(); @@ -7173,7 +7175,7 @@ private: // If there is no length associated with the expression and lower bound is // not specified too, that means we are using the whole length of the // base. - if (!OAE->getLength() && OAE->getColonLoc().isValid() && + if (!OAE->getLength() && OAE->getColonLocFirst().isValid() && !OAE->getLowerBound()) return CGF.getTypeSize(BaseTy); @@ -7188,7 +7190,7 @@ private: // If we don't have a length at this point, that is because we have an // array section with a single element. 
- if (!OAE->getLength() && OAE->getColonLoc().isInvalid()) + if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid()) return ElemSize; if (const Expr *LenExpr = OAE->getLength()) { @@ -7198,7 +7200,7 @@ private: LenExpr->getExprLoc()); return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); } - assert(!OAE->getLength() && OAE->getColonLoc().isValid() && + assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() && OAE->getLowerBound() && "expected array_section[lb:]."); // Size = sizetype - lb * elemtype; llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); @@ -7271,7 +7273,7 @@ private: return false; // An array section with no colon always refer to a single element. - if (OASE->getColonLoc().isInvalid()) + if (OASE->getColonLocFirst().isInvalid()) return false; const Expr *Length = OASE->getLength(); @@ -7305,13 +7307,12 @@ private: /// \a IsFirstComponent should be set to true if the provided set of /// components is the first associated with a capture. void generateInfoForComponentList( - OpenMPMapClauseKind MapType, - ArrayRef<OpenMPMapModifierKind> MapModifiers, + OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, OMPClauseMappableExprCommon::MappableExprComponentListRef Components, MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, - bool IsImplicit, + bool IsImplicit, bool ForDeviceAddr = false, ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> OverlappedElements = llvm::None) const { // The following summarizes what has to be generated for each map and the @@ -7489,6 +7490,7 @@ private: const Expr *AssocExpr = I->getAssociatedExpression(); const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); + const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); if (isa<MemberExpr>(AssocExpr)) { // The base is the 'this' pointer. The content of the pointer is going @@ -7498,6 +7500,11 @@ private: (OASE && isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); + } else if (OAShE && + isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { + BP = Address( + CGF.EmitScalarExpr(OAShE->getBase()), + CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); } else { // The base is the reference to the variable. // BP = &Var. @@ -7580,29 +7587,44 @@ private: // types. const auto *OASE = dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); + const auto *OAShE = + dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); + const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); + const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); bool IsPointer = + OAShE || (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) .getCanonicalType() ->isAnyPointerType()) || I->getAssociatedExpression()->getType()->isAnyPointerType(); + bool IsNonDerefPointer = IsPointer && !UO && !BO; - if (Next == CE || IsPointer || IsFinalArraySection) { + if (Next == CE || IsNonDerefPointer || IsFinalArraySection) { // If this is not the last component, we expect the pointer to be // associated with an array expression or member expression. 
assert((Next == CE || isa<MemberExpr>(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || - isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) && + isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || + isa<UnaryOperator>(Next->getAssociatedExpression()) || + isa<BinaryOperator>(Next->getAssociatedExpression())) && "Unexpected expression"); - Address LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) - .getAddress(CGF); + Address LB = Address::invalid(); + if (OAShE) { + LB = Address(CGF.EmitScalarExpr(OAShE->getBase()), + CGF.getContext().getTypeAlignInChars( + OAShE->getBase()->getType())); + } else { + LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) + .getAddress(CGF); + } // If this component is a pointer inside the base struct then we don't // need to create any entry for it - it will be combined with the object // it is pointing to into a single PTR_AND_OBJ entry. - bool IsMemberPointer = - IsPointer && EncounteredME && + bool IsMemberPointerOrAddr = + (IsPointer || ForDeviceAddr) && EncounteredME && (dyn_cast<MemberExpr>(I->getAssociatedExpression()) == EncounteredME); if (!OverlappedElements.empty()) { @@ -7669,7 +7691,7 @@ private: break; } llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); - if (!IsMemberPointer) { + if (!IsMemberPointerOrAddr) { BasePointers.push_back(BP.getPointer()); Pointers.push_back(LB.getPointer()); Sizes.push_back( @@ -7708,13 +7730,20 @@ private: // mapped member. If the parent is "*this", then the value declaration // is nullptr. if (EncounteredME) { - const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl()); + const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); unsigned FieldIndex = FD->getFieldIndex(); // Update info about the lowest and highest elements for this struct if (!PartialStruct.Base.isValid()) { PartialStruct.LowestElem = {FieldIndex, LB}; - PartialStruct.HighestElem = {FieldIndex, LB}; + if (IsFinalArraySection) { + Address HB = + CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) + .getAddress(CGF); + PartialStruct.HighestElem = {FieldIndex, HB}; + } else { + PartialStruct.HighestElem = {FieldIndex, LB}; + } PartialStruct.Base = BP; } else if (FieldIndex < PartialStruct.LowestElem.first) { PartialStruct.LowestElem = {FieldIndex, LB}; @@ -7851,6 +7880,19 @@ public: for (const auto *D : C->varlists()) FirstPrivateDecls.try_emplace( cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); + // Extract implicit firstprivates from uses_allocators clauses. + for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { + for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { + OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); + if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) + FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), + /*Implicit=*/true); + else if (const auto *VD = dyn_cast<VarDecl>( + cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) + ->getDecl())) + FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); + } + } // Extract device pointer clause information. for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) for (auto L : C->component_lists()) @@ -7910,17 +7952,18 @@ public: // Helper function to fill the information map for the different supported // clauses. 
-    auto &&InfoGen = [&Info](
-        const ValueDecl *D,
-        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
-        OpenMPMapClauseKind MapType,
-        ArrayRef<OpenMPMapModifierKind> MapModifiers,
-        bool ReturnDevicePointer, bool IsImplicit) {
-      const ValueDecl *VD =
-          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
-      Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
-                            IsImplicit);
-    };
+    auto &&InfoGen =
+        [&Info](const ValueDecl *D,
+                OMPClauseMappableExprCommon::MappableExprComponentListRef L,
+                OpenMPMapClauseKind MapType,
+                ArrayRef<OpenMPMapModifierKind> MapModifiers,
+                bool ReturnDevicePointer, bool IsImplicit,
+                bool ForDeviceAddr = false) {
+          const ValueDecl *VD =
+              D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
+          Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
+                                IsImplicit, ForDeviceAddr);
+        };

    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
@@ -7990,7 +8033,7 @@ public:
          // partial struct.
          InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit());
-          DeferredInfo[nullptr].emplace_back(IE, VD);
+          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
        } else {
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
@@ -8002,6 +8045,70 @@ public:
      }
    }
+    // Look at the use_device_addr clause information and mark the existing map
+    // entries as such. If there is no map information for an entry in the
+    // use_device_addr list, we create one with map type 'alloc' and zero size
+    // section. It is the user's fault if that was not mapped before. If there
+    // is no map information and the pointer is a struct member, then we defer
+    // the emission of that entry until the whole struct has been processed.
+    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
+    for (const auto *C :
+         CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
+      for (const auto L : C->component_lists()) {
+        assert(!L.second.empty() && "Not expecting empty list of components!");
+        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
+        if (!Processed.insert(VD).second)
+          continue;
+        VD = cast<ValueDecl>(VD->getCanonicalDecl());
+        const Expr *IE = L.second.back().getAssociatedExpression();
+        // If the first component is a member expression, we have to look into
+        // 'this', which maps to null in the map of map information. Otherwise
+        // look directly for the information.
+        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
+
+        // We potentially have map information for this declaration already.
+        // Look for the first set of components that refer to it.
+        if (It != Info.end()) {
+          auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
+            return MI.Components.back().getAssociatedDeclaration() == VD;
+          });
+          // If we found a map entry, signal that the pointer has to be
+          // returned and move on to the next declaration.
+          if (CI != It->second.end()) {
+            CI->ReturnDevicePointer = true;
+            continue;
+          }
+        }
+
+        // We didn't find any match in our map information - generate a zero
+        // size array section - if the pointer is a struct member we defer this
+        // action until the whole struct has been processed.
+        if (isa<MemberExpr>(IE)) {
+          // Insert the pointer into Info to be processed by
+          // generateInfoForComponentList. Because it is a member pointer
+          // without a pointee, no entry will be generated for it, therefore
+          // we need to generate one after the whole struct has been processed.
+ // Nonetheless, generateInfoForComponentList must be called to take + // the pointer into account for the calculation of the range of the + // partial struct. + InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None, + /*ReturnDevicePointer=*/false, C->isImplicit(), + /*ForDeviceAddr=*/true); + DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true); + } else { + llvm::Value *Ptr; + if (IE->isGLValue()) + Ptr = CGF.EmitLValue(IE).getPointer(CGF); + else + Ptr = CGF.EmitScalarExpr(IE); + BasePointers.emplace_back(Ptr, VD); + Pointers.push_back(Ptr); + Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); + Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); + } + } + } + for (const auto &M : Info) { // We need to know when we generate information for the first component // associated with a capture, because the mapping flags depend on it. @@ -8020,10 +8127,10 @@ public: // Remember the current base pointer index. unsigned CurrentBasePointersIdx = CurBasePointers.size(); - generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, - CurBasePointers, CurPointers, CurSizes, - CurTypes, PartialStruct, - IsFirstComponentList, L.IsImplicit); + generateInfoForComponentList( + L.MapType, L.MapModifiers, L.Components, CurBasePointers, + CurPointers, CurSizes, CurTypes, PartialStruct, + IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr); // If this entry relates with a device pointer, set the relevant // declaration and add the 'return pointer' flag. @@ -8043,21 +8150,35 @@ public: } // Append any pending zero-length pointers which are struct members and - // used with use_device_ptr. + // used with use_device_ptr or use_device_addr. auto CI = DeferredInfo.find(M.first); if (CI != DeferredInfo.end()) { for (const DeferredDevicePtrEntryTy &L : CI->second) { - llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); - llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( - this->CGF.EmitLValue(L.IE), L.IE->getExprLoc()); + llvm::Value *BasePtr; + llvm::Value *Ptr; + if (L.ForDeviceAddr) { + if (L.IE->isGLValue()) + Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF); + else + Ptr = this->CGF.EmitScalarExpr(L.IE); + BasePtr = Ptr; + // Entry is RETURN_PARAM. Also, set the placeholder value + // MEMBER_OF=FFFF so that the entry is later updated with the + // correct value of MEMBER_OF. + CurTypes.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF); + } else { + BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); + Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), + L.IE->getExprLoc()); + // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder + // value MEMBER_OF=FFFF so that the entry is later updated with the + // correct value of MEMBER_OF. + CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | + OMP_MAP_MEMBER_OF); + } CurBasePointers.emplace_back(BasePtr, L.VD); CurPointers.push_back(Ptr); CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty)); - // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder - // value MEMBER_OF=FFFF so that the entry is later updated with the - // correct value of MEMBER_OF. 
- CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | - OMP_MAP_MEMBER_OF); } } @@ -8126,10 +8247,10 @@ public: for (const MapInfo &L : M.second) { assert(!L.Components.empty() && "Not expecting declaration with no component lists."); - generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, - CurBasePointers, CurPointers, CurSizes, - CurTypes, PartialStruct, - IsFirstComponentList, L.IsImplicit); + generateInfoForComponentList( + L.MapType, L.MapModifiers, L.Components, CurBasePointers, + CurPointers, CurSizes, CurTypes, PartialStruct, + IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr); IsFirstComponentList = false; } @@ -8395,10 +8516,10 @@ public: ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> OverlappedComponents = Pair.getSecond(); bool IsFirstComponentList = true; - generateInfoForComponentList(MapType, MapModifiers, Components, - BasePointers, Pointers, Sizes, Types, - PartialStruct, IsFirstComponentList, - IsImplicit, OverlappedComponents); + generateInfoForComponentList( + MapType, MapModifiers, Components, BasePointers, Pointers, Sizes, + Types, PartialStruct, IsFirstComponentList, IsImplicit, + /*ForDeviceAddr=*/false, OverlappedComponents); } // Go through other elements without overlapped elements. bool IsFirstComponentList = OverlappedData.empty(); @@ -8759,6 +8880,8 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { case OMPD_taskgroup: case OMPD_atomic: case OMPD_flush: + case OMPD_depobj: + case OMPD_scan: case OMPD_teams: case OMPD_target_data: case OMPD_target_exit_data: @@ -8774,6 +8897,8 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { case OMPD_target_update: case OMPD_declare_simd: case OMPD_declare_variant: + case OMPD_begin_declare_variant: + case OMPD_end_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: @@ -8786,6 +8911,7 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { case OMPD_parallel_master_taskloop_simd: case OMPD_requires: case OMPD_unknown: + default: llvm_unreachable("Unexpected directive."); } } @@ -8935,7 +9061,9 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, // pre-existing components. llvm::Value *OffloadingArgs[] = {Handle}; llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( - createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs); + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___tgt_mapper_num_components), + OffloadingArgs); llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( PreviousSize, MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); @@ -9041,7 +9169,8 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, CurSizeArg, CurMapType}; MapperCGF.EmitRuntimeCall( - createRuntimeFunction(OMPRTL__tgt_push_mapper_component), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___tgt_push_mapper_component), OffloadingArgs); } @@ -9085,8 +9214,9 @@ void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( // Evaluate if this is an array section. 
llvm::BasicBlock *IsDeleteBB = - MapperCGF.createBasicBlock("omp.array" + Prefix + ".evaldelete"); - llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.array" + Prefix); + MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"})); + llvm::BasicBlock *BodyBB = + MapperCGF.createBasicBlock(getName({"omp.array", Prefix})); llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE( Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB); @@ -9099,10 +9229,10 @@ void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( llvm::Value *DeleteCond; if (IsInit) { DeleteCond = MapperCGF.Builder.CreateIsNull( - DeleteBit, "omp.array" + Prefix + ".delete"); + DeleteBit, getName({"omp.array", Prefix, ".delete"})); } else { DeleteCond = MapperCGF.Builder.CreateIsNotNull( - DeleteBit, "omp.array" + Prefix + ".delete"); + DeleteBit, getName({"omp.array", Prefix, ".delete"})); } MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB); @@ -9121,7 +9251,9 @@ void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( // data structure. llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg}; MapperCGF.EmitRuntimeCall( - createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs); + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___tgt_push_mapper_component), + OffloadingArgs); } void CGOpenMPRuntime::emitTargetNumIterationsCall( @@ -9143,7 +9275,9 @@ void CGOpenMPRuntime::emitTargetNumIterationsCall( if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { llvm::Value *Args[] = {DeviceID, NumIterations}; CGF.EmitRuntimeCall( - createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args); + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_push_target_tripcount), + Args); } }; emitInlinedDirective(CGF, OMPD_unknown, CodeGen); @@ -9152,7 +9286,7 @@ void CGOpenMPRuntime::emitTargetNumIterationsCall( void CGOpenMPRuntime::emitTargetCall( CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, - const Expr *Device, + llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter) { @@ -9176,6 +9310,16 @@ void CGOpenMPRuntime::emitTargetCall( auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars, SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { + if (Device.getInt() == OMPC_DEVICE_ancestor) { + // Reverse offloading is not supported, so just execute on the host. + if (RequiresOuterTask) { + CapturedVars.clear(); + CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); + } + emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); + return; + } + // On top of the arrays that were filled up, the target offloading call // takes as arguments the device id as well as the host pointer. The host // pointer is used by the runtime library to identify the current target @@ -9190,9 +9334,13 @@ void CGOpenMPRuntime::emitTargetCall( // Emit device ID if any. 
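// The two device-clause forms handled by the ThenGen above, as a sketch
// (OpenMP 5.0; the 'ancestor' modifier requires the reverse_offload
// requirement, and device numbers here are illustrative):
#pragma omp requires reverse_offload
void device_sketch() {
  #pragma omp target device(device_num : 1) // DeviceID becomes 1
  {}
  // Reverse offloading is unsupported, so with 'ancestor' the outlined
  // region is simply called on the host (the early return above).
  #pragma omp target device(ancestor : 1)
  {}
}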
llvm::Value *DeviceID; - if (Device) { - DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), - CGF.Int64Ty, /*isSigned=*/true); + if (Device.getPointer()) { + assert((Device.getInt() == OMPC_DEVICE_unknown || + Device.getInt() == OMPC_DEVICE_device_num) && + "Expected device_num modifier."); + llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer()); + DeviceID = + CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true); } else { DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); } @@ -9256,8 +9404,9 @@ void CGOpenMPRuntime::emitTargetCall( NumTeams, NumThreads}; Return = CGF.EmitRuntimeCall( - createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait - : OMPRTL__tgt_target_teams), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), HasNowait ? OMPRTL___tgt_target_teams_nowait + : OMPRTL___tgt_target_teams), OffloadingArgs); } else { llvm::Value *OffloadingArgs[] = {DeviceID, @@ -9268,8 +9417,9 @@ void CGOpenMPRuntime::emitTargetCall( InputInfo.SizesArray.getPointer(), MapTypesArray}; Return = CGF.EmitRuntimeCall( - createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait - : OMPRTL__tgt_target), + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), + HasNowait ? OMPRTL___tgt_target_nowait : OMPRTL___tgt_target), OffloadingArgs); } @@ -9521,6 +9671,8 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, case OMPD_taskgroup: case OMPD_atomic: case OMPD_flush: + case OMPD_depobj: + case OMPD_scan: case OMPD_teams: case OMPD_target_data: case OMPD_target_exit_data: @@ -9536,6 +9688,8 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, case OMPD_target_update: case OMPD_declare_simd: case OMPD_declare_variant: + case OMPD_begin_declare_variant: + case OMPD_end_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: @@ -9548,6 +9702,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, case OMPD_parallel_master_taskloop_simd: case OMPD_requires: case OMPD_unknown: + default: llvm_unreachable("Unknown target directive for OpenMP device codegen."); } return; @@ -9774,22 +9929,40 @@ void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( " Expected target-based directive."); } -void CGOpenMPRuntime::checkArchForUnifiedAddressing( - const OMPRequiresDecl *D) { +void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { for (const OMPClause *Clause : D->clauselists()) { if (Clause->getClauseKind() == OMPC_unified_shared_memory) { HasRequiresUnifiedSharedMemory = true; - break; + } else if (const auto *AC = + dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { + switch (AC->getAtomicDefaultMemOrderKind()) { + case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: + RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; + break; + case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: + RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; + break; + case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: + RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; + break; + case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: + break; + } } } } +llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { + return RequiresAtomicOrdering; +} + bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS) { if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) return false; const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); switch(A->getAllocatorType()) { + case OMPAllocateDeclAttr::OMPNullMemAlloc: case 
OMPAllocateDeclAttr::OMPDefaultMemAlloc: // Not supported, fallback to the default mem space. case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: @@ -9865,7 +10038,7 @@ llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { const auto &FI = CGM.getTypes().arrangeNullaryFunction(); llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); std::string ReqName = getName({"omp_offloading", "requires_reg"}); - RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI); + RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI); CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; // TODO: check for other requires clauses. @@ -9880,8 +10053,9 @@ llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { "Target or declare target region expected."); if (HasRequiresUnifiedSharedMemory) Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires), - llvm::ConstantInt::get(CGM.Int64Ty, Flags)); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___tgt_register_requires), + llvm::ConstantInt::get(CGM.Int64Ty, Flags)); CGF.FinishFunction(); } return RequiresRegFn; @@ -9907,7 +10081,8 @@ void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, RealArgs.append(std::begin(Args), std::end(Args)); RealArgs.append(CapturedVars.begin(), CapturedVars.end()); - llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); + llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_fork_teams); CGF.EmitRuntimeCall(RTLFn, RealArgs); } @@ -9935,7 +10110,8 @@ void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, ThreadLimitVal}; - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams), + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_push_num_teams), PushNumTeamsArgs); } @@ -9989,7 +10165,8 @@ void CGOpenMPRuntime::emitTargetDataCalls( llvm::Value *OffloadingArgs[] = { DeviceID, PointerNum, BasePointersArrayArg, PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin), + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___tgt_target_data_begin), OffloadingArgs); // If device pointer privatization is required, emit the body of the region @@ -10025,7 +10202,8 @@ void CGOpenMPRuntime::emitTargetDataCalls( llvm::Value *OffloadingArgs[] = { DeviceID, PointerNum, BasePointersArrayArg, PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end), + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___tgt_target_data_end), OffloadingArgs); }; @@ -10105,19 +10283,19 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( // Select the right runtime function call for each expected standalone // directive. const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); - OpenMPRTLFunction RTLFn; + RuntimeFunction RTLFn; switch (D.getDirectiveKind()) { case OMPD_target_enter_data: - RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait - : OMPRTL__tgt_target_data_begin; + RTLFn = HasNowait ? 
OMPRTL___tgt_target_data_begin_nowait + : OMPRTL___tgt_target_data_begin; break; case OMPD_target_exit_data: - RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait - : OMPRTL__tgt_target_data_end; + RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait + : OMPRTL___tgt_target_data_end; break; case OMPD_target_update: - RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait - : OMPRTL__tgt_target_data_update; + RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait + : OMPRTL___tgt_target_data_update; break; case OMPD_parallel: case OMPD_for: @@ -10144,6 +10322,8 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( case OMPD_taskgroup: case OMPD_atomic: case OMPD_flush: + case OMPD_depobj: + case OMPD_scan: case OMPD_teams: case OMPD_target_data: case OMPD_distribute: @@ -10156,6 +10336,8 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( case OMPD_teams_distribute_parallel_for_simd: case OMPD_declare_simd: case OMPD_declare_variant: + case OMPD_begin_declare_variant: + case OMPD_end_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: @@ -10178,10 +10360,13 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( case OMPD_target_parallel_for_simd: case OMPD_requires: case OMPD_unknown: + default: llvm_unreachable("Unexpected standalone target data directive."); break; } - CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs); + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), + OffloadingArgs); }; auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( @@ -10343,7 +10528,7 @@ emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, break; case Linear: Out << 'l'; - if (!!ParamAttr.StrideOrArg) + if (ParamAttr.StrideOrArg != 1) Out << ParamAttr.StrideOrArg; break; case Uniform: @@ -10420,7 +10605,7 @@ static bool getAArch64PBV(QualType QT, ASTContext &C) { /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. /// TODO: Add support for references, section 3.2.1, item 1. static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { - if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { + if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { QualType PTy = QT.getCanonicalType()->getPointeeType(); if (getAArch64PBV(PTy, C)) return C.getTypeSize(PTy); @@ -10483,7 +10668,7 @@ static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { Out << 'l'; // Don't print the step value if it is not present or if it is // equal to 1. - if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1) + if (ParamAttr.StrideOrArg != 1) Out << ParamAttr.StrideOrArg; break; case Uniform: @@ -10498,7 +10683,7 @@ static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { Out << 'a' << ParamAttr.Alignment; } - return Out.str(); + return std::string(Out.str()); } // Function used to add the attribute. The parameter `VLEN` is @@ -10721,15 +10906,24 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, for (const Expr *E : Attr->linears()) { E = E->IgnoreParenImpCasts(); unsigned Pos; + // Rescaling factor needed to compute the linear parameter + // value in the mangled name. 
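// Sketch of the rescaling introduced here: for a linear pointer parameter the
// step recorded in the vector-ABI mangled name is multiplied by the pointee
// size, so the parameter below is mangled with step 8 ("l8") rather than 1
// (assuming an 8-byte double):
#pragma omp declare simd linear(p)
double inc(double *p) { return *p + 1.0; }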
+ unsigned PtrRescalingFactor = 1; if (isa<CXXThisExpr>(E)) { Pos = ParamPositions[FD]; } else { const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) ->getCanonicalDecl(); Pos = ParamPositions[PVD]; + if (auto *P = dyn_cast<PointerType>(PVD->getType())) + PtrRescalingFactor = CGM.getContext() + .getTypeSizeInChars(P->getPointeeType()) + .getQuantity(); } ParamAttrTy &ParamAttr = ParamAttrs[Pos]; ParamAttr.Kind = Linear; + // Assuming a stride of 1, for `linear` without modifiers. + ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); if (*SI) { Expr::EvalResult Result; if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { @@ -10745,6 +10939,11 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, ParamAttr.StrideOrArg = Result.Val.getInt(); } } + // If we are using a linear clause on a pointer, we need to + // rescale the value of linear_step with the byte size of the + // pointee type. + if (Linear == ParamAttr.Kind) + ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; ++SI; ++MI; } @@ -10837,10 +11036,9 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, // dims.upper = num_iterations; LValue UpperLVal = CGF.EmitLValueForField( DimsLVal, *std::next(RD->field_begin(), UpperFD)); - llvm::Value *NumIterVal = - CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]), - D.getNumIterations()->getType(), Int64Ty, - D.getNumIterations()->getExprLoc()); + llvm::Value *NumIterVal = CGF.EmitScalarConversion( + CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), + Int64Ty, NumIterations[I]->getExprLoc()); CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); // dims.stride = 1; LValue StrideLVal = CGF.EmitLValueForField( @@ -10859,13 +11057,13 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), CGM.VoidPtrTy)}; - llvm::FunctionCallee RTLFn = - createRuntimeFunction(OMPRTL__kmpc_doacross_init); + llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_doacross_init); CGF.EmitRuntimeCall(RTLFn, Args); llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; - llvm::FunctionCallee FiniRTLFn = - createRuntimeFunction(OMPRTL__kmpc_doacross_fini); + llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_doacross_fini); CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, llvm::makeArrayRef(FiniArgs)); } @@ -10893,10 +11091,12 @@ void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; llvm::FunctionCallee RTLFn; if (C->getDependencyKind() == OMPC_DEPEND_source) { - RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); + RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_doacross_post); } else { assert(C->getDependencyKind() == OMPC_DEPEND_sink); - RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); + RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_doacross_wait); } CGF.EmitRuntimeCall(RTLFn, Args); } @@ -10969,7 +11169,8 @@ Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, return Address::invalid(); const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); // Use the default allocation. 
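// Sketch of a local that takes this allocation path: a non-default allocator
// lowers the variable to a __kmpc_alloc/__kmpc_free pair with a cleanup at
// scope exit (the allocator choice is illustrative):
#include <omp.h>
void allocate_sketch() {
  double buf[64];
  #pragma omp allocate(buf) allocator(omp_large_cap_mem_alloc)
  buf[0] = 0.0;
}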
- if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && + if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || + AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && !AA->getAllocator()) return Address::invalid(); llvm::Value *Size; @@ -10999,296 +11200,23 @@ Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, llvm::Value *Args[] = {ThreadID, Size, Allocator}; llvm::Value *Addr = - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args, - CVD->getName() + ".void.addr"); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_alloc), + Args, getName({CVD->getName(), ".void.addr"})); llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, Allocator}; - llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free); + llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_free); CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn, llvm::makeArrayRef(FiniArgs)); Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( Addr, CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), - CVD->getName() + ".addr"); + getName({CVD->getName(), ".addr"})); return Address(Addr, Align); } -namespace { -using OMPContextSelectorData = - OpenMPCtxSelectorData<ArrayRef<StringRef>, llvm::APSInt>; -using CompleteOMPContextSelectorData = SmallVector<OMPContextSelectorData, 4>; -} // anonymous namespace - -/// Checks current context and returns true if it matches the context selector. -template <OpenMPContextSelectorSetKind CtxSet, OpenMPContextSelectorKind Ctx, - typename... Arguments> -static bool checkContext(const OMPContextSelectorData &Data, - Arguments... Params) { - assert(Data.CtxSet != OMP_CTX_SET_unknown && Data.Ctx != OMP_CTX_unknown && - "Unknown context selector or context selector set."); - return false; -} - -/// Checks for implementation={vendor(<vendor>)} context selector. -/// \returns true iff <vendor>="llvm", false otherwise. -template <> -bool checkContext<OMP_CTX_SET_implementation, OMP_CTX_vendor>( - const OMPContextSelectorData &Data) { - return llvm::all_of(Data.Names, - [](StringRef S) { return !S.compare_lower("llvm"); }); -} - -/// Checks for device={kind(<kind>)} context selector. -/// \returns true if <kind>="host" and compilation is for host. -/// true if <kind>="nohost" and compilation is for device. -/// true if <kind>="cpu" and compilation is for Arm, X86 or PPC CPU. -/// true if <kind>="gpu" and compilation is for NVPTX or AMDGCN. -/// false otherwise. 
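// The matcher removed below evaluated declare variant context selectors of
// this shape (a sketch; function names are illustrative):
int foo_gpu();
int foo_llvm();
#pragma omp declare variant(foo_gpu) match(device = {kind(gpu)})
#pragma omp declare variant(foo_llvm) match(implementation = {vendor(llvm)})
int foo();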
-template <> -bool checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>( - const OMPContextSelectorData &Data, CodeGenModule &CGM) { - for (StringRef Name : Data.Names) { - if (!Name.compare_lower("host")) { - if (CGM.getLangOpts().OpenMPIsDevice) - return false; - continue; - } - if (!Name.compare_lower("nohost")) { - if (!CGM.getLangOpts().OpenMPIsDevice) - return false; - continue; - } - switch (CGM.getTriple().getArch()) { - case llvm::Triple::arm: - case llvm::Triple::armeb: - case llvm::Triple::aarch64: - case llvm::Triple::aarch64_be: - case llvm::Triple::aarch64_32: - case llvm::Triple::ppc: - case llvm::Triple::ppc64: - case llvm::Triple::ppc64le: - case llvm::Triple::x86: - case llvm::Triple::x86_64: - if (Name.compare_lower("cpu")) - return false; - break; - case llvm::Triple::amdgcn: - case llvm::Triple::nvptx: - case llvm::Triple::nvptx64: - if (Name.compare_lower("gpu")) - return false; - break; - case llvm::Triple::UnknownArch: - case llvm::Triple::arc: - case llvm::Triple::avr: - case llvm::Triple::bpfel: - case llvm::Triple::bpfeb: - case llvm::Triple::hexagon: - case llvm::Triple::mips: - case llvm::Triple::mipsel: - case llvm::Triple::mips64: - case llvm::Triple::mips64el: - case llvm::Triple::msp430: - case llvm::Triple::r600: - case llvm::Triple::riscv32: - case llvm::Triple::riscv64: - case llvm::Triple::sparc: - case llvm::Triple::sparcv9: - case llvm::Triple::sparcel: - case llvm::Triple::systemz: - case llvm::Triple::tce: - case llvm::Triple::tcele: - case llvm::Triple::thumb: - case llvm::Triple::thumbeb: - case llvm::Triple::xcore: - case llvm::Triple::le32: - case llvm::Triple::le64: - case llvm::Triple::amdil: - case llvm::Triple::amdil64: - case llvm::Triple::hsail: - case llvm::Triple::hsail64: - case llvm::Triple::spir: - case llvm::Triple::spir64: - case llvm::Triple::kalimba: - case llvm::Triple::shave: - case llvm::Triple::lanai: - case llvm::Triple::wasm32: - case llvm::Triple::wasm64: - case llvm::Triple::renderscript32: - case llvm::Triple::renderscript64: - case llvm::Triple::ve: - return false; - } - } - return true; -} - -static bool matchesContext(CodeGenModule &CGM, - const CompleteOMPContextSelectorData &ContextData) { - for (const OMPContextSelectorData &Data : ContextData) { - switch (Data.Ctx) { - case OMP_CTX_vendor: - assert(Data.CtxSet == OMP_CTX_SET_implementation && - "Expected implementation context selector set."); - if (!checkContext<OMP_CTX_SET_implementation, OMP_CTX_vendor>(Data)) - return false; - break; - case OMP_CTX_kind: - assert(Data.CtxSet == OMP_CTX_SET_device && - "Expected device context selector set."); - if (!checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>(Data, - CGM)) - return false; - break; - case OMP_CTX_unknown: - llvm_unreachable("Unknown context selector kind."); - } - } - return true; -} - -static CompleteOMPContextSelectorData -translateAttrToContextSelectorData(ASTContext &C, - const OMPDeclareVariantAttr *A) { - CompleteOMPContextSelectorData Data; - for (unsigned I = 0, E = A->scores_size(); I < E; ++I) { - Data.emplace_back(); - auto CtxSet = static_cast<OpenMPContextSelectorSetKind>( - *std::next(A->ctxSelectorSets_begin(), I)); - auto Ctx = static_cast<OpenMPContextSelectorKind>( - *std::next(A->ctxSelectors_begin(), I)); - Data.back().CtxSet = CtxSet; - Data.back().Ctx = Ctx; - const Expr *Score = *std::next(A->scores_begin(), I); - Data.back().Score = Score->EvaluateKnownConstInt(C); - switch (Ctx) { - case OMP_CTX_vendor: - assert(CtxSet == OMP_CTX_SET_implementation && - 
"Expected implementation context selector set."); - Data.back().Names = - llvm::makeArrayRef(A->implVendors_begin(), A->implVendors_end()); - break; - case OMP_CTX_kind: - assert(CtxSet == OMP_CTX_SET_device && - "Expected device context selector set."); - Data.back().Names = - llvm::makeArrayRef(A->deviceKinds_begin(), A->deviceKinds_end()); - break; - case OMP_CTX_unknown: - llvm_unreachable("Unknown context selector kind."); - } - } - return Data; -} - -static bool isStrictSubset(const CompleteOMPContextSelectorData &LHS, - const CompleteOMPContextSelectorData &RHS) { - llvm::SmallDenseMap<std::pair<int, int>, llvm::StringSet<>, 4> RHSData; - for (const OMPContextSelectorData &D : RHS) { - auto &Pair = RHSData.FindAndConstruct(std::make_pair(D.CtxSet, D.Ctx)); - Pair.getSecond().insert(D.Names.begin(), D.Names.end()); - } - bool AllSetsAreEqual = true; - for (const OMPContextSelectorData &D : LHS) { - auto It = RHSData.find(std::make_pair(D.CtxSet, D.Ctx)); - if (It == RHSData.end()) - return false; - if (D.Names.size() > It->getSecond().size()) - return false; - if (llvm::set_union(It->getSecond(), D.Names)) - return false; - AllSetsAreEqual = - AllSetsAreEqual && (D.Names.size() == It->getSecond().size()); - } - - return LHS.size() != RHS.size() || !AllSetsAreEqual; -} - -static bool greaterCtxScore(const CompleteOMPContextSelectorData &LHS, - const CompleteOMPContextSelectorData &RHS) { - // Score is calculated as sum of all scores + 1. - llvm::APSInt LHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false); - bool RHSIsSubsetOfLHS = isStrictSubset(RHS, LHS); - if (RHSIsSubsetOfLHS) { - LHSScore = llvm::APSInt::get(0); - } else { - for (const OMPContextSelectorData &Data : LHS) { - if (Data.Score.getBitWidth() > LHSScore.getBitWidth()) { - LHSScore = LHSScore.extend(Data.Score.getBitWidth()) + Data.Score; - } else if (Data.Score.getBitWidth() < LHSScore.getBitWidth()) { - LHSScore += Data.Score.extend(LHSScore.getBitWidth()); - } else { - LHSScore += Data.Score; - } - } - } - llvm::APSInt RHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false); - if (!RHSIsSubsetOfLHS && isStrictSubset(LHS, RHS)) { - RHSScore = llvm::APSInt::get(0); - } else { - for (const OMPContextSelectorData &Data : RHS) { - if (Data.Score.getBitWidth() > RHSScore.getBitWidth()) { - RHSScore = RHSScore.extend(Data.Score.getBitWidth()) + Data.Score; - } else if (Data.Score.getBitWidth() < RHSScore.getBitWidth()) { - RHSScore += Data.Score.extend(RHSScore.getBitWidth()); - } else { - RHSScore += Data.Score; - } - } - } - return llvm::APSInt::compareValues(LHSScore, RHSScore) >= 0; -} - -/// Finds the variant function that matches current context with its context -/// selector. -static const FunctionDecl *getDeclareVariantFunction(CodeGenModule &CGM, - const FunctionDecl *FD) { - if (!FD->hasAttrs() || !FD->hasAttr<OMPDeclareVariantAttr>()) - return FD; - // Iterate through all DeclareVariant attributes and check context selectors. - const OMPDeclareVariantAttr *TopMostAttr = nullptr; - CompleteOMPContextSelectorData TopMostData; - for (const auto *A : FD->specific_attrs<OMPDeclareVariantAttr>()) { - CompleteOMPContextSelectorData Data = - translateAttrToContextSelectorData(CGM.getContext(), A); - if (!matchesContext(CGM, Data)) - continue; - // If the attribute matches the context, find the attribute with the highest - // score. 
- if (!TopMostAttr || !greaterCtxScore(TopMostData, Data)) { - TopMostAttr = A; - TopMostData.swap(Data); - } - } - if (!TopMostAttr) - return FD; - return cast<FunctionDecl>( - cast<DeclRefExpr>(TopMostAttr->getVariantFuncRef()->IgnoreParenImpCasts()) - ->getDecl()); -} - -bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) { - const auto *D = cast<FunctionDecl>(GD.getDecl()); - // If the original function is defined already, use its definition. - StringRef MangledName = CGM.getMangledName(GD); - llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName); - if (Orig && !Orig->isDeclaration()) - return false; - const FunctionDecl *NewFD = getDeclareVariantFunction(CGM, D); - // Emit original function if it does not have declare variant attribute or the - // context does not match. - if (NewFD == D) - return false; - GlobalDecl NewGD = GD.getWithDecl(NewFD); - if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) { - DeferredVariantFunction.erase(D); - return true; - } - DeferredVariantFunction.insert(std::make_pair(D, std::make_pair(NewGD, GD))); - return true; -} - CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( CodeGenModule &CGM, const OMPLoopDirective &S) : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { @@ -11329,17 +11257,101 @@ bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); } +void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( + const OMPExecutableDirective &S, + llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) + const { + llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; + // Vars in target/task regions must be excluded completely. + if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || + isOpenMPTaskingDirective(S.getDirectiveKind())) { + SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; + getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); + const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); + for (const CapturedStmt::Capture &Cap : CS->captures()) { + if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) + NeedToCheckForLPCs.insert(Cap.getCapturedVar()); + } + } + // Exclude vars in private clauses. 
+ for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { + for (const Expr *Ref : C->varlists()) { + if (!Ref->getType()->isScalarType()) + continue; + const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); + if (!DRE) + continue; + NeedToCheckForLPCs.insert(DRE->getDecl()); + } + } + for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { + for (const Expr *Ref : C->varlists()) { + if (!Ref->getType()->isScalarType()) + continue; + const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); + if (!DRE) + continue; + NeedToCheckForLPCs.insert(DRE->getDecl()); + } + } + for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { + for (const Expr *Ref : C->varlists()) { + if (!Ref->getType()->isScalarType()) + continue; + const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); + if (!DRE) + continue; + NeedToCheckForLPCs.insert(DRE->getDecl()); + } + } + for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { + for (const Expr *Ref : C->varlists()) { + if (!Ref->getType()->isScalarType()) + continue; + const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); + if (!DRE) + continue; + NeedToCheckForLPCs.insert(DRE->getDecl()); + } + } + for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { + for (const Expr *Ref : C->varlists()) { + if (!Ref->getType()->isScalarType()) + continue; + const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); + if (!DRE) + continue; + NeedToCheckForLPCs.insert(DRE->getDecl()); + } + } + for (const Decl *VD : NeedToCheckForLPCs) { + for (const LastprivateConditionalData &Data : + llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { + if (Data.DeclToUniqueName.count(VD) > 0) { + if (!Data.Disabled) + NeedToAddForLPCsAsDisabled.insert(VD); + break; + } + } + } +} + CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) : CGM(CGF.CGM), - NeedToPush(llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), - [](const OMPLastprivateClause *C) { - return C->getKind() == - OMPC_LASTPRIVATE_conditional; - })) { + Action((CGM.getLangOpts().OpenMP >= 50 && + llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), + [](const OMPLastprivateClause *C) { + return C->getKind() == + OMPC_LASTPRIVATE_conditional; + })) + ? ActionToDo::PushAsLastprivateConditional + : ActionToDo::DoNotPush) { assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); - if (!NeedToPush) + if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush) return; + assert(Action == ActionToDo::PushAsLastprivateConditional && + "Expected a push action."); LastprivateConditionalData &Data = CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { @@ -11347,107 +11359,136 @@ CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( continue; for (const Expr *Ref : C->varlists()) { - Data.DeclToUniqeName.try_emplace( + Data.DeclToUniqueName.insert(std::make_pair( cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), - generateUniqueName(CGM, "pl_cond", Ref)); + SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref)))); } } Data.IVLVal = IVLVal; - // In simd only mode or for simd directives no need to generate threadprivate - // references for the loop iteration counter, we can use the original one - // since outlining cannot happen in simd regions. 
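// Sketch of the construct all of this bookkeeping serves (OpenMP 5.0):
void lpc_sketch(const int *b, const int *c, int n) {
  int a = 0;
  #pragma omp parallel for lastprivate(conditional : a)
  for (int i = 0; i < n; ++i)
    if (c[i])
      a = b[i]; // 'a' ends with the value from the last iteration that
                // actually assigned it, hence the per-store tracking
}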
- if (CGF.getLangOpts().OpenMPSimd || - isOpenMPSimdDirective(S.getDirectiveKind())) { - Data.UseOriginalIV = true; + Data.Fn = CGF.CurFn; +} + +CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( + CodeGenFunction &CGF, const OMPExecutableDirective &S) + : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) { + assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); + if (CGM.getLangOpts().OpenMP < 50) return; + llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled; + tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled); + if (!NeedToAddForLPCsAsDisabled.empty()) { + Action = ActionToDo::DisableLastprivateConditional; + LastprivateConditionalData &Data = + CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); + for (const Decl *VD : NeedToAddForLPCsAsDisabled) + Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>())); + Data.Fn = CGF.CurFn; + Data.Disabled = true; } - llvm::SmallString<16> Buffer; - llvm::raw_svector_ostream OS(Buffer); - PresumedLoc PLoc = - CGM.getContext().getSourceManager().getPresumedLoc(S.getBeginLoc()); - assert(PLoc.isValid() && "Source location is expected to be always valid."); +} - llvm::sys::fs::UniqueID ID; - if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) - CGM.getDiags().Report(diag::err_cannot_open_file) - << PLoc.getFilename() << EC.message(); - OS << "$pl_cond_" << ID.getDevice() << "_" << ID.getFile() << "_" - << PLoc.getLine() << "_" << PLoc.getColumn() << "$iv"; - Data.IVName = OS.str(); +CGOpenMPRuntime::LastprivateConditionalRAII +CGOpenMPRuntime::LastprivateConditionalRAII::disable( + CodeGenFunction &CGF, const OMPExecutableDirective &S) { + return LastprivateConditionalRAII(CGF, S); } CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { - if (!NeedToPush) + if (CGM.getLangOpts().OpenMP < 50) return; - CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); + if (Action == ActionToDo::DisableLastprivateConditional) { + assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && + "Expected list of disabled private vars."); + CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); + } + if (Action == ActionToDo::PushAsLastprivateConditional) { + assert( + !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && + "Expected list of lastprivate conditional vars."); + CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); + } } -void CGOpenMPRuntime::initLastprivateConditionalCounter( - CodeGenFunction &CGF, const OMPExecutableDirective &S) { - if (CGM.getLangOpts().OpenMPSimd || - !llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), - [](const OMPLastprivateClause *C) { - return C->getKind() == OMPC_LASTPRIVATE_conditional; - })) - return; - const CGOpenMPRuntime::LastprivateConditionalData &Data = - LastprivateConditionalStack.back(); - if (Data.UseOriginalIV) - return; - // Global loop counter. Required to handle inner parallel-for regions. 
- // global_iv = iv; - Address GlobIVAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( - CGF, Data.IVLVal.getType(), Data.IVName); - LValue GlobIVLVal = CGF.MakeAddrLValue(GlobIVAddr, Data.IVLVal.getType()); - llvm::Value *IVVal = CGF.EmitLoadOfScalar(Data.IVLVal, S.getBeginLoc()); - CGF.EmitStoreOfScalar(IVVal, GlobIVLVal); +Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, + const VarDecl *VD) { + ASTContext &C = CGM.getContext(); + auto I = LastprivateConditionalToTypes.find(CGF.CurFn); + if (I == LastprivateConditionalToTypes.end()) + I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; + QualType NewType; + const FieldDecl *VDField; + const FieldDecl *FiredField; + LValue BaseLVal; + auto VI = I->getSecond().find(VD); + if (VI == I->getSecond().end()) { + RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional"); + RD->startDefinition(); + VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); + FiredField = addFieldToRecordDecl(C, RD, C.CharTy); + RD->completeDefinition(); + NewType = C.getRecordType(RD); + Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); + BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); + I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); + } else { + NewType = std::get<0>(VI->getSecond()); + VDField = std::get<1>(VI->getSecond()); + FiredField = std::get<2>(VI->getSecond()); + BaseLVal = std::get<3>(VI->getSecond()); + } + LValue FiredLVal = + CGF.EmitLValueForField(BaseLVal, FiredField); + CGF.EmitStoreOfScalar( + llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), + FiredLVal); + return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); } namespace { /// Checks if the lastprivate conditional variable is referenced in LHS. 
class LastprivateConditionalRefChecker final : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { - CodeGenFunction &CGF; ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; const Expr *FoundE = nullptr; const Decl *FoundD = nullptr; StringRef UniqueDeclName; LValue IVLVal; - StringRef IVName; + llvm::Function *FoundFn = nullptr; SourceLocation Loc; - bool UseOriginalIV = false; public: bool VisitDeclRefExpr(const DeclRefExpr *E) { for (const CGOpenMPRuntime::LastprivateConditionalData &D : llvm::reverse(LPM)) { - auto It = D.DeclToUniqeName.find(E->getDecl()); - if (It == D.DeclToUniqeName.end()) + auto It = D.DeclToUniqueName.find(E->getDecl()); + if (It == D.DeclToUniqueName.end()) continue; + if (D.Disabled) + return false; FoundE = E; FoundD = E->getDecl()->getCanonicalDecl(); - UniqueDeclName = It->getSecond(); + UniqueDeclName = It->second; IVLVal = D.IVLVal; - IVName = D.IVName; - UseOriginalIV = D.UseOriginalIV; + FoundFn = D.Fn; break; } return FoundE == E; } bool VisitMemberExpr(const MemberExpr *E) { - if (!CGF.IsWrappedCXXThis(E->getBase())) + if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) return false; for (const CGOpenMPRuntime::LastprivateConditionalData &D : llvm::reverse(LPM)) { - auto It = D.DeclToUniqeName.find(E->getMemberDecl()); - if (It == D.DeclToUniqeName.end()) + auto It = D.DeclToUniqueName.find(E->getMemberDecl()); + if (It == D.DeclToUniqueName.end()) continue; + if (D.Disabled) + return false; FoundE = E; FoundD = E->getMemberDecl()->getCanonicalDecl(); - UniqueDeclName = It->getSecond(); + UniqueDeclName = It->second; IVLVal = D.IVLVal; - IVName = D.IVName; - UseOriginalIV = D.UseOriginalIV; + FoundFn = D.Fn; break; } return FoundE == E; @@ -11465,62 +11506,41 @@ public: return false; } explicit LastprivateConditionalRefChecker( - CodeGenFunction &CGF, ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) - : CGF(CGF), LPM(LPM) {} - std::tuple<const Expr *, const Decl *, StringRef, LValue, StringRef, bool> + : LPM(LPM) {} + std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> getFoundData() const { - return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, IVName, - UseOriginalIV); + return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); } }; } // namespace -void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, - const Expr *LHS) { - if (CGF.getLangOpts().OpenMP < 50) - return; - LastprivateConditionalRefChecker Checker(CGF, LastprivateConditionalStack); - if (!Checker.Visit(LHS)) - return; - const Expr *FoundE; - const Decl *FoundD; - StringRef UniqueDeclName; - LValue IVLVal; - StringRef IVName; - bool UseOriginalIV; - std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, IVName, UseOriginalIV) = - Checker.getFoundData(); - +void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, + LValue IVLVal, + StringRef UniqueDeclName, + LValue LVal, + SourceLocation Loc) { // Last updated loop counter for the lastprivate conditional var. // int<xx> last_iv = 0; llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); llvm::Constant *LastIV = - getOrCreateInternalVariable(LLIVTy, UniqueDeclName + "$iv"); + getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); cast<llvm::GlobalVariable>(LastIV)->setAlignment( IVLVal.getAlignment().getAsAlign()); LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); - // Private address of the lastprivate conditional in the current context. 
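// A self-contained model (names are illustrative) of the update sequence
// emitLastprivateConditionalUpdate emits; last_iv and last_a stand for the
// internal-linkage globals it derives from UniqueDeclName, and the critical
// section it emits is named after UniqueDeclName:
template <typename T>
void lpc_update_model(long &last_iv, T &last_a, long iv, const T &priv_a) {
  #pragma omp critical
  if (last_iv <= iv) { // signed/unsigned compare picked from the IV type
    last_iv = iv;
    last_a = priv_a;   // scalar or complex store, per getEvaluationKind
  }
}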
- // priv_a - LValue LVal = CGF.EmitLValue(FoundE); // Last value of the lastprivate conditional. // decltype(priv_a) last_a; llvm::Constant *Last = getOrCreateInternalVariable( - LVal.getAddress(CGF).getElementType(), UniqueDeclName); + CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); cast<llvm::GlobalVariable>(Last)->setAlignment( LVal.getAlignment().getAsAlign()); LValue LastLVal = CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment()); // Global loop counter. Required to handle inner parallel-for regions. - // global_iv - if (!UseOriginalIV) { - Address IVAddr = - getAddrOfArtificialThreadPrivate(CGF, IVLVal.getType(), IVName); - IVLVal = CGF.MakeAddrLValue(IVAddr, IVLVal.getType()); - } - llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, FoundE->getExprLoc()); + // iv + llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); // #pragma omp critical(a) // if (last_iv <= iv) { @@ -11528,11 +11548,10 @@ void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, // last_a = priv_a; // } auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, - FoundE](CodeGenFunction &CGF, PrePostActionTy &Action) { + Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); - llvm::Value *LastIVVal = - CGF.EmitLoadOfScalar(LastIVLVal, FoundE->getExprLoc()); - // (last_iv <= global_iv) ? Check if the variable is updated and store new + llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); + // (last_iv <= iv) ? Check if the variable is updated and store new // value in global var. llvm::Value *CmpRes; if (IVLVal.getType()->isSignedIntegerType()) { @@ -11548,19 +11567,18 @@ void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, // { CGF.EmitBlock(ThenBB); - // last_iv = global_iv; + // last_iv = iv; CGF.EmitStoreOfScalar(IVVal, LastIVLVal); // last_a = priv_a; switch (CGF.getEvaluationKind(LVal.getType())) { case TEK_Scalar: { - llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, FoundE->getExprLoc()); + llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); CGF.EmitStoreOfScalar(PrivVal, LastLVal); break; } case TEK_Complex: { - CodeGenFunction::ComplexPairTy PrivVal = - CGF.EmitLoadOfComplex(LVal, FoundE->getExprLoc()); + CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false); break; } @@ -11580,7 +11598,100 @@ void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, RegionCodeGenTy ThenRCG(CodeGen); ThenRCG(CGF); } else { - emitCriticalRegion(CGF, UniqueDeclName, CodeGen, FoundE->getExprLoc()); + emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); + } +} + +void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, + const Expr *LHS) { + if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) + return; + LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); + if (!Checker.Visit(LHS)) + return; + const Expr *FoundE; + const Decl *FoundD; + StringRef UniqueDeclName; + LValue IVLVal; + llvm::Function *FoundFn; + std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = + Checker.getFoundData(); + if (FoundFn != CGF.CurFn) { + // Special codegen for inner parallel regions. 
+ // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
+ auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
+ assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
+ "Lastprivate conditional is not found in outer region.");
+ QualType StructTy = std::get<0>(It->getSecond());
+ const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
+ LValue PrivLVal = CGF.EmitLValue(FoundE);
+ Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ PrivLVal.getAddress(CGF),
+ CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
+ LValue BaseLVal =
+ CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
+ LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
+ CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
+ CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
+ FiredLVal, llvm::AtomicOrdering::Unordered,
+ /*IsVolatile=*/true, /*isInit=*/false);
+ return;
+ }
+
+ // Private address of the lastprivate conditional in the current context.
+ // priv_a
+ LValue LVal = CGF.EmitLValue(FoundE);
+ emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
+ FoundE->getExprLoc());
+}
+
+void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D,
+ const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
+ if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
+ return;
+ auto Range = llvm::reverse(LastprivateConditionalStack);
+ auto It = llvm::find_if(
+ Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
+ if (It == Range.end() || It->Fn != CGF.CurFn)
+ return;
+ auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
+ assert(LPCI != LastprivateConditionalToTypes.end() &&
+ "Lastprivates must be registered already.");
+ SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
+ getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
+ const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
+ for (const auto &Pair : It->DeclToUniqueName) {
+ const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
+ if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
+ continue;
+ auto I = LPCI->getSecond().find(Pair.first);
+ assert(I != LPCI->getSecond().end() &&
+ "Lastprivate must be registered already.");
+ // bool Cmp = priv_a.Fired != 0;
+ LValue BaseLVal = std::get<3>(I->getSecond());
+ LValue FiredLVal =
+ CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
+ llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
+ llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
+ llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
+ llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
+ // if (Cmp) {
+ CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
+ CGF.EmitBlock(ThenBB);
+ Address Addr = CGF.GetAddrOfLocalVar(VD);
+ LValue LVal;
+ if (VD->getType()->isReferenceType())
+ LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
+ AlignmentSource::Decl);
+ else
+ LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
+ AlignmentSource::Decl);
+ emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
+ D.getBeginLoc());
+ auto AL = ApplyDebugLocation::CreateArtificial(CGF);
+ CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
+ // }
+ }
}
@@ -11589,10 +11700,10 @@ void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
SourceLocation Loc) {
if (CGF.getLangOpts().OpenMP < 50)
return;
- auto It =
LastprivateConditionalStack.back().DeclToUniqeName.find(VD); - assert(It != LastprivateConditionalStack.back().DeclToUniqeName.end() && + auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); + assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && "Unknown lastprivate conditional variable."); - StringRef UniqueName = It->getSecond(); + StringRef UniqueName = It->second; llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); // The variable was not updated in the region - exit. if (!GV) @@ -11750,7 +11861,8 @@ Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *> Vars, - SourceLocation Loc) { + SourceLocation Loc, + llvm::AtomicOrdering AO) { llvm_unreachable("Not supported in SIMD-only mode"); } @@ -11785,6 +11897,12 @@ llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( llvm_unreachable("Not supported in SIMD-only mode"); } +void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, + SourceLocation Loc, + bool IsWorksharingReduction) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, @@ -11826,7 +11944,7 @@ void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( void CGOpenMPSIMDRuntime::emitTargetCall( CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, - const Expr *Device, + llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter) { diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 8159f5e8b790..eb22f155f5ef 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -20,12 +20,15 @@ #include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/SourceLocation.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSet.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" +#include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/Function.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/Support/AtomicOrdering.h" namespace llvm { class ArrayType; @@ -35,6 +38,7 @@ class GlobalVariable; class StructType; class Type; class Value; +class OpenMPIRBuilder; } // namespace llvm namespace clang { @@ -80,11 +84,10 @@ public: template <typename Callable> RegionCodeGenTy( Callable &&CodeGen, - typename std::enable_if< - !std::is_same<typename std::remove_reference<Callable>::type, - RegionCodeGenTy>::value>::type * = nullptr) + std::enable_if_t<!std::is_same<std::remove_reference_t<Callable>, + RegionCodeGenTy>::value> * = nullptr) : CodeGen(reinterpret_cast<intptr_t>(&CodeGen)), - Callback(CallbackFn<typename std::remove_reference<Callable>::type>), + Callback(CallbackFn<std::remove_reference_t<Callable>>), PrePostAction(nullptr) {} void setAction(PrePostActionTy &Action) const { PrePostAction = &Action; } void operator()(CodeGenFunction &CGF) const; @@ -99,9 +102,18 @@ struct OMPTaskDataTy final { SmallVector<const Expr *, 4> LastprivateVars; SmallVector<const Expr *, 4> LastprivateCopies; SmallVector<const Expr *, 4> ReductionVars; + SmallVector<const Expr *, 4> ReductionOrigs; SmallVector<const Expr *, 4> ReductionCopies; SmallVector<const Expr 
*, 4> ReductionOps; - SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 4> Dependences; + struct DependData { + OpenMPDependClauseKind DepKind = OMPC_DEPEND_unknown; + const Expr *IteratorExpr = nullptr; + SmallVector<const Expr *, 4> DepExprs; + explicit DependData() = default; + DependData(OpenMPDependClauseKind DepKind, const Expr *IteratorExpr) + : DepKind(DepKind), IteratorExpr(IteratorExpr) {} + }; + SmallVector<DependData, 4> Dependences; llvm::PointerIntPair<llvm::Value *, 1, bool> Final; llvm::PointerIntPair<llvm::Value *, 1, bool> Schedule; llvm::PointerIntPair<llvm::Value *, 1, bool> Priority; @@ -109,6 +121,8 @@ struct OMPTaskDataTy final { unsigned NumberOfParts = 0; bool Tied = true; bool Nogroup = false; + bool IsReductionWithTaskMod = false; + bool IsWorksharingReduction = false; }; /// Class intended to support codegen of all kind of the reduction clauses. @@ -116,20 +130,26 @@ class ReductionCodeGen { private: /// Data required for codegen of reduction clauses. struct ReductionData { - /// Reference to the original shared item. + /// Reference to the item shared between tasks to reduce into. + const Expr *Shared = nullptr; + /// Reference to the original item. const Expr *Ref = nullptr; /// Helper expression for generation of private copy. const Expr *Private = nullptr; /// Helper expression for generation reduction operation. const Expr *ReductionOp = nullptr; - ReductionData(const Expr *Ref, const Expr *Private, const Expr *ReductionOp) - : Ref(Ref), Private(Private), ReductionOp(ReductionOp) {} + ReductionData(const Expr *Shared, const Expr *Ref, const Expr *Private, + const Expr *ReductionOp) + : Shared(Shared), Ref(Ref), Private(Private), ReductionOp(ReductionOp) { + } }; /// List of reduction-based clauses. SmallVector<ReductionData, 4> ClausesData; - /// List of addresses of original shared variables/expressions. + /// List of addresses of shared variables/expressions. SmallVector<std::pair<LValue, LValue>, 4> SharedAddresses; + /// List of addresses of original variables/expressions. + SmallVector<std::pair<LValue, LValue>, 4> OrigAddresses; /// Sizes of the reduction items in chars. SmallVector<std::pair<llvm::Value *, llvm::Value *>, 4> Sizes; /// Base declarations for the reduction items. @@ -149,12 +169,12 @@ private: const OMPDeclareReductionDecl *DRD); public: - ReductionCodeGen(ArrayRef<const Expr *> Shareds, + ReductionCodeGen(ArrayRef<const Expr *> Shareds, ArrayRef<const Expr *> Origs, ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> ReductionOps); - /// Emits lvalue for a reduction item. + /// Emits lvalue for the shared and original reduction item. /// \param N Number of the reduction item. - void emitSharedLValue(CodeGenFunction &CGF, unsigned N); + void emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N); /// Emits the code for the variable-modified type, if required. /// \param N Number of the reduction item. void emitAggregateType(CodeGenFunction &CGF, unsigned N); @@ -186,6 +206,8 @@ public: Address PrivateAddr); /// Returns LValue for the reduction item. LValue getSharedLValue(unsigned N) const { return SharedAddresses[N].first; } + /// Returns LValue for the original reduction item. + LValue getOrigLValue(unsigned N) const { return OrigAddresses[N].first; } /// Returns the size of the reduction item (in chars and total number of /// elements in the item), or nullptr, if the size is a constant. 
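// Sketch of the depend form that the new DependData above (DepKind plus
// IteratorExpr) models (OpenMP 5.0 iterator modifier; names illustrative):
void depend_sketch(float *a, int n) {
  #pragma omp task depend(iterator(int i = 0 : n), in : a[i])
  { /* reads a[0..n-1] */ }
}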
std::pair<llvm::Value *, llvm::Value *> getSizes(unsigned N) const { @@ -230,26 +252,42 @@ public: /// Also, stores the expression for the private loop counter and its /// threadprivate name. struct LastprivateConditionalData { - llvm::SmallDenseMap<CanonicalDeclPtr<const Decl>, SmallString<16>> - DeclToUniqeName; + llvm::MapVector<CanonicalDeclPtr<const Decl>, SmallString<16>> + DeclToUniqueName; LValue IVLVal; - SmallString<16> IVName; - /// True if original lvalue for loop counter can be used in codegen (simd - /// region or simd only mode) and no need to create threadprivate - /// references. - bool UseOriginalIV = false; + llvm::Function *Fn = nullptr; + bool Disabled = false; }; /// Manages list of lastprivate conditional decls for the specified directive. class LastprivateConditionalRAII { + enum class ActionToDo { + DoNotPush, + PushAsLastprivateConditional, + DisableLastprivateConditional, + }; CodeGenModule &CGM; - const bool NeedToPush; + ActionToDo Action = ActionToDo::DoNotPush; + + /// Check and try to disable analysis of inner regions for changes in + /// lastprivate conditional. + void tryToDisableInnerAnalysis(const OMPExecutableDirective &S, + llvm::DenseSet<CanonicalDeclPtr<const Decl>> + &NeedToAddForLPCsAsDisabled) const; - public: LastprivateConditionalRAII(CodeGenFunction &CGF, - const OMPExecutableDirective &S, LValue IVLVal); + const OMPExecutableDirective &S); + + public: + explicit LastprivateConditionalRAII(CodeGenFunction &CGF, + const OMPExecutableDirective &S, + LValue IVLVal); + static LastprivateConditionalRAII disable(CodeGenFunction &CGF, + const OMPExecutableDirective &S); ~LastprivateConditionalRAII(); }; + llvm::OpenMPIRBuilder &getOMPBuilder() { return OMPBuilder; } + protected: CodeGenModule &CGM; StringRef FirstSeparator, Separator; @@ -319,17 +357,6 @@ protected: /// default location. virtual unsigned getDefaultLocationReserved2Flags() const { return 0; } - /// Tries to emit declare variant function for \p OldGD from \p NewGD. - /// \param OrigAddr LLVM IR value for \p OldGD. - /// \param IsForDefinition true, if requested emission for the definition of - /// \p OldGD. - /// \returns true, was able to emit a definition function for \p OldGD, which - /// points to \p NewGD. - virtual bool tryEmitDeclareVariant(const GlobalDecl &NewGD, - const GlobalDecl &OldGD, - llvm::GlobalValue *OrigAddr, - bool IsForDefinition); - /// Returns default flags for the barriers depending on the directive, for /// which this barrier is going to be emitted. static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind); @@ -345,6 +372,8 @@ protected: llvm::Value *getCriticalRegionLock(StringRef CriticalName); private: + /// An OpenMP-IR-Builder instance. + llvm::OpenMPIRBuilder OMPBuilder; /// Default const ident_t object used for initialization of all other /// ident_t objects. llvm::Constant *DefaultOpenMPPSource = nullptr; @@ -392,6 +421,13 @@ private: llvm::DenseMap<llvm::Function *, SmallVector<const OMPDeclareMapperDecl *, 4>>; FunctionUDMMapTy FunctionUDMMap; + /// Maps local variables marked as lastprivate conditional to their internal + /// types.
/// Type kmp_critical_name, originally defined as typedef kmp_int32 /// kmp_critical_name[8]; llvm::ArrayType *KmpCriticalNameTy; @@ -428,6 +464,16 @@ private: /// } flags; /// } kmp_depend_info_t; QualType KmpDependInfoTy; + /// Type typedef struct kmp_task_affinity_info { + /// kmp_intptr_t base_addr; + /// size_t len; + /// struct { + /// bool flag1 : 1; + /// bool flag2 : 1; + /// kmp_int32 reserved : 30; + /// } flags; + /// } kmp_task_affinity_info_t; + QualType KmpTaskAffinityInfoTy; /// struct kmp_dim { // loop bounds info cast to kmp_int64 /// kmp_int64 lo; // lower /// kmp_int64 up; // upper @@ -664,12 +710,6 @@ private: /// must be emitted. llvm::SmallDenseSet<const VarDecl *> DeferredGlobalVariables; - /// Mapping of the original functions to their variants and original global - /// decl. - llvm::MapVector<CanonicalDeclPtr<const FunctionDecl>, - std::pair<GlobalDecl, GlobalDecl>> - DeferredVariantFunction; - using NontemporalDeclsSet = llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>>; /// Stack for list of declarations in current context marked as nontemporal. /// The set is the union of all current stack elements. @@ -684,6 +724,9 @@ private: /// directive is present. bool HasRequiresUnifiedSharedMemory = false; + /// Atomic ordering from the omp requires directive. + llvm::AtomicOrdering RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; + /// Flag for keeping track of whether a target region has been emitted. bool HasEmittedTargetRegion = false; @@ -710,11 +753,6 @@ private: /// Returns pointer to kmpc_micro type. llvm::Type *getKmpc_MicroPointerTy(); - /// Returns specified OpenMP runtime function. - /// \param Function OpenMP runtime function. - /// \return Specified function. - llvm::FunctionCallee createRuntimeFunction(unsigned Function); - /// Returns __kmpc_for_static_init_* runtime function for the specified /// size \a IVSize and sign \a IVSigned. llvm::FunctionCallee createForStaticInitFunction(unsigned IVSize, @@ -826,6 +864,19 @@ private: const OMPLoopDirective &D)> SizeEmitter); + /// Emit update for lastprivate conditional data. + void emitLastprivateConditionalUpdate(CodeGenFunction &CGF, LValue IVLVal, + StringRef UniqueDeclName, LValue LVal, + SourceLocation Loc); + + /// Returns the number of the elements and the address of the depobj + /// dependency array. + /// \return Number of elements in depobj array and the pointer to the array of + /// dependencies. + std::pair<llvm::Value *, LValue> getDepobjElements(CodeGenFunction &CGF, + LValue DepobjLVal, + SourceLocation Loc); + public: explicit CGOpenMPRuntime(CodeGenModule &CGM) : CGOpenMPRuntime(CGM, ".", ".") {} @@ -1220,7 +1271,7 @@ public: /// Emit flush of the variables specified in 'omp flush' directive. /// \param Vars List of variables to flush. virtual void emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *> Vars, - SourceLocation Loc); + SourceLocation Loc, llvm::AtomicOrdering AO); /// Emit task region for the task directive. The task region is /// emitted in several steps: @@ -1381,18 +1432,34 @@ public: /// should be emitted for reduction: /// \code /// - /// _task_red_item_t red_data[n]; + /// _taskred_item_t red_data[n]; /// ...
- /// red_data[i].shar = &origs[i]; + /// red_data[i].shar = &shareds[i]; + /// red_data[i].orig = &origs[i]; /// red_data[i].size = sizeof(origs[i]); /// red_data[i].f_init = (void*)RedInit<i>; /// red_data[i].f_fini = (void*)RedDest<i>; /// red_data[i].f_comb = (void*)RedOp<i>; /// red_data[i].flags = <Flag_i>; /// ... - /// void* tg1 = __kmpc_task_reduction_init(gtid, n, red_data); + /// void* tg1 = __kmpc_taskred_init(gtid, n, red_data); /// \endcode + /// For reduction clause with task modifier it emits the next call: + /// \code /// + /// _taskred_item_t red_data[n]; + /// ... + /// red_data[i].shar = &shareds[i]; + /// red_data[i].orig = &origs[i]; + /// red_data[i].size = sizeof(origs[i]); + /// red_data[i].f_init = (void*)RedInit<i>; + /// red_data[i].f_fini = (void*)RedDest<i>; + /// red_data[i].f_comb = (void*)RedOp<i>; + /// red_data[i].flags = <Flag_i>; + /// ... + /// void* tg1 = __kmpc_taskred_modifier_init(loc, gtid, is_worksharing, n, + /// red_data); + /// \endcode /// \param LHSExprs List of LHS in \a Data.ReductionOps reduction operations. /// \param RHSExprs List of RHS in \a Data.ReductionOps reduction operations. /// \param Data Additional data for task generation like tiedness, final @@ -1403,11 +1470,16 @@ public: ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data); + /// Emits the following code for reduction clause with task modifier: + /// \code + /// __kmpc_task_reduction_modifier_fini(loc, gtid, is_worksharing); + /// \endcode + virtual void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, + bool IsWorksharingReduction); + /// Required to resolve existing problems in the runtime. Emits threadprivate /// variables to store the size of the VLAs/array sections for - /// initializer/combiner/finalizer functions + emits threadprivate variable to - /// store the pointer to the original reduction item for the custom - /// initializer defined by declare reduction construct. + /// initializer/combiner/finalizer functions. /// \param RCG Allows to reuse an existing data for the reductions. /// \param N Reduction item for which fixups must be emitted. virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, @@ -1467,16 +1539,16 @@ public: /// \param IfCond Expression evaluated in if clause associated with the target /// directive, or null if no if clause is used. /// \param Device Expression evaluated in device clause associated with the - /// target directive, or null if no device clause is used. + /// target directive, or null if no device clause is used and device modifier. /// \param SizeEmitter Callback to emit number of iterations for loop-based /// directives. - virtual void - emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, - llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, - const Expr *IfCond, const Expr *Device, - llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, - const OMPLoopDirective &D)> - SizeEmitter); + virtual void emitTargetCall( + CodeGenFunction &CGF, const OMPExecutableDirective &D, + llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, + llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, + llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, + const OMPLoopDirective &D)> + SizeEmitter); /// Emit the target regions enclosed in \a GD function definition or /// the function itself in case it is a valid device function. 
Returns true if @@ -1675,7 +1747,10 @@ public: /// Perform check on requires decl to ensure that target architecture /// supports unified addressing - virtual void checkArchForUnifiedAddressing(const OMPRequiresDecl *D); + virtual void processRequiresDirective(const OMPRequiresDecl *D); + + /// Gets default memory ordering as specified in requires directive. + llvm::AtomicOrdering getDefaultMemoryOrdering() const; /// Checks if the variable has associated OMPAllocateDeclAttr attribute with /// the predefined allocator and translates it into the corresponding address /// space. @@ -1685,17 +1760,13 @@ public: /// Return whether the unified_shared_memory has been specified. bool hasRequiresUnifiedSharedMemory() const; - /// Emits the definition of the declare variant function. - virtual bool emitDeclareVariant(GlobalDecl GD, bool IsForDefinition); - /// Checks if the \p VD variable is marked as nontemporal declaration in /// current context. bool isNontemporalDecl(const ValueDecl *VD) const; - /// Initializes global counter for lastprivate conditional. - virtual void - initLastprivateConditionalCounter(CodeGenFunction &CGF, - const OMPExecutableDirective &S); + /// Create specialized alloca to handle lastprivate conditionals. + Address emitLastprivateConditionalInit(CodeGenFunction &CGF, + const VarDecl *VD); /// Checks if the provided \p LVal is lastprivate conditional and emits the /// code to update the value of the original variable. @@ -1713,6 +1784,30 @@ public: virtual void checkAndEmitLastprivateConditional(CodeGenFunction &CGF, const Expr *LHS); + /// Checks if the lastprivate conditional was updated in inner region and + /// writes the value. + /// \code + /// lastprivate(conditional: a) + /// ... + /// <type> a; bool Fired = false; + /// #pragma omp ... shared(a) + /// { + /// lp_a = ...; + /// Fired = true; + /// } + /// if (Fired) { + /// #pragma omp critical(a) + /// if (last_iv_a <= iv) { + /// last_iv_a = iv; + /// global_a = lp_a; + /// } + /// Fired = false; + /// } + /// \endcode + virtual void checkAndEmitSharedLastprivateConditional( + CodeGenFunction &CGF, const OMPExecutableDirective &D, + const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls); + /// Gets the address of the global copy used for lastprivate conditional /// update, if any. /// \param PrivLVal LValue for the private copy. @@ -1721,6 +1816,41 @@ public: LValue PrivLVal, const VarDecl *VD, SourceLocation Loc); + + /// Emits list of dependencies based on the provided data (array of + /// dependence/expression pairs). + /// \returns Pointer to the first element of the array cast to VoidPtr type. + std::pair<llvm::Value *, Address> + emitDependClause(CodeGenFunction &CGF, + ArrayRef<OMPTaskDataTy::DependData> Dependencies, + SourceLocation Loc); + + /// Emits list of dependencies based on the provided data (array of + /// dependence/expression pairs) for depobj construct. In this case, the + /// variable is allocated dynamically. \returns Pointer to the first + /// element of the array cast to VoidPtr type. + Address emitDepobjDependClause(CodeGenFunction &CGF, + const OMPTaskDataTy::DependData &Dependencies, + SourceLocation Loc); + + /// Emits the code to destroy the dependency object provided in depobj + /// directive. + void emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, + SourceLocation Loc); + + /// Updates the dependency kind in the specified depobj object. + /// \param DepobjLVal LValue for the main depobj object. + /// \param NewDepKind New dependency kind. + void emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, + OpenMPDependClauseKind NewDepKind, SourceLocation Loc); +
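The depobj helpers declared here correspond one-to-one to the OpenMP 5.0 depobj construct; a small usage sketch with a hypothetical dependence object d, each directive mapping to the helper named in the trailing comment:

  #include <omp.h>
  void g(int x) {
    omp_depend_t d;
  #pragma omp depobj(d) depend(inout : x) // emitDepobjDependClause
  #pragma omp depobj(d) update(in)        // emitUpdateClause
  #pragma omp task depend(depobj : d)     // read back via getDepobjElements
    { /* uses x */ }
  #pragma omp depobj(d) destroy           // emitDestroyClause
  }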
+ /// Initializes user defined allocators specified in the uses_allocators + /// clauses. + void emitUsesAllocatorsInit(CodeGenFunction &CGF, const Expr *Allocator, + const Expr *AllocatorTraits); + + /// Destroys user defined allocators specified in the uses_allocators clause. + void emitUsesAllocatorsFini(CodeGenFunction &CGF, const Expr *Allocator); }; /// Class supports emission of SIMD-only code. @@ -1985,7 +2115,7 @@ public: /// Emit flush of the variables specified in 'omp flush' directive. /// \param Vars List of variables to flush. void emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *> Vars, - SourceLocation Loc) override; + SourceLocation Loc, llvm::AtomicOrdering AO) override; /// Emit task region for the task directive. The task region is /// emitted in several steps: @@ -2107,18 +2237,34 @@ public: /// should be emitted for reduction: /// \code /// - /// _task_red_item_t red_data[n]; + /// _taskred_item_t red_data[n]; /// ... - /// red_data[i].shar = &origs[i]; + /// red_data[i].shar = &shareds[i]; + /// red_data[i].orig = &origs[i]; /// red_data[i].size = sizeof(origs[i]); /// red_data[i].f_init = (void*)RedInit<i>; /// red_data[i].f_fini = (void*)RedDest<i>; /// red_data[i].f_comb = (void*)RedOp<i>; /// red_data[i].flags = <Flag_i>; /// ... - /// void* tg1 = __kmpc_task_reduction_init(gtid, n, red_data); + /// void* tg1 = __kmpc_taskred_init(gtid, n, red_data); /// \endcode + /// For reduction clause with task modifier it emits the next call: + /// \code /// + /// _taskred_item_t red_data[n]; + /// ... + /// red_data[i].shar = &shareds[i]; + /// red_data[i].orig = &origs[i]; + /// red_data[i].size = sizeof(origs[i]); + /// red_data[i].f_init = (void*)RedInit<i>; + /// red_data[i].f_fini = (void*)RedDest<i>; + /// red_data[i].f_comb = (void*)RedOp<i>; + /// red_data[i].flags = <Flag_i>; + /// ... + /// void* tg1 = __kmpc_taskred_modifier_init(loc, gtid, is_worksharing, n, + /// red_data); + /// \endcode /// \param LHSExprs List of LHS in \a Data.ReductionOps reduction operations. /// \param RHSExprs List of RHS in \a Data.ReductionOps reduction operations. /// \param Data Additional data for task generation like tiedness, final @@ -2128,6 +2274,13 @@ ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) override; + /// Emits the following code for reduction clause with task modifier: + /// \code + /// __kmpc_task_reduction_modifier_fini(loc, gtid, is_worksharing); + /// \endcode + void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, + bool IsWorksharingReduction) override; + /// Required to resolve existing problems in the runtime. Emits threadprivate /// variables to store the size of the VLAs/array sections for /// initializer/combiner/finalizer functions + emits threadprivate variable to @@ -2191,14 +2344,14 @@ public: /// \param IfCond Expression evaluated in if clause associated with the target /// directive, or null if no if clause is used. /// \param Device Expression evaluated in device clause associated with the
- void - emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, - llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, - const Expr *IfCond, const Expr *Device, - llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, - const OMPLoopDirective &D)> - SizeEmitter) override; + /// target directive, or null if no device clause is used and device modifier. + void emitTargetCall( + CodeGenFunction &CGF, const OMPExecutableDirective &D, + llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, + llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, + llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, + const OMPLoopDirective &D)> + SizeEmitter) override; /// Emit the target regions enclosed in \a GD function definition or /// the function itself in case it is a valid device function. Returns true if diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index d00d84b79cfe..cbd443134e7a 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -38,11 +38,9 @@ enum OpenMPRTLFunctionNVPTX { /// Call to void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime); OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2, /// Call to void __kmpc_kernel_prepare_parallel(void - /// *outlined_function, int16_t - /// IsOMPRuntimeInitialized); + /// *outlined_function); OMPRTL_NVPTX__kmpc_kernel_prepare_parallel, - /// Call to bool __kmpc_kernel_parallel(void **outlined_function, - /// int16_t IsOMPRuntimeInitialized); + /// Call to bool __kmpc_kernel_parallel(void **outlined_function); OMPRTL_NVPTX__kmpc_kernel_parallel, /// Call to void __kmpc_kernel_end_parallel(); OMPRTL_NVPTX__kmpc_kernel_end_parallel, @@ -85,6 +83,9 @@ enum OpenMPRTLFunctionNVPTX { /// Call to void* __kmpc_data_sharing_coalesced_push_stack(size_t size, /// int16_t UseSharedMemory); OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack, + /// Call to void* __kmpc_data_sharing_push_stack(size_t size, int16_t + /// UseSharedMemory); + OMPRTL_NVPTX__kmpc_data_sharing_push_stack, /// Call to void __kmpc_data_sharing_pop_stack(void *a); OMPRTL_NVPTX__kmpc_data_sharing_pop_stack, /// Call to void __kmpc_begin_sharing_variables(void ***args, @@ -341,8 +342,7 @@ class CheckVarsEscapingDeclContext final if (!Attr) return; if (((Attr->getCaptureKind() != OMPC_map) && - !isOpenMPPrivate( - static_cast<OpenMPClauseKind>(Attr->getCaptureKind()))) || + !isOpenMPPrivate(Attr->getCaptureKind())) || ((Attr->getCaptureKind() == OMPC_map) && !FD->getType()->isAnyPointerType())) return; @@ -786,6 +786,8 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx, case OMPD_taskgroup: case OMPD_atomic: case OMPD_flush: + case OMPD_depobj: + case OMPD_scan: case OMPD_teams: case OMPD_target_data: case OMPD_target_exit_data: @@ -801,6 +803,8 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx, case OMPD_target_update: case OMPD_declare_simd: case OMPD_declare_variant: + case OMPD_begin_declare_variant: + case OMPD_end_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: @@ -813,6 +817,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx, case OMPD_parallel_master_taskloop_simd: case OMPD_requires: case OMPD_unknown: + default: llvm_unreachable("Unexpected directive."); } } @@ -862,6 +867,8 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx, case OMPD_taskgroup: case OMPD_atomic: case OMPD_flush: + case OMPD_depobj: + case OMPD_scan: case OMPD_teams: case 
OMPD_target_data: case OMPD_target_exit_data: @@ -877,6 +884,8 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx, case OMPD_target_update: case OMPD_declare_simd: case OMPD_declare_variant: + case OMPD_begin_declare_variant: + case OMPD_end_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: @@ -889,6 +898,7 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx, case OMPD_parallel_master_taskloop_simd: case OMPD_requires: case OMPD_unknown: + default: break; } llvm_unreachable( @@ -1031,6 +1041,8 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, case OMPD_taskgroup: case OMPD_atomic: case OMPD_flush: + case OMPD_depobj: + case OMPD_scan: case OMPD_teams: case OMPD_target_data: case OMPD_target_exit_data: @@ -1046,6 +1058,8 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, case OMPD_target_update: case OMPD_declare_simd: case OMPD_declare_variant: + case OMPD_begin_declare_variant: + case OMPD_end_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: @@ -1058,6 +1072,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, case OMPD_parallel_master_taskloop_simd: case OMPD_requires: case OMPD_unknown: + default: llvm_unreachable("Unexpected directive."); } } @@ -1113,6 +1128,8 @@ static bool supportsLightweightRuntime(ASTContext &Ctx, case OMPD_taskgroup: case OMPD_atomic: case OMPD_flush: + case OMPD_depobj: + case OMPD_scan: case OMPD_teams: case OMPD_target_data: case OMPD_target_exit_data: @@ -1128,6 +1145,8 @@ static bool supportsLightweightRuntime(ASTContext &Ctx, case OMPD_target_update: case OMPD_declare_simd: case OMPD_declare_variant: + case OMPD_begin_declare_variant: + case OMPD_end_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: @@ -1140,6 +1159,7 @@ static bool supportsLightweightRuntime(ASTContext &Ctx, case OMPD_parallel_master_taskloop_simd: case OMPD_requires: case OMPD_unknown: + default: break; } llvm_unreachable( @@ -1444,8 +1464,7 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF, CGF.InitTempAlloca(WorkFn, llvm::Constant::getNullValue(CGF.Int8PtrTy)); // TODO: Optimize runtime initialization and pass in correct value. 
- llvm::Value *Args[] = {WorkFn.getPointer(), - /*RequiresOMPRuntime=*/Bld.getInt16(1)}; + llvm::Value *Args[] = {WorkFn.getPointer()}; llvm::Value *Ret = CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args); Bld.CreateStore(Bld.CreateZExt(Ret, CGF.Int8Ty), ExecStatus); @@ -1573,17 +1592,16 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { } case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: { /// Build void __kmpc_kernel_prepare_parallel( - /// void *outlined_function, int16_t IsOMPRuntimeInitialized); - llvm::Type *TypeParams[] = {CGM.Int8PtrTy, CGM.Int16Ty}; + /// void *outlined_function); + llvm::Type *TypeParams[] = {CGM.Int8PtrTy}; auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel"); break; } case OMPRTL_NVPTX__kmpc_kernel_parallel: { - /// Build bool __kmpc_kernel_parallel(void **outlined_function, - /// int16_t IsOMPRuntimeInitialized); - llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy, CGM.Int16Ty}; + /// Build bool __kmpc_kernel_parallel(void **outlined_function); + llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy}; llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy); auto *FnTy = llvm::FunctionType::get(RetTy, TypeParams, /*isVarArg*/ false); @@ -1738,6 +1756,16 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { FnTy, /*Name=*/"__kmpc_data_sharing_coalesced_push_stack"); break; } + case OMPRTL_NVPTX__kmpc_data_sharing_push_stack: { + // Build void *__kmpc_data_sharing_push_stack(size_t size, int16_t + // UseSharedMemory); + llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction( + FnTy, /*Name=*/"__kmpc_data_sharing_push_stack"); + break; + } case OMPRTL_NVPTX__kmpc_data_sharing_pop_stack: { // Build void __kmpc_data_sharing_pop_stack(void *a); llvm::Type *TypeParams[] = {CGM.VoidPtrTy}; @@ -1915,19 +1943,6 @@ unsigned CGOpenMPRuntimeNVPTX::getDefaultLocationReserved2Flags() const { llvm_unreachable("Unknown flags are requested."); } -bool CGOpenMPRuntimeNVPTX::tryEmitDeclareVariant(const GlobalDecl &NewGD, - const GlobalDecl &OldGD, - llvm::GlobalValue *OrigAddr, - bool IsForDefinition) { - // Emit the function in OldGD with the body from NewGD, if NewGD is defined. - auto *NewFD = cast<FunctionDecl>(NewGD.getDecl()); - if (NewFD->isDefined()) { - CGM.emitOpenMPDeviceFunctionRedefinition(OldGD, NewGD, OrigAddr); - return true; - } - return false; -} - CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM) : CGOpenMPRuntime(CGM, "_", "$") { if (!CGM.getLangOpts().OpenMPIsDevice) @@ -2208,7 +2223,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF, GlobalRecCastAddr = Phi; I->getSecond().GlobalRecordAddr = Phi; I->getSecond().IsInSPMDModeFlag = IsSPMD; - } else if (IsInTTDRegion) { + } else if (!CGM.getLangOpts().OpenMPCUDATargetParallel && IsInTTDRegion) { assert(GlobalizedRecords.back().Records.size() < 2 && "Expected less than 2 globalized records: one for target and one " "for teams."); @@ -2281,12 +2296,16 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF, } else { // TODO: allow the usage of shared memory to be controlled by // the user, for now, default to global. 
+ bool UseSharedMemory = + IsInTTDRegion && GlobalRecordSize <= SharedMemorySize; llvm::Value *GlobalRecordSizeArg[] = { llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize), - CGF.Builder.getInt16(/*UseSharedMemory=*/0)}; + CGF.Builder.getInt16(UseSharedMemory ? 1 : 0)}; llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack), + IsInTTDRegion + ? OMPRTL_NVPTX__kmpc_data_sharing_push_stack + : OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack), GlobalRecordSizeArg); GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( GlobalRecValue, GlobalRecPtrTy); @@ -2433,7 +2452,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF, OMPRTL_NVPTX__kmpc_data_sharing_pop_stack), CGF.EmitCastToVoidPtr(I->getSecond().GlobalRecordAddr)); CGF.EmitBlock(ExitBB); - } else if (IsInTTDRegion) { + } else if (!CGM.getLangOpts().OpenMPCUDATargetParallel && IsInTTDRegion) { assert(GlobalizedRecords.back().RegionCounter > 0 && "region counter must be > 0."); --GlobalizedRecords.back().RegionCounter; @@ -2546,7 +2565,7 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall( llvm::Value *ID = Bld.CreateBitOrPointerCast(WFn, CGM.Int8PtrTy); // Prepare for parallel region. Indicate the outlined function. - llvm::Value *Args[] = {ID, /*RequiresOMPRuntime=*/Bld.getInt16(1)}; + llvm::Value *Args[] = {ID}; CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel), Args); @@ -4754,6 +4773,7 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF, switch (A->getAllocatorType()) { // Use the default allocator here as by default local vars are // threadlocal. + case OMPAllocateDeclAttr::OMPNullMemAlloc: case OMPAllocateDeclAttr::OMPDefaultMemAlloc: case OMPAllocateDeclAttr::OMPThreadMemAlloc: case OMPAllocateDeclAttr::OMPHighBWMemAlloc: @@ -4920,6 +4940,7 @@ bool CGOpenMPRuntimeNVPTX::hasAllocateAttributeForGlobalVar(const VarDecl *VD, return false; const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); switch(A->getAllocatorType()) { + case OMPAllocateDeclAttr::OMPNullMemAlloc: case OMPAllocateDeclAttr::OMPDefaultMemAlloc: // Not supported, fallback to the default mem space. case OMPAllocateDeclAttr::OMPThreadMemAlloc: @@ -4962,7 +4983,7 @@ static CudaArch getCudaArch(CodeGenModule &CGM) { /// Check to see if target architecture supports unified addressing which is /// a restriction for OpenMP requires clause "unified_shared_memory". -void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing( +void CGOpenMPRuntimeNVPTX::processRequiresDirective( const OMPRequiresDecl *D) { for (const OMPClause *Clause : D->clauselists()) { if (Clause->getClauseKind() == OMPC_unified_shared_memory) { @@ -4990,6 +5011,7 @@ void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing( case CudaArch::SM_70: case CudaArch::SM_72: case CudaArch::SM_75: + case CudaArch::SM_80: case CudaArch::GFX600: case CudaArch::GFX601: case CudaArch::GFX700: @@ -5010,6 +5032,7 @@ void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing( case CudaArch::GFX1010: case CudaArch::GFX1011: case CudaArch::GFX1012: + case CudaArch::GFX1030: case CudaArch::UNKNOWN: break; case CudaArch::LAST: @@ -5017,7 +5040,7 @@ void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing( } } } - CGOpenMPRuntime::checkArchForUnifiedAddressing(D); + CGOpenMPRuntime::processRequiresDirective(D); } /// Get number of SMs and number of blocks per SM. 
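As the switch above shows, processRequiresDirective accepts unified_shared_memory only on device architectures known to support unified addressing (sm_70 and newer on NVPTX; older CUDA archs are diagnosed). A sketch of the source-level directive it validates:

  #pragma omp requires unified_shared_memory
  void work(int *hostp) {
  #pragma omp target // hostp may be dereferenced without an explicit map
    { hostp[0] += 1; }
  }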
@@ -5047,6 +5070,7 @@ static std::pair<unsigned, unsigned> getSMsBlocksPerSM(CodeGenModule &CGM) { case CudaArch::SM_70: case CudaArch::SM_72: case CudaArch::SM_75: + case CudaArch::SM_80: return {84, 32}; case CudaArch::GFX600: case CudaArch::GFX601: @@ -5068,6 +5092,7 @@ static std::pair<unsigned, unsigned> getSMsBlocksPerSM(CodeGenModule &CGM) { case CudaArch::GFX1010: case CudaArch::GFX1011: case CudaArch::GFX1012: + case CudaArch::GFX1030: case CudaArch::UNKNOWN: break; case CudaArch::LAST: @@ -5077,7 +5102,8 @@ static std::pair<unsigned, unsigned> getSMsBlocksPerSM(CodeGenModule &CGM) { } void CGOpenMPRuntimeNVPTX::clear() { - if (!GlobalizedRecords.empty()) { + if (!GlobalizedRecords.empty() && + !CGM.getLangOpts().OpenMPCUDATargetParallel) { ASTContext &C = CGM.getContext(); llvm::SmallVector<const GlobalPtrSizeRecsTy *, 4> GlobalRecs; llvm::SmallVector<const GlobalPtrSizeRecsTy *, 4> SharedRecs; diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index 4159af0a622f..c52ae43817c7 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -193,18 +193,6 @@ protected: /// Full/Lightweight runtime mode. Used for better optimization. unsigned getDefaultLocationReserved2Flags() const override; - /// Tries to emit declare variant function for \p OldGD from \p NewGD. - /// \param OrigAddr LLVM IR value for \p OldGD. - /// \param IsForDefinition true, if requested emission for the definition of - /// \p OldGD. - /// \returns true, was able to emit a definition function for \p OldGD, which - /// points to \p NewGD. - /// NVPTX backend does not support global aliases, so just use the function, - /// emitted for \p NewGD instead of \p OldGD. - bool tryEmitDeclareVariant(const GlobalDecl &NewGD, const GlobalDecl &OldGD, - llvm::GlobalValue *OrigAddr, - bool IsForDefinition) override; - public: explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM); void clear() override; @@ -395,7 +383,7 @@ public: /// Perform check on requires decl to ensure that target architecture /// supports unified addressing - void checkArchForUnifiedAddressing(const OMPRequiresDecl *D) override; + void processRequiresDirective(const OMPRequiresDecl *D) override; /// Returns default address space for the constant firstprivates, __constant__ /// address space by default. diff --git a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp index 4de64a32f2ac..4e5d1d3f16f6 100644 --- a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp +++ b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp @@ -385,7 +385,8 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field, Run = FieldEnd; continue; } - llvm::Type *Type = Types.ConvertTypeForMem(Field->getType()); + llvm::Type *Type = + Types.ConvertTypeForMem(Field->getType(), /*ForBitFields=*/true); // If we don't have a run yet, or don't live within the previous run's // allocated storage then we allocate some storage and start a new run. if (Run == FieldEnd || BitOffset >= Tail) { @@ -405,15 +406,17 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field, return; } - // Check if OffsetInRecord is better as a single field run. When OffsetInRecord - // has legal integer width, and its bitfield offset is naturally aligned, it - // is better to make the bitfield a separate storage component so as it can be - // accessed directly with lower cost. 
+ // Check if OffsetInRecord (the size in bits of the current run) is better + // as a single field run. When OffsetInRecord has legal integer width, and + // its bitfield offset is naturally aligned, it is better to make the + // bitfield a separate storage component so that it can be accessed directly + // with lower cost. auto IsBetterAsSingleFieldRun = [&](uint64_t OffsetInRecord, uint64_t StartBitOffset) { if (!Types.getCodeGenOpts().FineGrainedBitfieldAccesses) return false; - if (!DataLayout.isLegalInteger(OffsetInRecord)) + if (OffsetInRecord < 8 || !llvm::isPowerOf2_64(OffsetInRecord) || + !DataLayout.fitsInLegalInteger(OffsetInRecord)) return false; // Make sure StartBitOffset is naturally aligned if it is treated as an // IType integer. @@ -729,8 +732,8 @@ CGBitFieldInfo CGBitFieldInfo::MakeInfo(CodeGenTypes &Types, return CGBitFieldInfo(Offset, Size, IsSigned, StorageSize, StorageOffset); } -CGRecordLayout *CodeGenTypes::ComputeRecordLayout(const RecordDecl *D, - llvm::StructType *Ty) { +std::unique_ptr<CGRecordLayout> +CodeGenTypes::ComputeRecordLayout(const RecordDecl *D, llvm::StructType *Ty) { CGRecordLowering Builder(*this, D, /*Packed=*/false); Builder.lower(/*NonVirtualBaseType=*/false); @@ -757,9 +760,9 @@ CGRecordLayout *CodeGenTypes::ComputeRecordLayout(const RecordDecl *D, // but we may need to recursively layout D while laying D out as a base type. Ty->setBody(Builder.FieldTypes, Builder.Packed); - CGRecordLayout *RL = - new CGRecordLayout(Ty, BaseTy, Builder.IsZeroInitializable, - Builder.IsZeroInitializableAsBase); + auto RL = std::make_unique<CGRecordLayout>( + Ty, BaseTy, (bool)Builder.IsZeroInitializable, + (bool)Builder.IsZeroInitializableAsBase); RL->NonVirtualBases.swap(Builder.NonVirtualBases); RL->CompleteObjectVirtualBases.swap(Builder.VirtualBases); diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 138459c68dbf..672909849bb7 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "CGDebugInfo.h" +#include "CGOpenMPRuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "TargetInfo.h" @@ -18,12 +19,14 @@ #include "clang/AST/StmtVisitor.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/PrettyStackTrace.h" +#include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/Support/SaveAndRestore.h" using namespace clang; using namespace CodeGen; @@ -246,6 +249,12 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) { case Stmt::OMPFlushDirectiveClass: EmitOMPFlushDirective(cast<OMPFlushDirective>(*S)); break; + case Stmt::OMPDepobjDirectiveClass: + EmitOMPDepobjDirective(cast<OMPDepobjDirective>(*S)); + break; + case Stmt::OMPScanDirectiveClass: + EmitOMPScanDirective(cast<OMPScanDirective>(*S)); + break; case Stmt::OMPOrderedDirectiveClass: EmitOMPOrderedDirective(cast<OMPOrderedDirective>(*S)); break; @@ -601,6 +610,13 @@ void CodeGenFunction::EmitLabelStmt(const LabelStmt &S) { } void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) { + bool nomerge = false; + for (const auto *A : S.getAttrs()) + if (A->getKind() == attr::NoMerge) { + nomerge = true; + break; + } + SaveAndRestore<bool> save_nomerge(InNoMergeAttributedStmt, nomerge); EmitStmt(S.getSubStmt(), S.getAttrs()); }
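The attribute scan in EmitAttributedStmt wires up the then-new nomerge statement attribute; a hedged example of source code that would set InNoMergeAttributedStmt (clang spells the attribute [[clang::nomerge]]):

  void log(int);
  void f(bool c) {
    if (c)
      [[clang::nomerge]] log(1); // calls under a nomerge statement are not
    else
      [[clang::nomerge]] log(2); // tail-merged into a single call site
  }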
@@ -721,8 +737,8 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S, EmitBlock(LoopHeader.getBlock()); const SourceRange &R = S.getSourceRange(); - LoopStack.push(LoopHeader.getBlock(), CGM.getContext(), WhileAttrs, - SourceLocToDebugLoc(R.getBegin()), + LoopStack.push(LoopHeader.getBlock(), CGM.getContext(), CGM.getCodeGenOpts(), + WhileAttrs, SourceLocToDebugLoc(R.getBegin()), SourceLocToDebugLoc(R.getEnd())); // Create an exit block for when the condition fails, which will @@ -823,7 +839,7 @@ void CodeGenFunction::EmitDoStmt(const DoStmt &S, EmitBlock(LoopCond.getBlock()); const SourceRange &R = S.getSourceRange(); - LoopStack.push(LoopBody, CGM.getContext(), DoAttrs, + LoopStack.push(LoopBody, CGM.getContext(), CGM.getCodeGenOpts(), DoAttrs, SourceLocToDebugLoc(R.getBegin()), SourceLocToDebugLoc(R.getEnd())); @@ -881,7 +897,7 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S, EmitBlock(CondBlock); const SourceRange &R = S.getSourceRange(); - LoopStack.push(CondBlock, CGM.getContext(), ForAttrs, + LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), ForAttrs, SourceLocToDebugLoc(R.getBegin()), SourceLocToDebugLoc(R.getEnd())); @@ -982,7 +998,7 @@ CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S, EmitBlock(CondBlock); const SourceRange &R = S.getSourceRange(); - LoopStack.push(CondBlock, CGM.getContext(), ForAttrs, + LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), ForAttrs, SourceLocToDebugLoc(R.getBegin()), SourceLocToDebugLoc(R.getEnd())); @@ -1054,6 +1070,19 @@ void CodeGenFunction::EmitReturnOfRValue(RValue RV, QualType Ty) { EmitBranchThroughCleanup(ReturnBlock); } +namespace { +// RAII struct used to save and restore a return statement's result expression. +struct SaveRetExprRAII { + SaveRetExprRAII(const Expr *RetExpr, CodeGenFunction &CGF) + : OldRetExpr(CGF.RetExpr), CGF(CGF) { + CGF.RetExpr = RetExpr; + } + ~SaveRetExprRAII() { CGF.RetExpr = OldRetExpr; } + const Expr *OldRetExpr; + CodeGenFunction &CGF; +}; +} // namespace + /// EmitReturnStmt - Note that due to GCC extensions, this can have an operand /// if the function returns void, or may be missing one if the function returns /// non-void. Fun stuff :). @@ -1079,20 +1108,28 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) { // Emit the result value, even if unused, to evaluate the side effects. const Expr *RV = S.getRetValue(); - // Treat block literals in a return expression as if they appeared - // in their own scope. This permits a small, easily-implemented - // exception to our over-conservative rules about not jumping to - // statements following block literals with non-trivial cleanups. - RunCleanupsScope cleanupScope(*this); - if (const FullExpr *fe = dyn_cast_or_null<FullExpr>(RV)) { - enterFullExpression(fe); - RV = fe->getSubExpr(); - } + // Record the result expression of the return statement. The recorded + // expression is used to determine whether a block capture's lifetime should + // end at the end of the full expression as opposed to the end of the scope + // enclosing the block expression. + // + // This permits a small, easily-implemented exception to our over-conservative + // rules about not jumping to statements following block literals with + // non-trivial cleanups.
+ SaveRetExprRAII SaveRetExpr(RV, *this); + RunCleanupsScope cleanupScope(*this); + if (const auto *EWC = dyn_cast_or_null<ExprWithCleanups>(RV)) + RV = EWC->getSubExpr(); // FIXME: Clean this up by using an LValue for ReturnTemp, // EmitStoreThroughLValue, and EmitAnyExpr. - if (getLangOpts().ElideConstructors && - S.getNRVOCandidate() && S.getNRVOCandidate()->isNRVOVariable()) { + // Check if the NRVO candidate was not globalized in OpenMP mode. + if (getLangOpts().ElideConstructors && S.getNRVOCandidate() && + S.getNRVOCandidate()->isNRVOVariable() && + (!getLangOpts().OpenMP || + !CGM.getOpenMPRuntime() + .getAddressOfLocalVariable(*this, S.getNRVOCandidate()) + .isValid())) { // Apply the named return value optimization for this return statement, // which means doing nothing: the appropriate result has already been // constructed into the NRVO variable. @@ -2091,8 +2128,9 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { // Update largest vector width for any vector types. if (auto *VT = dyn_cast<llvm::VectorType>(ResultRegTypes.back())) - LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, - VT->getPrimitiveSizeInBits().getFixedSize()); + LargestVectorWidth = + std::max((uint64_t)LargestVectorWidth, + VT->getPrimitiveSizeInBits().getKnownMinSize()); } else { ArgTypes.push_back(Dest.getAddress(*this).getType()); Args.push_back(Dest.getPointer(*this)); @@ -2116,8 +2154,9 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { // Update largest vector width for any vector types. if (auto *VT = dyn_cast<llvm::VectorType>(Arg->getType())) - LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, - VT->getPrimitiveSizeInBits().getFixedSize()); + LargestVectorWidth = + std::max((uint64_t)LargestVectorWidth, + VT->getPrimitiveSizeInBits().getKnownMinSize()); if (Info.allowsRegister()) InOutConstraints += llvm::utostr(i); else @@ -2203,21 +2242,15 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { // Update largest vector width for any vector types. if (auto *VT = dyn_cast<llvm::VectorType>(Arg->getType())) - LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, - VT->getPrimitiveSizeInBits().getFixedSize()); + LargestVectorWidth = + std::max((uint64_t)LargestVectorWidth, + VT->getPrimitiveSizeInBits().getKnownMinSize()); ArgTypes.push_back(Arg->getType()); Args.push_back(Arg); Constraints += InputConstraint; } - // Append the "input" part of inout constraints last. - for (unsigned i = 0, e = InOutArgs.size(); i != e; i++) { - ArgTypes.push_back(InOutArgTypes[i]); - Args.push_back(InOutArgs[i]); - } - Constraints += InOutConstraints; - // Labels SmallVector<llvm::BasicBlock *, 16> Transfer; llvm::BasicBlock *Fallthrough = nullptr; @@ -2225,7 +2258,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { if (const auto *GS = dyn_cast<GCCAsmStmt>(&S)) { IsGCCAsmGoto = GS->isAsmGoto(); if (IsGCCAsmGoto) { - for (auto *E : GS->labels()) { + for (const auto *E : GS->labels()) { JumpDest Dest = getJumpDestForLabel(E->getLabel()); Transfer.push_back(Dest.getBlock()); llvm::BlockAddress *BA = @@ -2236,19 +2269,31 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { Constraints += ','; Constraints += 'X'; } - StringRef Name = "asm.fallthrough"; - Fallthrough = createBasicBlock(Name); + Fallthrough = createBasicBlock("asm.fallthrough"); } } + // Append the "input" part of inout constraints last. 
+ for (unsigned i = 0, e = InOutArgs.size(); i != e; i++) { + ArgTypes.push_back(InOutArgTypes[i]); + Args.push_back(InOutArgs[i]); + } + Constraints += InOutConstraints; + // Clobbers for (unsigned i = 0, e = S.getNumClobbers(); i != e; i++) { StringRef Clobber = S.getClobber(i); if (Clobber == "memory") ReadOnly = ReadNone = false; - else if (Clobber != "cc") + else if (Clobber != "cc") { Clobber = getTarget().getNormalizedGCCRegisterName(Clobber); + if (CGM.getCodeGenOpts().StackClashProtector && + getTarget().isSPRegName(Clobber)) { + CGM.getDiags().Report(S.getAsmLoc(), + diag::warn_stack_clash_protection_inline_asm); + } + } if (!Constraints.empty()) Constraints += ','; @@ -2287,9 +2332,9 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { if (IsGCCAsmGoto) { llvm::CallBrInst *Result = Builder.CreateCallBr(IA, Fallthrough, Transfer, Args); + EmitBlock(Fallthrough); UpdateAsmCallInst(cast<llvm::CallBase>(*Result), HasSideEffect, ReadOnly, ReadNone, S, ResultRegTypes, *this, RegResults); - EmitBlock(Fallthrough); } else { llvm::CallInst *Result = Builder.CreateCall(IA, Args, getBundlesForFunclet(IA)); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index dc3899f0e4ea..cfd5eda8cc80 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -18,14 +18,22 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" #include "clang/AST/DeclOpenMP.h" +#include "clang/AST/OpenMPClause.h" #include "clang/AST/Stmt.h" #include "clang/AST/StmtOpenMP.h" +#include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/PrettyStackTrace.h" +#include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/AtomicOrdering.h" using namespace clang; using namespace CodeGen; using namespace llvm::omp; +static const VarDecl *getBaseDecl(const Expr *Ref); + namespace { /// Lexical scope for OpenMP executable constructs, that handles correct codegen /// for captured expressions. @@ -53,7 +61,8 @@ class OMPLexicalScope : public CodeGenFunction::LexicalScope { static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) { return CGF.LambdaCaptureFields.lookup(VD) || (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) || - (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl)); + (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) && + cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD)); } public: @@ -214,6 +223,12 @@ public: if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D)) CGF.EmitVarDecl(*OED); } + } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) { + for (const Expr *E : UDP->varlists()) { + const Decl *D = getBaseDecl(E); + if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D)) + CGF.EmitVarDecl(*OED); + } } } if (!isOpenMPSimdDirective(S.getDirectiveKind())) @@ -365,26 +380,28 @@ static QualType getCanonicalParamType(ASTContext &C, QualType T) { } namespace { - /// Contains required data for proper outlined function codegen. - struct FunctionOptions { - /// Captured statement for which the function is generated. - const CapturedStmt *S = nullptr; - /// true if cast to/from UIntPtr is required for variables captured by - /// value. - const bool UIntPtrCastRequired = true; - /// true if only casted arguments must be registered as local args or VLA - /// sizes. - const bool RegisterCastedArgsOnly = false; - /// Name of the generated function. 
- const StringRef FunctionName; - explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired, - bool RegisterCastedArgsOnly, - StringRef FunctionName) - : S(S), UIntPtrCastRequired(UIntPtrCastRequired), - RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly), - FunctionName(FunctionName) {} - }; -} +/// Contains required data for proper outlined function codegen. +struct FunctionOptions { + /// Captured statement for which the function is generated. + const CapturedStmt *S = nullptr; + /// true if cast to/from UIntPtr is required for variables captured by + /// value. + const bool UIntPtrCastRequired = true; + /// true if only casted arguments must be registered as local args or VLA + /// sizes. + const bool RegisterCastedArgsOnly = false; + /// Name of the generated function. + const StringRef FunctionName; + /// Location of the non-debug version of the outlined function. + SourceLocation Loc; + explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired, + bool RegisterCastedArgsOnly, StringRef FunctionName, + SourceLocation Loc) + : S(S), UIntPtrCastRequired(UIntPtrCastRequired), + RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly), + FunctionName(FunctionName), Loc(Loc) {} +}; +} // namespace static llvm::Function *emitOutlinedFunctionPrologue( CodeGenFunction &CGF, FunctionArgList &Args, @@ -485,7 +502,9 @@ static llvm::Function *emitOutlinedFunctionPrologue( // Generate the function. CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs, - FO.S->getBeginLoc(), CD->getBody()->getBeginLoc()); + FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(), + FO.UIntPtrCastRequired ? FO.Loc + : CD->getBody()->getBeginLoc()); unsigned Cnt = CD->getContextParamPosition(); I = FO.S->captures().begin(); for (const FieldDecl *FD : RD->fields()) { @@ -560,7 +579,8 @@ static llvm::Function *emitOutlinedFunctionPrologue( } llvm::Function * -CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { +CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, + SourceLocation Loc) { assert( CapturedStmtInfo && "CapturedStmtInfo should be set when generating the captured function"); @@ -577,7 +597,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { if (NeedWrapperFunction) Out << "_debug__"; FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false, - Out.str()); + Out.str(), Loc); llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs, VLASizes, CXXThisValue, FO); CodeGenFunction::OMPPrivateScope LocalScope(*this); @@ -600,7 +620,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true, /*RegisterCastedArgsOnly=*/true, - CapturedStmtInfo->getHelperName()); + CapturedStmtInfo->getHelperName(), Loc); CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true); WrapperCGF.CapturedStmtInfo = CapturedStmtInfo; Args.clear(); @@ -632,8 +652,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { } CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType())); } - CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getBeginLoc(), - F, CallArgs); + CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs); WrapperCGF.FinishFunction(); return WrapperF; } @@ -747,11 +766,12 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, getLangOpts().OpenMPIsDevice && 
isOpenMPTargetExecutionDirective(D.getDirectiveKind()); bool FirstprivateIsLastprivate = false; - llvm::DenseSet<const VarDecl *> Lastprivates; + llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates; for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { for (const auto *D : C->varlists()) - Lastprivates.insert( - cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); + Lastprivates.try_emplace( + cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(), + C->getKind()); } llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate; llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; @@ -761,8 +781,8 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, bool MustEmitFirstprivateCopy = CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown; for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) { - auto IRef = C->varlist_begin(); - auto InitsRef = C->inits().begin(); + const auto *IRef = C->varlist_begin(); + const auto *InitsRef = C->inits().begin(); for (const Expr *IInit : C->private_copies()) { const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); bool ThisFirstprivateIsLastprivate = @@ -853,14 +873,34 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, }); } else { Address OriginalAddr = OriginalLVal.getAddress(*this); - IsRegistered = PrivateScope.addPrivate( - OrigVD, [this, VDInit, OriginalAddr, VD]() { + IsRegistered = + PrivateScope.addPrivate(OrigVD, [this, VDInit, OriginalAddr, VD, + ThisFirstprivateIsLastprivate, + OrigVD, &Lastprivates, IRef]() { // Emit private VarDecl with copy init. // Remap temp VDInit variable to the address of the original // variable (for proper handling of captured global variables). setAddrOfLocalVar(VDInit, OriginalAddr); EmitDecl(*VD); LocalDeclMap.erase(VDInit); + if (ThisFirstprivateIsLastprivate && + Lastprivates[OrigVD->getCanonicalDecl()] == + OMPC_LASTPRIVATE_conditional) { + // Create/init special variable for lastprivate conditionals. + Address VDAddr = + CGM.getOpenMPRuntime().emitLastprivateConditionalInit( + *this, OrigVD); + llvm::Value *V = EmitLoadOfScalar( + MakeAddrLValue(GetAddrOfLocalVar(VD), (*IRef)->getType(), + AlignmentSource::Decl), + (*IRef)->getExprLoc()); + EmitStoreOfScalar(V, + MakeAddrLValue(VDAddr, (*IRef)->getType(), + AlignmentSource::Decl)); + LocalDeclMap.erase(VD); + setAddrOfLocalVar(VD, VDAddr); + return VDAddr; + } return GetAddrOfLocalVar(VD); }); } @@ -990,8 +1030,8 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit( if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && !getLangOpts().OpenMPSimd) break; - auto IRef = C->varlist_begin(); - auto IDestRef = C->destination_exprs().begin(); + const auto *IRef = C->varlist_begin(); + const auto *IDestRef = C->destination_exprs().begin(); for (const Expr *IInit : C->private_copies()) { // Keep the address of the original variable for future update at the end // of the loop. @@ -1013,7 +1053,15 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit( // for 'firstprivate' clause. 
if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) { const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); - bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() { + bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD, C, + OrigVD]() { + if (C->getKind() == OMPC_LASTPRIVATE_conditional) { + Address VDAddr = + CGM.getOpenMPRuntime().emitLastprivateConditionalInit(*this, + OrigVD); + setAddrOfLocalVar(VD, VDAddr); + return VDAddr; + } // Emit private VarDecl with copy init. EmitDecl(*VD); return GetAddrOfLocalVar(VD); @@ -1099,7 +1147,7 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal( if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>()) PrivateAddr = Address(Builder.CreateLoad(PrivateAddr), - getNaturalTypeAlignment(RefTy->getPointeeType())); + CGM.getNaturalTypeAlignment(RefTy->getPointeeType())); // Store the last value to the private copy in the last iteration. if (C->getKind() == OMPC_LASTPRIVATE_conditional) CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate( @@ -1122,7 +1170,7 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal( void CodeGenFunction::EmitOMPReductionClauseInit( const OMPExecutableDirective &D, - CodeGenFunction::OMPPrivateScope &PrivateScope) { + CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) { if (!HaveInsertPoint()) return; SmallVector<const Expr *, 4> Shareds; @@ -1130,32 +1178,36 @@ void CodeGenFunction::EmitOMPReductionClauseInit( SmallVector<const Expr *, 4> ReductionOps; SmallVector<const Expr *, 4> LHSs; SmallVector<const Expr *, 4> RHSs; + OMPTaskDataTy Data; + SmallVector<const Expr *, 4> TaskLHSs; + SmallVector<const Expr *, 4> TaskRHSs; for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { - auto IPriv = C->privates().begin(); - auto IRed = C->reduction_ops().begin(); - auto ILHS = C->lhs_exprs().begin(); - auto IRHS = C->rhs_exprs().begin(); - for (const Expr *Ref : C->varlists()) { - Shareds.emplace_back(Ref); - Privates.emplace_back(*IPriv); - ReductionOps.emplace_back(*IRed); - LHSs.emplace_back(*ILHS); - RHSs.emplace_back(*IRHS); - std::advance(IPriv, 1); - std::advance(IRed, 1); - std::advance(ILHS, 1); - std::advance(IRHS, 1); + if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan)) + continue; + Shareds.append(C->varlist_begin(), C->varlist_end()); + Privates.append(C->privates().begin(), C->privates().end()); + ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); + LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); + RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); + if (C->getModifier() == OMPC_REDUCTION_task) { + Data.ReductionVars.append(C->privates().begin(), C->privates().end()); + Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); + Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); + Data.ReductionOps.append(C->reduction_ops().begin(), + C->reduction_ops().end()); + TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); + TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); } } - ReductionCodeGen RedCG(Shareds, Privates, ReductionOps); + ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps); unsigned Count = 0; - auto ILHS = LHSs.begin(); - auto IRHS = RHSs.begin(); - auto IPriv = Privates.begin(); + auto *ILHS = LHSs.begin(); + auto *IRHS = RHSs.begin(); + auto *IPriv = Privates.begin(); for (const Expr *IRef : Shareds) { const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl()); // Emit private VarDecl 
with reduction init. - RedCG.emitSharedLValue(*this, Count); + RedCG.emitSharedOrigLValue(*this, Count); RedCG.emitAggregateType(*this, Count); AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD); RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(), @@ -1222,6 +1274,118 @@ void CodeGenFunction::EmitOMPReductionClauseInit( ++IPriv; ++Count; } + if (!Data.ReductionVars.empty()) { + Data.IsReductionWithTaskMod = true; + Data.IsWorksharingReduction = + isOpenMPWorksharingDirective(D.getDirectiveKind()); + llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit( + *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data); + const Expr *TaskRedRef = nullptr; + switch (D.getDirectiveKind()) { + case OMPD_parallel: + TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr(); + break; + case OMPD_for: + TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr(); + break; + case OMPD_sections: + TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr(); + break; + case OMPD_parallel_for: + TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr(); + break; + case OMPD_parallel_master: + TaskRedRef = + cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr(); + break; + case OMPD_parallel_sections: + TaskRedRef = + cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr(); + break; + case OMPD_target_parallel: + TaskRedRef = + cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr(); + break; + case OMPD_target_parallel_for: + TaskRedRef = + cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr(); + break; + case OMPD_distribute_parallel_for: + TaskRedRef = + cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr(); + break; + case OMPD_teams_distribute_parallel_for: + TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D) + .getTaskReductionRefExpr(); + break; + case OMPD_target_teams_distribute_parallel_for: + TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D) + .getTaskReductionRefExpr(); + break; + case OMPD_simd: + case OMPD_for_simd: + case OMPD_section: + case OMPD_single: + case OMPD_master: + case OMPD_critical: + case OMPD_parallel_for_simd: + case OMPD_task: + case OMPD_taskyield: + case OMPD_barrier: + case OMPD_taskwait: + case OMPD_taskgroup: + case OMPD_flush: + case OMPD_depobj: + case OMPD_scan: + case OMPD_ordered: + case OMPD_atomic: + case OMPD_teams: + case OMPD_target: + case OMPD_cancellation_point: + case OMPD_cancel: + case OMPD_target_data: + case OMPD_target_enter_data: + case OMPD_target_exit_data: + case OMPD_taskloop: + case OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: + case OMPD_parallel_master_taskloop_simd: + case OMPD_distribute: + case OMPD_target_update: + case OMPD_distribute_parallel_for_simd: + case OMPD_distribute_simd: + case OMPD_target_parallel_for_simd: + case OMPD_target_simd: + case OMPD_teams_distribute: + case OMPD_teams_distribute_simd: + case OMPD_teams_distribute_parallel_for_simd: + case OMPD_target_teams: + case OMPD_target_teams_distribute: + case OMPD_target_teams_distribute_parallel_for_simd: + case OMPD_target_teams_distribute_simd: + case OMPD_declare_target: + case OMPD_end_declare_target: + case OMPD_threadprivate: + case OMPD_allocate: + case OMPD_declare_reduction: + case OMPD_declare_mapper: + case OMPD_declare_simd: + case OMPD_requires: + case OMPD_declare_variant: + case OMPD_begin_declare_variant: + case 
OMPD_end_declare_variant: + case OMPD_unknown: + default: + llvm_unreachable("Unexpected directive with task reductions."); + } + + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl()); + EmitVarDecl(*VD); + EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD), + /*Volatile=*/false, TaskRedRef->getType()); + } } void CodeGenFunction::EmitOMPReductionClauseFinal( @@ -1233,14 +1397,25 @@ void CodeGenFunction::EmitOMPReductionClauseFinal( llvm::SmallVector<const Expr *, 8> RHSExprs; llvm::SmallVector<const Expr *, 8> ReductionOps; bool HasAtLeastOneReduction = false; + bool IsReductionWithTaskMod = false; for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { + // Do not emit for inscan reductions. + if (C->getModifier() == OMPC_REDUCTION_inscan) + continue; HasAtLeastOneReduction = true; Privates.append(C->privates().begin(), C->privates().end()); LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); + IsReductionWithTaskMod = + IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task; } if (HasAtLeastOneReduction) { + if (IsReductionWithTaskMod) { + CGM.getOpenMPRuntime().emitTaskReductionFini( + *this, D.getBeginLoc(), + isOpenMPWorksharingDirective(D.getDirectiveKind())); + } bool WithNowait = D.getSingleClause<OMPNowaitClause>() || isOpenMPParallelDirective(D.getDirectiveKind()) || ReductionKind == OMPD_simd; @@ -1288,6 +1463,63 @@ typedef llvm::function_ref<void(CodeGenFunction &, CodeGenBoundParametersTy; } // anonymous namespace +static void +checkForLastprivateConditionalUpdate(CodeGenFunction &CGF, + const OMPExecutableDirective &S) { + if (CGF.getLangOpts().OpenMP < 50) + return; + llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls; + for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { + for (const Expr *Ref : C->varlists()) { + if (!Ref->getType()->isScalarType()) + continue; + const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); + if (!DRE) + continue; + PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); + CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); + } + } + for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { + for (const Expr *Ref : C->varlists()) { + if (!Ref->getType()->isScalarType()) + continue; + const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); + if (!DRE) + continue; + PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); + CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); + } + } + for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { + for (const Expr *Ref : C->varlists()) { + if (!Ref->getType()->isScalarType()) + continue; + const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); + if (!DRE) + continue; + PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); + CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); + } + } + // Privates need not be analyzed since they are not captured at all. + // Task reductions may be skipped - tasks are ignored. + // Firstprivates do not return a value but may be passed by reference - no need + // to check for updated lastprivate conditional.
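// Illustration (not part of this patch; 'n', 'a', 'sum' and 'last' are
// invented for the example): source forms of the two OpenMP 5.0 features
// handled above. The 'task' reduction modifier routes the reduction through
// the descriptor produced by emitTaskReductionInit and published via
// TaskRedRef; 'lastprivate(conditional:)' keeps the value of the lexically
// last conditional assignment, which is what the scalar tracking in
// checkForLastprivateConditionalUpdate supports.
// void example(int n, const int *a) {
//   int sum = 0, last = -1;
//   #pragma omp parallel for reduction(task, + : sum) lastprivate(conditional : last)
//   for (int i = 0; i < n; ++i) {
//     sum += a[i];
//     if (a[i] > 0)
//       last = i; // only the store from the highest iteration survives
//   }
// }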
+ for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { + for (const Expr *Ref : C->varlists()) { + if (!Ref->getType()->isScalarType()) + continue; + const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); + if (!DRE) + continue; + PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); + } + } + CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional( + CGF, S, PrivateDecls); +} + static void emitCommonOMPParallelDirective( CodeGenFunction &CGF, const OMPExecutableDirective &S, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, @@ -1334,9 +1566,97 @@ static void emitEmptyBoundParameters(CodeGenFunction &, const OMPExecutableDirective &, llvm::SmallVectorImpl<llvm::Value *> &) {} -void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { +Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable( + CodeGenFunction &CGF, const VarDecl *VD) { + CodeGenModule &CGM = CGF.CGM; + auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); + + if (!VD) + return Address::invalid(); + const VarDecl *CVD = VD->getCanonicalDecl(); + if (!CVD->hasAttr<OMPAllocateDeclAttr>()) + return Address::invalid(); + const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); + // Use the default allocation. + if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && + !AA->getAllocator()) + return Address::invalid(); + llvm::Value *Size; + CharUnits Align = CGM.getContext().getDeclAlign(CVD); + if (CVD->getType()->isVariablyModifiedType()) { + Size = CGF.getTypeSize(CVD->getType()); + // Align the size: ((size + align - 1) / align) * align + Size = CGF.Builder.CreateNUWAdd( + Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); + Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); + Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); + } else { + CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); + Size = CGM.getSize(Sz.alignTo(Align)); + } + + assert(AA->getAllocator() && + "Expected allocator expression for non-default allocator."); + llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); + // According to the standard, the original allocator type is an enum (integer). + // Convert to pointer type, if required.
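// Note (illustration, not part of this patch): for variably modified types
// the branch above rounds the runtime byte size up to the declared alignment
// with the classic round-up ((size + align - 1) / align) * align, e.g. size
// 13 with align 8 yields 16. A plain C++ model of the emitted arithmetic:
// uint64_t alignTo(uint64_t Size, uint64_t Align) {
//   return (Size + Align - 1) / Align * Align; // alignTo(13, 8) == 16
// }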
+ if (Allocator->getType()->isIntegerTy()) + Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); + else if (Allocator->getType()->isPointerTy()) + Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, + CGM.VoidPtrTy); + + llvm::Value *Addr = OMPBuilder.CreateOMPAlloc( + CGF.Builder, Size, Allocator, + getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", ".")); + llvm::CallInst *FreeCI = + OMPBuilder.CreateOMPFree(CGF.Builder, Addr, Allocator); + + CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI); + Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + Addr, + CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), + getNameWithSeparators({CVD->getName(), ".addr"}, ".", ".")); + return Address(Addr, Align); +} + +Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate( + CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, + SourceLocation Loc) { + CodeGenModule &CGM = CGF.CGM; + if (CGM.getLangOpts().OpenMPUseTLS && + CGM.getContext().getTargetInfo().isTLSSupported()) + return VDAddr; + + llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); + + llvm::Type *VarTy = VDAddr.getElementType(); + llvm::Value *Data = + CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy); + llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)); + std::string Suffix = getNameWithSeparators({"cache", ""}); + llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix); + + llvm::CallInst *ThreadPrivateCacheCall = + OMPBuilder.CreateCachedThreadPrivate(CGF.Builder, Data, Size, CacheName); + + return Address(ThreadPrivateCacheCall, VDAddr.getAlignment()); +} - if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) { +std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators( + ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) { + SmallString<128> Buffer; + llvm::raw_svector_ostream OS(Buffer); + StringRef Sep = FirstSeparator; + for (StringRef Part : Parts) { + OS << Sep << Part; + Sep = Separator; + } + return OS.str().str(); +} +void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { + if (CGM.getLangOpts().OpenMPIRBuilder) { + llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); // Check if we have any if clause associated with the directive. llvm::Value *IfCond = nullptr; if (const auto *C = S.getSingleClause<OMPIfClause>()) @@ -1357,15 +1677,7 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { // The cleanup callback that finalizes all variables at the given location, // thus calls destructors etc.
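// Note (illustration, not part of this patch): getNameWithSeparators above
// prints FirstSeparator before the first part and Separator before each
// following part. Assuming the declared defaults are ".", the calls above
// produce:
//   getNameWithSeparators({"cache", ""})                 == ".cache."
//   getNameWithSeparators({"x", ".void.addr"}, ".", ".") == ".x..void.addr"
// so the threadprivate cache is named "<mangled name>.cache.". A standalone
// behavioral model:
// std::string join(const std::vector<std::string> &Parts,
//                  const std::string &First = ".",
//                  const std::string &Sep = ".") {
//   std::string Out;
//   const std::string *S = &First;
//   for (const auto &P : Parts) { Out += *S; Out += P; S = &Sep; }
//   return Out; // join({"cache", ""}) == ".cache."
// }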
auto FiniCB = [this](InsertPointTy IP) { - CGBuilderTy::InsertPointGuard IPG(Builder); - assert(IP.getBlock()->end() != IP.getPoint() && - "OpenMP IR Builder should cause terminated block!"); - llvm::BasicBlock *IPBB = IP.getBlock(); - llvm::BasicBlock *DestBB = IPBB->splitBasicBlock(IP.getPoint()); - IPBB->getTerminator()->eraseFromParent(); - Builder.SetInsertPoint(IPBB); - CodeGenFunction::JumpDest Dest = getJumpDestInCurrentScope(DestBB); - EmitBranchThroughCleanup(Dest); + OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); }; // Privatization callback that performs appropriate action for @@ -1387,32 +1699,17 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { auto BodyGenCB = [ParallelRegionBodyStmt, this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, llvm::BasicBlock &ContinuationBB) { - auto OldAllocaIP = AllocaInsertPt; - AllocaInsertPt = &*AllocaIP.getPoint(); - - auto OldReturnBlock = ReturnBlock; - ReturnBlock = getJumpDestInCurrentScope(&ContinuationBB); - - llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); - CodeGenIPBB->splitBasicBlock(CodeGenIP.getPoint()); - llvm::Instruction *CodeGenIPBBTI = CodeGenIPBB->getTerminator(); - CodeGenIPBBTI->removeFromParent(); - - Builder.SetInsertPoint(CodeGenIPBB); - - EmitStmt(ParallelRegionBodyStmt); - - Builder.Insert(CodeGenIPBBTI); - - AllocaInsertPt = OldAllocaIP; - ReturnBlock = OldReturnBlock; + OMPBuilderCBHelpers::OutlinedRegionBodyRAII ORB(*this, AllocaIP, + ContinuationBB); + OMPBuilderCBHelpers::EmitOMPRegionBody(*this, ParallelRegionBodyStmt, + CodeGenIP, ContinuationBB); }; CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); - Builder.restoreIP(OMPBuilder->CreateParallel(Builder, BodyGenCB, PrivCB, - FiniCB, IfCond, NumThreads, - ProcBind, S.hasCancel())); + Builder.restoreIP(OMPBuilder.CreateParallel(Builder, BodyGenCB, PrivCB, + FiniCB, IfCond, NumThreads, + ProcBind, S.hasCancel())); return; } @@ -1436,10 +1733,16 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt()); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); }; - emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen, - emitEmptyBoundParameters); - emitPostUpdateForReductionClause(*this, S, - [](CodeGenFunction &) { return nullptr; }); + { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); + emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen, + emitEmptyBoundParameters); + emitPostUpdateForReductionClause(*this, S, + [](CodeGenFunction &) { return nullptr; }); + } + // Check for outer lastprivate conditional update. + checkForLastprivateConditionalUpdate(*this, S); } static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop, @@ -1506,6 +1809,27 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, getProfileCount(D.getBody())); EmitBlock(NextBB); } + + OMPPrivateScope InscanScope(*this); + EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true); + bool IsInscanRegion = InscanScope.Privatize(); + if (IsInscanRegion) { + // Need to remember the block before and after scan directive + // to dispatch them correctly depending on the clause used in + // this directive, inclusive or exclusive. For inclusive scan the natural + // order of the blocks is used, for exclusive clause the blocks must be + // executed in reverse order. 
+ OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb"); + OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb"); + // No need to allocate inscan exit block, in simd mode it is selected in the + // codegen for the scan directive. + if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd) + OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb"); + OMPScanDispatch = createBasicBlock("omp.inscan.dispatch"); + EmitBranch(OMPScanDispatch); + EmitBlock(OMPBeforeScanBlock); + } + // Emit loop variables for C++ range loops. const Stmt *Body = D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); @@ -1515,13 +1839,17 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, Body, /*TryImperfectlyNestedLoops=*/true), D.getCollapsedNumber()); + // Jump to the dispatcher at the end of the loop body. + if (IsInscanRegion) + EmitBranch(OMPScanExitBlock); + // The end (updates/cleanups). EmitBlock(Continue.getBlock()); BreakContinueStack.pop_back(); } void CodeGenFunction::EmitOMPInnerLoop( - const Stmt &S, bool RequiresCleanup, const Expr *LoopCond, + const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond, const Expr *IncExpr, const llvm::function_ref<void(CodeGenFunction &)> BodyGen, const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) { @@ -1531,8 +1859,19 @@ void CodeGenFunction::EmitOMPInnerLoop( auto CondBlock = createBasicBlock("omp.inner.for.cond"); EmitBlock(CondBlock); const SourceRange R = S.getSourceRange(); - LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), - SourceLocToDebugLoc(R.getEnd())); + + // If attributes are attached, push to the basic block with them. + const auto &OMPED = cast<OMPExecutableDirective>(S); + const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt(); + const Stmt *SS = ICS->getCapturedStmt(); + const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS); + if (AS) + LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), + AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()), + SourceLocToDebugLoc(R.getEnd())); + else + LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), + SourceLocToDebugLoc(R.getEnd())); // If there are any cleanups between here and the loop-exit scope, // create a block to stage a loop exit along. @@ -1671,7 +2010,7 @@ static void emitAlignedClause(CodeGenFunction &CGF, "alignment is not power of 2"); if (Alignment != 0) { llvm::Value *PtrValue = CGF.EmitScalarExpr(E); - CGF.EmitAlignmentAssumption( + CGF.emitAlignmentAssumption( PtrValue, E, /*No second loc needed*/ SourceLocation(), llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment)); } @@ -1835,6 +2174,18 @@ void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D, LoopStack.setParallel(!IsMonotonic); LoopStack.setVectorizeEnable(); emitSimdlenSafelenClause(*this, D, IsMonotonic); + if (const auto *C = D.getSingleClause<OMPOrderClause>()) + if (C->getKind() == OMPC_ORDER_concurrent) + LoopStack.setParallel(/*Enable=*/true); + if ((D.getDirectiveKind() == OMPD_simd || + (getLangOpts().OpenMPSimd && + isOpenMPSimdDirective(D.getDirectiveKind()))) && + llvm::any_of(D.getClausesOfKind<OMPReductionClause>(), + [](const OMPReductionClause *C) { + return C->getModifier() == OMPC_REDUCTION_inscan; + })) + // Disable parallel access in case of prefix sum. 
+ LoopStack.setParallel(/*Enable=*/false); } void CodeGenFunction::EmitOMPSimdFinal( @@ -1886,7 +2237,6 @@ void CodeGenFunction::EmitOMPSimdFinal( static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, const OMPLoopDirective &S, CodeGenFunction::JumpDest LoopExit) { - CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter(CGF, S); CGF.EmitOMPLoopBody(S, LoopExit); CGF.EmitStopPoint(&S); } @@ -1917,12 +2267,14 @@ static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S, BodyCodeGen(CGF); }; const Expr *IfCond = nullptr; - for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { - if (CGF.getLangOpts().OpenMP >= 50 && - (C->getNameModifier() == OMPD_unknown || - C->getNameModifier() == OMPD_simd)) { - IfCond = C->getCondition(); - break; + if (isOpenMPSimdDirective(S.getDirectiveKind())) { + for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { + if (CGF.getLangOpts().OpenMP >= 50 && + (C->getNameModifier() == OMPD_unknown || + C->getNameModifier() == OMPD_simd)) { + IfCond = C->getCondition(); + break; + } } } if (IfCond) { @@ -2007,10 +2359,8 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, CGF.EmitOMPInnerLoop( S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), [&S](CodeGenFunction &CGF) { - CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter( - CGF, S); - CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest()); - CGF.EmitStopPoint(&S); + emitOMPLoopBodyWithStopPoint(CGF, S, + CodeGenFunction::JumpDest()); }, [](CodeGenFunction &) {}); }); @@ -2031,11 +2381,19 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, } void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { + ParentLoopDirectiveForScanRegion ScanRegion(*this, S); + OMPFirstScanLoop = true; auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { emitOMPSimdRegion(CGF, S, Action); }; - OMPLexicalScope Scope(*this, S, OMPD_unknown); - CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); + { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); + OMPLexicalScope Scope(*this, S, OMPD_unknown); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); + } + // Check for outer lastprivate conditional update. + checkForLastprivateConditionalUpdate(*this, S); } void CodeGenFunction::EmitOMPOuterLoop( @@ -2103,10 +2461,14 @@ void CodeGenFunction::EmitOMPOuterLoop( [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) { // Generate !llvm.loop.parallel metadata for loads and stores for loops // with dynamic/guided scheduling and without ordered clause. 
- if (!isOpenMPSimdDirective(S.getDirectiveKind())) + if (!isOpenMPSimdDirective(S.getDirectiveKind())) { CGF.LoopStack.setParallel(!IsMonotonic); - else + if (const auto *C = S.getSingleClause<OMPOrderClause>()) + if (C->getKind() == OMPC_ORDER_concurrent) + CGF.LoopStack.setParallel(/*Enable=*/true); + } else { CGF.EmitOMPSimdInit(S, IsMonotonic); + } }, [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { @@ -2612,6 +2974,14 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule, /* Chunked */ Chunk != nullptr) && HasChunkSizeOne && isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); + bool IsMonotonic = + Ordered + || + ((ScheduleKind.Schedule == OMPC_SCHEDULE_static || + ScheduleKind.Schedule == OMPC_SCHEDULE_unknown) && + !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic || + ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) || + ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || + ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; if ((RT.isStaticNonchunked(ScheduleKind.Schedule, /* Chunked */ Chunk != nullptr) || StaticChunkedOne) && @@ -2620,9 +2990,13 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); emitCommonSimdLoop( *this, S, - [&S](CodeGenFunction &CGF, PrePostActionTy &) { - if (isOpenMPSimdDirective(S.getDirectiveKind())) - CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true); + [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) { + if (isOpenMPSimdDirective(S.getDirectiveKind())) { + CGF.EmitOMPSimdInit(S, IsMonotonic); + } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) { + if (C->getKind() == OMPC_ORDER_concurrent) + CGF.LoopStack.setParallel(/*Enable=*/true); + } + }, [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk, &S, ScheduleKind, LoopExit, @@ -2663,10 +3037,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( : S.getCond(), StaticChunkedOne ? S.getDistInc() : S.getInc(), [&S, LoopExit](CodeGenFunction &CGF) { - CGF.CGM.getOpenMPRuntime() - .initLastprivateConditionalCounter(CGF, S); - CGF.EmitOMPLoopBody(S, LoopExit); - CGF.EmitStopPoint(&S); + emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit); }, [](CodeGenFunction &) {}); }); @@ -2678,11 +3049,6 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( }; OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); } else { - const bool IsMonotonic = - Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static || - ScheduleKind.Schedule == OMPC_SCHEDULE_unknown || - ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || - ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; // Emit the outer loop, which requests its work chunk [LB..UB] from // runtime and runs the inner loop to process it. const OMPLoopArguments LoopArguments( @@ -2755,16 +3121,233 @@ emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, return {LBVal, UBVal}; } +/// Emits the code for the directive with inscan reductions. +/// The code is the following: +/// \code +/// size num_iters = <num_iters>; +/// <type> buffer[num_iters]; +/// #pragma omp ... +/// for (i: 0..<num_iters>) { +/// <input phase>; +/// buffer[i] = red; +/// } +/// for (int k = 0; k != ceil(log2(num_iters)); ++k) +/// for (size cnt = last_iter; cnt >= pow(2, k); --cnt) +/// buffer[cnt] op= buffer[cnt-pow(2,k)]; +/// #pragma omp ... +/// for (0..<num_iters>) { +/// red = InclusiveScan ? 
buffer[i] : buffer[i-1]; +/// <scan phase>; +/// } +/// \endcode +static void emitScanBasedDirective( + CodeGenFunction &CGF, const OMPLoopDirective &S, + llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen, + llvm::function_ref<void(CodeGenFunction &)> FirstGen, + llvm::function_ref<void(CodeGenFunction &)> SecondGen) { + llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast( + NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false); + SmallVector<const Expr *, 4> Shareds; + SmallVector<const Expr *, 4> Privates; + SmallVector<const Expr *, 4> ReductionOps; + SmallVector<const Expr *, 4> LHSs; + SmallVector<const Expr *, 4> RHSs; + SmallVector<const Expr *, 4> CopyOps; + SmallVector<const Expr *, 4> CopyArrayTemps; + SmallVector<const Expr *, 4> CopyArrayElems; + for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { + assert(C->getModifier() == OMPC_REDUCTION_inscan && + "Only inscan reductions are expected."); + Shareds.append(C->varlist_begin(), C->varlist_end()); + Privates.append(C->privates().begin(), C->privates().end()); + ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); + LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); + RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); + CopyOps.append(C->copy_ops().begin(), C->copy_ops().end()); + CopyArrayTemps.append(C->copy_array_temps().begin(), + C->copy_array_temps().end()); + CopyArrayElems.append(C->copy_array_elems().begin(), + C->copy_array_elems().end()); + } + { + // Emit buffers for each reduction variable. + // ReductionCodeGen is required to correctly emit the code for array + // reductions. + ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps); + unsigned Count = 0; + auto *ITA = CopyArrayTemps.begin(); + for (const Expr *IRef : Privates) { + const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); + // Emit variably modified arrays, used for arrays/array sections + // reductions. + if (PrivateVD->getType()->isVariablyModifiedType()) { + RedCG.emitSharedOrigLValue(CGF, Count); + RedCG.emitAggregateType(CGF, Count); + } + CodeGenFunction::OpaqueValueMapping DimMapping( + CGF, + cast<OpaqueValueExpr>( + cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe()) + ->getSizeExpr()), + RValue::get(OMPScanNumIterations)); + // Emit temp buffer. + CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl())); + ++ITA; + ++Count; + } + } + CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S); + { + // Emit loop with input phase: + // #pragma omp ... 
+ // for (i: 0..<num_iters>) { + // <input phase>; + // buffer[i] = red; + // } + CGF.OMPFirstScanLoop = true; + CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); + FirstGen(CGF); + } + // Emit prefix reduction: + // for (int k = 0; k <= ceil(log2(n)); ++k) + llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock(); + llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body"); + llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit"); + llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy); + llvm::Value *Arg = + CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy); + llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg); + F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy); + LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal); + LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy); + llvm::Value *NMin1 = CGF.Builder.CreateNUWSub( + OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1)); + auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc()); + CGF.EmitBlock(LoopBB); + auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2); + // size pow2k = 1; + auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2); + Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB); + Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB); + // for (size i = n - 1; i >= 2 ^ k; --i) + // tmp[i] op= tmp[i-pow2k]; + llvm::BasicBlock *InnerLoopBB = + CGF.createBasicBlock("omp.inner.log.scan.body"); + llvm::BasicBlock *InnerExitBB = + CGF.createBasicBlock("omp.inner.log.scan.exit"); + llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K); + CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB); + CGF.EmitBlock(InnerLoopBB); + auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2); + IVal->addIncoming(NMin1, LoopBB); + { + CodeGenFunction::OMPPrivateScope PrivScope(CGF); + auto *ILHS = LHSs.begin(); + auto *IRHS = RHSs.begin(); + for (const Expr *CopyArrayElem : CopyArrayElems) { + const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); + const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); + Address LHSAddr = Address::invalid(); + { + CodeGenFunction::OpaqueValueMapping IdxMapping( + CGF, + cast<OpaqueValueExpr>( + cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), + RValue::get(IVal)); + LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF); + } + PrivScope.addPrivate(LHSVD, [LHSAddr]() { return LHSAddr; }); + Address RHSAddr = Address::invalid(); + { + llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K); + CodeGenFunction::OpaqueValueMapping IdxMapping( + CGF, + cast<OpaqueValueExpr>( + cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), + RValue::get(OffsetIVal)); + RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF); + } + PrivScope.addPrivate(RHSVD, [RHSAddr]() { return RHSAddr; }); + ++ILHS; + ++IRHS; + } + PrivScope.Privatize(); + CGF.CGM.getOpenMPRuntime().emitReduction( + CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps, + {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown}); + } + llvm::Value *NextIVal = + CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1)); + IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock()); + CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K); + CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB); + CGF.EmitBlock(InnerExitBB); + llvm::Value *Next = + CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1)); + Counter->addIncoming(Next, 
CGF.Builder.GetInsertBlock()); + // pow2k <<= 1; + llvm::Value *NextPow2K = CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true); + Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock()); + llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal); + CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB); + auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc()); + CGF.EmitBlock(ExitBB); + + CGF.OMPFirstScanLoop = false; + SecondGen(CGF); +} + +static bool emitWorksharingDirective(CodeGenFunction &CGF, + const OMPLoopDirective &S, + bool HasCancel) { + bool HasLastprivates; + if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(), + [](const OMPReductionClause *C) { + return C->getModifier() == OMPC_REDUCTION_inscan; + })) { + const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { + CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); + OMPLoopScope LoopScope(CGF, S); + return CGF.EmitScalarExpr(S.getNumIterations()); + }; + const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) { + CodeGenFunction::OMPCancelStackRAII CancelRegion( + CGF, S.getDirectiveKind(), HasCancel); + (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), + emitForLoopBounds, + emitDispatchForLoopBounds); + // Emit an implicit barrier at the end. + CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(), + OMPD_for); + }; + const auto &&SecondGen = [&S, HasCancel, + &HasLastprivates](CodeGenFunction &CGF) { + CodeGenFunction::OMPCancelStackRAII CancelRegion( + CGF, S.getDirectiveKind(), HasCancel); + HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), + emitForLoopBounds, + emitDispatchForLoopBounds); + }; + emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen); + } else { + CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(), + HasCancel); + HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), + emitForLoopBounds, + emitDispatchForLoopBounds); + } + return HasLastprivates; +} + void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { bool HasLastprivates = false; auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { - OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); - HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), - emitForLoopBounds, - emitDispatchForLoopBounds); + HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel()); }; { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); OMPLexicalScope Scope(*this, S, OMPD_unknown); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, S.hasCancel()); @@ -2773,17 +3356,19 @@ void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { // Emit an implicit barrier at the end. if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); + // Check for outer lastprivate conditional update. 
+ checkForLastprivateConditionalUpdate(*this, S); } void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { bool HasLastprivates = false; auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { - HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), - emitForLoopBounds, - emitDispatchForLoopBounds); + HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false); }; { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); OMPLexicalScope Scope(*this, S, OMPD_unknown); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); } @@ -2791,6 +3376,8 @@ void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { // Emit an implicit barrier at the end. if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); + // Check for outer lastprivate conditional update. + checkForLastprivateConditionalUpdate(*this, S); } static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, @@ -2808,7 +3395,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { bool HasLastprivates = false; auto &&CodeGen = [&S, CapturedStmt, CS, &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { - ASTContext &C = CGF.getContext(); + const ASTContext &C = CGF.getContext(); QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); // Emit helper vars inits. @@ -2830,11 +3417,13 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); // Generate condition for loop. - BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, - OK_Ordinary, S.getBeginLoc(), FPOptions()); + BinaryOperator *Cond = BinaryOperator::Create( + C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, OK_Ordinary, + S.getBeginLoc(), FPOptionsOverride()); // Increment for loop counter. - UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, - S.getBeginLoc(), true); + UnaryOperator *Inc = UnaryOperator::Create( + C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, + S.getBeginLoc(), true, FPOptionsOverride()); auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) { // Iterate through all sections and emit a switch construct: // switch (IV) { @@ -2847,7 +3436,6 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { // break; // } // .omp.sections.exit: - CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter(CGF, S); llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); llvm::SwitchInst *SwitchStmt = CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()), @@ -2905,7 +3493,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { // IV = LB; CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV); // while (idx <= UB) { BODY; ++idx; } - CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen, + CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen, [](CodeGenFunction &) {}); // Tell the runtime we are done. 
auto &&CodeGen = [&S](CodeGenFunction &CGF) { @@ -2949,6 +3537,8 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); OMPLexicalScope Scope(*this, S, OMPD_unknown); EmitSections(S); } @@ -2957,6 +3547,8 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_sections); } + // Check for outer lastprivate conditional update. + checkForLastprivateConditionalUpdate(*this, S); } void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { @@ -2995,6 +3587,8 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); }; { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); OMPLexicalScope Scope(*this, S, OMPD_unknown); CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(), CopyprivateVars, DestExprs, @@ -3007,6 +3601,8 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { *this, S.getBeginLoc(), S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single); } + // Check for outer lastprivate conditional update. + checkForLastprivateConditionalUpdate(*this, S); } static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) { @@ -3018,11 +3614,75 @@ static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) { } void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { + if (CGM.getLangOpts().OpenMPIRBuilder) { + llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + + const CapturedStmt *CS = S.getInnermostCapturedStmt(); + const Stmt *MasterRegionBodyStmt = CS->getCapturedStmt(); + + auto FiniCB = [this](InsertPointTy IP) { + OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); + }; + + auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP, + InsertPointTy CodeGenIP, + llvm::BasicBlock &FiniBB) { + OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); + OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MasterRegionBodyStmt, + CodeGenIP, FiniBB); + }; + + CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); + Builder.restoreIP(OMPBuilder.CreateMaster(Builder, BodyGenCB, FiniCB)); + + return; + } OMPLexicalScope Scope(*this, S, OMPD_unknown); emitMaster(*this, S); } void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { + if (CGM.getLangOpts().OpenMPIRBuilder) { + llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + + const CapturedStmt *CS = S.getInnermostCapturedStmt(); + const Stmt *CriticalRegionBodyStmt = CS->getCapturedStmt(); + const Expr *Hint = nullptr; + if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) + Hint = HintClause->getHint(); + + // TODO: This is slightly different from what's currently being done in + // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything + // about typing is final. 
+ llvm::Value *HintInst = nullptr; + if (Hint) + HintInst = + Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false); + + auto FiniCB = [this](InsertPointTy IP) { + OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); + }; + + auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP, + InsertPointTy CodeGenIP, + llvm::BasicBlock &FiniBB) { + OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); + OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CriticalRegionBodyStmt, + CodeGenIP, FiniBB); + }; + + CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); + Builder.restoreIP(OMPBuilder.CreateCritical( + Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(), + HintInst)); + + return; + } + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); @@ -3042,12 +3702,16 @@ void CodeGenFunction::EmitOMPParallelForDirective( // directives: 'parallel' with 'for' directive. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); - OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel()); - CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, - emitDispatchForLoopBounds); + (void)emitWorksharingDirective(CGF, S, S.hasCancel()); }; - emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, - emitEmptyBoundParameters); + { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); + emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, + emitEmptyBoundParameters); + } + // Check for outer lastprivate conditional update. + checkForLastprivateConditionalUpdate(*this, S); } void CodeGenFunction::EmitOMPParallelForSimdDirective( @@ -3056,11 +3720,16 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective( // directives: 'parallel' with 'for' directive. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); - CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, - emitDispatchForLoopBounds); + (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false); }; - emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen, - emitEmptyBoundParameters); + { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); + emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen, + emitEmptyBoundParameters); + } + // Check for outer lastprivate conditional update. + checkForLastprivateConditionalUpdate(*this, S); } void CodeGenFunction::EmitOMPParallelMasterDirective( @@ -3086,10 +3755,16 @@ void CodeGenFunction::EmitOMPParallelMasterDirective( emitMaster(CGF, S); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); }; - emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen, - emitEmptyBoundParameters); - emitPostUpdateForReductionClause(*this, S, - [](CodeGenFunction &) { return nullptr; }); + { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); + emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen, + emitEmptyBoundParameters); + emitPostUpdateForReductionClause(*this, S, + [](CodeGenFunction &) { return nullptr; }); + } + // Check for outer lastprivate conditional update. 
+ checkForLastprivateConditionalUpdate(*this, S); } void CodeGenFunction::EmitOMPParallelSectionsDirective( @@ -3100,8 +3775,14 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective( Action.Enter(CGF); CGF.EmitSections(S); }; - emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, - emitEmptyBoundParameters); + { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); + emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, + emitEmptyBoundParameters); + } + // Check for outer lastprivate conditional update. + checkForLastprivateConditionalUpdate(*this, S); } void CodeGenFunction::EmitOMPTaskBasedDirective( @@ -3188,33 +3869,28 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( SmallVector<const Expr *, 4> LHSs; SmallVector<const Expr *, 4> RHSs; for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { - auto IPriv = C->privates().begin(); - auto IRed = C->reduction_ops().begin(); - auto ILHS = C->lhs_exprs().begin(); - auto IRHS = C->rhs_exprs().begin(); - for (const Expr *Ref : C->varlists()) { - Data.ReductionVars.emplace_back(Ref); - Data.ReductionCopies.emplace_back(*IPriv); - Data.ReductionOps.emplace_back(*IRed); - LHSs.emplace_back(*ILHS); - RHSs.emplace_back(*IRHS); - std::advance(IPriv, 1); - std::advance(IRed, 1); - std::advance(ILHS, 1); - std::advance(IRHS, 1); - } + Data.ReductionVars.append(C->varlist_begin(), C->varlist_end()); + Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); + Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); + Data.ReductionOps.append(C->reduction_ops().begin(), + C->reduction_ops().end()); + LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); + RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); } Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit( *this, S.getBeginLoc(), LHSs, RHSs, Data); // Build list of dependences. - for (const auto *C : S.getClausesOfKind<OMPDependClause>()) - for (const Expr *IRef : C->varlists()) - Data.Dependences.emplace_back(C->getDependencyKind(), IRef); + for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { + OMPTaskDataTy::DependData &DD = + Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier()); + DD.DepExprs.append(C->varlist_begin(), C->varlist_end()); + } auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs, CapturedRegion](CodeGenFunction &CGF, PrePostActionTy &Action) { // Set proper addresses for generated private copies. 
OMPPrivateScope Scope(CGF); + llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs; if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || !Data.LastprivateVars.empty()) { llvm::FunctionType *CopyFnTy = llvm::FunctionType::get( @@ -3241,6 +3917,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), ".firstpriv.ptr.addr"); PrivatePtrs.emplace_back(VD, PrivatePtr); + FirstprivatePtrs.emplace_back(VD, PrivatePtr); CallArgs.push_back(PrivatePtr.getPointer()); } for (const Expr *E : Data.LastprivateVars) { @@ -3271,13 +3948,21 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( } } if (Data.Reductions) { + OMPPrivateScope FirstprivateScope(CGF); + for (const auto &Pair : FirstprivatePtrs) { + Address Replacement(CGF.Builder.CreateLoad(Pair.second), + CGF.getContext().getDeclAlign(Pair.first)); + FirstprivateScope.addPrivate(Pair.first, + [Replacement]() { return Replacement; }); + } + (void)FirstprivateScope.Privatize(); OMPLexicalScope LexScope(CGF, S, CapturedRegion); - ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies, - Data.ReductionOps); + ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars, + Data.ReductionCopies, Data.ReductionOps); llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad( CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9))); for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) { - RedCG.emitSharedLValue(CGF, Cnt); + RedCG.emitSharedOrigLValue(CGF, Cnt); RedCG.emitAggregateType(CGF, Cnt); // FIXME: This must be removed once the runtime library is fixed. // Emit required threadprivate variables for @@ -3322,9 +4007,9 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( // privatized earlier. OMPPrivateScope InRedScope(CGF); if (!InRedVars.empty()) { - ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps); + ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps); for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) { - RedCG.emitSharedLValue(CGF, Cnt); + RedCG.emitSharedOrigLValue(CGF, Cnt); RedCG.emitAggregateType(CGF, Cnt); // The taskgroup descriptor variable is always implicit firstprivate and // privatized already during processing of the firstprivates. @@ -3333,9 +4018,13 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( // initializer/combiner/finalizer. CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(), RedCG, Cnt); - llvm::Value *ReductionsPtr = - CGF.EmitLoadOfScalar(CGF.EmitLValue(TaskgroupDescriptors[Cnt]), - TaskgroupDescriptors[Cnt]->getExprLoc()); + llvm::Value *ReductionsPtr; + if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) { + ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr), + TRExpr->getExprLoc()); + } else { + ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); + } Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); Replacement = Address( @@ -3448,9 +4137,11 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( } (void)TargetScope.Privatize(); // Build list of dependences. 
- for (const auto *C : S.getClausesOfKind<OMPDependClause>()) - for (const Expr *IRef : C->varlists()) - Data.Dependences.emplace_back(C->getDependencyKind(), IRef); + for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { + OMPTaskDataTy::DependData &DD = + Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier()); + DD.DepExprs.append(C->varlist_begin(), C->varlist_end()); + } auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) { // Set proper addresses for generated private copies. @@ -3537,6 +4228,8 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { SharedsTy, CapturedStruct, IfCond, Data); }; + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data); } @@ -3562,21 +4255,13 @@ void CodeGenFunction::EmitOMPTaskgroupDirective( SmallVector<const Expr *, 4> RHSs; OMPTaskDataTy Data; for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) { - auto IPriv = C->privates().begin(); - auto IRed = C->reduction_ops().begin(); - auto ILHS = C->lhs_exprs().begin(); - auto IRHS = C->rhs_exprs().begin(); - for (const Expr *Ref : C->varlists()) { - Data.ReductionVars.emplace_back(Ref); - Data.ReductionCopies.emplace_back(*IPriv); - Data.ReductionOps.emplace_back(*IRed); - LHSs.emplace_back(*ILHS); - RHSs.emplace_back(*IRHS); - std::advance(IPriv, 1); - std::advance(IRed, 1); - std::advance(ILHS, 1); - std::advance(IRHS, 1); - } + Data.ReductionVars.append(C->varlist_begin(), C->varlist_end()); + Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); + Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); + Data.ReductionOps.append(C->reduction_ops().begin(), + C->reduction_ops().end()); + LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); + RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); } llvm::Value *ReductionDesc = CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(), @@ -3593,6 +4278,9 @@ void CodeGenFunction::EmitOMPTaskgroupDirective( } void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { + llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>() + ? 
llvm::AtomicOrdering::NotAtomic + : llvm::AtomicOrdering::AcquireRelease; CGM.getOpenMPRuntime().emitFlush( *this, [&S]() -> ArrayRef<const Expr *> { @@ -3601,7 +4289,233 @@ void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { FlushClause->varlist_end()); return llvm::None; }(), - S.getBeginLoc()); + S.getBeginLoc(), AO); +} + +void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) { + const auto *DO = S.getSingleClause<OMPDepobjClause>(); + LValue DOLVal = EmitLValue(DO->getDepobj()); + if (const auto *DC = S.getSingleClause<OMPDependClause>()) { + OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(), + DC->getModifier()); + Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end()); + Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause( + *this, Dependencies, DC->getBeginLoc()); + EmitStoreOfScalar(DepAddr.getPointer(), DOLVal); + return; + } + if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) { + CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc()); + return; + } + if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) { + CGM.getOpenMPRuntime().emitUpdateClause( + *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc()); + return; + } +} + +void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) { + if (!OMPParentLoopDirectiveForScan) + return; + const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan; + bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>(); + SmallVector<const Expr *, 4> Shareds; + SmallVector<const Expr *, 4> Privates; + SmallVector<const Expr *, 4> LHSs; + SmallVector<const Expr *, 4> RHSs; + SmallVector<const Expr *, 4> ReductionOps; + SmallVector<const Expr *, 4> CopyOps; + SmallVector<const Expr *, 4> CopyArrayTemps; + SmallVector<const Expr *, 4> CopyArrayElems; + for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) { + if (C->getModifier() != OMPC_REDUCTION_inscan) + continue; + Shareds.append(C->varlist_begin(), C->varlist_end()); + Privates.append(C->privates().begin(), C->privates().end()); + LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); + RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); + ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); + CopyOps.append(C->copy_ops().begin(), C->copy_ops().end()); + CopyArrayTemps.append(C->copy_array_temps().begin(), + C->copy_array_temps().end()); + CopyArrayElems.append(C->copy_array_elems().begin(), + C->copy_array_elems().end()); + } + if (ParentDir.getDirectiveKind() == OMPD_simd || + (getLangOpts().OpenMPSimd && + isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) { + // For simd directive and simd-based directives in simd only mode, use the + // following codegen: + // int x = 0; + // #pragma omp simd reduction(inscan, +: x) + // for (..) { + // <first part> + // #pragma omp scan inclusive(x) + // <second part> + // } + // is transformed to: + // int x = 0; + // for (..) { + // int x_priv = 0; + // <first part> + // x = x_priv + x; + // x_priv = x; + // <second part> + // } + // and + // int x = 0; + // #pragma omp simd reduction(inscan, +: x) + // for (..) { + // <first part> + // #pragma omp scan exclusive(x) + // <second part> + // } + // to + // int x = 0; + // for (..) { + // int x_priv = 0; + // <second part> + // int temp = x; + // x = x_priv + x; + // x_priv = temp; + // <first part> + // } + llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce"); + EmitBranch(IsInclusive + ? 
OMPScanReduce + : BreakContinueStack.back().ContinueBlock.getBlock()); + EmitBlock(OMPScanDispatch); + { + // New scope for correct construction/destruction of temp variables for + // exclusive scan. + LexicalScope Scope(*this, S.getSourceRange()); + EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock); + EmitBlock(OMPScanReduce); + if (!IsInclusive) { + // Create temp var and copy LHS value to this temp value. + // TMP = LHS; + for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { + const Expr *PrivateExpr = Privates[I]; + const Expr *TempExpr = CopyArrayTemps[I]; + EmitAutoVarDecl( + *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl())); + LValue DestLVal = EmitLValue(TempExpr); + LValue SrcLVal = EmitLValue(LHSs[I]); + EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this), + SrcLVal.getAddress(*this), + cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), + cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), + CopyOps[I]); + } + } + CGM.getOpenMPRuntime().emitReduction( + *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps, + {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd}); + for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { + const Expr *PrivateExpr = Privates[I]; + LValue DestLVal; + LValue SrcLVal; + if (IsInclusive) { + DestLVal = EmitLValue(RHSs[I]); + SrcLVal = EmitLValue(LHSs[I]); + } else { + const Expr *TempExpr = CopyArrayTemps[I]; + DestLVal = EmitLValue(RHSs[I]); + SrcLVal = EmitLValue(TempExpr); + } + EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this), + SrcLVal.getAddress(*this), + cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), + cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), + CopyOps[I]); + } + } + EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock); + OMPScanExitBlock = IsInclusive + ? BreakContinueStack.back().ContinueBlock.getBlock() + : OMPScanReduce; + EmitBlock(OMPAfterScanBlock); + return; + } + if (!IsInclusive) { + EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock()); + EmitBlock(OMPScanExitBlock); + } + if (OMPFirstScanLoop) { + // Emit buffer[i] = red; at the end of the input phase. + const auto *IVExpr = cast<OMPLoopDirective>(ParentDir) + .getIterationVariable() + ->IgnoreParenImpCasts(); + LValue IdxLVal = EmitLValue(IVExpr); + llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc()); + IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false); + for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { + const Expr *PrivateExpr = Privates[I]; + const Expr *OrigExpr = Shareds[I]; + const Expr *CopyArrayElem = CopyArrayElems[I]; + OpaqueValueMapping IdxMapping( + *this, + cast<OpaqueValueExpr>( + cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), + RValue::get(IdxVal)); + LValue DestLVal = EmitLValue(CopyArrayElem); + LValue SrcLVal = EmitLValue(OrigExpr); + EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this), + SrcLVal.getAddress(*this), + cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), + cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), + CopyOps[I]); + } + } + EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock()); + if (IsInclusive) { + EmitBlock(OMPScanExitBlock); + EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock()); + } + EmitBlock(OMPScanDispatch); + if (!OMPFirstScanLoop) { + // Emit red = buffer[i]; at the entrance to the scan phase. 
+ const auto *IVExpr = cast<OMPLoopDirective>(ParentDir) + .getIterationVariable() + ->IgnoreParenImpCasts(); + LValue IdxLVal = EmitLValue(IVExpr); + llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc()); + IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false); + llvm::BasicBlock *ExclusiveExitBB = nullptr; + if (!IsInclusive) { + llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec"); + ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit"); + llvm::Value *Cmp = Builder.CreateIsNull(IdxVal); + Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB); + EmitBlock(ContBB); + // Use idx - 1 iteration for exclusive scan. + IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1)); + } + for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { + const Expr *PrivateExpr = Privates[I]; + const Expr *OrigExpr = Shareds[I]; + const Expr *CopyArrayElem = CopyArrayElems[I]; + OpaqueValueMapping IdxMapping( + *this, + cast<OpaqueValueExpr>( + cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), + RValue::get(IdxVal)); + LValue SrcLVal = EmitLValue(CopyArrayElem); + LValue DestLVal = EmitLValue(OrigExpr); + EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this), + SrcLVal.getAddress(*this), + cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), + cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), + CopyOps[I]); + } + if (!IsInclusive) { + EmitBlock(ExclusiveExitBB); + } + } + EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock + : OMPAfterScanBlock); + EmitBlock(OMPAfterScanBlock); } void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, @@ -3790,7 +4704,7 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, }); EmitBlock(LoopExit.getBlock()); // Tell the runtime we are done. - RT.emitForStaticFinish(*this, S.getBeginLoc(), S.getDirectiveKind()); + RT.emitForStaticFinish(*this, S.getEndLoc(), S.getDirectiveKind()); } else { // Emit the outer loop, which requests its work chunk [LB..UB] from // runtime and runs the inner loop to process it. 
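// Illustration (not part of this patch; 'n', 'in', 'incl' and 'excl' are
// invented for the example): source-level scans handled by the code above.
// Worksharing forms use the temporary-buffer two-pass lowering of
// emitScanBasedDirective, where the buffer filled by the input phase is
// combined in ceil(log2(n)) pairwise passes before the scan phase reads it
// back; plain simd uses the inline rewrite shown in EmitOMPScanDirective.
// void prefix_sums(int n, const int *in, int *incl, int *excl) {
//   int x = 0;
//   #pragma omp simd reduction(inscan, + : x)
//   for (int i = 0; i < n; ++i) {
//     x += in[i];    // input phase
//     #pragma omp scan inclusive(x)
//     incl[i] = x;   // scan phase: in[0] + ... + in[i]
//   }
//   x = 0;
//   #pragma omp simd reduction(inscan, + : x)
//   for (int i = 0; i < n; ++i) {
//     excl[i] = x;   // scan phase: in[0] + ... + in[i-1] (0 when i == 0)
//     #pragma omp scan exclusive(x)
//     x += in[i];    // input phase
//   }
// }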
@@ -3843,11 +4757,12 @@ void CodeGenFunction::EmitOMPDistributeDirective( } static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, - const CapturedStmt *S) { + const CapturedStmt *S, + SourceLocation Loc) { CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; CGF.CapturedStmtInfo = &CapStmtInfo; - llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S); + llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc); Fn->setDoesNotRecurse(); return Fn; } @@ -3867,7 +4782,8 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { if (C) { llvm::SmallVector<llvm::Value *, 16> CapturedVars; CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); - llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); + llvm::Function *OutlinedFn = + emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc()); CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(), OutlinedFn, CapturedVars); } else { @@ -3918,16 +4834,22 @@ convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, return ComplexVal; } -static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst, +static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO, LValue LVal, RValue RVal) { - if (LVal.isGlobalReg()) { + if (LVal.isGlobalReg()) CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal); - } else { - CGF.EmitAtomicStore(RVal, LVal, - IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent - : llvm::AtomicOrdering::Monotonic, - LVal.isVolatile(), /*isInit=*/false); - } + else + CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false); +} + +static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF, + llvm::AtomicOrdering AO, LValue LVal, + SourceLocation Loc) { + if (LVal.isGlobalReg()) + return CGF.EmitLoadOfLValue(LVal, Loc); + return CGF.EmitAtomicLoad( + LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO), + LVal.isVolatile()); } void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, @@ -3948,7 +4870,7 @@ void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, } } -static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, +static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO, const Expr *X, const Expr *V, SourceLocation Loc) { // v = x; @@ -3956,34 +4878,54 @@ static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, assert(X->isLValue() && "X of 'omp atomic read' is not lvalue"); LValue XLValue = CGF.EmitLValue(X); LValue VLValue = CGF.EmitLValue(V); - RValue Res = XLValue.isGlobalReg() - ? CGF.EmitLoadOfLValue(XLValue, Loc) - : CGF.EmitAtomicLoad( - XLValue, Loc, - IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent - : llvm::AtomicOrdering::Monotonic, - XLValue.isVolatile()); - // OpenMP, 2.12.6, atomic Construct - // Any atomic construct with a seq_cst clause forces the atomically - // performed operation to include an implicit flush operation without a - // list. - if (IsSeqCst) - CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); + RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc); + // OpenMP, 2.17.7, atomic Construct + // If the read or capture clause is specified and the acquire, acq_rel, or + // seq_cst clause is specified then the strong flush on exit from the atomic + // operation is also an acquire flush. 
+ switch (AO) { + case llvm::AtomicOrdering::Acquire: + case llvm::AtomicOrdering::AcquireRelease: + case llvm::AtomicOrdering::SequentiallyConsistent: + CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, + llvm::AtomicOrdering::Acquire); + break; + case llvm::AtomicOrdering::Monotonic: + case llvm::AtomicOrdering::Release: + break; + case llvm::AtomicOrdering::NotAtomic: + case llvm::AtomicOrdering::Unordered: + llvm_unreachable("Unexpected ordering."); + } CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc); + CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V); } -static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst, - const Expr *X, const Expr *E, - SourceLocation Loc) { +static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF, + llvm::AtomicOrdering AO, const Expr *X, + const Expr *E, SourceLocation Loc) { // x = expr; assert(X->isLValue() && "X of 'omp atomic write' is not lvalue"); - emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E)); - // OpenMP, 2.12.6, atomic Construct - // Any atomic construct with a seq_cst clause forces the atomically - // performed operation to include an implicit flush operation without a - // list. - if (IsSeqCst) - CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); + emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E)); + CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X); + // OpenMP, 2.17.7, atomic Construct + // If the write, update, or capture clause is specified and the release, + // acq_rel, or seq_cst clause is specified then the strong flush on entry to + // the atomic operation is also a release flush. + switch (AO) { + case llvm::AtomicOrdering::Release: + case llvm::AtomicOrdering::AcquireRelease: + case llvm::AtomicOrdering::SequentiallyConsistent: + CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, + llvm::AtomicOrdering::Release); + break; + case llvm::AtomicOrdering::Acquire: + case llvm::AtomicOrdering::Monotonic: + break; + case llvm::AtomicOrdering::NotAtomic: + case llvm::AtomicOrdering::Unordered: + llvm_unreachable("Unexpected ordering."); + } } static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, @@ -4104,10 +5046,10 @@ std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr( return Res; } -static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst, - const Expr *X, const Expr *E, - const Expr *UE, bool IsXLHSInRHSPart, - SourceLocation Loc) { +static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, + llvm::AtomicOrdering AO, const Expr *X, + const Expr *E, const Expr *UE, + bool IsXLHSInRHSPart, SourceLocation Loc) { assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && "Update expr in 'atomic update' must be a binary operator."); const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); @@ -4120,9 +5062,6 @@ static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst, assert(X->isLValue() && "X of 'omp atomic update' is not lvalue"); LValue XLValue = CGF.EmitLValue(X); RValue ExprRValue = CGF.EmitAnyExpr(E); - llvm::AtomicOrdering AO = IsSeqCst - ? llvm::AtomicOrdering::SequentiallyConsistent - : llvm::AtomicOrdering::Monotonic; const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? 
LHS : RHS; @@ -4134,12 +5073,25 @@ static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst, }; (void)CGF.EmitOMPAtomicSimpleUpdateExpr( XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); - // OpenMP, 2.12.6, atomic Construct - // Any atomic construct with a seq_cst clause forces the atomically - // performed operation to include an implicit flush operation without a - // list. - if (IsSeqCst) - CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); + CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X); + // OpenMP, 2.17.7, atomic Construct + // If the write, update, or capture clause is specified and the release, + // acq_rel, or seq_cst clause is specified then the strong flush on entry to + // the atomic operation is also a release flush. + switch (AO) { + case llvm::AtomicOrdering::Release: + case llvm::AtomicOrdering::AcquireRelease: + case llvm::AtomicOrdering::SequentiallyConsistent: + CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, + llvm::AtomicOrdering::Release); + break; + case llvm::AtomicOrdering::Acquire: + case llvm::AtomicOrdering::Monotonic: + break; + case llvm::AtomicOrdering::NotAtomic: + case llvm::AtomicOrdering::Unordered: + llvm_unreachable("Unexpected ordering."); + } } static RValue convertToType(CodeGenFunction &CGF, RValue Value, @@ -4159,7 +5111,8 @@ static RValue convertToType(CodeGenFunction &CGF, RValue Value, llvm_unreachable("Must be a scalar or complex."); } -static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, +static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, + llvm::AtomicOrdering AO, bool IsPostfixUpdate, const Expr *V, const Expr *X, const Expr *E, const Expr *UE, bool IsXLHSInRHSPart, @@ -4170,9 +5123,6 @@ static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, LValue VLValue = CGF.EmitLValue(V); LValue XLValue = CGF.EmitLValue(X); RValue ExprRValue = CGF.EmitAnyExpr(E); - llvm::AtomicOrdering AO = IsSeqCst - ? llvm::AtomicOrdering::SequentiallyConsistent - : llvm::AtomicOrdering::Monotonic; QualType NewVValType; if (UE) { // 'x' is updated with some additional value. @@ -4200,6 +5150,7 @@ static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, }; auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); + CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X); if (Res.first) { // 'atomicrmw' instruction was generated. if (IsPostfixUpdate) { @@ -4226,6 +5177,7 @@ static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO, Loc, Gen); + CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X); if (Res.first) { // 'atomicrmw' instruction was generated. NewVVal = IsPostfixUpdate ? Res.second : ExprRValue; @@ -4233,32 +5185,54 @@ static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, } // Emit post-update store to 'v' of old/new 'x' value. CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc); - // OpenMP, 2.12.6, atomic Construct - // Any atomic construct with a seq_cst clause forces the atomically - // performed operation to include an implicit flush operation without a - // list. 
- if (IsSeqCst) - CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); + CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V); + // OpenMP, 2.17.7, atomic Construct + // If the write, update, or capture clause is specified and the release, + // acq_rel, or seq_cst clause is specified then the strong flush on entry to + // the atomic operation is also a release flush. + // If the read or capture clause is specified and the acquire, acq_rel, or + // seq_cst clause is specified then the strong flush on exit from the atomic + // operation is also an acquire flush. + switch (AO) { + case llvm::AtomicOrdering::Release: + CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, + llvm::AtomicOrdering::Release); + break; + case llvm::AtomicOrdering::Acquire: + CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, + llvm::AtomicOrdering::Acquire); + break; + case llvm::AtomicOrdering::AcquireRelease: + case llvm::AtomicOrdering::SequentiallyConsistent: + CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, + llvm::AtomicOrdering::AcquireRelease); + break; + case llvm::AtomicOrdering::Monotonic: + break; + case llvm::AtomicOrdering::NotAtomic: + case llvm::AtomicOrdering::Unordered: + llvm_unreachable("Unexpected ordering."); + } } static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, - bool IsSeqCst, bool IsPostfixUpdate, + llvm::AtomicOrdering AO, bool IsPostfixUpdate, const Expr *X, const Expr *V, const Expr *E, const Expr *UE, bool IsXLHSInRHSPart, SourceLocation Loc) { switch (Kind) { case OMPC_read: - emitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc); + emitOMPAtomicReadExpr(CGF, AO, X, V, Loc); break; case OMPC_write: - emitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc); + emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc); break; case OMPC_unknown: case OMPC_update: - emitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc); + emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc); break; case OMPC_capture: - emitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE, + emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE, IsXLHSInRHSPart, Loc); break; case OMPC_if: @@ -4277,12 +5251,17 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, case OMPC_collapse: case OMPC_default: case OMPC_seq_cst: + case OMPC_acq_rel: + case OMPC_acquire: + case OMPC_release: + case OMPC_relaxed: case OMPC_shared: case OMPC_linear: case OMPC_aligned: case OMPC_copyin: case OMPC_copyprivate: case OMPC_flush: + case OMPC_depobj: case OMPC_proc_bind: case OMPC_schedule: case OMPC_ordered: @@ -4308,6 +5287,7 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, case OMPC_to: case OMPC_from: case OMPC_use_device_ptr: + case OMPC_use_device_addr: case OMPC_is_device_ptr: case OMPC_unified_address: case OMPC_unified_shared_memory: @@ -4317,38 +5297,76 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, case OMPC_device_type: case OMPC_match: case OMPC_nontemporal: + case OMPC_order: + case OMPC_destroy: + case OMPC_detach: + case OMPC_inclusive: + case OMPC_exclusive: + case OMPC_uses_allocators: + case OMPC_affinity: + default: llvm_unreachable("Clause is not allowed in 'omp atomic'."); } } void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { - bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>(); + llvm::AtomicOrdering AO = llvm::AtomicOrdering::Monotonic; + bool MemOrderingSpecified = false; + if 
(S.getSingleClause<OMPSeqCstClause>()) {
+    AO = llvm::AtomicOrdering::SequentiallyConsistent;
+    MemOrderingSpecified = true;
+  } else if (S.getSingleClause<OMPAcqRelClause>()) {
+    AO = llvm::AtomicOrdering::AcquireRelease;
+    MemOrderingSpecified = true;
+  } else if (S.getSingleClause<OMPAcquireClause>()) {
+    AO = llvm::AtomicOrdering::Acquire;
+    MemOrderingSpecified = true;
+  } else if (S.getSingleClause<OMPReleaseClause>()) {
+    AO = llvm::AtomicOrdering::Release;
+    MemOrderingSpecified = true;
+  } else if (S.getSingleClause<OMPRelaxedClause>()) {
+    AO = llvm::AtomicOrdering::Monotonic;
+    MemOrderingSpecified = true;
+  }
   OpenMPClauseKind Kind = OMPC_unknown;
   for (const OMPClause *C : S.clauses()) {
-    // Find first clause (skip seq_cst clause, if it is first).
-    if (C->getClauseKind() != OMPC_seq_cst) {
+    // Find first clause (skip seq_cst|acq_rel|acquire|release|relaxed clause,
+    // if it is first).
+    if (C->getClauseKind() != OMPC_seq_cst &&
+        C->getClauseKind() != OMPC_acq_rel &&
+        C->getClauseKind() != OMPC_acquire &&
+        C->getClauseKind() != OMPC_release &&
+        C->getClauseKind() != OMPC_relaxed) {
       Kind = C->getClauseKind();
       break;
     }
   }
-
-  const Stmt *CS = S.getInnermostCapturedStmt()->IgnoreContainers();
-  if (const auto *FE = dyn_cast<FullExpr>(CS))
-    enterFullExpression(FE);
-  // Processing for statements under 'atomic capture'.
-  if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
-    for (const Stmt *C : Compound->body()) {
-      if (const auto *FE = dyn_cast<FullExpr>(C))
-        enterFullExpression(FE);
+  if (!MemOrderingSpecified) {
+    llvm::AtomicOrdering DefaultOrder =
+        CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
+    if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
+        DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
+        (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
+         Kind == OMPC_capture)) {
+      AO = DefaultOrder;
+    } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
+      if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
+        AO = llvm::AtomicOrdering::Release;
+      } else if (Kind == OMPC_read) {
+        assert(Kind == OMPC_read && "Unexpected atomic kind.");
+        AO = llvm::AtomicOrdering::Acquire;
+      }
     }
   }
-  auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
+  const Stmt *CS = S.getInnermostCapturedStmt()->IgnoreContainers();
+
+  auto &&CodeGen = [&S, Kind, AO, CS](CodeGenFunction &CGF,
                                             PrePostActionTy &) {
     CGF.EmitStopPoint(CS);
-    emitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
-                      S.getV(), S.getExpr(), S.getUpdateExpr(),
-                      S.isXLHSInRHSPart(), S.getBeginLoc());
+    emitOMPAtomicExpr(CGF, Kind, AO, S.isPostfixUpdate(), S.getX(), S.getV(),
+                      S.getExpr(), S.getUpdateExpr(), S.isXLHSInRHSPart(),
+                      S.getBeginLoc());
   };
   OMPLexicalScope Scope(*this, S, OMPD_unknown);
   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
@@ -4370,6 +5388,8 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
     return;
   }
+  auto LPCRegion =
+      CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
   llvm::Function *Fn = nullptr;
   llvm::Constant *FnID = nullptr;
@@ -4384,9 +5404,10 @@
   }
   // Check if we have any device clause associated with the directive.
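/* [Editor's note] In brief, the clause-to-ordering mapping implemented in
   EmitOMPAtomicDirective above (a sketch; clause spellings are OpenMP 5.0,
   orderings are llvm::AtomicOrdering values):

     #pragma omp atomic seq_cst  ...   // SequentiallyConsistent
     #pragma omp atomic acq_rel  ...   // AcquireRelease
     #pragma omp atomic acquire  ...   // Acquire
     #pragma omp atomic release  ...   // Release
     #pragma omp atomic relaxed  ...   // Monotonic

   With no ordering clause, the default comes from a 'requires
   atomic_default_mem_order' directive via getDefaultMemoryOrdering(); an
   acq_rel default is weakened to release for write/update and to acquire for
   read, since a plain atomic load or store cannot be acquire-release. */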
- const Expr *Device = nullptr; + llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device( + nullptr, OMPC_DEVICE_unknown); if (auto *C = S.getSingleClause<OMPDeviceClause>()) - Device = C->getDevice(); + Device.setPointerAndInt(C->getDevice(), C->getModifier()); // Check if we have an if clause whose conditional always evaluates to false // or if we do not have any targets specified. If so the target region is not @@ -4856,7 +5877,8 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { break; } } - if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) { + if (CGM.getLangOpts().OpenMPIRBuilder) { + llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); // TODO: This check is necessary as we only generate `omp parallel` through // the OpenMPIRBuilder for now. if (S.getCancelRegion() == OMPD_parallel) { @@ -4865,7 +5887,7 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { IfCondition = EmitScalarExpr(IfCond, /*IgnoreResultAssign=*/true); return Builder.restoreIP( - OMPBuilder->CreateCancel(Builder, IfCondition, S.getCancelRegion())); + OMPBuilder.CreateCancel(Builder, IfCondition, S.getCancelRegion())); } } @@ -4876,7 +5898,8 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { CodeGenFunction::JumpDest CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { if (Kind == OMPD_parallel || Kind == OMPD_task || - Kind == OMPD_target_parallel) + Kind == OMPD_target_parallel || Kind == OMPD_taskloop || + Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop) return ReturnBlock; assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for || @@ -4888,9 +5911,8 @@ CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { } void CodeGenFunction::EmitOMPUseDevicePtrClause( - const OMPClause &NC, OMPPrivateScope &PrivateScope, + const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope, const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) { - const auto &C = cast<OMPUseDevicePtrClause>(NC); auto OrigVarIt = C.varlist_begin(); auto InitIt = C.inits().begin(); for (const Expr *PvtVarIt : C.private_copies()) { @@ -4951,6 +5973,60 @@ void CodeGenFunction::EmitOMPUseDevicePtrClause( } } +static const VarDecl *getBaseDecl(const Expr *Ref) { + const Expr *Base = Ref->IgnoreParenImpCasts(); + while (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Base)) + Base = OASE->getBase()->IgnoreParenImpCasts(); + while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base)) + Base = ASE->getBase()->IgnoreParenImpCasts(); + return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl()); +} + +void CodeGenFunction::EmitOMPUseDeviceAddrClause( + const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope, + const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) { + llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; + for (const Expr *Ref : C.varlists()) { + const VarDecl *OrigVD = getBaseDecl(Ref); + if (!Processed.insert(OrigVD).second) + continue; + // In order to identify the right initializer we need to match the + // declaration used by the mapping logic. In some cases we may get + // OMPCapturedExprDecl that refers to the original declaration. 
+    const ValueDecl *MatchingVD = OrigVD;
+    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
+      // OMPCapturedExprDecls are used to privatize fields of the current
+      // structure.
+      const auto *ME = cast<MemberExpr>(OED->getInit());
+      assert(isa<CXXThisExpr>(ME->getBase()) &&
+             "Base should be the current struct!");
+      MatchingVD = ME->getMemberDecl();
+    }
+
+    // If we don't have information about the current list item, move on to
+    // the next one.
+    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
+    if (InitAddrIt == CaptureDeviceAddrMap.end())
+      continue;
+
+    Address PrivAddr = InitAddrIt->getSecond();
+    // For DeclRefExprs and variable length arrays we need to load the pointer
+    // for correct mapping, since the pointer to the data was passed to the
+    // runtime.
+    if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) ||
+        MatchingVD->getType()->isArrayType())
+      PrivAddr =
+          EmitLoadOfPointer(PrivAddr, getContext()
+                                          .getPointerType(OrigVD->getType())
+                                          ->castAs<PointerType>());
+    llvm::Type *RealTy =
+        ConvertTypeForMem(OrigVD->getType().getNonReferenceType())
+            ->getPointerTo();
+    PrivAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(PrivAddr, RealTy);
+
+    (void)PrivateScope.addPrivate(OrigVD, [PrivAddr]() { return PrivAddr; });
+  }
+}
+
 // Generate the instructions for '#pragma omp target data' directive.
 void CodeGenFunction::EmitOMPTargetDataDirective(
     const OMPTargetDataDirective &S) {
@@ -4995,9 +6071,13 @@ void CodeGenFunction::EmitOMPTargetDataDirective(
       for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
         CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
                                       Info.CaptureDeviceAddrMap);
+      for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
+        CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope,
+                                       Info.CaptureDeviceAddrMap);
       (void)PrivateScope.Privatize();
       RCG(CGF);
     } else {
+      OMPLexicalScope Scope(CGF, S, OMPD_unknown);
       RCG(CGF);
     }
   };
@@ -5222,7 +6302,11 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
   assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
   // Emit outlined function for task construct.
const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop); - Address CapturedStruct = GenerateCapturedStmtArgument(*CS); + Address CapturedStruct = Address::invalid(); + { + OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false); + CapturedStruct = GenerateCapturedStmtArgument(*CS); + } QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); const Expr *IfCond = nullptr; for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { @@ -5322,8 +6406,8 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { CGF.EmitOMPInnerLoop( S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), [&S](CodeGenFunction &CGF) { - CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest()); - CGF.EmitStopPoint(&S); + emitOMPLoopBodyWithStopPoint(CGF, S, + CodeGenFunction::JumpDest()); }, [](CodeGenFunction &) {}); }); @@ -5376,11 +6460,15 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { } void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); EmitOMPTaskLoopBasedDirective(S); } void CodeGenFunction::EmitOMPTaskLoopSimdDirective( const OMPTaskLoopSimdDirective &S) { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); OMPLexicalScope Scope(*this, S); EmitOMPTaskLoopBasedDirective(S); } @@ -5391,6 +6479,8 @@ void CodeGenFunction::EmitOMPMasterTaskLoopDirective( Action.Enter(CGF); EmitOMPTaskLoopBasedDirective(S); }; + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); OMPLexicalScope Scope(*this, S, llvm::None, /*EmitPreInitStmt=*/false); CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc()); } @@ -5401,6 +6491,8 @@ void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective( Action.Enter(CGF); EmitOMPTaskLoopBasedDirective(S); }; + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc()); } @@ -5413,10 +6505,12 @@ void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective( Action.Enter(CGF); CGF.EmitOMPTaskLoopBasedDirective(S); }; - OMPLexicalScope Scope(CGF, S, llvm::None, /*EmitPreInitStmt=*/false); + OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false); CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen, S.getBeginLoc()); }; + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen, emitEmptyBoundParameters); } @@ -5433,6 +6527,8 @@ void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective( CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen, S.getBeginLoc()); }; + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen, emitEmptyBoundParameters); } @@ -5461,19 +6557,43 @@ void CodeGenFunction::EmitOMPTargetUpdateDirective( void CodeGenFunction::EmitSimpleOMPExecutableDirective( const OMPExecutableDirective &D) { + if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) { + EmitOMPScanDirective(*SD); + return; + } if (!D.hasAssociatedStmt() || !D.getAssociatedStmt()) return; auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) { + OMPPrivateScope GlobalsScope(CGF); + if (isOpenMPTaskingDirective(D.getDirectiveKind())) { + // Capture global 
firstprivates to avoid crash. + for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) { + for (const Expr *Ref : C->varlists()) { + const auto *DRE = cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); + if (!DRE) + continue; + const auto *VD = dyn_cast<VarDecl>(DRE->getDecl()); + if (!VD || VD->hasLocalStorage()) + continue; + if (!CGF.LocalDeclMap.count(VD)) { + LValue GlobLVal = CGF.EmitLValue(Ref); + GlobalsScope.addPrivate( + VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); }); + } + } + } + } if (isOpenMPSimdDirective(D.getDirectiveKind())) { + (void)GlobalsScope.Privatize(); + ParentLoopDirectiveForScanRegion ScanRegion(CGF, D); emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action); } else { - OMPPrivateScope LoopGlobals(CGF); if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) { for (const Expr *E : LD->counters()) { const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) { LValue GlobLVal = CGF.EmitLValue(E); - LoopGlobals.addPrivate( + GlobalsScope.addPrivate( VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); }); } if (isa<OMPCapturedExprDecl>(VD)) { @@ -5497,14 +6617,20 @@ void CodeGenFunction::EmitSimpleOMPExecutableDirective( } } } - LoopGlobals.Privatize(); + (void)GlobalsScope.Privatize(); CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt()); } }; - OMPSimdLexicalScope Scope(*this, D); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, - isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd - : D.getDirectiveKind(), - CodeGen); + { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D); + OMPSimdLexicalScope Scope(*this, D); + CGM.getOpenMPRuntime().emitInlinedDirective( + *this, + isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd + : D.getDirectiveKind(), + CodeGen); + } + // Check for outer lastprivate conditional update. + checkForLastprivateConditionalUpdate(*this, D); } diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp index 59631e802373..65b3b0c5f53d 100644 --- a/clang/lib/CodeGen/CGVTables.cpp +++ b/clang/lib/CodeGen/CGVTables.cpp @@ -363,8 +363,10 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::FunctionCallee Callee, : FPT->getReturnType(); ReturnValueSlot Slot; if (!ResultType->isVoidType() && - CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect) - Slot = ReturnValueSlot(ReturnValue, ResultType.isVolatileQualified()); + (CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect || + hasAggregateEvaluationKind(ResultType))) + Slot = ReturnValueSlot(ReturnValue, ResultType.isVolatileQualified(), + /*IsUnused=*/false, /*IsExternallyDestructed=*/true); // Now emit our call. llvm::CallBase *CallOrInvoke; @@ -437,7 +439,8 @@ void CodeGenFunction::EmitMustTailThunk(GlobalDecl GD, // Finish the function to maintain CodeGenFunction invariants. // FIXME: Don't emit unreachable code. EmitBlock(createBasicBlock()); - FinishFunction(); + + FinishThunk(); } void CodeGenFunction::generateThunk(llvm::Function *Fn, @@ -564,7 +567,7 @@ llvm::Constant *CodeGenVTables::maybeEmitThunk(GlobalDecl GD, CGM.SetLLVMFunctionAttributesForDefinition(GD.getDecl(), ThunkFn); // Thunks for variadic methods are special because in general variadic - // arguments cannot be perferctly forwarded. In the general case, clang + // arguments cannot be perfectly forwarded. In the general case, clang // implements such thunks by cloning the original function body. 
However, for // thunks with no return adjustment on targets that support musttail, we can // use musttail to perfectly forward the variadic arguments. @@ -616,29 +619,178 @@ void CodeGenVTables::EmitThunks(GlobalDecl GD) { maybeEmitThunk(GD, Thunk, /*ForVTable=*/false); } -void CodeGenVTables::addVTableComponent( - ConstantArrayBuilder &builder, const VTableLayout &layout, - unsigned idx, llvm::Constant *rtti, unsigned &nextVTableThunkIndex) { - auto &component = layout.vtable_components()[idx]; +void CodeGenVTables::addRelativeComponent(ConstantArrayBuilder &builder, + llvm::Constant *component, + unsigned vtableAddressPoint, + bool vtableHasLocalLinkage, + bool isCompleteDtor) const { + // No need to get the offset of a nullptr. + if (component->isNullValue()) + return builder.add(llvm::ConstantInt::get(CGM.Int32Ty, 0)); + + auto *globalVal = + cast<llvm::GlobalValue>(component->stripPointerCastsAndAliases()); + llvm::Module &module = CGM.getModule(); + + // We don't want to copy the linkage of the vtable exactly because we still + // want the stub/proxy to be emitted for properly calculating the offset. + // Examples where there would be no symbol emitted are available_externally + // and private linkages. + auto stubLinkage = vtableHasLocalLinkage ? llvm::GlobalValue::InternalLinkage + : llvm::GlobalValue::ExternalLinkage; + + llvm::Constant *target; + if (auto *func = dyn_cast<llvm::Function>(globalVal)) { + target = getOrCreateRelativeStub(func, stubLinkage, isCompleteDtor); + } else { + llvm::SmallString<16> rttiProxyName(globalVal->getName()); + rttiProxyName.append(".rtti_proxy"); + + // The RTTI component may not always be emitted in the same linkage unit as + // the vtable. As a general case, we can make a dso_local proxy to the RTTI + // that points to the actual RTTI struct somewhere. This will result in a + // GOTPCREL relocation when taking the relative offset to the proxy. + llvm::GlobalVariable *proxy = module.getNamedGlobal(rttiProxyName); + if (!proxy) { + proxy = new llvm::GlobalVariable(module, globalVal->getType(), + /*isConstant=*/true, stubLinkage, + globalVal, rttiProxyName); + proxy->setDSOLocal(true); + proxy->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + if (!proxy->hasLocalLinkage()) { + proxy->setVisibility(llvm::GlobalValue::HiddenVisibility); + proxy->setComdat(module.getOrInsertComdat(rttiProxyName)); + } + } + target = proxy; + } - auto addOffsetConstant = [&](CharUnits offset) { - builder.add(llvm::ConstantExpr::getIntToPtr( - llvm::ConstantInt::get(CGM.PtrDiffTy, offset.getQuantity()), - CGM.Int8PtrTy)); - }; + builder.addRelativeOffsetToPosition(CGM.Int32Ty, target, + /*position=*/vtableAddressPoint); +} + +llvm::Function *CodeGenVTables::getOrCreateRelativeStub( + llvm::Function *func, llvm::GlobalValue::LinkageTypes stubLinkage, + bool isCompleteDtor) const { + // A complete object destructor can later be substituted in the vtable for an + // appropriate base object destructor when optimizations are enabled. This can + // happen for child classes that don't have their own destructor. In the case + // where a parent virtual destructor is not guaranteed to be in the same + // linkage unit as the child vtable, it's possible for an external reference + // for this destructor to be substituted into the child vtable, preventing it + // from being in rodata. If this function is a complete virtual destructor, we + // can just force a stub to be emitted for it. 
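/* [Editor's note] Roughly, the stub emitted by the code below looks like this
   in IR (a hand-written sketch for a hypothetical A::foo, not actual compiler
   output):

     define hidden void @_ZN1A3fooEv.stub(%class.A* %this) comdat {
     entry:
       tail call void @_ZN1A3fooEv(%class.A* %this)
       ret void
     }

   Because the stub is dso_local, its address can be encoded as a 32-bit
   PC-relative offset in the vtable, so the vtable itself needs no dynamic
   relocation and can stay in rodata. */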
+  if (func->isDSOLocal() && !isCompleteDtor)
+    return func;
+
+  llvm::SmallString<16> stubName(func->getName());
+  stubName.append(".stub");
+
+  // Instead of taking the offset between the vtable and virtual function
+  // directly, we emit a dso_local stub that just contains a tail call to the
+  // original virtual function and take the offset between that and the
+  // vtable. We do this because there are some cases where the original
+  // function that would've been inserted into the vtable is not dso_local
+  // which may require some kind of dynamic relocation which prevents the
+  // vtable from being readonly. On x86_64, taking the offset between the
+  // function and the vtable gets lowered to the offset between the PLT entry
+  // for the function and the vtable which gives us a PLT32 reloc. On AArch64,
+  // right now only CALL26 and JUMP26 instructions generate PLT relocations,
+  // so we manifest them with stubs that are just jumps to the original
+  // function.
+  auto &module = CGM.getModule();
+  llvm::Function *stub = module.getFunction(stubName);
+  if (stub) {
+    assert(stub->isDSOLocal() &&
+           "The previous definition of this stub should've been dso_local.");
+    return stub;
+  }
+
+  stub = llvm::Function::Create(func->getFunctionType(), stubLinkage, stubName,
+                                module);
+
+  // Propagate function attributes.
+  stub->setAttributes(func->getAttributes());
+
+  stub->setDSOLocal(true);
+  stub->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+  if (!stub->hasLocalLinkage()) {
+    stub->setVisibility(llvm::GlobalValue::HiddenVisibility);
+    stub->setComdat(module.getOrInsertComdat(stubName));
+  }
+
+  // Fill the stub with a tail call that will be optimized.
+  llvm::BasicBlock *block =
+      llvm::BasicBlock::Create(module.getContext(), "entry", stub);
+  llvm::IRBuilder<> block_builder(block);
+  llvm::SmallVector<llvm::Value *, 8> args;
+  for (auto &arg : stub->args())
+    args.push_back(&arg);
+  llvm::CallInst *call = block_builder.CreateCall(func, args);
+  call->setAttributes(func->getAttributes());
+  call->setTailCall();
+  if (call->getType()->isVoidTy())
+    block_builder.CreateRetVoid();
+  else
+    block_builder.CreateRet(call);
+
+  return stub;
+}
+
+bool CodeGenVTables::useRelativeLayout() const {
+  return CGM.getTarget().getCXXABI().isItaniumFamily() &&
+         CGM.getItaniumVTableContext().isRelativeLayout();
+}
+
+llvm::Type *CodeGenVTables::getVTableComponentType() const {
+  if (useRelativeLayout())
+    return CGM.Int32Ty;
+  return CGM.Int8PtrTy;
+}
+
+static void AddPointerLayoutOffset(const CodeGenModule &CGM,
+                                   ConstantArrayBuilder &builder,
+                                   CharUnits offset) {
+  builder.add(llvm::ConstantExpr::getIntToPtr(
+      llvm::ConstantInt::get(CGM.PtrDiffTy, offset.getQuantity()),
+      CGM.Int8PtrTy));
+}
+
+static void AddRelativeLayoutOffset(const CodeGenModule &CGM,
+                                    ConstantArrayBuilder &builder,
+                                    CharUnits offset) {
+  builder.add(llvm::ConstantInt::get(CGM.Int32Ty, offset.getQuantity()));
+}
+
+void CodeGenVTables::addVTableComponent(ConstantArrayBuilder &builder,
+                                        const VTableLayout &layout,
+                                        unsigned componentIndex,
+                                        llvm::Constant *rtti,
+                                        unsigned &nextVTableThunkIndex,
+                                        unsigned vtableAddressPoint,
+                                        bool vtableHasLocalLinkage) {
+  auto &component = layout.vtable_components()[componentIndex];
+
+  auto addOffsetConstant =
+      useRelativeLayout() ? AddRelativeLayoutOffset : AddPointerLayoutOffset;
 
   switch (component.getKind()) {
   case VTableComponent::CK_VCallOffset:
-    return addOffsetConstant(component.getVCallOffset());
+    return addOffsetConstant(CGM, builder, component.getVCallOffset());
 
   case VTableComponent::CK_VBaseOffset:
-    return addOffsetConstant(component.getVBaseOffset());
+    return addOffsetConstant(CGM, builder, component.getVBaseOffset());
 
   case VTableComponent::CK_OffsetToTop:
-    return addOffsetConstant(component.getOffsetToTop());
+    return addOffsetConstant(CGM, builder, component.getOffsetToTop());
 
   case VTableComponent::CK_RTTI:
-    return builder.add(llvm::ConstantExpr::getBitCast(rtti, CGM.Int8PtrTy));
+    if (useRelativeLayout())
+      return addRelativeComponent(builder, rtti, vtableAddressPoint,
+                                  vtableHasLocalLinkage,
+                                  /*isCompleteDtor=*/false);
+    else
+      return builder.add(llvm::ConstantExpr::getBitCast(rtti, CGM.Int8PtrTy));
 
   case VTableComponent::CK_FunctionPointer:
   case VTableComponent::CK_CompleteDtorPointer:
@@ -672,11 +824,21 @@ void CodeGenVTables::addVTableComponent(
               ? MD->hasAttr<CUDADeviceAttr>()
               : (MD->hasAttr<CUDAHostAttr>() || !MD->hasAttr<CUDADeviceAttr>());
       if (!CanEmitMethod)
-        return builder.addNullPointer(CGM.Int8PtrTy);
+        return builder.add(llvm::ConstantExpr::getNullValue(CGM.Int8PtrTy));
       // Method is acceptable, continue processing as usual.
     }
 
     auto getSpecialVirtualFn = [&](StringRef name) -> llvm::Constant * {
+      // FIXME(PR43094): When merging comdat groups, lld can select a local
+      // symbol as the signature symbol even though it cannot be accessed
+      // outside that symbol's TU. The relative vtables ABI would make
+      // __cxa_pure_virtual and __cxa_deleted_virtual local symbols, and
+      // depending on link order, the comdat groups could resolve to the one
+      // with the local symbol. As a temporary solution, fill these components
+      // with zero. We shouldn't be calling these in the first place anyway.
+      if (useRelativeLayout())
+        return llvm::ConstantPointerNull::get(CGM.Int8PtrTy);
+
       // For NVPTX devices in OpenMP emit special functions as null pointers,
       // otherwise linking ends up with unresolved references.
       if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPIsDevice &&
@@ -697,19 +859,20 @@ void CodeGenVTables::addVTableComponent(
     if (cast<CXXMethodDecl>(GD.getDecl())->isPure()) {
       if (!PureVirtualFn)
         PureVirtualFn =
-          getSpecialVirtualFn(CGM.getCXXABI().GetPureVirtualCallName());
+            getSpecialVirtualFn(CGM.getCXXABI().GetPureVirtualCallName());
       fnPtr = PureVirtualFn;
 
     // Deleted virtual member functions.
     } else if (cast<CXXMethodDecl>(GD.getDecl())->isDeleted()) {
       if (!DeletedVirtualFn)
         DeletedVirtualFn =
-          getSpecialVirtualFn(CGM.getCXXABI().GetDeletedVirtualCallName());
+            getSpecialVirtualFn(CGM.getCXXABI().GetDeletedVirtualCallName());
       fnPtr = DeletedVirtualFn;
 
     // Thunks.
} else if (nextVTableThunkIndex < layout.vtable_thunks().size() && - layout.vtable_thunks()[nextVTableThunkIndex].first == idx) { + layout.vtable_thunks()[nextVTableThunkIndex].first == + componentIndex) { auto &thunkInfo = layout.vtable_thunks()[nextVTableThunkIndex].second; nextVTableThunkIndex++; @@ -721,13 +884,19 @@ void CodeGenVTables::addVTableComponent( fnPtr = CGM.GetAddrOfFunction(GD, fnTy, /*ForVTable=*/true); } - fnPtr = llvm::ConstantExpr::getBitCast(fnPtr, CGM.Int8PtrTy); - builder.add(fnPtr); - return; + if (useRelativeLayout()) { + return addRelativeComponent( + builder, fnPtr, vtableAddressPoint, vtableHasLocalLinkage, + component.getKind() == VTableComponent::CK_CompleteDtorPointer); + } else + return builder.add(llvm::ConstantExpr::getBitCast(fnPtr, CGM.Int8PtrTy)); } case VTableComponent::CK_UnusedFunctionPointer: - return builder.addNullPointer(CGM.Int8PtrTy); + if (useRelativeLayout()) + return builder.add(llvm::ConstantExpr::getNullValue(CGM.Int32Ty)); + else + return builder.addNullPointer(CGM.Int8PtrTy); } llvm_unreachable("Unexpected vtable component kind"); @@ -735,34 +904,41 @@ void CodeGenVTables::addVTableComponent( llvm::Type *CodeGenVTables::getVTableType(const VTableLayout &layout) { SmallVector<llvm::Type *, 4> tys; - for (unsigned i = 0, e = layout.getNumVTables(); i != e; ++i) { - tys.push_back(llvm::ArrayType::get(CGM.Int8PtrTy, layout.getVTableSize(i))); - } + llvm::Type *componentType = getVTableComponentType(); + for (unsigned i = 0, e = layout.getNumVTables(); i != e; ++i) + tys.push_back(llvm::ArrayType::get(componentType, layout.getVTableSize(i))); return llvm::StructType::get(CGM.getLLVMContext(), tys); } void CodeGenVTables::createVTableInitializer(ConstantStructBuilder &builder, const VTableLayout &layout, - llvm::Constant *rtti) { + llvm::Constant *rtti, + bool vtableHasLocalLinkage) { + llvm::Type *componentType = getVTableComponentType(); + + const auto &addressPoints = layout.getAddressPointIndices(); unsigned nextVTableThunkIndex = 0; - for (unsigned i = 0, e = layout.getNumVTables(); i != e; ++i) { - auto vtableElem = builder.beginArray(CGM.Int8PtrTy); - size_t thisIndex = layout.getVTableOffset(i); - size_t nextIndex = thisIndex + layout.getVTableSize(i); - for (unsigned i = thisIndex; i != nextIndex; ++i) { - addVTableComponent(vtableElem, layout, i, rtti, nextVTableThunkIndex); + for (unsigned vtableIndex = 0, endIndex = layout.getNumVTables(); + vtableIndex != endIndex; ++vtableIndex) { + auto vtableElem = builder.beginArray(componentType); + + size_t vtableStart = layout.getVTableOffset(vtableIndex); + size_t vtableEnd = vtableStart + layout.getVTableSize(vtableIndex); + for (size_t componentIndex = vtableStart; componentIndex < vtableEnd; + ++componentIndex) { + addVTableComponent(vtableElem, layout, componentIndex, rtti, + nextVTableThunkIndex, addressPoints[vtableIndex], + vtableHasLocalLinkage); } vtableElem.finishAndAddTo(builder); } } -llvm::GlobalVariable * -CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD, - const BaseSubobject &Base, - bool BaseIsVirtual, - llvm::GlobalVariable::LinkageTypes Linkage, - VTableAddressPointsMapTy& AddressPoints) { +llvm::GlobalVariable *CodeGenVTables::GenerateConstructionVTable( + const CXXRecordDecl *RD, const BaseSubobject &Base, bool BaseIsVirtual, + llvm::GlobalVariable::LinkageTypes Linkage, + VTableAddressPointsMapTy &AddressPoints) { if (CGDebugInfo *DI = CGM.getModuleDebugInfo()) DI->completeClassData(Base.getBase()); @@ -779,7 +955,15 @@ 
CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD,
   cast<ItaniumMangleContext>(CGM.getCXXABI().getMangleContext())
       .mangleCXXCtorVTable(RD, Base.getBaseOffset().getQuantity(),
                            Base.getBase(), Out);
-  StringRef Name = OutName.str();
+  SmallString<256> Name(OutName);
+
+  bool UsingRelativeLayout = getItaniumVTableContext().isRelativeLayout();
+  bool VTableAliasExists =
+      UsingRelativeLayout && CGM.getModule().getNamedAlias(Name);
+  if (VTableAliasExists) {
+    // We previously made the vtable hidden and changed its name.
+    Name.append(".local");
+  }
 
   llvm::Type *VTType = getVTableType(*VTLayout);
@@ -806,7 +990,8 @@ CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD,
   // Create and set the initializer.
   ConstantInitBuilder builder(CGM);
   auto components = builder.beginStruct();
-  createVTableInitializer(components, *VTLayout, RTTI);
+  createVTableInitializer(components, *VTLayout, RTTI,
+                          VTable->hasLocalLinkage());
   components.finishAndSetAsInitializer(VTable);
 
   // Set properties only after the initializer has been set to ensure that the
@@ -816,9 +1001,68 @@ CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD,
   CGM.EmitVTableTypeMetadata(RD, VTable, *VTLayout.get());
 
+  if (UsingRelativeLayout && !VTable->isDSOLocal())
+    GenerateRelativeVTableAlias(VTable, OutName);
+
   return VTable;
 }
 
+// If the VTable is not dso_local, then we will not be able to indicate that
+// the VTable does not need a relocation and move into rodata. This frequently
+// occurs for classes that should be made public from a DSO (like in libc++).
+// For cases like these, we can make the vtable hidden or private and create a
+// public alias with the same visibility and linkage as the original vtable
+// type.
+void CodeGenVTables::GenerateRelativeVTableAlias(llvm::GlobalVariable *VTable,
+                                                 llvm::StringRef AliasNameRef) {
+  assert(getItaniumVTableContext().isRelativeLayout() &&
+         "Can only use this if the relative vtable ABI is used");
+  assert(!VTable->isDSOLocal() && "This should be called only if the vtable is "
+                                  "not guaranteed to be dso_local");
+
+  // If the vtable is available_externally, we shouldn't (and don't need to)
+  // generate an alias for it in the first place since the vtable won't
+  // actually be emitted in this compilation unit.
+  if (VTable->hasAvailableExternallyLinkage())
+    return;
+
+  // Create a new string in the event the alias is already the name of the
+  // vtable. Using the reference directly could lead to use of an uninitialized
+  // value in the module's StringMap.
+  llvm::SmallString<256> AliasName(AliasNameRef);
+  VTable->setName(AliasName + ".local");
+
+  auto Linkage = VTable->getLinkage();
+  assert(llvm::GlobalAlias::isValidLinkage(Linkage) &&
+         "Invalid vtable alias linkage");
+
+  llvm::GlobalAlias *VTableAlias = CGM.getModule().getNamedAlias(AliasName);
+  if (!VTableAlias) {
+    VTableAlias = llvm::GlobalAlias::create(VTable->getValueType(),
+                                            VTable->getAddressSpace(), Linkage,
+                                            AliasName, &CGM.getModule());
+  } else {
+    assert(VTableAlias->getValueType() == VTable->getValueType());
+    assert(VTableAlias->getLinkage() == Linkage);
+  }
+  VTableAlias->setVisibility(VTable->getVisibility());
+  VTableAlias->setUnnamedAddr(VTable->getUnnamedAddr());
+
+  // Both of these imply dso_local for the vtable.
+  if (!VTable->hasComdat()) {
+    // If this is in a comdat, then we shouldn't make the linkage private due to
+    // an issue in lld where private symbols can be used as the key symbol when
+    // choosing the prevalent group.
This leads to "relocation refers to a + // symbol in a discarded section". + VTable->setLinkage(llvm::GlobalValue::PrivateLinkage); + } else { + // We should at least make this hidden since we don't want to expose it. + VTable->setVisibility(llvm::GlobalValue::HiddenVisibility); + } + + VTableAlias->setAliasee(VTable); +} + static bool shouldEmitAvailableExternallyVTable(const CodeGenModule &CGM, const CXXRecordDecl *RD) { return CGM.getCodeGenOpts().OptimizationLevel > 0 && @@ -1011,6 +1255,26 @@ void CodeGenModule::EmitDeferredVTables() { DeferredVTables.clear(); } +bool CodeGenModule::HasLTOVisibilityPublicStd(const CXXRecordDecl *RD) { + if (!getCodeGenOpts().LTOVisibilityPublicStd) + return false; + + const DeclContext *DC = RD; + while (1) { + auto *D = cast<Decl>(DC); + DC = DC->getParent(); + if (isa<TranslationUnitDecl>(DC->getRedeclContext())) { + if (auto *ND = dyn_cast<NamespaceDecl>(D)) + if (const IdentifierInfo *II = ND->getIdentifier()) + if (II->isStr("std") || II->isStr("stdext")) + return true; + break; + } + } + + return false; +} + bool CodeGenModule::HasHiddenLTOVisibility(const CXXRecordDecl *RD) { LinkageInfo LV = RD->getLinkageAndVisibility(); if (!isExternallyVisible(LV.getLinkage())) @@ -1027,22 +1291,7 @@ bool CodeGenModule::HasHiddenLTOVisibility(const CXXRecordDecl *RD) { return false; } - if (getCodeGenOpts().LTOVisibilityPublicStd) { - const DeclContext *DC = RD; - while (1) { - auto *D = cast<Decl>(DC); - DC = DC->getParent(); - if (isa<TranslationUnitDecl>(DC->getRedeclContext())) { - if (auto *ND = dyn_cast<NamespaceDecl>(D)) - if (const IdentifierInfo *II = ND->getIdentifier()) - if (II->isStr("std") || II->isStr("stdext")) - return false; - break; - } - } - } - - return true; + return !HasLTOVisibilityPublicStd(RD); } llvm::GlobalObject::VCallVisibility @@ -1131,9 +1380,10 @@ void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD, } } - if (getCodeGenOpts().VirtualFunctionElimination) { + if (getCodeGenOpts().VirtualFunctionElimination || + getCodeGenOpts().WholeProgramVTables) { llvm::GlobalObject::VCallVisibility TypeVis = GetVCallVisibilityLevel(RD); if (TypeVis != llvm::GlobalObject::VCallVisibilityPublic) - VTable->addVCallVisibilityMetadata(TypeVis); + VTable->setVCallVisibilityMetadata(TypeVis); } } diff --git a/clang/lib/CodeGen/CGVTables.h b/clang/lib/CodeGen/CGVTables.h index a47841bfc6c3..bdfc075ee305 100644 --- a/clang/lib/CodeGen/CGVTables.h +++ b/clang/lib/CodeGen/CGVTables.h @@ -62,16 +62,39 @@ class CodeGenVTables { bool ForVTable); void addVTableComponent(ConstantArrayBuilder &builder, - const VTableLayout &layout, unsigned idx, - llvm::Constant *rtti, - unsigned &nextVTableThunkIndex); + const VTableLayout &layout, unsigned componentIndex, + llvm::Constant *rtti, unsigned &nextVTableThunkIndex, + unsigned vtableAddressPoint, + bool vtableHasLocalLinkage); + + /// Add a 32-bit offset to a component relative to the vtable when using the + /// relative vtables ABI. The array builder points to the start of the vtable. + void addRelativeComponent(ConstantArrayBuilder &builder, + llvm::Constant *component, + unsigned vtableAddressPoint, + bool vtableHasLocalLinkage, + bool isCompleteDtor) const; + + /// Create a dso_local stub that will be used for a relative reference in the + /// relative vtable layout. This stub will just be a tail call to the original + /// function and propagate any function attributes from the original. 
If the + /// original function is already dso_local, the original is returned instead + /// and a stub is not created. + llvm::Function * + getOrCreateRelativeStub(llvm::Function *func, + llvm::GlobalValue::LinkageTypes stubLinkage, + bool isCompleteDtor) const; + + bool useRelativeLayout() const; + + llvm::Type *getVTableComponentType() const; public: /// Add vtable components for the given vtable layout to the given /// global initializer. void createVTableInitializer(ConstantStructBuilder &builder, - const VTableLayout &layout, - llvm::Constant *rtti); + const VTableLayout &layout, llvm::Constant *rtti, + bool vtableHasLocalLinkage); CodeGenVTables(CodeGenModule &CGM); @@ -124,6 +147,13 @@ public: /// arrays of pointers, with one struct element for each vtable in the vtable /// group. llvm::Type *getVTableType(const VTableLayout &layout); + + /// Generate a public facing alias for the vtable and make the vtable either + /// hidden or private. The alias will have the original linkage and visibility + /// of the vtable. This is used for cases under the relative vtables ABI + /// when a vtable may not be dso_local. + void GenerateRelativeVTableAlias(llvm::GlobalVariable *VTable, + llvm::StringRef AliasNameRef); }; } // end namespace CodeGen diff --git a/clang/lib/CodeGen/CGValue.h b/clang/lib/CodeGen/CGValue.h index 9fd07bdb187d..70e6fed3f4f6 100644 --- a/clang/lib/CodeGen/CGValue.h +++ b/clang/lib/CodeGen/CGValue.h @@ -170,7 +170,8 @@ class LValue { VectorElt, // This is a vector element l-value (V[i]), use getVector* BitField, // This is a bitfield l-value, use getBitfield*. ExtVectorElt, // This is an extended vector subset, use getExtVectorComp - GlobalReg // This is a register l-value, use getGlobalReg() + GlobalReg, // This is a register l-value, use getGlobalReg() + MatrixElt // This is a matrix element, use getVector* } LVType; llvm::Value *V; @@ -254,6 +255,7 @@ public: bool isBitField() const { return LVType == BitField; } bool isExtVectorElt() const { return LVType == ExtVectorElt; } bool isGlobalReg() const { return LVType == GlobalReg; } + bool isMatrixElt() const { return LVType == MatrixElt; } bool isVolatileQualified() const { return Quals.hasVolatile(); } bool isRestrictQualified() const { return Quals.hasRestrict(); } @@ -337,8 +339,26 @@ public: Address getVectorAddress() const { return Address(getVectorPointer(), getAlignment()); } - llvm::Value *getVectorPointer() const { assert(isVectorElt()); return V; } - llvm::Value *getVectorIdx() const { assert(isVectorElt()); return VectorIdx; } + llvm::Value *getVectorPointer() const { + assert(isVectorElt()); + return V; + } + llvm::Value *getVectorIdx() const { + assert(isVectorElt()); + return VectorIdx; + } + + Address getMatrixAddress() const { + return Address(getMatrixPointer(), getAlignment()); + } + llvm::Value *getMatrixPointer() const { + assert(isMatrixElt()); + return V; + } + llvm::Value *getMatrixIdx() const { + assert(isMatrixElt()); + return VectorIdx; + } // extended vector elements. 
Address getExtVectorAddress() const { @@ -430,6 +450,18 @@ public: return R; } + static LValue MakeMatrixElt(Address matAddress, llvm::Value *Idx, + QualType type, LValueBaseInfo BaseInfo, + TBAAAccessInfo TBAAInfo) { + LValue R; + R.LVType = MatrixElt; + R.V = matAddress.getPointer(); + R.VectorIdx = Idx; + R.Initialize(type, type.getQualifiers(), matAddress.getAlignment(), + BaseInfo, TBAAInfo); + return R; + } + RValue asAggregateRValue(CodeGenFunction &CGF) const { return RValue::getAggregate(getAddress(CGF), isVolatileQualified()); } diff --git a/clang/lib/CodeGen/CodeGenABITypes.cpp b/clang/lib/CodeGen/CodeGenABITypes.cpp index 6b6a116cf259..d3a16a1d5acc 100644 --- a/clang/lib/CodeGen/CodeGenABITypes.cpp +++ b/clang/lib/CodeGen/CodeGenABITypes.cpp @@ -16,7 +16,9 @@ //===----------------------------------------------------------------------===// #include "clang/CodeGen/CodeGenABITypes.h" +#include "CGCXXABI.h" #include "CGRecordLayout.h" +#include "CodeGenFunction.h" #include "CodeGenModule.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "clang/Lex/HeaderSearchOptions.h" @@ -25,6 +27,11 @@ using namespace clang; using namespace CodeGen; +void CodeGen::addDefaultFunctionDefinitionAttributes(CodeGenModule &CGM, + llvm::AttrBuilder &attrs) { + CGM.addDefaultFunctionDefinitionAttributes(attrs); +} + const CGFunctionInfo & CodeGen::arrangeObjCMessageSendSignature(CodeGenModule &CGM, const ObjCMethodDecl *MD, @@ -63,6 +70,30 @@ CodeGen::arrangeFreeFunctionCall(CodeGenModule &CGM, info, {}, args); } +ImplicitCXXConstructorArgs +CodeGen::getImplicitCXXConstructorArgs(CodeGenModule &CGM, + const CXXConstructorDecl *D) { + // We have to create a dummy CodeGenFunction here to pass to + // getImplicitConstructorArgs(). In some cases (base and delegating + // constructor calls), getImplicitConstructorArgs() can reach into the + // CodeGenFunction to find parameters of the calling constructor to pass on to + // the called constructor, but that can't happen here because we're asking for + // the args for a complete, non-delegating constructor call. 
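/* [Editor's note] This entry point is intended for external CodeGen clients
   that need to call a C++ constructor directly. A hypothetical use, assuming
   an existing CodeGenModule CGM and CXXConstructorDecl D:

     ImplicitCXXConstructorArgs Args =
         CodeGen::getImplicitCXXConstructorArgs(CGM, D);
     // Values in Args.Prefix are passed right after 'this', values in
     // Args.Suffix after the explicit arguments (e.g. an ABI-specific
     // flag such as MSVC's 'most derived' bit lands in one of the two).
*/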
+ CodeGenFunction CGF(CGM, /* suppressNewContext= */ true); + CGCXXABI::AddedStructorArgs addedArgs = + CGM.getCXXABI().getImplicitConstructorArgs(CGF, D, Ctor_Complete, + /* ForVirtualBase= */ false, + /* Delegating= */ false); + ImplicitCXXConstructorArgs implicitArgs; + for (const auto &arg : addedArgs.Prefix) { + implicitArgs.Prefix.push_back(arg.Value); + } + for (const auto &arg : addedArgs.Suffix) { + implicitArgs.Suffix.push_back(arg.Value); + } + return implicitArgs; +} + llvm::FunctionType * CodeGen::convertFreeFunctionType(CodeGenModule &CGM, const FunctionDecl *FD) { assert(FD != nullptr && "Expected a non-null function declaration!"); @@ -84,3 +115,16 @@ unsigned CodeGen::getLLVMFieldNumber(CodeGenModule &CGM, const FieldDecl *FD) { return CGM.getTypes().getCGRecordLayout(RD).getLLVMFieldNo(FD); } + +llvm::Value *CodeGen::getCXXDestructorImplicitParam( + CodeGenModule &CGM, llvm::BasicBlock *InsertBlock, + llvm::BasicBlock::iterator InsertPoint, const CXXDestructorDecl *D, + CXXDtorType Type, bool ForVirtualBase, bool Delegating) { + CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); + CGF.CurCodeDecl = D; + CGF.CurFuncDecl = D; + CGF.CurFn = InsertBlock->getParent(); + CGF.Builder.SetInsertPoint(InsertBlock, InsertPoint); + return CGM.getCXXABI().getCXXDestructorImplicitParam( + CGF, D, Type, ForVirtualBase, Delegating); +} diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp index 7065e78f19a2..55925110708e 100644 --- a/clang/lib/CodeGen/CodeGenAction.cpp +++ b/clang/lib/CodeGen/CodeGenAction.cpp @@ -32,8 +32,8 @@ #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LLVMRemarkStreamer.h" #include "llvm/IR/Module.h" -#include "llvm/IR/RemarkStreamer.h" #include "llvm/IRReader/IRReader.h" #include "llvm/Linker/Linker.h" #include "llvm/Pass.h" @@ -86,15 +86,15 @@ namespace clang { const CodeGenOptions CodeGenOpts) { handleAllErrors( std::move(E), - [&](const RemarkSetupFileError &E) { + [&](const LLVMRemarkSetupFileError &E) { Diags.Report(diag::err_cannot_open_file) << CodeGenOpts.OptRecordFile << E.message(); }, - [&](const RemarkSetupPatternError &E) { + [&](const LLVMRemarkSetupPatternError &E) { Diags.Report(diag::err_drv_optimization_remark_pattern) << E.message() << CodeGenOpts.OptRecordPasses; }, - [&](const RemarkSetupFormatError &E) { + [&](const LLVMRemarkSetupFormatError &E) { Diags.Report(diag::err_drv_optimization_remark_format) << CodeGenOpts.OptRecordFormat; }); @@ -246,7 +246,7 @@ namespace clang { for (auto &LM : LinkModules) { if (LM.PropagateAttrs) for (Function &F : *LM.Module) - Gen->CGM().AddDefaultFnAttrs(F); + Gen->CGM().addDefaultFunctionDefinitionAttributes(F); CurLinkModule = LM.Module.get(); @@ -309,7 +309,7 @@ namespace clang { CodeGenOpts, this)); Expected<std::unique_ptr<llvm::ToolOutputFile>> OptRecordFileOrErr = - setupOptimizationRemarks( + setupLLVMOptimizationRemarks( Ctx, CodeGenOpts.OptRecordFile, CodeGenOpts.OptRecordPasses, CodeGenOpts.OptRecordFormat, CodeGenOpts.DiagnosticsWithHotness, CodeGenOpts.DiagnosticsHotnessThreshold); @@ -633,8 +633,9 @@ const FullSourceLoc BackendConsumer::getBestLocationFromDebugLoc( void BackendConsumer::UnsupportedDiagHandler( const llvm::DiagnosticInfoUnsupported &D) { - // We only support errors. - assert(D.getSeverity() == llvm::DS_Error); + // We only support warnings or errors. 
+ assert(D.getSeverity() == llvm::DS_Error ||
+ D.getSeverity() == llvm::DS_Warning);
 StringRef Filename;
 unsigned Line, Column;
@@ -652,7 +653,11 @@ void BackendConsumer::UnsupportedDiagHandler(
 DiagnosticPrinterRawOStream DP(MsgStream);
 D.print(DP);
 }
- Diags.Report(Loc, diag::err_fe_backend_unsupported) << MsgStream.str();
+
+ auto DiagType = D.getSeverity() == llvm::DS_Error
+ ? diag::err_fe_backend_unsupported
+ : diag::warn_fe_backend_unsupported;
+ Diags.Report(Loc, DiagType) << MsgStream.str();
 if (BadDebugInfo)
 // If we were not able to translate the file:line:col information
@@ -994,7 +999,7 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
 std::unique_ptr<BackendConsumer> Result(new BackendConsumer(
 BA, CI.getDiagnostics(), CI.getHeaderSearchOpts(),
 CI.getPreprocessorOpts(), CI.getCodeGenOpts(), CI.getTargetOpts(),
- CI.getLangOpts(), CI.getFrontendOpts().ShowTimers, InFile,
+ CI.getLangOpts(), CI.getFrontendOpts().ShowTimers, std::string(InFile),
 std::move(LinkModules), std::move(OS), *VMContext, CoverageInfo));
 BEConsumer = Result.get();
@@ -1146,11 +1151,14 @@ void CodeGenAction::ExecuteAction() {
 CI.getTargetOpts(), CI.getLangOpts(),
 CI.getFrontendOpts().ShowTimers, std::move(LinkModules),
 *VMContext, nullptr);
+ // PR44896: Force DiscardValueNames to false. DiscardValueNames cannot be
+ // true here because the value names are needed for reading textual IR.
+ Ctx.setDiscardValueNames(false);
 Ctx.setDiagnosticHandler(
 std::make_unique<ClangDiagnosticHandler>(CodeGenOpts, &Result));
 Expected<std::unique_ptr<llvm::ToolOutputFile>> OptRecordFileOrErr =
- setupOptimizationRemarks(
+ setupLLVMOptimizationRemarks(
 Ctx, CodeGenOpts.OptRecordFile, CodeGenOpts.OptRecordPasses,
 CodeGenOpts.OptRecordFormat, CodeGenOpts.DiagnosticsWithHotness,
 CodeGenOpts.DiagnosticsHotnessThreshold);
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 2bf94f697e01..4a7c84562dee 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -32,6 +32,7 @@
 #include "clang/Basic/TargetInfo.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
 #include "clang/Frontend/FrontendDiagnostic.h"
+#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/FPEnv.h"
@@ -64,67 +65,36 @@ CodeGenFunction::CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext)
 : CodeGenTypeCache(cgm), CGM(cgm), Target(cgm.getTarget()),
 Builder(cgm, cgm.getModule().getContext(), llvm::ConstantFolder(),
 CGBuilderInserterTy(this)),
- SanOpts(CGM.getLangOpts().Sanitize), DebugInfo(CGM.getModuleDebugInfo()),
- PGO(cgm), ShouldEmitLifetimeMarkers(shouldEmitLifetimeMarkers(
- CGM.getCodeGenOpts(), CGM.getLangOpts())) {
+ SanOpts(CGM.getLangOpts().Sanitize), CurFPFeatures(CGM.getLangOpts()),
+ DebugInfo(CGM.getModuleDebugInfo()), PGO(cgm),
+ ShouldEmitLifetimeMarkers(
+ shouldEmitLifetimeMarkers(CGM.getCodeGenOpts(), CGM.getLangOpts())) {
 if (!suppressNewContext)
 CGM.getCXXABI().getMangleContext().startNewFunction();
- llvm::FastMathFlags FMF;
- if (CGM.getLangOpts().FastMath)
- FMF.setFast();
- if (CGM.getLangOpts().FiniteMathOnly) {
- FMF.setNoNaNs();
- FMF.setNoInfs();
- }
- if (CGM.getCodeGenOpts().NoNaNsFPMath) {
- FMF.setNoNaNs();
- }
- if (CGM.getCodeGenOpts().NoSignedZeros) {
- FMF.setNoSignedZeros();
- }
- if (CGM.getCodeGenOpts().ReciprocalMath) {
- FMF.setAllowReciprocal();
- }
- if (CGM.getCodeGenOpts().Reassociate) {
- FMF.setAllowReassoc();
- }
-
Builder.setFastMathFlags(FMF); + SetFastMathFlags(CurFPFeatures); SetFPModel(); } CodeGenFunction::~CodeGenFunction() { assert(LifetimeExtendedCleanupStack.empty() && "failed to emit a cleanup"); - // If there are any unclaimed block infos, go ahead and destroy them - // now. This can happen if IR-gen gets clever and skips evaluating - // something. - if (FirstBlockInfo) - destroyBlockInfos(FirstBlockInfo); - if (getLangOpts().OpenMP && CurFn) CGM.getOpenMPRuntime().functionFinished(*this); -} - -// Map the LangOption for rounding mode into -// the corresponding enum in the IR. -static llvm::fp::RoundingMode ToConstrainedRoundingMD( - LangOptions::FPRoundingModeKind Kind) { - switch (Kind) { - case LangOptions::FPR_ToNearest: return llvm::fp::rmToNearest; - case LangOptions::FPR_Downward: return llvm::fp::rmDownward; - case LangOptions::FPR_Upward: return llvm::fp::rmUpward; - case LangOptions::FPR_TowardZero: return llvm::fp::rmTowardZero; - case LangOptions::FPR_Dynamic: return llvm::fp::rmDynamic; - } - llvm_unreachable("Unsupported FP RoundingMode"); + // If we have an OpenMPIRBuilder we want to finalize functions (incl. + // outlining etc) at some point. Doing it once the function codegen is done + // seems to be a reasonable spot. We do it here, as opposed to the deletion + // time of the CodeGenModule, because we have to ensure the IR has not yet + // been "emitted" to the outside, thus, modifications are still sensible. + if (CGM.getLangOpts().OpenMPIRBuilder) + CGM.getOpenMPRuntime().getOMPBuilder().finalize(); } // Map the LangOption for exception behavior into // the corresponding enum in the IR. -static llvm::fp::ExceptionBehavior ToConstrainedExceptMD( - LangOptions::FPExceptionModeKind Kind) { +llvm::fp::ExceptionBehavior +clang::ToConstrainedExceptMD(LangOptions::FPExceptionModeKind Kind) { switch (Kind) { case LangOptions::FPE_Ignore: return llvm::fp::ebIgnore; @@ -135,81 +105,79 @@ static llvm::fp::ExceptionBehavior ToConstrainedExceptMD( } void CodeGenFunction::SetFPModel() { - auto fpRoundingMode = ToConstrainedRoundingMD( - getLangOpts().getFPRoundingMode()); + llvm::RoundingMode RM = getLangOpts().getFPRoundingMode(); auto fpExceptionBehavior = ToConstrainedExceptMD( getLangOpts().getFPExceptionMode()); - if (fpExceptionBehavior == llvm::fp::ebIgnore && - fpRoundingMode == llvm::fp::rmToNearest) - // Constrained intrinsics are not used. - ; - else { - Builder.setIsFPConstrained(true); - Builder.setDefaultConstrainedRounding(fpRoundingMode); - Builder.setDefaultConstrainedExcept(fpExceptionBehavior); - } -} - -CharUnits CodeGenFunction::getNaturalPointeeTypeAlignment(QualType T, - LValueBaseInfo *BaseInfo, - TBAAAccessInfo *TBAAInfo) { - return getNaturalTypeAlignment(T->getPointeeType(), BaseInfo, TBAAInfo, - /* forPointeeType= */ true); -} - -CharUnits CodeGenFunction::getNaturalTypeAlignment(QualType T, - LValueBaseInfo *BaseInfo, - TBAAAccessInfo *TBAAInfo, - bool forPointeeType) { - if (TBAAInfo) - *TBAAInfo = CGM.getTBAAAccessInfo(T); - - // Honor alignment typedef attributes even on incomplete types. - // We also honor them straight for C++ class types, even as pointees; - // there's an expressivity gap here. 
- if (auto TT = T->getAs<TypedefType>()) { - if (auto Align = TT->getDecl()->getMaxAlignment()) { - if (BaseInfo) - *BaseInfo = LValueBaseInfo(AlignmentSource::AttributedType); - return getContext().toCharUnitsFromBits(Align); - } - } + Builder.setDefaultConstrainedRounding(RM); + Builder.setDefaultConstrainedExcept(fpExceptionBehavior); + Builder.setIsFPConstrained(fpExceptionBehavior != llvm::fp::ebIgnore || + RM != llvm::RoundingMode::NearestTiesToEven); +} - if (BaseInfo) - *BaseInfo = LValueBaseInfo(AlignmentSource::Type); +void CodeGenFunction::SetFastMathFlags(FPOptions FPFeatures) { + llvm::FastMathFlags FMF; + FMF.setAllowReassoc(FPFeatures.getAllowFPReassociate()); + FMF.setNoNaNs(FPFeatures.getNoHonorNaNs()); + FMF.setNoInfs(FPFeatures.getNoHonorInfs()); + FMF.setNoSignedZeros(FPFeatures.getNoSignedZero()); + FMF.setAllowReciprocal(FPFeatures.getAllowReciprocal()); + FMF.setApproxFunc(FPFeatures.getAllowApproxFunc()); + FMF.setAllowContract(FPFeatures.allowFPContractAcrossStatement()); + Builder.setFastMathFlags(FMF); +} - CharUnits Alignment; - if (T->isIncompleteType()) { - Alignment = CharUnits::One(); // Shouldn't be used, but pessimistic is best. - } else { - // For C++ class pointees, we don't know whether we're pointing at a - // base or a complete object, so we generally need to use the - // non-virtual alignment. - const CXXRecordDecl *RD; - if (forPointeeType && (RD = T->getAsCXXRecordDecl())) { - Alignment = CGM.getClassPointerAlignment(RD); - } else { - Alignment = getContext().getTypeAlignInChars(T); - if (T.getQualifiers().hasUnaligned()) - Alignment = CharUnits::One(); - } +CodeGenFunction::CGFPOptionsRAII::CGFPOptionsRAII(CodeGenFunction &CGF, + FPOptions FPFeatures) + : CGF(CGF), OldFPFeatures(CGF.CurFPFeatures) { + CGF.CurFPFeatures = FPFeatures; - // Cap to the global maximum type alignment unless the alignment - // was somehow explicit on the type. 
- if (unsigned MaxAlign = getLangOpts().MaxTypeAlign) { - if (Alignment.getQuantity() > MaxAlign && - !getContext().isAlignmentRequired(T)) - Alignment = CharUnits::fromQuantity(MaxAlign); - } - } - return Alignment; + if (OldFPFeatures == FPFeatures) + return; + + FMFGuard.emplace(CGF.Builder); + + llvm::RoundingMode NewRoundingBehavior = + static_cast<llvm::RoundingMode>(FPFeatures.getRoundingMode()); + CGF.Builder.setDefaultConstrainedRounding(NewRoundingBehavior); + auto NewExceptionBehavior = + ToConstrainedExceptMD(static_cast<LangOptions::FPExceptionModeKind>( + FPFeatures.getFPExceptionMode())); + CGF.Builder.setDefaultConstrainedExcept(NewExceptionBehavior); + + CGF.SetFastMathFlags(FPFeatures); + + assert((CGF.CurFuncDecl == nullptr || CGF.Builder.getIsFPConstrained() || + isa<CXXConstructorDecl>(CGF.CurFuncDecl) || + isa<CXXDestructorDecl>(CGF.CurFuncDecl) || + (NewExceptionBehavior == llvm::fp::ebIgnore && + NewRoundingBehavior == llvm::RoundingMode::NearestTiesToEven)) && + "FPConstrained should be enabled on entire function"); + + auto mergeFnAttrValue = [&](StringRef Name, bool Value) { + auto OldValue = + CGF.CurFn->getFnAttribute(Name).getValueAsString() == "true"; + auto NewValue = OldValue & Value; + if (OldValue != NewValue) + CGF.CurFn->addFnAttr(Name, llvm::toStringRef(NewValue)); + }; + mergeFnAttrValue("no-infs-fp-math", FPFeatures.getNoHonorInfs()); + mergeFnAttrValue("no-nans-fp-math", FPFeatures.getNoHonorNaNs()); + mergeFnAttrValue("no-signed-zeros-fp-math", FPFeatures.getNoSignedZero()); + mergeFnAttrValue("unsafe-fp-math", FPFeatures.getAllowFPReassociate() && + FPFeatures.getAllowReciprocal() && + FPFeatures.getAllowApproxFunc() && + FPFeatures.getNoSignedZero()); +} + +CodeGenFunction::CGFPOptionsRAII::~CGFPOptionsRAII() { + CGF.CurFPFeatures = OldFPFeatures; } LValue CodeGenFunction::MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T) { LValueBaseInfo BaseInfo; TBAAAccessInfo TBAAInfo; - CharUnits Alignment = getNaturalTypeAlignment(T, &BaseInfo, &TBAAInfo); + CharUnits Alignment = CGM.getNaturalTypeAlignment(T, &BaseInfo, &TBAAInfo); return LValue::MakeAddr(Address(V, Alignment), T, getContext(), BaseInfo, TBAAInfo); } @@ -220,8 +188,8 @@ LValue CodeGenFunction::MakeNaturalAlignPointeeAddrLValue(llvm::Value *V, QualType T) { LValueBaseInfo BaseInfo; TBAAAccessInfo TBAAInfo; - CharUnits Align = getNaturalTypeAlignment(T, &BaseInfo, &TBAAInfo, - /* forPointeeType= */ true); + CharUnits Align = CGM.getNaturalTypeAlignment(T, &BaseInfo, &TBAAInfo, + /* forPointeeType= */ true); return MakeAddrLValue(Address(V, Align), T, BaseInfo, TBAAInfo); } @@ -259,11 +227,13 @@ TypeEvaluationKind CodeGenFunction::getEvaluationKind(QualType type) { case Type::MemberPointer: case Type::Vector: case Type::ExtVector: + case Type::ConstantMatrix: case Type::FunctionProto: case Type::FunctionNoProto: case Type::Enum: case Type::ObjCObjectPointer: case Type::Pipe: + case Type::ExtInt: return TEK_Scalar; // Complexes. @@ -486,13 +456,15 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { // Scan function arguments for vector width. for (llvm::Argument &A : CurFn->args()) if (auto *VT = dyn_cast<llvm::VectorType>(A.getType())) - LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, - VT->getPrimitiveSizeInBits().getFixedSize()); + LargestVectorWidth = + std::max((uint64_t)LargestVectorWidth, + VT->getPrimitiveSizeInBits().getKnownMinSize()); // Update vector width based on return type. 
if (auto *VT = dyn_cast<llvm::VectorType>(CurFn->getReturnType())) - LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, - VT->getPrimitiveSizeInBits().getFixedSize()); + LargestVectorWidth = + std::max((uint64_t)LargestVectorWidth, + VT->getPrimitiveSizeInBits().getKnownMinSize()); // Add the required-vector-width attribute. This contains the max width from: // 1. min-vector-width attribute used in the source program. @@ -799,35 +771,54 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, FD->getBody()->getStmtClass() == Stmt::CoroutineBodyStmtClass) SanOpts.Mask &= ~SanitizerKind::Null; - if (D) { - // Apply xray attributes to the function (as a string, for now) - if (const auto *XRayAttr = D->getAttr<XRayInstrumentAttr>()) { - if (CGM.getCodeGenOpts().XRayInstrumentationBundle.has( - XRayInstrKind::Function)) { - if (XRayAttr->alwaysXRayInstrument() && ShouldXRayInstrumentFunction()) - Fn->addFnAttr("function-instrument", "xray-always"); - if (XRayAttr->neverXRayInstrument()) - Fn->addFnAttr("function-instrument", "xray-never"); - if (const auto *LogArgs = D->getAttr<XRayLogArgsAttr>()) - if (ShouldXRayInstrumentFunction()) - Fn->addFnAttr("xray-log-args", - llvm::utostr(LogArgs->getArgumentCount())); - } - } else { - if (ShouldXRayInstrumentFunction() && !CGM.imbueXRayAttrs(Fn, Loc)) - Fn->addFnAttr( - "xray-instruction-threshold", - llvm::itostr(CGM.getCodeGenOpts().XRayInstructionThreshold)); + // Apply xray attributes to the function (as a string, for now) + if (const auto *XRayAttr = D ? D->getAttr<XRayInstrumentAttr>() : nullptr) { + if (CGM.getCodeGenOpts().XRayInstrumentationBundle.has( + XRayInstrKind::FunctionEntry) || + CGM.getCodeGenOpts().XRayInstrumentationBundle.has( + XRayInstrKind::FunctionExit)) { + if (XRayAttr->alwaysXRayInstrument() && ShouldXRayInstrumentFunction()) + Fn->addFnAttr("function-instrument", "xray-always"); + if (XRayAttr->neverXRayInstrument()) + Fn->addFnAttr("function-instrument", "xray-never"); + if (const auto *LogArgs = D->getAttr<XRayLogArgsAttr>()) + if (ShouldXRayInstrumentFunction()) + Fn->addFnAttr("xray-log-args", + llvm::utostr(LogArgs->getArgumentCount())); } + } else { + if (ShouldXRayInstrumentFunction() && !CGM.imbueXRayAttrs(Fn, Loc)) + Fn->addFnAttr( + "xray-instruction-threshold", + llvm::itostr(CGM.getCodeGenOpts().XRayInstructionThreshold)); + } - if (const auto *Attr = D->getAttr<PatchableFunctionEntryAttr>()) { - // Attr->getStart is currently ignored. - Fn->addFnAttr("patchable-function-entry", - std::to_string(Attr->getCount())); - } else if (unsigned Count = CGM.getCodeGenOpts().PatchableFunctionEntryCount) { - Fn->addFnAttr("patchable-function-entry", - std::to_string(Count)); - } + if (ShouldXRayInstrumentFunction()) { + if (CGM.getCodeGenOpts().XRayIgnoreLoops) + Fn->addFnAttr("xray-ignore-loops"); + + if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has( + XRayInstrKind::FunctionExit)) + Fn->addFnAttr("xray-skip-exit"); + + if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has( + XRayInstrKind::FunctionEntry)) + Fn->addFnAttr("xray-skip-entry"); + } + + unsigned Count, Offset; + if (const auto *Attr = + D ? 
D->getAttr<PatchableFunctionEntryAttr>() : nullptr) { + Count = Attr->getCount(); + Offset = Attr->getOffset(); + } else { + Count = CGM.getCodeGenOpts().PatchableFunctionEntryCount; + Offset = CGM.getCodeGenOpts().PatchableFunctionEntryOffset; + } + if (Count && Offset <= Count) { + Fn->addFnAttr("patchable-function-entry", std::to_string(Count - Offset)); + if (Offset) + Fn->addFnAttr("patchable-function-prefix", std::to_string(Offset)); } // Add no-jump-tables value. @@ -842,6 +833,9 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, if (CGM.getCodeGenOpts().ProfileSampleAccurate) Fn->addFnAttr("profile-sample-accurate"); + if (!CGM.getCodeGenOpts().SampleProfileFile.empty()) + Fn->addFnAttr("use-sample-profile"); + if (D && D->hasAttr<CFICanonicalJumpTableAttr>()) Fn->addFnAttr("cfi-canonical-jump-table"); @@ -889,14 +883,26 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, // If we're in C++ mode and the function name is "main", it is guaranteed // to be norecurse by the standard (3.6.1.3 "The function main shall not be // used within a program"). - if (getLangOpts().CPlusPlus) - if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) - if (FD->isMain()) - Fn->addFnAttr(llvm::Attribute::NoRecurse); - - if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) + // + // OpenCL C 2.0 v2.2-11 s6.9.i: + // Recursion is not supported. + // + // SYCL v1.2.1 s3.10: + // kernels cannot include RTTI information, exception classes, + // recursive code, virtual functions or make use of C++ libraries that + // are not compiled for the device. + if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { + if ((getLangOpts().CPlusPlus && FD->isMain()) || getLangOpts().OpenCL || + getLangOpts().SYCLIsDevice || + (getLangOpts().CUDA && FD->hasAttr<CUDAGlobalAttr>())) + Fn->addFnAttr(llvm::Attribute::NoRecurse); + } + + if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { + Builder.setIsFPConstrained(FD->usesFPIntrin()); if (FD->usesFPIntrin()) Fn->addFnAttr(llvm::Attribute::StrictFP); + } // If a custom alignment is used, force realigning to this alignment on // any main function which certainly will need it. 
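To make the Count/Offset arithmetic above concrete, here is a minimal standalone sketch (the helper name splitPatchableNops is hypothetical, not clang API): Offset NOPs are placed before the entry point via "patchable-function-prefix", and the remaining Count - Offset NOPs after it via "patchable-function-entry".

#include <cassert>
#include <string>
#include <utility>

// Split a patchable_function_entry(Count, Offset) request into the two
// attribute values used above: {prefix NOPs, entry NOPs}.
std::pair<std::string, std::string> splitPatchableNops(unsigned Count,
                                                       unsigned Offset) {
  assert(Count && Offset <= Count && "mirrors the guard in the code above");
  return {std::to_string(Offset), std::to_string(Count - Offset)};
}

// Example: splitPatchableNops(5, 2) yields {"2", "3"}.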
@@ -1021,7 +1027,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, llvm::Value *Addr = Builder.CreateStructGEP(nullptr, &*EI, Idx); ReturnValuePointer = Address(Addr, getPointerAlign()); Addr = Builder.CreateAlignedLoad(Addr, getPointerAlign(), "agg.result"); - ReturnValue = Address(Addr, getNaturalTypeAlignment(RetTy)); + ReturnValue = Address(Addr, CGM.getNaturalTypeAlignment(RetTy)); } else { ReturnValue = CreateIRTemp(RetTy, "retval"); @@ -1978,6 +1984,7 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) { case Type::Complex: case Type::Vector: case Type::ExtVector: + case Type::ConstantMatrix: case Type::Record: case Type::Enum: case Type::Elaborated: @@ -1986,6 +1993,7 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) { case Type::ObjCObject: case Type::ObjCInterface: case Type::ObjCObjectPointer: + case Type::ExtInt: llvm_unreachable("type class is never variably-modified!"); case Type::Adjusted: @@ -2141,21 +2149,47 @@ void CodeGenFunction::unprotectFromPeepholes(PeepholeProtection protection) { protection.Inst->eraseFromParent(); } -void CodeGenFunction::EmitAlignmentAssumption(llvm::Value *PtrValue, +void CodeGenFunction::emitAlignmentAssumption(llvm::Value *PtrValue, QualType Ty, SourceLocation Loc, SourceLocation AssumptionLoc, llvm::Value *Alignment, llvm::Value *OffsetValue) { - llvm::Value *TheCheck; - llvm::Instruction *Assumption = Builder.CreateAlignmentAssumption( - CGM.getDataLayout(), PtrValue, Alignment, OffsetValue, &TheCheck); + if (Alignment->getType() != IntPtrTy) + Alignment = + Builder.CreateIntCast(Alignment, IntPtrTy, false, "casted.align"); + if (OffsetValue && OffsetValue->getType() != IntPtrTy) + OffsetValue = + Builder.CreateIntCast(OffsetValue, IntPtrTy, true, "casted.offset"); + llvm::Value *TheCheck = nullptr; if (SanOpts.has(SanitizerKind::Alignment)) { - EmitAlignmentAssumptionCheck(PtrValue, Ty, Loc, AssumptionLoc, Alignment, - OffsetValue, TheCheck, Assumption); + llvm::Value *PtrIntValue = + Builder.CreatePtrToInt(PtrValue, IntPtrTy, "ptrint"); + + if (OffsetValue) { + bool IsOffsetZero = false; + if (const auto *CI = dyn_cast<llvm::ConstantInt>(OffsetValue)) + IsOffsetZero = CI->isZero(); + + if (!IsOffsetZero) + PtrIntValue = Builder.CreateSub(PtrIntValue, OffsetValue, "offsetptr"); + } + + llvm::Value *Zero = llvm::ConstantInt::get(IntPtrTy, 0); + llvm::Value *Mask = + Builder.CreateSub(Alignment, llvm::ConstantInt::get(IntPtrTy, 1)); + llvm::Value *MaskedPtr = Builder.CreateAnd(PtrIntValue, Mask, "maskedptr"); + TheCheck = Builder.CreateICmpEQ(MaskedPtr, Zero, "maskcond"); } + llvm::Instruction *Assumption = Builder.CreateAlignmentAssumption( + CGM.getDataLayout(), PtrValue, Alignment, OffsetValue); + + if (!SanOpts.has(SanitizerKind::Alignment)) + return; + emitAlignmentAssumptionCheck(PtrValue, Ty, Loc, AssumptionLoc, Alignment, + OffsetValue, TheCheck, Assumption); } -void CodeGenFunction::EmitAlignmentAssumption(llvm::Value *PtrValue, +void CodeGenFunction::emitAlignmentAssumption(llvm::Value *PtrValue, const Expr *E, SourceLocation AssumptionLoc, llvm::Value *Alignment, @@ -2165,7 +2199,7 @@ void CodeGenFunction::EmitAlignmentAssumption(llvm::Value *PtrValue, QualType Ty = E->getType(); SourceLocation Loc = E->getExprLoc(); - EmitAlignmentAssumption(PtrValue, Ty, Loc, AssumptionLoc, Alignment, + emitAlignmentAssumption(PtrValue, Ty, Loc, AssumptionLoc, Alignment, OffsetValue); } @@ -2319,8 +2353,7 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc, SmallVector<StringRef, 1> 
ReqFeatures; llvm::StringMap<bool> CalleeFeatureMap; - CGM.getContext().getFunctionFeatureMap(CalleeFeatureMap, - GlobalDecl(TargetDecl)); + CGM.getContext().getFunctionFeatureMap(CalleeFeatureMap, TargetDecl); for (const auto &F : ParsedAttr.Features) { if (F[0] == '+' && CalleeFeatureMap.lookup(F.substr(1))) @@ -2433,13 +2466,13 @@ void CodeGenFunction::EmitMultiVersionResolver( // Loc), the diagnostic will additionally point a "Note:" to this location. // It should be the location where the __attribute__((assume_aligned)) // was written e.g. -void CodeGenFunction::EmitAlignmentAssumptionCheck( +void CodeGenFunction::emitAlignmentAssumptionCheck( llvm::Value *Ptr, QualType Ty, SourceLocation Loc, SourceLocation SecondaryLoc, llvm::Value *Alignment, llvm::Value *OffsetValue, llvm::Value *TheCheck, llvm::Instruction *Assumption) { assert(Assumption && isa<llvm::CallInst>(Assumption) && - cast<llvm::CallInst>(Assumption)->getCalledValue() == + cast<llvm::CallInst>(Assumption)->getCalledOperand() == llvm::Intrinsic::getDeclaration( Builder.GetInsertBlock()->getParent()->getParent(), llvm::Intrinsic::assume) && diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 3d8bc93eb965..d794f4f0fa81 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -26,6 +26,7 @@ #include "clang/AST/ExprCXX.h" #include "clang/AST/ExprObjC.h" #include "clang/AST/ExprOpenMP.h" +#include "clang/AST/StmtOpenMP.h" #include "clang/AST/Type.h" #include "clang/Basic/ABI.h" #include "clang/Basic/CapturedStmt.h" @@ -36,6 +37,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Utils/SanitizerStats.h" @@ -75,7 +77,11 @@ class ObjCAtTryStmt; class ObjCAtThrowStmt; class ObjCAtSynchronizedStmt; class ObjCAutoreleasePoolStmt; +class OMPUseDevicePtrClause; +class OMPUseDeviceAddrClause; class ReturnsNonNullAttr; +class SVETypeFlags; +class OMPExecutableDirective; namespace analyze_os_log { class OSLogBufferLayout; @@ -118,6 +124,7 @@ enum TypeEvaluationKind { SANITIZER_CHECK(FunctionTypeMismatch, function_type_mismatch, 1) \ SANITIZER_CHECK(ImplicitConversion, implicit_conversion, 0) \ SANITIZER_CHECK(InvalidBuiltin, invalid_builtin, 0) \ + SANITIZER_CHECK(InvalidObjCCast, invalid_objc_cast, 0) \ SANITIZER_CHECK(LoadInvalidValue, load_invalid_value, 0) \ SANITIZER_CHECK(MissingReturn, missing_return, 0) \ SANITIZER_CHECK(MulOverflow, mul_overflow, 0) \ @@ -258,6 +265,9 @@ public: CodeGenModule &CGM; // Per-module state. const TargetInfo &Target; + // For EH/SEH outlined funclets, this field points to parent's CGF + CodeGenFunction *ParentCGF = nullptr; + typedef std::pair<llvm::Value *, llvm::Value *> ComplexPairTy; LoopInfoStack LoopStack; CGBuilderTy Builder; @@ -332,6 +342,10 @@ public: /// This is invalid if sret is not in use. Address ReturnValuePointer = Address::invalid(); + /// If a return statement is being visited, this holds the return statment's + /// result expression. + const Expr *RetExpr = nullptr; + /// Return true if a label was seen in the current scope. bool hasLabelBeenSeenInCurrentScope() const { if (CurLexicalScope) @@ -485,6 +499,9 @@ public: /// region. bool IsInPreservedAIRegion = false; + /// True if the current statement has nomerge attribute. 
+ bool InNoMergeAttributedStmt = false;
+
 const CodeGen::CGBlockInfo *BlockInfo = nullptr;
 llvm::Value *BlockPointer = nullptr;
@@ -533,9 +550,6 @@ public:
 unsigned NextCleanupDestIndex = 1;
- /// FirstBlockInfo - The head of a singly-linked-list of block layouts.
- CGBlockInfo *FirstBlockInfo = nullptr;
-
 /// EHResumeBlock - Unified block containing a call to llvm.eh.resume.
 llvm::BasicBlock *EHResumeBlock = nullptr;
@@ -560,11 +574,49 @@ public:
 llvm::BasicBlock *getInvokeDestImpl();
+ /// Parent loop-based directive for scan directive.
+ const OMPExecutableDirective *OMPParentLoopDirectiveForScan = nullptr;
+ llvm::BasicBlock *OMPBeforeScanBlock = nullptr;
+ llvm::BasicBlock *OMPAfterScanBlock = nullptr;
+ llvm::BasicBlock *OMPScanExitBlock = nullptr;
+ llvm::BasicBlock *OMPScanDispatch = nullptr;
+ bool OMPFirstScanLoop = false;
+
+ /// Manages parent directive for scan directives.
+ class ParentLoopDirectiveForScanRegion {
+ CodeGenFunction &CGF;
+ const OMPExecutableDirective *ParentLoopDirectiveForScan;
+
+ public:
+ ParentLoopDirectiveForScanRegion(
+ CodeGenFunction &CGF,
+ const OMPExecutableDirective &ParentLoopDirectiveForScan)
+ : CGF(CGF),
+ ParentLoopDirectiveForScan(CGF.OMPParentLoopDirectiveForScan) {
+ CGF.OMPParentLoopDirectiveForScan = &ParentLoopDirectiveForScan;
+ }
+ ~ParentLoopDirectiveForScanRegion() {
+ CGF.OMPParentLoopDirectiveForScan = ParentLoopDirectiveForScan;
+ }
+ };
+
 template <class T>
 typename DominatingValue<T>::saved_type saveValueInCond(T value) {
 return DominatingValue<T>::save(*this, value);
 }
+ class CGFPOptionsRAII {
+ public:
+ CGFPOptionsRAII(CodeGenFunction &CGF, FPOptions FPFeatures);
+ ~CGFPOptionsRAII();
+
+ private:
+ CodeGenFunction &CGF;
+ FPOptions OldFPFeatures;
+ Optional<CGBuilderTy::FastMathFlagGuard> FMFGuard;
+ };
+ FPOptions CurFPFeatures;
+
 public:
 /// ObjCEHValueStack - Stack of Objective-C exception values, used for
 /// rethrows.
@@ -1541,6 +1593,169 @@ public:
 CallArgList OldCXXInheritedCtorInitExprArgs;
 };
+ // Helper class for the OpenMP IR Builder. Allows reusability of code used for
+ // region body, and finalization codegen callbacks. This class will also
+ // contain privatization functions used by the privatization callbacks.
+ //
+ // TODO: this is a temporary class for things that are being moved out of
+ // CGOpenMPRuntime, new versions of current CodeGenFunction methods, or
+ // utility functions for use with the OMPBuilder. Once that move to use the
+ // OMPBuilder is done, everything here will either become part of
+ // CodeGenFunction directly, or a new helper class that will contain functions
+ // used by both this and the OMPBuilder
+
+ struct OMPBuilderCBHelpers {
+
+ OMPBuilderCBHelpers() = delete;
+ OMPBuilderCBHelpers(const OMPBuilderCBHelpers &) = delete;
+ OMPBuilderCBHelpers &operator=(const OMPBuilderCBHelpers &) = delete;
+
+ using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+
+ /// Cleanup action for allocate support.
+ class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
+
+ private:
+ llvm::CallInst *RTLFnCI;
+
+ public:
+ OMPAllocateCleanupTy(llvm::CallInst *RLFnCI) : RTLFnCI(RLFnCI) {
+ RLFnCI->removeFromParent();
+ }
+
+ void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
+ if (!CGF.HaveInsertPoint())
+ return;
+ CGF.Builder.Insert(RTLFnCI);
+ }
+ };
+
+ /// Returns address of the threadprivate variable for the current
+ /// thread. This also creates any necessary OMP runtime calls.
+ ///
+ /// \param VD VarDecl for Threadprivate variable.
+ /// \param VDAddr Address of the VarDecl
+ /// \param Loc The location where the barrier directive was encountered
+ static Address getAddrOfThreadPrivate(CodeGenFunction &CGF,
+ const VarDecl *VD, Address VDAddr,
+ SourceLocation Loc);
+
+ /// Gets the OpenMP-specific address of the local variable \p VD.
+ static Address getAddressOfLocalVariable(CodeGenFunction &CGF,
+ const VarDecl *VD);
+ /// Get the platform-specific name separator.
+ /// \param Parts different parts of the final name that need separation
+ /// \param FirstSeparator First separator used between the initial two
+ /// parts of the name.
+ /// \param Separator separator used between all of the remaining consecutive
+ /// parts of the name
+ static std::string getNameWithSeparators(ArrayRef<StringRef> Parts,
+ StringRef FirstSeparator = ".",
+ StringRef Separator = ".");
+ /// Emit the Finalization for an OMP region
+ /// \param CGF The Codegen function this belongs to
+ /// \param IP Insertion point for generating the finalization code.
+ static void FinalizeOMPRegion(CodeGenFunction &CGF, InsertPointTy IP) {
+ CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
+ assert(IP.getBlock()->end() != IP.getPoint() &&
+ "OpenMP IR Builder should cause terminated block!");
+
+ llvm::BasicBlock *IPBB = IP.getBlock();
+ llvm::BasicBlock *DestBB = IPBB->getUniqueSuccessor();
+ assert(DestBB && "Finalization block should have one successor!");
+
+ // Erase and replace with a cleanup branch.
+ IPBB->getTerminator()->eraseFromParent();
+ CGF.Builder.SetInsertPoint(IPBB);
+ CodeGenFunction::JumpDest Dest = CGF.getJumpDestInCurrentScope(DestBB);
+ CGF.EmitBranchThroughCleanup(Dest);
+ }
+
+ /// Emit the body of an OMP region
+ /// \param CGF The Codegen function this belongs to
+ /// \param RegionBodyStmt The body statement for the OpenMP region being
+ /// generated
+ /// \param CodeGenIP Insertion point for generating the body code.
+ /// \param FiniBB The finalization basic block
+ static void EmitOMPRegionBody(CodeGenFunction &CGF,
+ const Stmt *RegionBodyStmt,
+ InsertPointTy CodeGenIP,
+ llvm::BasicBlock &FiniBB) {
+ llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
+ if (llvm::Instruction *CodeGenIPBBTI = CodeGenIPBB->getTerminator())
+ CodeGenIPBBTI->eraseFromParent();
+
+ CGF.Builder.SetInsertPoint(CodeGenIPBB);
+
+ CGF.EmitStmt(RegionBodyStmt);
+
+ if (CGF.Builder.saveIP().isSet())
+ CGF.Builder.CreateBr(&FiniBB);
+ }
+
+ /// RAII for preserving necessary info during Outlined region body codegen.
+ class OutlinedRegionBodyRAII {
+
+ llvm::AssertingVH<llvm::Instruction> OldAllocaIP;
+ CodeGenFunction::JumpDest OldReturnBlock;
+ CGBuilderTy::InsertPoint IP;
+ CodeGenFunction &CGF;
+
+ public:
+ OutlinedRegionBodyRAII(CodeGenFunction &cgf, InsertPointTy &AllocaIP,
+ llvm::BasicBlock &RetBB)
+ : CGF(cgf) {
+ assert(AllocaIP.isSet() &&
+ "Must specify Insertion point for allocas of outlined function");
+ OldAllocaIP = CGF.AllocaInsertPt;
+ CGF.AllocaInsertPt = &*AllocaIP.getPoint();
+ IP = CGF.Builder.saveIP();
+
+ OldReturnBlock = CGF.ReturnBlock;
+ CGF.ReturnBlock = CGF.getJumpDestInCurrentScope(&RetBB);
+ }
+
+ ~OutlinedRegionBodyRAII() {
+ CGF.AllocaInsertPt = OldAllocaIP;
+ CGF.ReturnBlock = OldReturnBlock;
+ CGF.Builder.restoreIP(IP);
+ }
+ };
+
+ /// RAII for preserving necessary info during inlined region body codegen.
+ class InlinedRegionBodyRAII {
+
+ llvm::AssertingVH<llvm::Instruction> OldAllocaIP;
+ CodeGenFunction &CGF;
+
+ public:
+ InlinedRegionBodyRAII(CodeGenFunction &cgf, InsertPointTy &AllocaIP,
+ llvm::BasicBlock &FiniBB)
+ : CGF(cgf) {
+ // The alloca insertion block should be in the entry block of the
+ // containing function, so it expects either an empty AllocaIP (in which
+ // case it will reuse the old alloca insertion point) or a new AllocaIP
+ // in the same block as the old one.
+ assert((!AllocaIP.isSet() ||
+ CGF.AllocaInsertPt->getParent() == AllocaIP.getBlock()) &&
+ "Insertion point should be in the entry block of containing "
+ "function!");
+ OldAllocaIP = CGF.AllocaInsertPt;
+ if (AllocaIP.isSet())
+ CGF.AllocaInsertPt = &*AllocaIP.getPoint();
+
+ // TODO: Remove the call, after making sure the counter is not used by
+ // the EHStack.
+ // Since this is an inlined region, it should not modify the
+ // ReturnBlock, and should reuse the one for the enclosing outlined
+ // region. So, the JumpDest being returned by the function is discarded.
+ (void)CGF.getJumpDestInCurrentScope(&FiniBB);
+ }
+
+ ~InlinedRegionBodyRAII() { CGF.AllocaInsertPt = OldAllocaIP; }
+ };
+ };
+
 private:
 /// CXXThisDecl - When generating code for a C++ member function,
 /// this will hold the implicit 'this' declaration.
@@ -1772,7 +1987,6 @@ public:
 /// information about the block, including the block invoke function, the
 /// captured variables, etc.
 llvm::Value *EmitBlockLiteral(const BlockExpr *);
- static void destroyBlockInfos(CGBlockInfo *info);
 llvm::Function *GenerateBlockFunction(GlobalDecl GD,
 const CGBlockInfo &Info,
@@ -2155,13 +2369,6 @@ public:
 LValue MakeNaturalAlignPointeeAddrLValue(llvm::Value *V, QualType T);
 LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T);
- CharUnits getNaturalTypeAlignment(QualType T,
- LValueBaseInfo *BaseInfo = nullptr,
- TBAAAccessInfo *TBAAInfo = nullptr,
- bool forPointeeType = false);
- CharUnits getNaturalPointeeTypeAlignment(QualType T,
- LValueBaseInfo *BaseInfo = nullptr,
- TBAAAccessInfo *TBAAInfo = nullptr);
 Address EmitLoadOfReference(LValue RefLVal,
 LValueBaseInfo *PointeeBaseInfo = nullptr,
@@ -2264,8 +2471,9 @@ public:
 /// CreateAggTemp - Create a temporary memory object for the given
 /// aggregate type.
- AggValueSlot CreateAggTemp(QualType T, const Twine &Name = "tmp") {
- return AggValueSlot::forAddr(CreateMemTemp(T, Name),
+ AggValueSlot CreateAggTemp(QualType T, const Twine &Name = "tmp",
+ Address *Alloca = nullptr) {
+ return AggValueSlot::forAddr(CreateMemTemp(T, Name, Alloca),
 T.getQualifiers(),
 AggValueSlot::IsNotDestructed,
 AggValueSlot::DoesNotNeedGCBarriers,
@@ -2594,7 +2802,8 @@ public:
 Address EmitCXXUuidofExpr(const CXXUuidofExpr *E);
 /// Situations in which we might emit a check for the suitability of a
- /// pointer or glvalue.
+ /// pointer or glvalue. Needs to be kept in sync with ubsan_handlers.cpp in
+ /// compiler-rt.
 enum TypeCheckKind {
 /// Checking the operand of a load. Must be suitably sized and aligned.
TCK_Load,
@@ -2826,7 +3035,7 @@ public:
 PeepholeProtection protectFromPeepholes(RValue rvalue);
 void unprotectFromPeepholes(PeepholeProtection protection);
- void EmitAlignmentAssumptionCheck(llvm::Value *Ptr, QualType Ty,
+ void emitAlignmentAssumptionCheck(llvm::Value *Ptr, QualType Ty,
 SourceLocation Loc,
 SourceLocation AssumptionLoc,
 llvm::Value *Alignment,
@@ -2834,13 +3043,14 @@ public:
 llvm::Value *TheCheck,
 llvm::Instruction *Assumption);
- void EmitAlignmentAssumption(llvm::Value *PtrValue, QualType Ty,
+ void emitAlignmentAssumption(llvm::Value *PtrValue, QualType Ty,
 SourceLocation Loc, SourceLocation AssumptionLoc,
 llvm::Value *Alignment,
 llvm::Value *OffsetValue = nullptr);
- void EmitAlignmentAssumption(llvm::Value *PtrValue, const Expr *E,
- SourceLocation AssumptionLoc, llvm::Value *Alignment,
+ void emitAlignmentAssumption(llvm::Value *PtrValue, const Expr *E,
+ SourceLocation AssumptionLoc,
+ llvm::Value *Alignment,
 llvm::Value *OffsetValue = nullptr);
 //===--------------------------------------------------------------------===//
@@ -2983,7 +3193,8 @@ public:
 llvm::Function *EmitCapturedStmt(const CapturedStmt &S, CapturedRegionKind K);
 llvm::Function *GenerateCapturedStmtFunction(const CapturedStmt &S);
 Address GenerateCapturedStmtArgument(const CapturedStmt &S);
- llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S);
+ llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
+ SourceLocation Loc);
 void GenerateOpenMPCapturedVars(const CapturedStmt &S,
 SmallVectorImpl<llvm::Value *> &CapturedVars);
 void emitOMPSimpleStore(LValue LVal, RValue RVal, QualType RValTy,
@@ -3037,7 +3248,10 @@ public:
 void EmitOMPPrivateClause(const OMPExecutableDirective &D,
 OMPPrivateScope &PrivateScope);
 void EmitOMPUseDevicePtrClause(
- const OMPClause &C, OMPPrivateScope &PrivateScope,
+ const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
+ const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap);
+ void EmitOMPUseDeviceAddrClause(
+ const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
 const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap);
 /// Emit code for copyin clause in \a D directive. The next code is
 /// generated at the start of outlined functions for directives:
@@ -3091,7 +3305,8 @@ public:
 /// proper codegen in internal captured statement.
 ///
 void EmitOMPReductionClauseInit(const OMPExecutableDirective &D,
- OMPPrivateScope &PrivateScope);
+ OMPPrivateScope &PrivateScope,
+ bool ForInscan = false);
 /// Emit final update of reduction values to original variables at
 /// the end of the directive.
 ///
@@ -3149,6 +3364,8 @@ public:
 void EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S);
 void EmitOMPTaskgroupDirective(const OMPTaskgroupDirective &S);
 void EmitOMPFlushDirective(const OMPFlushDirective &S);
+ void EmitOMPDepobjDirective(const OMPDepobjDirective &S);
+ void EmitOMPScanDirective(const OMPScanDirective &S);
 void EmitOMPOrderedDirective(const OMPOrderedDirective &S);
 void EmitOMPAtomicDirective(const OMPAtomicDirective &S);
 void EmitOMPTargetDirective(const OMPTargetDirective &S);
@@ -3250,8 +3467,8 @@ public:
 /// \param PostIncGen Generator for post-increment code (required for ordered
 /// loop directives).
void EmitOMPInnerLoop( - const Stmt &S, bool RequiresCleanup, const Expr *LoopCond, - const Expr *IncExpr, + const OMPExecutableDirective &S, bool RequiresCleanup, + const Expr *LoopCond, const Expr *IncExpr, const llvm::function_ref<void(CodeGenFunction &)> BodyGen, const llvm::function_ref<void(CodeGenFunction &)> PostIncGen); @@ -3517,6 +3734,7 @@ public: LValue EmitUnaryOpLValue(const UnaryOperator *E); LValue EmitArraySubscriptExpr(const ArraySubscriptExpr *E, bool Accessed = false); + LValue EmitMatrixSubscriptExpr(const MatrixSubscriptExpr *E); LValue EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, bool IsLowerBound = true); LValue EmitExtVectorElementExpr(const ExtVectorElementExpr *E); @@ -3722,6 +3940,8 @@ public: RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E, ReturnValueSlot ReturnValue); + RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E, + ReturnValueSlot ReturnValue); RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue); @@ -3757,6 +3977,13 @@ public: llvm::Value *EmitARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch); + llvm::Value *EmitARMCDEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, + ReturnValueSlot ReturnValue, + llvm::Triple::ArchType Arch); + llvm::Value *EmitCMSEClearRecord(llvm::Value *V, llvm::IntegerType *ITy, + QualType RTy); + llvm::Value *EmitCMSEClearRecord(llvm::Value *V, llvm::ArrayType *ATy, + QualType RTy); llvm::Value *EmitCommonNeonBuiltinExpr(unsigned BuiltinID, unsigned LLVMIntrinsic, @@ -3775,12 +4002,62 @@ public: SmallVectorImpl<llvm::Value*> &O, const char *name, unsigned shift = 0, bool rightshift = false); + llvm::Value *EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx, + const llvm::ElementCount &Count); llvm::Value *EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx); llvm::Value *EmitNeonShiftVector(llvm::Value *V, llvm::Type *Ty, bool negateForRightShift); llvm::Value *EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt, llvm::Type *Ty, bool usgn, const char *name); llvm::Value *vectorWrapScalar16(llvm::Value *Op); + /// SVEBuiltinMemEltTy - Returns the memory element type for this memory + /// access builtin. Only required if it can't be inferred from the base + /// pointer operand. 
+ llvm::Type *SVEBuiltinMemEltTy(SVETypeFlags TypeFlags); + + SmallVector<llvm::Type *, 2> getSVEOverloadTypes(SVETypeFlags TypeFlags, + llvm::Type *ReturnType, + ArrayRef<llvm::Value *> Ops); + llvm::Type *getEltType(SVETypeFlags TypeFlags); + llvm::ScalableVectorType *getSVEType(const SVETypeFlags &TypeFlags); + llvm::ScalableVectorType *getSVEPredType(SVETypeFlags TypeFlags); + llvm::Value *EmitSVEAllTruePred(SVETypeFlags TypeFlags); + llvm::Value *EmitSVEDupX(llvm::Value *Scalar); + llvm::Value *EmitSVEDupX(llvm::Value *Scalar, llvm::Type *Ty); + llvm::Value *EmitSVEReinterpret(llvm::Value *Val, llvm::Type *Ty); + llvm::Value *EmitSVEPMull(SVETypeFlags TypeFlags, + llvm::SmallVectorImpl<llvm::Value *> &Ops, + unsigned BuiltinID); + llvm::Value *EmitSVEMovl(SVETypeFlags TypeFlags, + llvm::ArrayRef<llvm::Value *> Ops, + unsigned BuiltinID); + llvm::Value *EmitSVEPredicateCast(llvm::Value *Pred, + llvm::ScalableVectorType *VTy); + llvm::Value *EmitSVEGatherLoad(SVETypeFlags TypeFlags, + llvm::SmallVectorImpl<llvm::Value *> &Ops, + unsigned IntID); + llvm::Value *EmitSVEScatterStore(SVETypeFlags TypeFlags, + llvm::SmallVectorImpl<llvm::Value *> &Ops, + unsigned IntID); + llvm::Value *EmitSVEMaskedLoad(const CallExpr *, llvm::Type *ReturnTy, + SmallVectorImpl<llvm::Value *> &Ops, + unsigned BuiltinID, bool IsZExtReturn); + llvm::Value *EmitSVEMaskedStore(const CallExpr *, + SmallVectorImpl<llvm::Value *> &Ops, + unsigned BuiltinID); + llvm::Value *EmitSVEPrefetchLoad(SVETypeFlags TypeFlags, + SmallVectorImpl<llvm::Value *> &Ops, + unsigned BuiltinID); + llvm::Value *EmitSVEGatherPrefetch(SVETypeFlags TypeFlags, + SmallVectorImpl<llvm::Value *> &Ops, + unsigned IntID); + llvm::Value *EmitSVEStructLoad(SVETypeFlags TypeFlags, + SmallVectorImpl<llvm::Value *> &Ops, unsigned IntID); + llvm::Value *EmitSVEStructStore(SVETypeFlags TypeFlags, + SmallVectorImpl<llvm::Value *> &Ops, + unsigned IntID); + llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E); + llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch); llvm::Value *EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E); @@ -3794,6 +4071,9 @@ public: llvm::Value *EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E); + bool ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope, + llvm::AtomicOrdering &AO, + llvm::SyncScope::ID &SSID); private: enum class MSVCIntrin; @@ -3924,6 +4204,10 @@ public: /// aggregate type into a temporary LValue. LValue EmitAggExprToLValue(const Expr *E); + /// Build all the stores needed to initialize an aggregate at Dest with the + /// value Val. + void EmitAggregateStore(llvm::Value *Val, Address Dest, bool DestIsVolatile); + /// EmitExtendGCLifetime - Given a pointer to an Objective-C object, /// make sure it survives garbage collection until this point. void EmitExtendGCLifetime(llvm::Value *object); @@ -3974,6 +4258,9 @@ public: /// Call atexit() with function dtorStub. void registerGlobalDtorWithAtExit(llvm::Constant *dtorStub); + /// Call unatexit() with function dtorStub. + llvm::Value *unregisterGlobalDtorWithUnAtExit(llvm::Function *dtorStub); + /// Emit code in this function to perform a guarded variable /// initialization. 
Guarded initializations are used when it's not /// possible to prove that an initialization will be done exactly @@ -3997,12 +4284,12 @@ public: ArrayRef<llvm::Function *> CXXThreadLocals, ConstantAddress Guard = ConstantAddress::invalid()); - /// GenerateCXXGlobalDtorsFunc - Generates code for destroying global + /// GenerateCXXGlobalCleanUpFunc - Generates code for cleaning up global /// variables. - void GenerateCXXGlobalDtorsFunc( + void GenerateCXXGlobalCleanUpFunc( llvm::Function *Fn, const std::vector<std::tuple<llvm::FunctionType *, llvm::WeakTrackingVH, - llvm::Constant *>> &DtorsAndObjects); + llvm::Constant *>> &DtorsOrStermFinalizers); void GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn, const VarDecl *D, @@ -4013,14 +4300,6 @@ public: void EmitSynthesizedCXXCopyCtor(Address Dest, Address Src, const Expr *Exp); - void enterFullExpression(const FullExpr *E) { - if (const auto *EWC = dyn_cast<ExprWithCleanups>(E)) - if (EWC->getNumObjects() == 0) - return; - enterNonTrivialFullExpression(E); - } - void enterNonTrivialFullExpression(const FullExpr *E); - void EmitCXXThrowExpr(const CXXThrowExpr *E, bool KeepInsertionPoint = true); RValue EmitAtomicExpr(AtomicExpr *E); @@ -4175,6 +4454,9 @@ public: /// SetFPModel - Control floating point behavior via fp-model settings. void SetFPModel(); + /// Set the codegen fast-math flags. + void SetFastMathFlags(FPOptions FPFeatures); + private: llvm::MDNode *getRangeForLoadFromType(QualType Ty); void EmitReturnOfRValue(RValue RV, QualType Ty); @@ -4195,7 +4477,7 @@ private: /// /// \param AI - The first function argument of the expansion. void ExpandTypeFromArgs(QualType Ty, LValue Dst, - SmallVectorImpl<llvm::Value *>::iterator &AI); + llvm::Function::arg_iterator &AI); /// ExpandTypeToArgs - Expand an CallArg \arg Arg, with the LLVM type for \arg /// Ty, into individual arguments on the provided vector \arg IRCallArgs, @@ -4411,10 +4693,15 @@ inline llvm::Value *DominatingLLVMValue::restore(CodeGenFunction &CGF, // Otherwise, it should be an alloca instruction, as set up in save(). auto alloca = cast<llvm::AllocaInst>(value.getPointer()); - return CGF.Builder.CreateAlignedLoad(alloca, alloca->getAlignment()); + return CGF.Builder.CreateAlignedLoad(alloca, alloca->getAlign()); } } // end namespace CodeGen + +// Map the LangOption for floating point exception behavior into +// the corresponding enum in the IR. 
+llvm::fp::ExceptionBehavior +ToConstrainedExceptMD(LangOptions::FPExceptionModeKind Kind); } // end namespace clang #endif diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 57beda26677c..4ae8ce7e5ccf 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -38,6 +38,7 @@ #include "clang/Basic/CharInfo.h" #include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/Diagnostic.h" +#include "clang/Basic/FileManager.h" #include "clang/Basic/Module.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" @@ -83,6 +84,7 @@ static CGCXXABI *createCXXABI(CodeGenModule &CGM) { case TargetCXXABI::GenericMIPS: case TargetCXXABI::GenericItanium: case TargetCXXABI::WebAssembly: + case TargetCXXABI::XL: return CreateItaniumCXXABI(CGM); case TargetCXXABI::Microsoft: return CreateMicrosoftCXXABI(CGM); @@ -110,6 +112,7 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO, Int32Ty = llvm::Type::getInt32Ty(LLVMContext); Int64Ty = llvm::Type::getInt64Ty(LLVMContext); HalfTy = llvm::Type::getHalfTy(LLVMContext); + BFloatTy = llvm::Type::getBFloatTy(LLVMContext); FloatTy = llvm::Type::getFloatTy(LLVMContext); DoubleTy = llvm::Type::getDoubleTy(LLVMContext); PointerWidthInBits = C.getTargetInfo().getPointerWidth(0); @@ -219,14 +222,6 @@ void CodeGenModule::createOpenMPRuntime() { OpenMPRuntime.reset(new CGOpenMPRuntime(*this)); break; } - - // The OpenMP-IR-Builder should eventually replace the above runtime codegens - // but we are not there yet so they both reside in CGModule for now and the - // OpenMP-IR-Builder is opt-in only. - if (LangOpts.OpenMPIRBuilder) { - OMPBuilder.reset(new llvm::OpenMPIRBuilder(TheModule)); - OMPBuilder->initialize(); - } } void CodeGenModule::createCUDARuntime() { @@ -408,7 +403,7 @@ void CodeGenModule::Release() { checkAliases(); emitMultiVersionFunctions(); EmitCXXGlobalInitFunc(); - EmitCXXGlobalDtorFunc(); + EmitCXXGlobalCleanUpFunc(); registerGlobalDtorsWithAtExit(); EmitCXXThreadLocalInitFunc(); if (ObjCRuntime) @@ -447,6 +442,10 @@ void CodeGenModule::Release() { CodeGenFunction(*this).EmitCfiCheckStub(); } emitAtAvailableLinkGuard(); + if (Context.getTargetInfo().getTriple().isWasm() && + !Context.getTargetInfo().getTriple().isOSEmscripten()) { + EmitMainVoidAlias(); + } emitLLVMUsed(); if (SanStats) SanStats->finish(); @@ -483,6 +482,14 @@ void CodeGenModule::Release() { getModule().addModuleFlag(llvm::Module::Max, "Dwarf Version", CodeGenOpts.DwarfVersion); } + + if (Context.getLangOpts().SemanticInterposition) + // Require various optimization to respect semantic interposition. + getModule().setSemanticInterposition(1); + else if (Context.getLangOpts().ExplicitNoSemanticInterposition) + // Allow dso_local on applicable targets. + getModule().setSemanticInterposition(0); + if (CodeGenOpts.EmitCodeView) { // Indicate that we want CodeView in the metadata. getModule().addModuleFlag(llvm::Module::Warning, "CodeView", 1); @@ -513,7 +520,7 @@ void CodeGenModule::Release() { "StrictVTablePointersRequirement", llvm::MDNode::get(VMContext, Ops)); } - if (DebugInfo) + if (getModuleDebugInfo()) // We support a single version in the linked module. The LLVM // parser will drop debug info with a different version number // (and warn about it, too). 
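As a minimal sketch of how module flags set in Release() above round-trip (getDwarfVersionFlag is a hypothetical helper, not part of this patch): flags added with addModuleFlag() land in the !llvm.module.flags metadata and can be queried by key later in the pipeline.

#include "llvm/IR/Constants.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"

// Read back the "Dwarf Version" module flag emitted above, if present.
unsigned getDwarfVersionFlag(const llvm::Module &M) {
  if (auto *CI = llvm::mdconst::dyn_extract_or_null<llvm::ConstantInt>(
          M.getModuleFlag("Dwarf Version")))
    return CI->getZExtValue();
  return 0; // No flag present (e.g. debug info disabled).
}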
@@ -537,11 +544,26 @@ void CodeGenModule::Release() { getModule().addModuleFlag(llvm::Module::Error, "min_enum_size", EnumWidth); } + if (Arch == llvm::Triple::riscv32 || Arch == llvm::Triple::riscv64) { + StringRef ABIStr = Target.getABI(); + llvm::LLVMContext &Ctx = TheModule.getContext(); + getModule().addModuleFlag(llvm::Module::Error, "target-abi", + llvm::MDString::get(Ctx, ABIStr)); + } + if (CodeGenOpts.SanitizeCfiCrossDso) { // Indicate that we want cross-DSO control flow integrity checks. getModule().addModuleFlag(llvm::Module::Override, "Cross-DSO CFI", 1); } + if (CodeGenOpts.WholeProgramVTables) { + // Indicate whether VFE was enabled for this module, so that the + // vcall_visibility metadata added under whole program vtables is handled + // appropriately in the optimizer. + getModule().addModuleFlag(llvm::Module::Error, "Virtual Function Elim", + CodeGenOpts.VirtualFunctionElimination); + } + if (LangOpts.Sanitize.has(SanitizerKind::CFIICall)) { getModule().addModuleFlag(llvm::Module::Override, "CFI Canonical Jump Tables", @@ -567,7 +589,8 @@ void CodeGenModule::Release() { // floating point values to 0. (This corresponds to its "__CUDA_FTZ" // property.) getModule().addModuleFlag(llvm::Module::Override, "nvvm-reflect-ftz", - CodeGenOpts.FlushDenorm ? 1 : 0); + CodeGenOpts.FP32DenormalMode.Output != + llvm::DenormalMode::IEEE); } // Emit OpenCL specific module metadata: OpenCL/SPIR version. @@ -623,8 +646,8 @@ void CodeGenModule::Release() { if (getCodeGenOpts().EmitGcovArcs || getCodeGenOpts().EmitGcovNotes) EmitCoverageFile(); - if (DebugInfo) - DebugInfo->finalize(); + if (CGDebugInfo *DI = getModuleDebugInfo()) + DI->finalize(); if (getCodeGenOpts().EmitVersionIdentMetadata) EmitVersionIdentMetadata(); @@ -632,7 +655,9 @@ void CodeGenModule::Release() { if (!getCodeGenOpts().RecordCommandLine.empty()) EmitCommandLineMetadata(); - EmitTargetMetadata(); + getTargetCodeGenInfo().emitTargetMetadata(*this, MangledDeclNames); + + EmitBackendOptionsMetadata(getCodeGenOpts()); } void CodeGenModule::EmitOpenCLMetadata() { @@ -652,6 +677,19 @@ void CodeGenModule::EmitOpenCLMetadata() { OCLVerMD->addOperand(llvm::MDNode::get(Ctx, OCLVerElts)); } +void CodeGenModule::EmitBackendOptionsMetadata( + const CodeGenOptions CodeGenOpts) { + switch (getTriple().getArch()) { + default: + break; + case llvm::Triple::riscv32: + case llvm::Triple::riscv64: + getModule().addModuleFlag(llvm::Module::Error, "SmallDataLimit", + CodeGenOpts.SmallDataLimit); + break; + } +} + void CodeGenModule::UpdateCompletedType(const TagDecl *TD) { // Make sure that this type is translated. Types.UpdateCompletedType(TD); @@ -671,6 +709,19 @@ llvm::MDNode *CodeGenModule::getTBAATypeInfo(QualType QTy) { TBAAAccessInfo CodeGenModule::getTBAAAccessInfo(QualType AccessType) { if (!TBAA) return TBAAAccessInfo(); + if (getLangOpts().CUDAIsDevice) { + // As CUDA builtin surface/texture types are replaced, skip generating TBAA + // access info. 
+ if (AccessType->isCUDADeviceBuiltinSurfaceType()) { + if (getTargetCodeGenInfo().getCUDADeviceBuiltinSurfaceDeviceType() != + nullptr) + return TBAAAccessInfo(); + } else if (AccessType->isCUDADeviceBuiltinTextureType()) { + if (getTargetCodeGenInfo().getCUDADeviceBuiltinTextureDeviceType() != + nullptr) + return TBAAAccessInfo(); + } + } return TBAA->getAccessInfo(AccessType); } @@ -856,7 +907,7 @@ static bool shouldAssumeDSOLocal(const CodeGenModule &CGM, if (isa<llvm::Function>(GV) && !CGOpts.NoPLT && RM == llvm::Reloc::Static) return true; - // Otherwise don't assue it is local. + // Otherwise don't assume it is local. return false; } @@ -912,9 +963,9 @@ static llvm::GlobalVariable::ThreadLocalMode GetLLVMTLSModel(StringRef S) { .Case("local-exec", llvm::GlobalVariable::LocalExecTLSModel); } -static llvm::GlobalVariable::ThreadLocalMode GetLLVMTLSModel( - CodeGenOptions::TLSModel M) { - switch (M) { +llvm::GlobalVariable::ThreadLocalMode +CodeGenModule::GetDefaultLLVMTLSModel() const { + switch (CodeGenOpts.getDefaultTLSModel()) { case CodeGenOptions::GeneralDynamicTLSModel: return llvm::GlobalVariable::GeneralDynamicTLSModel; case CodeGenOptions::LocalDynamicTLSModel: @@ -931,7 +982,7 @@ void CodeGenModule::setTLSMode(llvm::GlobalValue *GV, const VarDecl &D) const { assert(D.getTLSKind() && "setting TLS mode on non-TLS var!"); llvm::GlobalValue::ThreadLocalMode TLM; - TLM = GetLLVMTLSModel(CodeGenOpts.getDefaultTLSModel()); + TLM = GetDefaultLLVMTLSModel(); // Override the TLS model if it is explicitly specified. if (const TLSModelAttr *Attr = D.getAttr<TLSModelAttr>()) { @@ -997,23 +1048,19 @@ static std::string getMangledNameImpl(const CodeGenModule &CGM, GlobalDecl GD, SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); MangleContext &MC = CGM.getCXXABI().getMangleContext(); - if (MC.shouldMangleDeclName(ND)) { - llvm::raw_svector_ostream Out(Buffer); - if (const auto *D = dyn_cast<CXXConstructorDecl>(ND)) - MC.mangleCXXCtor(D, GD.getCtorType(), Out); - else if (const auto *D = dyn_cast<CXXDestructorDecl>(ND)) - MC.mangleCXXDtor(D, GD.getDtorType(), Out); - else - MC.mangleName(ND, Out); - } else { + if (MC.shouldMangleDeclName(ND)) + MC.mangleName(GD.getWithDecl(ND), Out); + else { IdentifierInfo *II = ND->getIdentifier(); assert(II && "Attempt to mangle unnamed decl."); const auto *FD = dyn_cast<FunctionDecl>(ND); if (FD && FD->getType()->castAs<FunctionType>()->getCallConv() == CC_X86RegCall) { - llvm::raw_svector_ostream Out(Buffer); Out << "__regcall3__" << II->getName(); + } else if (FD && FD->hasAttr<CUDAGlobalAttr>() && + GD.getKernelReferenceKind() == KernelReferenceKind::Stub) { + Out << "__device_stub__" << II->getName(); } else { Out << II->getName(); } @@ -1036,7 +1083,7 @@ static std::string getMangledNameImpl(const CodeGenModule &CGM, GlobalDecl GD, } } - return Out.str(); + return std::string(Out.str()); } void CodeGenModule::UpdateMultiVersionNames(GlobalDecl GD, @@ -1101,11 +1148,25 @@ StringRef CodeGenModule::getMangledName(GlobalDecl GD) { const auto *ND = cast<NamedDecl>(GD.getDecl()); std::string MangledName = getMangledNameImpl(*this, GD, ND); - // Adjust kernel stub mangling as we may need to be able to differentiate - // them from the kernel itself (e.g., for HIP). 
- if (auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
- if (!getLangOpts().CUDAIsDevice && FD->hasAttr<CUDAGlobalAttr>())
- MangledName = getCUDARuntime().getDeviceStubName(MangledName);
+ // Ensure that either the host and device compilations use different ABIs
+ // (say, the host compilation follows the MSVC ABI while the device
+ // compilation follows the Itanium C++ ABI) or, if they follow the same ABI,
+ // that kernel names after mangling are the same after name stubbing. The
+ // latter check is very important, as the device kernel name mangled in the
+ // host compilation is used to resolve the device binaries to be executed.
+ // Inconsistent naming results in undefined behavior. Even though we cannot
+ // check that naming directly between host and device compilations, the host-
+ // and device-mangling in the host compilation can help catch certain
+ // mismatches.
+ assert(!isa<FunctionDecl>(ND) || !ND->hasAttr<CUDAGlobalAttr>() ||
+ getLangOpts().CUDAIsDevice ||
+ (getContext().getAuxTargetInfo() &&
+ (getContext().getAuxTargetInfo()->getCXXABI() !=
+ getContext().getTargetInfo().getCXXABI())) ||
+ getCUDARuntime().getDeviceSideName(ND) ==
+ getMangledNameImpl(
+ *this,
+ GD.getWithKernelReferenceKind(KernelReferenceKind::Kernel),
+ ND));
 auto Result = Manglings.insert(std::make_pair(MangledName, GD));
 return MangledDeclNames[CanonicalGD] = Result.first->first();
@@ -1357,7 +1418,7 @@ void CodeGenModule::GenOpenCLArgMetadata(llvm::Function *Fn,
 std::string typeName;
 if (isPipe)
 typeName = ty.getCanonicalType()
- ->getAs<PipeType>()
+ ->castAs<PipeType>()
 ->getElementType()
 .getAsString(Policy);
 else
@@ -1371,7 +1432,7 @@ void CodeGenModule::GenOpenCLArgMetadata(llvm::Function *Fn,
 std::string baseTypeName;
 if (isPipe)
 baseTypeName = ty.getCanonicalType()
- ->getAs<PipeType>()
+ ->castAs<PipeType>()
 ->getElementType()
 .getCanonicalType()
 .getAsString(Policy);
@@ -1493,6 +1554,9 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
 if (CodeGenOpts.UnwindTables)
 B.addAttribute(llvm::Attribute::UWTable);
+ if (CodeGenOpts.StackClashProtector)
+ B.addAttribute("probe-stack", "inline-asm");
+
 if (!hasUnwindExceptions(LangOpts))
 B.addAttribute(llvm::Attribute::NoUnwind);
@@ -1840,9 +1904,16 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
 else if (const auto *SA = FD->getAttr<SectionAttr>())
 F->setSection(SA->getName());
+ // If we plan on emitting this inline builtin, we can't treat it as a builtin.
 if (FD->isInlineBuiltinDeclaration()) {
- F->addAttribute(llvm::AttributeList::FunctionIndex,
- llvm::Attribute::NoBuiltin);
+ const FunctionDecl *FDBody;
+ bool HasBody = FD->hasBody(FDBody);
+ (void)HasBody;
+ assert(HasBody && "Inline builtin declarations should always have an "
+ "available body!");
+ if (shouldEmitFunction(FDBody))
+ F->addAttribute(llvm::AttributeList::FunctionIndex,
+ llvm::Attribute::NoBuiltin);
 }
 if (FD->isReplaceableGlobalAllocationFunction()) {
@@ -1850,15 +1921,6 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
 // default, only if it is invoked by a new-expression or delete-expression.
 F->addAttribute(llvm::AttributeList::FunctionIndex,
 llvm::Attribute::NoBuiltin);
-
- // A sane operator new returns a non-aliasing pointer.
- // FIXME: Also add NonNull attribute to the return value
- // for the non-nothrow forms?
- auto Kind = FD->getDeclName().getCXXOverloadedOperator(); - if (getCodeGenOpts().AssumeSaneOperatorNew && - (Kind == OO_New || Kind == OO_Array_New)) - F->addAttribute(llvm::AttributeList::ReturnIndex, - llvm::Attribute::NoAlias); } if (isa<CXXConstructorDecl>(FD) || isa<CXXDestructorDecl>(FD)) @@ -2375,13 +2437,8 @@ bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) { return true; } -ConstantAddress CodeGenModule::GetAddrOfUuidDescriptor( - const CXXUuidofExpr* E) { - // Sema has verified that IIDSource has a __declspec(uuid()), and that its - // well-formed. - StringRef Uuid = E->getUuidStr(); - std::string Name = "_GUID_" + Uuid.lower(); - std::replace(Name.begin(), Name.end(), '-', '_'); +ConstantAddress CodeGenModule::GetAddrOfMSGuidDecl(const MSGuidDecl *GD) { + StringRef Name = getMangledName(GD); // The UUID descriptor should be pointer aligned. CharUnits Alignment = CharUnits::fromQuantity(PointerAlignInBytes); @@ -2390,8 +2447,30 @@ ConstantAddress CodeGenModule::GetAddrOfUuidDescriptor( if (llvm::GlobalVariable *GV = getModule().getNamedGlobal(Name)) return ConstantAddress(GV, Alignment); - llvm::Constant *Init = EmitUuidofInitializer(Uuid); - assert(Init && "failed to initialize as constant"); + ConstantEmitter Emitter(*this); + llvm::Constant *Init; + + APValue &V = GD->getAsAPValue(); + if (!V.isAbsent()) { + // If possible, emit the APValue version of the initializer. In particular, + // this gets the type of the constant right. + Init = Emitter.emitForInitializer( + GD->getAsAPValue(), GD->getType().getAddressSpace(), GD->getType()); + } else { + // As a fallback, directly construct the constant. + // FIXME: This may get padding wrong under esoteric struct layout rules. + // MSVC appears to create a complete type 'struct __s_GUID' that it + // presumably uses to represent these constants. + MSGuidDecl::Parts Parts = GD->getParts(); + llvm::Constant *Fields[4] = { + llvm::ConstantInt::get(Int32Ty, Parts.Part1), + llvm::ConstantInt::get(Int16Ty, Parts.Part2), + llvm::ConstantInt::get(Int16Ty, Parts.Part3), + llvm::ConstantDataArray::getRaw( + StringRef(reinterpret_cast<char *>(Parts.Part4And5), 8), 8, + Int8Ty)}; + Init = llvm::ConstantStruct::getAnon(Fields); + } auto *GV = new llvm::GlobalVariable( getModule(), Init->getType(), @@ -2399,7 +2478,16 @@ ConstantAddress CodeGenModule::GetAddrOfUuidDescriptor( if (supportsCOMDAT()) GV->setComdat(TheModule.getOrInsertComdat(GV->getName())); setDSOLocal(GV); - return ConstantAddress(GV, Alignment); + + llvm::Constant *Addr = GV; + if (!V.isAbsent()) { + Emitter.finalize(GV); + } else { + llvm::Type *Ty = getTypes().ConvertTypeForMem(GD->getType()); + Addr = llvm::ConstantExpr::getBitCast( + GV, Ty->getPointerTo(GV->getAddressSpace())); + } + return ConstantAddress(Addr, Alignment); } ConstantAddress CodeGenModule::GetWeakRefReference(const ValueDecl *VD) { @@ -2461,7 +2549,8 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { !Global->hasAttr<CUDAGlobalAttr>() && !Global->hasAttr<CUDAConstantAttr>() && !Global->hasAttr<CUDASharedAttr>() && - !(LangOpts.HIP && Global->hasAttr<HIPPinnedShadowAttr>())) + !Global->getType()->isCUDADeviceBuiltinSurfaceType() && + !Global->getType()->isCUDADeviceBuiltinTextureType()) return; } else { // We need to emit host-side 'shadows' for all global @@ -2554,11 +2643,6 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { return; } - // Check if this must be emitted as declare variant. 
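
When the MSGuidDecl carries no evaluated APValue, the fallback above assembles the constant field by field from MSGuidDecl::Parts. A standalone sketch of that layout (the struct is ours, shown only to make the byte placement concrete):

    #include <cstdint>

    // Mirrors the {i32, i16, i16, [8 x i8]} anonymous struct built above.
    struct GuidParts {
      uint32_t Part1;        // first group, "12345678"
      uint16_t Part2;        // second group, "1234"
      uint16_t Part3;        // third group, "1234"
      uint8_t Part4And5[8];  // fourth + fifth groups, one byte per hex pair
    };

    // For uuid("12345678-1234-1234-1234-567890abcdef") this would be:
    const GuidParts G = {0x12345678, 0x1234, 0x1234,
                         {0x12, 0x34, 0x56, 0x78, 0x90, 0xab, 0xcd, 0xef}};
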
- if (LangOpts.OpenMP && isa<FunctionDecl>(Global) && OpenMPRuntime &&
- OpenMPRuntime->emitDeclareVariant(GD, /*IsForDefinition=*/false))
- return;
-
 // If we're deferring emission of a C++ variable with an
 // initializer, remember the order in which it appeared in the file.
 if (getLangOpts().CPlusPlus && isa<VarDecl>(Global) &&
@@ -2741,8 +2825,8 @@ bool CodeGenModule::shouldEmitFunction(GlobalDecl GD) {

 // PR9614. Avoid cases where the source code is lying to us. An available
 // externally function should have an equivalent function somewhere else,
- // but a function that calls itself is clearly not equivalent to the real
- // implementation.
+ // but a function that calls itself through asm label/`__builtin_` trickery is
+ // clearly not equivalent to the real implementation.
 // This happens in glibc's btowc and in some configure checks.
 return !isTriviallyRecursive(F);
 }
@@ -2764,50 +2848,6 @@ void CodeGenModule::EmitMultiVersionFunctionDefinition(GlobalDecl GD,
 EmitGlobalFunctionDefinition(GD, GV);
 }

-void CodeGenModule::emitOpenMPDeviceFunctionRedefinition(
- GlobalDecl OldGD, GlobalDecl NewGD, llvm::GlobalValue *GV) {
- assert(getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice &&
- OpenMPRuntime && "Expected OpenMP device mode.");
- const auto *D = cast<FunctionDecl>(OldGD.getDecl());
-
- // Compute the function info and LLVM type.
- const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(OldGD);
- llvm::FunctionType *Ty = getTypes().GetFunctionType(FI);
-
- // Get or create the prototype for the function.
- if (!GV || (GV->getType()->getElementType() != Ty)) {
- GV = cast<llvm::GlobalValue>(GetOrCreateLLVMFunction(
- getMangledName(OldGD), Ty, GlobalDecl(), /*ForVTable=*/false,
- /*DontDefer=*/true, /*IsThunk=*/false, llvm::AttributeList(),
- ForDefinition));
- SetFunctionAttributes(OldGD, cast<llvm::Function>(GV),
- /*IsIncompleteFunction=*/false,
- /*IsThunk=*/false);
- }
- // We need to set linkage and visibility on the function before
- // generating code for it because various parts of IR generation
- // want to propagate this information down (e.g. to local static
- // declarations).
- auto *Fn = cast<llvm::Function>(GV);
- setFunctionLinkage(OldGD, Fn);
-
- // FIXME: this is redundant with part of
- // setFunctionDefinitionAttributes
- setGVProperties(Fn, OldGD);
-
- MaybeHandleStaticInExternC(D, Fn);
-
- maybeSetTrivialComdat(*D, *Fn);
-
- CodeGenFunction(*this).GenerateCode(NewGD, Fn, FI);
-
- setNonAliasAttributes(OldGD, Fn);
- SetLLVMFunctionAttributesForDefinition(D, Fn);
-
- if (D->hasAttr<AnnotateAttr>())
- AddGlobalAnnotations(D, Fn);
-}
-
 void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
 const auto *D = cast<ValueDecl>(GD.getDecl());

@@ -3122,14 +3162,9 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
 EmitGlobal(GDDef);
 }
 }
- // Check if this must be emitted as declare variant and emit reference to
- // the the declare variant function.
- if (LangOpts.OpenMP && OpenMPRuntime) - (void)OpenMPRuntime->emitDeclareVariant(GD, /*IsForDefinition=*/true); if (FD->isMultiVersion()) { - const auto *TA = FD->getAttr<TargetAttr>(); - if (TA && TA->isDefaultVersion()) + if (FD->hasAttr<TargetAttr>()) UpdateMultiVersionNames(GD, FD); if (!IsForDefinition) return GetOrCreateMultiVersionResolver(GD, Ty, FD); @@ -3169,7 +3204,7 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( } if ((isa<llvm::Function>(Entry) || isa<llvm::GlobalAlias>(Entry)) && - (Entry->getType()->getElementType() == Ty)) { + (Entry->getValueType() == Ty)) { return Entry; } @@ -3218,7 +3253,7 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( } llvm::Constant *BC = llvm::ConstantExpr::getBitCast( - F, Entry->getType()->getElementType()->getPointerTo()); + F, Entry->getValueType()->getPointerTo()); addGlobalValReplacement(Entry, BC); } @@ -3277,7 +3312,7 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( // Make sure the result is of the requested type. if (!IsIncompleteFunction) { - assert(F->getType()->getElementType() == Ty); + assert(F->getFunctionType() == Ty); return F; } @@ -3293,6 +3328,8 @@ llvm::Constant *CodeGenModule::GetAddrOfFunction(GlobalDecl GD, bool ForVTable, bool DontDefer, ForDefinition_t IsForDefinition) { + assert(!cast<FunctionDecl>(GD.getDecl())->isConsteval() && + "consteval function should never be emitted"); // If there was no specific requested type, just convert it now. if (!Ty) { const auto *FD = cast<FunctionDecl>(GD.getDecl()); @@ -3568,7 +3605,7 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Constant *Init = emitter.tryEmitForInitializer(*InitDecl); if (Init) { auto *InitType = Init->getType(); - if (GV->getType()->getElementType() != InitType) { + if (GV->getValueType() != InitType) { // The type of the initializer does not match the definition. // This happens when an initializer has a different type from // the type of the global (because of padding at the end of a @@ -3611,26 +3648,29 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, } llvm::Constant * -CodeGenModule::GetAddrOfGlobal(GlobalDecl GD, - ForDefinition_t IsForDefinition) { +CodeGenModule::GetAddrOfGlobal(GlobalDecl GD, ForDefinition_t IsForDefinition) { const Decl *D = GD.getDecl(); + if (isa<CXXConstructorDecl>(D) || isa<CXXDestructorDecl>(D)) return getAddrOfCXXStructor(GD, /*FnInfo=*/nullptr, /*FnType=*/nullptr, /*DontDefer=*/false, IsForDefinition); - else if (isa<CXXMethodDecl>(D)) { - auto FInfo = &getTypes().arrangeCXXMethodDeclaration( - cast<CXXMethodDecl>(D)); + + if (isa<CXXMethodDecl>(D)) { + auto FInfo = + &getTypes().arrangeCXXMethodDeclaration(cast<CXXMethodDecl>(D)); auto Ty = getTypes().GetFunctionType(*FInfo); return GetAddrOfFunction(GD, Ty, /*ForVTable=*/false, /*DontDefer=*/false, IsForDefinition); - } else if (isa<FunctionDecl>(D)) { + } + + if (isa<FunctionDecl>(D)) { const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD); llvm::FunctionType *Ty = getTypes().GetFunctionType(FI); return GetAddrOfFunction(GD, Ty, /*ForVTable=*/false, /*DontDefer=*/false, IsForDefinition); - } else - return GetAddrOfGlobalVar(cast<VarDecl>(D), /*Ty=*/nullptr, - IsForDefinition); + } + + return GetAddrOfGlobalVar(cast<VarDecl>(D), /*Ty=*/nullptr, IsForDefinition); } llvm::GlobalVariable *CodeGenModule::CreateOrReplaceCXXRuntimeVariable( @@ -3641,7 +3681,7 @@ llvm::GlobalVariable *CodeGenModule::CreateOrReplaceCXXRuntimeVariable( if (GV) { // Check if the variable has the right type. 
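
One mechanical change recurs throughout this commit, here and in the hunk immediately below: the value type of a global is now queried directly instead of being peeled off its pointer type, which is the form that keeps working once pointee types become opaque. The before/after shape, distilled:

    #include "llvm/IR/GlobalValue.h"

    // Before: GV->getType()->getElementType() == Ty
    // After:  GV->getValueType() == Ty
    static bool hasValueType(llvm::GlobalValue *GV, llvm::Type *Ty) {
      return GV->getValueType() == Ty; // no detour through the pointer type
    }
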
- if (GV->getType()->getElementType() == Ty) + if (GV->getValueType() == Ty) return GV; // Because C++ name mangling, the only way we can end up with an already @@ -3915,12 +3955,16 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, !getLangOpts().CUDAIsDevice && (D->hasAttr<CUDAConstantAttr>() || D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDASharedAttr>()); + bool IsCUDADeviceShadowVar = + getLangOpts().CUDAIsDevice && + (D->getType()->isCUDADeviceBuiltinSurfaceType() || + D->getType()->isCUDADeviceBuiltinTextureType()); // HIP pinned shadow of initialized host-side global variables are also // left undefined. - bool IsHIPPinnedShadowVar = - getLangOpts().CUDAIsDevice && D->hasAttr<HIPPinnedShadowAttr>(); if (getLangOpts().CUDA && - (IsCUDASharedVar || IsCUDAShadowVar || IsHIPPinnedShadowVar)) + (IsCUDASharedVar || IsCUDAShadowVar || IsCUDADeviceShadowVar)) + Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy)); + else if (D->hasAttr<LoaderUninitializedAttr>()) Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy)); else if (!InitExpr) { // This is a tentative definition; tentative definitions are @@ -3979,7 +4023,7 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, // "extern int x[];") and then a definition of a different type (e.g. // "int x[10];"). This also happens when an initializer has a different type // from the type of the global (this happens with unions). - if (!GV || GV->getType()->getElementType() != InitType || + if (!GV || GV->getValueType() != InitType || GV->getType()->getAddressSpace() != getContext().getTargetAddressSpace(GetGlobalVarAddressSpace(D))) { @@ -4026,34 +4070,56 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, // global variables become internal definitions. These have to // be internal in order to prevent name conflicts with global // host variables with the same name in a different TUs. - if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() || - D->hasAttr<HIPPinnedShadowAttr>()) { + if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()) { Linkage = llvm::GlobalValue::InternalLinkage; - - // Shadow variables and their properties must be registered - // with CUDA runtime. - unsigned Flags = 0; - if (!D->hasDefinition()) - Flags |= CGCUDARuntime::ExternDeviceVar; - if (D->hasAttr<CUDAConstantAttr>()) - Flags |= CGCUDARuntime::ConstantDeviceVar; - // Extern global variables will be registered in the TU where they are - // defined. + // Shadow variables and their properties must be registered with CUDA + // runtime. Skip Extern global variables, which will be registered in + // the TU where they are defined. if (!D->hasExternalStorage()) - getCUDARuntime().registerDeviceVar(D, *GV, Flags); - } else if (D->hasAttr<CUDASharedAttr>()) + getCUDARuntime().registerDeviceVar(D, *GV, !D->hasDefinition(), + D->hasAttr<CUDAConstantAttr>()); + } else if (D->hasAttr<CUDASharedAttr>()) { // __shared__ variables are odd. Shadows do get created, but // they are not registered with the CUDA runtime, so they // can't really be used to access their device-side // counterparts. It's not clear yet whether it's nvcc's bug or // a feature, but we've got to do the same for compatibility. Linkage = llvm::GlobalValue::InternalLinkage; + } else if (D->getType()->isCUDADeviceBuiltinSurfaceType() || + D->getType()->isCUDADeviceBuiltinTextureType()) { + // Builtin surfaces and textures and their template arguments are + // also registered with CUDA runtime. 
+ Linkage = llvm::GlobalValue::InternalLinkage;
+ const ClassTemplateSpecializationDecl *TD =
+ cast<ClassTemplateSpecializationDecl>(
+ D->getType()->getAs<RecordType>()->getDecl());
+ const TemplateArgumentList &Args = TD->getTemplateArgs();
+ if (TD->hasAttr<CUDADeviceBuiltinSurfaceTypeAttr>()) {
+ assert(Args.size() == 2 &&
+ "Unexpected number of template arguments of CUDA device "
+ "builtin surface type.");
+ auto SurfType = Args[1].getAsIntegral();
+ if (!D->hasExternalStorage())
+ getCUDARuntime().registerDeviceSurf(D, *GV, !D->hasDefinition(),
+ SurfType.getSExtValue());
+ } else {
+ assert(Args.size() == 3 &&
+ "Unexpected number of template arguments of CUDA device "
+ "builtin texture type.");
+ auto TexType = Args[1].getAsIntegral();
+ auto Normalized = Args[2].getAsIntegral();
+ if (!D->hasExternalStorage())
+ getCUDARuntime().registerDeviceTex(D, *GV, !D->hasDefinition(),
+ TexType.getSExtValue(),
+ Normalized.getZExtValue());
+ }
+ }
 }
 }

- if (!IsHIPPinnedShadowVar)
- GV->setInitializer(Init);
- if (emitter) emitter->finalize(GV);
+ GV->setInitializer(Init);
+ if (emitter)
+ emitter->finalize(GV);

 // If it is safe to mark the global 'constant', do so now.
 GV->setConstant(!NeedsGlobalCtor && !NeedsGlobalDtor &&
@@ -4068,17 +4134,24 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,

 GV->setAlignment(getContext().getDeclAlign(D).getAsAlign());

- // On Darwin, if the normal linkage of a C++ thread_local variable is
- // LinkOnce or Weak, we keep the normal linkage to prevent multiple
- // copies within a linkage unit; otherwise, the backing variable has
- // internal linkage and all accesses should just be calls to the
- // Itanium-specified entry point, which has the normal linkage of the
- // variable. This is to preserve the ability to change the implementation
- // behind the scenes.
- if (!D->isStaticLocal() && D->getTLSKind() == VarDecl::TLS_Dynamic &&
+ // On Darwin, unlike other Itanium C++ ABI platforms, the thread-wrapper
+ // function is only defined alongside the variable, not also alongside
+ // callers. Normally, all accesses to a thread_local go through the
+ // thread-wrapper in order to ensure initialization has occurred, so the
+ // underlying variable will never be used other than through the
+ // thread-wrapper and can be converted to internal linkage.
+ //
+ // However, if the variable has the 'constinit' attribute, it _can_ be
+ // referenced directly, without calling the thread-wrapper, so the linkage
+ // must not be changed.
+ //
+ // Additionally, if the variable isn't plain external linkage, e.g. if it's
+ // weak or linkonce, the de-duplication semantics are important to preserve,
+ // so we don't change the linkage.
+ if (D->getTLSKind() == VarDecl::TLS_Dynamic &&
+ Linkage == llvm::GlobalValue::ExternalLinkage &&
 Context.getTargetInfo().getTriple().isOSDarwin() &&
- !llvm::GlobalVariable::isLinkOnceLinkage(Linkage) &&
- !llvm::GlobalVariable::isWeakLinkage(Linkage)
+ !D->hasAttr<ConstInitAttr>())
 Linkage = llvm::GlobalValue::InternalLinkage;

 GV->setLinkage(Linkage);
@@ -4421,11 +4494,6 @@ void CodeGenModule::HandleCXXStaticMemberVarInstantiation(VarDecl *VD) {

 void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
 llvm::GlobalValue *GV) {
- // Check if this must be emitted as declare variant.
- if (LangOpts.OpenMP && OpenMPRuntime &&
- OpenMPRuntime->emitDeclareVariant(GD, /*IsForDefinition=*/true))
- return;
-
 const auto *D = cast<FunctionDecl>(GD.getDecl());

 // Compute the function info and LLVM type.
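
For orientation, the asserts above imply the rough shape of the types being registered: a device builtin surface template carries two template arguments with the surface kind at index 1, and a texture template carries three, with the texture kind at index 1 and the normalization flag at index 2. A sketch under those assumptions (the real CUDA headers may differ in names and defaults):

    // Illustrative only; not the actual CUDA header declarations.
    template <class T, int dim = 1>
    struct __attribute__((device_builtin_surface_type)) surface {};

    template <class T, int texType = 1, int mode = 0>
    struct __attribute__((device_builtin_texture_type)) texture {};

    surface<void, 2> Surf;    // registered with SurfType == 2 (Args[1])
    texture<float, 3, 1> Tex; // registered with TexType == 3, Normalized == 1
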
@@ -4433,7 +4501,7 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD, llvm::FunctionType *Ty = getTypes().GetFunctionType(FI); // Get or create the prototype for the function. - if (!GV || (GV->getType()->getElementType() != Ty)) + if (!GV || (GV->getValueType() != Ty)) GV = cast<llvm::GlobalValue>(GetAddrOfFunction(GD, Ty, /*ForVTable=*/false, /*DontDefer=*/true, ForDefinition)); @@ -4457,7 +4525,7 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD, maybeSetTrivialComdat(*D, *Fn); - CodeGenFunction(*this).GenerateCode(D, Fn, FI); + CodeGenFunction(*this).GenerateCode(GD, Fn, FI); setNonAliasAttributes(GD, Fn); SetLLVMFunctionAttributesForDefinition(D, Fn); @@ -4509,8 +4577,9 @@ void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) { } // Create the new alias itself, but don't set a name yet. + unsigned AS = Aliasee->getType()->getPointerAddressSpace(); auto *GA = - llvm::GlobalAlias::create(DeclTy, 0, LT, "", Aliasee, &getModule()); + llvm::GlobalAlias::create(DeclTy, AS, LT, "", Aliasee, &getModule()); if (Entry) { if (GA->getAliasee() == Entry) { @@ -5258,6 +5327,11 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { if (D->isTemplated()) return; + // Consteval function shouldn't be emitted. + if (auto *FD = dyn_cast<FunctionDecl>(D)) + if (FD->isConsteval()) + return; + switch (D->getKind()) { case Decl::CXXConversion: case Decl::CXXMethod: @@ -5293,17 +5367,17 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { break; case Decl::ClassTemplateSpecialization: { const auto *Spec = cast<ClassTemplateSpecializationDecl>(D); - if (DebugInfo && - Spec->getSpecializationKind() == TSK_ExplicitInstantiationDefinition && - Spec->hasDefinition()) - DebugInfo->completeTemplateDefinition(*Spec); + if (CGDebugInfo *DI = getModuleDebugInfo()) + if (Spec->getSpecializationKind() == + TSK_ExplicitInstantiationDefinition && + Spec->hasDefinition()) + DI->completeTemplateDefinition(*Spec); } LLVM_FALLTHROUGH; case Decl::CXXRecord: - if (DebugInfo) { + if (CGDebugInfo *DI = getModuleDebugInfo()) if (auto *ES = D->getASTContext().getExternalSource()) if (ES->hasExternalDefinitions(D) == ExternalASTSource::EK_Never) - DebugInfo->completeUnusedClass(cast<CXXRecordDecl>(*D)); - } + DI->completeUnusedClass(cast<CXXRecordDecl>(*D)); // Emit any static data members, they may be definitions. 
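
The consteval skip added above (and the matching assert in GetAddrOfFunction) leans on a C++20 guarantee: every call to an immediate function is itself a constant expression, so by the time IR is generated only the folded result remains and no symbol is ever needed. For example:

    // C++20: square() is an immediate function. IR sees only 'nine = 9';
    // no definition or declaration of square() is ever emitted.
    consteval int square(int n) { return n * n; }
    int nine = square(3);
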
 for (auto *I : cast<CXXRecordDecl>(D)->decls())
 if (isa<VarDecl>(I) || isa<CXXRecordDecl>(I))
@@ -5324,15 +5398,15 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) {
 case Decl::Using: // using X; [C++]
 if (CGDebugInfo *DI = getModuleDebugInfo())
 DI->EmitUsingDecl(cast<UsingDecl>(*D));
- return;
+ break;
 case Decl::NamespaceAlias:
 if (CGDebugInfo *DI = getModuleDebugInfo())
 DI->EmitNamespaceAlias(cast<NamespaceAliasDecl>(*D));
- return;
+ break;
 case Decl::UsingDirective: // using namespace X; [C++]
 if (CGDebugInfo *DI = getModuleDebugInfo())
 DI->EmitUsingDirective(cast<UsingDirectiveDecl>(*D));
- return;
+ break;
 case Decl::CXXConstructor:
 getCXXABI().EmitCXXConstructors(cast<CXXConstructorDecl>(D));
 break;
@@ -5515,10 +5589,10 @@ void CodeGenModule::AddDeferredUnusedCoverageMapping(Decl *D) {
 case Decl::CXXConstructor:
 case Decl::CXXDestructor: {
 if (!cast<FunctionDecl>(D)->doesThisDeclarationHaveABody())
- return;
+ break;
 SourceManager &SM = getContext().getSourceManager();
 if (LimitedCoverage && SM.getMainFileID() != SM.getFileID(D->getBeginLoc()))
- return;
+ break;
 auto I = DeferredEmptyCoverageMappingDecls.find(D);
 if (I == DeferredEmptyCoverageMappingDecls.end())
 DeferredEmptyCoverageMappingDecls[D] = true;
@@ -5584,6 +5658,17 @@ void CodeGenModule::EmitDeferredUnusedCoverageMappings() {
 }
 }

+void CodeGenModule::EmitMainVoidAlias() {
+ // In order to transition away from "__original_main" gracefully, emit an
+ // alias for "main" in the no-argument case so that libc can detect when
+ // new-style no-argument main is in use.
+ if (llvm::Function *F = getModule().getFunction("main")) {
+ if (!F->isDeclaration() && F->arg_size() == 0 && !F->isVarArg() &&
+ F->getReturnType()->isIntegerTy(Context.getTargetInfo().getIntWidth()))
+ addUsedGlobal(llvm::GlobalAlias::create("__main_void", F));
+ }
+}
+
 /// Turns the given pointer into a constant.
 static llvm::Constant *GetPointerConstant(llvm::LLVMContext &Context,
 const void *Ptr) {
@@ -5698,21 +5783,6 @@ void CodeGenModule::EmitCommandLineMetadata() {
 CommandLineMetadata->addOperand(llvm::MDNode::get(Ctx, CommandLineNode));
 }

-void CodeGenModule::EmitTargetMetadata() {
- // Warning, new MangledDeclNames may be appended within this loop.
- // We rely on MapVector insertions adding new elements to the end
- // of the container.
- // FIXME: Move this loop into the one target that needs it, and only
- // loop over those declarations for which we couldn't emit the target
- // metadata when we emitted the declaration.
- for (unsigned I = 0; I != MangledDeclNames.size(); ++I) {
- auto Val = *(MangledDeclNames.begin() + I);
- const Decl *D = Val.first.getDecl()->getMostRecentDecl();
- llvm::GlobalValue *GV = GetGlobalValue(Val.second);
- getTargetCodeGenInfo().emitTargetMD(D, GV, *this);
- }
-}
-
 void CodeGenModule::EmitCoverageFile() {
 if (getCodeGenOpts().CoverageDataFile.empty() &&
 getCodeGenOpts().CoverageNotesFile.empty())
@@ -5735,39 +5805,14 @@ void CodeGenModule::EmitCoverageFile() {
 }
 }

-llvm::Constant *CodeGenModule::EmitUuidofInitializer(StringRef Uuid) {
- // Sema has checked that all uuid strings are of the form
- // "12345678-1234-1234-1234-1234567890ab".
- assert(Uuid.size() == 36);
- for (unsigned i = 0; i < 36; ++i) {
- if (i == 8 || i == 13 || i == 18 || i == 23) assert(Uuid[i] == '-');
- else assert(isHexDigit(Uuid[i]));
- }
-
- // The starts of all bytes of Field3 in Uuid. Field 3 is "1234-1234567890ab".
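
EmitMainVoidAlias above amounts to publishing one extra symbol next to a qualifying main. In hand-written IR terms the effect is roughly '@__main_void = alias i32 (), i32 ()* @main', marked used so it survives; a libc startup file can then probe for __main_void instead of guessing main's prototype. Which definitions qualify:

    int main() { return 0; }  // no args, int-width return: alias emitted

    // int main(int argc, char **argv) { ... }
    //   would not qualify: arg_size() != 0, so no __main_void is created
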
- const unsigned Field3ValueOffsets[8] = { 19, 21, 24, 26, 28, 30, 32, 34 }; - - llvm::Constant *Field3[8]; - for (unsigned Idx = 0; Idx < 8; ++Idx) - Field3[Idx] = llvm::ConstantInt::get( - Int8Ty, Uuid.substr(Field3ValueOffsets[Idx], 2), 16); - - llvm::Constant *Fields[4] = { - llvm::ConstantInt::get(Int32Ty, Uuid.substr(0, 8), 16), - llvm::ConstantInt::get(Int16Ty, Uuid.substr(9, 4), 16), - llvm::ConstantInt::get(Int16Ty, Uuid.substr(14, 4), 16), - llvm::ConstantArray::get(llvm::ArrayType::get(Int8Ty, 8), Field3) - }; - - return llvm::ConstantStruct::getAnon(Fields); -} - llvm::Constant *CodeGenModule::GetAddrOfRTTIDescriptor(QualType Ty, bool ForEH) { // Return a bogus pointer if RTTI is disabled, unless it's for EH. // FIXME: should we even be calling this method if RTTI is disabled // and it's not for EH? - if ((!ForEH && !getLangOpts().RTTI) || getLangOpts().CUDAIsDevice) + if ((!ForEH && !getLangOpts().RTTI) || getLangOpts().CUDAIsDevice || + (getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice && + getTriple().isNVPTX())) return llvm::Constant::getNullValue(Int8PtrTy); if (ForEH && Ty->isObjCObjectPointerType() && @@ -5911,3 +5956,99 @@ CodeGenModule::createOpenCLIntToSamplerConversion(const Expr *E, "__translate_sampler_initializer"), {C}); } + +CharUnits CodeGenModule::getNaturalPointeeTypeAlignment( + QualType T, LValueBaseInfo *BaseInfo, TBAAAccessInfo *TBAAInfo) { + return getNaturalTypeAlignment(T->getPointeeType(), BaseInfo, TBAAInfo, + /* forPointeeType= */ true); +} + +CharUnits CodeGenModule::getNaturalTypeAlignment(QualType T, + LValueBaseInfo *BaseInfo, + TBAAAccessInfo *TBAAInfo, + bool forPointeeType) { + if (TBAAInfo) + *TBAAInfo = getTBAAAccessInfo(T); + + // FIXME: This duplicates logic in ASTContext::getTypeAlignIfKnown. But + // that doesn't return the information we need to compute BaseInfo. + + // Honor alignment typedef attributes even on incomplete types. + // We also honor them straight for C++ class types, even as pointees; + // there's an expressivity gap here. + if (auto TT = T->getAs<TypedefType>()) { + if (auto Align = TT->getDecl()->getMaxAlignment()) { + if (BaseInfo) + *BaseInfo = LValueBaseInfo(AlignmentSource::AttributedType); + return getContext().toCharUnitsFromBits(Align); + } + } + + bool AlignForArray = T->isArrayType(); + + // Analyze the base element type, so we don't get confused by incomplete + // array types. + T = getContext().getBaseElementType(T); + + if (T->isIncompleteType()) { + // We could try to replicate the logic from + // ASTContext::getTypeAlignIfKnown, but nothing uses the alignment if the + // type is incomplete, so it's impossible to test. We could try to reuse + // getTypeAlignIfKnown, but that doesn't return the information we need + // to set BaseInfo. So just ignore the possibility that the alignment is + // greater than one. + if (BaseInfo) + *BaseInfo = LValueBaseInfo(AlignmentSource::Type); + return CharUnits::One(); + } + + if (BaseInfo) + *BaseInfo = LValueBaseInfo(AlignmentSource::Type); + + CharUnits Alignment; + // For C++ class pointees, we don't know whether we're pointing at a + // base or a complete object, so we generally need to use the + // non-virtual alignment. 
+ const CXXRecordDecl *RD; + if (forPointeeType && !AlignForArray && (RD = T->getAsCXXRecordDecl())) { + Alignment = getClassPointerAlignment(RD); + } else { + Alignment = getContext().getTypeAlignInChars(T); + if (T.getQualifiers().hasUnaligned()) + Alignment = CharUnits::One(); + } + + // Cap to the global maximum type alignment unless the alignment + // was somehow explicit on the type. + if (unsigned MaxAlign = getLangOpts().MaxTypeAlign) { + if (Alignment.getQuantity() > MaxAlign && + !getContext().isAlignmentRequired(T)) + Alignment = CharUnits::fromQuantity(MaxAlign); + } + return Alignment; +} + +bool CodeGenModule::stopAutoInit() { + unsigned StopAfter = getContext().getLangOpts().TrivialAutoVarInitStopAfter; + if (StopAfter) { + // This number is positive only when -ftrivial-auto-var-init-stop-after=* is + // used + if (NumAutoVarInit >= StopAfter) { + return true; + } + if (!NumAutoVarInit) { + unsigned DiagID = getDiags().getCustomDiagID( + DiagnosticsEngine::Warning, + "-ftrivial-auto-var-init-stop-after=%0 has been enabled to limit the " + "number of times ftrivial-auto-var-init=%1 gets applied."); + getDiags().Report(DiagID) + << StopAfter + << (getContext().getLangOpts().getTrivialAutoVarInit() == + LangOptions::TrivialAutoVarInitKind::Zero + ? "zero" + : "pattern"); + } + ++NumAutoVarInit; + } + return false; +} diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 115e754bb392..a6c4a1f7b278 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -26,6 +26,7 @@ #include "clang/Basic/LangOptions.h" #include "clang/Basic/Module.h" #include "clang/Basic/SanitizerBlacklist.h" +#include "clang/Basic/TargetInfo.h" #include "clang/Basic/XRayLists.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" @@ -301,6 +302,7 @@ private: const HeaderSearchOptions &HeaderSearchOpts; // Only used for debug info. const PreprocessorOptions &PreprocessorOpts; // Only used for debug info. const CodeGenOptions &CodeGenOpts; + unsigned NumAutoVarInit = 0; llvm::Module &TheModule; DiagnosticsEngine &Diags; const TargetInfo &Target; @@ -322,7 +324,6 @@ private: std::unique_ptr<CGObjCRuntime> ObjCRuntime; std::unique_ptr<CGOpenCLRuntime> OpenCLRuntime; std::unique_ptr<CGOpenMPRuntime> OpenMPRuntime; - std::unique_ptr<llvm::OpenMPIRBuilder> OMPBuilder; std::unique_ptr<CGCUDARuntime> CUDARuntime; std::unique_ptr<CGDebugInfo> DebugInfo; std::unique_ptr<ObjCEntrypoints> ObjCData; @@ -395,6 +396,10 @@ private: /// emitted when the translation unit is complete. CtorList GlobalDtors; + /// A unique trailing identifier as a part of sinit/sterm function when + /// UseSinitAndSterm of CXXABI is set as true. + std::string GlobalUniqueModuleId; + /// An ordered map of canonical GlobalDecls to their mangled names. llvm::MapVector<GlobalDecl, StringRef> MangledDeclNames; llvm::StringMap<GlobalDecl, llvm::BumpPtrAllocator> Manglings; @@ -463,9 +468,11 @@ private: SmallVector<GlobalInitData, 8> PrioritizedCXXGlobalInits; /// Global destructor functions and arguments that need to run on termination. + /// When UseSinitAndSterm is set, it instead contains sterm finalizer + /// functions, which also run on unloading a shared library. std::vector< std::tuple<llvm::FunctionType *, llvm::WeakTrackingVH, llvm::Constant *>> - CXXGlobalDtors; + CXXGlobalDtorsOrStermFinalizers; /// The complete set of modules that has been imported. 
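
The counter logic in stopAutoInit above makes -ftrivial-auto-var-init-stop-after a straight prefix cutoff per module: the first N automatic variables get initialized, everything after them is left alone, and the warning fires exactly once. Assuming -ftrivial-auto-var-init=pattern -ftrivial-auto-var-init-stop-after=2:

    void f(void) {
      int a; // pattern-initialized (1st)
      int b; // pattern-initialized (2nd)
      int c; // past the cutoff: stopAutoInit() returns true, left alone
    }
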
llvm::SetVector<clang::Module *> ImportedModules; @@ -589,9 +596,6 @@ public: return *OpenMPRuntime; } - /// Return a pointer to the configured OpenMPIRBuilder, if any. - llvm::OpenMPIRBuilder *getOpenMPIRBuilder() { return OMPBuilder.get(); } - /// Return a reference to the configured CUDA runtime. CGCUDARuntime &getCUDARuntime() { assert(CUDARuntime != nullptr); @@ -788,6 +792,9 @@ public: /// variable declaration D. void setTLSMode(llvm::GlobalValue *GV, const VarDecl &D) const; + /// Get LLVM TLS mode from CodeGenOptions. + llvm::GlobalVariable::ThreadLocalMode GetDefaultLLVMTLSModel() const; + static llvm::GlobalValue::VisibilityTypes GetLLVMVisibility(Visibility V) { switch (V) { case DefaultVisibility: return llvm::GlobalValue::DefaultVisibility; @@ -810,11 +817,10 @@ public: llvm::GlobalValue::LinkageTypes Linkage, unsigned Alignment); - llvm::Function * - CreateGlobalInitOrDestructFunction(llvm::FunctionType *ty, const Twine &name, - const CGFunctionInfo &FI, - SourceLocation Loc = SourceLocation(), - bool TLS = false); + llvm::Function *CreateGlobalInitOrCleanUpFunction( + llvm::FunctionType *ty, const Twine &name, const CGFunctionInfo &FI, + SourceLocation Loc = SourceLocation(), bool TLS = false, + bool IsExternalLinkage = false); /// Return the AST address space of the underlying global variable for D, as /// determined by its declaration. Normally this is the same as the address @@ -855,8 +861,8 @@ public: /// Get the address of the RTTI descriptor for the given type. llvm::Constant *GetAddrOfRTTIDescriptor(QualType Ty, bool ForEH = false); - /// Get the address of a uuid descriptor . - ConstantAddress GetAddrOfUuidDescriptor(const CXXUuidofExpr* E); + /// Get the address of a GUID. + ConstantAddress GetAddrOfMSGuidDecl(const MSGuidDecl *GD); /// Get the address of the thunk for the given global decl. llvm::Constant *GetAddrOfThunk(StringRef Name, llvm::Type *FnTy, @@ -868,6 +874,17 @@ public: /// Returns the assumed alignment of an opaque pointer to the given class. CharUnits getClassPointerAlignment(const CXXRecordDecl *CD); + /// Returns the minimum object size for an object of the given class type + /// (or a class derived from it). + CharUnits getMinimumClassObjectSize(const CXXRecordDecl *CD); + + /// Returns the minimum object size for an object of the given type. + CharUnits getMinimumObjectSize(QualType Ty) { + if (CXXRecordDecl *RD = Ty->getAsCXXRecordDecl()) + return getMinimumClassObjectSize(RD); + return getContext().getTypeSizeInChars(Ty); + } + /// Returns the assumed alignment of a virtual base of a class. CharUnits getVBaseAlignment(CharUnits DerivedAlign, const CXXRecordDecl *Derived, @@ -1012,6 +1029,9 @@ public: /// for the uninstrumented functions. void EmitDeferredUnusedCoverageMappings(); + /// Emit an alias for "main" if it has no arguments (needed for wasm). + void EmitMainVoidAlias(); + /// Tell the consumer that this variable has been instantiated. void HandleCXXStaticMemberVarInstantiation(VarDecl *VD); @@ -1029,8 +1049,14 @@ public: /// Add a destructor and object to add to the C++ global destructor function. void AddCXXDtorEntry(llvm::FunctionCallee DtorFn, llvm::Constant *Object) { - CXXGlobalDtors.emplace_back(DtorFn.getFunctionType(), DtorFn.getCallee(), - Object); + CXXGlobalDtorsOrStermFinalizers.emplace_back(DtorFn.getFunctionType(), + DtorFn.getCallee(), Object); + } + + /// Add an sterm finalizer to the C++ global cleanup function. 
+ void AddCXXStermFinalizerEntry(llvm::FunctionCallee DtorFn) { + CXXGlobalDtorsOrStermFinalizers.emplace_back(DtorFn.getFunctionType(), + DtorFn.getCallee(), nullptr); } /// Create or return a runtime function declaration with the specified type @@ -1155,7 +1181,11 @@ public: /// on the function more conservative. But it's unsafe to call this on a /// function which relies on particular fast-math attributes for correctness. /// It's up to you to ensure that this is safe. - void AddDefaultFnAttrs(llvm::Function &F); + void addDefaultFunctionDefinitionAttributes(llvm::Function &F); + + /// Like the overload taking a `Function &`, but intended specifically + /// for frontends that want to build on Clang's target-configuration logic. + void addDefaultFunctionDefinitionAttributes(llvm::AttrBuilder &attrs); StringRef getMangledName(GlobalDecl GD); StringRef getBlockMangledName(GlobalDecl GD, const BlockDecl *BD); @@ -1282,16 +1312,16 @@ public: /// \param D Requires declaration void EmitOMPRequiresDecl(const OMPRequiresDecl *D); - /// Emits the definition of \p OldGD function with body from \p NewGD. - /// Required for proper handling of declare variant directive on the GPU. - void emitOpenMPDeviceFunctionRedefinition(GlobalDecl OldGD, GlobalDecl NewGD, - llvm::GlobalValue *GV); - /// Returns whether the given record has hidden LTO visibility and therefore /// may participate in (single-module) CFI and whole-program vtable /// optimization. bool HasHiddenLTOVisibility(const CXXRecordDecl *RD); + /// Returns whether the given record has public std LTO visibility + /// and therefore may not participate in (single-module) CFI and whole-program + /// vtable optimization. + bool HasLTOVisibilityPublicStd(const CXXRecordDecl *RD); + /// Returns the vcall visibility of the given type. This is the scope in which /// a virtual function call could be made which ends up being dispatched to a /// member function of this class. This scope can be wider than the visibility @@ -1367,6 +1397,15 @@ public: /// \param QT is the clang QualType of the null pointer. llvm::Constant *getNullPointer(llvm::PointerType *T, QualType QT); + CharUnits getNaturalTypeAlignment(QualType T, + LValueBaseInfo *BaseInfo = nullptr, + TBAAAccessInfo *TBAAInfo = nullptr, + bool forPointeeType = false); + CharUnits getNaturalPointeeTypeAlignment(QualType T, + LValueBaseInfo *BaseInfo = nullptr, + TBAAAccessInfo *TBAAInfo = nullptr); + bool stopAutoInit(); + private: llvm::Constant *GetOrCreateLLVMFunction( StringRef MangledName, llvm::Type *Ty, GlobalDecl D, bool ForVTable, @@ -1417,8 +1456,8 @@ private: /// Emit the function that initializes C++ globals. void EmitCXXGlobalInitFunc(); - /// Emit the function that destroys C++ globals. - void EmitCXXGlobalDtorFunc(); + /// Emit the function that performs cleanup associated with C++ globals. + void EmitCXXGlobalCleanUpFunc(); /// Emit the function that initializes the specified global (if PerformInit is /// true) and registers its destructor. @@ -1489,8 +1528,9 @@ private: /// Emit the Clang commandline as llvm.commandline metadata. void EmitCommandLineMetadata(); - /// Emits target specific Metadata for global declarations. - void EmitTargetMetadata(); + /// Emit the module flag metadata used to pass options controlling the + /// the backend to LLVM. + void EmitBackendOptionsMetadata(const CodeGenOptions CodeGenOpts); /// Emits OpenCL specific Metadata e.g. OpenCL version. void EmitOpenCLMetadata(); @@ -1499,9 +1539,6 @@ private: /// .gcda files in a way that persists in .bc files. 
void EmitCoverageFile(); - /// Emits the initializer for a uuidof string. - llvm::Constant *EmitUuidofInitializer(StringRef uuidstr); - /// Determine whether the definition must be emitted; if this returns \c /// false, the definition can be emitted lazily if it's used. bool MustBeEmitted(const ValueDecl *D); @@ -1516,11 +1553,12 @@ private: /// function. void SimplifyPersonality(); - /// Helper function for ConstructAttributeList and AddDefaultFnAttrs. - /// Constructs an AttrList for a function with the given properties. - void ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, - bool AttrOnCallSite, - llvm::AttrBuilder &FuncAttrs); + /// Helper function for ConstructAttributeList and + /// addDefaultFunctionDefinitionAttributes. Builds a set of function + /// attributes to add to a function with the given properties. + void getDefaultFunctionAttributes(StringRef Name, bool HasOptnone, + bool AttrOnCallSite, + llvm::AttrBuilder &FuncAttrs); llvm::Metadata *CreateMetadataIdentifierImpl(QualType T, MetadataTypeMap &Map, StringRef Suffix); diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index bad796bf92dc..e810f608ab78 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -52,9 +52,10 @@ void CodeGenPGO::setFuncName(llvm::Function *Fn) { enum PGOHashVersion : unsigned { PGO_HASH_V1, PGO_HASH_V2, + PGO_HASH_V3, // Keep this set to the latest hash version. - PGO_HASH_LATEST = PGO_HASH_V2 + PGO_HASH_LATEST = PGO_HASH_V3 }; namespace { @@ -122,7 +123,7 @@ public: BinaryOperatorGE, BinaryOperatorEQ, BinaryOperatorNE, - // The preceding values are available with PGO_HASH_V2. + // The preceding values are available since PGO_HASH_V2. // Keep this last. It's for the static assert that follows. LastHashType @@ -144,7 +145,9 @@ static PGOHashVersion getPGOHashVersion(llvm::IndexedInstrProfReader *PGOReader, CodeGenModule &CGM) { if (PGOReader->getVersion() <= 4) return PGO_HASH_V1; - return PGO_HASH_V2; + if (PGOReader->getVersion() <= 5) + return PGO_HASH_V2; + return PGO_HASH_V3; } /// A RecursiveASTVisitor that fills a map of statements to PGO counters. @@ -288,7 +291,7 @@ struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> { return PGOHash::BinaryOperatorLAnd; if (BO->getOpcode() == BO_LOr) return PGOHash::BinaryOperatorLOr; - if (HashVersion == PGO_HASH_V2) { + if (HashVersion >= PGO_HASH_V2) { switch (BO->getOpcode()) { default: break; @@ -310,7 +313,7 @@ struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> { } } - if (HashVersion == PGO_HASH_V2) { + if (HashVersion >= PGO_HASH_V2) { switch (S->getStmtClass()) { default: break; @@ -747,13 +750,21 @@ uint64_t PGOHash::finalize() { return Working; // Check for remaining work in Working. - if (Working) - MD5.update(Working); + if (Working) { + // Keep the buggy behavior from v1 and v2 for backward-compatibility. This + // is buggy because it converts a uint64_t into an array of uint8_t. + if (HashVersion < PGO_HASH_V3) { + MD5.update({(uint8_t)Working}); + } else { + using namespace llvm::support; + uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working); + MD5.update(llvm::makeArrayRef((uint8_t *)&Swapped, sizeof(Swapped))); + } + } // Finalize the MD5 and return the hash. 
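
The backward-compatibility branch above is worth spelling out: in v1/v2, 'MD5.update({(uint8_t)Working})' hashes a one-element array, so the remaining work contributes only its low byte and, for example, Working values 0x141 and 0x241 contribute identically. V3 hashes all eight bytes in a fixed little-endian order, which also makes the result stable across host endianness. The two behaviors side by side, as a sketch:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/Support/Endian.h"
    #include "llvm/Support/MD5.h"

    void updateWorking(llvm::MD5 &MD5, uint64_t Working, bool V3) {
      if (!V3) {
        MD5.update({(uint8_t)Working}); // buggy: only the low byte survives
      } else {
        using namespace llvm::support;
        uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working);
        MD5.update(llvm::makeArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
      }
    }
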
llvm::MD5::MD5Result Result; MD5.final(Result); - using namespace llvm::support; return Result.low(); } @@ -1051,8 +1062,7 @@ llvm::MDNode *CodeGenFunction::createProfileWeightsForLoop(const Stmt *Cond, if (!PGO.haveRegionCounts()) return nullptr; Optional<uint64_t> CondCount = PGO.getStmtCount(Cond); - assert(CondCount.hasValue() && "missing expected loop condition count"); - if (*CondCount == 0) + if (!CondCount || *CondCount == 0) return nullptr; return createProfileWeights(LoopCount, std::max(*CondCount, LoopCount) - LoopCount); diff --git a/clang/lib/CodeGen/CodeGenPGO.h b/clang/lib/CodeGen/CodeGenPGO.h index a3778b549910..dda8c66b6db2 100644 --- a/clang/lib/CodeGen/CodeGenPGO.h +++ b/clang/lib/CodeGen/CodeGenPGO.h @@ -40,8 +40,8 @@ private: uint64_t CurrentRegionCount; public: - CodeGenPGO(CodeGenModule &CGM) - : CGM(CGM), FuncNameVar(nullptr), NumValueSites({{0}}), + CodeGenPGO(CodeGenModule &CGModule) + : CGM(CGModule), FuncNameVar(nullptr), NumValueSites({{0}}), NumRegionCounters(0), FunctionHash(0), CurrentRegionCount(0) {} /// Whether or not we have PGO region data for the current function. This is diff --git a/clang/lib/CodeGen/CodeGenTBAA.cpp b/clang/lib/CodeGen/CodeGenTBAA.cpp index 7d730cb1ed15..f4ebe6885675 100644 --- a/clang/lib/CodeGen/CodeGenTBAA.cpp +++ b/clang/lib/CodeGen/CodeGenTBAA.cpp @@ -141,6 +141,34 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) { case BuiltinType::UInt128: return getTypeInfo(Context.Int128Ty); + case BuiltinType::UShortFract: + return getTypeInfo(Context.ShortFractTy); + case BuiltinType::UFract: + return getTypeInfo(Context.FractTy); + case BuiltinType::ULongFract: + return getTypeInfo(Context.LongFractTy); + + case BuiltinType::SatUShortFract: + return getTypeInfo(Context.SatShortFractTy); + case BuiltinType::SatUFract: + return getTypeInfo(Context.SatFractTy); + case BuiltinType::SatULongFract: + return getTypeInfo(Context.SatLongFractTy); + + case BuiltinType::UShortAccum: + return getTypeInfo(Context.ShortAccumTy); + case BuiltinType::UAccum: + return getTypeInfo(Context.AccumTy); + case BuiltinType::ULongAccum: + return getTypeInfo(Context.LongAccumTy); + + case BuiltinType::SatUShortAccum: + return getTypeInfo(Context.SatShortAccumTy); + case BuiltinType::SatUAccum: + return getTypeInfo(Context.SatAccumTy); + case BuiltinType::SatULongAccum: + return getTypeInfo(Context.SatLongAccumTy); + // Treat all other builtin types as distinct types. This includes // treating wchar_t, char16_t, and char32_t as distinct from their // "underlying types". @@ -181,6 +209,15 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) { return createScalarTypeNode(OutName, getChar(), Size); } + if (const auto *EIT = dyn_cast<ExtIntType>(Ty)) { + SmallString<256> OutName; + llvm::raw_svector_ostream Out(OutName); + // Don't specify signed/unsigned since integer types can alias despite sign + // differences. + Out << "_ExtInt(" << EIT->getNumBits() << ')'; + return createScalarTypeNode(OutName, getChar(), Size); + } + // For now, handle any other kind of type conservatively. 
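
The practical effect of naming the node "_ExtInt(N)" with no sign marker: signed and unsigned _ExtInt of the same width share one TBAA node, consistent with how standard integer types are handled. So TBAA will not treat these two stores as independent:

    // Both pointees carry the "_ExtInt(24)" access type.
    void f(_ExtInt(24) *p, unsigned _ExtInt(24) *q) {
      *p = 1;
      *q = 2;
    }
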
return getChar(); } diff --git a/clang/lib/CodeGen/CodeGenTypeCache.h b/clang/lib/CodeGen/CodeGenTypeCache.h index ed4b773afd13..20a3263c0b1a 100644 --- a/clang/lib/CodeGen/CodeGenTypeCache.h +++ b/clang/lib/CodeGen/CodeGenTypeCache.h @@ -35,8 +35,8 @@ struct CodeGenTypeCache { /// i8, i16, i32, and i64 llvm::IntegerType *Int8Ty, *Int16Ty, *Int32Ty, *Int64Ty; - /// float, double - llvm::Type *HalfTy, *FloatTy, *DoubleTy; + /// half, bfloat, float, double + llvm::Type *HalfTy, *BFloatTy, *FloatTy, *DoubleTy; /// int llvm::IntegerType *IntTy; diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index a458811d7a30..d431c0263666 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -36,8 +36,6 @@ CodeGenTypes::CodeGenTypes(CodeGenModule &cgm) } CodeGenTypes::~CodeGenTypes() { - llvm::DeleteContainerSeconds(CGRecordLayouts); - for (llvm::FoldingSet<CGFunctionInfo>::iterator I = FunctionInfos.begin(), E = FunctionInfos.end(); I != E; ) delete &*I++; @@ -83,19 +81,26 @@ void CodeGenTypes::addRecordTypeName(const RecordDecl *RD, /// ConvertType in that it is used to convert to the memory representation for /// a type. For example, the scalar representation for _Bool is i1, but the /// memory representation is usually i8 or i32, depending on the target. -llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T) { +llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T, bool ForBitField) { + if (T->isConstantMatrixType()) { + const Type *Ty = Context.getCanonicalType(T).getTypePtr(); + const ConstantMatrixType *MT = cast<ConstantMatrixType>(Ty); + return llvm::ArrayType::get(ConvertType(MT->getElementType()), + MT->getNumRows() * MT->getNumColumns()); + } + llvm::Type *R = ConvertType(T); - // If this is a non-bool type, don't map it. - if (!R->isIntegerTy(1)) - return R; + // If this is a bool type, or an ExtIntType in a bitfield representation, + // map this integer to the target-specified size. + if ((ForBitField && T->isExtIntType()) || R->isIntegerTy(1)) + return llvm::IntegerType::get(getLLVMContext(), + (unsigned)Context.getTypeSize(T)); - // Otherwise, return an integer of the target-specified size. - return llvm::IntegerType::get(getLLVMContext(), - (unsigned)Context.getTypeSize(T)); + // Else, don't map it. + return R; } - /// isRecordLayoutComplete - Return true if the specified type is already /// completely laid out. bool CodeGenTypes::isRecordLayoutComplete(const Type *Ty) const { @@ -295,6 +300,8 @@ static llvm::Type *getTypeForFormat(llvm::LLVMContext &VMContext, else return llvm::Type::getInt16Ty(VMContext); } + if (&format == &llvm::APFloat::BFloat()) + return llvm::Type::getBFloatTy(VMContext); if (&format == &llvm::APFloat::IEEEsingle()) return llvm::Type::getFloatTy(VMContext); if (&format == &llvm::APFloat::IEEEdouble()) @@ -383,6 +390,20 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { const Type *Ty = T.getTypePtr(); + // For the device-side compilation, CUDA device builtin surface/texture types + // may be represented in different types. + if (Context.getLangOpts().CUDAIsDevice) { + if (T->isCUDADeviceBuiltinSurfaceType()) { + if (auto *Ty = CGM.getTargetCodeGenInfo() + .getCUDADeviceBuiltinSurfaceDeviceType()) + return Ty; + } else if (T->isCUDADeviceBuiltinTextureType()) { + if (auto *Ty = CGM.getTargetCodeGenInfo() + .getCUDADeviceBuiltinTextureDeviceType()) + return Ty; + } + } + // RecordTypes are cached and processed specially. 
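
Concretely, the updated ConvertTypeForMem distinguishes three cases: bool is i1 as a value but widens to its in-memory size; an _ExtInt keeps its exact width in memory yet is widened to the type's full storage size when it underlies a bit-field (the new ForBitField flag); and a constant matrix is laid out as a flat array. Declarations exercising each case:

    // Matrix types require clang's -fenable-matrix extension.
    typedef float m4x4 __attribute__((matrix_type(4, 4)));

    bool Flag;     // value: i1; memory: i8 on typical targets
    _ExtInt(17) X; // memory: i17; as a bit-field storage type: an iN of
                   //   the full Context.getTypeSize(T) width
    m4x4 M;        // memory: [16 x float]
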
if (const RecordType *RT = dyn_cast<RecordType>(Ty)) return ConvertRecordDeclType(RT->getDecl()); @@ -479,6 +500,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { Context.getLangOpts().NativeHalfType || !Context.getTargetInfo().useFP16ConversionIntrinsics()); break; + case BuiltinType::BFloat16: case BuiltinType::Float: case BuiltinType::Double: case BuiltinType::LongDouble: @@ -511,23 +533,99 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { case BuiltinType::OCLReserveID: ResultType = CGM.getOpenCLRuntime().convertOpenCLSpecificType(Ty); break; - - // TODO: real CodeGen support for SVE types requires more infrastructure - // to be added first. Report an error until then. -#define SVE_TYPE(Name, Id, SingletonId) case BuiltinType::Id: -#include "clang/Basic/AArch64SVEACLETypes.def" - { - unsigned DiagID = CGM.getDiags().getCustomDiagID( - DiagnosticsEngine::Error, - "cannot yet generate code for SVE type '%0'"); - auto *BT = cast<BuiltinType>(Ty); - auto Name = BT->getName(CGM.getContext().getPrintingPolicy()); - CGM.getDiags().Report(DiagID) << Name; - // Return something safe. - ResultType = llvm::IntegerType::get(getLLVMContext(), 32); - break; - } - +#define GET_SVE_INT_VEC(BITS, ELTS) \ + llvm::ScalableVectorType::get( \ + llvm::IntegerType::get(getLLVMContext(), BITS), ELTS); + case BuiltinType::SveInt8: + case BuiltinType::SveUint8: + return GET_SVE_INT_VEC(8, 16); + case BuiltinType::SveInt8x2: + case BuiltinType::SveUint8x2: + return GET_SVE_INT_VEC(8, 32); + case BuiltinType::SveInt8x3: + case BuiltinType::SveUint8x3: + return GET_SVE_INT_VEC(8, 48); + case BuiltinType::SveInt8x4: + case BuiltinType::SveUint8x4: + return GET_SVE_INT_VEC(8, 64); + case BuiltinType::SveInt16: + case BuiltinType::SveUint16: + return GET_SVE_INT_VEC(16, 8); + case BuiltinType::SveInt16x2: + case BuiltinType::SveUint16x2: + return GET_SVE_INT_VEC(16, 16); + case BuiltinType::SveInt16x3: + case BuiltinType::SveUint16x3: + return GET_SVE_INT_VEC(16, 24); + case BuiltinType::SveInt16x4: + case BuiltinType::SveUint16x4: + return GET_SVE_INT_VEC(16, 32); + case BuiltinType::SveInt32: + case BuiltinType::SveUint32: + return GET_SVE_INT_VEC(32, 4); + case BuiltinType::SveInt32x2: + case BuiltinType::SveUint32x2: + return GET_SVE_INT_VEC(32, 8); + case BuiltinType::SveInt32x3: + case BuiltinType::SveUint32x3: + return GET_SVE_INT_VEC(32, 12); + case BuiltinType::SveInt32x4: + case BuiltinType::SveUint32x4: + return GET_SVE_INT_VEC(32, 16); + case BuiltinType::SveInt64: + case BuiltinType::SveUint64: + return GET_SVE_INT_VEC(64, 2); + case BuiltinType::SveInt64x2: + case BuiltinType::SveUint64x2: + return GET_SVE_INT_VEC(64, 4); + case BuiltinType::SveInt64x3: + case BuiltinType::SveUint64x3: + return GET_SVE_INT_VEC(64, 6); + case BuiltinType::SveInt64x4: + case BuiltinType::SveUint64x4: + return GET_SVE_INT_VEC(64, 8); + case BuiltinType::SveBool: + return GET_SVE_INT_VEC(1, 16); +#undef GET_SVE_INT_VEC +#define GET_SVE_FP_VEC(TY, ISFP16, ELTS) \ + llvm::ScalableVectorType::get( \ + getTypeForFormat(getLLVMContext(), \ + Context.getFloatTypeSemantics(Context.TY), \ + /* UseNativeHalf = */ ISFP16), \ + ELTS); + case BuiltinType::SveFloat16: + return GET_SVE_FP_VEC(HalfTy, true, 8); + case BuiltinType::SveFloat16x2: + return GET_SVE_FP_VEC(HalfTy, true, 16); + case BuiltinType::SveFloat16x3: + return GET_SVE_FP_VEC(HalfTy, true, 24); + case BuiltinType::SveFloat16x4: + return GET_SVE_FP_VEC(HalfTy, true, 32); + case BuiltinType::SveFloat32: + return GET_SVE_FP_VEC(FloatTy, false, 4); + case 
BuiltinType::SveFloat32x2: + return GET_SVE_FP_VEC(FloatTy, false, 8); + case BuiltinType::SveFloat32x3: + return GET_SVE_FP_VEC(FloatTy, false, 12); + case BuiltinType::SveFloat32x4: + return GET_SVE_FP_VEC(FloatTy, false, 16); + case BuiltinType::SveFloat64: + return GET_SVE_FP_VEC(DoubleTy, false, 2); + case BuiltinType::SveFloat64x2: + return GET_SVE_FP_VEC(DoubleTy, false, 4); + case BuiltinType::SveFloat64x3: + return GET_SVE_FP_VEC(DoubleTy, false, 6); + case BuiltinType::SveFloat64x4: + return GET_SVE_FP_VEC(DoubleTy, false, 8); + case BuiltinType::SveBFloat16: + return GET_SVE_FP_VEC(BFloat16Ty, false, 8); + case BuiltinType::SveBFloat16x2: + return GET_SVE_FP_VEC(BFloat16Ty, false, 16); + case BuiltinType::SveBFloat16x3: + return GET_SVE_FP_VEC(BFloat16Ty, false, 24); + case BuiltinType::SveBFloat16x4: + return GET_SVE_FP_VEC(BFloat16Ty, false, 32); +#undef GET_SVE_FP_VEC case BuiltinType::Dependent: #define BUILTIN_TYPE(Id, SingletonId) #define PLACEHOLDER_TYPE(Id, SingletonId) \ @@ -560,7 +658,11 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { llvm::Type *PointeeType = ConvertTypeForMem(ETy); if (PointeeType->isVoidTy()) PointeeType = llvm::Type::getInt8Ty(getLLVMContext()); - unsigned AS = Context.getTargetAddressSpace(ETy); + + unsigned AS = PointeeType->isFunctionTy() + ? getDataLayout().getProgramAddressSpace() + : Context.getTargetAddressSpace(ETy); + ResultType = llvm::PointerType::get(PointeeType, AS); break; } @@ -605,8 +707,15 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { case Type::ExtVector: case Type::Vector: { const VectorType *VT = cast<VectorType>(Ty); - ResultType = llvm::VectorType::get(ConvertType(VT->getElementType()), - VT->getNumElements()); + ResultType = llvm::FixedVectorType::get(ConvertType(VT->getElementType()), + VT->getNumElements()); + break; + } + case Type::ConstantMatrix: { + const ConstantMatrixType *MT = cast<ConstantMatrixType>(Ty); + ResultType = + llvm::FixedVectorType::get(ConvertType(MT->getElementType()), + MT->getNumRows() * MT->getNumColumns()); break; } case Type::FunctionNoProto: @@ -692,6 +801,11 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { ResultType = CGM.getOpenCLRuntime().getPipeType(cast<PipeType>(Ty)); break; } + case Type::ExtInt: { + const auto &EIT = cast<ExtIntType>(Ty); + ResultType = llvm::Type::getIntNTy(getLLVMContext(), EIT->getNumBits()); + break; + } } assert(ResultType && "Didn't convert a type?"); @@ -749,8 +863,8 @@ llvm::StructType *CodeGenTypes::ConvertRecordDeclType(const RecordDecl *RD) { } // Layout fields. - CGRecordLayout *Layout = ComputeRecordLayout(RD, Ty); - CGRecordLayouts[Key] = Layout; + std::unique_ptr<CGRecordLayout> Layout = ComputeRecordLayout(RD, Ty); + CGRecordLayouts[Key] = std::move(Layout); // We're done laying out this struct. bool EraseResult = RecordsBeingLaidOut.erase(Key); (void)EraseResult; @@ -776,17 +890,18 @@ const CGRecordLayout & CodeGenTypes::getCGRecordLayout(const RecordDecl *RD) { const Type *Key = Context.getTagDeclType(RD).getTypePtr(); - const CGRecordLayout *Layout = CGRecordLayouts.lookup(Key); - if (!Layout) { - // Compute the type information. - ConvertRecordDeclType(RD); + auto I = CGRecordLayouts.find(Key); + if (I != CGRecordLayouts.end()) + return *I->second; + // Compute the type information. + ConvertRecordDeclType(RD); - // Now try again. - Layout = CGRecordLayouts.lookup(Key); - } + // Now try again. 
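
The long case list above encodes a single rule for the SVE data types: one vector holds a minimum of 128 bits' worth of elements, the xN tuple types multiply that count by N, and svbool_t is pinned at 16 x i1. The integer half of the mapping, condensed into a helper of our own for illustration:

    #include "llvm/IR/DerivedTypes.h"

    // Element count = (128 / element bits) * tuple arity, as in the
    // GET_SVE_INT_VEC cases above (predicates are special-cased).
    llvm::ScalableVectorType *sveIntVec(llvm::LLVMContext &Ctx, unsigned Bits,
                                        unsigned Tuple = 1) {
      return llvm::ScalableVectorType::get(llvm::IntegerType::get(Ctx, Bits),
                                           (128 / Bits) * Tuple);
    }
    // sveIntVec(Ctx, 32)    -> <vscale x 4 x i32>  (svint32_t)
    // sveIntVec(Ctx, 16, 4) -> <vscale x 32 x i16> (svint16x4_t)
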
+ I = CGRecordLayouts.find(Key); - assert(Layout && "Unable to find record layout information for type"); - return *Layout; + assert(I != CGRecordLayouts.end() && + "Unable to find record layout information for type"); + return *I->second; } bool CodeGenTypes::isPointerZeroInitializable(QualType T) { diff --git a/clang/lib/CodeGen/CodeGenTypes.h b/clang/lib/CodeGen/CodeGenTypes.h index 03102329507e..f8f7542e4c83 100644 --- a/clang/lib/CodeGen/CodeGenTypes.h +++ b/clang/lib/CodeGen/CodeGenTypes.h @@ -75,7 +75,7 @@ class CodeGenTypes { llvm::DenseMap<const ObjCInterfaceType*, llvm::Type *> InterfaceTypes; /// Maps clang struct type with corresponding record layout info. - llvm::DenseMap<const Type*, CGRecordLayout *> CGRecordLayouts; + llvm::DenseMap<const Type*, std::unique_ptr<CGRecordLayout>> CGRecordLayouts; /// Contains the LLVM IR type for any converted RecordDecl. llvm::DenseMap<const Type*, llvm::StructType *> RecordDeclTypes; @@ -134,7 +134,7 @@ public: /// ConvertType in that it is used to convert to the memory representation for /// a type. For example, the scalar representation for _Bool is i1, but the /// memory representation is usually i8 or i32, depending on the target. - llvm::Type *ConvertTypeForMem(QualType T); + llvm::Type *ConvertTypeForMem(QualType T, bool ForBitField = false); /// GetFunctionType - Get the LLVM function type for \arg Info. llvm::FunctionType *GetFunctionType(const CGFunctionInfo &Info); @@ -272,8 +272,8 @@ public: RequiredArgs args); /// Compute a new LLVM record layout object for the given record. - CGRecordLayout *ComputeRecordLayout(const RecordDecl *D, - llvm::StructType *Ty); + std::unique_ptr<CGRecordLayout> ComputeRecordLayout(const RecordDecl *D, + llvm::StructType *Ty); /// addRecordTypeName - Compute a name from the given record decl with an /// optional suffix and name the given LLVM type using it. diff --git a/clang/lib/CodeGen/ConstantEmitter.h b/clang/lib/CodeGen/ConstantEmitter.h index 121acbac4fa9..188b82e56f53 100644 --- a/clang/lib/CodeGen/ConstantEmitter.h +++ b/clang/lib/CodeGen/ConstantEmitter.h @@ -110,6 +110,8 @@ public: llvm::Constant *tryEmitAbstract(const APValue &value, QualType T); llvm::Constant *tryEmitAbstractForMemory(const APValue &value, QualType T); + llvm::Constant *tryEmitConstantExpr(const ConstantExpr *CE); + llvm::Constant *emitNullForMemory(QualType T) { return emitNullForMemory(CGM, T); } diff --git a/clang/lib/CodeGen/ConstantInitBuilder.cpp b/clang/lib/CodeGen/ConstantInitBuilder.cpp index 2d63d88020be..24e3ca19709c 100644 --- a/clang/lib/CodeGen/ConstantInitBuilder.cpp +++ b/clang/lib/CodeGen/ConstantInitBuilder.cpp @@ -128,8 +128,14 @@ void ConstantAggregateBuilderBase::addSize(CharUnits size) { llvm::Constant * ConstantAggregateBuilderBase::getRelativeOffset(llvm::IntegerType *offsetType, llvm::Constant *target) { + return getRelativeOffsetToPosition(offsetType, target, + Builder.Buffer.size() - Begin); +} + +llvm::Constant *ConstantAggregateBuilderBase::getRelativeOffsetToPosition( + llvm::IntegerType *offsetType, llvm::Constant *target, size_t position) { // Compute the address of the relative-address slot. - auto base = getAddrOfCurrentPosition(offsetType); + auto base = getAddrOfPosition(offsetType, position); // Subtract. 
base = llvm::ConstantExpr::getPtrToInt(base, Builder.CGM.IntPtrTy); @@ -145,6 +151,20 @@ ConstantAggregateBuilderBase::getRelativeOffset(llvm::IntegerType *offsetType, } llvm::Constant * +ConstantAggregateBuilderBase::getAddrOfPosition(llvm::Type *type, + size_t position) { + // Make a global variable. We will replace this with a GEP to this + // position after installing the initializer. + auto dummy = new llvm::GlobalVariable(Builder.CGM.getModule(), type, true, + llvm::GlobalVariable::PrivateLinkage, + nullptr, ""); + Builder.SelfReferences.emplace_back(dummy); + auto &entry = Builder.SelfReferences.back(); + (void)getGEPIndicesTo(entry.Indices, position + Begin); + return dummy; +} + +llvm::Constant * ConstantAggregateBuilderBase::getAddrOfCurrentPosition(llvm::Type *type) { // Make a global variable. We will replace this with a GEP to this // position after installing the initializer. diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp index bdecff39c88f..78b268f423cb 100644 --- a/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -13,10 +13,13 @@ #include "CoverageMappingGen.h" #include "CodeGenFunction.h" #include "clang/AST/StmtVisitor.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/FileManager.h" +#include "clang/Frontend/FrontendDiagnostic.h" #include "clang/Lex/Lexer.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/Optional.h" #include "llvm/ProfileData/Coverage/CoverageMapping.h" #include "llvm/ProfileData/Coverage/CoverageMappingReader.h" #include "llvm/ProfileData/Coverage/CoverageMappingWriter.h" @@ -24,6 +27,10 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" +// This selects the coverage mapping format defined when `InstrProfData.inc` +// is textually included. 
+#define COVMAP_V3
+
 using namespace clang;
 using namespace CodeGen;
 using namespace llvm::coverage;
@@ -901,6 +908,18 @@ struct CounterCoverageMappingBuilder
     terminateRegion(S);
   }
+  void VisitCoroutineBodyStmt(const CoroutineBodyStmt *S) {
+    extendRegion(S);
+    Visit(S->getBody());
+  }
+
+  void VisitCoreturnStmt(const CoreturnStmt *S) {
+    extendRegion(S);
+    if (S->getOperand())
+      Visit(S->getOperand());
+    terminateRegion(S);
+  }
+
   void VisitCXXThrowExpr(const CXXThrowExpr *E) {
     extendRegion(E);
     if (E->getSubExpr())
@@ -1272,17 +1291,11 @@ struct CounterCoverageMappingBuilder
   }
 };
-std::string getCoverageSection(const CodeGenModule &CGM) {
-  return llvm::getInstrProfSectionName(
-      llvm::IPSK_covmap,
-      CGM.getContext().getTargetInfo().getTriple().getObjectFormat());
-}
-
 std::string normalizeFilename(StringRef Filename) {
   llvm::SmallString<256> Path(Filename);
   llvm::sys::fs::make_absolute(Path);
   llvm::sys::path::remove_dots(Path, /*remove_dot_dot=*/true);
-  return Path.str().str();
+  return std::string(Path);
 }
 } // end anonymous namespace
@@ -1317,30 +1330,71 @@ static void dump(llvm::raw_ostream &OS, StringRef FunctionName,
   }
 }
-void CoverageMappingModuleGen::addFunctionMappingRecord(
-    llvm::GlobalVariable *NamePtr, StringRef NameValue, uint64_t FuncHash,
-    const std::string &CoverageMapping, bool IsUsed) {
+static std::string getInstrProfSection(const CodeGenModule &CGM,
+                                       llvm::InstrProfSectKind SK) {
+  return llvm::getInstrProfSectionName(
+      SK, CGM.getContext().getTargetInfo().getTriple().getObjectFormat());
+}
+
+void CoverageMappingModuleGen::emitFunctionMappingRecord(
+    const FunctionInfo &Info, uint64_t FilenamesRef) {
   llvm::LLVMContext &Ctx = CGM.getLLVMContext();
-  if (!FunctionRecordTy) {
+
+  // Assign a name to the function record. This is used to merge duplicates.
+  std::string FuncRecordName = "__covrec_" + llvm::utohexstr(Info.NameHash);
+
+  // A dummy description for a function included-but-not-used in a TU can be
+  // replaced by a full description provided by a different TU. The two kinds
+  // of descriptions play distinct roles; therefore, assign them different
+  // names to prevent `linkonce_odr` merging.
+  if (Info.IsUsed)
+    FuncRecordName += "u";
+
+  // Create the function record type.
+  const uint64_t NameHash = Info.NameHash;
+  const uint64_t FuncHash = Info.FuncHash;
+  const std::string &CoverageMapping = Info.CoverageMapping;
 #define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) LLVMType,
-    llvm::Type *FunctionRecordTypes[] = {
-      #include "llvm/ProfileData/InstrProfData.inc"
-    };
-    FunctionRecordTy =
-        llvm::StructType::get(Ctx, makeArrayRef(FunctionRecordTypes),
-                              /*isPacked=*/true);
-  }
+  llvm::Type *FunctionRecordTypes[] = {
+#include "llvm/ProfileData/InstrProfData.inc"
+  };
+  auto *FunctionRecordTy =
+      llvm::StructType::get(Ctx, makeArrayRef(FunctionRecordTypes),
+                            /*isPacked=*/true);
-  #define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) Init,
+  // Create the function record constant.
+#define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) Init,
   llvm::Constant *FunctionRecordVals[] = {
 #include "llvm/ProfileData/InstrProfData.inc"
   };
-  FunctionRecords.push_back(llvm::ConstantStruct::get(
-      FunctionRecordTy, makeArrayRef(FunctionRecordVals)));
+  auto *FuncRecordConstant = llvm::ConstantStruct::get(
+      FunctionRecordTy, makeArrayRef(FunctionRecordVals));
+
+  // Create the function record global.
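A minimal sketch of the record-naming scheme above (llvm::utohexstr yields uppercase hex with no leading zeros; covRecordName is a hypothetical stand-in):

#include <cstdint>
#include <cstdio>
#include <string>

// Name the per-function coverage record after the function-name hash. Used
// functions get a trailing "u" so the full record for a used function and
// the dummy record for an unused copy get distinct linkonce_odr keys and
// can never merge with each other.
std::string covRecordName(uint64_t NameHash, bool IsUsed) {
  char Buf[32];
  std::snprintf(Buf, sizeof(Buf), "__covrec_%llX",
                (unsigned long long)NameHash);
  std::string Name = Buf;
  if (IsUsed)
    Name += 'u';
  return Name;
}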
+ auto *FuncRecord = new llvm::GlobalVariable( + CGM.getModule(), FunctionRecordTy, /*isConstant=*/true, + llvm::GlobalValue::LinkOnceODRLinkage, FuncRecordConstant, + FuncRecordName); + FuncRecord->setVisibility(llvm::GlobalValue::HiddenVisibility); + FuncRecord->setSection(getInstrProfSection(CGM, llvm::IPSK_covfun)); + FuncRecord->setAlignment(llvm::Align(8)); + if (CGM.supportsCOMDAT()) + FuncRecord->setComdat(CGM.getModule().getOrInsertComdat(FuncRecordName)); + + // Make sure the data doesn't get deleted. + CGM.addUsedGlobal(FuncRecord); +} + +void CoverageMappingModuleGen::addFunctionMappingRecord( + llvm::GlobalVariable *NamePtr, StringRef NameValue, uint64_t FuncHash, + const std::string &CoverageMapping, bool IsUsed) { + llvm::LLVMContext &Ctx = CGM.getLLVMContext(); + const uint64_t NameHash = llvm::IndexedInstrProf::ComputeHash(NameValue); + FunctionRecords.push_back({NameHash, FuncHash, CoverageMapping, IsUsed}); + if (!IsUsed) FunctionNames.push_back( llvm::ConstantExpr::getBitCast(NamePtr, llvm::Type::getInt8PtrTy(Ctx))); - CoverageMappings.push_back(CoverageMapping); if (CGM.getCodeGenOpts().DumpCoverageMapping) { // Dump the coverage mapping data for this function by decoding the @@ -1385,37 +1439,22 @@ void CoverageMappingModuleGen::emit() { FilenameRefs[I] = FilenameStrs[I]; } - std::string FilenamesAndCoverageMappings; - llvm::raw_string_ostream OS(FilenamesAndCoverageMappings); - CoverageFilenamesSectionWriter(FilenameRefs).write(OS); - - // Stream the content of CoverageMappings to OS while keeping - // memory consumption under control. - size_t CoverageMappingSize = 0; - for (auto &S : CoverageMappings) { - CoverageMappingSize += S.size(); - OS << S; - S.clear(); - S.shrink_to_fit(); - } - CoverageMappings.clear(); - CoverageMappings.shrink_to_fit(); - - size_t FilenamesSize = OS.str().size() - CoverageMappingSize; - // Append extra zeroes if necessary to ensure that the size of the filenames - // and coverage mappings is a multiple of 8. - if (size_t Rem = OS.str().size() % 8) { - CoverageMappingSize += 8 - Rem; - OS.write_zeros(8 - Rem); + std::string Filenames; + { + llvm::raw_string_ostream OS(Filenames); + CoverageFilenamesSectionWriter(FilenameRefs).write(OS); } - auto *FilenamesAndMappingsVal = - llvm::ConstantDataArray::getString(Ctx, OS.str(), false); + auto *FilenamesVal = + llvm::ConstantDataArray::getString(Ctx, Filenames, false); + const int64_t FilenamesRef = llvm::IndexedInstrProf::ComputeHash(Filenames); - // Create the deferred function records array - auto RecordsTy = - llvm::ArrayType::get(FunctionRecordTy, FunctionRecords.size()); - auto RecordsVal = llvm::ConstantArray::get(RecordsTy, FunctionRecords); + // Emit the function records. 
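The repeated #define COVMAP_FUNC_RECORD / #include "llvm/ProfileData/InstrProfData.inc" pairs above are the classic X-macro pattern: the .inc file expands a caller-supplied macro once per field, so a single field list can generate both the member types and the initializers. A toy sketch with hypothetical names:

// One description of the fields...
#define RECORD_FIELDS(X)                                                     \
  X(unsigned long long, NameHash)                                            \
  X(unsigned, DataSize)

// ...expanded once to declare a struct...
struct FuncRecord {
#define X(Type, Name) Type Name;
  RECORD_FIELDS(X)
#undef X
};

// ...and once more to build an initializer for it.
FuncRecord makeZeroRecord() {
  return FuncRecord{
#define X(Type, Name) 0,
      RECORD_FIELDS(X)
#undef X
  };
}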
+ for (const FunctionInfo &Info : FunctionRecords) + emitFunctionMappingRecord(Info, FilenamesRef); + const unsigned NRecords = 0; + const size_t FilenamesSize = Filenames.size(); + const unsigned CoverageMappingSize = 0; llvm::Type *CovDataHeaderTypes[] = { #define COVMAP_HEADER(Type, LLVMType, Name, Init) LLVMType, #include "llvm/ProfileData/InstrProfData.inc" @@ -1430,18 +1469,16 @@ void CoverageMappingModuleGen::emit() { CovDataHeaderTy, makeArrayRef(CovDataHeaderVals)); // Create the coverage data record - llvm::Type *CovDataTypes[] = {CovDataHeaderTy, RecordsTy, - FilenamesAndMappingsVal->getType()}; + llvm::Type *CovDataTypes[] = {CovDataHeaderTy, FilenamesVal->getType()}; auto CovDataTy = llvm::StructType::get(Ctx, makeArrayRef(CovDataTypes)); - llvm::Constant *TUDataVals[] = {CovDataHeaderVal, RecordsVal, - FilenamesAndMappingsVal}; + llvm::Constant *TUDataVals[] = {CovDataHeaderVal, FilenamesVal}; auto CovDataVal = llvm::ConstantStruct::get(CovDataTy, makeArrayRef(TUDataVals)); auto CovData = new llvm::GlobalVariable( - CGM.getModule(), CovDataTy, true, llvm::GlobalValue::InternalLinkage, + CGM.getModule(), CovDataTy, true, llvm::GlobalValue::PrivateLinkage, CovDataVal, llvm::getCoverageMappingVarName()); - CovData->setSection(getCoverageSection(CGM)); + CovData->setSection(getInstrProfSection(CGM, llvm::IPSK_covmap)); CovData->setAlignment(llvm::Align(8)); // Make sure the data doesn't get deleted. diff --git a/clang/lib/CodeGen/CoverageMappingGen.h b/clang/lib/CodeGen/CoverageMappingGen.h index 3bf51f590479..5d79d1e65670 100644 --- a/clang/lib/CodeGen/CoverageMappingGen.h +++ b/clang/lib/CodeGen/CoverageMappingGen.h @@ -47,17 +47,27 @@ class CodeGenModule; /// Organizes the cross-function state that is used while generating /// code coverage mapping data. class CoverageMappingModuleGen { + /// Information needed to emit a coverage record for a function. + struct FunctionInfo { + uint64_t NameHash; + uint64_t FuncHash; + std::string CoverageMapping; + bool IsUsed; + }; + CodeGenModule &CGM; CoverageSourceInfo &SourceInfo; llvm::SmallDenseMap<const FileEntry *, unsigned, 8> FileEntries; - std::vector<llvm::Constant *> FunctionRecords; std::vector<llvm::Constant *> FunctionNames; - llvm::StructType *FunctionRecordTy; - std::vector<std::string> CoverageMappings; + std::vector<FunctionInfo> FunctionRecords; + + /// Emit a function record. + void emitFunctionMappingRecord(const FunctionInfo &Info, + uint64_t FilenamesRef); public: CoverageMappingModuleGen(CodeGenModule &CGM, CoverageSourceInfo &SourceInfo) - : CGM(CGM), SourceInfo(SourceInfo), FunctionRecordTy(nullptr) {} + : CGM(CGM), SourceInfo(SourceInfo) {} CoverageSourceInfo &getSourceInfo() const { return SourceInfo; diff --git a/clang/lib/CodeGen/EHScopeStack.h b/clang/lib/CodeGen/EHScopeStack.h index 0ed67aabcd62..3a640d6117d6 100644 --- a/clang/lib/CodeGen/EHScopeStack.h +++ b/clang/lib/CodeGen/EHScopeStack.h @@ -85,11 +85,6 @@ enum CleanupKind : unsigned { NormalAndEHCleanup = EHCleanup | NormalCleanup, - InactiveCleanup = 0x4, - InactiveEHCleanup = EHCleanup | InactiveCleanup, - InactiveNormalCleanup = NormalCleanup | InactiveCleanup, - InactiveNormalAndEHCleanup = NormalAndEHCleanup | InactiveCleanup, - LifetimeMarker = 0x8, NormalEHLifetimeMarker = LifetimeMarker | NormalAndEHCleanup, }; @@ -158,9 +153,10 @@ public: /// Generation flags. 
class Flags { enum { - F_IsForEH = 0x1, + F_IsForEH = 0x1, F_IsNormalCleanupKind = 0x2, - F_IsEHCleanupKind = 0x4 + F_IsEHCleanupKind = 0x4, + F_HasExitSwitch = 0x8, }; unsigned flags; @@ -179,8 +175,10 @@ public: /// cleanup. bool isEHCleanupKind() const { return flags & F_IsEHCleanupKind; } void setIsEHCleanupKind() { flags |= F_IsEHCleanupKind; } - }; + bool hasExitSwitch() const { return flags & F_HasExitSwitch; } + void setHasExitSwitch() { flags |= F_HasExitSwitch; } + }; /// Emit the cleanup. For normal cleanups, this is run in the /// same EH context as when the cleanup was pushed, i.e. the diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index b5b8702c551e..80de2a6e3950 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -203,7 +203,7 @@ public: void EmitCXXConstructors(const CXXConstructorDecl *D) override; - AddedStructorArgs + AddedStructorArgCounts buildStructorSignature(GlobalDecl GD, SmallVectorImpl<CanQualType> &ArgTys) override; @@ -222,10 +222,17 @@ public: void EmitInstanceFunctionProlog(CodeGenFunction &CGF) override; - AddedStructorArgs - addImplicitConstructorArgs(CodeGenFunction &CGF, const CXXConstructorDecl *D, - CXXCtorType Type, bool ForVirtualBase, - bool Delegating, CallArgList &Args) override; + AddedStructorArgs getImplicitConstructorArgs(CodeGenFunction &CGF, + const CXXConstructorDecl *D, + CXXCtorType Type, + bool ForVirtualBase, + bool Delegating) override; + + llvm::Value *getCXXDestructorImplicitParam(CodeGenFunction &CGF, + const CXXDestructorDecl *DD, + CXXDtorType Type, + bool ForVirtualBase, + bool Delegating) override; void EmitDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *DD, CXXDtorType Type, bool ForVirtualBase, @@ -516,6 +523,22 @@ private: } bool canCallMismatchedFunctionType() const override { return false; } }; + +class XLCXXABI final : public ItaniumCXXABI { +public: + explicit XLCXXABI(CodeGen::CodeGenModule &CGM) + : ItaniumCXXABI(CGM) {} + + void registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D, + llvm::FunctionCallee dtor, + llvm::Constant *addr) override; + + bool useSinitAndSterm() const override { return true; } + +private: + void emitCXXStermFinalizer(const VarDecl &D, llvm::Function *dtorStub, + llvm::Constant *addr); +}; } CodeGen::CGCXXABI *CodeGen::CreateItaniumCXXABI(CodeGenModule &CGM) { @@ -546,6 +569,9 @@ CodeGen::CGCXXABI *CodeGen::CreateItaniumCXXABI(CodeGenModule &CGM) { case TargetCXXABI::WebAssembly: return new WebAssemblyCXXABI(CGM); + case TargetCXXABI::XL: + return new XLCXXABI(CGM); + case TargetCXXABI::GenericItanium: if (CGM.getContext().getTargetInfo().getTriple().getArch() == llvm::Triple::le32) { @@ -670,6 +696,10 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( CGM.HasHiddenLTOVisibility(RD); bool ShouldEmitVFEInfo = CGM.getCodeGenOpts().VirtualFunctionElimination && CGM.HasHiddenLTOVisibility(RD); + bool ShouldEmitWPDInfo = + CGM.getCodeGenOpts().WholeProgramVTables && + // Don't insert type tests if we are forcing public std visibility. + !CGM.HasLTOVisibilityPublicStd(RD); llvm::Value *VirtualFn = nullptr; { @@ -677,16 +707,17 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( llvm::Value *TypeId = nullptr; llvm::Value *CheckResult = nullptr; - if (ShouldEmitCFICheck || ShouldEmitVFEInfo) { - // If doing CFI or VFE, we will need the metadata node to check against. 
+    if (ShouldEmitCFICheck || ShouldEmitVFEInfo || ShouldEmitWPDInfo) {
+      // If doing CFI, VFE or WPD, we will need the metadata node to check
+      // against.
       llvm::Metadata *MD =
           CGM.CreateMetadataIdentifierForVirtualMemPtrType(QualType(MPT, 0));
       TypeId = llvm::MetadataAsValue::get(CGF.getLLVMContext(), MD);
     }
-    llvm::Value *VFPAddr = Builder.CreateGEP(VTable, VTableOffset);
-
     if (ShouldEmitVFEInfo) {
+      llvm::Value *VFPAddr = Builder.CreateGEP(VTable, VTableOffset);
+
       // If doing VFE, load from the vtable with a type.checked.load intrinsic
       // call. Note that we use the GEP to calculate the address to load from
       // and pass 0 as the offset to the intrinsic. This is because every
@@ -702,18 +733,30 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(
     } else {
       // When not doing VFE, emit a normal load, as it allows more
       // optimisations than type.checked.load.
-      if (ShouldEmitCFICheck) {
+      if (ShouldEmitCFICheck || ShouldEmitWPDInfo) {
+        llvm::Value *VFPAddr = Builder.CreateGEP(VTable, VTableOffset);
         CheckResult = Builder.CreateCall(
             CGM.getIntrinsic(llvm::Intrinsic::type_test),
             {Builder.CreateBitCast(VFPAddr, CGF.Int8PtrTy), TypeId});
       }
-      VFPAddr =
-          Builder.CreateBitCast(VFPAddr, FTy->getPointerTo()->getPointerTo());
-      VirtualFn = Builder.CreateAlignedLoad(VFPAddr, CGF.getPointerAlign(),
-                                            "memptr.virtualfn");
+
+      if (CGM.getItaniumVTableContext().isRelativeLayout()) {
+        VirtualFn = CGF.Builder.CreateCall(
+            CGM.getIntrinsic(llvm::Intrinsic::load_relative,
+                             {VTableOffset->getType()}),
+            {VTable, VTableOffset});
+        VirtualFn = CGF.Builder.CreateBitCast(VirtualFn, FTy->getPointerTo());
+      } else {
+        llvm::Value *VFPAddr = CGF.Builder.CreateGEP(VTable, VTableOffset);
+        VFPAddr = CGF.Builder.CreateBitCast(
+            VFPAddr, FTy->getPointerTo()->getPointerTo());
+        VirtualFn = CGF.Builder.CreateAlignedLoad(
+            VFPAddr, CGF.getPointerAlign(), "memptr.virtualfn");
+      }
     }
     assert(VirtualFn && "Virtual function pointer not created!");
-    assert((!ShouldEmitCFICheck || !ShouldEmitVFEInfo || CheckResult) &&
+    assert((!ShouldEmitCFICheck || !ShouldEmitVFEInfo || !ShouldEmitWPDInfo ||
+            CheckResult) &&
            "Check result required but not created!");
     if (ShouldEmitCFICheck) {
@@ -984,11 +1027,16 @@ llvm::Constant *ItaniumCXXABI::BuildMemberPointer(const CXXMethodDecl *MD,
   llvm::Constant *MemPtr[2];
   if (MD->isVirtual()) {
     uint64_t Index = CGM.getItaniumVTableContext().getMethodVTableIndex(MD);
-
-    const ASTContext &Context = getContext();
-    CharUnits PointerWidth =
-        Context.toCharUnitsFromBits(Context.getTargetInfo().getPointerWidth(0));
-    uint64_t VTableOffset = (Index * PointerWidth.getQuantity());
+    uint64_t VTableOffset;
+    if (CGM.getItaniumVTableContext().isRelativeLayout()) {
+      // Multiply by 4-byte relative offsets.
+      VTableOffset = Index * 4;
+    } else {
+      const ASTContext &Context = getContext();
+      CharUnits PointerWidth = Context.toCharUnitsFromBits(
+          Context.getTargetInfo().getPointerWidth(0));
+      VTableOffset = Index * PointerWidth.getQuantity();
+    }
     if (UseARMMethodPtrABI) {
       // ARM C++ ABI 3.2.1:
@@ -1402,8 +1450,19 @@ llvm::Value *ItaniumCXXABI::EmitTypeid(CodeGenFunction &CGF,
   llvm::Value *Value =
       CGF.GetVTablePtr(ThisPtr, StdTypeInfoPtrTy->getPointerTo(), ClassDecl);
-  // Load the type info.
-  Value = CGF.Builder.CreateConstInBoundsGEP1_64(Value, -1ULL);
+  if (CGM.getItaniumVTableContext().isRelativeLayout()) {
+    // Load the type info.
+ Value = CGF.Builder.CreateBitCast(Value, CGM.Int8PtrTy); + Value = CGF.Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::load_relative, {CGM.Int32Ty}), + {Value, llvm::ConstantInt::get(CGM.Int32Ty, -4)}); + + // Setup to dereference again since this is a proxy we accessed. + Value = CGF.Builder.CreateBitCast(Value, StdTypeInfoPtrTy->getPointerTo()); + } else { + // Load the type info. + Value = CGF.Builder.CreateConstInBoundsGEP1_64(Value, -1ULL); + } return CGF.Builder.CreateAlignedLoad(Value, CGF.getPointerAlign()); } @@ -1459,28 +1518,37 @@ llvm::Value *ItaniumCXXABI::EmitDynamicCastToVoid(CodeGenFunction &CGF, Address ThisAddr, QualType SrcRecordTy, QualType DestTy) { - llvm::Type *PtrDiffLTy = - CGF.ConvertType(CGF.getContext().getPointerDiffType()); llvm::Type *DestLTy = CGF.ConvertType(DestTy); - auto *ClassDecl = cast<CXXRecordDecl>(SrcRecordTy->castAs<RecordType>()->getDecl()); - // Get the vtable pointer. - llvm::Value *VTable = CGF.GetVTablePtr(ThisAddr, PtrDiffLTy->getPointerTo(), - ClassDecl); + llvm::Value *OffsetToTop; + if (CGM.getItaniumVTableContext().isRelativeLayout()) { + // Get the vtable pointer. + llvm::Value *VTable = + CGF.GetVTablePtr(ThisAddr, CGM.Int32Ty->getPointerTo(), ClassDecl); - // Get the offset-to-top from the vtable. - llvm::Value *OffsetToTop = - CGF.Builder.CreateConstInBoundsGEP1_64(VTable, -2ULL); - OffsetToTop = - CGF.Builder.CreateAlignedLoad(OffsetToTop, CGF.getPointerAlign(), - "offset.to.top"); + // Get the offset-to-top from the vtable. + OffsetToTop = + CGF.Builder.CreateConstInBoundsGEP1_32(/*Type=*/nullptr, VTable, -2U); + OffsetToTop = CGF.Builder.CreateAlignedLoad( + OffsetToTop, CharUnits::fromQuantity(4), "offset.to.top"); + } else { + llvm::Type *PtrDiffLTy = + CGF.ConvertType(CGF.getContext().getPointerDiffType()); + // Get the vtable pointer. + llvm::Value *VTable = + CGF.GetVTablePtr(ThisAddr, PtrDiffLTy->getPointerTo(), ClassDecl); + + // Get the offset-to-top from the vtable. + OffsetToTop = CGF.Builder.CreateConstInBoundsGEP1_64(VTable, -2ULL); + OffsetToTop = CGF.Builder.CreateAlignedLoad( + OffsetToTop, CGF.getPointerAlign(), "offset.to.top"); + } // Finally, add the offset to the pointer. 
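Every relative-layout branch above leans on the same primitive: llvm.load.relative(ptr, offset) yields ptr + *(i32 *)(ptr + offset). A C++ model of that computation (a sketch of the documented intrinsic semantics, with a hypothetical helper name), which also shows why relative vtable slots are 4 bytes wide regardless of the pointer size:

#include <cstdint>
#include <cstring>

// Read a signed 32-bit offset stored at Base + Offset and apply it to
// Base itself; the result points at the referenced global.
const char *loadRelative(const char *Base, int64_t Offset) {
  int32_t Rel;
  std::memcpy(&Rel, Base + Offset, sizeof(Rel)); // *(i32 *)(Base + Offset)
  return Base + Rel;
}

With this model, the typeid lookup above is loadRelative(VTable, -4), a virtual member-pointer slot is loadRelative(VTable, 4 * Index), and the offset-to-top value is a plain i32 read at -8 rather than a relative pointer.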
llvm::Value *Value = ThisAddr.getPointer(); Value = CGF.EmitCastToVoidPtr(Value); Value = CGF.Builder.CreateInBoundsGEP(Value, OffsetToTop); - return CGF.Builder.CreateBitCast(Value, DestLTy); } @@ -1501,17 +1569,22 @@ ItaniumCXXABI::GetVirtualBaseClassOffset(CodeGenFunction &CGF, CharUnits VBaseOffsetOffset = CGM.getItaniumVTableContext().getVirtualBaseOffsetOffset(ClassDecl, BaseClassDecl); - llvm::Value *VBaseOffsetPtr = CGF.Builder.CreateConstGEP1_64(VTablePtr, VBaseOffsetOffset.getQuantity(), "vbase.offset.ptr"); - VBaseOffsetPtr = CGF.Builder.CreateBitCast(VBaseOffsetPtr, - CGM.PtrDiffTy->getPointerTo()); - - llvm::Value *VBaseOffset = - CGF.Builder.CreateAlignedLoad(VBaseOffsetPtr, CGF.getPointerAlign(), - "vbase.offset"); + llvm::Value *VBaseOffset; + if (CGM.getItaniumVTableContext().isRelativeLayout()) { + VBaseOffsetPtr = + CGF.Builder.CreateBitCast(VBaseOffsetPtr, CGF.Int32Ty->getPointerTo()); + VBaseOffset = CGF.Builder.CreateAlignedLoad( + VBaseOffsetPtr, CharUnits::fromQuantity(4), "vbase.offset"); + } else { + VBaseOffsetPtr = CGF.Builder.CreateBitCast(VBaseOffsetPtr, + CGM.PtrDiffTy->getPointerTo()); + VBaseOffset = CGF.Builder.CreateAlignedLoad( + VBaseOffsetPtr, CGF.getPointerAlign(), "vbase.offset"); + } return VBaseOffset; } @@ -1531,7 +1604,7 @@ void ItaniumCXXABI::EmitCXXConstructors(const CXXConstructorDecl *D) { } } -CGCXXABI::AddedStructorArgs +CGCXXABI::AddedStructorArgCounts ItaniumCXXABI::buildStructorSignature(GlobalDecl GD, SmallVectorImpl<CanQualType> &ArgTys) { ASTContext &Context = getContext(); @@ -1545,9 +1618,9 @@ ItaniumCXXABI::buildStructorSignature(GlobalDecl GD, cast<CXXMethodDecl>(GD.getDecl())->getParent()->getNumVBases() != 0) { ArgTys.insert(ArgTys.begin() + 1, Context.getPointerType(Context.VoidPtrTy)); - return AddedStructorArgs::prefix(1); + return AddedStructorArgCounts::prefix(1); } - return AddedStructorArgs{}; + return AddedStructorArgCounts{}; } void ItaniumCXXABI::EmitCXXDestructors(const CXXDestructorDecl *D) { @@ -1613,9 +1686,9 @@ void ItaniumCXXABI::EmitInstanceFunctionProlog(CodeGenFunction &CGF) { CGF.Builder.CreateStore(getThisValue(CGF), CGF.ReturnValue); } -CGCXXABI::AddedStructorArgs ItaniumCXXABI::addImplicitConstructorArgs( +CGCXXABI::AddedStructorArgs ItaniumCXXABI::getImplicitConstructorArgs( CodeGenFunction &CGF, const CXXConstructorDecl *D, CXXCtorType Type, - bool ForVirtualBase, bool Delegating, CallArgList &Args) { + bool ForVirtualBase, bool Delegating) { if (!NeedsVTTParameter(GlobalDecl(D, Type))) return AddedStructorArgs{}; @@ -1623,8 +1696,14 @@ CGCXXABI::AddedStructorArgs ItaniumCXXABI::addImplicitConstructorArgs( llvm::Value *VTT = CGF.GetVTTParameter(GlobalDecl(D, Type), ForVirtualBase, Delegating); QualType VTTTy = getContext().getPointerType(getContext().VoidPtrTy); - Args.insert(Args.begin() + 1, CallArg(RValue::get(VTT), VTTTy)); - return AddedStructorArgs::prefix(1); // Added one arg. 
+ return AddedStructorArgs::prefix({{VTT, VTTTy}}); +} + +llvm::Value *ItaniumCXXABI::getCXXDestructorImplicitParam( + CodeGenFunction &CGF, const CXXDestructorDecl *DD, CXXDtorType Type, + bool ForVirtualBase, bool Delegating) { + GlobalDecl GD(DD, Type); + return CGF.GetVTTParameter(GD, ForVirtualBase, Delegating); } void ItaniumCXXABI::EmitDestructorCall(CodeGenFunction &CGF, @@ -1633,7 +1712,8 @@ void ItaniumCXXABI::EmitDestructorCall(CodeGenFunction &CGF, bool Delegating, Address This, QualType ThisTy) { GlobalDecl GD(DD, Type); - llvm::Value *VTT = CGF.GetVTTParameter(GD, ForVirtualBase, Delegating); + llvm::Value *VTT = + getCXXDestructorImplicitParam(CGF, DD, Type, ForVirtualBase, Delegating); QualType VTTTy = getContext().getPointerType(getContext().VoidPtrTy); CGCallee Callee; @@ -1660,10 +1740,11 @@ void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT, CGM.GetAddrOfRTTIDescriptor(CGM.getContext().getTagDeclType(RD)); // Create and set the initializer. - ConstantInitBuilder Builder(CGM); - auto Components = Builder.beginStruct(); - CGVT.createVTableInitializer(Components, VTLayout, RTTI); - Components.finishAndSetAsInitializer(VTable); + ConstantInitBuilder builder(CGM); + auto components = builder.beginStruct(); + CGVT.createVTableInitializer(components, VTLayout, RTTI, + llvm::GlobalValue::isLocalLinkage(Linkage)); + components.finishAndSetAsInitializer(VTable); // Set the correct linkage. VTable->setLinkage(Linkage); @@ -1687,6 +1768,9 @@ void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT, if (!VTable->isDeclarationForLinker()) CGM.EmitVTableTypeMetadata(RD, VTable, VTLayout); + + if (VTContext.isRelativeLayout() && !VTable->isDSOLocal()) + CGVT.GenerateRelativeVTableAlias(VTable, VTable->getName()); } bool ItaniumCXXABI::isVirtualOffsetNeededForVTableField( @@ -1776,7 +1860,9 @@ llvm::GlobalVariable *ItaniumCXXABI::getAddrOfVTable(const CXXRecordDecl *RD, // Use pointer alignment for the vtable. Otherwise we would align them based // on the size of the initializer which doesn't make sense as only single // values are read. - unsigned PAlign = CGM.getTarget().getPointerAlign(0); + unsigned PAlign = CGM.getItaniumVTableContext().isRelativeLayout() + ? 
32 + : CGM.getTarget().getPointerAlign(0); VTable = CGM.CreateOrReplaceCXXRuntimeVariable( Name, VTableType, llvm::GlobalValue::ExternalLinkage, @@ -1793,9 +1879,9 @@ CGCallee ItaniumCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF, Address This, llvm::Type *Ty, SourceLocation Loc) { - Ty = Ty->getPointerTo()->getPointerTo(); auto *MethodDecl = cast<CXXMethodDecl>(GD.getDecl()); - llvm::Value *VTable = CGF.GetVTablePtr(This, Ty, MethodDecl->getParent()); + llvm::Value *VTable = CGF.GetVTablePtr( + This, Ty->getPointerTo()->getPointerTo(), MethodDecl->getParent()); uint64_t VTableIndex = CGM.getItaniumVTableContext().getMethodVTableIndex(GD); llvm::Value *VFunc; @@ -1806,10 +1892,21 @@ CGCallee ItaniumCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF, } else { CGF.EmitTypeMetadataCodeForVCall(MethodDecl->getParent(), VTable, Loc); - llvm::Value *VFuncPtr = - CGF.Builder.CreateConstInBoundsGEP1_64(VTable, VTableIndex, "vfn"); - auto *VFuncLoad = - CGF.Builder.CreateAlignedLoad(VFuncPtr, CGF.getPointerAlign()); + llvm::Value *VFuncLoad; + if (CGM.getItaniumVTableContext().isRelativeLayout()) { + VTable = CGF.Builder.CreateBitCast(VTable, CGM.Int8PtrTy); + llvm::Value *Load = CGF.Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::load_relative, {CGM.Int32Ty}), + {VTable, llvm::ConstantInt::get(CGM.Int32Ty, 4 * VTableIndex)}); + VFuncLoad = CGF.Builder.CreateBitCast(Load, Ty->getPointerTo()); + } else { + VTable = + CGF.Builder.CreateBitCast(VTable, Ty->getPointerTo()->getPointerTo()); + llvm::Value *VTableSlotPtr = + CGF.Builder.CreateConstInBoundsGEP1_64(VTable, VTableIndex, "vfn"); + VFuncLoad = + CGF.Builder.CreateAlignedLoad(VTableSlotPtr, CGF.getPointerAlign()); + } // Add !invariant.load md to virtual function load to indicate that // function didn't change inside vtable. @@ -1818,11 +1915,14 @@ CGCallee ItaniumCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF, // the same virtual function loads from the same vtable load, which won't // happen without enabled devirtualization with -fstrict-vtable-pointers. if (CGM.getCodeGenOpts().OptimizationLevel > 0 && - CGM.getCodeGenOpts().StrictVTablePointers) - VFuncLoad->setMetadata( - llvm::LLVMContext::MD_invariant_load, - llvm::MDNode::get(CGM.getLLVMContext(), - llvm::ArrayRef<llvm::Metadata *>())); + CGM.getCodeGenOpts().StrictVTablePointers) { + if (auto *VFuncLoadInstr = dyn_cast<llvm::Instruction>(VFuncLoad)) { + VFuncLoadInstr->setMetadata( + llvm::LLVMContext::MD_invariant_load, + llvm::MDNode::get(CGM.getLLVMContext(), + llvm::ArrayRef<llvm::Metadata *>())); + } + } VFunc = VFuncLoad; } @@ -1939,21 +2039,28 @@ static llvm::Value *performTypeAdjustment(CodeGenFunction &CGF, // Perform the virtual adjustment if we have one. llvm::Value *ResultPtr; if (VirtualAdjustment) { - llvm::Type *PtrDiffTy = - CGF.ConvertType(CGF.getContext().getPointerDiffType()); - Address VTablePtrPtr = CGF.Builder.CreateElementBitCast(V, CGF.Int8PtrTy); llvm::Value *VTablePtr = CGF.Builder.CreateLoad(VTablePtrPtr); + llvm::Value *Offset; llvm::Value *OffsetPtr = CGF.Builder.CreateConstInBoundsGEP1_64(VTablePtr, VirtualAdjustment); + if (CGF.CGM.getItaniumVTableContext().isRelativeLayout()) { + // Load the adjustment offset from the vtable as a 32-bit int. 
+ OffsetPtr = + CGF.Builder.CreateBitCast(OffsetPtr, CGF.Int32Ty->getPointerTo()); + Offset = + CGF.Builder.CreateAlignedLoad(OffsetPtr, CharUnits::fromQuantity(4)); + } else { + llvm::Type *PtrDiffTy = + CGF.ConvertType(CGF.getContext().getPointerDiffType()); - OffsetPtr = CGF.Builder.CreateBitCast(OffsetPtr, PtrDiffTy->getPointerTo()); - - // Load the adjustment offset from the vtable. - llvm::Value *Offset = - CGF.Builder.CreateAlignedLoad(OffsetPtr, CGF.getPointerAlign()); + OffsetPtr = + CGF.Builder.CreateBitCast(OffsetPtr, PtrDiffTy->getPointerTo()); + // Load the adjustment offset from the vtable. + Offset = CGF.Builder.CreateAlignedLoad(OffsetPtr, CGF.getPointerAlign()); + } // Adjust our pointer. ResultPtr = CGF.Builder.CreateInBoundsGEP(V.getPointer(), Offset); } else { @@ -2438,7 +2545,7 @@ void CodeGenModule::registerGlobalDtorsWithAtExit() { std::string GlobalInitFnName = std::string("__GLOBAL_init_") + llvm::to_string(Priority); llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); - llvm::Function *GlobalInitFn = CreateGlobalInitOrDestructFunction( + llvm::Function *GlobalInitFn = CreateGlobalInitOrCleanUpFunction( FTy, GlobalInitFnName, getTypes().arrangeNullaryFunction(), SourceLocation()); ASTContext &Ctx = getContext(); @@ -2592,14 +2699,15 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false); const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); - InitFunc = CGM.CreateGlobalInitOrDestructFunction(FTy, "__tls_init", FI, - SourceLocation(), - /*TLS=*/true); + InitFunc = CGM.CreateGlobalInitOrCleanUpFunction(FTy, "__tls_init", FI, + SourceLocation(), + /*TLS=*/true); llvm::GlobalVariable *Guard = new llvm::GlobalVariable( CGM.getModule(), CGM.Int8Ty, /*isConstant=*/false, llvm::GlobalVariable::InternalLinkage, llvm::ConstantInt::get(CGM.Int8Ty, 0), "__tls_guard"); Guard->setThreadLocal(true); + Guard->setThreadLocalMode(CGM.GetDefaultLLVMTLSModel()); CharUnits GuardAlign = CharUnits::One(); Guard->setAlignment(GuardAlign.getAsAlign()); @@ -3008,6 +3116,7 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) { case BuiltinType::SatUShortFract: case BuiltinType::SatUFract: case BuiltinType::SatULongFract: + case BuiltinType::BFloat16: return false; case BuiltinType::Dependent: @@ -3200,9 +3309,11 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) { llvm_unreachable("Pipe types shouldn't get here"); case Type::Builtin: + case Type::ExtInt: // GCC treats vector and complex types as fundamental types. case Type::Vector: case Type::ExtVector: + case Type::ConstantMatrix: case Type::Complex: case Type::Atomic: // FIXME: GCC treats block pointers as fundamental types?! @@ -3277,17 +3388,32 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) { break; } - llvm::Constant *VTable = - CGM.getModule().getOrInsertGlobal(VTableName, CGM.Int8PtrTy); + llvm::Constant *VTable = nullptr; + + // Check if the alias exists. If it doesn't, then get or create the global. + if (CGM.getItaniumVTableContext().isRelativeLayout()) + VTable = CGM.getModule().getNamedAlias(VTableName); + if (!VTable) + VTable = CGM.getModule().getOrInsertGlobal(VTableName, CGM.Int8PtrTy); + CGM.setDSOLocal(cast<llvm::GlobalValue>(VTable->stripPointerCasts())); llvm::Type *PtrDiffTy = - CGM.getTypes().ConvertType(CGM.getContext().getPointerDiffType()); + CGM.getTypes().ConvertType(CGM.getContext().getPointerDiffType()); // The vtable address point is 2. 
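The address-point computation that follows differs only in slot width: the classic Itanium layout steps over two pointer-sized entries (offset-to-top and the RTTI pointer), while the relative layout steps over two 4-byte entries, i.e. 8 bytes. A one-line sketch of the arithmetic (hypothetical helper):

// Bytes from the start of a simple RTTI vtable to its address point.
unsigned addressPointOffset(bool RelativeLayout, unsigned PointerSize) {
  return RelativeLayout ? 2 * 4 : 2 * PointerSize;
}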
- llvm::Constant *Two = llvm::ConstantInt::get(PtrDiffTy, 2); - VTable = - llvm::ConstantExpr::getInBoundsGetElementPtr(CGM.Int8PtrTy, VTable, Two); + if (CGM.getItaniumVTableContext().isRelativeLayout()) { + // The vtable address point is 8 bytes after its start: + // 4 for the offset to top + 4 for the relative offset to rtti. + llvm::Constant *Eight = llvm::ConstantInt::get(CGM.Int32Ty, 8); + VTable = llvm::ConstantExpr::getBitCast(VTable, CGM.Int8PtrTy); + VTable = + llvm::ConstantExpr::getInBoundsGetElementPtr(CGM.Int8Ty, VTable, Eight); + } else { + llvm::Constant *Two = llvm::ConstantInt::get(PtrDiffTy, 2); + VTable = llvm::ConstantExpr::getInBoundsGetElementPtr(CGM.Int8PtrTy, VTable, + Two); + } VTable = llvm::ConstantExpr::getBitCast(VTable, CGM.Int8PtrTy); Fields.push_back(VTable); @@ -3438,6 +3564,7 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo( case Type::Builtin: case Type::Vector: case Type::ExtVector: + case Type::ConstantMatrix: case Type::Complex: case Type::BlockPointer: // Itanium C++ ABI 2.9.5p4: @@ -3453,7 +3580,10 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo( llvm_unreachable("Undeduced type shouldn't get here"); case Type::Pipe: - llvm_unreachable("Pipe type shouldn't get here"); + break; + + case Type::ExtInt: + break; case Type::ConstantArray: case Type::IncompleteArray: @@ -4401,3 +4531,70 @@ void WebAssemblyCXXABI::emitBeginCatch(CodeGenFunction &CGF, NormalCleanup, cast<llvm::CatchPadInst>(CGF.CurrentFuncletPad)); ItaniumCXXABI::emitBeginCatch(CGF, C); } + +/// Register a global destructor as best as we know how. +void XLCXXABI::registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D, + llvm::FunctionCallee dtor, + llvm::Constant *addr) { + if (D.getTLSKind() != VarDecl::TLS_None) + llvm::report_fatal_error("thread local storage not yet implemented on AIX"); + + // Create __dtor function for the var decl. + llvm::Function *dtorStub = CGF.createAtExitStub(D, dtor, addr); + + // Register above __dtor with atexit(). + CGF.registerGlobalDtorWithAtExit(dtorStub); + + // Emit __finalize function to unregister __dtor and (as appropriate) call + // __dtor. + emitCXXStermFinalizer(D, dtorStub, addr); +} + +void XLCXXABI::emitCXXStermFinalizer(const VarDecl &D, llvm::Function *dtorStub, + llvm::Constant *addr) { + llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.VoidTy, false); + SmallString<256> FnName; + { + llvm::raw_svector_ostream Out(FnName); + getMangleContext().mangleDynamicStermFinalizer(&D, Out); + } + + // Create the finalization action associated with a variable. + const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); + llvm::Function *StermFinalizer = CGM.CreateGlobalInitOrCleanUpFunction( + FTy, FnName.str(), FI, D.getLocation()); + + CodeGenFunction CGF(CGM); + + CGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, StermFinalizer, FI, + FunctionArgList()); + + // The unatexit subroutine unregisters __dtor functions that were previously + // registered by the atexit subroutine. If the referenced function is found, + // the unatexit returns a value of 0, meaning that the cleanup is still + // pending (and we should call the __dtor function). + llvm::Value *V = CGF.unregisterGlobalDtorWithUnAtExit(dtorStub); + + llvm::Value *NeedsDestruct = CGF.Builder.CreateIsNull(V, "needs_destruct"); + + llvm::BasicBlock *DestructCallBlock = CGF.createBasicBlock("destruct.call"); + llvm::BasicBlock *EndBlock = CGF.createBasicBlock("destruct.end"); + + // Check if unatexit returns a value of 0. 
If it does, jump to + // DestructCallBlock, otherwise jump to EndBlock directly. + CGF.Builder.CreateCondBr(NeedsDestruct, DestructCallBlock, EndBlock); + + CGF.EmitBlock(DestructCallBlock); + + // Emit the call to dtorStub. + llvm::CallInst *CI = CGF.Builder.CreateCall(dtorStub); + + // Make sure the call and the callee agree on calling convention. + CI->setCallingConv(dtorStub->getCallingConv()); + + CGF.EmitBlock(EndBlock); + + CGF.FinishFunction(); + + CGM.AddCXXStermFinalizerEntry(StermFinalizer); +} diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp index aff46135705a..45c6cb6b2e0d 100644 --- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -206,7 +206,7 @@ public: // lacks a definition for the destructor, non-base destructors must always // delegate to or alias the base destructor. - AddedStructorArgs + AddedStructorArgCounts buildStructorSignature(GlobalDecl GD, SmallVectorImpl<CanQualType> &ArgTys) override; @@ -253,10 +253,17 @@ public: void EmitInstanceFunctionProlog(CodeGenFunction &CGF) override; - AddedStructorArgs - addImplicitConstructorArgs(CodeGenFunction &CGF, const CXXConstructorDecl *D, - CXXCtorType Type, bool ForVirtualBase, - bool Delegating, CallArgList &Args) override; + AddedStructorArgs getImplicitConstructorArgs(CodeGenFunction &CGF, + const CXXConstructorDecl *D, + CXXCtorType Type, + bool ForVirtualBase, + bool Delegating) override; + + llvm::Value *getCXXDestructorImplicitParam(CodeGenFunction &CGF, + const CXXDestructorDecl *DD, + CXXDtorType Type, + bool ForVirtualBase, + bool Delegating) override; void EmitDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *DD, CXXDtorType Type, bool ForVirtualBase, @@ -1261,10 +1268,10 @@ void MicrosoftCXXABI::EmitVBPtrStores(CodeGenFunction &CGF, } } -CGCXXABI::AddedStructorArgs +CGCXXABI::AddedStructorArgCounts MicrosoftCXXABI::buildStructorSignature(GlobalDecl GD, SmallVectorImpl<CanQualType> &ArgTys) { - AddedStructorArgs Added; + AddedStructorArgCounts Added; // TODO: 'for base' flag if (isa<CXXDestructorDecl>(GD.getDecl()) && GD.getDtorType() == Dtor_Deleting) { @@ -1553,9 +1560,9 @@ void MicrosoftCXXABI::EmitInstanceFunctionProlog(CodeGenFunction &CGF) { } } -CGCXXABI::AddedStructorArgs MicrosoftCXXABI::addImplicitConstructorArgs( +CGCXXABI::AddedStructorArgs MicrosoftCXXABI::getImplicitConstructorArgs( CodeGenFunction &CGF, const CXXConstructorDecl *D, CXXCtorType Type, - bool ForVirtualBase, bool Delegating, CallArgList &Args) { + bool ForVirtualBase, bool Delegating) { assert(Type == Ctor_Complete || Type == Ctor_Base); // Check if we need a 'most_derived' parameter. 
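The AIX sterm finalizer emitted above boils down to a small runtime pattern: unatexit() returns 0 when the handler was still registered, meaning the cleanup is still pending, and only then is the destructor stub invoked. A hedged C++ sketch of that control flow, assuming AIX's unatexit signature:

extern "C" int unatexit(void (*handler)(void)); // AIX runtime routine

void stermFinalizer(void (*dtorStub)(void)) {
  // 0 => the atexit entry was found and removed, so we must run the
  // cleanup ourselves (the "destruct.call" block).
  if (unatexit(dtorStub) == 0)
    dtorStub();
  // Otherwise atexit processing already ran it (the "destruct.end" block).
}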
@@ -1570,13 +1577,16 @@ CGCXXABI::AddedStructorArgs MicrosoftCXXABI::addImplicitConstructorArgs( } else { MostDerivedArg = llvm::ConstantInt::get(CGM.Int32Ty, Type == Ctor_Complete); } - RValue RV = RValue::get(MostDerivedArg); if (FPT->isVariadic()) { - Args.insert(Args.begin() + 1, CallArg(RV, getContext().IntTy)); - return AddedStructorArgs::prefix(1); + return AddedStructorArgs::prefix({{MostDerivedArg, getContext().IntTy}}); } - Args.add(RV, getContext().IntTy); - return AddedStructorArgs::suffix(1); + return AddedStructorArgs::suffix({{MostDerivedArg, getContext().IntTy}}); +} + +llvm::Value *MicrosoftCXXABI::getCXXDestructorImplicitParam( + CodeGenFunction &CGF, const CXXDestructorDecl *DD, CXXDtorType Type, + bool ForVirtualBase, bool Delegating) { + return nullptr; } void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF, @@ -1605,8 +1615,11 @@ void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF, BaseDtorEndBB = EmitDtorCompleteObjectHandler(CGF); } + llvm::Value *Implicit = + getCXXDestructorImplicitParam(CGF, DD, Type, ForVirtualBase, + Delegating); // = nullptr CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(), ThisTy, - /*ImplicitParam=*/nullptr, + /*ImplicitParam=*/Implicit, /*ImplicitParamTy=*/QualType(), nullptr); if (BaseDtorEndBB) { // Complete object handler should continue to be the remaining @@ -1621,6 +1634,15 @@ void MicrosoftCXXABI::emitVTableTypeMetadata(const VPtrInfo &Info, if (!CGM.getCodeGenOpts().LTOUnit) return; + // TODO: Should VirtualFunctionElimination also be supported here? + // See similar handling in CodeGenModule::EmitVTableTypeMetadata. + if (CGM.getCodeGenOpts().WholeProgramVTables) { + llvm::GlobalObject::VCallVisibility TypeVis = + CGM.GetVCallVisibilityLevel(RD); + if (TypeVis != llvm::GlobalObject::VCallVisibilityPublic) + VTable->setVCallVisibilityMetadata(TypeVis); + } + // The location of the first virtual function pointer in the virtual table, // aka the "address point" on Itanium. This is at offset 0 if RTTI is // disabled, or sizeof(void*) if RTTI is enabled. 
@@ -1681,10 +1703,11 @@ void MicrosoftCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT, [](const VTableComponent &VTC) { return VTC.isRTTIKind(); })) RTTI = getMSCompleteObjectLocator(RD, *Info); - ConstantInitBuilder Builder(CGM); - auto Components = Builder.beginStruct(); - CGVT.createVTableInitializer(Components, VTLayout, RTTI); - Components.finishAndSetAsInitializer(VTable); + ConstantInitBuilder builder(CGM); + auto components = builder.beginStruct(); + CGVT.createVTableInitializer(components, VTLayout, RTTI, + VTable->hasLocalLinkage()); + components.finishAndSetAsInitializer(VTable); emitVTableTypeMetadata(*Info, RD, VTable); } @@ -2341,7 +2364,7 @@ void MicrosoftCXXABI::EmitThreadLocalInitFuncs( if (!NonComdatInits.empty()) { llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false); - llvm::Function *InitFunc = CGM.CreateGlobalInitOrDestructFunction( + llvm::Function *InitFunc = CGM.CreateGlobalInitOrCleanUpFunction( FTy, "__tls_init", CGM.getTypes().arrangeNullaryFunction(), SourceLocation(), /*TLS=*/true); CodeGenFunction(CGM).GenerateCXXGlobalInitFunc(InitFunc, NonComdatInits); @@ -2515,7 +2538,7 @@ void MicrosoftCXXABI::EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D, GuardVar->setComdat( CGM.getModule().getOrInsertComdat(GuardVar->getName())); if (D.getTLSKind()) - GuardVar->setThreadLocal(true); + CGM.setTLSMode(GuardVar, D); if (GI && !HasPerVariableGuard) GI->Guard = GuardVar; } @@ -3913,7 +3936,7 @@ MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD, // Calculate the mangled name. SmallString<256> ThunkName; llvm::raw_svector_ostream Out(ThunkName); - getMangleContext().mangleCXXCtor(CD, CT, Out); + getMangleContext().mangleName(GlobalDecl(CD, CT), Out); // If the thunk has been generated previously, just return it. if (llvm::GlobalValue *GV = CGM.getModule().getNamedValue(ThunkName)) @@ -4000,7 +4023,7 @@ MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD, CGF.EmitCallArgs(Args, FPT, llvm::makeArrayRef(ArgVec), CD, IsCopy ? 1 : 0); // Insert any ABI-specific implicit constructor arguments. - AddedStructorArgs ExtraArgs = + AddedStructorArgCounts ExtraArgs = addImplicitConstructorArgs(CGF, CD, Ctor_Complete, /*ForVirtualBase=*/false, /*Delegating=*/false, Args); diff --git a/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp index 284e8022a3c4..0c7e5f4598f8 100644 --- a/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp +++ b/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp @@ -173,8 +173,8 @@ public: // Prepare CGDebugInfo to emit debug info for a clang module. 
auto *DI = Builder->getModuleDebugInfo(); StringRef ModuleName = llvm::sys::path::filename(MainFileName); - DI->setPCHDescriptor({ModuleName, "", OutputFileName, - ASTFileSignature{{{~0U, ~0U, ~0U, ~0U, ~1U}}}}); + DI->setPCHDescriptor( + {ModuleName, "", OutputFileName, ASTFileSignature::createDISentinel()}); DI->setModuleMap(MMap); } diff --git a/clang/lib/CodeGen/PatternInit.cpp b/clang/lib/CodeGen/PatternInit.cpp index 3410c7f21533..26ac8b63a9ba 100644 --- a/clang/lib/CodeGen/PatternInit.cpp +++ b/clang/lib/CodeGen/PatternInit.cpp @@ -8,6 +8,7 @@ #include "PatternInit.h" #include "CodeGenModule.h" +#include "clang/Basic/TargetInfo.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Type.h" @@ -33,17 +34,15 @@ llvm::Constant *clang::CodeGen::initializationPatternFor(CodeGenModule &CGM, constexpr bool NegativeNaN = true; constexpr uint64_t NaNPayload = 0xFFFFFFFFFFFFFFFFull; if (Ty->isIntOrIntVectorTy()) { - unsigned BitWidth = cast<llvm::IntegerType>( - Ty->isVectorTy() ? Ty->getVectorElementType() : Ty) - ->getBitWidth(); + unsigned BitWidth = + cast<llvm::IntegerType>(Ty->getScalarType())->getBitWidth(); if (BitWidth <= 64) return llvm::ConstantInt::get(Ty, IntValue); return llvm::ConstantInt::get( Ty, llvm::APInt::getSplat(BitWidth, llvm::APInt(64, IntValue))); } if (Ty->isPtrOrPtrVectorTy()) { - auto *PtrTy = cast<llvm::PointerType>( - Ty->isVectorTy() ? Ty->getVectorElementType() : Ty); + auto *PtrTy = cast<llvm::PointerType>(Ty->getScalarType()); unsigned PtrWidth = CGM.getContext().getTargetInfo().getPointerWidth( PtrTy->getAddressSpace()); if (PtrWidth > 64) @@ -54,8 +53,7 @@ llvm::Constant *clang::CodeGen::initializationPatternFor(CodeGenModule &CGM, } if (Ty->isFPOrFPVectorTy()) { unsigned BitWidth = llvm::APFloat::semanticsSizeInBits( - (Ty->isVectorTy() ? Ty->getVectorElementType() : Ty) - ->getFltSemantics()); + Ty->getScalarType()->getFltSemantics()); llvm::APInt Payload(64, NaNPayload); if (BitWidth >= 64) Payload = llvm::APInt::getSplat(BitWidth, Payload); diff --git a/clang/lib/CodeGen/SanitizerMetadata.cpp b/clang/lib/CodeGen/SanitizerMetadata.cpp index 24ae6c6e362f..cdf83370c41f 100644 --- a/clang/lib/CodeGen/SanitizerMetadata.cpp +++ b/clang/lib/CodeGen/SanitizerMetadata.cpp @@ -13,6 +13,7 @@ #include "CodeGenModule.h" #include "clang/AST/Attr.h" #include "clang/AST/Type.h" +#include "clang/Basic/SourceManager.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/Constants.h" @@ -30,16 +31,16 @@ static bool isAsanHwasanOrMemTag(const SanitizerSet& SS) { void SanitizerMetadata::reportGlobalToASan(llvm::GlobalVariable *GV, SourceLocation Loc, StringRef Name, QualType Ty, bool IsDynInit, - bool IsBlacklisted) { + bool IsExcluded) { if (!isAsanHwasanOrMemTag(CGM.getLangOpts().Sanitize)) return; IsDynInit &= !CGM.isInSanitizerBlacklist(GV, Loc, Ty, "init"); - IsBlacklisted |= CGM.isInSanitizerBlacklist(GV, Loc, Ty); + IsExcluded |= CGM.isInSanitizerBlacklist(GV, Loc, Ty); llvm::Metadata *LocDescr = nullptr; llvm::Metadata *GlobalName = nullptr; llvm::LLVMContext &VMContext = CGM.getLLVMContext(); - if (!IsBlacklisted) { + if (!IsExcluded) { // Don't generate source location and global name if it is blacklisted - // it won't be instrumented anyway. 
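The PatternInit changes above are a pure refactor: Type::getScalarType() already returns the element type for vectors and the type itself otherwise, which is exactly what the removed isVectorTy() ternaries spelled out. For reference, a sketch of the equivalence against the LLVM IR API:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"

// Pre-refactor spelling of what Ty->getScalarType() computes.
llvm::Type *scalarOf(llvm::Type *Ty) {
  return Ty->isVectorTy()
             ? llvm::cast<llvm::VectorType>(Ty)->getElementType()
             : Ty;
}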
LocDescr = getLocationMetadata(Loc); @@ -52,7 +53,7 @@ void SanitizerMetadata::reportGlobalToASan(llvm::GlobalVariable *GV, llvm::ConstantAsMetadata::get( llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext), IsDynInit)), llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( - llvm::Type::getInt1Ty(VMContext), IsBlacklisted))}; + llvm::Type::getInt1Ty(VMContext), IsExcluded))}; llvm::MDNode *ThisGlobal = llvm::MDNode::get(VMContext, GlobalMetadata); llvm::NamedMDNode *AsanGlobals = @@ -68,12 +69,12 @@ void SanitizerMetadata::reportGlobalToASan(llvm::GlobalVariable *GV, llvm::raw_string_ostream OS(QualName); D.printQualifiedName(OS); - bool IsBlacklisted = false; + bool IsExcluded = false; for (auto Attr : D.specific_attrs<NoSanitizeAttr>()) if (Attr->getMask() & SanitizerKind::Address) - IsBlacklisted = true; + IsExcluded = true; reportGlobalToASan(GV, D.getLocation(), OS.str(), D.getType(), IsDynInit, - IsBlacklisted); + IsExcluded); } void SanitizerMetadata::disableSanitizerForGlobal(llvm::GlobalVariable *GV) { diff --git a/clang/lib/CodeGen/SanitizerMetadata.h b/clang/lib/CodeGen/SanitizerMetadata.h index 7ffac4360d9c..440a54590acc 100644 --- a/clang/lib/CodeGen/SanitizerMetadata.h +++ b/clang/lib/CodeGen/SanitizerMetadata.h @@ -40,7 +40,7 @@ public: bool IsDynInit = false); void reportGlobalToASan(llvm::GlobalVariable *GV, SourceLocation Loc, StringRef Name, QualType Ty, bool IsDynInit = false, - bool IsBlacklisted = false); + bool IsExcluded = false); void disableSanitizerForGlobal(llvm::GlobalVariable *GV); void disableSanitizerForInstruction(llvm::Instruction *I); private: diff --git a/clang/lib/CodeGen/SwiftCallingConv.cpp b/clang/lib/CodeGen/SwiftCallingConv.cpp index 8bce93b71c0c..3d7421ac2e16 100644 --- a/clang/lib/CodeGen/SwiftCallingConv.cpp +++ b/clang/lib/CodeGen/SwiftCallingConv.cpp @@ -694,7 +694,7 @@ swiftcall::splitLegalVectorType(CodeGenModule &CGM, CharUnits vectorSize, // Try to split the vector type in half. if (numElts >= 4 && isPowerOf2(numElts)) { if (isLegalVectorType(CGM, vectorSize / 2, eltTy, numElts / 2)) - return {llvm::VectorType::get(eltTy, numElts / 2), 2}; + return {llvm::FixedVectorType::get(eltTy, numElts / 2), 2}; } return {eltTy, numElts}; @@ -747,7 +747,8 @@ void swiftcall::legalizeVectorType(CodeGenModule &CGM, CharUnits origVectorSize, // Add the right number of vectors of this size. auto numVecs = numElts >> logCandidateNumElts; - components.append(numVecs, llvm::VectorType::get(eltTy, candidateNumElts)); + components.append(numVecs, + llvm::FixedVectorType::get(eltTy, candidateNumElts)); numElts -= (numVecs << logCandidateNumElts); if (numElts == 0) return; @@ -757,7 +758,7 @@ void swiftcall::legalizeVectorType(CodeGenModule &CGM, CharUnits origVectorSize, // This only needs to be separately checked if it's not a power of 2. 
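The Swift vector legalization above only tries the half-size vector when the element count is a power of two and at least 4, so an 8 x float candidate may become two 4 x float pieces. A sketch of the gate (hypothetical helpers; the bit trick is the usual power-of-two test):

bool isPowerOf2(unsigned N) { return N != 0 && (N & (N - 1)) == 0; }

// True when splitting the vector in half is worth attempting at all;
// whether it happens still depends on the half being a legal vector type.
bool mayTryHalving(unsigned NumElts) {
  return NumElts >= 4 && isPowerOf2(NumElts);
}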
   if (numElts > 2 && !isPowerOf2(numElts) &&
       isLegalVectorType(CGM, eltSize * numElts, eltTy, numElts)) {
-    components.push_back(llvm::VectorType::get(eltTy, numElts));
+    components.push_back(llvm::FixedVectorType::get(eltTy, numElts));
     return;
   }
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 682ef18da73b..9cd63ebe29ee 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -20,6 +20,7 @@
 #include "clang/AST/Attr.h"
 #include "clang/AST/RecordLayout.h"
 #include "clang/Basic/CodeGenOptions.h"
+#include "clang/Basic/DiagnosticFrontend.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
 #include "clang/CodeGen/SwiftCallingConv.h"
 #include "llvm/ADT/SmallBitVector.h"
@@ -28,6 +29,7 @@
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm> // std::sort
@@ -96,6 +98,17 @@ Address ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
   return Address::invalid();
 }
+bool ABIInfo::isPromotableIntegerTypeForABI(QualType Ty) const {
+  if (Ty->isPromotableIntegerType())
+    return true;
+
+  if (const auto *EIT = Ty->getAs<ExtIntType>())
+    if (EIT->getNumBits() < getContext().getTypeSize(getContext().IntTy))
+      return true;
+
+  return false;
+}
+
 ABIInfo::~ABIInfo() {}
 /// Does the given lowering require more than the given number of
@@ -384,7 +397,7 @@ static Address emitMergePHI(CodeGenFunction &CGF,
   return Address(PHI, Align);
 }
-TargetCodeGenInfo::~TargetCodeGenInfo() { delete Info; }
+TargetCodeGenInfo::~TargetCodeGenInfo() = default;
 // If someone can figure out a general rule for this, that would be great.
 // It's probably just doomed to be platform-dependent, though.
@@ -486,11 +499,15 @@ static bool isEmptyField(ASTContext &Context, const FieldDecl *FD,
   // Constant arrays of empty records count as empty; strip them off.
   // Constant arrays of zero length always count as empty.
+  bool WasArray = false;
   if (AllowArrays)
     while (const ConstantArrayType *AT = Context.getAsConstantArrayType(FT)) {
       if (AT->getSize() == 0)
         return true;
       FT = AT->getElementType();
+      // The [[no_unique_address]] special case below does not apply to
+      // arrays of C++ empty records, so we need to remember this fact.
+      WasArray = true;
     }
   const RecordType *RT = FT->getAs<RecordType>();
@@ -501,7 +518,14 @@ static bool isEmptyField(ASTContext &Context, const FieldDecl *FD,
   //
   // FIXME: We should use a predicate for whether this behavior is true in the
   // current ABI.
-  if (isa<CXXRecordDecl>(RT->getDecl()))
+  //
+  // The exception to the above rule is for fields marked with the
+  // [[no_unique_address]] attribute (since C++20). Those do count as empty
+  // according to the Itanium ABI. The exception applies only to records,
+  // not arrays of records, so we must also check whether we stripped off an
+  // array type above.
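An illustration of the rule described in the comment above (a sketch under the Itanium C++ ABI and C++20; how each record is ultimately passed still depends on the target):

struct Empty {};

// The attributed member counts as empty, so S1 may be classified like
// 'struct { float F; }' for argument-passing purposes.
struct S1 { [[no_unique_address]] Empty E; float F; };

// A plain C++ empty member does not count as empty...
struct S2 { Empty E; float F; };

// ...and neither does an array of empty records, even with the attribute,
// which is why the code above tracks whether an array type was stripped.
struct S3 { [[no_unique_address]] Empty E[2]; float F; };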
+ if (isa<CXXRecordDecl>(RT->getDecl()) && + (WasArray || !FD->hasAttr<NoUniqueAddressAttr>())) return false; return isEmptyRecord(Context, FT, AllowArrays); @@ -681,7 +705,7 @@ public: class DefaultTargetCodeGenInfo : public TargetCodeGenInfo { public: DefaultTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) - : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} + : TargetCodeGenInfo(std::make_unique<DefaultABIInfo>(CGT)) {} }; ABIArgInfo DefaultABIInfo::classifyArgumentType(QualType Ty) const { @@ -700,8 +724,16 @@ ABIArgInfo DefaultABIInfo::classifyArgumentType(QualType Ty) const { if (const EnumType *EnumTy = Ty->getAs<EnumType>()) Ty = EnumTy->getDecl()->getIntegerType(); - return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty) - : ABIArgInfo::getDirect()); + ASTContext &Context = getContext(); + if (const auto *EIT = Ty->getAs<ExtIntType>()) + if (EIT->getNumBits() > + Context.getTypeSize(Context.getTargetInfo().hasInt128Type() + ? Context.Int128Ty + : Context.LongLongTy)) + return getNaturalAlignIndirect(Ty); + + return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect()); } ABIArgInfo DefaultABIInfo::classifyReturnType(QualType RetTy) const { @@ -715,8 +747,15 @@ ABIArgInfo DefaultABIInfo::classifyReturnType(QualType RetTy) const { if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) RetTy = EnumTy->getDecl()->getIntegerType(); - return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy) - : ABIArgInfo::getDirect()); + if (const auto *EIT = RetTy->getAs<ExtIntType>()) + if (EIT->getNumBits() > + getContext().getTypeSize(getContext().getTargetInfo().hasInt128Type() + ? getContext().Int128Ty + : getContext().LongLongTy)) + return getNaturalAlignIndirect(RetTy); + + return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) + : ABIArgInfo::getDirect()); } //===----------------------------------------------------------------------===// @@ -726,11 +765,19 @@ ABIArgInfo DefaultABIInfo::classifyReturnType(QualType RetTy) const { //===----------------------------------------------------------------------===// class WebAssemblyABIInfo final : public SwiftABIInfo { +public: + enum ABIKind { + MVP = 0, + ExperimentalMV = 1, + }; + +private: DefaultABIInfo defaultInfo; + ABIKind Kind; public: - explicit WebAssemblyABIInfo(CodeGen::CodeGenTypes &CGT) - : SwiftABIInfo(CGT), defaultInfo(CGT) {} + explicit WebAssemblyABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind) + : SwiftABIInfo(CGT), defaultInfo(CGT), Kind(Kind) {} private: ABIArgInfo classifyReturnType(QualType RetTy) const; @@ -761,8 +808,9 @@ private: class WebAssemblyTargetCodeGenInfo final : public TargetCodeGenInfo { public: - explicit WebAssemblyTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) - : TargetCodeGenInfo(new WebAssemblyABIInfo(CGT)) {} + explicit WebAssemblyTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, + WebAssemblyABIInfo::ABIKind K) + : TargetCodeGenInfo(std::make_unique<WebAssemblyABIInfo>(CGT, K)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override { @@ -813,6 +861,20 @@ ABIArgInfo WebAssemblyABIInfo::classifyArgumentType(QualType Ty) const { // though watch out for things like bitfields. 
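A sketch of the _ExtInt rule the default ABI code above implements, assuming a 32-bit int and a target where __int128 is available (names and default widths are hypothetical):

enum class Lowering { Extend, Direct, Indirect };

// _ExtInt(N) narrower than 'int' is extended like any promotable integer,
// widths up to the largest native integer type are passed directly, and
// anything wider is passed indirectly.
Lowering classifyExtInt(unsigned NumBits, unsigned IntBits = 32,
                        unsigned LargestIntBits = 128) {
  if (NumBits < IntBits)
    return Lowering::Extend;
  if (NumBits <= LargestIntBits)
    return Lowering::Direct;
  return Lowering::Indirect;
}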
if (const Type *SeltTy = isSingleElementStruct(Ty, getContext())) return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); + // For the experimental multivalue ABI, fully expand all other aggregates + if (Kind == ABIKind::ExperimentalMV) { + const RecordType *RT = Ty->getAs<RecordType>(); + assert(RT); + bool HasBitField = false; + for (auto *Field : RT->getDecl()->fields()) { + if (Field->isBitField()) { + HasBitField = true; + break; + } + } + if (!HasBitField) + return ABIArgInfo::getExpand(); + } } // Otherwise just do the default thing. @@ -832,6 +894,9 @@ ABIArgInfo WebAssemblyABIInfo::classifyReturnType(QualType RetTy) const { // ABIArgInfo::getDirect(). if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext())) return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); + // For the experimental multivalue ABI, return all other aggregates + if (Kind == ABIKind::ExperimentalMV) + return ABIArgInfo::getDirect(); } } @@ -871,8 +936,8 @@ class PNaClABIInfo : public ABIInfo { class PNaClTargetCodeGenInfo : public TargetCodeGenInfo { public: - PNaClTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) - : TargetCodeGenInfo(new PNaClABIInfo(CGT)) {} + PNaClTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) + : TargetCodeGenInfo(std::make_unique<PNaClABIInfo>(CGT)) {} }; void PNaClABIInfo::computeInfo(CGFunctionInfo &FI) const { @@ -906,10 +971,15 @@ ABIArgInfo PNaClABIInfo::classifyArgumentType(QualType Ty) const { } else if (Ty->isFloatingType()) { // Floating-point types don't go inreg. return ABIArgInfo::getDirect(); + } else if (const auto *EIT = Ty->getAs<ExtIntType>()) { + // Treat extended integers as integers if <=64, otherwise pass indirectly. + if (EIT->getNumBits() > 64) + return getNaturalAlignIndirect(Ty); + return ABIArgInfo::getDirect(); } - return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty) - : ABIArgInfo::getDirect()); + return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect()); } ABIArgInfo PNaClABIInfo::classifyReturnType(QualType RetTy) const { @@ -920,12 +990,19 @@ ABIArgInfo PNaClABIInfo::classifyReturnType(QualType RetTy) const { if (isAggregateTypeForABI(RetTy)) return getNaturalAlignIndirect(RetTy); + // Treat extended integers as integers if <=64, otherwise pass indirectly. + if (const auto *EIT = RetTy->getAs<ExtIntType>()) { + if (EIT->getNumBits() > 64) + return getNaturalAlignIndirect(RetTy); + return ABIArgInfo::getDirect(); + } + // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) RetTy = EnumTy->getDecl()->getIntegerType(); - return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy) - : ABIArgInfo::getDirect()); + return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) + : ABIArgInfo::getDirect()); } /// IsX86_MMXType - Return true if this is an MMX type. 
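Two example records showing which aggregates the ExperimentalMV lowering above will expand (a sketch; the ABI is explicitly experimental):

// No bit-fields: fully expanded, so each member travels as its own
// argument or return value.
struct Pair { int A; float B; };

// Contains a bit-field, so it keeps the default aggregate lowering.
struct Packed { int A : 3; int B : 5; };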
@@ -943,7 +1020,8 @@ static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF, .Cases("y", "&y", "^Ym", true) .Default(false); if (IsMMXCons && Ty->isVectorTy()) { - if (cast<llvm::VectorType>(Ty)->getBitWidth() != 64) { + if (cast<llvm::VectorType>(Ty)->getPrimitiveSizeInBits().getFixedSize() != + 64) { // Invalid MMX constraint return nullptr; } @@ -1112,7 +1190,7 @@ public: X86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI, unsigned NumRegisterParameters, bool SoftFloatABI) - : TargetCodeGenInfo(new X86_32ABIInfo( + : TargetCodeGenInfo(std::make_unique<X86_32ABIInfo>( CGT, DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI, NumRegisterParameters, SoftFloatABI)) {} @@ -1412,8 +1490,8 @@ ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy, // registers and we need to make sure to pick a type the LLVM // backend will like. if (Size == 128) - return ABIArgInfo::getDirect(llvm::VectorType::get( - llvm::Type::getInt64Ty(getVMContext()), 2)); + return ABIArgInfo::getDirect(llvm::FixedVectorType::get( + llvm::Type::getInt64Ty(getVMContext()), 2)); // Always return in register if it fits in a general purpose // register, or if it is 64 bits and has a single element. @@ -1470,15 +1548,19 @@ ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy, if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) RetTy = EnumTy->getDecl()->getIntegerType(); - return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy) - : ABIArgInfo::getDirect()); + if (const auto *EIT = RetTy->getAs<ExtIntType>()) + if (EIT->getNumBits() > 64) + return getIndirectReturnResult(RetTy, State); + + return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) + : ABIArgInfo::getDirect()); } -static bool isSSEVectorType(ASTContext &Context, QualType Ty) { +static bool isSIMDVectorType(ASTContext &Context, QualType Ty) { return Ty->getAs<VectorType>() && Context.getTypeSize(Ty) == 128; } -static bool isRecordWithSSEVectorType(ASTContext &Context, QualType Ty) { +static bool isRecordWithSIMDVectorType(ASTContext &Context, QualType Ty) { const RecordType *RT = Ty->getAs<RecordType>(); if (!RT) return 0; @@ -1487,16 +1569,16 @@ static bool isRecordWithSSEVectorType(ASTContext &Context, QualType Ty) { // If this is a C++ record, check the bases first. if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) for (const auto &I : CXXRD->bases()) - if (!isRecordWithSSEVectorType(Context, I.getType())) + if (!isRecordWithSIMDVectorType(Context, I.getType())) return false; for (const auto *i : RD->fields()) { QualType FT = i->getType(); - if (isSSEVectorType(Context, FT)) + if (isSIMDVectorType(Context, FT)) return true; - if (isRecordWithSSEVectorType(Context, FT)) + if (isRecordWithSIMDVectorType(Context, FT)) return true; } @@ -1517,8 +1599,8 @@ unsigned X86_32ABIInfo::getTypeStackAlignInBytes(QualType Ty, } // Otherwise, if the type contains an SSE vector type, the alignment is 16. 
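A sketch of the i386 stack-alignment decision that the comment above describes (hypothetical helper; the minimum ABI stack alignment on this target is 4 bytes):

// A type aligned to at least 16 bytes that is, or transitively contains,
// a 128-bit SIMD vector gets 16-byte stack alignment; everything else
// falls back to the 4-byte minimum.
unsigned typeStackAlign(unsigned PreferredAlign, bool HasSIMDVector) {
  const unsigned MinABIStackAlign = 4;
  return (PreferredAlign >= 16 && HasSIMDVector) ? 16 : MinABIStackAlign;
}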
- if (Align >= 16 && (isSSEVectorType(getContext(), Ty) || - isRecordWithSSEVectorType(getContext(), Ty))) + if (Align >= 16 && (isSIMDVectorType(getContext(), Ty) || + isRecordWithSIMDVectorType(getContext(), Ty))) return 16; return MinABIStackAlignInBytes; @@ -1661,7 +1743,7 @@ void X86_32ABIInfo::runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) c isHomogeneousAggregate(Ty, Base, NumElts)) { if (State.FreeSSERegs >= NumElts) { State.FreeSSERegs -= NumElts; - Args[I].info = ABIArgInfo::getDirect(); + Args[I].info = ABIArgInfo::getDirectInReg(); State.IsPreassigned.set(I); } } @@ -1676,6 +1758,7 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, bool IsVectorCall = State.CC == llvm::CallingConv::X86_VectorCall; Ty = useFirstFieldIfTransparentUnion(Ty); + TypeInfo TI = getContext().getTypeInfo(Ty); // Check with the C++ ABI first. const RecordType *RT = Ty->getAs<RecordType>(); @@ -1725,7 +1808,7 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, bool NeedsPadding = false; bool InReg; if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) { - unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32; + unsigned SizeInRegs = (TI.Width + 31) / 32; SmallVector<llvm::Type*, 3> Elements(SizeInRegs, Int32); llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements); if (InReg) @@ -1735,14 +1818,19 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, } llvm::IntegerType *PaddingType = NeedsPadding ? Int32 : nullptr; + // Pass over-aligned aggregates on Windows indirectly. This behavior was + // added in MSVC 2015. + if (IsWin32StructABI && TI.AlignIsRequired && TI.Align > 32) + return getIndirectResult(Ty, /*ByVal=*/false, State); + // Expand small (<= 128-bit) record types when we know that the stack layout // of those arguments will match the struct. This is important because the // LLVM backend isn't smart enough to remove byval, which inhibits many // optimizations. // Don't do this for the MCU if there are still free integer registers // (see X86_64 ABI for full explanation). - if (getContext().getTypeSize(Ty) <= 4 * 32 && - (!IsMCUABI || State.FreeRegs == 0) && canExpandIndirectArgument(Ty)) + if (TI.Width <= 4 * 32 && (!IsMCUABI || State.FreeRegs == 0) && + canExpandIndirectArgument(Ty)) return ABIArgInfo::getExpandWithPadding( IsFastCall || IsVectorCall || IsRegCall, PaddingType); @@ -1750,14 +1838,24 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, } if (const VectorType *VT = Ty->getAs<VectorType>()) { + // On Windows, vectors are passed directly if registers are available, or + // indirectly if not. This avoids the need to align argument memory. Pass + // user-defined vector types larger than 512 bits indirectly for simplicity. + if (IsWin32StructABI) { + if (TI.Width <= 512 && State.FreeSSERegs > 0) { + --State.FreeSSERegs; + return ABIArgInfo::getDirectInReg(); + } + return getIndirectResult(Ty, /*ByVal=*/false, State); + } + // On Darwin, some vectors are passed in memory, we handle this by passing // it as an i8/i16/i32/i64. 
if (IsDarwinVectorABI) { - uint64_t Size = getContext().getTypeSize(Ty); - if ((Size == 8 || Size == 16 || Size == 32) || - (Size == 64 && VT->getNumElements() == 1)) - return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), - Size)); + if ((TI.Width == 8 || TI.Width == 16 || TI.Width == 32) || + (TI.Width == 64 && VT->getNumElements() == 1)) + return ABIArgInfo::getDirect( + llvm::IntegerType::get(getVMContext(), TI.Width)); } if (IsX86_MMXType(CGT.ConvertType(Ty))) @@ -1772,12 +1870,21 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, bool InReg = shouldPrimitiveUseInReg(Ty, State); - if (Ty->isPromotableIntegerType()) { + if (isPromotableIntegerTypeForABI(Ty)) { if (InReg) return ABIArgInfo::getExtendInReg(Ty); return ABIArgInfo::getExtend(Ty); } + if (const auto * EIT = Ty->getAs<ExtIntType>()) { + if (EIT->getNumBits() <= 64) { + if (InReg) + return ABIArgInfo::getDirectInReg(); + return ABIArgInfo::getDirect(); + } + return getIndirectResult(Ty, /*ByVal=*/false, State); + } + if (InReg) return ABIArgInfo::getDirectInReg(); return ABIArgInfo::getDirect(); @@ -1787,9 +1894,10 @@ void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const { CCState State(FI); if (IsMCUABI) State.FreeRegs = 3; - else if (State.CC == llvm::CallingConv::X86_FastCall) + else if (State.CC == llvm::CallingConv::X86_FastCall) { State.FreeRegs = 2; - else if (State.CC == llvm::CallingConv::X86_VectorCall) { + State.FreeSSERegs = 3; + } else if (State.CC == llvm::CallingConv::X86_VectorCall) { State.FreeRegs = 2; State.FreeSSERegs = 6; } else if (FI.getHasRegParm()) @@ -1797,6 +1905,11 @@ void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const { else if (State.CC == llvm::CallingConv::X86_RegCall) { State.FreeRegs = 5; State.FreeSSERegs = 8; + } else if (IsWin32StructABI) { + // Since MSVC 2015, the first three SSE vectors have been passed in + // registers. The rest are passed indirectly. + State.FreeRegs = DefaultNumRegisterParameters; + State.FreeSSERegs = 3; } else State.FreeRegs = DefaultNumRegisterParameters; @@ -1843,16 +1956,25 @@ X86_32ABIInfo::addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields, CharUnits &StackOffset, ABIArgInfo &Info, QualType Type) const { // Arguments are always 4-byte-aligned. - CharUnits FieldAlign = CharUnits::fromQuantity(4); + CharUnits WordSize = CharUnits::fromQuantity(4); + assert(StackOffset.isMultipleOf(WordSize) && "unaligned inalloca struct"); - assert(StackOffset.isMultipleOf(FieldAlign) && "unaligned inalloca struct"); - Info = ABIArgInfo::getInAlloca(FrameFields.size()); - FrameFields.push_back(CGT.ConvertTypeForMem(Type)); - StackOffset += getContext().getTypeSizeInChars(Type); + // sret pointers and indirect things will require an extra pointer + // indirection, unless they are byval. Most things are byval, and will not + // require this indirection. + bool IsIndirect = false; + if (Info.isIndirect() && !Info.getIndirectByVal()) + IsIndirect = true; + Info = ABIArgInfo::getInAlloca(FrameFields.size(), IsIndirect); + llvm::Type *LLTy = CGT.ConvertTypeForMem(Type); + if (IsIndirect) + LLTy = LLTy->getPointerTo(0); + FrameFields.push_back(LLTy); + StackOffset += IsIndirect ? WordSize : getContext().getTypeSizeInChars(Type); // Insert padding bytes to respect alignment. 
CharUnits FieldEnd = StackOffset; - StackOffset = FieldEnd.alignTo(FieldAlign); + StackOffset = FieldEnd.alignTo(WordSize); if (StackOffset != FieldEnd) { CharUnits NumBytes = StackOffset - FieldEnd; llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext()); @@ -1866,16 +1988,12 @@ static bool isArgInAlloca(const ABIArgInfo &Info) { switch (Info.getKind()) { case ABIArgInfo::InAlloca: return true; - case ABIArgInfo::Indirect: - assert(Info.getIndirectByVal()); - return true; case ABIArgInfo::Ignore: return false; + case ABIArgInfo::Indirect: case ABIArgInfo::Direct: case ABIArgInfo::Extend: - if (Info.getInReg()) - return false; - return true; + return !Info.getInReg(); case ABIArgInfo::Expand: case ABIArgInfo::CoerceAndExpand: // These are aggregate types which are never passed in registers when @@ -1909,8 +2027,7 @@ void X86_32ABIInfo::rewriteWithInAlloca(CGFunctionInfo &FI) const { // Put the sret parameter into the inalloca struct if it's in memory. if (Ret.isIndirect() && !Ret.getInReg()) { - CanQualType PtrTy = getContext().getPointerType(FI.getReturnType()); - addFieldToArgStruct(FrameFields, StackOffset, Ret, PtrTy); + addFieldToArgStruct(FrameFields, StackOffset, Ret, FI.getReturnType()); // On Windows, the hidden sret parameter is always returned in eax. Ret.setInAllocaSRet(IsWin32StructABI); } @@ -2207,7 +2324,7 @@ public: if (info.isDirect()) { llvm::Type *ty = info.getCoerceToType(); if (llvm::VectorType *vectorTy = dyn_cast_or_null<llvm::VectorType>(ty)) - return (vectorTy->getBitWidth() > 128); + return vectorTy->getPrimitiveSizeInBits().getFixedSize() > 128; } return false; } @@ -2280,7 +2397,7 @@ private: class X86_64TargetCodeGenInfo : public TargetCodeGenInfo { public: X86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) - : TargetCodeGenInfo(new X86_64ABIInfo(CGT, AVXLevel)) {} + : TargetCodeGenInfo(std::make_unique<X86_64ABIInfo>(CGT, AVXLevel)) {} const X86_64ABIInfo &getABIInfo() const { return static_cast<const X86_64ABIInfo&>(TargetCodeGenInfo::getABIInfo()); @@ -2361,8 +2478,110 @@ public: } } } + + void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc, + const FunctionDecl *Caller, + const FunctionDecl *Callee, + const CallArgList &Args) const override; }; + +static void initFeatureMaps(const ASTContext &Ctx, + llvm::StringMap<bool> &CallerMap, + const FunctionDecl *Caller, + llvm::StringMap<bool> &CalleeMap, + const FunctionDecl *Callee) { + if (CalleeMap.empty() && CallerMap.empty()) { + // The caller is potentially nullptr in the case where the call isn't in a + // function. In this case, the getFunctionFeatureMap ensures we just get + // the TU level setting (since it cannot be modified by 'target'). + Ctx.getFunctionFeatureMap(CallerMap, Caller); + Ctx.getFunctionFeatureMap(CalleeMap, Callee); + } +} + +static bool checkAVXParamFeature(DiagnosticsEngine &Diag, + SourceLocation CallLoc, + const llvm::StringMap<bool> &CallerMap, + const llvm::StringMap<bool> &CalleeMap, + QualType Ty, StringRef Feature, + bool IsArgument) { + bool CallerHasFeat = CallerMap.lookup(Feature); + bool CalleeHasFeat = CalleeMap.lookup(Feature); + if (!CallerHasFeat && !CalleeHasFeat) + return Diag.Report(CallLoc, diag::warn_avx_calling_convention) + << IsArgument << Ty << Feature; + + // Mixing calling conventions here is very clearly an error.
+ if (!CallerHasFeat || !CalleeHasFeat) + return Diag.Report(CallLoc, diag::err_avx_calling_convention) + << IsArgument << Ty << Feature; + + // Else, both caller and callee have the required feature, so there is no need + // to diagnose. + return false; +} + +static bool checkAVXParam(DiagnosticsEngine &Diag, ASTContext &Ctx, + SourceLocation CallLoc, + const llvm::StringMap<bool> &CallerMap, + const llvm::StringMap<bool> &CalleeMap, QualType Ty, + bool IsArgument) { + uint64_t Size = Ctx.getTypeSize(Ty); + if (Size > 256) + return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty, + "avx512f", IsArgument); + + if (Size > 128) + return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty, "avx", + IsArgument); + + return false; +} + +void X86_64TargetCodeGenInfo::checkFunctionCallABI( + CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller, + const FunctionDecl *Callee, const CallArgList &Args) const { + llvm::StringMap<bool> CallerMap; + llvm::StringMap<bool> CalleeMap; + unsigned ArgIndex = 0; + + // We need to loop through the actual call arguments rather than the + // function's parameters, in case this is variadic. + for (const CallArg &Arg : Args) { + // The "avx" feature changes how vectors >128 in size are passed. "avx512f" + // additionally changes how vectors >256 in size are passed. Like GCC, we + // warn when a function is called with an argument where this will change. + // Unlike GCC, we also error when it is an obvious ABI mismatch, that is, + // the caller and callee features are mismatched. + // Unfortunately, we cannot do this diagnostic in Sema, since the callee can + // change its ABI with attribute-target after this call. + if (Arg.getType()->isVectorType() && + CGM.getContext().getTypeSize(Arg.getType()) > 128) { + initFeatureMaps(CGM.getContext(), CallerMap, Caller, CalleeMap, Callee); + QualType Ty = Arg.getType(); + // The CallArg seems to have desugared the type already, so for clearer + // diagnostics, replace it with the type in the FunctionDecl if possible. + if (ArgIndex < Callee->getNumParams()) + Ty = Callee->getParamDecl(ArgIndex)->getType(); + + if (checkAVXParam(CGM.getDiags(), CGM.getContext(), CallLoc, CallerMap, + CalleeMap, Ty, /*IsArgument*/ true)) + return; + } + ++ArgIndex; + } + + // Check return always, as we don't have a good way of knowing in codegen + // whether this value is used, tail-called, etc. + if (Callee->getReturnType()->isVectorType() && + CGM.getContext().getTypeSize(Callee->getReturnType()) > 128) { + initFeatureMaps(CGM.getContext(), CallerMap, Caller, CalleeMap, Callee); + checkAVXParam(CGM.getDiags(), CGM.getContext(), CallLoc, CallerMap, + CalleeMap, Callee->getReturnType(), + /*IsArgument*/ false); + } +} + static std::string qualifyWindowsLibrary(llvm::StringRef Lib) { // If the argument does not end in .lib, automatically add the suffix. // If the argument contains a space, enclose it in quotes.
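The checkFunctionCallABI()/checkAVXParam() pair added above fires only for call sites whose vector arguments or return value exceed 128 bits: when neither side has the required feature it emits warn_avx_calling_convention, and when exactly one side has it, err_avx_calling_convention. A hypothetical translation unit that hits the error path (illustrative user code, not from this commit):
typedef int v8si __attribute__((vector_size(32)));  /* 256-bit vector */
v8si callee(v8si);                       /* compiled without -mavx */
__attribute__((target("avx")))
v8si caller(v8si x) {
  return callee(x);  /* caller has "avx", callee does not: hard error */
}
Dropping the target attribute from caller demotes this to the warning, since then neither function would pass the vector in YMM registers.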
@@ -2424,7 +2643,7 @@ class WinX86_64TargetCodeGenInfo : public TargetCodeGenInfo { public: WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) - : TargetCodeGenInfo(new WinX86_64ABIInfo(CGT, AVXLevel)) {} + : TargetCodeGenInfo(std::make_unique<WinX86_64ABIInfo>(CGT, AVXLevel)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override; @@ -2731,6 +2950,15 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, return; } + if (const auto *EITy = Ty->getAs<ExtIntType>()) { + if (EITy->getNumBits() <= 64) + Current = Integer; + else if (EITy->getNumBits() <= 128) + Lo = Hi = Integer; + // Larger values need to get passed in memory. + return; + } + if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) { // Arrays are treated like structures. @@ -2905,8 +3133,11 @@ ABIArgInfo X86_64ABIInfo::getIndirectReturnResult(QualType Ty) const { if (const EnumType *EnumTy = Ty->getAs<EnumType>()) Ty = EnumTy->getDecl()->getIntegerType(); - return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty) - : ABIArgInfo::getDirect()); + if (Ty->isExtIntType()) + return getNaturalAlignIndirect(Ty); + + return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect()); } return getNaturalAlignIndirect(Ty); @@ -2938,13 +3169,14 @@ ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty, // the argument in the free register. This does not seem to happen currently, // but this code would be much safer if we could mark the argument with // 'onstack'. See PR12193. - if (!isAggregateTypeForABI(Ty) && !IsIllegalVectorType(Ty)) { + if (!isAggregateTypeForABI(Ty) && !IsIllegalVectorType(Ty) && + !Ty->isExtIntType()) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs<EnumType>()) Ty = EnumTy->getDecl()->getIntegerType(); - return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty) - : ABIArgInfo::getDirect()); + return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect()); } if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) @@ -3001,11 +3233,11 @@ llvm::Type *X86_64ABIInfo::GetByteVectorType(QualType Ty) const { // Don't pass vXi128 vectors in their native type, the backend can't // legalize them. if (passInt128VectorsInMem() && - IRType->getVectorElementType()->isIntegerTy(128)) { + cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy(128)) { // Use a vXi64 vector. uint64_t Size = getContext().getTypeSize(Ty); - return llvm::VectorType::get(llvm::Type::getInt64Ty(getVMContext()), - Size / 64); + return llvm::FixedVectorType::get(llvm::Type::getInt64Ty(getVMContext()), + Size / 64); } return IRType; @@ -3020,8 +3252,8 @@ llvm::Type *X86_64ABIInfo::GetByteVectorType(QualType Ty) const { // Return a LLVM IR vector type based on the size of 'Ty'. - return llvm::VectorType::get(llvm::Type::getDoubleTy(getVMContext()), - Size / 64); + return llvm::FixedVectorType::get(llvm::Type::getDoubleTy(getVMContext()), + Size / 64); } /// BitsContainNoUserData - Return true if the specified [start,end) bit range @@ -3155,7 +3387,8 @@ GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset, // case. 
if (ContainsFloatAtOffset(IRType, IROffset, getDataLayout()) && ContainsFloatAtOffset(IRType, IROffset+4, getDataLayout())) - return llvm::VectorType::get(llvm::Type::getFloatTy(getVMContext()), 2); + return llvm::FixedVectorType::get(llvm::Type::getFloatTy(getVMContext()), + 2); return llvm::Type::getDoubleTy(getVMContext()); } @@ -3326,7 +3559,7 @@ classifyReturnType(QualType RetTy) const { RetTy = EnumTy->getDecl()->getIntegerType(); if (RetTy->isIntegralOrEnumerationType() && - RetTy->isPromotableIntegerType()) + isPromotableIntegerTypeForABI(RetTy)) return ABIArgInfo::getExtend(RetTy); } break; @@ -3471,7 +3704,7 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType( Ty = EnumTy->getDecl()->getIntegerType(); if (Ty->isIntegralOrEnumerationType() && - Ty->isPromotableIntegerType()) + isPromotableIntegerTypeForABI(Ty)) return ABIArgInfo::getExtend(Ty); } @@ -3627,14 +3860,15 @@ void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { } else { FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType()); } - } else if (IsRegCall && FI.getReturnType()->getAs<ComplexType>()) { + } else if (IsRegCall && FI.getReturnType()->getAs<ComplexType>() && + getContext().getCanonicalType(FI.getReturnType() + ->getAs<ComplexType>() + ->getElementType()) == + getContext().LongDoubleTy) // Complex Long Double Type is passed in Memory when Regcall // calling convention is used. - const ComplexType *CT = FI.getReturnType()->getAs<ComplexType>(); - if (getContext().getCanonicalType(CT->getElementType()) == - getContext().LongDoubleTy) - FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType()); - } else + FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType()); + else FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); } @@ -4021,14 +4255,25 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, // Mingw64 GCC returns i128 in XMM0. Coerce to v2i64 to handle that. // Clang matches them for compatibility. - return ABIArgInfo::getDirect( - llvm::VectorType::get(llvm::Type::getInt64Ty(getVMContext()), 2)); + return ABIArgInfo::getDirect(llvm::FixedVectorType::get( + llvm::Type::getInt64Ty(getVMContext()), 2)); default: break; } } + if (Ty->isExtIntType()) { + // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is + // not 1, 2, 4, or 8 bytes, must be passed by reference." + // However, non-power-of-two _ExtInts will be passed as 1,2,4 or 8 bytes + // anyway as long as it fits in them, so we don't have to check the power of + // 2. + if (Width <= 64) + return ABIArgInfo::getDirect(); + return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); + } + return ABIArgInfo::getDirect(); } @@ -4118,17 +4363,247 @@ Address WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, /*allowHigherAlign*/ false); } +static bool PPC_initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, + llvm::Value *Address, bool Is64Bit, + bool IsAIX) { + // This is calculated from the LLVM and GCC tables and verified + // against gcc output. AFAIK all PPC ABIs use the same encoding. + + CodeGen::CGBuilderTy &Builder = CGF.Builder; + + llvm::IntegerType *i8 = CGF.Int8Ty; + llvm::Value *Four8 = llvm::ConstantInt::get(i8, 4); + llvm::Value *Eight8 = llvm::ConstantInt::get(i8, 8); + llvm::Value *Sixteen8 = llvm::ConstantInt::get(i8, 16); + + // 0-31: r0-31, the 4-byte or 8-byte general-purpose registers + AssignToArrayRange(Builder, Address, Is64Bit ? Eight8 : Four8, 0, 31); +
+ // 32-63: fp0-31, the 8-byte floating-point registers + AssignToArrayRange(Builder, Address, Eight8, 32, 63); + + // 64-67 are various 4-byte or 8-byte special-purpose registers: + // 64: mq + // 65: lr + // 66: ctr + // 67: ap + AssignToArrayRange(Builder, Address, Is64Bit ? Eight8 : Four8, 64, 67); + + // 68-76 are various 4-byte special-purpose registers: + // 68-75 cr0-7 + // 76: xer + AssignToArrayRange(Builder, Address, Four8, 68, 76); + + // 77-108: v0-31, the 16-byte vector registers + AssignToArrayRange(Builder, Address, Sixteen8, 77, 108); + + // 109: vrsave + // 110: vscr + AssignToArrayRange(Builder, Address, Is64Bit ? Eight8 : Four8, 109, 110); + + // AIX does not utilize the rest of the registers. + if (IsAIX) + return false; + + // 111: spe_acc + // 112: spefscr + // 113: sfp + AssignToArrayRange(Builder, Address, Is64Bit ? Eight8 : Four8, 111, 113); + + if (!Is64Bit) + return false; + + // TODO: Need to verify if these registers are used on 64 bit AIX with Power8 + // or above CPU. + // 64-bit only registers: + // 114: tfhar + // 115: tfiar + // 116: texasr + AssignToArrayRange(Builder, Address, Eight8, 114, 116); + + return false; +} + +// AIX +namespace { +/// AIXABIInfo - The AIX XCOFF ABI information. +class AIXABIInfo : public ABIInfo { + const bool Is64Bit; + const unsigned PtrByteSize; + CharUnits getParamTypeAlignment(QualType Ty) const; + +public: + AIXABIInfo(CodeGen::CodeGenTypes &CGT, bool Is64Bit) + : ABIInfo(CGT), Is64Bit(Is64Bit), PtrByteSize(Is64Bit ? 8 : 4) {} + + bool isPromotableTypeForABI(QualType Ty) const; + + ABIArgInfo classifyReturnType(QualType RetTy) const; + ABIArgInfo classifyArgumentType(QualType Ty) const; + + void computeInfo(CGFunctionInfo &FI) const override { + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + + for (auto &I : FI.arguments()) + I.info = classifyArgumentType(I.type); + } + + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; +}; + +class AIXTargetCodeGenInfo : public TargetCodeGenInfo { + const bool Is64Bit; + +public: + AIXTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool Is64Bit) + : TargetCodeGenInfo(std::make_unique<AIXABIInfo>(CGT, Is64Bit)), + Is64Bit(Is64Bit) {} + int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { + return 1; // r1 is the dedicated stack pointer + } + + bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, + llvm::Value *Address) const override; +}; +} // namespace + +// Return true if the ABI requires Ty to be passed sign- or zero- +// extended to 32/64 bits. +bool AIXABIInfo::isPromotableTypeForABI(QualType Ty) const { + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = Ty->getAs<EnumType>()) + Ty = EnumTy->getDecl()->getIntegerType(); + + // Promotable integer types are required to be promoted by the ABI. + if (Ty->isPromotableIntegerType()) + return true; + + if (!Is64Bit) + return false; + + // For 64 bit mode, in addition to the usual promotable integer types, we also + // need to extend all 32-bit types, since the ABI requires promotion to 64 + // bits.
+ if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) + switch (BT->getKind()) { + case BuiltinType::Int: + case BuiltinType::UInt: + return true; + default: + break; + } + + return false; +} + +ABIArgInfo AIXABIInfo::classifyReturnType(QualType RetTy) const { + if (RetTy->isAnyComplexType()) + llvm::report_fatal_error("complex type is not supported on AIX yet"); + + if (RetTy->isVectorType()) + llvm::report_fatal_error("vector type is not supported on AIX yet"); + + if (RetTy->isVoidType()) + return ABIArgInfo::getIgnore(); + + // TODO: Evaluate if AIX power alignment rule would have an impact on the + // alignment here. + if (isAggregateTypeForABI(RetTy)) + return getNaturalAlignIndirect(RetTy); + + return (isPromotableTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) + : ABIArgInfo::getDirect()); +} + +ABIArgInfo AIXABIInfo::classifyArgumentType(QualType Ty) const { + Ty = useFirstFieldIfTransparentUnion(Ty); + + if (Ty->isAnyComplexType()) + llvm::report_fatal_error("complex type is not supported on AIX yet"); + + if (Ty->isVectorType()) + llvm::report_fatal_error("vector type is not supported on AIX yet"); + + // TODO: Evaluate if AIX power alignment rule would have an impact on the + // alignment here. + if (isAggregateTypeForABI(Ty)) { + // Records with non-trivial destructors/copy-constructors should not be + // passed by value. + if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) + return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); + + CharUnits CCAlign = getParamTypeAlignment(Ty); + CharUnits TyAlign = getContext().getTypeAlignInChars(Ty); + + return ABIArgInfo::getIndirect(CCAlign, /*ByVal*/ true, + /*Realign*/ TyAlign > CCAlign); + } + + return (isPromotableTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect()); +} + +CharUnits AIXABIInfo::getParamTypeAlignment(QualType Ty) const { + if (Ty->isAnyComplexType()) + llvm::report_fatal_error("complex type is not supported on AIX yet"); + + if (Ty->isVectorType()) + llvm::report_fatal_error("vector type is not supported on AIX yet"); + + // If the structure contains a vector type, the alignment is 16. + if (isRecordWithSIMDVectorType(getContext(), Ty)) + return CharUnits::fromQuantity(16); + + return CharUnits::fromQuantity(PtrByteSize); +} + +Address AIXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + if (Ty->isAnyComplexType()) + llvm::report_fatal_error("complex type is not supported on AIX yet"); + + if (Ty->isVectorType()) + llvm::report_fatal_error("vector type is not supported on AIX yet"); + + auto TypeInfo = getContext().getTypeInfoInChars(Ty); + TypeInfo.second = getParamTypeAlignment(Ty); + + CharUnits SlotSize = CharUnits::fromQuantity(PtrByteSize); + + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false, TypeInfo, + SlotSize, /*AllowHigher*/ true); +} + +bool AIXTargetCodeGenInfo::initDwarfEHRegSizeTable( + CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const { + return PPC_initDwarfEHRegSizeTable(CGF, Address, Is64Bit, /*IsAIX*/ true); +} + // PowerPC-32 namespace { /// PPC32_SVR4_ABIInfo - The 32-bit PowerPC ELF (SVR4) ABI information. 
class PPC32_SVR4_ABIInfo : public DefaultABIInfo { bool IsSoftFloatABI; + bool IsRetSmallStructInRegABI; CharUnits getParamTypeAlignment(QualType Ty) const; public: - PPC32_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, bool SoftFloatABI) - : DefaultABIInfo(CGT), IsSoftFloatABI(SoftFloatABI) {} + PPC32_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, bool SoftFloatABI, + bool RetSmallStructInRegABI) + : DefaultABIInfo(CGT), IsSoftFloatABI(SoftFloatABI), + IsRetSmallStructInRegABI(RetSmallStructInRegABI) {} + + ABIArgInfo classifyReturnType(QualType RetTy) const; + + void computeInfo(CGFunctionInfo &FI) const override { + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + for (auto &I : FI.arguments()) + I.info = classifyArgumentType(I.type); + } Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; @@ -4136,8 +4611,13 @@ public: class PPC32TargetCodeGenInfo : public TargetCodeGenInfo { public: - PPC32TargetCodeGenInfo(CodeGenTypes &CGT, bool SoftFloatABI) - : TargetCodeGenInfo(new PPC32_SVR4_ABIInfo(CGT, SoftFloatABI)) {} + PPC32TargetCodeGenInfo(CodeGenTypes &CGT, bool SoftFloatABI, + bool RetSmallStructInRegABI) + : TargetCodeGenInfo(std::make_unique<PPC32_SVR4_ABIInfo>( + CGT, SoftFloatABI, RetSmallStructInRegABI)) {} + + static bool isStructReturnInRegABI(const llvm::Triple &Triple, + const CodeGenOptions &Opts); int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { // This is recovered from gcc output. @@ -4150,7 +4630,7 @@ public: } CharUnits PPC32_SVR4_ABIInfo::getParamTypeAlignment(QualType Ty) const { - // Complex types are passed just like their elements + // Complex types are passed just like their elements. if (const ComplexType *CTy = Ty->getAs<ComplexType>()) Ty = CTy->getElementType(); @@ -4173,6 +4653,34 @@ CharUnits PPC32_SVR4_ABIInfo::getParamTypeAlignment(QualType Ty) const { return CharUnits::fromQuantity(4); } +ABIArgInfo PPC32_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const { + uint64_t Size; + + // -msvr4-struct-return puts small aggregates in GPR3 and GPR4. + if (isAggregateTypeForABI(RetTy) && IsRetSmallStructInRegABI && + (Size = getContext().getTypeSize(RetTy)) <= 64) { + // System V ABI (1995), page 3-22, specified: + // > A structure or union whose size is less than or equal to 8 bytes + // > shall be returned in r3 and r4, as if it were first stored in the + // > 8-byte aligned memory area and then the low addressed word were + // > loaded into r3 and the high-addressed word into r4. Bits beyond + // > the last member of the structure or union are not defined. + // + // GCC for big-endian PPC32 inserts the pad before the first member, + // not "beyond the last member" of the struct. To stay compatible + // with GCC, we coerce the struct to an integer of the same size. + // LLVM will extend it and return i32 in r3, or i64 in r3:r4. + if (Size == 0) + return ABIArgInfo::getIgnore(); + else { + llvm::Type *CoerceTy = llvm::Type::getIntNTy(getVMContext(), Size); + return ABIArgInfo::getDirect(CoerceTy); + } + } + + return DefaultABIInfo::classifyReturnType(RetTy); +} + // TODO: this implementation is now likely redundant with // DefaultABIInfo::EmitVAArg. 
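// Illustration of the classifyReturnType() change above (hypothetical user
// code, not part of this commit): given
//   struct Pair { int a, b; };   /* 64 bits */
//   struct Pair make(void);
// -msvr4-struct-return coerces the returned struct to i64, which LLVM places
// in r3:r4, while -maix-struct-return (SRCK_OnStack) keeps returning it
// through an sret pointer.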
Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList, @@ -4328,47 +4836,32 @@ Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList, return Result; } -bool -PPC32TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) const { - // This is calculated from the LLVM and GCC tables and verified - // against gcc output. AFAIK all ABIs use the same encoding. - - CodeGen::CGBuilderTy &Builder = CGF.Builder; - - llvm::IntegerType *i8 = CGF.Int8Ty; - llvm::Value *Four8 = llvm::ConstantInt::get(i8, 4); - llvm::Value *Eight8 = llvm::ConstantInt::get(i8, 8); - llvm::Value *Sixteen8 = llvm::ConstantInt::get(i8, 16); - - // 0-31: r0-31, the 4-byte general-purpose registers - AssignToArrayRange(Builder, Address, Four8, 0, 31); - - // 32-63: fp0-31, the 8-byte floating-point registers - AssignToArrayRange(Builder, Address, Eight8, 32, 63); - - // 64-76 are various 4-byte special-purpose registers: - // 64: mq - // 65: lr - // 66: ctr - // 67: ap - // 68-75 cr0-7 - // 76: xer - AssignToArrayRange(Builder, Address, Four8, 64, 76); +bool PPC32TargetCodeGenInfo::isStructReturnInRegABI( + const llvm::Triple &Triple, const CodeGenOptions &Opts) { + assert(Triple.getArch() == llvm::Triple::ppc); - // 77-108: v0-31, the 16-byte vector registers - AssignToArrayRange(Builder, Address, Sixteen8, 77, 108); + switch (Opts.getStructReturnConvention()) { + case CodeGenOptions::SRCK_Default: + break; + case CodeGenOptions::SRCK_OnStack: // -maix-struct-return + return false; + case CodeGenOptions::SRCK_InRegs: // -msvr4-struct-return + return true; + } - // 109: vrsave - // 110: vscr - // 111: spe_acc - // 112: spefscr - // 113: sfp - AssignToArrayRange(Builder, Address, Four8, 109, 113); + if (Triple.isOSBinFormatELF() && !Triple.isOSLinux()) + return true; return false; } +bool +PPC32TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, + llvm::Value *Address) const { + return PPC_initDwarfEHRegSizeTable(CGF, Address, /*Is64Bit*/ false, + /*IsAIX*/ false); +} + // PowerPC-64 namespace { @@ -4477,8 +4970,8 @@ public: PPC64_SVR4_TargetCodeGenInfo(CodeGenTypes &CGT, PPC64_SVR4_ABIInfo::ABIKind Kind, bool HasQPX, bool SoftFloatABI) - : TargetCodeGenInfo(new PPC64_SVR4_ABIInfo(CGT, Kind, HasQPX, - SoftFloatABI)) {} + : TargetCodeGenInfo(std::make_unique<PPC64_SVR4_ABIInfo>( + CGT, Kind, HasQPX, SoftFloatABI)) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { // This is recovered from gcc output. @@ -4513,7 +5006,7 @@ PPC64_SVR4_ABIInfo::isPromotableTypeForABI(QualType Ty) const { Ty = EnumTy->getDecl()->getIntegerType(); // Promotable integer types are required to be promoted by the ABI. 
- if (Ty->isPromotableIntegerType()) + if (isPromotableIntegerTypeForABI(Ty)) return true; // In addition to the usual promotable integer types, we also need to @@ -4527,6 +5020,10 @@ PPC64_SVR4_ABIInfo::isPromotableTypeForABI(QualType Ty) const { break; } + if (const auto *EIT = Ty->getAs<ExtIntType>()) + if (EIT->getNumBits() < 64) + return true; + return false; } @@ -4744,6 +5241,10 @@ PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const { } } + if (const auto *EIT = Ty->getAs<ExtIntType>()) + if (EIT->getNumBits() > 128) + return getNaturalAlignIndirect(Ty, /*ByVal=*/true); + if (isAggregateTypeForABI(Ty)) { if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); @@ -4816,6 +5317,10 @@ PPC64_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const { } } + if (const auto *EIT = RetTy->getAs<ExtIntType>()) + if (EIT->getNumBits() > 128) + return getNaturalAlignIndirect(RetTy, /*ByVal=*/false); + if (isAggregateTypeForABI(RetTy)) { // ELFv2 homogeneous aggregates are returned as array types. const Type *Base = nullptr; @@ -4901,66 +5406,19 @@ Address PPC64_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, TypeInfo, SlotSize, /*AllowHigher*/ true); } -static bool -PPC64_initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, - llvm::Value *Address) { - // This is calculated from the LLVM and GCC tables and verified - // against gcc output. AFAIK all ABIs use the same encoding. - - CodeGen::CGBuilderTy &Builder = CGF.Builder; - - llvm::IntegerType *i8 = CGF.Int8Ty; - llvm::Value *Four8 = llvm::ConstantInt::get(i8, 4); - llvm::Value *Eight8 = llvm::ConstantInt::get(i8, 8); - llvm::Value *Sixteen8 = llvm::ConstantInt::get(i8, 16); - - // 0-31: r0-31, the 8-byte general-purpose registers - AssignToArrayRange(Builder, Address, Eight8, 0, 31); - - // 32-63: fp0-31, the 8-byte floating-point registers - AssignToArrayRange(Builder, Address, Eight8, 32, 63); - - // 64-67 are various 8-byte special-purpose registers: - // 64: mq - // 65: lr - // 66: ctr - // 67: ap - AssignToArrayRange(Builder, Address, Eight8, 64, 67); - - // 68-76 are various 4-byte special-purpose registers: - // 68-75 cr0-7 - // 76: xer - AssignToArrayRange(Builder, Address, Four8, 68, 76); - - // 77-108: v0-31, the 16-byte vector registers - AssignToArrayRange(Builder, Address, Sixteen8, 77, 108); - - // 109: vrsave - // 110: vscr - // 111: spe_acc - // 112: spefscr - // 113: sfp - // 114: tfhar - // 115: tfiar - // 116: texasr - AssignToArrayRange(Builder, Address, Eight8, 109, 116); - - return false; -} - bool PPC64_SVR4_TargetCodeGenInfo::initDwarfEHRegSizeTable( CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const { - - return PPC64_initDwarfEHRegSizeTable(CGF, Address); + return PPC_initDwarfEHRegSizeTable(CGF, Address, /*Is64Bit*/ true, + /*IsAIX*/ false); } bool PPC64TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const { - - return PPC64_initDwarfEHRegSizeTable(CGF, Address); + return PPC_initDwarfEHRegSizeTable(CGF, Address, /*Is64Bit*/ true, + /*IsAIX*/ false); } //===----------------------------------------------------------------------===// @@ -5031,12 +5489,16 @@ private: bool isLegalVectorTypeForSwift(CharUnits totalSize, llvm::Type *eltTy, unsigned elts) const override; + + bool allowBFloatArgsAndRet() const override { + return getTarget().hasBFloat16Type(); + } }; class AArch64TargetCodeGenInfo : public TargetCodeGenInfo { public: 
AArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIInfo::ABIKind Kind) - : TargetCodeGenInfo(new AArch64ABIInfo(CGT, Kind)) {} + : TargetCodeGenInfo(std::make_unique<AArch64ABIInfo>(CGT, Kind)) {} StringRef getARCRetainAutoreleasedReturnValueMarker() const override { return "mov\tfp, fp\t\t// marker for objc_retainAutoreleaseReturnValue"; } @@ -5054,9 +5516,11 @@ public: if (!FD) return; - CodeGenOptions::SignReturnAddressScope Scope = CGM.getCodeGenOpts().getSignReturnAddress(); - CodeGenOptions::SignReturnAddressKeyValue Key = CGM.getCodeGenOpts().getSignReturnAddressKey(); - bool BranchTargetEnforcement = CGM.getCodeGenOpts().BranchTargetEnforcement; + LangOptions::SignReturnAddressScopeKind Scope = + CGM.getLangOpts().getSignReturnAddressScope(); + LangOptions::SignReturnAddressKeyKind Key = + CGM.getLangOpts().getSignReturnAddressKey(); + bool BranchTargetEnforcement = CGM.getLangOpts().BranchTargetEnforcement; if (const auto *TA = FD->getAttr<TargetAttr>()) { ParsedTargetAttr Attr = TA->parse(); if (!Attr.BranchProtection.empty()) { @@ -5072,14 +5536,14 @@ public: } auto *Fn = cast<llvm::Function>(GV); - if (Scope != CodeGenOptions::SignReturnAddressScope::None) { + if (Scope != LangOptions::SignReturnAddressScopeKind::None) { Fn->addFnAttr("sign-return-address", - Scope == CodeGenOptions::SignReturnAddressScope::All + Scope == LangOptions::SignReturnAddressScopeKind::All ? "all" : "non-leaf"); Fn->addFnAttr("sign-return-address-key", - Key == CodeGenOptions::SignReturnAddressKeyValue::AKey + Key == LangOptions::SignReturnAddressKeyKind::AKey ? "a_key" : "b_key"); } @@ -5133,13 +5597,13 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const { return ABIArgInfo::getDirect(ResType); } if (Size == 64) { - llvm::Type *ResType = - llvm::VectorType::get(llvm::Type::getInt32Ty(getVMContext()), 2); + auto *ResType = + llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 2); return ABIArgInfo::getDirect(ResType); } if (Size == 128) { - llvm::Type *ResType = - llvm::VectorType::get(llvm::Type::getInt32Ty(getVMContext()), 4); + auto *ResType = + llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 4); return ABIArgInfo::getDirect(ResType); } return getNaturalAlignIndirect(Ty, /*ByVal=*/false); @@ -5150,7 +5614,11 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const { if (const EnumType *EnumTy = Ty->getAs<EnumType>()) Ty = EnumTy->getDecl()->getIntegerType(); - return (Ty->isPromotableIntegerType() && isDarwinPCS() + if (const auto *EIT = Ty->getAs<ExtIntType>()) + if (EIT->getNumBits() > 128) + return getNaturalAlignIndirect(Ty); + + return (isPromotableIntegerTypeForABI(Ty) && isDarwinPCS() ? ABIArgInfo::getExtend(Ty) : ABIArgInfo::getDirect()); } @@ -5227,7 +5695,11 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy, if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) RetTy = EnumTy->getDecl()->getIntegerType(); - return (RetTy->isPromotableIntegerType() && isDarwinPCS() + if (const auto *EIT = RetTy->getAs<ExtIntType>()) + if (EIT->getNumBits() > 128) + return getNaturalAlignIndirect(RetTy); + + return (isPromotableIntegerTypeForABI(RetTy) && isDarwinPCS() ? ABIArgInfo::getExtend(RetTy) : ABIArgInfo::getDirect()); }
@@ -5626,11 +6098,14 @@ public: private: ABIKind Kind; + bool IsFloatABISoftFP; public: ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind) : SwiftABIInfo(CGT), Kind(_Kind) { setCCs(); + IsFloatABISoftFP = CGT.getCodeGenOpts().FloatABI == "softfp" || + CGT.getCodeGenOpts().FloatABI == ""; // default } bool isEABI() const { @@ -5661,6 +6136,10 @@ public: ABIKind getABIKind() const { return Kind; } + bool allowBFloatArgsAndRet() const override { + return !IsFloatABISoftFP && getTarget().hasBFloat16Type(); + } + private: ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic, unsigned functionCallConv) const; @@ -5701,7 +6180,7 @@ private: class ARMTargetCodeGenInfo : public TargetCodeGenInfo { public: ARMTargetCodeGenInfo(CodeGenTypes &CGT, ARMABIInfo::ABIKind K) - :TargetCodeGenInfo(new ARMABIInfo(CGT, K)) {} + : TargetCodeGenInfo(std::make_unique<ARMABIInfo>(CGT, K)) {} const ARMABIInfo &getABIInfo() const { return static_cast<const ARMABIInfo&>(TargetCodeGenInfo::getABIInfo()); @@ -5856,7 +6335,7 @@ ABIArgInfo ARMABIInfo::coerceIllegalVector(QualType Ty) const { return ABIArgInfo::getDirect(ResType); } if (Size == 64 || Size == 128) { - llvm::Type *ResType = llvm::VectorType::get( + auto *ResType = llvm::FixedVectorType::get( llvm::Type::getInt32Ty(getVMContext()), Size / 32); return ABIArgInfo::getDirect(ResType); } @@ -5872,7 +6351,7 @@ ABIArgInfo ARMABIInfo::classifyHomogeneousAggregate(QualType Ty, // FP16 vectors should be converted to integer vectors if (!getTarget().hasLegalHalfType() && containsAnyFP16Vectors(Ty)) { uint64_t Size = getContext().getTypeSize(VT); - llvm::Type *NewVecTy = llvm::VectorType::get( + auto *NewVecTy = llvm::FixedVectorType::get( llvm::Type::getInt32Ty(getVMContext()), Size / 32); llvm::Type *Ty = llvm::ArrayType::get(NewVecTy, Members); return ABIArgInfo::getDirect(Ty, 0, nullptr, false); @@ -5900,25 +6379,18 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic, if (isIllegalVectorType(Ty)) return coerceIllegalVector(Ty); - // _Float16 and __fp16 get passed as if it were an int or float, but with - // the top 16 bits unspecified. This is not done for OpenCL as it handles the - // half type natively, and does not need to interwork with AAPCS code. - if ((Ty->isFloat16Type() || Ty->isHalfType()) && - !getContext().getLangOpts().NativeHalfArgsAndReturns) { - llvm::Type *ResType = IsAAPCS_VFP ? - llvm::Type::getFloatTy(getVMContext()) : - llvm::Type::getInt32Ty(getVMContext()); - return ABIArgInfo::getDirect(ResType); - } - if (!isAggregateTypeForABI(Ty)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs<EnumType>()) { Ty = EnumTy->getDecl()->getIntegerType(); } - return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty) - : ABIArgInfo::getDirect()); + if (const auto *EIT = Ty->getAs<ExtIntType>()) + if (EIT->getNumBits() > 64) + return getNaturalAlignIndirect(Ty, /*ByVal=*/true); + + return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect()); } if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { @@ -6100,31 +6572,27 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, bool isVariadic, // Large vector types should be returned via memory.
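// A sketch of what the LangOptions-based settings above produce (assumed
// flags and IR shown for illustration, not taken from this commit): compiling
// with -mbranch-protection=pac-ret marks each defined function with
//   "sign-return-address"="non-leaf" "sign-return-address-key"="a_key"
// function attributes, matching the Scope and Key values computed here.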
if (getContext().getTypeSize(RetTy) > 128) return getNaturalAlignIndirect(RetTy); - // FP16 vectors should be converted to integer vectors - if (!getTarget().hasLegalHalfType() && + // TODO: FP16/BF16 vectors should be converted to integer vectors + // This check is similar to isIllegalVectorType - refactor? + if ((!getTarget().hasLegalHalfType() && (VT->getElementType()->isFloat16Type() || - VT->getElementType()->isHalfType())) + VT->getElementType()->isHalfType())) || + (IsFloatABISoftFP && + VT->getElementType()->isBFloat16Type())) return coerceIllegalVector(RetTy); } - // _Float16 and __fp16 get returned as if it were an int or float, but with - // the top 16 bits unspecified. This is not done for OpenCL as it handles the - // half type natively, and does not need to interwork with AAPCS code. - if ((RetTy->isFloat16Type() || RetTy->isHalfType()) && - !getContext().getLangOpts().NativeHalfArgsAndReturns) { - llvm::Type *ResType = IsAAPCS_VFP ? - llvm::Type::getFloatTy(getVMContext()) : - llvm::Type::getInt32Ty(getVMContext()); - return ABIArgInfo::getDirect(ResType); - } - if (!isAggregateTypeForABI(RetTy)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) RetTy = EnumTy->getDecl()->getIntegerType(); - return RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy) - : ABIArgInfo::getDirect(); + if (const auto *EIT = RetTy->getAs<ExtIntType>()) + if (EIT->getNumBits() > 64) + return getNaturalAlignIndirect(RetTy, /*ByVal=*/false); + + return isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) + : ABIArgInfo::getDirect(); } // Are we following APCS? @@ -6200,12 +6668,17 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, bool isVariadic, /// isIllegalVector - check whether Ty is an illegal vector type. bool ARMABIInfo::isIllegalVectorType(QualType Ty) const { if (const VectorType *VT = Ty->getAs<VectorType> ()) { - // On targets that don't support FP16, FP16 is expanded into float, and we - // don't want the ABI to depend on whether or not FP16 is supported in - // hardware. Thus return false to coerce FP16 vectors into integer vectors. - if (!getTarget().hasLegalHalfType() && + // On targets that don't support half, fp16 or bfloat, they are expanded + // into float, and we don't want the ABI to depend on whether or not they + // are supported in hardware. Thus return false to coerce vectors of these + // types into integer vectors. + // We do not depend on hasLegalHalfType for bfloat as it is a + // separate IR type. 
+ if ((!getTarget().hasLegalHalfType() && (VT->getElementType()->isFloat16Type() || - VT->getElementType()->isHalfType())) + VT->getElementType()->isHalfType())) || + (IsFloatABISoftFP && + VT->getElementType()->isBFloat16Type())) return true; if (isAndroid()) { // Android shipped using Clang 3.1, which supported a slightly different @@ -6257,6 +6730,7 @@ bool ARMABIInfo::containsAnyFP16Vectors(QualType Ty) const { } else { if (const VectorType *VT = Ty->getAs<VectorType>()) return (VT->getElementType()->isFloat16Type() || + VT->getElementType()->isBFloat16Type() || VT->getElementType()->isHalfType()); return false; } @@ -6362,9 +6836,14 @@ Address ARMABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, namespace { +class NVPTXTargetCodeGenInfo; + class NVPTXABIInfo : public ABIInfo { + NVPTXTargetCodeGenInfo &CGInfo; + public: - NVPTXABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {} + NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info) + : ABIInfo(CGT), CGInfo(Info) {} ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType Ty) const; @@ -6372,36 +6851,87 @@ public: void computeInfo(CGFunctionInfo &FI) const override; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; + bool isUnsupportedType(QualType T) const; + ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const; }; class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo { public: NVPTXTargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(new NVPTXABIInfo(CGT)) {} + : TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; bool shouldEmitStaticExternCAliases() const override; + llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override { + // On the device side, surface reference is represented as an object handle + // in 64-bit integer. + return llvm::Type::getInt64Ty(getABIInfo().getVMContext()); + } + + llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override { + // On the device side, texture reference is represented as an object handle + // in 64-bit integer. + return llvm::Type::getInt64Ty(getABIInfo().getVMContext()); + } + + bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst, + LValue Src) const override { + emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src); + return true; + } + + bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst, + LValue Src) const override { + emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src); + return true; + } + private: - // Adds a NamedMDNode with F, Name, and Operand as operands, and adds the + // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the // resulting MDNode to the nvvm.annotations MDNode. - static void addNVVMMetadata(llvm::Function *F, StringRef Name, int Operand); + static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name, + int Operand); + + static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst, + LValue Src) { + llvm::Value *Handle = nullptr; + llvm::Constant *C = + llvm::dyn_cast<llvm::Constant>(Src.getAddress(CGF).getPointer()); + // Lookup `addrspacecast` through the constant pointer if any. 
+ if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C)) + C = llvm::cast<llvm::Constant>(ASC->getPointerOperand()); + if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) { + // Load the handle from the specific global variable using + // `nvvm.texsurf.handle.internal` intrinsic. + Handle = CGF.EmitRuntimeCall( + CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal, + {GV->getType()}), + {GV}, "texsurf_handle"); + } else + Handle = CGF.EmitLoadOfScalar(Src, SourceLocation()); + CGF.EmitStoreOfScalar(Handle, Dst); + } }; /// Checks if the type is unsupported directly by the current target. -static bool isUnsupportedType(ASTContext &Context, QualType T) { +bool NVPTXABIInfo::isUnsupportedType(QualType T) const { + ASTContext &Context = getContext(); if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type()) return true; if (!Context.getTargetInfo().hasFloat128Type() && (T->isFloat128Type() || (T->isRealFloatingType() && Context.getTypeSize(T) == 128))) return true; + if (const auto *EIT = T->getAs<ExtIntType>()) + return EIT->getNumBits() > + (Context.getTargetInfo().hasInt128Type() ? 128U : 64U); if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() && - Context.getTypeSize(T) > 64) + Context.getTypeSize(T) > 64U) return true; if (const auto *AT = T->getAsArrayTypeUnsafe()) - return isUnsupportedType(Context, AT->getElementType()); + return isUnsupportedType(AT->getElementType()); const auto *RT = T->getAs<RecordType>(); if (!RT) return false; @@ -6410,24 +6940,23 @@ static bool isUnsupportedType(ASTContext &Context, QualType T) { // If this is a C++ record, check the bases first. if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) for (const CXXBaseSpecifier &I : CXXRD->bases()) - if (isUnsupportedType(Context, I.getType())) + if (isUnsupportedType(I.getType())) return true; for (const FieldDecl *I : RD->fields()) - if (isUnsupportedType(Context, I->getType())) + if (isUnsupportedType(I->getType())) return true; return false; } /// Coerce the given type into an array with maximum allowed size of elements. -static ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, ASTContext &Context, - llvm::LLVMContext &LLVMContext, - unsigned MaxSize) { +ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty, + unsigned MaxSize) const { // Alignment and Size are measured in bits. 
- const uint64_t Size = Context.getTypeSize(Ty); - const uint64_t Alignment = Context.getTypeAlign(Ty); + const uint64_t Size = getContext().getTypeSize(Ty); + const uint64_t Alignment = getContext().getTypeAlign(Ty); const unsigned Div = std::min<unsigned>(MaxSize, Alignment); - llvm::Type *IntType = llvm::Type::getIntNTy(LLVMContext, Div); + llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div); const uint64_t NumElements = (Size + Div - 1) / Div; return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements)); } @@ -6437,9 +6966,8 @@ ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const { return ABIArgInfo::getIgnore(); if (getContext().getLangOpts().OpenMP && - getContext().getLangOpts().OpenMPIsDevice && - isUnsupportedType(getContext(), RetTy)) - return coerceToIntArrayWithLimit(RetTy, getContext(), getVMContext(), 64); + getContext().getLangOpts().OpenMPIsDevice && isUnsupportedType(RetTy)) + return coerceToIntArrayWithLimit(RetTy, 64); // note: this is different from default ABI if (!RetTy->isScalarType()) return ABIArgInfo::getDirect(); @@ -6449,8 +6977,8 @@ ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const { if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) RetTy = EnumTy->getDecl()->getIntegerType(); - return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy) - : ABIArgInfo::getDirect()); + return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) - : ABIArgInfo::getDirect()); } ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const { @@ -6459,11 +6987,29 @@ ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const { Ty = EnumTy->getDecl()->getIntegerType(); // Return aggregates type as indirect by value - if (isAggregateTypeForABI(Ty)) + if (isAggregateTypeForABI(Ty)) { + // Under CUDA device compilation, tex/surf builtin types are replaced with + // object types and passed directly. + if (getContext().getLangOpts().CUDAIsDevice) { + if (Ty->isCUDADeviceBuiltinSurfaceType()) + return ABIArgInfo::getDirect( + CGInfo.getCUDADeviceBuiltinSurfaceDeviceType()); + if (Ty->isCUDADeviceBuiltinTextureType()) + return ABIArgInfo::getDirect( + CGInfo.getCUDADeviceBuiltinTextureDeviceType()); + } return getNaturalAlignIndirect(Ty, /* byval */ true); + } - return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty) - : ABIArgInfo::getDirect()); + if (const auto *EIT = Ty->getAs<ExtIntType>()) { + if ((EIT->getNumBits() > 128) || + (!getContext().getTargetInfo().hasInt128Type() && + EIT->getNumBits() > 64)) + return getNaturalAlignIndirect(Ty, /* byval */ true); + } + + return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect()); }
void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const { @@ -6488,6 +7034,17 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { if (GV->isDeclaration()) return; + const VarDecl *VD = dyn_cast_or_null<VarDecl>(D); + if (VD) { + if (M.getLangOpts().CUDA) { + if (VD->getType()->isCUDADeviceBuiltinSurfaceType()) + addNVVMMetadata(GV, "surface", 1); + else if (VD->getType()->isCUDADeviceBuiltinTextureType()) + addNVVMMetadata(GV, "texture", 1); + return; + } + } + const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; @@ -6536,16 +7093,16 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( } } -void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::Function *F, StringRef Name, - int Operand) { - llvm::Module *M = F->getParent(); +void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV, + StringRef Name, int Operand) { + llvm::Module *M = GV->getParent(); llvm::LLVMContext &Ctx = M->getContext(); // Get "nvvm.annotations" metadata node llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations"); llvm::Metadata *MDVals[] = { - llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, Name), + llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name), llvm::ConstantAsMetadata::get( llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))}; // Append metadata to nvvm.annotations @@ -6565,12 +7122,13 @@ namespace { class SystemZABIInfo : public SwiftABIInfo { bool HasVector; + bool IsSoftFloatABI; public: - SystemZABIInfo(CodeGenTypes &CGT, bool HV) - : SwiftABIInfo(CGT), HasVector(HV) {} + SystemZABIInfo(CodeGenTypes &CGT, bool HV, bool SF) - : SwiftABIInfo(CGT), HasVector(HV) {} + : SwiftABIInfo(CGT), HasVector(HV), IsSoftFloatABI(SF) {} - bool isPromotableIntegerType(QualType Ty) const; + bool isPromotableIntegerTypeForABI(QualType Ty) const; bool isCompoundType(QualType Ty) const; bool isVectorArgumentType(QualType Ty) const; bool isFPArgumentType(QualType Ty) const; @@ -6600,21 +7158,26 @@ public: class SystemZTargetCodeGenInfo : public TargetCodeGenInfo { public: - SystemZTargetCodeGenInfo(CodeGenTypes &CGT, bool HasVector) - : TargetCodeGenInfo(new SystemZABIInfo(CGT, HasVector)) {} + SystemZTargetCodeGenInfo(CodeGenTypes &CGT, bool HasVector, bool SoftFloatABI) + : TargetCodeGenInfo( + std::make_unique<SystemZABIInfo>(CGT, HasVector, SoftFloatABI)) {} }; } -bool SystemZABIInfo::isPromotableIntegerType(QualType Ty) const { +bool SystemZABIInfo::isPromotableIntegerTypeForABI(QualType Ty) const { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs<EnumType>()) Ty = EnumTy->getDecl()->getIntegerType(); // Promotable integer types are required to be promoted by the ABI. - if (Ty->isPromotableIntegerType()) + if (ABIInfo::isPromotableIntegerTypeForABI(Ty)) return true; + if (const auto *EIT = Ty->getAs<ExtIntType>()) + if (EIT->getNumBits() < 64) + return true; + // 32-bit values must also be promoted.
if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) switch (BT->getKind()) { @@ -6640,6 +7203,9 @@ bool SystemZABIInfo::isVectorArgumentType(QualType Ty) const { } bool SystemZABIInfo::isFPArgumentType(QualType Ty) const { + if (IsSoftFloatABI) + return false; + if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) switch (BT->getKind()) { case BuiltinType::Float: case BuiltinType::Double: @@ -6653,7 +7219,9 @@ bool SystemZABIInfo::isFPArgumentType(QualType Ty) const { } QualType SystemZABIInfo::GetSingleElementType(QualType Ty) const { - if (const RecordType *RT = Ty->getAsStructureType()) { + const RecordType *RT = Ty->getAs<RecordType>(); + + if (RT && RT->isStructureOrClassType()) { const RecordDecl *RD = RT->getDecl(); QualType Found; @@ -6679,6 +7247,10 @@ QualType SystemZABIInfo::GetSingleElementType(QualType Ty) const { if (getContext().getLangOpts().CPlusPlus && FD->isZeroLengthBitField(getContext())) continue; + // Like isSingleElementStruct(), ignore C++20 empty data members. + if (FD->hasAttr<NoUniqueAddressAttr>() && + isEmptyRecord(getContext(), FD->getType(), true)) + continue; // Unlike isSingleElementStruct(), arrays do not count. // Nested structures still do though. @@ -6725,7 +7297,7 @@ Address SystemZABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, } else { if (AI.getCoerceToType()) ArgTy = AI.getCoerceToType(); - InFPRs = ArgTy->isFloatTy() || ArgTy->isDoubleTy(); + InFPRs = (!IsSoftFloatABI && (ArgTy->isFloatTy() || ArgTy->isDoubleTy())); IsVector = ArgTy->isVectorTy(); UnpaddedSize = TyInfo.first; DirectAlign = TyInfo.second; @@ -6858,8 +7430,8 @@ ABIArgInfo SystemZABIInfo::classifyReturnType(QualType RetTy) const { return ABIArgInfo::getDirect(); if (isCompoundType(RetTy) || getContext().getTypeSize(RetTy) > 64) return getNaturalAlignIndirect(RetTy); - return (isPromotableIntegerType(RetTy) ? ABIArgInfo::getExtend(RetTy) - : ABIArgInfo::getDirect()); + return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) + : ABIArgInfo::getDirect()); } ABIArgInfo SystemZABIInfo::classifyArgumentType(QualType Ty) const { @@ -6868,7 +7440,7 @@ ABIArgInfo SystemZABIInfo::classifyArgumentType(QualType Ty) const { if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); // Integers and enums are extended to full register width. - if (isPromotableIntegerType(Ty)) + if (isPromotableIntegerTypeForABI(Ty)) return ABIArgInfo::getExtend(Ty); // Handle vector types and vector-like structure types. Note that
@@ -6918,10 +7490,49 @@ ABIArgInfo SystemZABIInfo::classifyArgumentType(QualType Ty) const { namespace { +class MSP430ABIInfo : public DefaultABIInfo { + static ABIArgInfo complexArgInfo() { + ABIArgInfo Info = ABIArgInfo::getDirect(); + Info.setCanBeFlattened(false); + return Info; + } + +public: + MSP430ABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} + + ABIArgInfo classifyReturnType(QualType RetTy) const { + if (RetTy->isAnyComplexType()) + return complexArgInfo(); + + return DefaultABIInfo::classifyReturnType(RetTy); + } + + ABIArgInfo classifyArgumentType(QualType RetTy) const { + if (RetTy->isAnyComplexType()) + return complexArgInfo(); + + return DefaultABIInfo::classifyArgumentType(RetTy); + } + + // Just copy the original implementations because + // DefaultABIInfo::classify{Return,Argument}Type() are not virtual + void computeInfo(CGFunctionInfo &FI) const override { + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + for (auto &I : FI.arguments()) + I.info = classifyArgumentType(I.type); + } + + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override { + return EmitVAArgInstr(CGF, VAListAddr, Ty, classifyArgumentType(Ty)); + } +}; + class MSP430TargetCodeGenInfo : public TargetCodeGenInfo { public: MSP430TargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} + : TargetCodeGenInfo(std::make_unique<MSP430ABIInfo>(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; }; @@ -6980,8 +7591,8 @@ class MIPSTargetCodeGenInfo : public TargetCodeGenInfo { unsigned SizeOfUnwindException; public: MIPSTargetCodeGenInfo(CodeGenTypes &CGT, bool IsO32) - : TargetCodeGenInfo(new MipsABIInfo(CGT, IsO32)), - SizeOfUnwindException(IsO32 ? 24 : 32) {} + : TargetCodeGenInfo(std::make_unique<MipsABIInfo>(CGT, IsO32)), + SizeOfUnwindException(IsO32 ? 24 : 32) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { return 29; @@ -7163,6 +7774,13 @@ MipsABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset) const { if (const EnumType *EnumTy = Ty->getAs<EnumType>()) Ty = EnumTy->getDecl()->getIntegerType(); + // Make sure we pass indirectly things that are too large. + if (const auto *EIT = Ty->getAs<ExtIntType>()) + if (EIT->getNumBits() > 128 || + (EIT->getNumBits() > 64 && + !getContext().getTargetInfo().hasInt128Type())) + return getNaturalAlignIndirect(Ty); + // All integral types are promoted to the GPR width. if (Ty->isIntegralOrEnumerationType()) return extendType(Ty); @@ -7247,7 +7865,14 @@ ABIArgInfo MipsABIInfo::classifyReturnType(QualType RetTy) const { if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) RetTy = EnumTy->getDecl()->getIntegerType(); - if (RetTy->isPromotableIntegerType()) + // Make sure we pass indirectly things that are too large.
+  if (const auto *EIT = RetTy->getAs<ExtIntType>())
+    if (EIT->getNumBits() > 128 ||
+        (EIT->getNumBits() > 64 &&
+         !getContext().getTargetInfo().hasInt128Type()))
+      return getNaturalAlignIndirect(RetTy);
+
+  if (isPromotableIntegerTypeForABI(RetTy))
     return ABIArgInfo::getExtend(RetTy);
 
   if ((RetTy->isUnsignedIntegerOrEnumerationType() ||
@@ -7366,7 +7991,7 @@ namespace {
 class AVRTargetCodeGenInfo : public TargetCodeGenInfo {
 public:
   AVRTargetCodeGenInfo(CodeGenTypes &CGT)
-      : TargetCodeGenInfo(new DefaultABIInfo(CGT)) { }
+      : TargetCodeGenInfo(std::make_unique<DefaultABIInfo>(CGT)) {}
 
   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                            CodeGen::CodeGenModule &CGM) const override {
@@ -7455,50 +8080,97 @@ void TCETargetCodeGenInfo::setTargetAttributes(
 
 namespace {
 
-class HexagonABIInfo : public ABIInfo {
-
-
+class HexagonABIInfo : public DefaultABIInfo {
 public:
-  HexagonABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {}
+  HexagonABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
 
 private:
-  ABIArgInfo classifyReturnType(QualType RetTy) const;
   ABIArgInfo classifyArgumentType(QualType RetTy) const;
+  ABIArgInfo classifyArgumentType(QualType RetTy, unsigned *RegsLeft) const;
 
   void computeInfo(CGFunctionInfo &FI) const override;
 
   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                     QualType Ty) const override;
+  Address EmitVAArgFromMemory(CodeGenFunction &CFG, Address VAListAddr,
+                              QualType Ty) const;
+  Address EmitVAArgForHexagon(CodeGenFunction &CFG, Address VAListAddr,
+                              QualType Ty) const;
+  Address EmitVAArgForHexagonLinux(CodeGenFunction &CFG, Address VAListAddr,
+                                   QualType Ty) const;
 };
 
 class HexagonTargetCodeGenInfo : public TargetCodeGenInfo {
 public:
   HexagonTargetCodeGenInfo(CodeGenTypes &CGT)
-    :TargetCodeGenInfo(new HexagonABIInfo(CGT)) {}
+      : TargetCodeGenInfo(std::make_unique<HexagonABIInfo>(CGT)) {}
 
   int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
     return 29;
   }
+
+  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+                           CodeGen::CodeGenModule &GCM) const override {
+    if (GV->isDeclaration())
+      return;
+    const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
+    if (!FD)
+      return;
+  }
 };
 
-}
+} // namespace
 
 void HexagonABIInfo::computeInfo(CGFunctionInfo &FI) const {
+  unsigned RegsLeft = 6;
   if (!getCXXABI().classifyReturnType(FI))
     FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
   for (auto &I : FI.arguments())
-    I.info = classifyArgumentType(I.type);
+    I.info = classifyArgumentType(I.type, &RegsLeft);
 }
 
-ABIArgInfo HexagonABIInfo::classifyArgumentType(QualType Ty) const {
+static bool HexagonAdjustRegsLeft(uint64_t Size, unsigned *RegsLeft) {
+  assert(Size <= 64 && "Not expecting to pass arguments larger than 64 bits"
+                       " through registers");
+
+  if (*RegsLeft == 0)
+    return false;
+
+  if (Size <= 32) {
+    (*RegsLeft)--;
+    return true;
+  }
+
+  if (2 <= (*RegsLeft & (~1U))) {
+    *RegsLeft = (*RegsLeft & (~1U)) - 2;
+    return true;
+  }
+
+  // The next available register was r5, but the candidate was wider than
+  // 32 bits, so it has to go on the stack. However, we still consume r5.
+  if (*RegsLeft == 1)
+    *RegsLeft = 0;
+
+  return false;
+}
+
+ABIArgInfo HexagonABIInfo::classifyArgumentType(QualType Ty,
+                                                unsigned *RegsLeft) const {
   if (!isAggregateTypeForABI(Ty)) {
     // Treat an enum type as its underlying type.
     if (const EnumType *EnumTy = Ty->getAs<EnumType>())
       Ty = EnumTy->getDecl()->getIntegerType();
 
-    return (Ty->isPromotableIntegerType() ?
ABIArgInfo::getExtend(Ty) - : ABIArgInfo::getDirect()); + uint64_t Size = getContext().getTypeSize(Ty); + if (Size <= 64) + HexagonAdjustRegsLeft(Size, RegsLeft); + + if (Size > 64 && Ty->isExtIntType()) + return getNaturalAlignIndirect(Ty, /*ByVal=*/true); + + return isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) + : ABIArgInfo::getDirect(); } if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) @@ -7509,63 +8181,304 @@ ABIArgInfo HexagonABIInfo::classifyArgumentType(QualType Ty) const { return ABIArgInfo::getIgnore(); uint64_t Size = getContext().getTypeSize(Ty); + unsigned Align = getContext().getTypeAlign(Ty); + if (Size > 64) return getNaturalAlignIndirect(Ty, /*ByVal=*/true); + + if (HexagonAdjustRegsLeft(Size, RegsLeft)) + Align = Size <= 32 ? 32 : 64; + if (Size <= Align) { // Pass in the smallest viable integer type. - else if (Size > 32) - return ABIArgInfo::getDirect(llvm::Type::getInt64Ty(getVMContext())); - else if (Size > 16) - return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); - else if (Size > 8) - return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); - else - return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext())); + if (!llvm::isPowerOf2_64(Size)) + Size = llvm::NextPowerOf2(Size); + return ABIArgInfo::getDirect(llvm::Type::getIntNTy(getVMContext(), Size)); + } + return DefaultABIInfo::classifyArgumentType(Ty); } ABIArgInfo HexagonABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); - // Large vector types should be returned via memory. - if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 64) - return getNaturalAlignIndirect(RetTy); + const TargetInfo &T = CGT.getTarget(); + uint64_t Size = getContext().getTypeSize(RetTy); + + if (RetTy->getAs<VectorType>()) { + // HVX vectors are returned in vector registers or register pairs. + if (T.hasFeature("hvx")) { + assert(T.hasFeature("hvx-length64b") || T.hasFeature("hvx-length128b")); + uint64_t VecSize = T.hasFeature("hvx-length64b") ? 64*8 : 128*8; + if (Size == VecSize || Size == 2*VecSize) + return ABIArgInfo::getDirectInReg(); + } + // Large vector types should be returned via memory. + if (Size > 64) + return getNaturalAlignIndirect(RetTy); + } if (!isAggregateTypeForABI(RetTy)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs<EnumType>()) RetTy = EnumTy->getDecl()->getIntegerType(); - return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend(RetTy) - : ABIArgInfo::getDirect()); + if (Size > 64 && RetTy->isExtIntType()) + return getNaturalAlignIndirect(RetTy, /*ByVal=*/false); + + return isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) + : ABIArgInfo::getDirect(); } if (isEmptyRecord(getContext(), RetTy, true)) return ABIArgInfo::getIgnore(); - // Aggregates <= 8 bytes are returned in r0; other aggregates + // Aggregates <= 8 bytes are returned in registers, other aggregates // are returned indirectly. - uint64_t Size = getContext().getTypeSize(RetTy); if (Size <= 64) { // Return in the smallest viable integer type. 
- if (Size <= 8) - return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext())); - if (Size <= 16) - return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); - if (Size <= 32) - return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); - return ABIArgInfo::getDirect(llvm::Type::getInt64Ty(getVMContext())); + if (!llvm::isPowerOf2_64(Size)) + Size = llvm::NextPowerOf2(Size); + return ABIArgInfo::getDirect(llvm::Type::getIntNTy(getVMContext(), Size)); } - return getNaturalAlignIndirect(RetTy, /*ByVal=*/true); } +Address HexagonABIInfo::EmitVAArgFromMemory(CodeGenFunction &CGF, + Address VAListAddr, + QualType Ty) const { + // Load the overflow area pointer. + Address __overflow_area_pointer_p = + CGF.Builder.CreateStructGEP(VAListAddr, 2, "__overflow_area_pointer_p"); + llvm::Value *__overflow_area_pointer = CGF.Builder.CreateLoad( + __overflow_area_pointer_p, "__overflow_area_pointer"); + + uint64_t Align = CGF.getContext().getTypeAlign(Ty) / 8; + if (Align > 4) { + // Alignment should be a power of 2. + assert((Align & (Align - 1)) == 0 && "Alignment is not power of 2!"); + + // overflow_arg_area = (overflow_arg_area + align - 1) & -align; + llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int64Ty, Align - 1); + + // Add offset to the current pointer to access the argument. + __overflow_area_pointer = + CGF.Builder.CreateGEP(__overflow_area_pointer, Offset); + llvm::Value *AsInt = + CGF.Builder.CreatePtrToInt(__overflow_area_pointer, CGF.Int32Ty); + + // Create a mask which should be "AND"ed + // with (overflow_arg_area + align - 1) + llvm::Value *Mask = llvm::ConstantInt::get(CGF.Int32Ty, -(int)Align); + __overflow_area_pointer = CGF.Builder.CreateIntToPtr( + CGF.Builder.CreateAnd(AsInt, Mask), __overflow_area_pointer->getType(), + "__overflow_area_pointer.align"); + } + + // Get the type of the argument from memory and bitcast + // overflow area pointer to the argument type. + llvm::Type *PTy = CGF.ConvertTypeForMem(Ty); + Address AddrTyped = CGF.Builder.CreateBitCast( + Address(__overflow_area_pointer, CharUnits::fromQuantity(Align)), + llvm::PointerType::getUnqual(PTy)); + + // Round up to the minimum stack alignment for varargs which is 4 bytes. 
+  uint64_t Offset = llvm::alignTo(CGF.getContext().getTypeSize(Ty) / 8, 4);
+
+  __overflow_area_pointer = CGF.Builder.CreateGEP(
+      __overflow_area_pointer, llvm::ConstantInt::get(CGF.Int32Ty, Offset),
+      "__overflow_area_pointer.next");
+  CGF.Builder.CreateStore(__overflow_area_pointer, __overflow_area_pointer_p);
+
+  return AddrTyped;
+}
+
+Address HexagonABIInfo::EmitVAArgForHexagon(CodeGenFunction &CGF,
+                                            Address VAListAddr,
+                                            QualType Ty) const {
+  // FIXME: Need to handle alignment
+  llvm::Type *BP = CGF.Int8PtrTy;
+  llvm::Type *BPP = CGF.Int8PtrPtrTy;
+  CGBuilderTy &Builder = CGF.Builder;
+  Address VAListAddrAsBPP = Builder.CreateBitCast(VAListAddr, BPP, "ap");
+  llvm::Value *Addr = Builder.CreateLoad(VAListAddrAsBPP, "ap.cur");
+  // Handle address alignment for type alignment > 32 bits
+  uint64_t TyAlign = CGF.getContext().getTypeAlign(Ty) / 8;
+  if (TyAlign > 4) {
+    assert((TyAlign & (TyAlign - 1)) == 0 && "Alignment is not power of 2!");
+    llvm::Value *AddrAsInt = Builder.CreatePtrToInt(Addr, CGF.Int32Ty);
+    AddrAsInt = Builder.CreateAdd(AddrAsInt, Builder.getInt32(TyAlign - 1));
+    AddrAsInt = Builder.CreateAnd(AddrAsInt, Builder.getInt32(~(TyAlign - 1)));
+    Addr = Builder.CreateIntToPtr(AddrAsInt, BP);
+  }
+  llvm::Type *PTy = llvm::PointerType::getUnqual(CGF.ConvertType(Ty));
+  Address AddrTyped = Builder.CreateBitCast(
+      Address(Addr, CharUnits::fromQuantity(TyAlign)), PTy);
+
+  uint64_t Offset = llvm::alignTo(CGF.getContext().getTypeSize(Ty) / 8, 4);
+  llvm::Value *NextAddr = Builder.CreateGEP(
+      Addr, llvm::ConstantInt::get(CGF.Int32Ty, Offset), "ap.next");
+  Builder.CreateStore(NextAddr, VAListAddrAsBPP);
+
+  return AddrTyped;
+}
+
+Address HexagonABIInfo::EmitVAArgForHexagonLinux(CodeGenFunction &CGF,
+                                                 Address VAListAddr,
+                                                 QualType Ty) const {
+  int ArgSize = CGF.getContext().getTypeSize(Ty) / 8;
+
+  if (ArgSize > 8)
+    return EmitVAArgFromMemory(CGF, VAListAddr, Ty);
+
+  // Here we have to check whether the argument is in the register area or
+  // in the overflow area. If the saved register area pointer plus the
+  // argument size rounded up to the alignment is greater than the saved
+  // register area end pointer, the argument is in the overflow area.
+  unsigned RegsLeft = 6;
+  Ty = CGF.getContext().getCanonicalType(Ty);
+  (void)classifyArgumentType(Ty, &RegsLeft);
+
+  llvm::BasicBlock *MaybeRegBlock = CGF.createBasicBlock("vaarg.maybe_reg");
+  llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg");
+  llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack");
+  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");
+
+  // Get the rounded size of the argument. GCC does not allow a vararg of
+  // size < 4 bytes; we follow the same logic here.
+  ArgSize = (CGF.getContext().getTypeSize(Ty) <= 32) ? 4 : 8;
+  int ArgAlign = (CGF.getContext().getTypeSize(Ty) <= 32) ? 4 : 8;
+
+  // The argument may be in the saved register area.
+  CGF.EmitBlock(MaybeRegBlock);
+
+  // Load the current saved register area pointer.
+  Address __current_saved_reg_area_pointer_p = CGF.Builder.CreateStructGEP(
+      VAListAddr, 0, "__current_saved_reg_area_pointer_p");
+  llvm::Value *__current_saved_reg_area_pointer = CGF.Builder.CreateLoad(
+      __current_saved_reg_area_pointer_p, "__current_saved_reg_area_pointer");
+
+  // Load the saved register area end pointer.
+  Address __saved_reg_area_end_pointer_p = CGF.Builder.CreateStructGEP(
+      VAListAddr, 1, "__saved_reg_area_end_pointer_p");
+  llvm::Value *__saved_reg_area_end_pointer = CGF.Builder.CreateLoad(
+      __saved_reg_area_end_pointer_p, "__saved_reg_area_end_pointer");
+
+  // If the size of the argument is > 4 bytes, check whether the stack
+  // location is aligned to 8 bytes.
+  if (ArgAlign > 4) {
+
+    llvm::Value *__current_saved_reg_area_pointer_int =
+        CGF.Builder.CreatePtrToInt(__current_saved_reg_area_pointer,
+                                   CGF.Int32Ty);
+
+    __current_saved_reg_area_pointer_int = CGF.Builder.CreateAdd(
+        __current_saved_reg_area_pointer_int,
+        llvm::ConstantInt::get(CGF.Int32Ty, (ArgAlign - 1)),
+        "align_current_saved_reg_area_pointer");
+
+    __current_saved_reg_area_pointer_int =
+        CGF.Builder.CreateAnd(__current_saved_reg_area_pointer_int,
+                              llvm::ConstantInt::get(CGF.Int32Ty, -ArgAlign),
+                              "align_current_saved_reg_area_pointer");
+
+    __current_saved_reg_area_pointer =
+        CGF.Builder.CreateIntToPtr(__current_saved_reg_area_pointer_int,
+                                   __current_saved_reg_area_pointer->getType(),
+                                   "align_current_saved_reg_area_pointer");
+  }
+
+  llvm::Value *__new_saved_reg_area_pointer =
+      CGF.Builder.CreateGEP(__current_saved_reg_area_pointer,
+                            llvm::ConstantInt::get(CGF.Int32Ty, ArgSize),
+                            "__new_saved_reg_area_pointer");
+
+  llvm::Value *UsingStack = 0;
+  UsingStack = CGF.Builder.CreateICmpSGT(__new_saved_reg_area_pointer,
+                                         __saved_reg_area_end_pointer);
+
+  CGF.Builder.CreateCondBr(UsingStack, OnStackBlock, InRegBlock);
+
+  // The argument is in the saved register area.
+  // Implement the block where the argument is in the register save area.
+  CGF.EmitBlock(InRegBlock);
+
+  llvm::Type *PTy = CGF.ConvertType(Ty);
+  llvm::Value *__saved_reg_area_p = CGF.Builder.CreateBitCast(
+      __current_saved_reg_area_pointer, llvm::PointerType::getUnqual(PTy));
+
+  CGF.Builder.CreateStore(__new_saved_reg_area_pointer,
+                          __current_saved_reg_area_pointer_p);
+
+  CGF.EmitBranch(ContBlock);
+
+  // The argument is in the overflow area.
+  // Implement the block where the argument is in the overflow area.
+  CGF.EmitBlock(OnStackBlock);
+
+  // Load the overflow area pointer.
+  Address __overflow_area_pointer_p =
+      CGF.Builder.CreateStructGEP(VAListAddr, 2, "__overflow_area_pointer_p");
+  llvm::Value *__overflow_area_pointer = CGF.Builder.CreateLoad(
+      __overflow_area_pointer_p, "__overflow_area_pointer");
+
+  // Align the overflow area pointer to the alignment of the argument.
+  if (ArgAlign > 4) {
+    llvm::Value *__overflow_area_pointer_int =
+        CGF.Builder.CreatePtrToInt(__overflow_area_pointer, CGF.Int32Ty);
+
+    __overflow_area_pointer_int =
+        CGF.Builder.CreateAdd(__overflow_area_pointer_int,
+                              llvm::ConstantInt::get(CGF.Int32Ty, ArgAlign - 1),
+                              "align_overflow_area_pointer");
+
+    __overflow_area_pointer_int =
+        CGF.Builder.CreateAnd(__overflow_area_pointer_int,
+                              llvm::ConstantInt::get(CGF.Int32Ty, -ArgAlign),
+                              "align_overflow_area_pointer");
+
+    __overflow_area_pointer = CGF.Builder.CreateIntToPtr(
+        __overflow_area_pointer_int, __overflow_area_pointer->getType(),
+        "align_overflow_area_pointer");
+  }
+
+  // Get the pointer for the next argument in the overflow area and store it
+  // back to the overflow area pointer.
+ llvm::Value *__new_overflow_area_pointer = CGF.Builder.CreateGEP( + __overflow_area_pointer, llvm::ConstantInt::get(CGF.Int32Ty, ArgSize), + "__overflow_area_pointer.next"); + + CGF.Builder.CreateStore(__new_overflow_area_pointer, + __overflow_area_pointer_p); + + CGF.Builder.CreateStore(__new_overflow_area_pointer, + __current_saved_reg_area_pointer_p); + + // Bitcast the overflow area pointer to the type of argument. + llvm::Type *OverflowPTy = CGF.ConvertTypeForMem(Ty); + llvm::Value *__overflow_area_p = CGF.Builder.CreateBitCast( + __overflow_area_pointer, llvm::PointerType::getUnqual(OverflowPTy)); + + CGF.EmitBranch(ContBlock); + + // Get the correct pointer to load the variable argument + // Implement the ContBlock + CGF.EmitBlock(ContBlock); + + llvm::Type *MemPTy = llvm::PointerType::getUnqual(CGF.ConvertTypeForMem(Ty)); + llvm::PHINode *ArgAddr = CGF.Builder.CreatePHI(MemPTy, 2, "vaarg.addr"); + ArgAddr->addIncoming(__saved_reg_area_p, InRegBlock); + ArgAddr->addIncoming(__overflow_area_p, OnStackBlock); + + return Address(ArgAddr, CharUnits::fromQuantity(ArgAlign)); +} + Address HexagonABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { - // FIXME: Someone needs to audit that this handle alignment correctly. - return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false, - getContext().getTypeInfoInChars(Ty), - CharUnits::fromQuantity(4), - /*AllowHigherAlign*/ true); + + if (getTarget().getTriple().isMusl()) + return EmitVAArgForHexagonLinux(CGF, VAListAddr, Ty); + + return EmitVAArgForHexagon(CGF, VAListAddr, Ty); } //===----------------------------------------------------------------------===// @@ -7676,7 +8589,13 @@ ABIArgInfo LanaiABIInfo::classifyArgumentType(QualType Ty, Ty = EnumTy->getDecl()->getIntegerType(); bool InReg = shouldUseInReg(Ty, State); - if (Ty->isPromotableIntegerType()) { + + // Don't pass >64 bit integers in registers. + if (const auto *EIT = Ty->getAs<ExtIntType>()) + if (EIT->getNumBits() > 64) + return getIndirectResult(Ty, /*ByVal=*/true, State); + + if (isPromotableIntegerTypeForABI(Ty)) { if (InReg) return ABIArgInfo::getDirectInReg(); return ABIArgInfo::getExtend(Ty); @@ -7690,7 +8609,7 @@ namespace { class LanaiTargetCodeGenInfo : public TargetCodeGenInfo { public: LanaiTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) - : TargetCodeGenInfo(new LanaiABIInfo(CGT)) {} + : TargetCodeGenInfo(std::make_unique<LanaiABIInfo>(CGT)) {} }; } @@ -7730,7 +8649,7 @@ private: EltTys, (STy->getName() + ".coerce").str(), STy->isPacked()); return llvm::StructType::get(getVMContext(), EltTys, STy->isPacked()); } - // Arrary types. + // Array types. 
if (auto ATy = dyn_cast<llvm::ArrayType>(Ty)) { auto T = ATy->getElementType(); auto NT = coerceKernelArgumentType(T, FromAS, ToAS); @@ -7958,7 +8877,7 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo { public: AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(new AMDGPUABIInfo(CGT)) {} + : TargetCodeGenInfo(std::make_unique<AMDGPUABIInfo>(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; unsigned getOpenCLKernelCallingConv() const override; @@ -7994,23 +8913,13 @@ static bool requiresAMDGPUProtectedVisibility(const Decl *D, (isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) || (isa<VarDecl>(D) && (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() || - D->hasAttr<HIPPinnedShadowAttr>())); -} - -static bool requiresAMDGPUDefaultVisibility(const Decl *D, - llvm::GlobalValue *GV) { - if (GV->getVisibility() != llvm::GlobalValue::HiddenVisibility) - return false; - - return isa<VarDecl>(D) && D->hasAttr<HIPPinnedShadowAttr>(); + cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinSurfaceType() || + cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinTextureType())); } void AMDGPUTargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { - if (requiresAMDGPUDefaultVisibility(D, GV)) { - GV->setVisibility(llvm::GlobalValue::DefaultVisibility); - GV->setDSOLocal(false); - } else if (requiresAMDGPUProtectedVisibility(D, GV)) { + if (requiresAMDGPUProtectedVisibility(D, GV)) { GV->setVisibility(llvm::GlobalValue::ProtectedVisibility); GV->setDSOLocal(true); } @@ -8035,6 +8944,10 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes( (M.getTriple().getOS() == llvm::Triple::AMDHSA)) F->addFnAttr("amdgpu-implicitarg-num-bytes", "56"); + if (IsHIPKernel) + F->addFnAttr("uniform-work-group-size", "true"); + + const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>(); if (ReqdWGS || FlatWGS) { unsigned Min = 0; @@ -8059,9 +8972,13 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes( assert(Max == 0 && "Max must be zero"); } else if (IsOpenCLKernel || IsHIPKernel) { // By default, restrict the maximum size to a value specified by - // --gpu-max-threads-per-block=n or its default value. + // --gpu-max-threads-per-block=n or its default value for HIP. + const unsigned OpenCLDefaultMaxWorkGroupSize = 256; + const unsigned DefaultMaxWorkGroupSize = + IsOpenCLKernel ? OpenCLDefaultMaxWorkGroupSize + : M.getLangOpts().GPUMaxThreadsPerBlock; std::string AttrVal = - std::string("1,") + llvm::utostr(M.getLangOpts().GPUMaxThreadsPerBlock); + std::string("1,") + llvm::utostr(DefaultMaxWorkGroupSize); F->addFnAttr("amdgpu-flat-work-group-size", AttrVal); } @@ -8223,7 +9140,7 @@ namespace { class SparcV8TargetCodeGenInfo : public TargetCodeGenInfo { public: SparcV8TargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(new SparcV8ABIInfo(CGT)) {} + : TargetCodeGenInfo(std::make_unique<SparcV8ABIInfo>(CGT)) {} }; } // end anonymous namespace @@ -8392,6 +9309,10 @@ SparcV9ABIInfo::classifyType(QualType Ty, unsigned SizeLimit) const { if (Size < 64 && Ty->isIntegerType()) return ABIArgInfo::getExtend(Ty); + if (const auto *EIT = Ty->getAs<ExtIntType>()) + if (EIT->getNumBits() < 64) + return ABIArgInfo::getExtend(Ty); + // Other non-aggregates go in registers. 
if (!isAggregateTypeForABI(Ty)) return ABIArgInfo::getDirect(); @@ -8485,7 +9406,7 @@ namespace { class SparcV9TargetCodeGenInfo : public TargetCodeGenInfo { public: SparcV9TargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(new SparcV9ABIInfo(CGT)) {} + : TargetCodeGenInfo(std::make_unique<SparcV9ABIInfo>(CGT)) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { return 14; @@ -8578,7 +9499,7 @@ private: class ARCTargetCodeGenInfo : public TargetCodeGenInfo { public: ARCTargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(new ARCABIInfo(CGT)) {} + : TargetCodeGenInfo(std::make_unique<ARCABIInfo>(CGT)) {} }; @@ -8641,11 +9562,15 @@ ABIArgInfo ARCABIInfo::classifyArgumentType(QualType Ty, ABIArgInfo::getDirect(Result, 0, nullptr, false); } - return Ty->isPromotableIntegerType() ? - (FreeRegs >= SizeInRegs ? ABIArgInfo::getExtendInReg(Ty) : - ABIArgInfo::getExtend(Ty)) : - (FreeRegs >= SizeInRegs ? ABIArgInfo::getDirectInReg() : - ABIArgInfo::getDirect()); + if (const auto *EIT = Ty->getAs<ExtIntType>()) + if (EIT->getNumBits() > 64) + return getIndirectByValue(Ty); + + return isPromotableIntegerTypeForABI(Ty) + ? (FreeRegs >= SizeInRegs ? ABIArgInfo::getExtendInReg(Ty) + : ABIArgInfo::getExtend(Ty)) + : (FreeRegs >= SizeInRegs ? ABIArgInfo::getDirectInReg() + : ABIArgInfo::getDirect()); } ABIArgInfo ARCABIInfo::classifyReturnType(QualType RetTy) const { @@ -8769,11 +9694,15 @@ public: class XCoreTargetCodeGenInfo : public TargetCodeGenInfo { mutable TypeStringCache TSC; + void emitTargetMD(const Decl *D, llvm::GlobalValue *GV, + const CodeGen::CodeGenModule &M) const; + public: XCoreTargetCodeGenInfo(CodeGenTypes &CGT) - :TargetCodeGenInfo(new XCoreABIInfo(CGT)) {} - void emitTargetMD(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const override; + : TargetCodeGenInfo(std::make_unique<XCoreABIInfo>(CGT)) {} + void emitTargetMetadata(CodeGen::CodeGenModule &CGM, + const llvm::MapVector<GlobalDecl, StringRef> + &MangledDeclNames) const override; }; } // End anonymous namespace. @@ -8934,11 +9863,13 @@ StringRef TypeStringCache::lookupStr(const IdentifierInfo *ID) { /// The output is tested by test/CodeGen/xcore-stringtype.c. /// static bool getTypeString(SmallStringEnc &Enc, const Decl *D, - CodeGen::CodeGenModule &CGM, TypeStringCache &TSC); + const CodeGen::CodeGenModule &CGM, + TypeStringCache &TSC); /// XCore uses emitTargetMD to emit TypeString metadata for global symbols. -void XCoreTargetCodeGenInfo::emitTargetMD(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const { +void XCoreTargetCodeGenInfo::emitTargetMD( + const Decl *D, llvm::GlobalValue *GV, + const CodeGen::CodeGenModule &CGM) const { SmallStringEnc Enc; if (getTypeString(Enc, D, CGM, TSC)) { llvm::LLVMContext &Ctx = CGM.getModule().getContext(); @@ -8950,6 +9881,21 @@ void XCoreTargetCodeGenInfo::emitTargetMD(const Decl *D, llvm::GlobalValue *GV, } } +void XCoreTargetCodeGenInfo::emitTargetMetadata( + CodeGen::CodeGenModule &CGM, + const llvm::MapVector<GlobalDecl, StringRef> &MangledDeclNames) const { + // Warning, new MangledDeclNames may be appended within this loop. + // We rely on MapVector insertions adding new elements to the end + // of the container. 
+ for (unsigned I = 0; I != MangledDeclNames.size(); ++I) { + auto Val = *(MangledDeclNames.begin() + I); + llvm::GlobalValue *GV = CGM.GetGlobalValue(Val.second); + if (GV) { + const Decl *D = Val.first.getDecl()->getMostRecentDecl(); + emitTargetMD(D, GV, CGM); + } + } +} //===----------------------------------------------------------------------===// // SPIR ABI Implementation //===----------------------------------------------------------------------===// @@ -8958,7 +9904,7 @@ namespace { class SPIRTargetCodeGenInfo : public TargetCodeGenInfo { public: SPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) - : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} + : TargetCodeGenInfo(std::make_unique<DefaultABIInfo>(CGT)) {} unsigned getOpenCLKernelCallingConv() const override; }; @@ -9283,7 +10229,8 @@ static bool appendType(SmallStringEnc &Enc, QualType QType, } static bool getTypeString(SmallStringEnc &Enc, const Decl *D, - CodeGen::CodeGenModule &CGM, TypeStringCache &TSC) { + const CodeGen::CodeGenModule &CGM, + TypeStringCache &TSC) { if (!D) return false; @@ -9613,7 +10560,8 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, uint64_t Size = getContext().getTypeSize(Ty); // Pass floating point values via FPRs if possible. - if (IsFixed && Ty->isFloatingType() && FLen >= Size && ArgFPRsLeft) { + if (IsFixed && Ty->isFloatingType() && !Ty->isComplexType() && + FLen >= Size && ArgFPRsLeft) { ArgFPRsLeft--; return ABIArgInfo::getDirect(); } @@ -9676,6 +10624,15 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, return extendType(Ty); } + if (const auto *EIT = Ty->getAs<ExtIntType>()) { + if (EIT->getNumBits() < XLen && !MustUseStack) + return extendType(Ty); + if (EIT->getNumBits() > 128 || + (!getContext().getTargetInfo().hasInt128Type() && + EIT->getNumBits() > 64)) + return getNaturalAlignIndirect(Ty, /*ByVal=*/false); + } + return ABIArgInfo::getDirect(); } @@ -9747,7 +10704,7 @@ class RISCVTargetCodeGenInfo : public TargetCodeGenInfo { public: RISCVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, unsigned FLen) - : TargetCodeGenInfo(new RISCVABIInfo(CGT, XLen, FLen)) {} + : TargetCodeGenInfo(std::make_unique<RISCVABIInfo>(CGT, XLen, FLen)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override { @@ -9773,6 +10730,56 @@ public: } // namespace //===----------------------------------------------------------------------===// +// VE ABI Implementation. 
+//
+namespace {
+class VEABIInfo : public DefaultABIInfo {
+public:
+  VEABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
+
+private:
+  ABIArgInfo classifyReturnType(QualType RetTy) const;
+  ABIArgInfo classifyArgumentType(QualType RetTy) const;
+  void computeInfo(CGFunctionInfo &FI) const override;
+};
+} // end anonymous namespace
+
+ABIArgInfo VEABIInfo::classifyReturnType(QualType Ty) const {
+  if (Ty->isAnyComplexType()) {
+    return ABIArgInfo::getDirect();
+  }
+  return DefaultABIInfo::classifyReturnType(Ty);
+}
+
+ABIArgInfo VEABIInfo::classifyArgumentType(QualType Ty) const {
+  if (Ty->isAnyComplexType()) {
+    return ABIArgInfo::getDirect();
+  }
+  return DefaultABIInfo::classifyArgumentType(Ty);
+}
+
+void VEABIInfo::computeInfo(CGFunctionInfo &FI) const {
+
+  FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+  for (auto &Arg : FI.arguments())
+    Arg.info = classifyArgumentType(Arg.type);
+}
+
+namespace {
+class VETargetCodeGenInfo : public TargetCodeGenInfo {
+public:
+  VETargetCodeGenInfo(CodeGenTypes &CGT)
+      : TargetCodeGenInfo(std::make_unique<VEABIInfo>(CGT)) {}
+  // The VE ABI requires that the arguments of variadic and prototype-less
+  // functions be passed in both registers and memory.
+  bool isNoProtoCallVariadic(const CallArgList &args,
+                             const FunctionNoProtoType *fnType) const override {
+    return true;
+  }
+};
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
 // Driver code
 //===----------------------------------------------------------------------===//
 
@@ -9824,8 +10831,12 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
   }
 
   case llvm::Triple::wasm32:
-  case llvm::Triple::wasm64:
-    return SetCGInfo(new WebAssemblyTargetCodeGenInfo(Types));
+  case llvm::Triple::wasm64: {
+    WebAssemblyABIInfo::ABIKind Kind = WebAssemblyABIInfo::MVP;
+    if (getTarget().getABI() == "experimental-mv")
+      Kind = WebAssemblyABIInfo::ExperimentalMV;
+    return SetCGInfo(new WebAssemblyTargetCodeGenInfo(Types, Kind));
+  }
 
   case llvm::Triple::arm:
   case llvm::Triple::armeb:
@@ -9852,11 +10863,21 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
     return SetCGInfo(new ARMTargetCodeGenInfo(Types, Kind));
   }
 
-  case llvm::Triple::ppc:
+  case llvm::Triple::ppc: {
+    if (Triple.isOSAIX())
+      return SetCGInfo(new AIXTargetCodeGenInfo(Types, /*Is64Bit*/ false));
+
+    bool IsSoftFloat =
+        CodeGenOpts.FloatABI == "soft" || getTarget().hasFeature("spe");
+    bool RetSmallStructInRegABI =
+        PPC32TargetCodeGenInfo::isStructReturnInRegABI(Triple, CodeGenOpts);
     return SetCGInfo(
-        new PPC32TargetCodeGenInfo(Types, CodeGenOpts.FloatABI == "soft" ||
-                                              getTarget().hasFeature("spe")));
+        new PPC32TargetCodeGenInfo(Types, IsSoftFloat, RetSmallStructInRegABI));
+  }
   case llvm::Triple::ppc64:
+    if (Triple.isOSAIX())
+      return SetCGInfo(new AIXTargetCodeGenInfo(Types, /*Is64Bit*/ true));
+
     if (Triple.isOSBinFormatELF()) {
       PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv1;
       if (getTarget().getABI() == "elfv2")
@@ -9866,8 +10887,8 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
 
       return SetCGInfo(new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX,
                                                         IsSoftFloat));
-    } else
-      return SetCGInfo(new PPC64TargetCodeGenInfo(Types));
+    }
+    return SetCGInfo(new PPC64TargetCodeGenInfo(Types));
   case llvm::Triple::ppc64le: {
     assert(Triple.isOSBinFormatELF() && "PPC64 LE non-ELF not supported!");
     PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv2;
@@ -9900,8 +10921,9 @@ const TargetCodeGenInfo
&CodeGenModule::getTargetCodeGenInfo() { } case llvm::Triple::systemz: { - bool HasVector = getTarget().getABI() == "vector"; - return SetCGInfo(new SystemZTargetCodeGenInfo(Types, HasVector)); + bool SoftFloat = CodeGenOpts.FloatABI == "soft"; + bool HasVector = !SoftFloat && getTarget().getABI() == "vector"; + return SetCGInfo(new SystemZTargetCodeGenInfo(Types, HasVector, SoftFloat)); } case llvm::Triple::tce: @@ -9959,6 +10981,8 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { case llvm::Triple::spir: case llvm::Triple::spir64: return SetCGInfo(new SPIRTargetCodeGenInfo(Types)); + case llvm::Triple::ve: + return SetCGInfo(new VETargetCodeGenInfo(Types)); } } @@ -10042,9 +11066,9 @@ llvm::Function *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel( auto IP = CGF.Builder.saveIP(); auto *BB = llvm::BasicBlock::Create(C, "entry", F); Builder.SetInsertPoint(BB); - unsigned BlockAlign = CGF.CGM.getDataLayout().getPrefTypeAlignment(BlockTy); + const auto BlockAlign = CGF.CGM.getDataLayout().getPrefTypeAlign(BlockTy); auto *BlockPtr = Builder.CreateAlloca(BlockTy, nullptr); - BlockPtr->setAlignment(llvm::MaybeAlign(BlockAlign)); + BlockPtr->setAlignment(BlockAlign); Builder.CreateAlignedStore(F->arg_begin(), BlockPtr, BlockAlign); auto *Cast = Builder.CreatePointerCast(BlockPtr, InvokeFT->getParamType(0)); llvm::SmallVector<llvm::Value *, 2> Args; diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h index e1e90e73cb58..1152cabce4a0 100644 --- a/clang/lib/CodeGen/TargetInfo.h +++ b/clang/lib/CodeGen/TargetInfo.h @@ -43,11 +43,10 @@ class CGFunctionInfo; /// codegeneration issues, like target-specific attributes, builtins and so /// on. class TargetCodeGenInfo { - ABIInfo *Info; + std::unique_ptr<ABIInfo> Info = nullptr; public: - // WARNING: Acquires the ownership of ABIInfo. - TargetCodeGenInfo(ABIInfo *info = nullptr) : Info(info) {} + TargetCodeGenInfo(std::unique_ptr<ABIInfo> Info) : Info(std::move(Info)) {} virtual ~TargetCodeGenInfo(); /// getABIInfo() - Returns ABI info helper for the target. @@ -58,10 +57,18 @@ public: virtual void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {} - /// emitTargetMD - Provides a convenient hook to handle extra - /// target-specific metadata for the given global. - virtual void emitTargetMD(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const {} + /// emitTargetMetadata - Provides a convenient hook to handle extra + /// target-specific metadata for the given globals. + virtual void emitTargetMetadata( + CodeGen::CodeGenModule &CGM, + const llvm::MapVector<GlobalDecl, StringRef> &MangledDeclNames) const {} + + /// Any further codegen related checks that need to be done on a function call + /// in a target specific manner. + virtual void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc, + const FunctionDecl *Caller, + const FunctionDecl *Callee, + const CallArgList &Args) const {} /// Determines the size of struct _Unwind_Exception on this platform, /// in 8-bit units. The Itanium ABI defines this as: @@ -315,6 +322,32 @@ public: virtual bool shouldEmitStaticExternCAliases() const { return true; } virtual void setCUDAKernelCallingConvention(const FunctionType *&FT) const {} + + /// Return the device-side type for the CUDA device builtin surface type. + virtual llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const { + // By default, no change from the original one. 
+    return nullptr;
+  }
+  /// Return the device-side type for the CUDA device builtin texture type.
+  virtual llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const {
+    // By default, no change from the original one.
+    return nullptr;
+  }
+
+  /// Emit the device-side copy of the builtin surface type.
+  virtual bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF,
+                                                      LValue Dst,
+                                                      LValue Src) const {
+    // DO NOTHING by default.
+    return false;
+  }
+  /// Emit the device-side copy of the builtin texture type.
+  virtual bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF,
+                                                      LValue Dst,
+                                                      LValue Src) const {
+    // DO NOTHING by default.
+    return false;
+  }
 };
 
 } // namespace CodeGen
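
The Hexagon hunks in this diff thread a RegsLeft counter through argument classification: r0-r5 carry arguments, a value of 32 bits or fewer takes one register, and a 64-bit value takes an even-aligned register pair, burning a leftover odd register. Below is a minimal standalone sketch of that accounting (the helper name adjustRegsLeft and the demo values are ours, not Clang's); it mirrors HexagonAdjustRegsLeft from the diff without any LLVM dependencies.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Mirror of HexagonAdjustRegsLeft: six argument registers (r0-r5);
// a <=32-bit value takes one register, a 64-bit value takes an
// even-aligned pair. Returns true if the value landed in registers.
static bool adjustRegsLeft(uint64_t SizeInBits, unsigned &RegsLeft) {
  assert(SizeInBits <= 64 && "wider values are passed indirectly");
  if (RegsLeft == 0)
    return false;
  if (SizeInBits <= 32) {
    --RegsLeft;
    return true;
  }
  if ((RegsLeft & ~1u) >= 2) {
    RegsLeft = (RegsLeft & ~1u) - 2; // skip an odd register, take a pair
    return true;
  }
  RegsLeft = 0; // only r5 was left: it is consumed, value goes on stack
  return false;
}

int main() {
  unsigned Regs = 6;
  adjustRegsLeft(32, Regs);               // i32 -> r0, 5 registers left
  adjustRegsLeft(64, Regs);               // i64 -> r3:r2 (r1 skipped), 2 left
  adjustRegsLeft(64, Regs);               // i64 -> r5:r4, 0 left
  bool InRegs = adjustRegsLeft(32, Regs); // out of registers
  std::printf("in registers: %d, left: %u\n", InRegs, Regs);
  return 0;
}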
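Both Hexagon classify routines now coerce a small aggregate to "the smallest viable integer type" by rounding its bit-size up to a power of two, replacing the old i8/i16/i32/i64 ladder, before calling getIntNTy. A sketch of the rounding with a hypothetical helper name; note that llvm::NextPowerOf2 returns the power of two strictly greater than its argument, which is why the diff guards it with isPowerOf2_64.

#include <cassert>
#include <cstdint>

// Round a bit-size up to a power of two, as the diff does with
// isPowerOf2_64/NextPowerOf2 before llvm::Type::getIntNTy(Size):
// a 24-bit struct is coerced to i32, a 40-bit struct to i64.
static uint64_t smallestViableIntBits(uint64_t SizeInBits) {
  assert(SizeInBits > 0 && SizeInBits <= 64);
  uint64_t Bits = 1;
  while (Bits < SizeInBits)
    Bits <<= 1;
  return Bits;
}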
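The va_arg emitters repeatedly round a pointer up with (p + align - 1) & -align, spelled as ptrtoint/add/and/inttoptr in the emitted IR. The arithmetic itself, as a small C++ sketch (function name ours):

#include <cassert>
#include <cstdint>

// overflow_arg_area = (overflow_arg_area + align - 1) & -align;
// valid only when Align is a power of two, which the diff asserts.
static uintptr_t alignUp(uintptr_t Addr, uintptr_t Align) {
  assert(Align != 0 && (Align & (Align - 1)) == 0 && "power of two");
  return (Addr + Align - 1) & ~(Align - 1);
}
// alignUp(0x1001, 8) == 0x1008; alignUp(0x1008, 8) == 0x1008.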
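EmitVAArgForHexagonLinux addresses a three-field va_list through CreateStructGEP indices 0, 1, and 2. Reading the GEP labels back into a C-style declaration gives the layout below; this is an inference from the codegen, not a quotation of the Hexagon ABI document.

// Inferred layout of the va_list that the musl Hexagon path walks;
// field names follow the labels used by the CreateStructGEP calls.
struct HexagonLinuxVaList {
  void *__current_saved_reg_area_pointer; // next free slot in the reg-save area
  void *__saved_reg_area_end_pointer;     // one past the end of that area
  void *__overflow_area_pointer;          // next argument on the stack
};
// Decision rule from the diff: if current + rounded arg size compares
// greater than end, the argument is read from the overflow area.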
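XCoreTargetCodeGenInfo::emitTargetMetadata iterates MangledDeclNames by index rather than by iterator because emitting metadata may append new entries, and llvm::MapVector inserts at the end. A generic sketch of that append-while-iterating pattern, using std::vector so it runs without LLVM headers:

#include <cstddef>
#include <cstdio>
#include <vector>

// Visit a container by index so that elements appended during the walk
// are still visited; iterators or a range-for would be invalidated by
// the growth, but a freshly evaluated index is not.
int main() {
  std::vector<int> Work = {1, 2, 3};
  for (std::size_t I = 0; I != Work.size(); ++I) {
    if (Work[I] == 2)
      Work.push_back(4); // appended mid-loop, picked up later
    std::printf("%d\n", Work[I]);
  }
  return 0;
}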
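Finally, the RISC-V hunk adds _ExtInt handling: a value narrower than XLen is extended to a full register, one wider than 128 bits (or wider than 64 bits without native int128 support) goes indirect, and everything in between is passed directly. A standalone restatement of that decision (the enum and function names are ours):

// Decision table for _ExtInt on RISC-V, restated from the diff.
enum class ExtIntPassing { Extend, Direct, Indirect };

static ExtIntPassing classifyExtInt(unsigned NumBits, unsigned XLen,
                                    bool HasInt128, bool MustUseStack) {
  if (NumBits < XLen && !MustUseStack)
    return ExtIntPassing::Extend;   // widen to one integer register
  if (NumBits > 128 || (NumBits > 64 && !HasInt128))
    return ExtIntPassing::Indirect; // too wide: pass via pointer
  return ExtIntPassing::Direct;
}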