Diffstat (limited to 'lib/CodeGen')
51 files changed, 3183 insertions, 1178 deletions
diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp
index 497652e85b47a..75a54d8f3c8a2 100644
--- a/lib/CodeGen/BackendUtil.cpp
+++ b/lib/CodeGen/BackendUtil.cpp
@@ -37,6 +37,7 @@
 #include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Passes/PassBuilder.h"
 #include "llvm/Passes/PassPlugin.h"
+#include "llvm/Passes/StandardInstrumentations.h"
 #include "llvm/Support/BuryPointer.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -60,6 +61,7 @@
 #include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
 #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
 #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
+#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h"
 #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
 #include "llvm/Transforms/ObjCARC.h"
 #include "llvm/Transforms/Scalar.h"
@@ -117,8 +119,8 @@ class EmitAssemblyHelper {
   std::unique_ptr<llvm::ToolOutputFile> openOutputFile(StringRef Path) {
     std::error_code EC;
-    auto F = llvm::make_unique<llvm::ToolOutputFile>(Path, EC,
-                                                     llvm::sys::fs::F_None);
+    auto F = std::make_unique<llvm::ToolOutputFile>(Path, EC,
+                                                    llvm::sys::fs::OF_None);
     if (EC) {
       Diags.Report(diag::err_fe_unable_to_open_output) << Path << EC.message();
       F.reset();
@@ -195,11 +197,8 @@ static void addBoundsCheckingPass(const PassManagerBuilder &Builder,
   PM.add(createBoundsCheckingLegacyPass());
 }
 
-static void addSanitizerCoveragePass(const PassManagerBuilder &Builder,
-                                     legacy::PassManagerBase &PM) {
-  const PassManagerBuilderWrapper &BuilderWrapper =
-      static_cast<const PassManagerBuilderWrapper&>(Builder);
-  const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts();
+static SanitizerCoverageOptions
+getSancovOptsFromCGOpts(const CodeGenOptions &CGOpts) {
   SanitizerCoverageOptions Opts;
   Opts.CoverageType =
       static_cast<SanitizerCoverageOptions::Type>(CGOpts.SanitizeCoverageType);
@@ -215,7 +214,16 @@ static void addSanitizerCoveragePass(const PassManagerBuilder &Builder,
   Opts.Inline8bitCounters = CGOpts.SanitizeCoverageInline8bitCounters;
   Opts.PCTable = CGOpts.SanitizeCoveragePCTable;
   Opts.StackDepth = CGOpts.SanitizeCoverageStackDepth;
-  PM.add(createSanitizerCoverageModulePass(Opts));
+  return Opts;
+}
+
+static void addSanitizerCoveragePass(const PassManagerBuilder &Builder,
+                                     legacy::PassManagerBase &PM) {
+  const PassManagerBuilderWrapper &BuilderWrapper =
+      static_cast<const PassManagerBuilderWrapper &>(Builder);
+  const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts();
+  auto Opts = getSancovOptsFromCGOpts(CGOpts);
+  PM.add(createModuleSanitizerCoverageLegacyPassPass(Opts));
 }
 
 // Check if ASan should use GC-friendly instrumentation for globals.
@@ -231,9 +239,13 @@ static bool asanUseGlobalsGC(const Triple &T, const CodeGenOptions &CGOpts) {
     return true;
   case Triple::ELF:
     return CGOpts.DataSections && !CGOpts.DisableIntegratedAS;
-  default:
-    return false;
+  case Triple::XCOFF:
+    llvm::report_fatal_error("ASan not implemented for XCOFF.");
+  case Triple::Wasm:
+  case Triple::UnknownObjectFormat:
+    break;
   }
+  return false;
 }
 
 static void addAddressSanitizerPasses(const PassManagerBuilder &Builder,
@@ -456,6 +468,8 @@ static void initTargetOptions(llvm::TargetOptions &Options,
     Options.ExceptionModel = llvm::ExceptionHandling::WinEH;
   if (LangOpts.DWARFExceptions)
     Options.ExceptionModel = llvm::ExceptionHandling::DwarfCFI;
+  if (LangOpts.WasmExceptions)
+    Options.ExceptionModel = llvm::ExceptionHandling::Wasm;
 
   Options.NoInfsFPMath = CodeGenOpts.NoInfsFPMath;
   Options.NoNaNsFPMath = CodeGenOpts.NoNaNsFPMath;
@@ -481,6 +495,7 @@ static void initTargetOptions(llvm::TargetOptions &Options,
       CodeGenOpts.IncrementalLinkerCompatible;
   Options.MCOptions.MCPIECopyRelocations = CodeGenOpts.PIECopyRelocations;
   Options.MCOptions.MCFatalWarnings = CodeGenOpts.FatalWarnings;
+  Options.MCOptions.MCNoWarn = CodeGenOpts.NoWarn;
   Options.MCOptions.AsmVerbose = CodeGenOpts.AsmVerbose;
   Options.MCOptions.PreserveAsmComments = CodeGenOpts.PreserveAsmComments;
   Options.MCOptions.ABIName = TargetOpts.ABI;
@@ -848,7 +863,7 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
       if (!TheModule->getModuleFlag("ThinLTO"))
         TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));
       TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit",
-                               CodeGenOpts.EnableSplitLTOUnit);
+                               uint32_t(1));
     }
 
     PerModulePasses.add(createBitcodeWriterPass(
@@ -880,6 +895,7 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
 
   {
     PrettyStackTraceString CrashInfo("Per-function optimization");
+    llvm::TimeTraceScope TimeScope("PerFunctionPasses", StringRef(""));
 
     PerFunctionPasses.doInitialization();
     for (Function &F : *TheModule)
@@ -890,11 +906,13 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
 
   {
     PrettyStackTraceString CrashInfo("Per-module optimization passes");
+    llvm::TimeTraceScope TimeScope("PerModulePasses", StringRef(""));
     PerModulePasses.run(*TheModule);
   }
 
   {
     PrettyStackTraceString CrashInfo("Code generation");
+    llvm::TimeTraceScope TimeScope("CodeGenPasses", StringRef(""));
     CodeGenPasses.run(*TheModule);
   }
 
@@ -956,6 +974,7 @@ static void addSanitizersAtO0(ModulePassManager &MPM,
   }
 
   if (LangOpts.Sanitize.has(SanitizerKind::Memory)) {
+    MPM.addPass(MemorySanitizerPass({}));
     MPM.addPass(createModuleToFunctionPassAdaptor(MemorySanitizerPass({})));
   }
 
@@ -965,6 +984,7 @@ static void addSanitizersAtO0(ModulePassManager &MPM,
   }
 
   if (LangOpts.Sanitize.has(SanitizerKind::Thread)) {
+    MPM.addPass(ThreadSanitizerPass());
     MPM.addPass(createModuleToFunctionPassAdaptor(ThreadSanitizerPass()));
   }
 }
@@ -1050,7 +1070,10 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
   PTO.LoopVectorization = CodeGenOpts.VectorizeLoop;
   PTO.SLPVectorization = CodeGenOpts.VectorizeSLP;
 
-  PassBuilder PB(TM.get(), PTO, PGOOpt);
+  PassInstrumentationCallbacks PIC;
+  StandardInstrumentations SI;
+  SI.registerCallbacks(PIC);
+  PassBuilder PB(TM.get(), PTO, PGOOpt, &PIC);
 
   // Attempt to load pass plugins and register their callbacks with PB.
   for (auto &PluginFN : CodeGenOpts.PassPlugins) {
@@ -1077,7 +1100,6 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
   std::unique_ptr<TargetLibraryInfoImpl> TLII(
       createTLII(TargetTriple, CodeGenOpts));
   FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
-  MAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
 
   // Register all the basic analyses with the managers.
   PB.registerModuleAnalyses(MAM);
@@ -1105,6 +1127,16 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
       // code generation.
       MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/false));
 
+      // At -O0, we can still do PGO. Add all the requested passes for
+      // instrumentation PGO, if requested.
+      if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
+                     PGOOpt->Action == PGOOptions::IRUse))
+        PB.addPGOInstrPassesForO0(
+            MPM, CodeGenOpts.DebugPassManager,
+            /* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr),
+            /* IsCS */ false, PGOOpt->ProfileFile,
+            PGOOpt->ProfileRemappingFile);
+
       // At -O0 we directly run necessary sanitizer passes.
       if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds))
         MPM.addPass(createModuleToFunctionPassAdaptor(BoundsCheckingPass()));
@@ -1132,16 +1164,23 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
             [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
               FPM.addPass(BoundsCheckingPass());
             });
-      if (LangOpts.Sanitize.has(SanitizerKind::Memory))
+      if (LangOpts.Sanitize.has(SanitizerKind::Memory)) {
+        PB.registerPipelineStartEPCallback([](ModulePassManager &MPM) {
+          MPM.addPass(MemorySanitizerPass({}));
+        });
         PB.registerOptimizerLastEPCallback(
             [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
               FPM.addPass(MemorySanitizerPass({}));
             });
-      if (LangOpts.Sanitize.has(SanitizerKind::Thread))
+      }
+      if (LangOpts.Sanitize.has(SanitizerKind::Thread)) {
+        PB.registerPipelineStartEPCallback(
+            [](ModulePassManager &MPM) { MPM.addPass(ThreadSanitizerPass()); });
         PB.registerOptimizerLastEPCallback(
             [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
               FPM.addPass(ThreadSanitizerPass());
             });
+      }
       if (LangOpts.Sanitize.has(SanitizerKind::Address)) {
         PB.registerPipelineStartEPCallback([&](ModulePassManager &MPM) {
           MPM.addPass(
@@ -1191,6 +1230,13 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
       }
     }
 
+    if (CodeGenOpts.SanitizeCoverageType ||
+        CodeGenOpts.SanitizeCoverageIndirectCalls ||
+        CodeGenOpts.SanitizeCoverageTraceCmp) {
+      auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts);
+      MPM.addPass(ModuleSanitizerCoveragePass(SancovOpts));
+    }
+
     if (LangOpts.Sanitize.has(SanitizerKind::HWAddress)) {
       bool Recover = CodeGenOpts.SanitizeRecover.has(SanitizerKind::HWAddress);
       MPM.addPass(HWAddressSanitizerPass(
@@ -1201,8 +1247,9 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
           /*CompileKernel=*/true, /*Recover=*/true));
     }
 
-    if (CodeGenOpts.OptimizationLevel == 0)
+    if (CodeGenOpts.OptimizationLevel == 0) {
       addSanitizersAtO0(MPM, TargetTriple, LangOpts, CodeGenOpts);
+    }
   }
 
   // FIXME: We still use the legacy pass manager to do code generation. We
@@ -1239,7 +1286,7 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
       if (!TheModule->getModuleFlag("ThinLTO"))
         TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));
       TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit",
-                               CodeGenOpts.EnableSplitLTOUnit);
+                               uint32_t(1));
     }
     MPM.addPass(
         BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists, EmitLTOSummary));
@@ -1372,7 +1419,7 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
     OwnedImports.push_back(std::move(*MBOrErr));
   }
   auto AddStream = [&](size_t Task) {
-    return llvm::make_unique<lto::NativeObjectStream>(std::move(OS));
+    return std::make_unique<lto::NativeObjectStream>(std::move(OS));
   };
   lto::Config Conf;
   if (CGOpts.SaveTempsFilePrefix != "") {
@@ -1484,7 +1531,7 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags,
       // trying to read it. Also for some features, like CFI, we must skip
       // the compilation as CombinedIndex does not contain all required
       // information.
-      EmptyModule = llvm::make_unique<llvm::Module>("empty", M->getContext());
+      EmptyModule = std::make_unique<llvm::Module>("empty", M->getContext());
       EmptyModule->setTargetTriple(M->getTargetTriple());
       M = EmptyModule.get();
     }
diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp
index a95cd12c2d648..5059163507503 100644
--- a/lib/CodeGen/CGAtomic.cpp
+++ b/lib/CodeGen/CGAtomic.cpp
@@ -102,12 +102,13 @@ namespace {
         llvm::APInt Size(
             /*numBits=*/32,
             C.toCharUnitsFromBits(AtomicSizeInBits).getQuantity());
-        AtomicTy = C.getConstantArrayType(C.CharTy, Size, ArrayType::Normal,
-                                          /*IndexTypeQuals=*/0);
+        AtomicTy =
+            C.getConstantArrayType(C.CharTy, Size, nullptr, ArrayType::Normal,
+                                   /*IndexTypeQuals=*/0);
       }
       AtomicAlign = ValueAlign = lvalue.getAlignment();
     } else if (lvalue.isVectorElt()) {
-      ValueTy = lvalue.getType()->getAs<VectorType>()->getElementType();
+      ValueTy = lvalue.getType()->castAs<VectorType>()->getElementType();
       ValueSizeInBits = C.getTypeSize(ValueTy);
       AtomicTy = lvalue.getType();
       AtomicSizeInBits = C.getTypeSize(AtomicTy);
@@ -969,7 +970,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
   auto CastToGenericAddrSpace = [&](llvm::Value *V, QualType PT) {
     if (!E->isOpenCL())
       return V;
-    auto AS = PT->getAs<PointerType>()->getPointeeType().getAddressSpace();
+    auto AS = PT->castAs<PointerType>()->getPointeeType().getAddressSpace();
     if (AS == LangAS::opencl_generic)
       return V;
     auto DestAS = getContext().getTargetAddressSpace(LangAS::opencl_generic);
diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp
index c3ee7129d9d78..f90d9439af257 100644
--- a/lib/CodeGen/CGBlocks.cpp
+++ b/lib/CodeGen/CGBlocks.cpp
@@ -1253,8 +1253,7 @@ llvm::Type *CodeGenModule::getGenericBlockLiteralType() {
 
 RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
                                           ReturnValueSlot ReturnValue) {
-  const BlockPointerType *BPT =
-      E->getCallee()->getType()->getAs<BlockPointerType>();
+  const auto *BPT = E->getCallee()->getType()->castAs<BlockPointerType>();
   llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee());
   llvm::Type *GenBlockTy = CGM.getGenericBlockLiteralType();
   llvm::Value *Func = nullptr;
@@ -1802,7 +1801,7 @@ struct CallBlockRelease final : EHScopeStack::Cleanup {
 bool CodeGenFunction::cxxDestructorCanThrow(QualType T) {
   if (const auto *RD = T->getAsCXXRecordDecl())
     if (const CXXDestructorDecl *DD = RD->getDestructor())
-      return DD->getType()->getAs<FunctionProtoType>()->canThrow();
+      return DD->getType()->castAs<FunctionProtoType>()->canThrow();
   return false;
 }
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index a300bab49f9c8..f9871b233149a 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -843,10 +843,12 @@ static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
     Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
     Arg1Ty = CGF.Int8PtrTy;
     if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
-      Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::sponentry));
+      Arg1 = CGF.Builder.CreateCall(
+          CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy));
     } else
-      Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::frameaddress),
-                                    llvm::ConstantInt::get(CGF.Int32Ty, 0));
+      Arg1 = CGF.Builder.CreateCall(
+          CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy),
+          llvm::ConstantInt::get(CGF.Int32Ty, 0));
   }
 
   // Mark the call site and declaration with ReturnsTwice.
@@ -1394,9 +1396,8 @@ EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
 static llvm::Value *dumpRecord(CodeGenFunction &CGF, QualType RType,
                                Value *&RecordPtr, CharUnits Align,
                                llvm::FunctionCallee Func, int Lvl) {
-  const auto *RT = RType->getAs<RecordType>();
   ASTContext &Context = CGF.getContext();
-  RecordDecl *RD = RT->getDecl()->getDefinition();
+  RecordDecl *RD = RType->castAs<RecordType>()->getDecl()->getDefinition();
   std::string Pad = std::string(Lvl * 4, ' ');
 
   Value *GString =
@@ -1555,6 +1556,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BIceill:
   case Builtin::BI__builtin_ceil:
   case Builtin::BI__builtin_ceilf:
+  case Builtin::BI__builtin_ceilf16:
   case Builtin::BI__builtin_ceill:
     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
 
@@ -1563,6 +1565,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BIcopysignl:
   case Builtin::BI__builtin_copysign:
   case Builtin::BI__builtin_copysignf:
+  case Builtin::BI__builtin_copysignf16:
   case Builtin::BI__builtin_copysignl:
   case Builtin::BI__builtin_copysignf128:
     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
@@ -1572,6 +1575,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BIcosl:
   case Builtin::BI__builtin_cos:
   case Builtin::BI__builtin_cosf:
+  case Builtin::BI__builtin_cosf16:
   case Builtin::BI__builtin_cosl:
     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::cos));
 
@@ -1580,6 +1584,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BIexpl:
   case Builtin::BI__builtin_exp:
   case Builtin::BI__builtin_expf:
+  case Builtin::BI__builtin_expf16:
   case Builtin::BI__builtin_expl:
     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp));
 
@@ -1588,6 +1593,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BIexp2l:
   case Builtin::BI__builtin_exp2:
   case Builtin::BI__builtin_exp2f:
+  case Builtin::BI__builtin_exp2f16:
   case Builtin::BI__builtin_exp2l:
     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp2));
 
@@ -1596,6 +1602,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BIfabsl:
   case Builtin::BI__builtin_fabs:
   case Builtin::BI__builtin_fabsf:
+  case Builtin::BI__builtin_fabsf16:
   case Builtin::BI__builtin_fabsl:
   case Builtin::BI__builtin_fabsf128:
     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
@@ -1605,6 +1612,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BIfloorl:
   case Builtin::BI__builtin_floor:
   case Builtin::BI__builtin_floorf:
+  case Builtin::BI__builtin_floorf16:
   case Builtin::BI__builtin_floorl:
     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
 
@@ -1613,6 +1621,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BIfmal:
   case Builtin::BI__builtin_fma:
   case Builtin::BI__builtin_fmaf:
+  case Builtin::BI__builtin_fmaf16:
   case Builtin::BI__builtin_fmal:
     return RValue::get(emitTernaryBuiltin(*this, E, Intrinsic::fma));
 
@@ -1621,6 +1630,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BIfmaxl:
   case Builtin::BI__builtin_fmax:
   case Builtin::BI__builtin_fmaxf:
+  case Builtin::BI__builtin_fmaxf16:
   case Builtin::BI__builtin_fmaxl:
     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
 
@@ -1629,6 +1639,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BIfminl:
   case Builtin::BI__builtin_fmin:
   case Builtin::BI__builtin_fminf:
+  case Builtin::BI__builtin_fminf16:
   case Builtin::BI__builtin_fminl:
     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
 
@@ -1639,6 +1650,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BIfmodl:
   case Builtin::BI__builtin_fmod:
   case Builtin::BI__builtin_fmodf:
+  case Builtin::BI__builtin_fmodf16:
   case Builtin::BI__builtin_fmodl: {
     Value *Arg1 = EmitScalarExpr(E->getArg(0));
     Value *Arg2 = EmitScalarExpr(E->getArg(1));
@@ -1650,6 +1662,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BIlogl:
   case Builtin::BI__builtin_log:
   case Builtin::BI__builtin_logf:
+  case Builtin::BI__builtin_logf16:
   case Builtin::BI__builtin_logl:
     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log));
 
@@ -1658,6 +1671,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BIlog10l:
   case Builtin::BI__builtin_log10:
   case Builtin::BI__builtin_log10f:
+  case Builtin::BI__builtin_log10f16:
   case Builtin::BI__builtin_log10l:
     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log10));
 
@@ -1666,6 +1680,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BIlog2l:
   case Builtin::BI__builtin_log2:
   case Builtin::BI__builtin_log2f:
+  case Builtin::BI__builtin_log2f16:
   case Builtin::BI__builtin_log2l:
     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log2));
 
@@ -1682,6 +1697,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BIpowl:
   case Builtin::BI__builtin_pow:
   case Builtin::BI__builtin_powf:
+  case Builtin::BI__builtin_powf16:
   case Builtin::BI__builtin_powl:
     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::pow));
 
@@ -1690,6 +1706,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BIrintl:
   case Builtin::BI__builtin_rint:
   case Builtin::BI__builtin_rintf:
+  case Builtin::BI__builtin_rintf16:
   case Builtin::BI__builtin_rintl:
     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
 
@@ -1698,6 +1715,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BIroundl:
   case Builtin::BI__builtin_round:
   case Builtin::BI__builtin_roundf:
+  case Builtin::BI__builtin_roundf16:
   case Builtin::BI__builtin_roundl:
     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
 
@@ -1706,6 +1724,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BIsinl:
   case Builtin::BI__builtin_sin:
   case Builtin::BI__builtin_sinf:
+  case Builtin::BI__builtin_sinf16:
   case Builtin::BI__builtin_sinl:
     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sin));
 
@@ -1714,6 +1733,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BIsqrtl:
   case Builtin::BI__builtin_sqrt:
   case Builtin::BI__builtin_sqrtf:
+  case Builtin::BI__builtin_sqrtf16:
   case Builtin::BI__builtin_sqrtl:
     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sqrt));
 
@@ -1722,6 +1742,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BItruncl:
   case Builtin::BI__builtin_trunc:
   case Builtin::BI__builtin_truncf:
+  case Builtin::BI__builtin_truncf16:
   case Builtin::BI__builtin_truncl:
     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
 
@@ -2026,11 +2047,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
     ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
-    unsigned Alignment = (unsigned)AlignmentCI->getZExtValue();
+    if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
+      AlignmentCI = ConstantInt::get(AlignmentCI->getType(),
+                                     llvm::Value::MaximumAlignment);
 
     EmitAlignmentAssumption(PtrValue, Ptr,
                             /*The expr loc is sufficient.*/ SourceLocation(),
-                            Alignment, OffsetValue);
+                            AlignmentCI, OffsetValue);
     return RValue::get(PtrValue);
   }
   case Builtin::BI__assume:
@@ -2077,10 +2100,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BI__builtin_constant_p: {
     llvm::Type *ResultType = ConvertType(E->getType());
-    if (CGM.getCodeGenOpts().OptimizationLevel == 0)
-      // At -O0, we don't perform inlining, so we don't need to delay the
-      // processing.
-      return RValue::get(ConstantInt::get(ResultType, 0));
 
     const Expr *Arg = E->getArg(0);
     QualType ArgType = Arg->getType();
@@ -2131,7 +2150,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
       llvm::ConstantInt::get(Int32Ty, 3);
     Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
-    Function *F = CGM.getIntrinsic(Intrinsic::prefetch);
+    Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
     return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
   }
   case Builtin::BI__builtin_readcyclecounter: {
@@ -2344,7 +2363,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
             .toCharUnitsFromBits(TI.getSuitableAlign())
             .getQuantity();
     AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
-    AI->setAlignment(SuitableAlignmentInBytes);
+    AI->setAlignment(MaybeAlign(SuitableAlignmentInBytes));
     initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);
     return RValue::get(AI);
   }
@@ -2357,7 +2376,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     unsigned AlignmentInBytes =
         CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity();
     AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
-    AI->setAlignment(AlignmentInBytes);
+    AI->setAlignment(MaybeAlign(AlignmentInBytes));
     initializeAlloca(*this, AI, Size, AlignmentInBytes);
     return RValue::get(AI);
   }
@@ -2556,7 +2575,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BI__builtin_frame_address: {
     Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
                                                        getContext().UnsignedIntTy);
-    Function *F = CGM.getIntrinsic(Intrinsic::frameaddress);
+    Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy);
     return RValue::get(Builder.CreateCall(F, Depth));
   }
   case Builtin::BI__builtin_extract_return_addr: {
@@ -2637,9 +2656,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     Address Buf = EmitPointerWithAlignment(E->getArg(0));
 
     // Store the frame pointer to the setjmp buffer.
-    Value *FrameAddr =
-        Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
-                           ConstantInt::get(Int32Ty, 0));
+    Value *FrameAddr = Builder.CreateCall(
+        CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy),
+        ConstantInt::get(Int32Ty, 0));
     Builder.CreateStore(FrameAddr, Buf);
 
     // Store the stack pointer to the setjmp buffer.
@@ -3673,13 +3692,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BIget_pipe_num_packets:
   case Builtin::BIget_pipe_max_packets: {
     const char *BaseName;
-    const PipeType *PipeTy = E->getArg(0)->getType()->getAs<PipeType>();
+    const auto *PipeTy = E->getArg(0)->getType()->castAs<PipeType>();
     if (BuiltinID == Builtin::BIget_pipe_num_packets)
       BaseName = "__get_pipe_num_packets";
     else
       BaseName = "__get_pipe_max_packets";
-    auto Name = std::string(BaseName) +
-                std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
+    std::string Name = std::string(BaseName) +
+                       std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
 
     // Building the generic function prototype.
     Value *Arg0 = EmitScalarExpr(E->getArg(0));
@@ -3769,7 +3788,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
         -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
       llvm::APInt ArraySize(32, NumArgs - First);
       QualType SizeArrayTy = getContext().getConstantArrayType(
-          getContext().getSizeType(), ArraySize, ArrayType::Normal,
+          getContext().getSizeType(), ArraySize, nullptr, ArrayType::Normal,
           /*IndexTypeQuals=*/0);
       auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
       llvm::Value *TmpPtr = Tmp.getPointer();
@@ -3977,6 +3996,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     break;
   case Builtin::BI__builtin_canonicalize:
   case Builtin::BI__builtin_canonicalizef:
+  case Builtin::BI__builtin_canonicalizef16:
   case Builtin::BI__builtin_canonicalizel:
     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
 
@@ -4219,6 +4239,9 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
   case llvm::Triple::aarch64:
   case llvm::Triple::aarch64_be:
     return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
+  case llvm::Triple::bpfeb:
+  case llvm::Triple::bpfel:
+    return CGF->EmitBPFBuiltinExpr(BuiltinID, E);
   case llvm::Triple::x86:
   case llvm::Triple::x86_64:
     return CGF->EmitX86BuiltinExpr(BuiltinID, E);
@@ -6019,7 +6042,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
 
     // Locality is not supported on ARM target
     Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
 
-    Function *F = CGM.getIntrinsic(Intrinsic::prefetch);
+    Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
   }
 
@@ -6958,7 +6981,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
     // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
     // PLDL3STRM or PLDL2STRM.
-    Function *F = CGM.getIntrinsic(Intrinsic::prefetch);
+    Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
   }
 
@@ -7293,12 +7316,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
   }
 
   if (BuiltinID == AArch64::BI_AddressOfReturnAddress) {
-    llvm::Function *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
+    llvm::Function *F =
+        CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
     return Builder.CreateCall(F);
   }
 
   if (BuiltinID == AArch64::BI__builtin_sponentry) {
-    llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry);
+    llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
     return Builder.CreateCall(F);
   }
 
@@ -8011,6 +8035,151 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                         "vgetq_lane");
   }
+  case AArch64::BI_BitScanForward:
+  case AArch64::BI_BitScanForward64:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
+  case AArch64::BI_BitScanReverse:
+  case AArch64::BI_BitScanReverse64:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
+  case AArch64::BI_InterlockedAnd64:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
+  case AArch64::BI_InterlockedExchange64:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
+  case AArch64::BI_InterlockedExchangeAdd64:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
+  case AArch64::BI_InterlockedExchangeSub64:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
+  case AArch64::BI_InterlockedOr64:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
+  case AArch64::BI_InterlockedXor64:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
+  case AArch64::BI_InterlockedDecrement64:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
+  case AArch64::BI_InterlockedIncrement64:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
+  case AArch64::BI_InterlockedExchangeAdd8_acq:
+  case AArch64::BI_InterlockedExchangeAdd16_acq:
+  case AArch64::BI_InterlockedExchangeAdd_acq:
+  case AArch64::BI_InterlockedExchangeAdd64_acq:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E);
+  case AArch64::BI_InterlockedExchangeAdd8_rel:
+  case AArch64::BI_InterlockedExchangeAdd16_rel:
+  case AArch64::BI_InterlockedExchangeAdd_rel:
+  case AArch64::BI_InterlockedExchangeAdd64_rel:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E);
+  case AArch64::BI_InterlockedExchangeAdd8_nf:
+  case AArch64::BI_InterlockedExchangeAdd16_nf:
+  case AArch64::BI_InterlockedExchangeAdd_nf:
+  case AArch64::BI_InterlockedExchangeAdd64_nf:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E);
+  case AArch64::BI_InterlockedExchange8_acq:
+  case AArch64::BI_InterlockedExchange16_acq:
+  case AArch64::BI_InterlockedExchange_acq:
+  case AArch64::BI_InterlockedExchange64_acq:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E);
+  case AArch64::BI_InterlockedExchange8_rel:
+  case AArch64::BI_InterlockedExchange16_rel:
+  case AArch64::BI_InterlockedExchange_rel:
+  case AArch64::BI_InterlockedExchange64_rel:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E);
+  case AArch64::BI_InterlockedExchange8_nf:
+  case AArch64::BI_InterlockedExchange16_nf:
+  case AArch64::BI_InterlockedExchange_nf:
+  case AArch64::BI_InterlockedExchange64_nf:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E);
+  case AArch64::BI_InterlockedCompareExchange8_acq:
+  case AArch64::BI_InterlockedCompareExchange16_acq:
+  case AArch64::BI_InterlockedCompareExchange_acq:
+  case AArch64::BI_InterlockedCompareExchange64_acq:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E);
+  case AArch64::BI_InterlockedCompareExchange8_rel:
+  case AArch64::BI_InterlockedCompareExchange16_rel:
+  case AArch64::BI_InterlockedCompareExchange_rel:
+  case AArch64::BI_InterlockedCompareExchange64_rel:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E);
+  case AArch64::BI_InterlockedCompareExchange8_nf:
+  case AArch64::BI_InterlockedCompareExchange16_nf:
+  case AArch64::BI_InterlockedCompareExchange_nf:
+  case AArch64::BI_InterlockedCompareExchange64_nf:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E);
+  case AArch64::BI_InterlockedOr8_acq:
+  case AArch64::BI_InterlockedOr16_acq:
+  case AArch64::BI_InterlockedOr_acq:
+  case AArch64::BI_InterlockedOr64_acq:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E);
+  case AArch64::BI_InterlockedOr8_rel:
+  case AArch64::BI_InterlockedOr16_rel:
+  case AArch64::BI_InterlockedOr_rel:
+  case AArch64::BI_InterlockedOr64_rel:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E);
+  case AArch64::BI_InterlockedOr8_nf:
+  case AArch64::BI_InterlockedOr16_nf:
+  case AArch64::BI_InterlockedOr_nf:
+  case AArch64::BI_InterlockedOr64_nf:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E);
+  case AArch64::BI_InterlockedXor8_acq:
+  case AArch64::BI_InterlockedXor16_acq:
+  case AArch64::BI_InterlockedXor_acq:
+  case AArch64::BI_InterlockedXor64_acq:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E);
+  case AArch64::BI_InterlockedXor8_rel:
+  case AArch64::BI_InterlockedXor16_rel:
+  case AArch64::BI_InterlockedXor_rel:
+  case AArch64::BI_InterlockedXor64_rel:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E);
+  case AArch64::BI_InterlockedXor8_nf:
+  case AArch64::BI_InterlockedXor16_nf:
+  case AArch64::BI_InterlockedXor_nf:
+  case AArch64::BI_InterlockedXor64_nf:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E);
+  case AArch64::BI_InterlockedAnd8_acq:
+  case AArch64::BI_InterlockedAnd16_acq:
+  case AArch64::BI_InterlockedAnd_acq:
+  case AArch64::BI_InterlockedAnd64_acq:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E);
+  case AArch64::BI_InterlockedAnd8_rel:
+  case AArch64::BI_InterlockedAnd16_rel:
+  case AArch64::BI_InterlockedAnd_rel:
+  case AArch64::BI_InterlockedAnd64_rel:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E);
+  case AArch64::BI_InterlockedAnd8_nf:
+  case AArch64::BI_InterlockedAnd16_nf:
+  case AArch64::BI_InterlockedAnd_nf:
+  case AArch64::BI_InterlockedAnd64_nf:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E);
+  case AArch64::BI_InterlockedIncrement16_acq:
+  case AArch64::BI_InterlockedIncrement_acq:
+  case AArch64::BI_InterlockedIncrement64_acq:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E);
+  case AArch64::BI_InterlockedIncrement16_rel:
+  case AArch64::BI_InterlockedIncrement_rel:
+  case AArch64::BI_InterlockedIncrement64_rel:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E);
+  case AArch64::BI_InterlockedIncrement16_nf:
+  case AArch64::BI_InterlockedIncrement_nf:
+  case AArch64::BI_InterlockedIncrement64_nf:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E);
+  case AArch64::BI_InterlockedDecrement16_acq:
+  case AArch64::BI_InterlockedDecrement_acq:
+  case AArch64::BI_InterlockedDecrement64_acq:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E);
+  case AArch64::BI_InterlockedDecrement16_rel:
+  case AArch64::BI_InterlockedDecrement_rel:
+  case AArch64::BI_InterlockedDecrement64_rel:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E);
+  case AArch64::BI_InterlockedDecrement16_nf:
+  case AArch64::BI_InterlockedDecrement_nf:
+  case AArch64::BI_InterlockedDecrement64_nf:
+    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E);
+
+  case AArch64::BI_InterlockedAdd: {
+    Value *Arg0 = EmitScalarExpr(E->getArg(0));
+    Value *Arg1 = EmitScalarExpr(E->getArg(1));
+    AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
+        AtomicRMWInst::Add, Arg0, Arg1,
+        llvm::AtomicOrdering::SequentiallyConsistent);
+    return Builder.CreateAdd(RMWI, Arg1);
+  }
   }
 
   llvm::VectorType *VTy = GetNeonType(this, Type);
@@ -9128,152 +9297,38 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
     Int = Intrinsic::aarch64_neon_suqadd;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
   }
-  case AArch64::BI_BitScanForward:
-  case AArch64::BI_BitScanForward64:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
-  case AArch64::BI_BitScanReverse:
-  case AArch64::BI_BitScanReverse64:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
-  case AArch64::BI_InterlockedAnd64:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
-  case AArch64::BI_InterlockedExchange64:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
-  case AArch64::BI_InterlockedExchangeAdd64:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
-  case AArch64::BI_InterlockedExchangeSub64:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
-  case AArch64::BI_InterlockedOr64:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
-  case AArch64::BI_InterlockedXor64:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
-  case AArch64::BI_InterlockedDecrement64:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
-  case AArch64::BI_InterlockedIncrement64:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
-  case AArch64::BI_InterlockedExchangeAdd8_acq:
-  case AArch64::BI_InterlockedExchangeAdd16_acq:
-  case AArch64::BI_InterlockedExchangeAdd_acq:
-  case AArch64::BI_InterlockedExchangeAdd64_acq:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E);
-  case AArch64::BI_InterlockedExchangeAdd8_rel:
-  case AArch64::BI_InterlockedExchangeAdd16_rel:
-  case AArch64::BI_InterlockedExchangeAdd_rel:
-  case AArch64::BI_InterlockedExchangeAdd64_rel:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E);
-  case AArch64::BI_InterlockedExchangeAdd8_nf:
-  case AArch64::BI_InterlockedExchangeAdd16_nf:
-  case AArch64::BI_InterlockedExchangeAdd_nf:
-  case AArch64::BI_InterlockedExchangeAdd64_nf:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E);
-  case AArch64::BI_InterlockedExchange8_acq:
-  case AArch64::BI_InterlockedExchange16_acq:
-  case AArch64::BI_InterlockedExchange_acq:
-  case AArch64::BI_InterlockedExchange64_acq:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E);
-  case AArch64::BI_InterlockedExchange8_rel:
-  case AArch64::BI_InterlockedExchange16_rel:
-  case AArch64::BI_InterlockedExchange_rel:
-  case AArch64::BI_InterlockedExchange64_rel:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E);
-  case AArch64::BI_InterlockedExchange8_nf:
-  case AArch64::BI_InterlockedExchange16_nf:
-  case AArch64::BI_InterlockedExchange_nf:
-  case AArch64::BI_InterlockedExchange64_nf:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E);
-  case AArch64::BI_InterlockedCompareExchange8_acq:
-  case AArch64::BI_InterlockedCompareExchange16_acq:
-  case AArch64::BI_InterlockedCompareExchange_acq:
-  case AArch64::BI_InterlockedCompareExchange64_acq:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E);
-  case AArch64::BI_InterlockedCompareExchange8_rel:
-  case AArch64::BI_InterlockedCompareExchange16_rel:
-  case AArch64::BI_InterlockedCompareExchange_rel:
-  case AArch64::BI_InterlockedCompareExchange64_rel:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E);
-  case AArch64::BI_InterlockedCompareExchange8_nf:
-  case AArch64::BI_InterlockedCompareExchange16_nf:
-  case AArch64::BI_InterlockedCompareExchange_nf:
-  case AArch64::BI_InterlockedCompareExchange64_nf:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E);
-  case AArch64::BI_InterlockedOr8_acq:
-  case AArch64::BI_InterlockedOr16_acq:
-  case AArch64::BI_InterlockedOr_acq:
-  case AArch64::BI_InterlockedOr64_acq:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E);
-  case AArch64::BI_InterlockedOr8_rel:
-  case AArch64::BI_InterlockedOr16_rel:
-  case AArch64::BI_InterlockedOr_rel:
-  case AArch64::BI_InterlockedOr64_rel:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E);
-  case AArch64::BI_InterlockedOr8_nf:
-  case AArch64::BI_InterlockedOr16_nf:
-  case AArch64::BI_InterlockedOr_nf:
-  case AArch64::BI_InterlockedOr64_nf:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E);
-  case AArch64::BI_InterlockedXor8_acq:
-  case AArch64::BI_InterlockedXor16_acq:
-  case AArch64::BI_InterlockedXor_acq:
-  case AArch64::BI_InterlockedXor64_acq:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E);
-  case AArch64::BI_InterlockedXor8_rel:
-  case AArch64::BI_InterlockedXor16_rel:
-  case AArch64::BI_InterlockedXor_rel:
-  case AArch64::BI_InterlockedXor64_rel:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E);
-  case AArch64::BI_InterlockedXor8_nf:
-  case AArch64::BI_InterlockedXor16_nf:
-  case AArch64::BI_InterlockedXor_nf:
-  case AArch64::BI_InterlockedXor64_nf:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E);
-  case AArch64::BI_InterlockedAnd8_acq:
-  case AArch64::BI_InterlockedAnd16_acq:
-  case AArch64::BI_InterlockedAnd_acq:
-  case AArch64::BI_InterlockedAnd64_acq:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E);
-  case AArch64::BI_InterlockedAnd8_rel:
-  case AArch64::BI_InterlockedAnd16_rel:
-  case AArch64::BI_InterlockedAnd_rel:
-  case AArch64::BI_InterlockedAnd64_rel:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E);
-  case AArch64::BI_InterlockedAnd8_nf:
-  case AArch64::BI_InterlockedAnd16_nf:
-  case AArch64::BI_InterlockedAnd_nf:
-  case AArch64::BI_InterlockedAnd64_nf:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E);
-  case AArch64::BI_InterlockedIncrement16_acq:
-  case AArch64::BI_InterlockedIncrement_acq:
-  case AArch64::BI_InterlockedIncrement64_acq:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E);
-  case AArch64::BI_InterlockedIncrement16_rel:
-  case AArch64::BI_InterlockedIncrement_rel:
-  case AArch64::BI_InterlockedIncrement64_rel:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E);
-  case AArch64::BI_InterlockedIncrement16_nf:
-  case AArch64::BI_InterlockedIncrement_nf:
-  case AArch64::BI_InterlockedIncrement64_nf:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E);
-  case AArch64::BI_InterlockedDecrement16_acq:
-  case AArch64::BI_InterlockedDecrement_acq:
-  case AArch64::BI_InterlockedDecrement64_acq:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E);
-  case AArch64::BI_InterlockedDecrement16_rel:
-  case AArch64::BI_InterlockedDecrement_rel:
-  case AArch64::BI_InterlockedDecrement64_rel:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E);
-  case AArch64::BI_InterlockedDecrement16_nf:
-  case AArch64::BI_InterlockedDecrement_nf:
-  case AArch64::BI_InterlockedDecrement64_nf:
-    return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E);
-
-  case AArch64::BI_InterlockedAdd: {
-    Value *Arg0 = EmitScalarExpr(E->getArg(0));
-    Value *Arg1 = EmitScalarExpr(E->getArg(1));
-    AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
-        AtomicRMWInst::Add, Arg0, Arg1,
-        llvm::AtomicOrdering::SequentiallyConsistent);
-    return Builder.CreateAdd(RMWI, Arg1);
-  }
   }
+}
+
+Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
+                                           const CallExpr *E) {
+  assert(BuiltinID == BPF::BI__builtin_preserve_field_info &&
+         "unexpected ARM builtin");
+
+  const Expr *Arg = E->getArg(0);
+  bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
+
+  if (!getDebugInfo()) {
+    CGM.Error(E->getExprLoc(), "using builtin_preserve_field_info() without -g");
+    return IsBitField ? EmitLValue(Arg).getBitFieldPointer()
+                      : EmitLValue(Arg).getPointer();
+  }
+
+  // Enable underlying preserve_*_access_index() generation.
+  bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
+  IsInPreservedAIRegion = true;
+  Value *FieldAddr = IsBitField ? EmitLValue(Arg).getBitFieldPointer()
+                                : EmitLValue(Arg).getPointer();
+  IsInPreservedAIRegion = OldIsInPreservedAIRegion;
+
+  ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
+  Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
+
+  // Built the IR for the preserve_field_info intrinsic.
+  llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getDeclaration(
+      &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info,
+      {FieldAddr->getType()});
+  return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
 }
 
 llvm::Value *CodeGenFunction::
@@ -10034,7 +10089,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
     Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
     Value *Data = ConstantInt::get(Int32Ty, 1);
-    Function *F = CGM.getIntrinsic(Intrinsic::prefetch);
+    Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
    return Builder.CreateCall(F, {Address, RW, Locality, Data});
   }
   case X86::BI_mm_clflush: {
@@ -11169,7 +11224,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
       // Unaligned nontemporal store of the scalar value.
       StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
       SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
-      SI->setAlignment(1);
+      SI->setAlignment(llvm::Align::None());
       return SI;
     }
     // Rotate is a special case of funnel shift - 1st 2 args are the same.
@@ -12113,7 +12168,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   }
   case X86::BI_AddressOfReturnAddress: {
-    Function *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
+    Function *F =
+        CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
     return Builder.CreateCall(F);
   }
   case X86::BI__stosb: {
@@ -13924,6 +13980,15 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
     Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
     return Builder.CreateCall(Callee);
   }
+  case WebAssembly::BI__builtin_wasm_tls_align: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType);
+    return Builder.CreateCall(Callee);
+  }
+  case WebAssembly::BI__builtin_wasm_tls_base: {
+    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base);
+    return Builder.CreateCall(Callee);
+  }
   case WebAssembly::BI__builtin_wasm_throw: {
     Value *Tag = EmitScalarExpr(E->getArg(0));
     Value *Obj = EmitScalarExpr(E->getArg(1));
@@ -13954,6 +14019,26 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
     Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_notify);
     return Builder.CreateCall(Callee, {Addr, Count});
   }
+  case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:
+  case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64:
+  case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32:
+  case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: {
+    Value *Src = EmitScalarExpr(E->getArg(0));
+    llvm::Type *ResT = ConvertType(E->getType());
+    Function *Callee =
+        CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()});
+    return Builder.CreateCall(Callee, {Src});
+  }
+  case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32:
+  case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64:
+  case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32:
+  case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: {
+    Value *Src = EmitScalarExpr(E->getArg(0));
+    llvm::Type *ResT = ConvertType(E->getType());
+    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned,
+                                        {ResT, Src->getType()});
+    return Builder.CreateCall(Callee, {Src});
+  }
   case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
   case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
   case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
@@ -13998,6 +14083,12 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
                                      ConvertType(E->getType()));
     return Builder.CreateCall(Callee, {LHS, RHS});
   }
+  case WebAssembly::BI__builtin_wasm_swizzle_v8x16: {
+    Value *Src = EmitScalarExpr(E->getArg(0));
+    Value *Indices = EmitScalarExpr(E->getArg(1));
+    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle);
+    return Builder.CreateCall(Callee, {Src, Indices});
+  }
   case WebAssembly::BI__builtin_wasm_extract_lane_s_i8x16:
   case WebAssembly::BI__builtin_wasm_extract_lane_u_i8x16:
   case WebAssembly::BI__builtin_wasm_extract_lane_s_i16x8:
@@ -14139,7 +14230,86 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
     Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
     return Builder.CreateCall(Callee, {Vec});
   }
-
+  case WebAssembly::BI__builtin_wasm_qfma_f32x4:
+  case WebAssembly::BI__builtin_wasm_qfms_f32x4:
+  case WebAssembly::BI__builtin_wasm_qfma_f64x2:
+  case WebAssembly::BI__builtin_wasm_qfms_f64x2: {
+    Value *A = EmitScalarExpr(E->getArg(0));
+    Value *B = EmitScalarExpr(E->getArg(1));
+    Value *C = EmitScalarExpr(E->getArg(2));
+    unsigned IntNo;
+    switch (BuiltinID) {
+    case WebAssembly::BI__builtin_wasm_qfma_f32x4:
+    case WebAssembly::BI__builtin_wasm_qfma_f64x2:
+      IntNo = Intrinsic::wasm_qfma;
+      break;
+    case WebAssembly::BI__builtin_wasm_qfms_f32x4:
+    case WebAssembly::BI__builtin_wasm_qfms_f64x2:
+      IntNo = Intrinsic::wasm_qfms;
+      break;
+    default:
+      llvm_unreachable("unexpected builtin ID");
+    }
+    Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
+    return Builder.CreateCall(Callee, {A, B, C});
+  }
+  case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
+  case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
+  case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
+  case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: {
+    Value *Low = EmitScalarExpr(E->getArg(0));
+    Value *High = EmitScalarExpr(E->getArg(1));
+    unsigned IntNo;
+    switch (BuiltinID) {
+    case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
+    case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
+      IntNo = Intrinsic::wasm_narrow_signed;
+      break;
+    case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
+    case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4:
+      IntNo = Intrinsic::wasm_narrow_unsigned;
+      break;
+    default:
+      llvm_unreachable("unexpected builtin ID");
+    }
+    Function *Callee =
+        CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
+    return Builder.CreateCall(Callee, {Low, High});
+  }
+  case WebAssembly::BI__builtin_wasm_widen_low_s_i16x8_i8x16:
+  case WebAssembly::BI__builtin_wasm_widen_high_s_i16x8_i8x16:
+  case WebAssembly::BI__builtin_wasm_widen_low_u_i16x8_i8x16:
+  case WebAssembly::BI__builtin_wasm_widen_high_u_i16x8_i8x16:
+  case WebAssembly::BI__builtin_wasm_widen_low_s_i32x4_i16x8:
+  case WebAssembly::BI__builtin_wasm_widen_high_s_i32x4_i16x8:
+  case WebAssembly::BI__builtin_wasm_widen_low_u_i32x4_i16x8:
+  case WebAssembly::BI__builtin_wasm_widen_high_u_i32x4_i16x8: {
+    Value *Vec = EmitScalarExpr(E->getArg(0));
+    unsigned IntNo;
+    switch (BuiltinID) {
+    case WebAssembly::BI__builtin_wasm_widen_low_s_i16x8_i8x16:
+    case WebAssembly::BI__builtin_wasm_widen_low_s_i32x4_i16x8:
+      IntNo = Intrinsic::wasm_widen_low_signed;
+      break;
+    case WebAssembly::BI__builtin_wasm_widen_high_s_i16x8_i8x16:
+    case WebAssembly::BI__builtin_wasm_widen_high_s_i32x4_i16x8:
+      IntNo = Intrinsic::wasm_widen_high_signed;
+      break;
+    case WebAssembly::BI__builtin_wasm_widen_low_u_i16x8_i8x16:
+    case WebAssembly::BI__builtin_wasm_widen_low_u_i32x4_i16x8:
+      IntNo = Intrinsic::wasm_widen_low_unsigned;
+      break;
+    case WebAssembly::BI__builtin_wasm_widen_high_u_i16x8_i8x16:
+    case WebAssembly::BI__builtin_wasm_widen_high_u_i32x4_i16x8:
+      IntNo = Intrinsic::wasm_widen_high_unsigned;
+      break;
+    default:
+      llvm_unreachable("unexpected builtin ID");
+    }
+    Function *Callee =
+        CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Vec->getType()});
+    return Builder.CreateCall(Callee, Vec);
+  }
   default:
     return nullptr;
   }
diff --git a/lib/CodeGen/CGCUDANV.cpp b/lib/CodeGen/CGCUDANV.cpp
index 4d4038dae9cfc..5c5cbaff0252e 100644
--- a/lib/CodeGen/CGCUDANV.cpp
+++ b/lib/CodeGen/CGCUDANV.cpp
@@ -93,7 +93,7 @@ private:
       GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::None);
     }
     if (Alignment)
-      GV->setAlignment(Alignment);
+      GV->setAlignment(llvm::Align(Alignment));
 
     return llvm::ConstantExpr::getGetElementPtr(ConstStr.getElementType(),
                                                 ConstStr.getPointer(), Zeros);
@@ -236,7 +236,8 @@ void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
   EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl});
   if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(),
CudaFeature::CUDA_USES_NEW_LAUNCH)) + CudaFeature::CUDA_USES_NEW_LAUNCH) || + CGF.getLangOpts().HIPUseNewLaunchAPI) emitDeviceStubBodyNew(CGF, Args); else emitDeviceStubBodyLegacy(CGF, Args); @@ -264,14 +265,18 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end"); - // Lookup cudaLaunchKernel function. + // Lookup cudaLaunchKernel/hipLaunchKernel function. // cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, // void **args, size_t sharedMem, // cudaStream_t stream); + // hipError_t hipLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, + // void **args, size_t sharedMem, + // hipStream_t stream); TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl(); DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); + auto LaunchKernelName = addPrefixToName("LaunchKernel"); IdentifierInfo &cudaLaunchKernelII = - CGM.getContext().Idents.get("cudaLaunchKernel"); + CGM.getContext().Idents.get(LaunchKernelName); FunctionDecl *cudaLaunchKernelFD = nullptr; for (const auto &Result : DC->lookup(&cudaLaunchKernelII)) { if (FunctionDecl *FD = dyn_cast<FunctionDecl>(Result)) @@ -280,7 +285,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, if (cudaLaunchKernelFD == nullptr) { CGM.Error(CGF.CurFuncDecl->getLocation(), - "Can't find declaration for cudaLaunchKernel()"); + "Can't find declaration for " + LaunchKernelName); return; } // Create temporary dim3 grid_dim, block_dim. @@ -301,7 +306,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, /*ShmemSize=*/ShmemSize.getType(), /*Stream=*/Stream.getType()}, /*isVarArg=*/false), - "__cudaPopCallConfiguration"); + addUnderscoredPrefixToName("PopCallConfiguration")); CGF.EmitRuntimeCallOrInvoke(cudaPopConfigFn, {GridDim.getPointer(), BlockDim.getPointer(), @@ -329,7 +334,7 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, const CGFunctionInfo &FI = CGM.getTypes().arrangeFunctionDeclaration(cudaLaunchKernelFD); llvm::FunctionCallee cudaLaunchKernelFn = - CGM.CreateRuntimeFunction(FTy, "cudaLaunchKernel"); + CGM.CreateRuntimeFunction(FTy, LaunchKernelName); CGF.EmitCall(FI, CGCallee::forDirect(cudaLaunchKernelFn), ReturnValueSlot(), LaunchKernelArgs); CGF.EmitBranch(EndBlock); @@ -623,7 +628,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { Linkage, /*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__hip_gpubin_handle"); - GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getQuantity()); + GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getAsAlign()); // Prevent the weak symbol in different shared libraries being merged. 
if (Linkage != llvm::GlobalValue::InternalLinkage) GpuBinaryHandle->setVisibility(llvm::GlobalValue::HiddenVisibility); @@ -664,7 +669,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { GpuBinaryHandle = new llvm::GlobalVariable( TheModule, VoidPtrPtrTy, false, llvm::GlobalValue::InternalLinkage, llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__cuda_gpubin_handle"); - GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getQuantity()); + GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getAsAlign()); CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle, CGM.getPointerAlign()); diff --git a/lib/CodeGen/CGCXX.cpp b/lib/CodeGen/CGCXX.cpp index 6d903a0d09e23..7e5fe0fd6b1d5 100644 --- a/lib/CodeGen/CGCXX.cpp +++ b/lib/CodeGen/CGCXX.cpp @@ -80,7 +80,7 @@ bool CodeGenModule::TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D) { // Skip base classes with trivial destructors. const auto *Base = - cast<CXXRecordDecl>(I.getType()->getAs<RecordType>()->getDecl()); + cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl()); if (Base->hasTrivialDestructor()) continue; // If we've already found a base class with a non-trivial @@ -104,8 +104,8 @@ bool CodeGenModule::TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D) { // Give up if the calling conventions don't match. We could update the call, // but it is probably not worth it. const CXXDestructorDecl *BaseD = UniqueBase->getDestructor(); - if (BaseD->getType()->getAs<FunctionType>()->getCallConv() != - D->getType()->getAs<FunctionType>()->getCallConv()) + if (BaseD->getType()->castAs<FunctionType>()->getCallConv() != + D->getType()->castAs<FunctionType>()->getCallConv()) return true; GlobalDecl AliasDecl(D, Dtor_Base); diff --git a/lib/CodeGen/CGCXXABI.cpp b/lib/CodeGen/CGCXXABI.cpp index 041c0f8959fd7..23dae2b61d047 100644 --- a/lib/CodeGen/CGCXXABI.cpp +++ b/lib/CodeGen/CGCXXABI.cpp @@ -46,8 +46,8 @@ CGCallee CGCXXABI::EmitLoadOfMemberFunctionPointer( ThisPtrForCall = This.getPointer(); const FunctionProtoType *FPT = MPT->getPointeeType()->getAs<FunctionProtoType>(); - const CXXRecordDecl *RD = - cast<CXXRecordDecl>(MPT->getClass()->getAs<RecordType>()->getDecl()); + const auto *RD = + cast<CXXRecordDecl>(MPT->getClass()->castAs<RecordType>()->getDecl()); llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType( CGM.getTypes().arrangeCXXMethodType(RD, FPT, /*FD=*/nullptr)); llvm::Constant *FnPtr = llvm::Constant::getNullValue(FTy->getPointerTo()); diff --git a/lib/CodeGen/CGCXXABI.h b/lib/CodeGen/CGCXXABI.h index 3a9c3b3474394..bff49be7a3c45 100644 --- a/lib/CodeGen/CGCXXABI.h +++ b/lib/CodeGen/CGCXXABI.h @@ -577,7 +577,7 @@ public: // Determine if references to thread_local global variables can be made // directly or require access through a thread wrapper function. 
- virtual bool usesThreadWrapperFunction() const = 0; + virtual bool usesThreadWrapperFunction(const VarDecl *VD) const = 0; /// Emit a reference to a non-local thread_local variable (including /// triggering the initialization of all thread_local variables in its diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp index cf8024550eeec..b74f6f942426e 100644 --- a/lib/CodeGen/CGCall.cpp +++ b/lib/CodeGen/CGCall.cpp @@ -903,7 +903,7 @@ struct NoExpansion : TypeExpansion { static std::unique_ptr<TypeExpansion> getTypeExpansion(QualType Ty, const ASTContext &Context) { if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) { - return llvm::make_unique<ConstantArrayExpansion>( + return std::make_unique<ConstantArrayExpansion>( AT->getElementType(), AT->getSize().getZExtValue()); } if (const RecordType *RT = Ty->getAs<RecordType>()) { @@ -947,13 +947,13 @@ getTypeExpansion(QualType Ty, const ASTContext &Context) { Fields.push_back(FD); } } - return llvm::make_unique<RecordExpansion>(std::move(Bases), + return std::make_unique<RecordExpansion>(std::move(Bases), std::move(Fields)); } if (const ComplexType *CT = Ty->getAs<ComplexType>()) { - return llvm::make_unique<ComplexExpansion>(CT->getElementType()); + return std::make_unique<ComplexExpansion>(CT->getElementType()); } - return llvm::make_unique<NoExpansion>(); + return std::make_unique<NoExpansion>(); } static int getExpansionSize(QualType Ty, const ASTContext &Context) { @@ -1713,16 +1713,19 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, if (!CodeGenOpts.TrapFuncName.empty()) FuncAttrs.addAttribute("trap-func-name", CodeGenOpts.TrapFuncName); } else { - // Attributes that should go on the function, but not the call site. - if (!CodeGenOpts.DisableFPElim) { - FuncAttrs.addAttribute("no-frame-pointer-elim", "false"); - } else if (CodeGenOpts.OmitLeafFramePointer) { - FuncAttrs.addAttribute("no-frame-pointer-elim", "false"); - FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf"); - } else { - FuncAttrs.addAttribute("no-frame-pointer-elim", "true"); - FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf"); + StringRef FpKind; + switch (CodeGenOpts.getFramePointer()) { + case CodeGenOptions::FramePointerKind::None: + FpKind = "none"; + break; + case CodeGenOptions::FramePointerKind::NonLeaf: + FpKind = "non-leaf"; + break; + case CodeGenOptions::FramePointerKind::All: + FpKind = "all"; + break; } + FuncAttrs.addAttribute("frame-pointer", FpKind); FuncAttrs.addAttribute("less-precise-fpmad", llvm::toStringRef(CodeGenOpts.LessPreciseFPMAD)); @@ -2123,8 +2126,8 @@ void CodeGenModule::ConstructAttributeList( if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) { auto info = getContext().getTypeInfoInChars(PTy); Attrs.addDereferenceableAttr(info.first.getQuantity()); - Attrs.addAttribute(llvm::Attribute::getWithAlignment(getLLVMContext(), - info.second.getQuantity())); + Attrs.addAttribute(llvm::Attribute::getWithAlignment( + getLLVMContext(), info.second.getAsAlign())); } break; } @@ -3089,8 +3092,8 @@ void CodeGenFunction::EmitDelegateCallArg(CallArgList &args, // Deactivate the cleanup for the callee-destructed param that was pushed. 
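Note on the frame-pointer hunk above: the old pair of boolean attributes ("no-frame-pointer-elim", "no-frame-pointer-elim-non-leaf") is folded into a single "frame-pointer" string attribute taking "none", "non-leaf" or "all". A minimal sketch of producing the same attribute through the LLVM C++ API (module and function names are illustrative):

#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  auto *FTy = FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false);
  Function *F = Function::Create(FTy, Function::ExternalLinkage, "f", &M);

  // Same mapping as the switch above: None -> "none",
  // NonLeaf -> "non-leaf", All -> "all".
  F->addFnAttr("frame-pointer", "non-leaf");

  M.print(outs(), nullptr); // attributes now carry "frame-pointer"="non-leaf"
  return 0;
}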
if (hasAggregateEvaluationKind(type) && !CurFuncIsThunk && - type->getAs<RecordType>()->getDecl()->isParamDestroyedInCallee() && - type.isDestructedType()) { + type->castAs<RecordType>()->getDecl()->isParamDestroyedInCallee() && + param->needsDestruction(getContext())) { EHScopeStack::stable_iterator cleanup = CalleeDestructedParamCleanups.lookup(cast<ParmVarDecl>(param)); assert(cleanup.isValid() && @@ -3574,7 +3577,7 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E, // However, we still have to push an EH-only cleanup in case we unwind before // we make it to the call. if (HasAggregateEvalKind && - type->getAs<RecordType>()->getDecl()->isParamDestroyedInCallee()) { + type->castAs<RecordType>()->getDecl()->isParamDestroyedInCallee()) { // If we're using inalloca, use the argument memory. Otherwise, use a // temporary. AggValueSlot Slot; @@ -3838,7 +3841,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, AI = CreateTempAlloca(ArgStruct, "argmem"); } auto Align = CallInfo.getArgStructAlignment(); - AI->setAlignment(Align.getQuantity()); + AI->setAlignment(Align.getAsAlign()); AI->setUsedWithInAlloca(true); assert(AI->isUsedWithInAlloca() && !AI->isStaticAlloca()); ArgMemory = Address(AI, Align); @@ -3875,6 +3878,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Address swiftErrorTemp = Address::invalid(); Address swiftErrorArg = Address::invalid(); + // When passing arguments using temporary allocas, we need to add the + // appropriate lifetime markers. This vector keeps track of all the lifetime + // markers that need to be ended right after the call. + SmallVector<CallLifetimeEnd, 2> CallLifetimeEndAfterCall; + // Translate all of the arguments as necessary to match the IR lowering. assert(CallInfo.arg_size() == CallArgs.size() && "Mismatch between function signature & arguments."); @@ -3991,6 +3999,18 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Address AI = CreateMemTempWithoutCast( I->Ty, ArgInfo.getIndirectAlign(), "byval-temp"); IRCallArgs[FirstIRArg] = AI.getPointer(); + + // Emit lifetime markers for the temporary alloca. + uint64_t ByvalTempElementSize = + CGM.getDataLayout().getTypeAllocSize(AI.getElementType()); + llvm::Value *LifetimeSize = + EmitLifetimeStart(ByvalTempElementSize, AI.getPointer()); + + // Add cleanup code to emit the end lifetime marker after the call. + if (LifetimeSize) // In case we disabled lifetime markers. + CallLifetimeEndAfterCall.emplace_back(AI, LifetimeSize); + + // Generate the copy. I->copyInto(*this, AI); } else { // Skip the extra memcpy call. @@ -4129,11 +4149,12 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, auto scalarAlign = CGM.getDataLayout().getPrefTypeAlignment(scalarType); // Materialize to a temporary. - addr = CreateTempAlloca(RV.getScalarVal()->getType(), - CharUnits::fromQuantity(std::max( - layout->getAlignment(), scalarAlign)), - "tmp", - /*ArraySize=*/nullptr, &AllocaAddr); + addr = CreateTempAlloca( + RV.getScalarVal()->getType(), + CharUnits::fromQuantity(std::max( + (unsigned)layout->getAlignment().value(), scalarAlign)), + "tmp", + /*ArraySize=*/nullptr, &AllocaAddr); tempSize = EmitLifetimeStart(scalarSize, AllocaAddr.getPointer()); Builder.CreateStore(RV.getScalarVal(), addr); @@ -4273,8 +4294,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Update the largest vector width if any arguments have vector types. 
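Note on the byval-temp hunk above: each argument temporary is now bracketed with lifetime intrinsics so the backend can recycle its stack slot once the call returns; the CallLifetimeEnd cleanups recorded in CallLifetimeEndAfterCall close the region after the call. The same pattern, reduced to a standalone IRBuilder sketch (names illustrative):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  IRBuilder<> B(Ctx);

  auto *FTy = FunctionType::get(B.getVoidTy(), /*isVarArg=*/false);
  Function *F = Function::Create(FTy, Function::ExternalLinkage, "caller", &M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
  B.SetInsertPoint(BB);

  // A "byval-temp"-style alloca for a 16-byte aggregate argument.
  auto *AggTy = ArrayType::get(B.getInt8Ty(), 16);
  AllocaInst *Tmp = B.CreateAlloca(AggTy, nullptr, "byval-temp");
  uint64_t Size = M.getDataLayout().getTypeAllocSize(AggTy);

  B.CreateLifetimeStart(Tmp, B.getInt64(Size)); // slot becomes live here
  // ... copy the argument in and emit the call here ...
  B.CreateLifetimeEnd(Tmp, B.getInt64(Size));   // dead right after the call

  B.CreateRetVoid();
  M.print(outs(), nullptr);
  return 0;
}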
for (unsigned i = 0; i < IRCallArgs.size(); ++i) { if (auto *VT = dyn_cast<llvm::VectorType>(IRCallArgs[i]->getType())) - LargestVectorWidth = std::max(LargestVectorWidth, - VT->getPrimitiveSizeInBits()); + LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, + VT->getPrimitiveSizeInBits().getFixedSize()); } // Compute the calling convention and attributes. @@ -4357,8 +4378,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Update largest vector width from the return type. if (auto *VT = dyn_cast<llvm::VectorType>(CI->getType())) - LargestVectorWidth = std::max(LargestVectorWidth, - VT->getPrimitiveSizeInBits()); + LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, + VT->getPrimitiveSizeInBits().getFixedSize()); // Insert instrumentation or attach profile metadata at indirect call sites. // For more details, see the comment before the definition of @@ -4548,7 +4569,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, llvm::Value *Alignment = EmitScalarExpr(AA->getAlignment()); llvm::ConstantInt *AlignmentCI = cast<llvm::ConstantInt>(Alignment); EmitAlignmentAssumption(Ret.getScalarVal(), RetTy, Loc, AA->getLocation(), - AlignmentCI->getZExtValue(), OffsetValue); + AlignmentCI, OffsetValue); } else if (const auto *AA = TargetDecl->getAttr<AllocAlignAttr>()) { llvm::Value *AlignmentVal = CallArgs[AA->getParamIndex().getLLVMIndex()] .getRValue(*this) @@ -4558,6 +4579,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } } + // Explicitly call CallLifetimeEnd::Emit just to re-use the code even though + // we can't use the full cleanup mechanism. + for (CallLifetimeEnd &LifetimeEnd : CallLifetimeEndAfterCall) + LifetimeEnd.Emit(*this, /*Flags=*/{}); + return Ret; } diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp index c8bb63c5c4b1f..04ef912b18bd4 100644 --- a/lib/CodeGen/CGClass.cpp +++ b/lib/CodeGen/CGClass.cpp @@ -161,8 +161,8 @@ CharUnits CodeGenModule::computeNonVirtualBaseClassOffset( // Get the layout. const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); - const CXXRecordDecl *BaseDecl = - cast<CXXRecordDecl>(Base->getType()->getAs<RecordType>()->getDecl()); + const auto *BaseDecl = + cast<CXXRecordDecl>(Base->getType()->castAs<RecordType>()->getDecl()); // Add the offset. Offset += Layout.getBaseClassOffset(BaseDecl); @@ -246,7 +246,8 @@ ApplyNonVirtualAndVirtualOffset(CodeGenFunction &CGF, Address addr, // Apply the base offset. llvm::Value *ptr = addr.getPointer(); - ptr = CGF.Builder.CreateBitCast(ptr, CGF.Int8PtrTy); + unsigned AddrSpace = ptr->getType()->getPointerAddressSpace(); + ptr = CGF.Builder.CreateBitCast(ptr, CGF.Int8Ty->getPointerTo(AddrSpace)); ptr = CGF.Builder.CreateInBoundsGEP(ptr, baseOffset, "add.ptr"); // If we have a virtual component, the alignment of the result will @@ -279,8 +280,8 @@ Address CodeGenFunction::GetAddressOfBaseClass( // *start* with a step down to the correct virtual base subobject, // and hence will not require any further steps. 
if ((*Start)->isVirtual()) { - VBase = - cast<CXXRecordDecl>((*Start)->getType()->getAs<RecordType>()->getDecl()); + VBase = cast<CXXRecordDecl>( + (*Start)->getType()->castAs<RecordType>()->getDecl()); ++Start; } @@ -381,7 +382,9 @@ CodeGenFunction::GetAddressOfDerivedClass(Address BaseAddr, QualType DerivedTy = getContext().getCanonicalType(getContext().getTagDeclType(Derived)); - llvm::Type *DerivedPtrTy = ConvertType(DerivedTy)->getPointerTo(); + unsigned AddrSpace = + BaseAddr.getPointer()->getType()->getPointerAddressSpace(); + llvm::Type *DerivedPtrTy = ConvertType(DerivedTy)->getPointerTo(AddrSpace); llvm::Value *NonVirtualOffset = CGM.GetNonVirtualBaseClassOffset(Derived, PathBegin, PathEnd); @@ -536,8 +539,8 @@ static void EmitBaseInitializer(CodeGenFunction &CGF, Address ThisPtr = CGF.LoadCXXThisAddress(); const Type *BaseType = BaseInit->getBaseClass(); - CXXRecordDecl *BaseClassDecl = - cast<CXXRecordDecl>(BaseType->getAs<RecordType>()->getDecl()); + const auto *BaseClassDecl = + cast<CXXRecordDecl>(BaseType->castAs<RecordType>()->getDecl()); bool isBaseVirtual = BaseInit->isBaseVirtual(); @@ -739,7 +742,7 @@ bool CodeGenFunction::IsConstructorDelegationValid( // We also disable the optimization for variadic functions because // it's impossible to "re-pass" varargs. - if (Ctor->getType()->getAs<FunctionProtoType>()->isVariadic()) + if (Ctor->getType()->castAs<FunctionProtoType>()->isVariadic()) return false; // FIXME: Decide if we can do a delegation of a delegating constructor. @@ -1245,7 +1248,7 @@ namespace { static bool isInitializerOfDynamicClass(const CXXCtorInitializer *BaseInit) { const Type *BaseType = BaseInit->getBaseClass(); const auto *BaseClassDecl = - cast<CXXRecordDecl>(BaseType->getAs<RecordType>()->getDecl()); + cast<CXXRecordDecl>(BaseType->castAs<RecordType>()->getDecl()); return BaseClassDecl->isDynamicClass(); } @@ -1814,8 +1817,8 @@ void CodeGenFunction::EnterDtorCleanups(const CXXDestructorDecl *DD, // We push them in the forward order so that they'll be popped in // the reverse order. for (const auto &Base : ClassDecl->vbases()) { - CXXRecordDecl *BaseClassDecl - = cast<CXXRecordDecl>(Base.getType()->getAs<RecordType>()->getDecl()); + auto *BaseClassDecl = + cast<CXXRecordDecl>(Base.getType()->castAs<RecordType>()->getDecl()); // Ignore trivial destructors. if (BaseClassDecl->hasTrivialDestructor()) @@ -2083,7 +2086,7 @@ static bool canEmitDelegateCallArgs(CodeGenFunction &CGF, if (CGF.getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) { // If the parameters are callee-cleanup, it's not safe to forward. for (auto *P : Ctor->parameters()) - if (P->getType().isDestructedType()) + if (P->needsDestruction(CGF.getContext())) return false; // Likewise if they're inalloca. @@ -2530,8 +2533,8 @@ void CodeGenFunction::getVTablePointers(BaseSubobject Base, // Traverse bases. for (const auto &I : RD->bases()) { - CXXRecordDecl *BaseDecl - = cast<CXXRecordDecl>(I.getType()->getAs<RecordType>()->getDecl()); + auto *BaseDecl = + cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl()); // Ignore classes without a vtable. 
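A recurring change in this diff (here and in CGCall, CGDebugInfo, CGExpr and others) is getAs<> to castAs<>. The contract difference, against the real clang API: getAs returns null when the type does not desugar to the requested class, while castAs asserts that it does, which is the right tool where a null result would immediately be dereferenced anyway. A short illustration (the helper names are made up):

#include "clang/AST/DeclCXX.h"
#include "clang/AST/Type.h"

using namespace clang;

// getAs<T>: returns nullptr if QT does not desugar to a T.
const CXXRecordDecl *tryGetBaseClass(QualType QT) {
  if (const auto *RT = QT->getAs<RecordType>()) // may be null
    return llvm::cast<CXXRecordDecl>(RT->getDecl());
  return nullptr;
}

// castAs<T>: asserts instead of returning nullptr. Appropriate when the
// caller already guarantees QT is a record type, e.g. a C++ base specifier.
const CXXRecordDecl *getBaseClass(QualType QT) {
  return llvm::cast<CXXRecordDecl>(QT->castAs<RecordType>()->getDecl());
}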
if (!BaseDecl->isDynamicClass()) @@ -2784,11 +2787,16 @@ void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD, bool CodeGenFunction::ShouldEmitVTableTypeCheckedLoad(const CXXRecordDecl *RD) { if (!CGM.getCodeGenOpts().WholeProgramVTables || - !SanOpts.has(SanitizerKind::CFIVCall) || - !CGM.getCodeGenOpts().SanitizeTrap.has(SanitizerKind::CFIVCall) || !CGM.HasHiddenLTOVisibility(RD)) return false; + if (CGM.getCodeGenOpts().VirtualFunctionElimination) + return true; + + if (!SanOpts.has(SanitizerKind::CFIVCall) || + !CGM.getCodeGenOpts().SanitizeTrap.has(SanitizerKind::CFIVCall)) + return false; + std::string TypeName = RD->getQualifiedNameAsString(); return !getContext().getSanitizerBlacklist().isBlacklistedType( SanitizerKind::CFIVCall, TypeName); @@ -2811,8 +2819,13 @@ llvm::Value *CodeGenFunction::EmitVTableTypeCheckedLoad( TypeId}); llvm::Value *CheckResult = Builder.CreateExtractValue(CheckedLoad, 1); - EmitCheck(std::make_pair(CheckResult, SanitizerKind::CFIVCall), - SanitizerHandler::CFICheckFail, nullptr, nullptr); + std::string TypeName = RD->getQualifiedNameAsString(); + if (SanOpts.has(SanitizerKind::CFIVCall) && + !getContext().getSanitizerBlacklist().isBlacklistedType( + SanitizerKind::CFIVCall, TypeName)) { + EmitCheck(std::make_pair(CheckResult, SanitizerKind::CFIVCall), + SanitizerHandler::CFICheckFail, {}, {}); + } return Builder.CreateBitCast( Builder.CreateExtractValue(CheckedLoad, 0), diff --git a/lib/CodeGen/CGCleanup.cpp b/lib/CodeGen/CGCleanup.cpp index 5594f3030229d..c117dd5c25c1a 100644 --- a/lib/CodeGen/CGCleanup.cpp +++ b/lib/CodeGen/CGCleanup.cpp @@ -304,13 +304,13 @@ void EHScopeStack::Cleanup::anchor() {} static void createStoreInstBefore(llvm::Value *value, Address addr, llvm::Instruction *beforeInst) { auto store = new llvm::StoreInst(value, addr.getPointer(), beforeInst); - store->setAlignment(addr.getAlignment().getQuantity()); + store->setAlignment(addr.getAlignment().getAsAlign()); } static llvm::LoadInst *createLoadInstBefore(Address addr, const Twine &name, llvm::Instruction *beforeInst) { auto load = new llvm::LoadInst(addr.getPointer(), name, beforeInst); - load->setAlignment(addr.getAlignment().getQuantity()); + load->setAlignment(addr.getAlignment().getAsAlign()); return load; } @@ -740,14 +740,15 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) { // here. Unfortunately, if you ask for a SmallVector<char>, the // alignment isn't sufficient. 
auto *CleanupSource = reinterpret_cast<char *>(Scope.getCleanupBuffer()); - llvm::AlignedCharArray<EHScopeStack::ScopeStackAlignment, 8 * sizeof(void *)> CleanupBufferStack; + alignas(EHScopeStack::ScopeStackAlignment) char + CleanupBufferStack[8 * sizeof(void *)]; std::unique_ptr<char[]> CleanupBufferHeap; size_t CleanupSize = Scope.getCleanupSize(); EHScopeStack::Cleanup *Fn; if (CleanupSize <= sizeof(CleanupBufferStack)) { - memcpy(CleanupBufferStack.buffer, CleanupSource, CleanupSize); - Fn = reinterpret_cast<EHScopeStack::Cleanup *>(CleanupBufferStack.buffer); + memcpy(CleanupBufferStack, CleanupSource, CleanupSize); + Fn = reinterpret_cast<EHScopeStack::Cleanup *>(CleanupBufferStack); } else { CleanupBufferHeap.reset(new char[CleanupSize]); memcpy(CleanupBufferHeap.get(), CleanupSource, CleanupSize); diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp index f6ee7ee26d4bf..7c63743f3b43d 100644 --- a/lib/CodeGen/CGDebugInfo.cpp +++ b/lib/CodeGen/CGDebugInfo.cpp @@ -314,7 +314,9 @@ StringRef CGDebugInfo::getClassName(const RecordDecl *RD) { if (isa<ClassTemplateSpecializationDecl>(RD)) { SmallString<128> Name; llvm::raw_svector_ostream OS(Name); - RD->getNameForDiagnostic(OS, getPrintingPolicy(), + PrintingPolicy PP = getPrintingPolicy(); + PP.PrintCanonicalTypes = true; + RD->getNameForDiagnostic(OS, PP, /*Qualified*/ false); // Copy this name on the side and use its reference. @@ -537,11 +539,11 @@ void CGDebugInfo::CreateCompileUnit() { // file to determine the real absolute path for the file. std::string MainFileDir; if (const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID())) { - MainFileDir = remapDIPath(MainFile->getDir()->getName()); - if (MainFileDir != ".") { + MainFileDir = MainFile->getDir()->getName(); + if (!llvm::sys::path::is_absolute(MainFileName)) { llvm::SmallString<1024> MainFileDirSS(MainFileDir); llvm::sys::path::append(MainFileDirSS, MainFileName); - MainFileName = MainFileDirSS.str(); + MainFileName = llvm::sys::path::remove_leading_dotslash(MainFileDirSS); } // If the main file name provided is identical to the input file name, and // if the input file is a preprocessed source, use the module name for @@ -561,6 +563,10 @@ void CGDebugInfo::CreateCompileUnit() { if (LO.CPlusPlus) { if (LO.ObjC) LangTag = llvm::dwarf::DW_LANG_ObjC_plus_plus; + else if (LO.CPlusPlus14) + LangTag = llvm::dwarf::DW_LANG_C_plus_plus_14; + else if (LO.CPlusPlus11) + LangTag = llvm::dwarf::DW_LANG_C_plus_plus_11; else LangTag = llvm::dwarf::DW_LANG_C_plus_plus; } else if (LO.ObjC) { @@ -697,6 +703,22 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { case BuiltinType::Id: \ return getOrCreateStructPtrType("opencl_" #ExtType, Id##Ty); #include "clang/Basic/OpenCLExtensionTypes.def" + // TODO: real support for SVE types requires more infrastructure + // to be added first. The types have a variable length and are + // represented in debug info as types whose length depends on a + // target-specific pseudo register. +#define SVE_TYPE(Name, Id, SingletonId) \ + case BuiltinType::Id: +#include "clang/Basic/AArch64SVEACLETypes.def" + { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Error, + "cannot yet generate debug info for SVE type '%0'"); + auto Name = BT->getName(CGM.getContext().getPrintingPolicy()); + CGM.getDiags().Report(DiagID) << Name; + // Return something safe. 
+ return CreateType(cast<const BuiltinType>(CGM.getContext().IntTy)); + } case BuiltinType::UChar: case BuiltinType::Char_U: @@ -862,6 +884,8 @@ llvm::DIType *CGDebugInfo::CreateType(const PointerType *Ty, static bool hasCXXMangling(const TagDecl *TD, llvm::DICompileUnit *TheCU) { switch (TheCU->getSourceLanguage()) { case llvm::dwarf::DW_LANG_C_plus_plus: + case llvm::dwarf::DW_LANG_C_plus_plus_11: + case llvm::dwarf::DW_LANG_C_plus_plus_14: return true; case llvm::dwarf::DW_LANG_ObjC_plus_plus: return isa<CXXRecordDecl>(TD) || isa<EnumDecl>(TD); @@ -1583,6 +1607,8 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction( ContainingType = RecordTy; } + if (Method->isNoReturn()) + Flags |= llvm::DINode::FlagNoReturn; if (Method->isStatic()) Flags |= llvm::DINode::FlagStaticMember; if (Method->isImplicit()) @@ -1637,7 +1663,7 @@ void CGDebugInfo::CollectCXXMemberFunctions( if (!Method || Method->isImplicit() || Method->hasAttr<NoDebugAttr>()) continue; - if (Method->getType()->getAs<FunctionProtoType>()->getContainedAutoType()) + if (Method->getType()->castAs<FunctionProtoType>()->getContainedAutoType()) continue; // Reuse the existing member function declaration if it exists. @@ -1677,7 +1703,7 @@ void CGDebugInfo::CollectCXXBasesAux( const ASTRecordLayout &RL = CGM.getContext().getASTRecordLayout(RD); for (const auto &BI : Bases) { const auto *Base = - cast<CXXRecordDecl>(BI.getType()->getAs<RecordType>()->getDecl()); + cast<CXXRecordDecl>(BI.getType()->castAs<RecordType>()->getDecl()); if (!SeenTypes.insert(Base).second) continue; auto *BaseTy = getOrCreateType(BI.getType(), Unit); @@ -1769,6 +1795,7 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList, CGM.getContext().toCharUnitsFromBits((int64_t)fieldOffset); V = CGM.getCXXABI().EmitMemberDataPointer(MPT, chars); } + assert(V && "Failed to find template parameter pointer"); V = V->stripPointerCasts(); } TemplateParams.push_back(DBuilder.createTemplateValueParameter( @@ -2695,6 +2722,8 @@ llvm::DIType *CGDebugInfo::CreateType(const MemberPointerType *Ty, break; case MSInheritanceAttr::Keyword_unspecified_inheritance: break; + case MSInheritanceAttr::SpellingNotCalculated: + llvm_unreachable("Spelling not yet calculated"); } } } @@ -2978,7 +3007,7 @@ llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) { #define ABSTRACT_TYPE(Class, Base) #define NON_CANONICAL_TYPE(Class, Base) #define DEPENDENT_TYPE(Class, Base) case Type::Class: -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" llvm_unreachable("Dependent types cannot show up in debug information"); case Type::ExtVector: @@ -3105,7 +3134,8 @@ llvm::DICompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) { SmallString<256> Identifier = getTypeIdentifier(Ty, CGM, TheCU); - // Explicitly record the calling convention for C++ records. + // Explicitly record the calling convention and export symbols for C++ + // records. auto Flags = llvm::DINode::FlagZero; if (auto CXXRD = dyn_cast<CXXRecordDecl>(RD)) { if (CGM.getCXXABI().getRecordArgABI(CXXRD) == CGCXXABI::RAA_Indirect) Flags |= llvm::DINode::FlagTypePassByReference; else Flags |= llvm::DINode::FlagTypePassByValue; // Record if a C++ record is non-trivial type. if (!CXXRD->isTrivial()) Flags |= llvm::DINode::FlagNonTrivial; + + // Record exports its symbols to the containing structure.
+ if (CXXRD->isAnonymousStructOrUnion()) + Flags |= llvm::DINode::FlagExportSymbols; } llvm::DICompositeType *RealDecl = DBuilder.createReplaceableCompositeType( @@ -3247,8 +3281,8 @@ void CGDebugInfo::collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit, llvm::APInt ConstVal(32, 1); QualType ET = CGM.getContext().getAsArrayType(T)->getElementType(); - T = CGM.getContext().getConstantArrayType(ET, ConstVal, ArrayType::Normal, - 0); + T = CGM.getContext().getConstantArrayType(ET, ConstVal, nullptr, + ArrayType::Normal, 0); } Name = VD->getName(); @@ -3298,13 +3332,13 @@ llvm::DISubprogram *CGDebugInfo::getFunctionFwdDeclOrStub(GlobalDecl GD, unsigned Line = getLineNumber(Loc); collectFunctionDeclProps(GD, Unit, Name, LinkageName, DContext, TParamsArray, Flags); - auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()); + auto *FD = cast<FunctionDecl>(GD.getDecl()); // Build function type. SmallVector<QualType, 16> ArgTypes; - if (FD) - for (const ParmVarDecl *Parm : FD->parameters()) - ArgTypes.push_back(Parm->getType()); + for (const ParmVarDecl *Parm : FD->parameters()) + ArgTypes.push_back(Parm->getType()); + CallingConv CC = FD->getType()->castAs<FunctionType>()->getCallConv(); QualType FnType = CGM.getContext().getFunctionType( FD->getReturnType(), ArgTypes, FunctionProtoType::ExtProtoInfo(CC)); @@ -3677,8 +3711,7 @@ void CGDebugInfo::EmitFuncDeclForCallSite(llvm::CallBase *CallOrInvoke, const FunctionDecl *CalleeDecl) { auto &CGOpts = CGM.getCodeGenOpts(); if (!CGOpts.EnableDebugEntryValues || !CGM.getLangOpts().Optimize || - !CallOrInvoke || - CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo) + !CallOrInvoke) return; auto *Func = CallOrInvoke->getCalledFunction(); @@ -3844,8 +3877,8 @@ CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD, if (NumPaddingBytes.isPositive()) { llvm::APInt pad(32, NumPaddingBytes.getQuantity()); - FType = CGM.getContext().getConstantArrayType(CGM.getContext().CharTy, - pad, ArrayType::Normal, 0); + FType = CGM.getContext().getConstantArrayType( + CGM.getContext().CharTy, pad, nullptr, ArrayType::Normal, 0); EltTys.push_back(CreateMemberType(Unit, FType, "", &FieldOffset)); } } @@ -4417,19 +4450,27 @@ void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, const APValue &Init) { StringRef Name = VD->getName(); llvm::DIType *Ty = getOrCreateType(VD->getType(), Unit); - // Do not use global variables for enums, unless in CodeView. if (const auto *ECD = dyn_cast<EnumConstantDecl>(VD)) { const auto *ED = cast<EnumDecl>(ECD->getDeclContext()); assert(isa<EnumType>(ED->getTypeForDecl()) && "Enum without EnumType?"); - (void)ED; - - // If CodeView, emit enums as global variables, unless they are defined - // inside a class. We do this because MSVC doesn't emit S_CONSTANTs for - // enums in classes, and because it is difficult to attach this scope - // information to the global variable. - if (!CGM.getCodeGenOpts().EmitCodeView || - isa<RecordDecl>(ED->getDeclContext())) + + if (CGM.getCodeGenOpts().EmitCodeView) { + // If CodeView, emit enums as global variables, unless they are defined + // inside a class. We do this because MSVC doesn't emit S_CONSTANTs for + // enums in classes, and because it is difficult to attach this scope + // information to the global variable. + if (isa<RecordDecl>(ED->getDeclContext())) + return; + } else { + // If not CodeView, emit DW_TAG_enumeration_type if necessary. For + // example: for "enum { ZERO };", a DW_TAG_enumeration_type is created the + // first time `ZERO` is referenced in a function. 
+ llvm::DIType *EDTy = + getOrCreateType(QualType(ED->getTypeForDecl(), 0), Unit); + assert (EDTy->getTag() == llvm::dwarf::DW_TAG_enumeration_type); + (void)EDTy; return; + } } llvm::DIScope *DContext = nullptr; @@ -4524,7 +4565,7 @@ void CGDebugInfo::EmitUsingDecl(const UsingDecl &UD) { // return type in the definition) if (const auto *FD = dyn_cast<FunctionDecl>(USD.getUnderlyingDecl())) if (const auto *AT = - FD->getType()->getAs<FunctionProtoType>()->getContainedAutoType()) + FD->getType()->castAs<FunctionProtoType>()->getContainedAutoType()) if (AT->getDeducedType().isNull()) return; if (llvm::DINode *Target = diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp index 6ad43cefc4d29..563841c068f60 100644 --- a/lib/CodeGen/CGDecl.cpp +++ b/lib/CodeGen/CGDecl.cpp @@ -250,7 +250,7 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl( llvm::GlobalVariable *GV = new llvm::GlobalVariable( getModule(), LTy, Ty.isConstant(getContext()), Linkage, Init, Name, nullptr, llvm::GlobalVariable::NotThreadLocal, TargetAS); - GV->setAlignment(getContext().getDeclAlign(&D).getQuantity()); + GV->setAlignment(getContext().getDeclAlign(&D).getAsAlign()); if (supportsCOMDAT() && GV->isWeakForLinker()) GV->setComdat(TheModule.getOrInsertComdat(GV->getName())); @@ -305,14 +305,6 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl( return Addr; } -/// hasNontrivialDestruction - Determine whether a type's destruction is -/// non-trivial. If so, and the variable uses static initialization, we must -/// register its destructor to run on exit. -static bool hasNontrivialDestruction(QualType T) { - CXXRecordDecl *RD = T->getBaseElementTypeUnsafe()->getAsCXXRecordDecl(); - return RD && !RD->hasTrivialDestructor(); -} - /// AddInitializerToStaticVarDecl - Add the initializer for 'D' to the /// global variable that has already been created for it. If the initializer /// has a different type than GV does, this may free GV and return a different @@ -372,7 +364,7 @@ CodeGenFunction::AddInitializerToStaticVarDecl(const VarDecl &D, emitter.finalize(GV); - if (hasNontrivialDestruction(D.getType()) && HaveInsertPoint()) { + if (D.needsDestruction(getContext()) && HaveInsertPoint()) { // We have a constant initializer, but a nontrivial destructor. We still // need to perform a guarded "initialization" in order to register the // destructor. 
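Note on the switch from Type.isDestructedType() to D.needsDestruction(getContext()), here and in the hunks below: it routes the no_destroy / -fno-c++-static-destructors logic through a single query instead of the separate hasNontrivialDestruction check that this hunk deletes. What it changes in practice, sketched with clang's [[clang::no_destroy]] extension:

// With -fno-c++-static-destructors, or with the attribute below, no
// destructor may be registered even though ~Logger() is nontrivial.
// D.needsDestruction(Ctx) returns DK_none here, whereas the old
// hasNontrivialDestruction(D.getType()) said "yes" and emitted a guarded
// initialization just to register a destructor that must never run.
struct Logger {
  ~Logger(); // nontrivial
};

[[clang::no_destroy]] static Logger Log; // destruction intentionally skipped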
@@ -416,7 +408,7 @@ void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D, if (D.getInit() && !isCudaSharedVar) var = AddInitializerToStaticVarDecl(D, var); - var->setAlignment(alignment.getQuantity()); + var->setAlignment(alignment.getAsAlign()); if (D.hasAttr<AnnotateAttr>()) CGM.AddGlobalAnnotations(&D, var); @@ -427,6 +419,8 @@ void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D, var->addAttribute("data-section", SA->getName()); if (auto *SA = D.getAttr<PragmaClangRodataSectionAttr>()) var->addAttribute("rodata-section", SA->getName()); + if (auto *SA = D.getAttr<PragmaClangRelroSectionAttr>()) + var->addAttribute("relro-section", SA->getName()); if (const SectionAttr *SA = D.getAttr<SectionAttr>()) var->setSection(SA->getName()); @@ -1120,11 +1114,11 @@ Address CodeGenModule::createUnnamedGlobalFrom(const VarDecl &D, llvm::GlobalVariable *GV = new llvm::GlobalVariable( getModule(), Ty, isConstant, llvm::GlobalValue::PrivateLinkage, Constant, Name, InsertBefore, llvm::GlobalValue::NotThreadLocal, AS); - GV->setAlignment(Align.getQuantity()); + GV->setAlignment(Align.getAsAlign()); GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); CacheEntry = GV; } else if (CacheEntry->getAlignment() < Align.getQuantity()) { - CacheEntry->setAlignment(Align.getQuantity()); + CacheEntry->setAlignment(Align.getAsAlign()); } return Address(CacheEntry, Align); @@ -1994,7 +1988,7 @@ void CodeGenFunction::EmitAutoVarCleanups(const AutoVarEmission &emission) { const VarDecl &D = *emission.Variable; // Check the type for a cleanup. - if (QualType::DestructionKind dtorKind = D.getType().isDestructedType()) + if (QualType::DestructionKind dtorKind = D.needsDestruction(getContext())) emitAutoVarTypeCleanup(emission, dtorKind); // In GC mode, honor objc_precise_lifetime. @@ -2403,8 +2397,9 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, // Don't push a cleanup in a thunk for a method that will also emit a // cleanup. if (hasAggregateEvaluationKind(Ty) && !CurFuncIsThunk && - Ty->getAs<RecordType>()->getDecl()->isParamDestroyedInCallee()) { - if (QualType::DestructionKind DtorKind = Ty.isDestructedType()) { + Ty->castAs<RecordType>()->getDecl()->isParamDestroyedInCallee()) { + if (QualType::DestructionKind DtorKind = + D.needsDestruction(getContext())) { assert((DtorKind == QualType::DK_cxx_destructor || DtorKind == QualType::DK_nontrivial_c_struct) && "unexpected destructor type"); @@ -2496,10 +2491,11 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, setAddrOfLocalVar(&D, DeclPtr); - // Emit debug info for param declaration. + // Emit debug info for param declarations in non-thunk functions. 
if (CGDebugInfo *DI = getDebugInfo()) { if (CGM.getCodeGenOpts().getDebugInfo() >= - codegenoptions::LimitedDebugInfo) { + codegenoptions::LimitedDebugInfo && + !CurFuncIsThunk) { DI->EmitDeclareOfArgVariable(&D, DeclPtr.getPointer(), ArgNo, Builder); } } @@ -2529,10 +2525,11 @@ void CodeGenModule::EmitOMPDeclareReduction(const OMPDeclareReductionDecl *D, } void CodeGenModule::EmitOMPDeclareMapper(const OMPDeclareMapperDecl *D, - CodeGenFunction *CGF) { - if (!LangOpts.OpenMP || (!LangOpts.EmitAllDecls && !D->isUsed())) + CodeGenFunction *CGF) { + if (!LangOpts.OpenMP || LangOpts.OpenMPSimd || + (!LangOpts.EmitAllDecls && !D->isUsed())) return; - // FIXME: need to implement mapper code generation + getOpenMPRuntime().emitUserDefinedMapper(D, CGF); } void CodeGenModule::EmitOMPRequiresDecl(const OMPRequiresDecl *D) { diff --git a/lib/CodeGen/CGDeclCXX.cpp b/lib/CodeGen/CGDeclCXX.cpp index 7a0605b8450ac..bf16b7bec4b19 100644 --- a/lib/CodeGen/CGDeclCXX.cpp +++ b/lib/CodeGen/CGDeclCXX.cpp @@ -73,16 +73,10 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D, // that isn't balanced out by a destructor call as intended by the // attribute. This also checks for -fno-c++-static-destructors and // bails even if the attribute is not present. - if (D.isNoDestroy(CGF.getContext())) - return; - - CodeGenModule &CGM = CGF.CGM; + QualType::DestructionKind DtorKind = D.needsDestruction(CGF.getContext()); // FIXME: __attribute__((cleanup)) ? - QualType Type = D.getType(); - QualType::DestructionKind DtorKind = Type.isDestructedType(); - switch (DtorKind) { case QualType::DK_none: return; @@ -101,6 +95,9 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D, llvm::FunctionCallee Func; llvm::Constant *Argument; + CodeGenModule &CGM = CGF.CGM; + QualType Type = D.getType(); + // Special-case non-array C++ destructors, if they have the right signature. // Under some ABIs, destructors return this instead of void, and cannot be // passed directly to __cxa_atexit if the target does not allow this @@ -251,8 +248,8 @@ llvm::Function *CodeGenFunction::createAtExitStub(const VarDecl &VD, llvm::CallInst *call = CGF.Builder.CreateCall(dtor, addr); // Make sure the call and the callee agree on calling convention. - if (llvm::Function *dtorFn = - dyn_cast<llvm::Function>(dtor.getCallee()->stripPointerCasts())) + if (auto *dtorFn = dyn_cast<llvm::Function>( + dtor.getCallee()->stripPointerCastsAndAliases())) call->setCallingConv(dtorFn->getCallingConv()); CGF.FinishFunction(); diff --git a/lib/CodeGen/CGException.cpp b/lib/CodeGen/CGException.cpp index 3b7a88a0b7693..645d7a878e3b1 100644 --- a/lib/CodeGen/CGException.cpp +++ b/lib/CodeGen/CGException.cpp @@ -165,10 +165,7 @@ static const EHPersonality &getCXXPersonality(const TargetInfo &Target, return EHPersonality::GNU_CPlusPlus; if (L.SEHExceptions) return EHPersonality::GNU_CPlusPlus_SEH; - // Wasm EH is a non-MVP feature for now. - if (Target.hasFeature("exception-handling") && - (T.getArch() == llvm::Triple::wasm32 || - T.getArch() == llvm::Triple::wasm64)) + if (L.WasmExceptions) return EHPersonality::GNU_Wasm_CPlusPlus; return EHPersonality::GNU_CPlusPlus; } @@ -1774,7 +1771,8 @@ void CodeGenFunction::EmitCapturedLocals(CodeGenFunction &ParentCGF, // EH registration is passed in as the EBP physical register. We can // recover that with llvm.frameaddress(1). 
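Aside: llvm.frameaddress is now overloaded on the type of the pointer it returns, which is why getIntrinsic gains the AllocaInt8PtrTy argument in the call emitted just below. Requested directly from LLVM, the lookup would be (a sketch; the surrounding function is illustrative):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// llvm.frameaddress.p0i8: the result pointer type is an overload
// parameter, so it must be supplied when looking the intrinsic up.
Value *emitParentFrameAddress(Module &M, IRBuilder<> &B) {
  Function *FrameAddr = Intrinsic::getDeclaration(
      &M, Intrinsic::frameaddress, {B.getInt8PtrTy()});
  // frameaddress(1): the caller's frame pointer (EBP in the SEH case above).
  return B.CreateCall(FrameAddr, {B.getInt32(1)});
}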
EntryFP = Builder.CreateCall( - CGM.getIntrinsic(llvm::Intrinsic::frameaddress), {Builder.getInt32(1)}); + CGM.getIntrinsic(llvm::Intrinsic::frameaddress, AllocaInt8PtrTy), + {Builder.getInt32(1)}); } else { // Otherwise, for x64 and 32-bit finally functions, the parent FP is the // second parameter. diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp index 5a4b1188b7114..dcd365c8eaf0f 100644 --- a/lib/CodeGen/CGExpr.cpp +++ b/lib/CodeGen/CGExpr.cpp @@ -66,7 +66,7 @@ Address CodeGenFunction::CreateTempAllocaWithoutCast(llvm::Type *Ty, const Twine &Name, llvm::Value *ArraySize) { auto Alloca = CreateTempAlloca(Ty, Name, ArraySize); - Alloca->setAlignment(Align.getQuantity()); + Alloca->setAlignment(Align.getAsAlign()); return Address(Alloca, Align); } @@ -126,7 +126,7 @@ Address CodeGenFunction::CreateDefaultAlignTempAlloca(llvm::Type *Ty, void CodeGenFunction::InitTempAlloca(Address Var, llvm::Value *Init) { assert(isa<llvm::AllocaInst>(Var.getPointer())); auto *Store = new llvm::StoreInst(Init, Var.getPointer()); - Store->setAlignment(Var.getAlignment().getQuantity()); + Store->setAlignment(Var.getAlignment().getAsAlign()); llvm::BasicBlock *Block = AllocaInsertPt->getParent(); Block->getInstList().insertAfter(AllocaInsertPt->getIterator(), Store); } @@ -392,7 +392,7 @@ static Address createReferenceTemporary(CodeGenFunction &CGF, llvm::GlobalValue::NotThreadLocal, CGF.getContext().getTargetAddressSpace(AS)); CharUnits alignment = CGF.getContext().getTypeAlignInChars(Ty); - GV->setAlignment(alignment.getQuantity()); + GV->setAlignment(alignment.getAsAlign()); llvm::Constant *C = GV; if (AS != LangAS::Default) C = TCG.performAddrSpaceCast( @@ -516,13 +516,13 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { // Avoid creating a conditional cleanup just to hold an llvm.lifetime.end // marker. Instead, start the lifetime of a conditional temporary earlier - // so that it's unconditional. Don't do this in ASan's use-after-scope - // mode so that it gets the more precise lifetime marks. If the type has - // a non-trivial destructor, we'll have a cleanup block for it anyway, - // so this typically doesn't help; skip it in that case. + // so that it's unconditional. Don't do this with sanitizers which need + // more precise lifetime marks. ConditionalEvaluation *OldConditional = nullptr; CGBuilderTy::InsertPoint OldIP; if (isInConditionalBranch() && !E->getType().isDestructedType() && + !SanOpts.has(SanitizerKind::HWAddress) && + !SanOpts.has(SanitizerKind::Memory) && !CGM.getCodeGenOpts().SanitizeAddressUseAfterScope) { OldConditional = OutermostConditional; OutermostConditional = nullptr; @@ -677,8 +677,7 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, // Quickly determine whether we have a pointer to an alloca. It's possible // to skip null checks, and some alignment checks, for these pointers. This // can reduce compile-time significantly. - auto PtrToAlloca = - dyn_cast<llvm::AllocaInst>(Ptr->stripPointerCastsNoFollowAliases()); + auto PtrToAlloca = dyn_cast<llvm::AllocaInst>(Ptr->stripPointerCasts()); llvm::Value *True = llvm::ConstantInt::getTrue(getLLVMContext()); llvm::Value *IsNonNull = nullptr; @@ -998,7 +997,7 @@ EmitComplexPrePostIncDec(const UnaryOperator *E, LValue LV, // Add the inc/dec to the real part. NextVal = Builder.CreateAdd(InVal.first, NextVal, isInc ? 
"inc" : "dec"); } else { - QualType ElemTy = E->getType()->getAs<ComplexType>()->getElementType(); + QualType ElemTy = E->getType()->castAs<ComplexType>()->getElementType(); llvm::APFloat FVal(getContext().getFloatTypeSemantics(ElemTy), 1); if (!isInc) FVal.changeSign(); @@ -1268,6 +1267,8 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) { case Expr::CXXOperatorCallExprClass: case Expr::UserDefinedLiteralClass: return EmitCallExprLValue(cast<CallExpr>(E)); + case Expr::CXXRewrittenBinaryOperatorClass: + return EmitLValue(cast<CXXRewrittenBinaryOperator>(E)->getSemanticForm()); case Expr::VAArgExprClass: return EmitVAArgExprLValue(cast<VAArgExpr>(E)); case Expr::DeclRefExprClass: @@ -2195,7 +2196,7 @@ static void setObjCGCLValueClass(const ASTContext &Ctx, const Expr *E, // If ivar is a structure pointer, assigning to field of // this struct follows gcc's behavior and makes it a non-ivar // writer-barrier conservatively. - ExpTy = ExpTy->getAs<PointerType>()->getPointeeType(); + ExpTy = ExpTy->castAs<PointerType>()->getPointeeType(); if (ExpTy->isRecordType()) { LV.setObjCIvar(false); return; @@ -2231,7 +2232,7 @@ static void setObjCGCLValueClass(const ASTContext &Ctx, const Expr *E, // a non-ivar write-barrier. QualType ExpTy = E->getType(); if (ExpTy->isPointerType()) - ExpTy = ExpTy->getAs<PointerType>()->getPointeeType(); + ExpTy = ExpTy->castAs<PointerType>()->getPointeeType(); if (ExpTy->isRecordType()) LV.setObjCIvar(false); } @@ -2362,7 +2363,7 @@ static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF, // If it's thread_local, emit a call to its wrapper function instead. if (VD->getTLSKind() == VarDecl::TLS_Dynamic && - CGF.CGM.getCXXABI().usesThreadWrapperFunction()) + CGF.CGM.getCXXABI().usesThreadWrapperFunction(VD)) return CGF.CGM.getCXXABI().EmitThreadLocalVarDeclLValue(CGF, VD, T); // Check if the variable is marked as declare target with link clause in // device codegen. @@ -2540,6 +2541,11 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { // Spill the constant value to a global. Addr = CGM.createUnnamedGlobalFrom(*VD, Val, getContext().getDeclAlign(VD)); + llvm::Type *VarTy = getTypes().ConvertTypeForMem(VD->getType()); + auto *PTy = llvm::PointerType::get( + VarTy, getContext().getTargetAddressSpace(VD->getType())); + if (PTy != Addr.getType()) + Addr = Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, PTy); } else { // Should we be using the alignment of the constant pointer we emitted? CharUnits Alignment = @@ -3400,6 +3406,7 @@ static Address emitArraySubscriptGEP(CodeGenFunction &CGF, Address addr, ArrayRef<llvm::Value *> indices, QualType eltType, bool inbounds, bool signedIndices, SourceLocation loc, + QualType *arrayType = nullptr, const llvm::Twine &name = "arrayidx") { // All the indices except that last must be zero. #ifndef NDEBUG @@ -3428,9 +3435,12 @@ static Address emitArraySubscriptGEP(CodeGenFunction &CGF, Address addr, } else { // Remember the original array subscript for bpf target unsigned idx = LastIndex->getZExtValue(); + llvm::DIType *DbgInfo = nullptr; + if (arrayType) + DbgInfo = CGF.getDebugInfo()->getOrCreateStandaloneType(*arrayType, loc); eltPtr = CGF.Builder.CreatePreserveArrayAccessIndex(addr.getPointer(), indices.size() - 1, - idx); + idx, DbgInfo); } return Address(eltPtr, eltAlign); @@ -3567,19 +3577,21 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, auto *Idx = EmitIdxAfterBase(/*Promote*/true); // Propagate the alignment from the array itself to the result. 
+ QualType arrayType = Array->getType(); Addr = emitArraySubscriptGEP( *this, ArrayLV.getAddress(), {CGM.getSize(CharUnits::Zero()), Idx}, E->getType(), !getLangOpts().isSignedOverflowDefined(), SignedIndices, - E->getExprLoc()); + E->getExprLoc(), &arrayType); EltBaseInfo = ArrayLV.getBaseInfo(); EltTBAAInfo = CGM.getTBAAInfoForSubobject(ArrayLV, E->getType()); } else { // The base must be a pointer; emit it with an estimate of its alignment. Addr = EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo); auto *Idx = EmitIdxAfterBase(/*Promote*/true); + QualType ptrType = E->getBase()->getType(); Addr = emitArraySubscriptGEP(*this, Addr, Idx, E->getType(), !getLangOpts().isSignedOverflowDefined(), - SignedIndices, E->getExprLoc()); + SignedIndices, E->getExprLoc(), &ptrType); } LValue LV = MakeAddrLValue(Addr, E->getType(), EltBaseInfo, EltTBAAInfo); @@ -3980,9 +3992,19 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, const CGBitFieldInfo &Info = RL.getBitFieldInfo(field); Address Addr = base.getAddress(); unsigned Idx = RL.getLLVMFieldNo(field); - if (Idx != 0) - // For structs, we GEP to the field that the record layout suggests. - Addr = Builder.CreateStructGEP(Addr, Idx, field->getName()); + if (!IsInPreservedAIRegion) { + if (Idx != 0) + // For structs, we GEP to the field that the record layout suggests. + Addr = Builder.CreateStructGEP(Addr, Idx, field->getName()); + } else { + const RecordDecl *rec = field->getParent(); + llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateRecordType( + getContext().getRecordType(rec), rec->getLocation()); + Addr = Builder.CreatePreserveStructAccessIndex(Addr, Idx, + getDebugInfoFIndex(rec, field->getFieldIndex()), + DbgInfo); + } + // Get the access type. llvm::Type *FieldIntTy = llvm::Type::getIntNTy(getLLVMContext(), Info.StorageSize); @@ -4051,7 +4073,6 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, unsigned RecordCVR = base.getVRQualifiers(); if (rec->isUnion()) { // For unions, there is no pointer adjustment. - assert(!FieldType->isReferenceType() && "union has reference member"); if (CGM.getCodeGenOpts().StrictVTablePointers && hasAnyVptr(FieldType, getContext())) // Because unions can easily skip invariant.barriers, we need to add @@ -4068,27 +4089,30 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, addr.getPointer(), getDebugInfoFIndex(rec, field->getFieldIndex()), DbgInfo), addr.getAlignment()); } - } else { + if (FieldType->isReferenceType()) + addr = Builder.CreateElementBitCast( + addr, CGM.getTypes().ConvertTypeForMem(FieldType), field->getName()); + } else { if (!IsInPreservedAIRegion) // For structs, we GEP to the field that the record layout suggests. addr = emitAddrOfFieldStorage(*this, addr, field); else // Remember the original struct field index addr = emitPreserveStructAccess(*this, addr, field); + } - // If this is a reference field, load the reference right now. - if (FieldType->isReferenceType()) { - LValue RefLVal = MakeAddrLValue(addr, FieldType, FieldBaseInfo, - FieldTBAAInfo); - if (RecordCVR & Qualifiers::Volatile) - RefLVal.getQuals().addVolatile(); - addr = EmitLoadOfReference(RefLVal, &FieldBaseInfo, &FieldTBAAInfo); - - // Qualifiers on the struct don't apply to the referencee. - RecordCVR = 0; - FieldType = FieldType->getPointeeType(); - } + // If this is a reference field, load the reference right now. 
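Note on the union hunk above: it removes the assertion that a union never has a reference member and instead bitcasts the union storage to the reference's in-memory type. Standard C++ forbids such members, but clang accepts them as a Microsoft extension, which is what the assertion used to trip over. For instance (assuming -fms-extensions):

// Ill-formed in ISO C++, accepted by clang under -fms-extensions.
// Loading u.r now goes through the new CreateElementBitCast path, since
// the union's LLVM storage type is unrelated to the type of 'int &'.
union U {
  int &r;                   // reference member (Microsoft extension)
  char buf[sizeof(void *)];
};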
+ if (FieldType->isReferenceType()) { + LValue RefLVal = + MakeAddrLValue(addr, FieldType, FieldBaseInfo, FieldTBAAInfo); + if (RecordCVR & Qualifiers::Volatile) + RefLVal.getQuals().addVolatile(); + addr = EmitLoadOfReference(RefLVal, &FieldBaseInfo, &FieldTBAAInfo); + + // Qualifiers on the struct don't apply to the referencee. + RecordCVR = 0; + FieldType = FieldType->getPointeeType(); } // Make sure that the address is pointing to the right type. This is critical diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp index 695facd50b677..2f0e4937613f4 100644 --- a/lib/CodeGen/CGExprAgg.cpp +++ b/lib/CodeGen/CGExprAgg.cpp @@ -150,6 +150,9 @@ public: void VisitBinAssign(const BinaryOperator *E); void VisitBinComma(const BinaryOperator *E); void VisitBinCmp(const BinaryOperator *E); + void VisitCXXRewrittenBinaryOperator(CXXRewrittenBinaryOperator *E) { + Visit(E->getSemanticForm()); + } void VisitObjCMessageExpr(ObjCMessageExpr *E); void VisitObjCIvarRefExpr(ObjCIvarRefExpr *E) { @@ -501,7 +504,7 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType, CGM.getContext().getTargetAddressSpace(AS)); Emitter.finalize(GV); CharUnits Align = CGM.getContext().getTypeAlignInChars(ArrayQTy); - GV->setAlignment(Align.getQuantity()); + GV->setAlignment(Align.getAsAlign()); EmitFinalDestCopy(ArrayQTy, CGF.MakeAddrLValue(GV, ArrayQTy, Align)); return; } @@ -1495,6 +1498,13 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { // initializers throws an exception. SmallVector<EHScopeStack::stable_iterator, 16> cleanups; llvm::Instruction *cleanupDominator = nullptr; + auto addCleanup = [&](const EHScopeStack::stable_iterator &cleanup) { + cleanups.push_back(cleanup); + if (!cleanupDominator) // create placeholder once needed + cleanupDominator = CGF.Builder.CreateAlignedLoad( + CGF.Int8Ty, llvm::Constant::getNullValue(CGF.Int8PtrTy), + CharUnits::One()); + }; unsigned curInitIndex = 0; @@ -1519,7 +1529,7 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { if (QualType::DestructionKind dtorKind = Base.getType().isDestructedType()) { CGF.pushDestroy(dtorKind, V, Base.getType()); - cleanups.push_back(CGF.EHStack.stable_begin()); + addCleanup(CGF.EHStack.stable_begin()); } } } @@ -1596,15 +1606,9 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { = field->getType().isDestructedType()) { assert(LV.isSimple()); if (CGF.needsEHCleanup(dtorKind)) { - if (!cleanupDominator) - cleanupDominator = CGF.Builder.CreateAlignedLoad( - CGF.Int8Ty, - llvm::Constant::getNullValue(CGF.Int8PtrTy), - CharUnits::One()); // placeholder - CGF.pushDestroy(EHCleanup, LV.getAddress(), field->getType(), CGF.getDestroyer(dtorKind), false); - cleanups.push_back(CGF.EHStack.stable_begin()); + addCleanup(CGF.EHStack.stable_begin()); pushedCleanup = true; } } @@ -1620,6 +1624,8 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { // Deactivate all the partial cleanups in reverse order, which // generally means popping them. + assert((cleanupDominator || cleanups.empty()) && + "Missing cleanupDominator before deactivating cleanup blocks"); for (unsigned i = cleanups.size(); i != 0; --i) CGF.DeactivateCleanupBlock(cleanups[i-1], cleanupDominator); @@ -1756,7 +1762,7 @@ static CharUnits GetNumNonZeroBytesInInit(const Expr *E, CodeGenFunction &CGF) { // referencee. InitListExprs for unions and arrays can't have references. 
if (const RecordType *RT = E->getType()->getAs<RecordType>()) { if (!RT->isUnionType()) { - RecordDecl *SD = E->getType()->getAs<RecordType>()->getDecl(); + RecordDecl *SD = RT->getDecl(); CharUnits NumNonZeroBytes = CharUnits::Zero(); unsigned ILEElement = 0; diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp index 5476d13b7c461..114d806d454bb 100644 --- a/lib/CodeGen/CGExprCXX.cpp +++ b/lib/CodeGen/CGExprCXX.cpp @@ -382,7 +382,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( const CXXRecordDecl *RD; std::tie(VTable, RD) = CGM.getCXXABI().LoadVTablePtr(*this, This.getAddress(), - MD->getParent()); + CalleeDecl->getParent()); EmitVTablePtrCheckForCall(RD, VTable, CFITCK_NVCall, CE->getBeginLoc()); } @@ -418,13 +418,10 @@ CodeGenFunction::EmitCXXMemberPointerCallExpr(const CXXMemberCallExpr *E, const Expr *BaseExpr = BO->getLHS(); const Expr *MemFnExpr = BO->getRHS(); - const MemberPointerType *MPT = - MemFnExpr->getType()->castAs<MemberPointerType>(); - - const FunctionProtoType *FPT = - MPT->getPointeeType()->castAs<FunctionProtoType>(); - const CXXRecordDecl *RD = - cast<CXXRecordDecl>(MPT->getClass()->getAs<RecordType>()->getDecl()); + const auto *MPT = MemFnExpr->getType()->castAs<MemberPointerType>(); + const auto *FPT = MPT->getPointeeType()->castAs<FunctionProtoType>(); + const auto *RD = + cast<CXXRecordDecl>(MPT->getClass()->castAs<RecordType>()->getDecl()); // Emit the 'this' pointer. Address This = Address::invalid(); @@ -535,7 +532,7 @@ static void EmitNullBaseClassInitialization(CodeGenFunction &CGF, CharUnits Align = std::max(Layout.getNonVirtualAlignment(), DestPtr.getAlignment()); - NullVariable->setAlignment(Align.getQuantity()); + NullVariable->setAlignment(Align.getAsAlign()); Address SrcPtr = Address(CGF.EmitCastToVoidPtr(NullVariable), Align); @@ -1882,9 +1879,33 @@ static void EmitObjectDelete(CodeGenFunction &CGF, Dtor = RD->getDestructor(); if (Dtor->isVirtual()) { - CGF.CGM.getCXXABI().emitVirtualObjectDelete(CGF, DE, Ptr, ElementType, - Dtor); - return; + bool UseVirtualCall = true; + const Expr *Base = DE->getArgument(); + if (auto *DevirtualizedDtor = + dyn_cast_or_null<const CXXDestructorDecl>( + Dtor->getDevirtualizedMethod( + Base, CGF.CGM.getLangOpts().AppleKext))) { + UseVirtualCall = false; + const CXXRecordDecl *DevirtualizedClass = + DevirtualizedDtor->getParent(); + if (declaresSameEntity(getCXXRecord(Base), DevirtualizedClass)) { + // Devirtualized to the class of the base type (the type of the + // whole expression). + Dtor = DevirtualizedDtor; + } else { + // Devirtualized to some other type. Would need to cast the this + // pointer to that type but we don't have support for that yet, so + // do a virtual call. FIXME: handle the case where it is + // devirtualized to the derived type (the type of the inner + // expression) as in EmitCXXMemberOrOperatorMemberCallExpr. + UseVirtualCall = true; + } + } + if (UseVirtualCall) { + CGF.CGM.getCXXABI().emitVirtualObjectDelete(CGF, DE, Ptr, ElementType, + Dtor); + return; + } } } } diff --git a/lib/CodeGen/CGExprComplex.cpp b/lib/CodeGen/CGExprComplex.cpp index 6a5fb45ba259a..385f87f12a9b3 100644 --- a/lib/CodeGen/CGExprComplex.cpp +++ b/lib/CodeGen/CGExprComplex.cpp @@ -279,6 +279,10 @@ public: return EmitBinDiv(EmitBinOps(E)); } + ComplexPairTy VisitCXXRewrittenBinaryOperator(CXXRewrittenBinaryOperator *E) { + return Visit(E->getSemanticForm()); + } + // Compound assignments. 
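Note on the EmitObjectDelete change above: it consults Dtor->getDevirtualizedMethod on the deleted expression, so a delete through a pointer whose dynamic type is provably known can call the destructor directly instead of through the vtable. The classic case it catches (illustrative types):

struct Base {
  virtual ~Base();
};
struct Derived final : Base {
  ~Derived() override;
};

void destroy(Derived *d) {
  // Derived is final, so the dynamic type of *d is exactly Derived; the
  // virtual destructor devirtualizes to a direct call to ~Derived()
  // followed by operator delete, with no vtable load.
  delete d;
}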
ComplexPairTy VisitBinAddAssign(const CompoundAssignOperator *E) { return EmitCompoundAssign(E, &ComplexExprEmitter::EmitBinAdd); diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp index 31cf2aef1ba0b..96e8c9c0d0e61 100644 --- a/lib/CodeGen/CGExprConstant.cpp +++ b/lib/CodeGen/CGExprConstant.cpp @@ -659,7 +659,7 @@ static bool EmitDesignatedInitUpdater(ConstantEmitter &Emitter, } bool ConstStructBuilder::Build(InitListExpr *ILE, bool AllowOverwrite) { - RecordDecl *RD = ILE->getType()->getAs<RecordType>()->getDecl(); + RecordDecl *RD = ILE->getType()->castAs<RecordType>()->getDecl(); const ASTRecordLayout &Layout = CGM.getContext().getASTRecordLayout(RD); unsigned FieldNo = -1; @@ -839,7 +839,7 @@ bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, } llvm::Constant *ConstStructBuilder::Finalize(QualType Type) { - RecordDecl *RD = Type->getAs<RecordType>()->getDecl(); + RecordDecl *RD = Type->castAs<RecordType>()->getDecl(); llvm::Type *ValTy = CGM.getTypes().ConvertType(Type); return Builder.build(ValTy, RD->hasFlexibleArrayMember()); } @@ -907,7 +907,7 @@ static ConstantAddress tryEmitGlobalCompoundLiteral(CodeGenModule &CGM, llvm::GlobalVariable::NotThreadLocal, CGM.getContext().getTargetAddressSpace(addressSpace)); emitter.finalize(GV); - GV->setAlignment(Align.getQuantity()); + GV->setAlignment(Align.getAsAlign()); CGM.setAddrOfConstantCompoundLiteral(E, GV); return ConstantAddress(GV, Align); } @@ -1269,8 +1269,8 @@ public: return nullptr; // FIXME: We should not have to call getBaseElementType here. - const RecordType *RT = - CGM.getContext().getBaseElementType(Ty)->getAs<RecordType>(); + const auto *RT = + CGM.getContext().getBaseElementType(Ty)->castAs<RecordType>(); const CXXRecordDecl *RD = cast<CXXRecordDecl>(RT->getDecl()); // If the class doesn't have a trivial destructor, we can't emit it as a diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp index 3d082de2a14f5..55a413a2a7179 100644 --- a/lib/CodeGen/CGExprScalar.cpp +++ b/lib/CodeGen/CGExprScalar.cpp @@ -294,8 +294,7 @@ public: Value *AlignmentValue = CGF.EmitScalarExpr(AVAttr->getAlignment()); llvm::ConstantInt *AlignmentCI = cast<llvm::ConstantInt>(AlignmentValue); - CGF.EmitAlignmentAssumption(V, E, AVAttr->getLocation(), - AlignmentCI->getZExtValue()); + CGF.EmitAlignmentAssumption(V, E, AVAttr->getLocation(), AlignmentCI); } /// EmitLoadOfLValue - Given an expression with complex type that represents a @@ -674,6 +673,10 @@ public: return llvm::ConstantInt::get(ConvertType(E->getType()), E->getValue()); } + Value *VisitConceptSpecializationExpr(const ConceptSpecializationExpr *E) { + return Builder.getInt1(E->isSatisfied()); + } + Value *VisitArrayTypeTraitExpr(const ArrayTypeTraitExpr *E) { return llvm::ConstantInt::get(Builder.getInt32Ty(), E->getValue()); } @@ -814,6 +817,10 @@ public: Value *VisitBinPtrMemD(const Expr *E) { return EmitLoadOfLValue(E); } Value *VisitBinPtrMemI(const Expr *E) { return EmitLoadOfLValue(E); } + Value *VisitCXXRewrittenBinaryOperator(CXXRewrittenBinaryOperator *E) { + return Visit(E->getSemanticForm()); + } + // Other Operators. 
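Note on the VisitConceptSpecializationExpr hunk above: that one line is all the codegen a concept-id needs, because satisfaction was already computed by Sema; the emitter just materializes a constant i1. For example (illustrative):

template <class T>
concept Small = sizeof(T) <= 8;

// Satisfaction is a property of the expression, not a runtime test;
// both functions compile down to returning a constant.
bool a() { return Small<int>;       } // lowered to 'ret i1 true'
bool b() { return Small<double[4]>; } // lowered to 'ret i1 false'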
Value *VisitBlockExpr(const BlockExpr *BE); Value *VisitAbstractConditionalOperator(const AbstractConditionalOperator *); @@ -1657,8 +1664,8 @@ Value *ScalarExprEmitter::VisitConvertVectorExpr(ConvertVectorExpr *E) { if (SrcTy == DstTy) return Src; - QualType SrcEltType = SrcType->getAs<VectorType>()->getElementType(), - DstEltType = DstType->getAs<VectorType>()->getElementType(); + QualType SrcEltType = SrcType->castAs<VectorType>()->getElementType(), + DstEltType = DstType->castAs<VectorType>()->getElementType(); assert(SrcTy->isVectorTy() && "ConvertVector source IR type must be a vector"); @@ -2577,14 +2584,16 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, Value *ScalarExprEmitter::VisitUnaryMinus(const UnaryOperator *E) { TestAndClearIgnoreResultAssign(); + Value *Op = Visit(E->getSubExpr()); + + // Generate a unary FNeg for FP ops. + if (Op->getType()->isFPOrFPVectorTy()) + return Builder.CreateFNeg(Op, "fneg"); + // Emit unary minus with EmitSub so we handle overflow cases etc. BinOpInfo BinOp; - BinOp.RHS = Visit(E->getSubExpr()); - - if (BinOp.RHS->getType()->isFPOrFPVectorTy()) - BinOp.LHS = llvm::ConstantFP::getZeroValueForNegation(BinOp.RHS->getType()); - else - BinOp.LHS = llvm::Constant::getNullValue(BinOp.RHS->getType()); + BinOp.RHS = Op; + BinOp.LHS = llvm::Constant::getNullValue(BinOp.RHS->getType()); BinOp.Ty = E->getType(); BinOp.Opcode = BO_Sub; // FIXME: once UnaryOperator carries FPFeatures, copy it here. @@ -2662,7 +2671,7 @@ Value *ScalarExprEmitter::VisitOffsetOfExpr(OffsetOfExpr *E) { case OffsetOfNode::Field: { FieldDecl *MemberDecl = ON.getField(); - RecordDecl *RD = CurrentType->getAs<RecordType>()->getDecl(); + RecordDecl *RD = CurrentType->castAs<RecordType>()->getDecl(); const ASTRecordLayout &RL = CGF.getContext().getASTRecordLayout(RD); // Compute the index of the field in its parent. @@ -2695,7 +2704,7 @@ Value *ScalarExprEmitter::VisitOffsetOfExpr(OffsetOfExpr *E) { continue; } - RecordDecl *RD = CurrentType->getAs<RecordType>()->getDecl(); + RecordDecl *RD = CurrentType->castAs<RecordType>()->getDecl(); const ASTRecordLayout &RL = CGF.getContext().getASTRecordLayout(RD); // Save the element type. @@ -3745,7 +3754,7 @@ Value *ScalarExprEmitter::EmitCompare(const BinaryOperator *E, Value *FirstVecArg = LHS, *SecondVecArg = RHS; - QualType ElTy = LHSTy->getAs<VectorType>()->getElementType(); + QualType ElTy = LHSTy->castAs<VectorType>()->getElementType(); const BuiltinType *BTy = ElTy->getAs<BuiltinType>(); BuiltinType::Kind ElementKind = BTy->getKind(); @@ -4414,8 +4423,8 @@ Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) { return Src; } - return Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), - Src, DstTy, "astype"); + return createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), + Src, DstTy, "astype"); } Value *ScalarExprEmitter::VisitAtomicExpr(AtomicExpr *E) { @@ -4533,32 +4542,43 @@ LValue CodeGenFunction::EmitCompoundAssignmentLValue( llvm_unreachable("Unhandled compound assignment operator"); } -Value *CodeGenFunction::EmitCheckedInBoundsGEP(Value *Ptr, - ArrayRef<Value *> IdxList, - bool SignedIndices, - bool IsSubtraction, - SourceLocation Loc, - const Twine &Name) { - Value *GEPVal = Builder.CreateInBoundsGEP(Ptr, IdxList, Name); +struct GEPOffsetAndOverflow { + // The total (signed) byte offset for the GEP. + llvm::Value *TotalOffset; + // The offset overflow flag - true if the total offset overflows. 
+ llvm::Value *OffsetOverflows; }; - // If the pointer overflow sanitizer isn't enabled, do nothing. - if (!SanOpts.has(SanitizerKind::PointerOverflow)) - return GEPVal; +/// Evaluate given GEPVal, which is either an inbounds GEP, or a constant, +/// and compute the total offset it applies from its base pointer BasePtr. +/// Returns the offset in bytes and a boolean flag indicating whether an +/// overflow happened during evaluation. +static GEPOffsetAndOverflow EmitGEPOffsetInBytes(Value *BasePtr, Value *GEPVal, + llvm::LLVMContext &VMContext, + CodeGenModule &CGM, + CGBuilderTy Builder) { + const auto &DL = CGM.getDataLayout(); - // If the GEP has already been reduced to a constant, leave it be. - if (isa<llvm::Constant>(GEPVal)) - return GEPVal; + // The total (signed) byte offset for the GEP. + llvm::Value *TotalOffset = nullptr; - // Only check for overflows in the default address space. - if (GEPVal->getType()->getPointerAddressSpace()) - return GEPVal; + // Was the GEP already reduced to a constant? + if (isa<llvm::Constant>(GEPVal)) { + // Compute the offset by casting both pointers to integers and subtracting: + // GEPVal = BasePtr + ptr(Offset) <--> Offset = int(GEPVal) - int(BasePtr) + Value *BasePtr_int = + Builder.CreatePtrToInt(BasePtr, DL.getIntPtrType(BasePtr->getType())); + Value *GEPVal_int = + Builder.CreatePtrToInt(GEPVal, DL.getIntPtrType(GEPVal->getType())); + TotalOffset = Builder.CreateSub(GEPVal_int, BasePtr_int); + return {TotalOffset, /*OffsetOverflows=*/Builder.getFalse()}; + } auto *GEP = cast<llvm::GEPOperator>(GEPVal); + assert(GEP->getPointerOperand() == BasePtr && + "BasePtr must be the base of the GEP."); assert(GEP->isInBounds() && "Expected inbounds GEP"); - SanitizerScope SanScope(this); - auto &VMContext = getLLVMContext(); - const auto &DL = CGM.getDataLayout(); auto *IntPtrTy = DL.getIntPtrType(GEP->getPointerOperandType()); // Grab references to the signed add/mul overflow intrinsics for intptr_t. @@ -4568,8 +4588,6 @@ Value *CodeGenFunction::EmitCheckedInBoundsGEP(Value *Ptr, auto *SMulIntrinsic = CGM.getIntrinsic(llvm::Intrinsic::smul_with_overflow, IntPtrTy); - // The total (signed) byte offset for the GEP. - llvm::Value *TotalOffset = nullptr; // The offset overflow flag - true if the total offset overflows. llvm::Value *OffsetOverflows = Builder.getFalse(); @@ -4627,41 +4645,122 @@ Value *CodeGenFunction::EmitCheckedInBoundsGEP(Value *Ptr, TotalOffset = eval(BO_Add, TotalOffset, LocalOffset); } - // Common case: if the total offset is zero, don't emit a check. - if (TotalOffset == Zero) + return {TotalOffset, OffsetOverflows}; +} + +Value * +CodeGenFunction::EmitCheckedInBoundsGEP(Value *Ptr, ArrayRef<Value *> IdxList, + bool SignedIndices, bool IsSubtraction, + SourceLocation Loc, const Twine &Name) { + Value *GEPVal = Builder.CreateInBoundsGEP(Ptr, IdxList, Name); + + // If the pointer overflow sanitizer isn't enabled, do nothing. + if (!SanOpts.has(SanitizerKind::PointerOverflow)) + return GEPVal; + + llvm::Type *PtrTy = Ptr->getType(); + + // Perform nullptr-and-offset check unless the nullptr is defined.
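The eval(BO_Add, ...) accumulation above lowers to @llvm.smul.with.overflow and @llvm.sadd.with.overflow calls. A standalone model of that bookkeeping, assuming a 64-bit intptr_t (illustrative names, not the Clang API; the __builtin_*_overflow builtins stand in for the IR intrinsics):

#include <cstdint>

struct OffsetAndOverflow {
  int64_t TotalOffset;  // running signed byte offset
  bool OffsetOverflows; // sticky overflow flag
};

// Fold one index*elemSize term into the running offset, tracking overflow the
// same way the emitted smul/sadd intrinsics do.
OffsetAndOverflow accumulate(OffsetAndOverflow Acc, int64_t Index,
                             int64_t ElemSize) {
  int64_t Local, Next;
  bool Ov = __builtin_mul_overflow(Index, ElemSize, &Local);
  Ov |= __builtin_add_overflow(Acc.TotalOffset, Local, &Next);
  return {Next, Acc.OffsetOverflows || Ov};
}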
+ bool PerformNullCheck = !NullPointerIsDefined( + Builder.GetInsertBlock()->getParent(), PtrTy->getPointerAddressSpace()); + // Check for overflows unless the GEP got constant-folded, + // and only in the default address space. + bool PerformOverflowCheck = + !isa<llvm::Constant>(GEPVal) && PtrTy->getPointerAddressSpace() == 0; + + if (!(PerformNullCheck || PerformOverflowCheck)) + return GEPVal; + + const auto &DL = CGM.getDataLayout(); + + SanitizerScope SanScope(this); + llvm::Type *IntPtrTy = DL.getIntPtrType(PtrTy); + + GEPOffsetAndOverflow EvaluatedGEP = + EmitGEPOffsetInBytes(Ptr, GEPVal, getLLVMContext(), CGM, Builder); + + assert((!isa<llvm::Constant>(EvaluatedGEP.TotalOffset) || + EvaluatedGEP.OffsetOverflows == Builder.getFalse()) && + "If the offset got constant-folded, we don't expect that there was an " + "overflow."); + + auto *Zero = llvm::ConstantInt::getNullValue(IntPtrTy); + + // Common case: if the total offset is zero, and we are using C++ semantics, + // where nullptr+0 is defined, don't emit a check. + if (EvaluatedGEP.TotalOffset == Zero && CGM.getLangOpts().CPlusPlus) return GEPVal; // Now that we've computed the total offset, add it to the base pointer (with // wrapping semantics). - auto *IntPtr = Builder.CreatePtrToInt(GEP->getPointerOperand(), IntPtrTy); - auto *ComputedGEP = Builder.CreateAdd(IntPtr, TotalOffset); - - // The GEP is valid if: - // 1) The total offset doesn't overflow, and - // 2) The sign of the difference between the computed address and the base - // pointer matches the sign of the total offset. - llvm::Value *ValidGEP; - auto *NoOffsetOverflow = Builder.CreateNot(OffsetOverflows); - if (SignedIndices) { - auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr); - auto *PosOrZeroOffset = Builder.CreateICmpSGE(TotalOffset, Zero); - llvm::Value *NegValid = Builder.CreateICmpULT(ComputedGEP, IntPtr); - ValidGEP = Builder.CreateAnd( - Builder.CreateSelect(PosOrZeroOffset, PosOrZeroValid, NegValid), - NoOffsetOverflow); - } else if (!SignedIndices && !IsSubtraction) { - auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr); - ValidGEP = Builder.CreateAnd(PosOrZeroValid, NoOffsetOverflow); - } else { - auto *NegOrZeroValid = Builder.CreateICmpULE(ComputedGEP, IntPtr); - ValidGEP = Builder.CreateAnd(NegOrZeroValid, NoOffsetOverflow); + auto *IntPtr = Builder.CreatePtrToInt(Ptr, IntPtrTy); + auto *ComputedGEP = Builder.CreateAdd(IntPtr, EvaluatedGEP.TotalOffset); + + llvm::SmallVector<std::pair<llvm::Value *, SanitizerMask>, 2> Checks; + + if (PerformNullCheck) { + // In C++, if the base pointer evaluates to a null pointer value, + // the only valid pointer this inbounds GEP can produce is also + // a null pointer, so the offset must also evaluate to zero. + // Likewise, if we have a non-zero base pointer, we cannot get a null + // pointer as a result, so the offset cannot be -intptr_t(BasePtr). + // In other words, either both pointers are null, or both are non-null; + // otherwise the behaviour is undefined. + // + // C, however, is more strict in this regard, and gives more + // optimization opportunities: in C, additionally, nullptr+0 is undefined. + // So both the input to the 'gep inbounds' AND the output must not be null. + auto *BaseIsNotNullptr = Builder.CreateIsNotNull(Ptr); + auto *ResultIsNotNullptr = Builder.CreateIsNotNull(ComputedGEP); + auto *Valid = + CGM.getLangOpts().CPlusPlus + ?
Builder.CreateICmpEQ(BaseIsNotNullptr, ResultIsNotNullptr) + : Builder.CreateAnd(BaseIsNotNullptr, ResultIsNotNullptr); + Checks.emplace_back(Valid, SanitizerKind::PointerOverflow); + } + + if (PerformOverflowCheck) { + // The GEP is valid if: + // 1) The total offset doesn't overflow, and + // 2) The sign of the difference between the computed address and the base + // pointer matches the sign of the total offset. + llvm::Value *ValidGEP; + auto *NoOffsetOverflow = Builder.CreateNot(EvaluatedGEP.OffsetOverflows); + if (SignedIndices) { + // GEP is computed as `unsigned base + signed offset`, therefore: + // * If offset was positive, then the computed pointer cannot be + // [unsigned] less than the base pointer, unless it overflowed. + // * If offset was negative, then the computed pointer cannot be + // [unsigned] greater than the base pointer, unless it overflowed. + auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr); + auto *PosOrZeroOffset = + Builder.CreateICmpSGE(EvaluatedGEP.TotalOffset, Zero); + llvm::Value *NegValid = Builder.CreateICmpULT(ComputedGEP, IntPtr); + ValidGEP = + Builder.CreateSelect(PosOrZeroOffset, PosOrZeroValid, NegValid); + } else if (!IsSubtraction) { + // GEP is computed as `unsigned base + unsigned offset`, therefore the + // computed pointer cannot be [unsigned] less than the base pointer, + // unless there was an overflow. + // Equivalent to `@llvm.uadd.with.overflow(%base, %offset)`. + ValidGEP = Builder.CreateICmpUGE(ComputedGEP, IntPtr); + } else { + // GEP is computed as `unsigned base - unsigned offset`, therefore the + // computed pointer cannot be [unsigned] greater than the base pointer, + // unless there was an overflow. + // Equivalent to `@llvm.usub.with.overflow(%base, sub(0, %offset))`. + ValidGEP = Builder.CreateICmpULE(ComputedGEP, IntPtr); + } + ValidGEP = Builder.CreateAnd(ValidGEP, NoOffsetOverflow); + Checks.emplace_back(ValidGEP, SanitizerKind::PointerOverflow); } + assert(!Checks.empty() && "Should have produced some checks."); + llvm::Constant *StaticArgs[] = {EmitCheckSourceLocation(Loc)}; // Pass the computed GEP to the runtime to avoid emitting poisoned arguments.
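A standalone model of the three validity predicates built above, assuming 64-bit pointers and wrapping arithmetic (illustrative only; the real code emits these comparisons as IR):

#include <cstdint>

bool isValidGEP(uint64_t Base, int64_t Offset, bool SignedIndices,
                bool IsSubtraction, bool OffsetOverflowed) {
  // Wrapping add, matching Builder.CreateAdd on the ptrtoint'ed base.
  uint64_t Computed = Base + static_cast<uint64_t>(Offset);
  bool Valid;
  if (SignedIndices)
    // select(offset >= 0, computed uge base, computed ult base)
    Valid = Offset >= 0 ? Computed >= Base : Computed < Base;
  else if (!IsSubtraction)
    // unsigned base + unsigned offset: like @llvm.uadd.with.overflow
    Valid = Computed >= Base;
  else
    // unsigned base - unsigned offset: like @llvm.usub.with.overflow
    Valid = Computed <= Base;
  return Valid && !OffsetOverflowed;
}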
llvm::Value *DynamicArgs[] = {IntPtr, ComputedGEP}; - EmitCheck(std::make_pair(ValidGEP, SanitizerKind::PointerOverflow), - SanitizerHandler::PointerOverflow, StaticArgs, DynamicArgs); + EmitCheck(Checks, SanitizerHandler::PointerOverflow, StaticArgs, DynamicArgs); return GEPVal; } diff --git a/lib/CodeGen/CGLoopInfo.cpp b/lib/CodeGen/CGLoopInfo.cpp index b2bc42bfa0136..c21d4feee7a8c 100644 --- a/lib/CodeGen/CGLoopInfo.cpp +++ b/lib/CodeGen/CGLoopInfo.cpp @@ -218,6 +218,7 @@ LoopInfo::createLoopVectorizeMetadata(const LoopAttributes &Attrs, if (Attrs.VectorizeEnable == LoopAttributes::Disable) Enabled = false; else if (Attrs.VectorizeEnable != LoopAttributes::Unspecified || + Attrs.VectorizePredicateEnable != LoopAttributes::Unspecified || Attrs.InterleaveCount != 0 || Attrs.VectorizeWidth != 0) Enabled = true; @@ -251,8 +252,32 @@ LoopInfo::createLoopVectorizeMetadata(const LoopAttributes &Attrs, Args.push_back(TempNode.get()); Args.append(LoopProperties.begin(), LoopProperties.end()); + // Setting vectorize.predicate + bool IsVectorPredicateEnabled = false; + if (Attrs.VectorizePredicateEnable != LoopAttributes::Unspecified && + Attrs.VectorizeEnable != LoopAttributes::Disable && + Attrs.VectorizeWidth < 1) { + + IsVectorPredicateEnabled = + (Attrs.VectorizePredicateEnable == LoopAttributes::Enable); + + Metadata *Vals[] = { + MDString::get(Ctx, "llvm.loop.vectorize.predicate.enable"), + ConstantAsMetadata::get(ConstantInt::get(llvm::Type::getInt1Ty(Ctx), + IsVectorPredicateEnabled))}; + Args.push_back(MDNode::get(Ctx, Vals)); + } + // Setting vectorize.width if (Attrs.VectorizeWidth > 0) { + // This implies vectorize.enable = true, but only add it when it is not + // already enabled. + if (Attrs.VectorizeEnable != LoopAttributes::Enable) + Args.push_back( + MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.enable"), + ConstantAsMetadata::get(ConstantInt::get( + llvm::Type::getInt1Ty(Ctx), 1))})); + Metadata *Vals[] = { MDString::get(Ctx, "llvm.loop.vectorize.width"), ConstantAsMetadata::get(ConstantInt::get(llvm::Type::getInt32Ty(Ctx), @@ -270,12 +295,15 @@ LoopInfo::createLoopVectorizeMetadata(const LoopAttributes &Attrs, } // Setting vectorize.enable - if (Attrs.VectorizeEnable != LoopAttributes::Unspecified) { + if (Attrs.VectorizeEnable != LoopAttributes::Unspecified || + IsVectorPredicateEnabled) { Metadata *Vals[] = { MDString::get(Ctx, "llvm.loop.vectorize.enable"), ConstantAsMetadata::get(ConstantInt::get( llvm::Type::getInt1Ty(Ctx), - (Attrs.VectorizeEnable == LoopAttributes::Enable)))}; + IsVectorPredicateEnabled + ? 
true + : (Attrs.VectorizeEnable == LoopAttributes::Enable)))}; Args.push_back(MDNode::get(Ctx, Vals)); } @@ -411,7 +439,8 @@ MDNode *LoopInfo::createMetadata( LoopAttributes::LoopAttributes(bool IsParallel) : IsParallel(IsParallel), VectorizeEnable(LoopAttributes::Unspecified), UnrollEnable(LoopAttributes::Unspecified), - UnrollAndJamEnable(LoopAttributes::Unspecified), VectorizeWidth(0), + UnrollAndJamEnable(LoopAttributes::Unspecified), + VectorizePredicateEnable(LoopAttributes::Unspecified), VectorizeWidth(0), InterleaveCount(0), UnrollCount(0), UnrollAndJamCount(0), DistributeEnable(LoopAttributes::Unspecified), PipelineDisabled(false), PipelineInitiationInterval(0) {} @@ -425,6 +454,7 @@ void LoopAttributes::clear() { VectorizeEnable = LoopAttributes::Unspecified; UnrollEnable = LoopAttributes::Unspecified; UnrollAndJamEnable = LoopAttributes::Unspecified; + VectorizePredicateEnable = LoopAttributes::Unspecified; DistributeEnable = LoopAttributes::Unspecified; PipelineDisabled = false; PipelineInitiationInterval = 0; @@ -446,6 +476,7 @@ LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs, Attrs.InterleaveCount == 0 && Attrs.UnrollCount == 0 && Attrs.UnrollAndJamCount == 0 && !Attrs.PipelineDisabled && Attrs.PipelineInitiationInterval == 0 && + Attrs.VectorizePredicateEnable == LoopAttributes::Unspecified && Attrs.VectorizeEnable == LoopAttributes::Unspecified && Attrs.UnrollEnable == LoopAttributes::Unspecified && Attrs.UnrollAndJamEnable == LoopAttributes::Unspecified && @@ -480,6 +511,7 @@ void LoopInfo::finish() { BeforeJam.InterleaveCount = Attrs.InterleaveCount; BeforeJam.VectorizeEnable = Attrs.VectorizeEnable; BeforeJam.DistributeEnable = Attrs.DistributeEnable; + BeforeJam.VectorizePredicateEnable = Attrs.VectorizePredicateEnable; switch (Attrs.UnrollEnable) { case LoopAttributes::Unspecified: @@ -495,6 +527,7 @@ void LoopInfo::finish() { break; } + AfterJam.VectorizePredicateEnable = Attrs.VectorizePredicateEnable; AfterJam.UnrollCount = Attrs.UnrollCount; AfterJam.PipelineDisabled = Attrs.PipelineDisabled; AfterJam.PipelineInitiationInterval = Attrs.PipelineInitiationInterval; @@ -516,6 +549,7 @@ void LoopInfo::finish() { // add it manually. SmallVector<Metadata *, 1> BeforeLoopProperties; if (BeforeJam.VectorizeEnable != LoopAttributes::Unspecified || + BeforeJam.VectorizePredicateEnable != LoopAttributes::Unspecified || BeforeJam.InterleaveCount != 0 || BeforeJam.VectorizeWidth != 0) BeforeLoopProperties.push_back( MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.isvectorized"))); @@ -537,8 +571,9 @@ void LoopInfo::finish() { void LoopInfoStack::push(BasicBlock *Header, const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc) { - Active.push_back(LoopInfo(Header, StagedAttrs, StartLoc, EndLoc, - Active.empty() ? nullptr : &Active.back())); + Active.emplace_back( + new LoopInfo(Header, StagedAttrs, StartLoc, EndLoc, + Active.empty() ? nullptr : Active.back().get())); // Clear the attributes so nested loops do not inherit them. 
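The condition guarding the new llvm.loop.vectorize.predicate.enable metadata in createLoopVectorizeMetadata above reduces to a small decision helper (simplified enum, illustrative only, not the real CGLoopInfo types):

enum class LVEnableState { Unspecified, Enable, Disable };

// Predicate metadata is emitted only when a predicate hint is present, the
// loop is not explicitly non-vectorizable, and no explicit width was given.
bool shouldEmitPredicateMetadata(LVEnableState Predicate,
                                 LVEnableState Vectorize, unsigned Width) {
  return Predicate != LVEnableState::Unspecified &&
         Vectorize != LVEnableState::Disable && Width < 1;
}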
StagedAttrs.clear(); } @@ -603,6 +638,9 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, case LoopHintAttr::UnrollAndJam: setUnrollAndJamState(LoopAttributes::Disable); break; + case LoopHintAttr::VectorizePredicate: + setVectorizePredicateState(LoopAttributes::Disable); + break; case LoopHintAttr::Distribute: setDistributeState(false); break; @@ -630,6 +668,9 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, case LoopHintAttr::UnrollAndJam: setUnrollAndJamState(LoopAttributes::Enable); break; + case LoopHintAttr::VectorizePredicate: + setVectorizePredicateState(LoopAttributes::Enable); + break; case LoopHintAttr::Distribute: setDistributeState(true); break; @@ -653,6 +694,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, break; case LoopHintAttr::Unroll: case LoopHintAttr::UnrollAndJam: + case LoopHintAttr::VectorizePredicate: case LoopHintAttr::UnrollCount: case LoopHintAttr::UnrollAndJamCount: case LoopHintAttr::VectorizeWidth: @@ -681,6 +723,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, case LoopHintAttr::Distribute: case LoopHintAttr::PipelineDisabled: case LoopHintAttr::PipelineInitiationInterval: + case LoopHintAttr::VectorizePredicate: llvm_unreachable("Options cannot be used with 'full' hint."); break; } @@ -704,6 +747,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, break; case LoopHintAttr::Unroll: case LoopHintAttr::UnrollAndJam: + case LoopHintAttr::VectorizePredicate: case LoopHintAttr::Vectorize: case LoopHintAttr::Interleave: case LoopHintAttr::Distribute: @@ -721,16 +765,16 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, void LoopInfoStack::pop() { assert(!Active.empty() && "No active loops to pop"); - Active.back().finish(); + Active.back()->finish(); Active.pop_back(); } void LoopInfoStack::InsertHelper(Instruction *I) const { if (I->mayReadOrWriteMemory()) { SmallVector<Metadata *, 4> AccessGroups; - for (const LoopInfo &AL : Active) { + for (const auto &AL : Active) { // Here we assume that every loop that has an access group is parallel. - if (MDNode *Group = AL.getAccessGroup()) + if (MDNode *Group = AL->getAccessGroup()) AccessGroups.push_back(Group); } MDNode *UnionMD = nullptr; diff --git a/lib/CodeGen/CGLoopInfo.h b/lib/CodeGen/CGLoopInfo.h index 35d0e00527b9f..5abcf37c5433e 100644 --- a/lib/CodeGen/CGLoopInfo.h +++ b/lib/CodeGen/CGLoopInfo.h @@ -51,6 +51,9 @@ struct LoopAttributes { /// Value for llvm.loop.unroll_and_jam.* metadata (enable, disable, or full). LVEnableState UnrollAndJamEnable; + /// Value for llvm.loop.vectorize.predicate metadata + LVEnableState VectorizePredicateEnable; + /// Value for llvm.loop.vectorize.width metadata. unsigned VectorizeWidth; @@ -237,6 +240,11 @@ public: StagedAttrs.UnrollEnable = State; } + /// Set the next pushed vectorize predicate state. + void setVectorizePredicateState(const LoopAttributes::LVEnableState &State) { + StagedAttrs.VectorizePredicateEnable = State; + } + /// Set the next pushed loop unroll_and_jam state. void setUnrollAndJamState(const LoopAttributes::LVEnableState &State) { StagedAttrs.UnrollAndJamEnable = State; @@ -267,11 +275,11 @@ private: bool hasInfo() const { return !Active.empty(); } /// Return the LoopInfo for the current loop. HasInfo should be called /// first to ensure LoopInfo is present. 
- const LoopInfo &getInfo() const { return Active.back(); } + const LoopInfo &getInfo() const { return *Active.back(); } /// The set of attributes that will be applied to the next pushed loop. LoopAttributes StagedAttrs; /// Stack of active loops. - llvm::SmallVector<LoopInfo, 4> Active; + llvm::SmallVector<std::unique_ptr<LoopInfo>, 4> Active; }; } // end namespace CodeGen diff --git a/lib/CodeGen/CGNonTrivialStruct.cpp b/lib/CodeGen/CGNonTrivialStruct.cpp index caf62d2ac93a1..05615aa128816 100644 --- a/lib/CodeGen/CGNonTrivialStruct.cpp +++ b/lib/CodeGen/CGNonTrivialStruct.cpp @@ -823,7 +823,7 @@ static void callSpecialFunction(G &&Gen, StringRef FuncName, QualType QT, Gen.callFunc(FuncName, QT, Addrs, CGF); } -template <size_t N> std::array<Address, N> createNullAddressArray(); +template <size_t N> static std::array<Address, N> createNullAddressArray(); template <> std::array<Address, 1> createNullAddressArray() { return std::array<Address, 1>({{Address(nullptr, CharUnits::Zero())}}); diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp index 1dd7ec52230ee..1fa72678081af 100644 --- a/lib/CodeGen/CGObjC.cpp +++ b/lib/CodeGen/CGObjC.cpp @@ -143,7 +143,7 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E, NumElements); QualType ElementType = Context.getObjCIdType().withConst(); QualType ElementArrayType - = Context.getConstantArrayType(ElementType, APNumElements, + = Context.getConstantArrayType(ElementType, APNumElements, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); // Allocate the temporary array(s). @@ -1661,7 +1661,7 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ QualType ItemsTy = getContext().getConstantArrayType(getContext().getObjCIdType(), - llvm::APInt(32, NumItems), + llvm::APInt(32, NumItems), nullptr, ArrayType::Normal, 0); Address ItemsPtr = CreateMemTemp(ItemsTy, "items.ptr"); diff --git a/lib/CodeGen/CGObjCGNU.cpp b/lib/CodeGen/CGObjCGNU.cpp index ee5c12aa35bd7..d2c089d0360e1 100644 --- a/lib/CodeGen/CGObjCGNU.cpp +++ b/lib/CodeGen/CGObjCGNU.cpp @@ -1294,7 +1294,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { // Emit a placeholder symbol. GV = new llvm::GlobalVariable(TheModule, ProtocolTy, false, llvm::GlobalValue::ExternalLinkage, nullptr, Name); - GV->setAlignment(CGM.getPointerAlign().getQuantity()); + GV->setAlignment(CGM.getPointerAlign().getAsAlign()); } return llvm::ConstantExpr::getBitCast(GV, ProtocolPtrTy); } @@ -1318,7 +1318,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { llvm::ConstantExpr::getBitCast(Protocol, ProtocolPtrTy), RefName); GV->setComdat(TheModule.getOrInsertComdat(RefName)); GV->setSection(sectionName<ProtocolReferenceSection>()); - GV->setAlignment(CGM.getPointerAlign().getQuantity()); + GV->setAlignment(CGM.getPointerAlign().getAsAlign()); Ref = GV; } EmittedProtocolRef = true; @@ -1497,7 +1497,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { Sym->setSection((Section + SecSuffix).str()); Sym->setComdat(TheModule.getOrInsertComdat((Prefix + Section).str())); - Sym->setAlignment(CGM.getPointerAlign().getQuantity()); + Sym->setAlignment(CGM.getPointerAlign().getAsAlign()); return Sym; }; return { Sym("__start_", "$a"), Sym("__stop", "$z") }; @@ -1854,7 +1854,8 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { ivarBuilder.addInt(Int32Ty, CGM.getContext().getTypeSizeInChars(ivarTy).getQuantity()); // Alignment will be stored as a base-2 log of the alignment. 
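The switch of the Active stack above from SmallVector<LoopInfo, 4> to SmallVector<std::unique_ptr<LoopInfo>, 4> keeps every LoopInfo at a stable address even when the vector reallocates, which matters because a nested LoopInfo holds a pointer to its parent. A minimal illustration of the hazard and the fix, using std::vector for brevity (illustrative types, not the CGLoopInfo API):

#include <memory>
#include <vector>

struct Node {
  explicit Node(Node *Parent) : Parent(Parent) {}
  Node *Parent; // points at the enclosing node, like LoopInfo's parent link
};

std::vector<std::unique_ptr<Node>> Stack;

// Mirrors LoopInfoStack::push: the new node points at the current top. With
// by-value storage, a reallocation triggered by a later push would invalidate
// Parent; heap-allocated nodes keep stable addresses.
Node *push() {
  Node *Parent = Stack.empty() ? nullptr : Stack.back().get();
  Stack.push_back(std::make_unique<Node>(Parent));
  return Stack.back().get();
}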
- int align = llvm::Log2_32(Context.getTypeAlignInChars(ivarTy).getQuantity()); + unsigned align = + llvm::Log2_32(Context.getTypeAlignInChars(ivarTy).getQuantity()); // Objects that require more than 2^64-byte alignment should be impossible! assert(align < 64); // uint32_t flags; @@ -4039,7 +4040,7 @@ LValue CGObjCGNU::EmitObjCValueForIvar(CodeGenFunction &CGF, const ObjCIvarDecl *Ivar, unsigned CVRQualifiers) { const ObjCInterfaceDecl *ID = - ObjectTy->getAs<ObjCObjectType>()->getInterface(); + ObjectTy->castAs<ObjCObjectType>()->getInterface(); return EmitValueForIvarAtOffset(CGF, ID, BaseValue, Ivar, CVRQualifiers, EmitIvarOffset(CGF, ID, Ivar)); } @@ -4086,7 +4087,7 @@ llvm::Value *CGObjCGNU::EmitIvarOffset(CodeGenFunction &CGF, auto GV = new llvm::GlobalVariable(TheModule, IntTy, false, llvm::GlobalValue::LinkOnceAnyLinkage, llvm::Constant::getNullValue(IntTy), name); - GV->setAlignment(Align.getQuantity()); + GV->setAlignment(Align.getAsAlign()); Offset = GV; } Offset = CGF.Builder.CreateAlignedLoad(Offset, Align); diff --git a/lib/CodeGen/CGObjCMac.cpp b/lib/CodeGen/CGObjCMac.cpp index 12880fecbadff..8e28b2f05c167 100644 --- a/lib/CodeGen/CGObjCMac.cpp +++ b/lib/CodeGen/CGObjCMac.cpp @@ -2018,7 +2018,7 @@ CGObjCCommonMac::GenerateConstantNSString(const StringLiteral *Literal) { GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); // Don't enforce the target's minimum global alignment, since the only use // of the string is via this class initializer. - GV->setAlignment(1); + GV->setAlignment(llvm::Align::None()); Fields.addBitCast(GV, CGM.Int8PtrTy); // String length. @@ -2517,14 +2517,12 @@ void CGObjCCommonMac::BuildRCRecordLayout(const llvm::StructLayout *RecLayout, } if (const ArrayType *Array = CGM.getContext().getAsArrayType(FQT)) { - const ConstantArrayType *CArray = - dyn_cast_or_null<ConstantArrayType>(Array); + auto *CArray = cast<ConstantArrayType>(Array); uint64_t ElCount = CArray->getSize().getZExtValue(); assert(CArray && "only array with known element size is supported"); FQT = CArray->getElementType(); while (const ArrayType *Array = CGM.getContext().getAsArrayType(FQT)) { - const ConstantArrayType *CArray = - dyn_cast_or_null<ConstantArrayType>(Array); + auto *CArray = cast<ConstantArrayType>(Array); ElCount *= CArray->getSize().getZExtValue(); FQT = CArray->getElementType(); } @@ -3103,7 +3101,7 @@ llvm::Constant *CGObjCMac::GetOrEmitProtocolRef(const ObjCProtocolDecl *PD) { nullptr, "OBJC_PROTOCOL_" + PD->getName()); Entry->setSection("__OBJC,__protocol,regular,no_dead_strip"); // FIXME: Is this necessary? Why only for protocol? 
- Entry->setAlignment(4); + Entry->setAlignment(llvm::Align(4)); } return Entry; @@ -3609,7 +3607,7 @@ void CGObjCMac::GenerateClass(const ObjCImplementationDecl *ID) { "Forward metaclass reference has incorrect type."); values.finishAndSetAsInitializer(GV); GV->setSection(Section); - GV->setAlignment(CGM.getPointerAlign().getQuantity()); + GV->setAlignment(CGM.getPointerAlign().getAsAlign()); CGM.addCompilerUsedGlobal(GV); } else GV = CreateMetadataVar(Name, values, Section, CGM.getPointerAlign(), true); @@ -4016,7 +4014,7 @@ llvm::GlobalVariable *CGObjCCommonMac::CreateMetadataVar(Twine Name, new llvm::GlobalVariable(CGM.getModule(), Ty, false, LT, Init, Name); if (!Section.empty()) GV->setSection(Section); - GV->setAlignment(Align.getQuantity()); + GV->setAlignment(Align.getAsAlign()); if (AddToUsed) CGM.addCompilerUsedGlobal(GV); return GV; @@ -4064,7 +4062,7 @@ CGObjCCommonMac::CreateCStringLiteral(StringRef Name, ObjCLabelType Type, if (CGM.getTriple().isOSBinFormatMachO()) GV->setSection(Section); GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - GV->setAlignment(CharUnits::One().getQuantity()); + GV->setAlignment(CharUnits::One().getAsAlign()); CGM.addCompilerUsedGlobal(GV); return GV; @@ -4902,7 +4900,7 @@ LValue CGObjCMac::EmitObjCValueForIvar(CodeGen::CodeGenFunction &CGF, const ObjCIvarDecl *Ivar, unsigned CVRQualifiers) { const ObjCInterfaceDecl *ID = - ObjectTy->getAs<ObjCObjectType>()->getInterface(); + ObjectTy->castAs<ObjCObjectType>()->getInterface(); return EmitValueForIvarAtOffset(CGF, ID, BaseValue, Ivar, CVRQualifiers, EmitIvarOffset(CGF, ID, Ivar)); } @@ -6076,7 +6074,8 @@ void CGObjCNonFragileABIMac::AddModuleClassList( llvm::GlobalVariable *GV = new llvm::GlobalVariable(CGM.getModule(), Init->getType(), false, LT, Init, SymbolName); - GV->setAlignment(CGM.getDataLayout().getABITypeAlignment(Init->getType())); + GV->setAlignment( + llvm::Align(CGM.getDataLayout().getABITypeAlignment(Init->getType()))); GV->setSection(SectionName); CGM.addCompilerUsedGlobal(GV); } @@ -6319,8 +6318,8 @@ CGObjCNonFragileABIMac::BuildClassObject(const ObjCInterfaceDecl *CI, if (CGM.getTriple().isOSBinFormatMachO()) GV->setSection("__DATA, __objc_data"); - GV->setAlignment( - CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ClassnfABITy)); + GV->setAlignment(llvm::Align( + CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ClassnfABITy))); if (!CGM.getTriple().isOSBinFormatCOFF()) if (HiddenVisibility) GV->setVisibility(llvm::GlobalValue::HiddenVisibility); @@ -6527,7 +6526,7 @@ llvm::Value *CGObjCNonFragileABIMac::GenerateProtocolRef(CodeGenFunction &CGF, PTGV->setSection(GetSectionName("__objc_protorefs", "coalesced,no_dead_strip")); PTGV->setVisibility(llvm::GlobalValue::HiddenVisibility); - PTGV->setAlignment(Align.getQuantity()); + PTGV->setAlignment(Align.getAsAlign()); if (!CGM.getTriple().isOSBinFormatMachO()) PTGV->setComdat(CGM.getModule().getOrInsertComdat(ProtocolName)); CGM.addUsedGlobal(PTGV); @@ -6759,8 +6758,8 @@ CGObjCNonFragileABIMac::EmitIvarOffsetVar(const ObjCInterfaceDecl *ID, llvm::GlobalVariable *IvarOffsetGV = ObjCIvarOffsetVariable(ID, Ivar); IvarOffsetGV->setInitializer( llvm::ConstantInt::get(ObjCTypes.IvarOffsetVarTy, Offset)); - IvarOffsetGV->setAlignment( - CGM.getDataLayout().getABITypeAlignment(ObjCTypes.IvarOffsetVarTy)); + IvarOffsetGV->setAlignment(llvm::Align( + CGM.getDataLayout().getABITypeAlignment(ObjCTypes.IvarOffsetVarTy))); if (!CGM.getTriple().isOSBinFormatCOFF()) { // FIXME: This matches gcc, but shouldn't the visibility be set on 
the use @@ -6986,8 +6985,8 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol( ProtocolRef); if (!CGM.getTriple().isOSBinFormatMachO()) PTGV->setComdat(CGM.getModule().getOrInsertComdat(ProtocolRef)); - PTGV->setAlignment( - CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ProtocolnfABIPtrTy)); + PTGV->setAlignment(llvm::Align( + CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ProtocolnfABIPtrTy))); PTGV->setSection(GetSectionName("__objc_protolist", "coalesced,no_dead_strip")); PTGV->setVisibility(llvm::GlobalValue::HiddenVisibility); @@ -7053,7 +7052,7 @@ LValue CGObjCNonFragileABIMac::EmitObjCValueForIvar( llvm::Value *BaseValue, const ObjCIvarDecl *Ivar, unsigned CVRQualifiers) { - ObjCInterfaceDecl *ID = ObjectTy->getAs<ObjCObjectType>()->getInterface(); + ObjCInterfaceDecl *ID = ObjectTy->castAs<ObjCObjectType>()->getInterface(); llvm::Value *Offset = EmitIvarOffset(CGF, ID, Ivar); return EmitValueForIvarAtOffset(CGF, ID, BaseValue, Ivar, CVRQualifiers, Offset); @@ -7338,7 +7337,7 @@ CGObjCNonFragileABIMac::EmitClassRefFromId(CodeGenFunction &CGF, CGM.getModule(), ClassGV->getType(), false, getLinkageTypeForObjCMetadata(CGM, SectionName), ClassGV, "OBJC_CLASSLIST_REFERENCES_$_"); - Entry->setAlignment(CGF.getPointerAlign().getQuantity()); + Entry->setAlignment(CGF.getPointerAlign().getAsAlign()); if (!ID || !ID->hasAttr<ObjCClassStubAttr>()) Entry->setSection(SectionName); @@ -7377,7 +7376,7 @@ CGObjCNonFragileABIMac::EmitSuperClassRef(CodeGenFunction &CGF, CGM.getModule(), ClassGV->getType(), false, getLinkageTypeForObjCMetadata(CGM, SectionName), ClassGV, "OBJC_CLASSLIST_SUP_REFS_$_"); - Entry->setAlignment(CGF.getPointerAlign().getQuantity()); + Entry->setAlignment(CGF.getPointerAlign().getAsAlign()); Entry->setSection(SectionName); CGM.addCompilerUsedGlobal(Entry); } @@ -7401,7 +7400,7 @@ llvm::Value *CGObjCNonFragileABIMac::EmitMetaClassRef(CodeGenFunction &CGF, CGM.getModule(), ObjCTypes.ClassnfABIPtrTy, false, getLinkageTypeForObjCMetadata(CGM, SectionName), MetaClassGV, "OBJC_CLASSLIST_SUP_REFS_$_"); - Entry->setAlignment(Align.getQuantity()); + Entry->setAlignment(Align.getAsAlign()); Entry->setSection(SectionName); CGM.addCompilerUsedGlobal(Entry); } @@ -7500,7 +7499,7 @@ Address CGObjCNonFragileABIMac::EmitSelectorAddr(CodeGenFunction &CGF, "OBJC_SELECTOR_REFERENCES_"); Entry->setExternallyInitialized(true); Entry->setSection(SectionName); - Entry->setAlignment(Align.getQuantity()); + Entry->setAlignment(Align.getAsAlign()); CGM.addCompilerUsedGlobal(Entry); } @@ -7733,7 +7732,7 @@ CGObjCNonFragileABIMac::GetInterfaceEHType(const ObjCInterfaceDecl *ID, : llvm::GlobalValue::WeakAnyLinkage; if (Entry) { values.finishAndSetAsInitializer(Entry); - Entry->setAlignment(CGM.getPointerAlign().getQuantity()); + Entry->setAlignment(CGM.getPointerAlign().getAsAlign()); } else { Entry = values.finishAndCreateGlobal("OBJC_EHTYPE_$_" + ClassName, CGM.getPointerAlign(), diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index 27e7175da841f..2a13a2a581562 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -752,6 +752,11 @@ enum OpenMPRTLFunction { // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t // *arg_types); OMPRTL__tgt_target_data_update_nowait, + // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle); + OMPRTL__tgt_mapper_num_components, + // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void + // *base, void *begin, int64_t size, int64_t type); + 
OMPRTL__tgt_push_mapper_component, }; /// A basic class for pre|post-action for advanced codegen sequence for OpenMP @@ -1259,6 +1264,52 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, loadOffloadInfoMetadata(); } +bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD, + const GlobalDecl &OldGD, + llvm::GlobalValue *OrigAddr, + bool IsForDefinition) { + // Emit at least a definition for the aliasee if the address of the + // original function is requested. + if (IsForDefinition || OrigAddr) + (void)CGM.GetAddrOfGlobal(NewGD); + StringRef NewMangledName = CGM.getMangledName(NewGD); + llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName); + if (Addr && !Addr->isDeclaration()) { + const auto *D = cast<FunctionDecl>(OldGD.getDecl()); + const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(OldGD); + llvm::Type *DeclTy = CGM.getTypes().GetFunctionType(FI); + + // Create a reference to the named value. This ensures that it is emitted + // if it is a deferred decl. + llvm::GlobalValue::LinkageTypes LT = CGM.getFunctionLinkage(OldGD); + + // Create the new alias itself, but don't set a name yet. + auto *GA = + llvm::GlobalAlias::create(DeclTy, 0, LT, "", Addr, &CGM.getModule()); + + if (OrigAddr) { + assert(OrigAddr->isDeclaration() && "Expected declaration"); + + GA->takeName(OrigAddr); + OrigAddr->replaceAllUsesWith( + llvm::ConstantExpr::getBitCast(GA, OrigAddr->getType())); + OrigAddr->eraseFromParent(); + } else { + GA->setName(CGM.getMangledName(OldGD)); + } + + // Set attributes which are particular to an alias; this is a + // specialization of the attributes which may be set on a global function. + if (D->hasAttr<WeakAttr>() || D->hasAttr<WeakRefAttr>() || + D->isWeakImported()) + GA->setLinkage(llvm::Function::WeakAnyLinkage); + + CGM.SetCommonAttributes(OldGD, GA); + return true; + } + return false; +} + void CGOpenMPRuntime::clear() { InternalVars.clear(); // Clean non-target variable declarations possibly used only in debug info. @@ -1272,6 +1323,14 @@ void CGOpenMPRuntime::clear() { continue; GV->eraseFromParent(); } + // Emit aliases for the deferred aliasees. + for (const auto &Pair : DeferredVariantFunction) { + StringRef MangledName = CGM.getMangledName(Pair.second.second); + llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName); + // If we are unable to emit the alias, just emit the original declaration. + (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr, + /*IsForDefinition=*/false); + } } std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { @@ -1638,18 +1697,23 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, return ThreadID; } // If exceptions are enabled, do not use parameter to avoid possible crash. - if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || - !CGF.getLangOpts().CXXExceptions || - CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { - if (auto *OMPRegionInfo = - dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { - if (OMPRegionInfo->getThreadIDVariable()) { - // Check if this an outlined function with thread id passed as argument. - LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); + if (auto *OMPRegionInfo = + dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { + if (OMPRegionInfo->getThreadIDVariable()) { + // Check if this is an outlined function with thread id passed as argument.
+ LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); + llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); + if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || + !CGF.getLangOpts().CXXExceptions || + CGF.Builder.GetInsertBlock() == TopBlock || + !isa<llvm::Instruction>(LVal.getPointer()) || + cast<llvm::Instruction>(LVal.getPointer())->getParent() == TopBlock || + cast<llvm::Instruction>(LVal.getPointer())->getParent() == + CGF.Builder.GetInsertBlock()) { ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); // If the value was loaded in the entry block, cache it and use it // everywhere in the function. - if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { + if (CGF.Builder.GetInsertBlock() == TopBlock) { auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); Elem.second.ThreadID = ThreadID; } @@ -1686,6 +1750,12 @@ void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { UDRMap.erase(D); FunctionUDRMap.erase(CGF.CurFn); } + auto I = FunctionUDMMap.find(CGF.CurFn); + if (I != FunctionUDMMap.end()) { + for (auto *D : I->second) + UDMMap.erase(D); + FunctionUDMMap.erase(I); + } } llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { @@ -2459,6 +2529,24 @@ llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); break; } + case OMPRTL__tgt_mapper_num_components: { + // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle); + llvm::Type *TypeParams[] = {CGM.VoidPtrTy}; + auto *FnTy = + llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components"); + break; + } + case OMPRTL__tgt_push_mapper_component: { + // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void + // *base, void *begin, int64_t size, int64_t type); + llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy, + CGM.Int64Ty, CGM.Int64Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component"); + break; + } } assert(RTLFn && "Unable to find OpenMP runtime function"); return RTLFn; @@ -2552,6 +2640,32 @@ CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { return CGM.CreateRuntimeFunction(FnTy, Name); } +/// Obtain information that uniquely identifies a target entry. This +/// consists of the file and device IDs as well as the line number associated +/// with the relevant entry source location.
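For readability, the two FunctionTypes built above correspond to these C-level prototypes, restated verbatim from the comments on the enum entries earlier in this file:

#include <cstdint>

extern "C" int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
extern "C" void __tgt_push_mapper_component(void *rt_mapper_handle, void *base,
                                            void *begin, int64_t size,
                                            int64_t type);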
+static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, + unsigned &DeviceID, unsigned &FileID, + unsigned &LineNum) { + SourceManager &SM = C.getSourceManager(); + + // The loc should be always valid and have a file ID (the user cannot use + // #pragma directives in macros) + + assert(Loc.isValid() && "Source location is expected to be always valid."); + + PresumedLoc PLoc = SM.getPresumedLoc(Loc); + assert(PLoc.isValid() && "Source location is expected to be always valid."); + + llvm::sys::fs::UniqueID ID; + if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) + SM.getDiagnostics().Report(diag::err_cannot_open_file) + << PLoc.getFilename() << EC.message(); + + DeviceID = ID.getDevice(); + FileID = ID.getFile(); + LineNum = PLoc.getLine(); +} + Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { if (CGM.getLangOpts().OpenMPSimd) return Address::invalid(); @@ -2563,19 +2677,27 @@ Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { SmallString<64> PtrName; { llvm::raw_svector_ostream OS(PtrName); - OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_ref_ptr"; + OS << CGM.getMangledName(GlobalDecl(VD)); + if (!VD->isExternallyVisible()) { + unsigned DeviceID, FileID, Line; + getTargetEntryUniqueInfo(CGM.getContext(), + VD->getCanonicalDecl()->getBeginLoc(), + DeviceID, FileID, Line); + OS << llvm::format("_%x", FileID); + } + OS << "_decl_tgt_ref_ptr"; } llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); if (!Ptr) { QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), PtrName); - if (!CGM.getLangOpts().OpenMPIsDevice) { - auto *GV = cast<llvm::GlobalVariable>(Ptr); - GV->setLinkage(llvm::GlobalValue::ExternalLinkage); + + auto *GV = cast<llvm::GlobalVariable>(Ptr); + GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); + + if (!CGM.getLangOpts().OpenMPIsDevice) GV->setInitializer(CGM.GetAddrOfGlobal(VD)); - } - CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr)); registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); } return Address(Ptr, CGM.getContext().getDeclAlign(VD)); @@ -2749,35 +2871,12 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( return nullptr; } -/// Obtain information that uniquely identifies a target entry. This -/// consists of the file and device IDs as well as line number associated with -/// the relevant entry source location. 
-static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, - unsigned &DeviceID, unsigned &FileID, - unsigned &LineNum) { - SourceManager &SM = C.getSourceManager(); - - // The loc should be always valid and have a file ID (the user cannot use - // #pragma directives in macros) - - assert(Loc.isValid() && "Source location is expected to be always valid."); - - PresumedLoc PLoc = SM.getPresumedLoc(Loc); - assert(PLoc.isValid() && "Source location is expected to be always valid."); - - llvm::sys::fs::UniqueID ID; - if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) - SM.getDiagnostics().Report(diag::err_cannot_open_file) - << PLoc.getFilename() << EC.message(); - - DeviceID = ID.getDevice(); - FileID = ID.getFile(); - LineNum = PLoc.getLine(); -} - bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, llvm::GlobalVariable *Addr, bool PerformInit) { + if (CGM.getLangOpts().OMPTargetTriples.empty() && + !CGM.getLangOpts().OpenMPIsDevice) + return false; Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || @@ -2981,14 +3080,16 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitRuntimeCall( RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); - // OutlinedFn(&gtid, &zero, CapturedStruct); - Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, - /*Name*/ ".zero.addr"); - CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); + // OutlinedFn(&gtid, &zero_bound, CapturedStruct); + Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); + Address ZeroAddrBound = + CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, + /*Name=*/".bound.zero.addr"); + CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0)); llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; // ThreadId for serialized parallels is 0. - OutlinedFnArgs.push_back(ZeroAddr.getPointer()); - OutlinedFnArgs.push_back(ZeroAddr.getPointer()); + OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); + OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); @@ -3283,9 +3384,9 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, // <copy_func>, did_it); if (DidIt.isValid()) { llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); - QualType CopyprivateArrayTy = - C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, - /*IndexTypeQuals=*/0); + QualType CopyprivateArrayTy = C.getConstantArrayType( + C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, + /*IndexTypeQuals=*/0); // Create a list of all private variables for copyprivate. Address CopyprivateList = CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); @@ -3472,7 +3573,7 @@ bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { return Schedule != OMP_sch_static; } -static int addMonoNonMonoModifier(OpenMPSchedType Schedule, +static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2) { int Modifier = 0; @@ -3506,6 +3607,18 @@ static int addMonoNonMonoModifier(OpenMPSchedType Schedule, case OMPC_SCHEDULE_MODIFIER_unknown: break; } + // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
+ // If the static schedule kind is specified or if the ordered clause is + // specified, and if the nonmonotonic modifier is not specified, the effect is + // as if the monotonic modifier is specified. Otherwise, unless the monotonic + // modifier is specified, the effect is as if the nonmonotonic modifier is + // specified. + if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { + if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || + Schedule == OMP_sch_static_balanced_chunked || + Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static)) + Modifier = OMP_sch_modifier_nonmonotonic; + } return Schedule | Modifier; } @@ -3530,13 +3643,14 @@ void CGOpenMPRuntime::emitForDispatchInit( llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk : CGF.Builder.getIntN(IVSize, 1); llvm::Value *Args[] = { - emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), + emitUpdateLocation(CGF, Loc), + getThreadID(CGF, Loc), CGF.Builder.getInt32(addMonoNonMonoModifier( - Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type - DispatchValues.LB, // Lower - DispatchValues.UB, // Upper - CGF.Builder.getIntN(IVSize, 1), // Stride - Chunk // Chunk + CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type + DispatchValues.LB, // Lower + DispatchValues.UB, // Upper + CGF.Builder.getIntN(IVSize, 1), // Stride + Chunk // Chunk }; CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); } @@ -3578,7 +3692,7 @@ static void emitForStaticInitCall( llvm::Value *Args[] = { UpdateLocation, ThreadId, - CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1, + CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, M2)), // Schedule type Values.IL.getPointer(), // &isLastIter Values.LB.getPointer(), // &LB @@ -3899,157 +4013,6 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: Action(E.getKey(), E.getValue()); } -llvm::Function * -CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { - // If we don't have entries or if we are emitting code for the device, we - // don't need to do anything. - if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty()) - return nullptr; - - llvm::Module &M = CGM.getModule(); - ASTContext &C = CGM.getContext(); - - // Get list of devices we care about - const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples; - - // We should be creating an offloading descriptor only if there are devices - // specified. - assert(!Devices.empty() && "No OpenMP offloading devices??"); - - // Create the external variables that will point to the begin and end of the - // host entries section. These will be defined by the linker. 
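The OpenMP 5.0 defaulting rule added to addMonoNonMonoModifier above boils down to: static and ordered schedule kinds default to monotonic (no extra bit is set), everything else defaults to nonmonotonic. A simplified standalone model (illustrative enum values, not the real kmp schedule constants):

enum Sched {
  StaticChunked, Static, StaticBalancedChunked,
  OrdStaticChunked, OrdStatic, DynamicChunked
};
enum Modifier { None, Monotonic, Nonmonotonic };

Modifier defaultModifier(int OpenMPVersion, Sched S, Modifier Explicit) {
  if (OpenMPVersion < 50 || Explicit != None)
    return Explicit; // pre-5.0, or the user already chose a modifier
  switch (S) {
  case StaticChunked:
  case Static:
  case StaticBalancedChunked:
  case OrdStaticChunked:
  case OrdStatic:
    return None; // effect is "as if monotonic"; no extra bit is set
  default:
    return Nonmonotonic; // 5.0 default for the remaining schedules
  }
}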
- llvm::Type *OffloadEntryTy = - CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()); - std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"}); - auto *HostEntriesBegin = new llvm::GlobalVariable( - M, OffloadEntryTy, /*isConstant=*/true, - llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, - EntriesBeginName); - std::string EntriesEndName = getName({"omp_offloading", "entries_end"}); - auto *HostEntriesEnd = - new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true, - llvm::GlobalValue::ExternalLinkage, - /*Initializer=*/nullptr, EntriesEndName); - - // Create all device images - auto *DeviceImageTy = cast<llvm::StructType>( - CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy())); - ConstantInitBuilder DeviceImagesBuilder(CGM); - ConstantArrayBuilder DeviceImagesEntries = - DeviceImagesBuilder.beginArray(DeviceImageTy); - - for (const llvm::Triple &Device : Devices) { - StringRef T = Device.getTriple(); - std::string BeginName = getName({"omp_offloading", "img_start", ""}); - auto *ImgBegin = new llvm::GlobalVariable( - M, CGM.Int8Ty, /*isConstant=*/true, - llvm::GlobalValue::ExternalWeakLinkage, - /*Initializer=*/nullptr, Twine(BeginName).concat(T)); - std::string EndName = getName({"omp_offloading", "img_end", ""}); - auto *ImgEnd = new llvm::GlobalVariable( - M, CGM.Int8Ty, /*isConstant=*/true, - llvm::GlobalValue::ExternalWeakLinkage, - /*Initializer=*/nullptr, Twine(EndName).concat(T)); - - llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin, - HostEntriesEnd}; - createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data, - DeviceImagesEntries); - } - - // Create device images global array. - std::string ImagesName = getName({"omp_offloading", "device_images"}); - llvm::GlobalVariable *DeviceImages = - DeviceImagesEntries.finishAndCreateGlobal(ImagesName, - CGM.getPointerAlign(), - /*isConstant=*/true); - DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - - // This is a Zero array to be used in the creation of the constant expressions - llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty), - llvm::Constant::getNullValue(CGM.Int32Ty)}; - - // Create the target region descriptor. - llvm::Constant *Data[] = { - llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()), - llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(), - DeviceImages, Index), - HostEntriesBegin, HostEntriesEnd}; - std::string Descriptor = getName({"omp_offloading", "descriptor"}); - llvm::GlobalVariable *Desc = createGlobalStruct( - CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor); - - // Emit code to register or unregister the descriptor at execution - // startup or closing, respectively. - - llvm::Function *UnRegFn; - { - FunctionArgList Args; - ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other); - Args.push_back(&DummyPtr); - - CodeGenFunction CGF(CGM); - // Disable debug info for global (de-)initializer because they are not part - // of some particular construct. 
- CGF.disableDebugInfo(); - const auto &FI = - CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); - llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); - std::string UnregName = getName({"omp_offloading", "descriptor_unreg"}); - UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI); - CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args); - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib), - Desc); - CGF.FinishFunction(); - } - llvm::Function *RegFn; - { - CodeGenFunction CGF(CGM); - // Disable debug info for global (de-)initializer because they are not part - // of some particular construct. - CGF.disableDebugInfo(); - const auto &FI = CGM.getTypes().arrangeNullaryFunction(); - llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); - - // Encode offload target triples into the registration function name. It - // will serve as a comdat key for the registration/unregistration code for - // this particular combination of offloading targets. - SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U); - RegFnNameParts[0] = "omp_offloading"; - RegFnNameParts[1] = "descriptor_reg"; - llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2), - [](const llvm::Triple &T) -> const std::string& { - return T.getTriple(); - }); - llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end()); - std::string Descriptor = getName(RegFnNameParts); - RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI); - CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList()); - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc); - // Create a variable to drive the registration and unregistration of the - // descriptor, so we can reuse the logic that emits Ctors and Dtors. - ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), - SourceLocation(), nullptr, C.CharTy, - ImplicitParamDecl::Other); - CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); - CGF.FinishFunction(); - } - if (CGM.supportsCOMDAT()) { - // It is sufficient to call registration function only once, so create a - // COMDAT group for registration/unregistration functions and associated - // data. That would reduce startup time and code size. Registration - // function serves as a COMDAT group key. - llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName()); - RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); - RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility); - RegFn->setComdat(ComdatKey); - UnRegFn->setComdat(ComdatKey); - DeviceImages->setComdat(ComdatKey); - Desc->setComdat(ComdatKey); - } - return RegFn; -} - void CGOpenMPRuntime::createOffloadEntry( llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, llvm::GlobalValue::LinkageTypes Linkage) { @@ -4077,8 +4040,7 @@ void CGOpenMPRuntime::createOffloadEntry( Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); // The entry has to be created in the section the linker expects it to be. - std::string Section = getName({"omp_offloading", "entries"}); - Entry->setSection(Section); + Entry->setSection("omp_offloading_entries"); } void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { @@ -4091,13 +4053,16 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { // Right now we only generate metadata for functions that contain target // regions. - // If we do not have entries, we don't need to do anything.
- if (OffloadEntriesInfoManager.empty()) + // If we are in simd mode or there are no entries, we don't need to do + // anything. + if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) return; llvm::Module &M = CGM.getModule(); llvm::LLVMContext &C = M.getContext(); - SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16> + SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, + SourceLocation, StringRef>, + 16> OrderedEntries(OffloadEntriesInfoManager.size()); llvm::SmallVector<StringRef, 16> ParentFunctions( OffloadEntriesInfoManager.size()); @@ -4115,7 +4080,8 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { // Create function that emits metadata for each target region entry. auto &&TargetRegionMetadataEmitter = - [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString]( + [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, + &GetMDString]( unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line, const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { @@ -4133,8 +4099,19 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { GetMDInt(FileID), GetMDString(ParentName), GetMDInt(Line), GetMDInt(E.getOrder())}; + SourceLocation Loc; + for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), + E = CGM.getContext().getSourceManager().fileinfo_end(); + I != E; ++I) { + if (I->getFirst()->getUniqueID().getDevice() == DeviceID && + I->getFirst()->getUniqueID().getFile() == FileID) { + Loc = CGM.getContext().getSourceManager().translateFileLineCol( + I->getFirst(), Line, 1); + break; + } + } // Save this entry in the right position of the ordered entries array. - OrderedEntries[E.getOrder()] = &E; + OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); ParentFunctions[E.getOrder()] = ParentName; // Add metadata to the named metadata node. @@ -4162,7 +4139,8 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; // Save this entry in the right position of the ordered entries array. - OrderedEntries[E.getOrder()] = &E; + OrderedEntries[E.getOrder()] = + std::make_tuple(&E, SourceLocation(), MangledName); // Add metadata to the named metadata node. MD->addOperand(llvm::MDNode::get(C, Ops)); @@ -4171,11 +4149,11 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( DeviceGlobalVarMetadataEmitter); - for (const auto *E : OrderedEntries) { - assert(E && "All ordered entries must exist!"); + for (const auto &E : OrderedEntries) { + assert(std::get<0>(E) && "All ordered entries must exist!"); if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( - E)) { + std::get<0>(E))) { if (!CE->getID() || !CE->getAddress()) { // Do not blame the entry if the parent function is not emitted.
StringRef FnName = ParentFunctions[CE->getOrder()]; @@ -4183,16 +4161,16 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { continue; unsigned DiagID = CGM.getDiags().getCustomDiagID( DiagnosticsEngine::Error, - "Offloading entry for target region is incorrect: either the " + "Offloading entry for target region in %0 is incorrect: either the " "address or the ID is invalid."); - CGM.getDiags().Report(DiagID); + CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; continue; } createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); - } else if (const auto *CE = - dyn_cast<OffloadEntriesInfoManagerTy:: - OffloadEntryInfoDeviceGlobalVar>(E)) { + } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: + OffloadEntryInfoDeviceGlobalVar>( + std::get<0>(E))) { OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( CE->getFlags()); @@ -4203,10 +4181,10 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { continue; if (!CE->getAddress()) { unsigned DiagID = CGM.getDiags().getCustomDiagID( - DiagnosticsEngine::Error, - "Offloading entry for declare target variable is incorrect: the " - "address is invalid."); - CGM.getDiags().Report(DiagID); + DiagnosticsEngine::Error, "Offloading entry for declare target " + "variable %0 is incorrect: the " + "address is invalid."); + CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); continue; } // The variable has no definition - no need to add the entry. @@ -5242,7 +5220,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, // Define type kmp_depend_info[<Dependences.size()>]; QualType KmpDependInfoArrayTy = C.getConstantArrayType( KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), - ArrayType::Normal, /*IndexTypeQuals=*/0); + nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); // kmp_depend_info[<Dependences.size()>] deps; DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); @@ -5763,7 +5741,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, } llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); QualType ReductionArrayTy = C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); Address ReductionList = CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); @@ -6235,7 +6213,7 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( unsigned Size = Data.ReductionVars.size(); llvm::APInt ArraySize(/*numBits=*/64, Size); QualType ArrayRDType = C.getConstantArrayType( RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); // kmp_task_red_input_t .rd_input.[Size]; Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies, @@ -6720,12 +6698,16 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF, case OMPD_teams_distribute_parallel_for_simd: case OMPD_target_update: case OMPD_declare_simd: + case OMPD_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: case OMPD_requires: case OMPD_unknown:
break; @@ -7025,12 +7007,16 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF, case OMPD_teams_distribute_parallel_for_simd: case OMPD_target_update: case OMPD_declare_simd: + case OMPD_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: case OMPD_requires: case OMPD_unknown: break; @@ -7079,12 +7065,24 @@ public: OMP_MAP_LITERAL = 0x100, /// Implicit map OMP_MAP_IMPLICIT = 0x200, + /// Close is a hint to the runtime to allocate memory close to + /// the target device. + OMP_MAP_CLOSE = 0x400, /// The 16 MSBs of the flags indicate whether the entry is member of some /// struct/class. OMP_MAP_MEMBER_OF = 0xffff000000000000, LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), }; + /// Get the offset of the OMP_MAP_MEMBER_OF field. + static unsigned getFlagMemberOffset() { + unsigned Offset = 0; + for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); + Remain = Remain >> 1) + Offset++; + return Offset; + } + /// Class that associates information with a base pointer to be passed to the /// runtime library. class BasePointerInfo { @@ -7148,8 +7146,11 @@ private: : IE(IE), VD(VD) {} }; - /// Directive from where the map clauses were extracted. - const OMPExecutableDirective &CurDir; + /// The target directive from where the mappable clauses were extracted. It + /// is either an executable directive or a user-defined mapper directive. + llvm::PointerUnion<const OMPExecutableDirective *, + const OMPDeclareMapperDecl *> + CurDir; /// Function the directive is being generated for. CodeGenFunction &CGF; @@ -7181,9 +7182,11 @@ private: OAE->getBase()->IgnoreParenImpCasts()) .getCanonicalType(); - // If there is no length associated with the expression, that means we - // are using the whole length of the base. - if (!OAE->getLength() && OAE->getColonLoc().isValid()) + // If there is no length associated with the expression and the lower + // bound is not specified either, that means we are using the whole + // length of the base. + if (!OAE->getLength() && OAE->getColonLoc().isValid() && + !OAE->getLowerBound()) return CGF.getTypeSize(BaseTy); llvm::Value *ElemSize; @@ -7197,13 +7200,30 @@ private: // If we don't have a length at this point, that is because we have an // array section with a single element.
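// A minimal sketch of the array-section size rules implemented just below,
// on plain integers (sectionSize is a hypothetical helper; BaseSize is the
// size of the whole base, ElemSize the element size):
//   a[lb:len] -> len * ElemSize
//   a[:]      -> BaseSize (no length, no lower bound)
//   a[lb:]    -> max(BaseSize - lb * ElemSize, 0)
//   a[i]      -> ElemSize (no colon: a single element)
static uint64_t sectionSize(uint64_t BaseSize, uint64_t ElemSize,
                            llvm::Optional<uint64_t> LB,
                            llvm::Optional<uint64_t> Len, bool HasColon) {
  if (Len)
    return *Len * ElemSize;      // An explicit length wins.
  if (!HasColon)
    return ElemSize;             // Single-element subscript.
  if (!LB)
    return BaseSize;             // Whole base is mapped.
  uint64_t Off = *LB * ElemSize; // Tail section, clamped at zero like the
  return BaseSize > Off ? BaseSize - Off : 0; // select emitted below.
}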
- if (!OAE->getLength()) + if (!OAE->getLength() && OAE->getColonLoc().isInvalid()) return ElemSize; - llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength()); - LengthVal = - CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false); - return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); + if (const Expr *LenExpr = OAE->getLength()) { + llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); + LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), + CGF.getContext().getSizeType(), + LenExpr->getExprLoc()); + return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); + } + assert(!OAE->getLength() && OAE->getColonLoc().isValid() && + OAE->getLowerBound() && "expected array_section[lb:]."); + // Size = sizetype - lb * elemtype; + llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); + llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); + LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), + CGF.getContext().getSizeType(), + OAE->getLowerBound()->getExprLoc()); + LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); + llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); + llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); + LengthVal = CGF.Builder.CreateSelect( + Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); + return LengthVal; } return CGF.getTypeSize(ExprTy); } @@ -7247,6 +7267,9 @@ private: if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) != MapModifiers.end()) Bits |= OMP_MAP_ALWAYS; + if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) != MapModifiers.end()) + Bits |= OMP_MAP_CLOSE; return Bits; } @@ -7675,10 +7698,10 @@ private: if (!IsExpressionFirstInfo) { // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, - // then we reset the TO/FROM/ALWAYS/DELETE flags. + // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. if (IsPointer) Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | - OMP_MAP_DELETE); + OMP_MAP_DELETE | OMP_MAP_CLOSE); if (ShouldBeMemberOf) { // Set placeholder value MEMBER_OF=FFFF to indicate that the flag @@ -7752,9 +7775,9 @@ public: } static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { - // Member of is given by the 16 MSB of the flag, so rotate by 48 bits. + // Shift left by getFlagMemberOffset() bits. return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) - << 48); + << getFlagMemberOffset()); } static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, @@ -7834,7 +7857,7 @@ private: public: MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) - : CurDir(Dir), CGF(CGF) { + : CurDir(&Dir), CGF(CGF) { // Extract firstprivate clause information. for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) for (const auto *D : C->varlists()) @@ -7846,6 +7869,10 @@ public: DevPointersMap[L.first].push_back(L.second); } + /// Constructor for the declare mapper directive. + MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) + : CurDir(&Dir), CGF(CGF) {} + /// Generate code for the combined entry if we have a partially mapped struct /// and take care of the mapping flags of the arguments corresponding to /// individual struct members. @@ -7907,18 +7934,20 @@ public: IsImplicit); }; - // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
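// A minimal sketch of the MEMBER_OF encoding used by getMemberOfFlag above:
// the 1-based position of the parent struct entry lives in the 16 MSBs, and
// the shift amount is the number of trailing zero bits of OMP_MAP_MEMBER_OF,
// which is exactly what getFlagMemberOffset() counts (48 for this mask):
constexpr uint64_t MemberOfMask = 0xffff000000000000ULL;
constexpr unsigned memberOffset() {
  unsigned Off = 0;
  for (uint64_t M = MemberOfMask; !(M & 1); M >>= 1)
    ++Off;
  return Off;
}
static_assert(memberOffset() == 48, "MEMBER_OF field starts at bit 48");
constexpr uint64_t memberOfFlag(unsigned Position) {
  return (uint64_t)(Position + 1) << memberOffset();
}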
- for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) + assert(CurDir.is<const OMPExecutableDirective *>() && + "Expect an executable directive"); + const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); + for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) for (const auto &L : C->component_lists()) { InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(), /*ReturnDevicePointer=*/false, C->isImplicit()); } - for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>()) + for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>()) for (const auto &L : C->component_lists()) { InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit()); } - for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>()) + for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>()) for (const auto &L : C->component_lists()) { InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit()); } @@ -7933,9 +7962,8 @@ public: llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>> DeferredInfo; - // FIXME: MSVC 2013 seems to require this-> to find member CurDir. for (const auto *C : - this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) { + CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) { for (const auto &L : C->component_lists()) { assert(!L.second.empty() && "Not expecting empty list of components!"); const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); @@ -7964,7 +7992,6 @@ public: // We didn't find any match in our map information - generate a zero // size array section - if the pointer is a struct member we defer this // action until the whole struct has been processed. - // FIXME: MSVC 2013 seems to require this-> to find member CGF. if (isa<MemberExpr>(IE)) { // Insert the pointer into Info to be processed by // generateInfoForComponentList. Because it is a member pointer // /*ReturnDevicePointer=*/false, C->isImplicit()); DeferredInfo[nullptr].emplace_back(IE, VD); } else { - llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( - this->CGF.EmitLValue(IE), IE->getExprLoc()); + llvm::Value *Ptr = + CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); BasePointers.emplace_back(Ptr, VD); Pointers.push_back(Ptr); - Sizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty)); + Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); } } @@ -8005,11 +8032,10 @@ public: // Remember the current base pointer index. unsigned CurrentBasePointersIdx = CurBasePointers.size(); - // FIXME: MSVC 2013 seems to require this-> to find the member method. - this->generateInfoForComponentList( - L.MapType, L.MapModifiers, L.Components, CurBasePointers, - CurPointers, CurSizes, CurTypes, PartialStruct, - IsFirstComponentList, L.IsImplicit); + generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, + CurBasePointers, CurPointers, CurSizes, + CurTypes, PartialStruct, + IsFirstComponentList, L.IsImplicit); // If this entry relates with a device pointer, set the relevant // declaration and add the 'return pointer' flag. @@ -8061,6 +8087,78 @@ public: } } + /// Generate all the base pointers, section pointers, sizes and map types for + /// the extracted map clauses of the user-defined mapper.
+ void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers, + MapValuesArrayTy &Pointers, + MapValuesArrayTy &Sizes, + MapFlagsArrayTy &Types) const { + assert(CurDir.is<const OMPDeclareMapperDecl *>() && + "Expect a declare mapper directive"); + const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); + // We have to process the component lists that relate with the same + // declaration in a single chunk so that we can generate the map flags + // correctly. Therefore, we organize all lists in a map. + llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; + + // Helper function to fill the information map for the different supported + // clauses. + auto &&InfoGen = [&Info]( + const ValueDecl *D, + OMPClauseMappableExprCommon::MappableExprComponentListRef L, + OpenMPMapClauseKind MapType, + ArrayRef<OpenMPMapModifierKind> MapModifiers, + bool ReturnDevicePointer, bool IsImplicit) { + const ValueDecl *VD = + D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; + Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, + IsImplicit); + }; + + for (const auto *C : CurMapperDir->clauselists()) { + const auto *MC = cast<OMPMapClause>(C); + for (const auto &L : MC->component_lists()) { + InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(), + /*ReturnDevicePointer=*/false, MC->isImplicit()); + } + } + + for (const auto &M : Info) { + // We need to know when we generate information for the first component + // associated with a capture, because the mapping flags depend on it. + bool IsFirstComponentList = true; + + // Temporary versions of arrays + MapBaseValuesArrayTy CurBasePointers; + MapValuesArrayTy CurPointers; + MapValuesArrayTy CurSizes; + MapFlagsArrayTy CurTypes; + StructRangeInfoTy PartialStruct; + + for (const MapInfo &L : M.second) { + assert(!L.Components.empty() && + "Not expecting declaration with no component lists."); + generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, + CurBasePointers, CurPointers, CurSizes, + CurTypes, PartialStruct, + IsFirstComponentList, L.IsImplicit); + IsFirstComponentList = false; + } + + // If there is an entry in PartialStruct it means we have a struct with + // individual members mapped. Emit an extra combined entry. + if (PartialStruct.Base.isValid()) + emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, + PartialStruct); + + // We need to append the results of this capture to what we already have. + BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); + Pointers.append(CurPointers.begin(), CurPointers.end()); + Sizes.append(CurSizes.begin(), CurSizes.end()); + Types.append(CurTypes.begin(), CurTypes.end()); + } + } + /// Emit capture info for lambdas for variables captured by reference. void generateInfoForLambdaCaptures( const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers, @@ -8184,8 +8282,10 @@ public: std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef, OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>; SmallVector<MapData, 4> DeclComponentLists; - // FIXME: MSVC 2013 seems to require this-> to find member CurDir. 
- for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) { + assert(CurDir.is<const OMPExecutableDirective *>() && + "Expect an executable directive"); + const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); + for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { for (const auto &L : C->decl_component_lists(VD)) { assert(L.first == VD && "We got information for the wrong declaration??"); @@ -8333,9 +8433,12 @@ public: MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types) const { + assert(CurDir.is<const OMPExecutableDirective *>() && + "Expect an executable directive"); + const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); // Map other list items in the map clause which are not captured variables // but "declare target link" global variables. - for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) { + for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { for (const auto &L : C->component_lists()) { if (!L.first) continue; @@ -8472,9 +8575,9 @@ emitOffloadingArrays(CodeGenFunction &CGF, } llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); - QualType PointerArrayType = - Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal, - /*IndexTypeQuals=*/0); + QualType PointerArrayType = Ctx.getConstantArrayType( + Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, + /*IndexTypeQuals=*/0); Info.BasePointersArray = CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); @@ -8487,9 +8590,9 @@ emitOffloadingArrays(CodeGenFunction &CGF, QualType Int64Ty = Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); if (hasRuntimeEvaluationCaptureSize) { - QualType SizeArrayType = - Ctx.getConstantArrayType(Int64Ty, PointerNumAP, ArrayType::Normal, - /*IndexTypeQuals=*/0); + QualType SizeArrayType = Ctx.getConstantArrayType( + Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, + /*IndexTypeQuals=*/0); Info.SizesArray = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); } else { @@ -8562,6 +8665,7 @@ emitOffloadingArrays(CodeGenFunction &CGF, } } } + /// Emit the arguments to be passed to the runtime library based on the /// arrays of pointers, sizes and map types. static void emitOffloadingArraysArgument( @@ -8677,12 +8781,16 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { case OMPD_teams_distribute_parallel_for_simd: case OMPD_target_update: case OMPD_declare_simd: + case OMPD_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: case OMPD_requires: case OMPD_unknown: llvm_unreachable("Unexpected directive."); @@ -8692,10 +8800,343 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { return nullptr; } +/// Emit the user-defined mapper function. The code generation follows the +/// pattern in the example below. +/// \code +/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, +/// void *base, void *begin, +/// int64_t size, int64_t type) { +/// // Allocate space for an array section first. +/// if (size > 1 && !maptype.IsDelete) +/// __tgt_push_mapper_component(rt_mapper_handle, base, begin, +/// size*sizeof(Ty), clearToFrom(type)); +/// // Map members.
+/// for (unsigned i = 0; i < size; i++) { +/// // For each component specified by this mapper: +/// for (auto c : all_components) { +/// if (c.hasMapper()) +/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, +/// c.arg_type); +/// else +/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, +/// c.arg_begin, c.arg_size, c.arg_type); +/// } +/// } +/// // Delete the array section. +/// if (size > 1 && maptype.IsDelete) +/// __tgt_push_mapper_component(rt_mapper_handle, base, begin, +/// size*sizeof(Ty), clearToFrom(type)); +/// } +/// \endcode +void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, + CodeGenFunction *CGF) { + if (UDMMap.count(D) > 0) + return; + ASTContext &C = CGM.getContext(); + QualType Ty = D->getType(); + QualType PtrTy = C.getPointerType(Ty).withRestrict(); + QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); + auto *MapperVarDecl = + cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); + SourceLocation Loc = D->getLocation(); + CharUnits ElementSize = C.getTypeSizeInChars(Ty); + + // Prepare mapper function arguments and attributes. + ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, + ImplicitParamDecl::Other); + ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, + ImplicitParamDecl::Other); + ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, + ImplicitParamDecl::Other); + FunctionArgList Args; + Args.push_back(&HandleArg); + Args.push_back(&BaseArg); + Args.push_back(&BeginArg); + Args.push_back(&SizeArg); + Args.push_back(&TypeArg); + const CGFunctionInfo &FnInfo = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + SmallString<64> TyStr; + llvm::raw_svector_ostream Out(TyStr); + CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out); + std::string Name = getName({"omp_mapper", TyStr, D->getName()}); + auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, + Name, &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); + Fn->removeFnAttr(llvm::Attribute::OptimizeNone); + // Start the mapper function code generation. + CodeGenFunction MapperCGF(CGM); + MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); + // Compute the starting and end addresses of array elements. + llvm::Value *Size = MapperCGF.EmitLoadOfScalar( + MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false, + C.getPointerType(Int64Ty), Loc); + llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast( + MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(), + CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy))); + llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size); + llvm::Value *MapType = MapperCGF.EmitLoadOfScalar( + MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false, + C.getPointerType(Int64Ty), Loc); + // Prepare common arguments for array initialization and deletion.
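// A minimal C-level sketch of the mapper function generated here for a type
// such as: struct S { int a; double *p; }; with
//   #pragma omp declare mapper(struct S s) map(s.a) map(s.p[0:8])
// decay() names the map-type combining logic emitted further below and is a
// placeholder; only __tgt_push_mapper_component follows the \code block above:
//   void omp_mapper_S(void *handle, void *base, void *begin, int64_t size,
//                     int64_t type) {
//     struct S *first = (struct S *)begin;
//     for (int64_t i = 0; i < size; ++i) {
//       struct S *cur = first + i;
//       __tgt_push_mapper_component(handle, cur, &cur->a, sizeof(int),
//                                   decay(type, /*member=*/TOFROM));
//       __tgt_push_mapper_component(handle, cur->p, cur->p,
//                                   8 * sizeof(double),
//                                   decay(type, /*member=*/TOFROM));
//     }
//   }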
+ llvm::Value *Handle = MapperCGF.EmitLoadOfScalar( + MapperCGF.GetAddrOfLocalVar(&HandleArg), + /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); + llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar( + MapperCGF.GetAddrOfLocalVar(&BaseArg), + /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); + llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar( + MapperCGF.GetAddrOfLocalVar(&BeginArg), + /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); + + // Emit array initialization if this is an array section and \p MapType + // indicates that memory allocation is required. + llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head"); + emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, + ElementSize, HeadBB, /*IsInit=*/true); + + // Emit a for loop to iterate through SizeArg elements and map all of them. + + // Emit the loop header block. + MapperCGF.EmitBlock(HeadBB); + llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body"); + llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done"); + // Evaluate whether the initial condition is satisfied. + llvm::Value *IsEmpty = + MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); + MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); + llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); + + // Emit the loop body block. + MapperCGF.EmitBlock(BodyBB); + llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( + PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); + PtrPHI->addIncoming(PtrBegin, EntryBB); + Address PtrCurrent = + Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg) + .getAlignment() + .alignmentOfArrayElement(ElementSize)); + // Privatize the declared variable of the mapper to be the current array + // element. + CodeGenFunction::OMPPrivateScope Scope(MapperCGF); + Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() { + return MapperCGF + .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>()) + .getAddress(); + }); + (void)Scope.Privatize(); + + // Get map clause information. Fill up the arrays with all mapped variables. + MappableExprsHandler::MapBaseValuesArrayTy BasePointers; + MappableExprsHandler::MapValuesArrayTy Pointers; + MappableExprsHandler::MapValuesArrayTy Sizes; + MappableExprsHandler::MapFlagsArrayTy MapTypes; + MappableExprsHandler MEHandler(*D, MapperCGF); + MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes); + + // Call the runtime API __tgt_mapper_num_components to get the number of + // pre-existing components. + llvm::Value *OffloadingArgs[] = {Handle}; + llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs); + llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( + PreviousSize, + MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); + + // Fill up the runtime mapper handle for all components. + for (unsigned I = 0; I < BasePointers.size(); ++I) { + llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( + *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); + llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( + Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); + llvm::Value *CurSizeArg = Sizes[I]; + + // Extract the MEMBER_OF field from the map type.
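// A minimal sketch of the MEMBER_OF adjustment emitted below, on plain
// integers (adjustMemberOf is a hypothetical helper): a component that is a
// member of a struct gets the count of pre-existing components, obtained
// from __tgt_mapper_num_components, added into the 16-MSB MEMBER_OF field.
static uint64_t adjustMemberOf(uint64_t MapType, uint64_t PrevComponents) {
  const uint64_t MemberOfMask = 0xffff000000000000ULL;
  if ((MapType & MemberOfMask) == 0)
    return MapType;                        // Not a member of any struct.
  return MapType + (PrevComponents << 48); // Add into the MEMBER_OF field.
}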
+ llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member"); + MapperCGF.EmitBlock(MemberBB); + llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]); + llvm::Value *Member = MapperCGF.Builder.CreateAnd( + OriMapType, + MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF)); + llvm::BasicBlock *MemberCombineBB = + MapperCGF.createBasicBlock("omp.member.combine"); + llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type"); + llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member); + MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB); + // Add the number of pre-existing components to the MEMBER_OF field if it + // is valid. + MapperCGF.EmitBlock(MemberCombineBB); + llvm::Value *CombinedMember = + MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); + // Do nothing if it is not a member of previous components. + MapperCGF.EmitBlock(TypeBB); + llvm::PHINode *MemberMapType = + MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype"); + MemberMapType->addIncoming(OriMapType, MemberBB); + MemberMapType->addIncoming(CombinedMember, MemberCombineBB); + + // Combine the map type inherited from the user-defined mapper with that + // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM + // bits of the \a MapType, which is the input argument of the mapper + // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM + // bits of MemberMapType. + // [OpenMP 5.0], 1.2.6. map-type decay. + // | alloc | to | from | tofrom | release | delete + // ---------------------------------------------------------- + // alloc | alloc | alloc | alloc | alloc | release | delete + // to | alloc | to | alloc | to | release | delete + // from | alloc | alloc | from | from | release | delete + // tofrom | alloc | to | from | tofrom | release | delete + llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( + MapType, + MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO | + MappableExprsHandler::OMP_MAP_FROM)); + llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); + llvm::BasicBlock *AllocElseBB = + MapperCGF.createBasicBlock("omp.type.alloc.else"); + llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to"); + llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else"); + llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from"); + llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end"); + llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom); + MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB); + // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM. + MapperCGF.EmitBlock(AllocBB); + llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( + MemberMapType, + MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | + MappableExprsHandler::OMP_MAP_FROM))); + MapperCGF.Builder.CreateBr(EndBB); + MapperCGF.EmitBlock(AllocElseBB); + llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( + LeftToFrom, + MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO)); + MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); + // In case of to, clear OMP_MAP_FROM.
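// A minimal sketch of the decay table above, assuming OMP_MAP_TO == 0x1 and
// OMP_MAP_FROM == 0x2 as in the flags enum earlier in this file; the
// release/delete columns are handled separately through the delete bit:
static uint64_t decayMapType(uint64_t ParentType, uint64_t MemberType) {
  const uint64_t ToFrom = 0x1 | 0x2;
  switch (ParentType & ToFrom) {
  case 0x0: return MemberType & ~ToFrom; // alloc: drop both TO and FROM.
  case 0x1: return MemberType & ~0x2;    // to: drop FROM.
  case 0x2: return MemberType & ~0x1;    // from: drop TO.
  default:  return MemberType;           // tofrom: keep as written.
  }
}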
+ MapperCGF.EmitBlock(ToBB); + llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( + MemberMapType, + MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM)); + MapperCGF.Builder.CreateBr(EndBB); + MapperCGF.EmitBlock(ToElseBB); + llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( + LeftToFrom, + MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM)); + MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); + // In case of from, clear OMP_MAP_TO. + MapperCGF.EmitBlock(FromBB); + llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( + MemberMapType, + MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); + // In case of tofrom, do nothing. + MapperCGF.EmitBlock(EndBB); + llvm::PHINode *CurMapType = + MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype"); + CurMapType->addIncoming(AllocMapType, AllocBB); + CurMapType->addIncoming(ToMapType, ToBB); + CurMapType->addIncoming(FromMapType, FromBB); + CurMapType->addIncoming(MemberMapType, ToElseBB); + + // TODO: call the corresponding mapper function if a user-defined mapper is + // associated with this map clause. + // Call the runtime API __tgt_push_mapper_component to fill up the runtime + // data structure. + llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, + CurSizeArg, CurMapType}; + MapperCGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__tgt_push_mapper_component), + OffloadingArgs); + } + + // Update the pointer to point to the next element that needs to be mapped, + // and check whether we have mapped all elements. + llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32( + PtrPHI, /*Idx0=*/1, "omp.arraymap.next"); + PtrPHI->addIncoming(PtrNext, BodyBB); + llvm::Value *IsDone = + MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone"); + llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit"); + MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB); + + MapperCGF.EmitBlock(ExitBB); + // Emit array deletion if this is an array section and \p MapType indicates + // that deletion is required. + emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, + ElementSize, DoneBB, /*IsInit=*/false); + + // Emit the function exit block. + MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true); + MapperCGF.FinishFunction(); + UDMMap.try_emplace(D, Fn); + if (CGF) { + auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn); + Decls.second.push_back(D); + } +} + +/// Emit the array initialization or deletion portion for user-defined mapper +/// code generation. First, it evaluates whether an array section is mapped and +/// whether the \a MapType instructs to delete this section. If \a IsInit is +/// true, and \a MapType indicates to not delete this array, array +/// initialization code is generated. If \a IsInit is false, and \a MapType +/// indicates to delete this array, array deletion code is generated. +void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( + CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, + llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, + CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) { + StringRef Prefix = IsInit ? ".init" : ".del"; + + // Evaluate if this is an array section.
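// A minimal C-level sketch of the guard implemented below (illustrative;
// OMP_MAP_DELETE is the delete bit from the flags enum earlier):
//   bool isArray = size >= 1;                 // the ICmpSGE(Size, 1) below
//   bool del = (type & OMP_MAP_DELETE) != 0;
//   if (isArray && (isInit ? !del : del))
//     __tgt_push_mapper_component(handle, base, begin, size * sizeof(T),
//                                 type & ~(OMP_MAP_TO | OMP_MAP_FROM));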
+ llvm::BasicBlock *IsDeleteBB = + MapperCGF.createBasicBlock("omp.array" + Prefix + ".evaldelete"); + llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.array" + Prefix); + llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE( + Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); + MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB); + + // Evaluate if we are going to delete this section. + MapperCGF.EmitBlock(IsDeleteBB); + llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( + MapType, + MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); + llvm::Value *DeleteCond; + if (IsInit) { + DeleteCond = MapperCGF.Builder.CreateIsNull( + DeleteBit, "omp.array" + Prefix + ".delete"); + } else { + DeleteCond = MapperCGF.Builder.CreateIsNotNull( + DeleteBit, "omp.array" + Prefix + ".delete"); + } + MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB); + + MapperCGF.EmitBlock(BodyBB); + // Get the array size by multiplying element size and element number (i.e., \p + // Size). + llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( + Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); + // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it is used + // for memory allocation/deletion only. + llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( + MapType, + MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | + MappableExprsHandler::OMP_MAP_FROM))); + // Call the runtime API __tgt_push_mapper_component to fill up the runtime + // data structure. + llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg}; + MapperCGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs); +} + void CGOpenMPRuntime::emitTargetNumIterationsCall( - CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device, - const llvm::function_ref<llvm::Value *( - CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) { + CodeGenFunction &CGF, const OMPExecutableDirective &D, + llvm::Value *DeviceID, + llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, + const OMPLoopDirective &D)> + SizeEmitter) { OpenMPDirectiveKind Kind = D.getDirectiveKind(); const OMPExecutableDirective *TD = &D; // Get nested teams distribute kind directive, if any. @@ -8704,30 +9145,24 @@ void CGOpenMPRuntime::emitTargetNumIterationsCall( if (!TD) return; const auto *LD = cast<OMPLoopDirective>(TD); - auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF, + auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF, PrePostActionTy &) { - llvm::Value *NumIterations = SizeEmitter(CGF, *LD); - - // Emit device ID if any.
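// A minimal sketch of the tripcount push as restructured here: the device
// ID is now computed once by the caller and the runtime call is emitted
// only when SizeEmitter produces a value (illustrative):
//   if (int64_t trip_count = /* SizeEmitter(CGF, *LD) */)
//     __kmpc_push_target_tripcount(device_id, trip_count);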
- llvm::Value *DeviceID; - if (Device) - DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), - CGF.Int64Ty, /*isSigned=*/true); - else - DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); - - llvm::Value *Args[] = {DeviceID, NumIterations}; - CGF.EmitRuntimeCall( - createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args); + if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { + llvm::Value *Args[] = {DeviceID, NumIterations}; + CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args); + } }; emitInlinedDirective(CGF, OMPD_unknown, CodeGen); } -void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, - const OMPExecutableDirective &D, - llvm::Function *OutlinedFn, - llvm::Value *OutlinedFnID, - const Expr *IfCond, const Expr *Device) { +void CGOpenMPRuntime::emitTargetCall( + CodeGenFunction &CGF, const OMPExecutableDirective &D, + llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, + const Expr *Device, + llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, + const OMPLoopDirective &D)> + SizeEmitter) { if (!CGF.HaveInsertPoint()) return; @@ -8746,8 +9181,8 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, llvm::Value *MapTypesArray = nullptr; // Fill up the pointer arrays and transfer execution to the device. auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, - &MapTypesArray, &CS, RequiresOuterTask, - &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) { + &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars, + SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { // On top of the arrays that were filled up, the target offloading call // takes as arguments the device id as well as the host pointer. The host // pointer is used by the runtime library to identify the current target @@ -8779,6 +9214,9 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); + // Emit tripcount for the target loop-based directive. + emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter); + bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); // The target region is an outlined function launched by the runtime // via calls __tgt_target() or __tgt_target_teams(). @@ -9103,12 +9541,16 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, case OMPD_teams_distribute_parallel_for_simd: case OMPD_target_update: case OMPD_declare_simd: + case OMPD_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: case OMPD_requires: case OMPD_unknown: llvm_unreachable("Unknown target directive for OpenMP device codegen."); @@ -9137,14 +9579,28 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { // If emitting code for the host, we do not process FD here. Instead we do // the normal code generation. - if (!CGM.getLangOpts().OpenMPIsDevice) + if (!CGM.getLangOpts().OpenMPIsDevice) { + if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) { + Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = + OMPDeclareTargetDeclAttr::getDeviceType(FD); + // Do not emit device_type(nohost) functions for the host. 
+ if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) + return true; + } return false; + } const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); StringRef Name = CGM.getMangledName(GD); // Try to detect target regions in the function. - if (const auto *FD = dyn_cast<FunctionDecl>(VD)) + if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { scanForTargetRegionsFunctions(FD->getBody(), Name); + Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = + OMPDeclareTargetDeclAttr::getDeviceType(FD); + // Do not emit device_type(host) functions for the device. + if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host) + return true; + } // Do not emit the function if it is not marked as declare target. return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && @@ -9221,6 +9677,9 @@ CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr) { + if (CGM.getLangOpts().OMPTargetTriples.empty() && + !CGM.getLangOpts().OpenMPIsDevice) + return; llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); if (!Res) { @@ -9433,17 +9892,6 @@ llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { return RequiresRegFn; } -llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { - // If we have offloading in the current module, we need to emit the entries - // now and register the offloading descriptor. - createOffloadEntriesAndInfoMetadata(); - - // Create and register the offloading binary descriptors. This is the main - // entity that captures all the information about offloading in the current - // compilation unit. - return createOffloadingBinaryDescriptorRegistration(); -} - void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, @@ -9711,12 +10159,16 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( case OMPD_teams_distribute_parallel_for: case OMPD_teams_distribute_parallel_for_simd: case OMPD_declare_simd: + case OMPD_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: case OMPD_target: case OMPD_target_simd: case OMPD_target_teams_distribute: @@ -10377,7 +10829,7 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, } llvm::APInt Size(/*numBits=*/32, NumIterations.size()); QualType ArrayTy = - C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0); + C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); CGF.EmitNullInitialization(DimsAddr, ArrayTy); @@ -10428,7 +10880,7 @@ void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); QualType ArrayTy = CGM.getContext().getConstantArrayType( - Int64Ty, Size, ArrayType::Normal, 0); + Int64Ty, Size, nullptr, ArrayType::Normal, 0); Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { const Expr *CounterVal = C->getLoopData(I); @@ -10566,6 +11018,131 @@ Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, return Address(Addr, Align); } +/// Checks current context and returns true if it matches the context
selector. +template <OMPDeclareVariantAttr::CtxSelectorSetType CtxSet, + OMPDeclareVariantAttr::CtxSelectorType Ctx> +static bool checkContext(const OMPDeclareVariantAttr *A) { + assert(CtxSet != OMPDeclareVariantAttr::CtxSetUnknown && + Ctx != OMPDeclareVariantAttr::CtxUnknown && + "Unknown context selector or context selector set."); + return false; +} + +/// Checks for implementation={vendor(<vendor>)} context selector. +/// \returns true iff <vendor>="llvm", false otherwise. +template <> +bool checkContext<OMPDeclareVariantAttr::CtxSetImplementation, + OMPDeclareVariantAttr::CtxVendor>( + const OMPDeclareVariantAttr *A) { + return llvm::all_of(A->implVendors(), + [](StringRef S) { return !S.compare_lower("llvm"); }); +} + +static bool greaterCtxScore(ASTContext &Ctx, const Expr *LHS, const Expr *RHS) { + // If both scores are unknown, choose the very first one. + if (!LHS && !RHS) + return true; + // If only one score is known, prefer the attribute with the known score. + if (LHS && !RHS) + return true; + if (!LHS && RHS) + return false; + llvm::APSInt LHSVal = LHS->EvaluateKnownConstInt(Ctx); + llvm::APSInt RHSVal = RHS->EvaluateKnownConstInt(Ctx); + return llvm::APSInt::compareValues(LHSVal, RHSVal) >= 0; +} + +namespace { +/// Comparator for the priority queue for context selectors. +class OMPDeclareVariantAttrComparer + : public std::greater<const OMPDeclareVariantAttr *> { +private: + ASTContext &Ctx; + +public: + OMPDeclareVariantAttrComparer(ASTContext &Ctx) : Ctx(Ctx) {} + bool operator()(const OMPDeclareVariantAttr *LHS, + const OMPDeclareVariantAttr *RHS) const { + const Expr *LHSExpr = nullptr; + const Expr *RHSExpr = nullptr; + if (LHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified) + LHSExpr = LHS->getScore(); + if (RHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified) + RHSExpr = RHS->getScore(); + return greaterCtxScore(Ctx, LHSExpr, RHSExpr); + } +}; +} // anonymous namespace + +/// Finds the variant function that matches the current context with its +/// context selector. +static const FunctionDecl *getDeclareVariantFunction(ASTContext &Ctx, + const FunctionDecl *FD) { + if (!FD->hasAttrs() || !FD->hasAttr<OMPDeclareVariantAttr>()) + return FD; + // Iterate through all DeclareVariant attributes and check context selectors. + auto &&Comparer = [&Ctx](const OMPDeclareVariantAttr *LHS, + const OMPDeclareVariantAttr *RHS) { + const Expr *LHSExpr = nullptr; + const Expr *RHSExpr = nullptr; + if (LHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified) + LHSExpr = LHS->getScore(); + if (RHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified) + RHSExpr = RHS->getScore(); + return greaterCtxScore(Ctx, LHSExpr, RHSExpr); + }; + const OMPDeclareVariantAttr *TopMostAttr = nullptr; + for (const auto *A : FD->specific_attrs<OMPDeclareVariantAttr>()) { + const OMPDeclareVariantAttr *SelectedAttr = nullptr; + switch (A->getCtxSelectorSet()) { + case OMPDeclareVariantAttr::CtxSetImplementation: + switch (A->getCtxSelector()) { + case OMPDeclareVariantAttr::CtxVendor: + if (checkContext<OMPDeclareVariantAttr::CtxSetImplementation, + OMPDeclareVariantAttr::CtxVendor>(A)) + SelectedAttr = A; + break; + case OMPDeclareVariantAttr::CtxUnknown: + llvm_unreachable( + "Unknown context selector in implementation selector set."); + } + break; + case OMPDeclareVariantAttr::CtxSetUnknown: + llvm_unreachable("Unknown context selector set."); + } + // If the attribute matches the context, find the attribute with the highest + // score.
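// A minimal sketch of the ordering greaterCtxScore implements, using
// llvm::Optional<int> for a possibly-unspecified score (illustrative):
static bool greaterScore(llvm::Optional<int> LHS, llvm::Optional<int> RHS) {
  if (!RHS)
    return true;  // An unknown RHS never beats LHS (covers both-unknown too).
  if (!LHS)
    return false; // A known RHS beats an unknown LHS.
  return *LHS >= *RHS;
}
// E.g. among variants scored {unspecified, 10, 25}, the score-25 variant is
// selected and its function body is emitted in place of the original.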
+ if (SelectedAttr && (!TopMostAttr || !Comparer(TopMostAttr, SelectedAttr))) + TopMostAttr = SelectedAttr; + } + if (!TopMostAttr) + return FD; + return cast<FunctionDecl>( + cast<DeclRefExpr>(TopMostAttr->getVariantFuncRef()->IgnoreParenImpCasts()) + ->getDecl()); +} + +bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) { + const auto *D = cast<FunctionDecl>(GD.getDecl()); + // If the original function is defined already, use its definition. + StringRef MangledName = CGM.getMangledName(GD); + llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName); + if (Orig && !Orig->isDeclaration()) + return false; + const FunctionDecl *NewFD = getDeclareVariantFunction(CGM.getContext(), D); + // Emit the original function if it does not have a declare variant attribute + // or the context does not match. + if (NewFD == D) + return false; + GlobalDecl NewGD = GD.getWithDecl(NewFD); + if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) { + DeferredVariantFunction.erase(D); + return true; + } + DeferredVariantFunction.insert(std::make_pair(D, std::make_pair(NewGD, GD))); + return true; +} + llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { @@ -10786,12 +11363,13 @@ void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( llvm_unreachable("Not supported in SIMD-only mode"); } -void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF, - const OMPExecutableDirective &D, - llvm::Function *OutlinedFn, - llvm::Value *OutlinedFnID, - const Expr *IfCond, - const Expr *Device) { +void CGOpenMPSIMDRuntime::emitTargetCall( + CodeGenFunction &CGF, const OMPExecutableDirective &D, + llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, + const Expr *Device, + llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, + const OMPLoopDirective &D)> + SizeEmitter) { llvm_unreachable("Not supported in SIMD-only mode"); } @@ -10807,10 +11385,6 @@ bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { return false; } -llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() { - return nullptr; -} - void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h index 3f842ce96407f..bf8e0ac809092 100644 --- a/lib/CodeGen/CGOpenMPRuntime.h +++ b/lib/CodeGen/CGOpenMPRuntime.h @@ -15,6 +15,7 @@ #include "CGValue.h" #include "clang/AST/DeclOpenMP.h" +#include "clang/AST/GlobalDecl.h" #include "clang/AST/Type.h" #include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/SourceLocation.h" @@ -36,7 +37,6 @@ class Value; namespace clang { class Expr; -class GlobalDecl; class OMPDependClause; class OMPExecutableDirective; class OMPLoopDirective; @@ -291,6 +291,17 @@ protected: /// default location. virtual unsigned getDefaultLocationReserved2Flags() const { return 0; } + /// Tries to emit declare variant function for \p OldGD from \p NewGD. + /// \param OrigAddr LLVM IR value for \p OldGD. + /// \param IsForDefinition true if emission is requested for the definition of + /// \p OldGD. + /// \returns true if it was able to emit a definition for \p OldGD that + /// points to \p NewGD.
+ virtual bool tryEmitDeclareVariant(const GlobalDecl &NewGD, + const GlobalDecl &OldGD, + llvm::GlobalValue *OrigAddr, + bool IsForDefinition); + /// Returns default flags for the barriers depending on the directive, for /// which this barrier is going to be emitted. static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind); @@ -345,6 +356,14 @@ private: SmallVector<const OMPDeclareReductionDecl *, 4>> FunctionUDRMapTy; FunctionUDRMapTy FunctionUDRMap; + /// Map from the user-defined mapper declaration to its corresponding + /// functions. + llvm::DenseMap<const OMPDeclareMapperDecl *, llvm::Function *> UDMMap; + /// Map of functions and their local user-defined mappers. + using FunctionUDMMapTy = + llvm::DenseMap<llvm::Function *, + SmallVector<const OMPDeclareMapperDecl *, 4>>; + FunctionUDMMapTy FunctionUDMMap; /// Type kmp_critical_name, originally defined as typedef kmp_int32 /// kmp_critical_name[8]; llvm::ArrayType *KmpCriticalNameTy; @@ -636,6 +655,12 @@ private: /// must be emitted. llvm::SmallDenseSet<const VarDecl *> DeferredGlobalVariables; + /// Mapping of the original functions to their variants and original global + /// decl. + llvm::MapVector<CanonicalDeclPtr<const FunctionDecl>, + std::pair<GlobalDecl, GlobalDecl>> + DeferredVariantFunction; + /// Flag for keeping track of whether a requires unified_shared_memory /// directive is present. bool HasRequiresUnifiedSharedMemory = false; @@ -647,14 +672,6 @@ private: /// Device routines are specific to the bool HasEmittedDeclareTargetRegion = false; - /// Creates and registers offloading binary descriptor for the current - /// compilation unit. The function that does the registration is returned. - llvm::Function *createOffloadingBinaryDescriptorRegistration(); - - /// Creates all the offload entries in the current compilation unit - /// along with the associated metadata. - void createOffloadEntriesAndInfoMetadata(); - /// Loads all the offload entries information from the host IR /// metadata. void loadOffloadInfoMetadata(); @@ -738,6 +755,14 @@ private: llvm::Value *Ctor, llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc); + /// Emit the array initialization or deletion portion for user-defined mapper + /// code generation. + void emitUDMapperArrayInitOrDel(CodeGenFunction &MapperCGF, + llvm::Value *Handle, llvm::Value *BasePtr, + llvm::Value *Ptr, llvm::Value *Size, + llvm::Value *MapType, CharUnits ElementSize, + llvm::BasicBlock *ExitBB, bool IsInit); + struct TaskResultTy { llvm::Value *NewTask = nullptr; llvm::Function *TaskEntry = nullptr; @@ -777,6 +802,17 @@ private: /// default. virtual unsigned getDefaultFirstprivateAddressSpace() const { return 0; } + /// Emit code that pushes the trip count of loops associated with constructs + /// 'target teams distribute' and 'teams distribute parallel for'. + /// \param SizeEmitter Emits the int64 value for the number of iterations of + /// the associated loop. + void emitTargetNumIterationsCall( + CodeGenFunction &CGF, const OMPExecutableDirective &D, + llvm::Value *DeviceID, + llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, + const OMPLoopDirective &D)> + SizeEmitter); + public: explicit CGOpenMPRuntime(CodeGenModule &CGM) : CGOpenMPRuntime(CGM, ".", ".") {} @@ -798,6 +834,10 @@ public: virtual std::pair<llvm::Function *, llvm::Function *> getUserDefinedReduction(const OMPDeclareReductionDecl *D); + /// Emit the function for the user-defined mapper construct.
+ void emitUserDefinedMapper(const OMPDeclareMapperDecl *D, + CodeGenFunction *CGF = nullptr); + /// Emits outlined function for the specified OpenMP parallel directive /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID, /// kmp_int32 BoundID, struct context_vars*). @@ -1394,15 +1434,6 @@ public: bool IsOffloadEntry, const RegionCodeGenTy &CodeGen); - /// Emit code that pushes the trip count of loops associated with constructs - /// 'target teams distribute' and 'teams distribute parallel for'. - /// \param SizeEmitter Emits the int64 value for the number of iterations of - /// the associated loop. - virtual void emitTargetNumIterationsCall( - CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device, - const llvm::function_ref<llvm::Value *( - CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter); - /// Emit the target offloading code associated with \a D. The emitted /// code attempts offloading the execution to the device, and in the event of /// a failure it executes the host version outlined in \a OutlinedFn. @@ -1413,11 +1444,15 @@ public: /// directive, or null if no if clause is used. /// \param Device Expression evaluated in device clause associated with the /// target directive, or null if no device clause is used. - virtual void emitTargetCall(CodeGenFunction &CGF, - const OMPExecutableDirective &D, - llvm::Function *OutlinedFn, - llvm::Value *OutlinedFnID, const Expr *IfCond, - const Expr *Device); + /// \param SizeEmitter Callback to emit number of iterations for loop-based + /// directives. + virtual void + emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, + llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, + const Expr *IfCond, const Expr *Device, + llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, + const OMPLoopDirective &D)> + SizeEmitter); /// Emit the target regions enclosed in \a GD function definition or /// the function itself in case it is a valid device function. Returns true if /// \a GD was dealt with successfully. /// \param GD Global to scan. @@ -1449,10 +1484,9 @@ public: /// requires directives was used in the current module. llvm::Function *emitRequiresDirectiveRegFun(); - /// Creates the offloading descriptor in the event any target region - /// was emitted in the current module and return the function that registers - /// it. - virtual llvm::Function *emitRegistrationFunction(); + /// Creates all the offload entries in the current compilation unit + /// along with the associated metadata. + void createOffloadEntriesAndInfoMetadata(); /// Emits code for teams call of the \a OutlinedFn with /// variables captured in a record whose address is stored in \a /// CapturedStruct. @@ -1626,6 +1660,9 @@ public: /// Return whether the unified_shared_memory has been specified. bool hasRequiresUnifiedSharedMemory() const; + + /// Emits the definition of the declare variant function. + virtual bool emitDeclareVariant(GlobalDecl GD, bool IsForDefinition); }; /// Class supports emission of SIMD-only code. @@ -2097,9 +2134,13 @@ public: /// directive, or null if no if clause is used. /// \param Device Expression evaluated in device clause associated with the /// target directive, or null if no device clause is used.
- void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, - llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, - const Expr *IfCond, const Expr *Device) override; + void + emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, + llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, + const Expr *IfCond, const Expr *Device, + llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, + const OMPLoopDirective &D)> + SizeEmitter) override; /// Emit the target regions enclosed in \a GD function definition or /// the function itself in case it is a valid device function. Returns true if @@ -2117,11 +2158,6 @@ public: /// \param GD Global to scan. bool emitTargetGlobal(GlobalDecl GD) override; - /// Creates the offloading descriptor in the event any target region - /// was emitted in the current module and return the function that registers - /// it. - llvm::Function *emitRegistrationFunction() override; - /// Emits code for teams call of the \a OutlinedFn with /// variables captured in a record whose address is stored in \a /// CapturedStruct. diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 48dcbbf3cabd7..708260429f68e 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -107,6 +107,10 @@ enum OpenMPRTLFunctionNVPTX { /// Call to void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32 /// global_tid); OMPRTL__kmpc_barrier_simple_spmd, + /// Call to int32_t __kmpc_warp_active_thread_mask(void); + OMPRTL_NVPTX__kmpc_warp_active_thread_mask, + /// Call to void __kmpc_syncwarp(int32_t Mask); + OMPRTL_NVPTX__kmpc_syncwarp, }; /// Pre(post)-action for different OpenMP constructs specialized for NVPTX. @@ -276,7 +280,8 @@ static RecordDecl *buildRecordForGlobalizedVars( } } else { llvm::APInt ArraySize(32, BufSize); - Type = C.getConstantArrayType(Type, ArraySize, ArrayType::Normal, 0); + Type = C.getConstantArrayType(Type, ArraySize, nullptr, ArrayType::Normal, + 0); Field = FieldDecl::Create( C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type, C.getTrivialTypeSourceInfo(Type, SourceLocation()), @@ -287,10 +292,11 @@ static RecordDecl *buildRecordForGlobalizedVars( static_cast<CharUnits::QuantityType>( GlobalMemoryAlignment))); Field->addAttr(AlignedAttr::CreateImplicit( - C, AlignedAttr::GNU_aligned, /*IsAlignmentExpr=*/true, + C, /*IsAlignmentExpr=*/true, IntegerLiteral::Create(C, Align, C.getIntTypeForBitwidth(32, /*Signed=*/0), - SourceLocation()))); + SourceLocation()), + {}, AttributeCommonInfo::AS_GNU, AlignedAttr::GNU_aligned)); } GlobalizedRD->addDecl(Field); MappedDeclsFields.try_emplace(VD, Field); @@ -790,12 +796,16 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx, case OMPD_teams_distribute_parallel_for_simd: case OMPD_target_update: case OMPD_declare_simd: + case OMPD_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: case OMPD_requires: case OMPD_unknown: llvm_unreachable("Unexpected directive."); @@ -860,12 +870,16 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx, case OMPD_teams_distribute_parallel_for_simd: case OMPD_target_update: case OMPD_declare_simd: + case OMPD_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_taskloop: case
OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: case OMPD_requires: case OMPD_unknown: break; @@ -1023,12 +1037,16 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, case OMPD_teams_distribute_parallel_for_simd: case OMPD_target_update: case OMPD_declare_simd: + case OMPD_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: case OMPD_requires: case OMPD_unknown: llvm_unreachable("Unexpected directive."); @@ -1099,12 +1117,16 @@ static bool supportsLightweightRuntime(ASTContext &Ctx, case OMPD_teams_distribute_parallel_for_simd: case OMPD_target_update: case OMPD_declare_simd: + case OMPD_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: case OMPD_requires: case OMPD_unknown: break; @@ -1794,6 +1816,20 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { ->addFnAttr(llvm::Attribute::Convergent); break; } + case OMPRTL_NVPTX__kmpc_warp_active_thread_mask: { + // Build int32_t __kmpc_warp_active_thread_mask(void); + auto *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, llvm::None, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_warp_active_thread_mask"); + break; + } + case OMPRTL_NVPTX__kmpc_syncwarp: { + // Build void __kmpc_syncwarp(kmp_int32 Mask); + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, CGM.Int32Ty, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_syncwarp"); + break; + } } return RTLFn; } @@ -1871,6 +1907,19 @@ unsigned CGOpenMPRuntimeNVPTX::getDefaultLocationReserved2Flags() const { llvm_unreachable("Unknown flags are requested."); } +bool CGOpenMPRuntimeNVPTX::tryEmitDeclareVariant(const GlobalDecl &NewGD, + const GlobalDecl &OldGD, + llvm::GlobalValue *OrigAddr, + bool IsForDefinition) { + // Emit the function in OldGD with the body from NewGD, if NewGD is defined. 
+ auto *NewFD = cast<FunctionDecl>(NewGD.getDecl()); + if (NewFD->isDefined()) { + CGM.emitOpenMPDeviceFunctionRedefinition(OldGD, NewGD, OrigAddr); + return true; + } + return false; +} + CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM) : CGOpenMPRuntime(CGM, "_", "$") { if (!CGM.getLangOpts().OpenMPIsDevice) @@ -2030,7 +2079,7 @@ llvm::Function *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction( auto I = Rt.FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first; I->getSecond().GlobalRecord = GlobalizedRD; I->getSecond().MappedParams = - llvm::make_unique<CodeGenFunction::OMPMapVars>(); + std::make_unique<CodeGenFunction::OMPMapVars>(); DeclToAddrMapTy &Data = I->getSecond().LocalVarData; for (const auto &Pair : MappedDeclsFields) { assert(Pair.getFirst()->isCanonicalDecl() && @@ -2414,9 +2463,8 @@ void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF, if (!CGF.HaveInsertPoint()) return; - Address ZeroAddr = CGF.CreateMemTemp( - CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), - /*Name*/ ".zero.addr"); + Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, + /*Name=*/".zero.addr"); CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer()); @@ -2445,16 +2493,19 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall( // Force inline this outlined function at its call site. Fn->setLinkage(llvm::GlobalValue::InternalLinkage); - Address ZeroAddr = CGF.CreateMemTemp(CGF.getContext().getIntTypeForBitwidth( - /*DestWidth=*/32, /*Signed=*/1), - ".zero.addr"); + Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, + /*Name=*/".zero.addr"); CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); // ThreadId for serialized parallels is 0. Address ThreadIDAddr = ZeroAddr; - auto &&CodeGen = [this, Fn, CapturedVars, Loc, ZeroAddr, &ThreadIDAddr]( + auto &&CodeGen = [this, Fn, CapturedVars, Loc, &ThreadIDAddr]( CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); + Address ZeroAddr = + CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, + /*Name=*/".bound.zero.addr"); + CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); OutlinedFnArgs.push_back(ZeroAddr.getPointer()); @@ -2611,17 +2662,19 @@ void CGOpenMPRuntimeNVPTX::emitSPMDParallelCall( // llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; - Address ZeroAddr = CGF.CreateMemTemp(CGF.getContext().getIntTypeForBitwidth( - /*DestWidth=*/32, /*Signed=*/1), - ".zero.addr"); + Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, + /*Name=*/".zero.addr"); CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); // ThreadId for serialized parallels is 0. 
Address ThreadIDAddr = ZeroAddr; - auto &&CodeGen = [this, OutlinedFn, CapturedVars, Loc, ZeroAddr, - &ThreadIDAddr](CodeGenFunction &CGF, - PrePostActionTy &Action) { + auto &&CodeGen = [this, OutlinedFn, CapturedVars, Loc, &ThreadIDAddr]( + CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); + Address ZeroAddr = + CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, + /*Name=*/".bound.zero.addr"); + CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); OutlinedFnArgs.push_back(ZeroAddr.getPointer()); @@ -2669,8 +2722,9 @@ void CGOpenMPRuntimeNVPTX::syncCTAThreads(CodeGenFunction &CGF) { llvm::ConstantPointerNull::get( cast<llvm::PointerType>(getIdentTyPointerTy())), llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/0, /*isSigned=*/true)}; - CGF.EmitRuntimeCall( + llvm::CallInst *Call = CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier_simple_spmd), Args); + Call->setConvergent(); } void CGOpenMPRuntimeNVPTX::emitBarrierCall(CodeGenFunction &CGF, @@ -2684,7 +2738,9 @@ void CGOpenMPRuntimeNVPTX::emitBarrierCall(CodeGenFunction &CGF, unsigned Flags = getDefaultFlagsForBarriers(Kind); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), getThreadID(CGF, Loc)}; - CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier), Args); + llvm::CallInst *Call = CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier), Args); + Call->setConvergent(); } void CGOpenMPRuntimeNVPTX::emitCriticalRegion( @@ -2697,6 +2753,9 @@ void CGOpenMPRuntimeNVPTX::emitCriticalRegion( llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.critical.body"); llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.critical.exit"); + // Get the mask of active threads in the warp. + llvm::Value *Mask = CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_warp_active_thread_mask)); // Fetch team-local id of the thread. llvm::Value *ThreadID = getNVPTXThreadID(CGF); @@ -2737,8 +2796,9 @@ void CGOpenMPRuntimeNVPTX::emitCriticalRegion( // Block waits for all threads in current team to finish then increments the // counter variable and returns to the loop. CGF.EmitBlock(SyncBB); - emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false, - /*ForceSimpleCall=*/true); + // Reconverge active threads in the warp. + (void)CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_syncwarp), Mask); llvm::Value *IncCounterVal = CGF.Builder.CreateNSWAdd(CounterVal, CGF.Builder.getInt32(1)); @@ -4239,7 +4299,7 @@ void CGOpenMPRuntimeNVPTX::emitReduction( } llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); QualType ReductionArrayTy = - C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, + C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); Address ReductionList = CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); @@ -4515,9 +4575,8 @@ llvm::Function *CGOpenMPRuntimeNVPTX::createParallelDataSharingWrapper( const auto *RD = CS.getCapturedRecordDecl(); auto CurField = RD->field_begin(); - Address ZeroAddr = CGF.CreateMemTemp( - CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), - /*Name*/ ".zero.addr"); + Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, + /*Name=*/".zero.addr"); CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); // Get the array of arguments. 
SmallVector<llvm::Value *, 8> Args; @@ -4634,7 +4693,7 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF, return; auto I = FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first; I->getSecond().MappedParams = - llvm::make_unique<CodeGenFunction::OMPMapVars>(); + std::make_unique<CodeGenFunction::OMPMapVars>(); I->getSecond().GlobalRecord = GlobalizedVarsRecord; I->getSecond().EscapedParameters.insert( VarChecker.getEscapedParameters().begin(), @@ -4700,7 +4759,7 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF, /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal, CGM.getContext().getTargetAddressSpace(LangAS::cuda_constant)); CharUnits Align = CGM.getContext().getDeclAlign(VD); - GV->setAlignment(Align.getQuantity()); + GV->setAlignment(Align.getAsAlign()); return Address(GV, Align); } case OMPAllocateDeclAttr::OMPPTeamMemAlloc: { @@ -4712,7 +4771,7 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF, /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal, CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared)); CharUnits Align = CGM.getContext().getDeclAlign(VD); - GV->setAlignment(Align.getQuantity()); + GV->setAlignment(Align.getAsAlign()); return Address(GV, Align); } case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: @@ -4723,7 +4782,7 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF, llvm::GlobalValue::InternalLinkage, llvm::Constant::getNullValue(VarTy), VD->getName()); CharUnits Align = CGM.getContext().getDeclAlign(VD); - GV->setAlignment(Align.getQuantity()); + GV->setAlignment(Align.getAsAlign()); return Address(GV, Align); } } @@ -5026,7 +5085,7 @@ void CGOpenMPRuntimeNVPTX::clear() { Size = llvm::alignTo(Size, RecAlignment); llvm::APInt ArySize(/*numBits=*/64, Size); QualType SubTy = C.getConstantArrayType( - C.CharTy, ArySize, ArrayType::Normal, /*IndexTypeQuals=*/0); + C.CharTy, ArySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); const bool UseSharedMemory = Size <= SharedMemorySize; auto *Field = FieldDecl::Create(C, UseSharedMemory ? SharedStaticRD : StaticRD, @@ -5053,7 +5112,7 @@ void CGOpenMPRuntimeNVPTX::clear() { if (!SharedStaticRD->field_empty()) { llvm::APInt ArySize(/*numBits=*/64, SharedMemorySize); QualType SubTy = C.getConstantArrayType( - C.CharTy, ArySize, ArrayType::Normal, /*IndexTypeQuals=*/0); + C.CharTy, ArySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); auto *Field = FieldDecl::Create( C, SharedStaticRD, SourceLocation(), SourceLocation(), nullptr, SubTy, C.getTrivialTypeSourceInfo(SubTy, SourceLocation()), @@ -5086,11 +5145,12 @@ void CGOpenMPRuntimeNVPTX::clear() { std::pair<unsigned, unsigned> SMsBlockPerSM = getSMsBlocksPerSM(CGM); llvm::APInt Size1(32, SMsBlockPerSM.second); QualType Arr1Ty = - C.getConstantArrayType(StaticTy, Size1, ArrayType::Normal, + C.getConstantArrayType(StaticTy, Size1, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); llvm::APInt Size2(32, SMsBlockPerSM.first); - QualType Arr2Ty = C.getConstantArrayType(Arr1Ty, Size2, ArrayType::Normal, - /*IndexTypeQuals=*/0); + QualType Arr2Ty = + C.getConstantArrayType(Arr1Ty, Size2, nullptr, ArrayType::Normal, + /*IndexTypeQuals=*/0); llvm::Type *LLVMArr2Ty = CGM.getTypes().ConvertTypeForMem(Arr2Ty); // FIXME: nvlink does not handle weak linkage correctly (object with the // different size are reported as erroneous). 
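The emitCriticalRegion change above replaces a full team barrier with a warp-level handshake: the active-lane mask is captured via __kmpc_warp_active_thread_mask before the serialization loop, and the warp is reconverged with __kmpc_syncwarp on exactly that mask. A minimal sketch of the source construct this serves (the kernel shape and variable names are illustrative, not from this patch):

int counter = 0;
#pragma omp target teams map(tofrom : counter)
#pragma omp parallel
{
  // Threads of a warp enter the body one at a time. Reconverging on the
  // entry mask (rather than issuing a team barrier) avoids deadlocking
  // lanes that were already inactive when the critical region was reached.
#pragma omp critical
  counter += 1;
}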
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index e7fd458e72713..0f78627c95e63 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -193,6 +193,18 @@ protected: /// Full/Lightweight runtime mode. Used for better optimization. unsigned getDefaultLocationReserved2Flags() const override; + /// Tries to emit a declare variant function for \p OldGD from \p NewGD. + /// \param OrigAddr LLVM IR value for \p OldGD. + /// \param IsForDefinition true if emission is requested for the definition of + /// \p OldGD. + /// \returns true if we were able to emit a definition function for \p OldGD, + /// which points to \p NewGD. + /// The NVPTX backend does not support global aliases, so just use the + /// function emitted for \p NewGD instead of \p OldGD. + bool tryEmitDeclareVariant(const GlobalDecl &NewGD, const GlobalDecl &OldGD, + llvm::GlobalValue *OrigAddr, + bool IsForDefinition) override; + public: explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM); void clear() override; diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp index dd0dea5b94a03..bb2629f89d3da 100644 --- a/lib/CodeGen/CGStmt.cpp +++ b/lib/CodeGen/CGStmt.cpp @@ -281,6 +281,17 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) { case Stmt::OMPTaskLoopSimdDirectiveClass: EmitOMPTaskLoopSimdDirective(cast<OMPTaskLoopSimdDirective>(*S)); break; + case Stmt::OMPMasterTaskLoopDirectiveClass: + EmitOMPMasterTaskLoopDirective(cast<OMPMasterTaskLoopDirective>(*S)); + break; + case Stmt::OMPMasterTaskLoopSimdDirectiveClass: + EmitOMPMasterTaskLoopSimdDirective( + cast<OMPMasterTaskLoopSimdDirective>(*S)); + break; + case Stmt::OMPParallelMasterTaskLoopDirectiveClass: + EmitOMPParallelMasterTaskLoopDirective( + cast<OMPParallelMasterTaskLoopDirective>(*S)); + break; case Stmt::OMPDistributeDirectiveClass: EmitOMPDistributeDirective(cast<OMPDistributeDirective>(*S)); break; @@ -1846,11 +1857,9 @@ llvm::Value* CodeGenFunction::EmitAsmInput( InputExpr->EvaluateAsRValue(EVResult, getContext(), true); llvm::APSInt IntResult; - if (!EVResult.Val.toIntegralConstant(IntResult, InputExpr->getType(), - getContext())) - llvm_unreachable("Invalid immediate constant!"); - - return llvm::ConstantInt::get(getLLVMContext(), IntResult); + if (EVResult.Val.toIntegralConstant(IntResult, InputExpr->getType(), + getContext())) + return llvm::ConstantInt::get(getLLVMContext(), IntResult); } Expr::EvalResult Result; @@ -1986,6 +1995,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { std::vector<llvm::Type *> ResultTruncRegTypes; std::vector<llvm::Type *> ArgTypes; std::vector<llvm::Value*> Args; + llvm::BitVector ResultTypeRequiresCast; // Keep track of inout constraints. std::string InOutConstraints; @@ -2024,13 +2034,23 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { // If this is a register output, then make the inline asm return it // by-value. If this is a memory result, return the value by-reference. 
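For reference, the three OMPMasterTaskLoop* statement classes routed through EmitStmt above map to the new OpenMP 5.0 constructs, roughly as in this sketch (illustrative source, not part of the patch):

void scale(int n, float a, float *y) {
  // Lowered by EmitOMPMasterTaskLoopSimdDirective: the taskloop is
  // generated inside a master region.
#pragma omp parallel
#pragma omp master taskloop simd
  for (int i = 0; i < n; ++i)
    y[i] *= a;

  // Combined form, lowered by EmitOMPParallelMasterTaskLoopDirective.
#pragma omp parallel master taskloop
  for (int i = 0; i < n; ++i)
    y[i] *= a;
}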
- if (!Info.allowsMemory() && hasScalarEvaluationKind(OutExpr->getType())) { + bool isScalarizableAggregate = + hasAggregateEvaluationKind(OutExpr->getType()); + if (!Info.allowsMemory() && (hasScalarEvaluationKind(OutExpr->getType()) || + isScalarizableAggregate)) { Constraints += "=" + OutputConstraint; ResultRegQualTys.push_back(OutExpr->getType()); ResultRegDests.push_back(Dest); - ResultRegTypes.push_back(ConvertTypeForMem(OutExpr->getType())); - ResultTruncRegTypes.push_back(ResultRegTypes.back()); - + ResultTruncRegTypes.push_back(ConvertTypeForMem(OutExpr->getType())); + if (Info.allowsRegister() && isScalarizableAggregate) { + ResultTypeRequiresCast.push_back(true); + unsigned Size = getContext().getTypeSize(OutExpr->getType()); + llvm::Type *ConvTy = llvm::IntegerType::get(getLLVMContext(), Size); + ResultRegTypes.push_back(ConvTy); + } else { + ResultTypeRequiresCast.push_back(false); + ResultRegTypes.push_back(ResultTruncRegTypes.back()); + } // If this output is tied to an input, and if the input is larger, then // we need to set the actual result type of the inline asm node to be the // same as the input type. @@ -2064,8 +2084,8 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { // Update largest vector width for any vector types. if (auto *VT = dyn_cast<llvm::VectorType>(ResultRegTypes.back())) - LargestVectorWidth = std::max(LargestVectorWidth, - VT->getPrimitiveSizeInBits()); + LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, + VT->getPrimitiveSizeInBits().getFixedSize()); } else { ArgTypes.push_back(Dest.getAddress().getType()); Args.push_back(Dest.getPointer()); @@ -2089,8 +2109,8 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { // Update largest vector width for any vector types. if (auto *VT = dyn_cast<llvm::VectorType>(Arg->getType())) - LargestVectorWidth = std::max(LargestVectorWidth, - VT->getPrimitiveSizeInBits()); + LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, + VT->getPrimitiveSizeInBits().getFixedSize()); if (Info.allowsRegister()) InOutConstraints += llvm::utostr(i); else @@ -2176,8 +2196,8 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { // Update largest vector width for any vector types. if (auto *VT = dyn_cast<llvm::VectorType>(Arg->getType())) - LargestVectorWidth = std::max(LargestVectorWidth, - VT->getPrimitiveSizeInBits()); + LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, + VT->getPrimitiveSizeInBits().getFixedSize()); ArgTypes.push_back(Arg->getType()); Args.push_back(Arg); @@ -2273,6 +2293,9 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { assert(RegResults.size() == ResultRegTypes.size()); assert(RegResults.size() == ResultTruncRegTypes.size()); assert(RegResults.size() == ResultRegDests.size()); + // ResultRegDests can also be populated by addReturnRegisterOutputs() above, + // in which case its size may grow. + assert(ResultTypeRequiresCast.size() <= ResultRegDests.size()); for (unsigned i = 0, e = RegResults.size(); i != e; ++i) { llvm::Value *Tmp = RegResults[i]; @@ -2302,7 +2325,24 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { } } - EmitStoreThroughLValue(RValue::get(Tmp), ResultRegDests[i]); + LValue Dest = ResultRegDests[i]; + // ResultTypeRequiresCast elements correspond to the first + // ResultTypeRequiresCast.size() elements of RegResults. 
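The scalarizable-aggregate path added above admits register outputs such as the following sketch (x86-64 assumed; the type and function names are illustrative). The aggregate comes back from the asm as an integer of matching width and is stored through a bitcast pointer, which is the bookkeeping ResultTypeRequiresCast tracks:

typedef struct {
  unsigned lo, hi; // 64 bits total: fits one general-purpose register
} u64pair;

u64pair read_pair(void) {
  u64pair p;
  // A small aggregate tied to a register output: the asm result is an i64
  // stored back through the destination address bitcast to i64*.
  asm volatile("" : "=r"(p));
  return p;
}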
+ if ((i < ResultTypeRequiresCast.size()) && ResultTypeRequiresCast[i]) { + unsigned Size = getContext().getTypeSize(ResultRegQualTys[i]); + Address A = Builder.CreateBitCast(Dest.getAddress(), + ResultRegTypes[i]->getPointerTo()); + QualType Ty = getContext().getIntTypeForBitwidth(Size, /*Signed*/ false); + if (Ty.isNull()) { + const Expr *OutExpr = S.getOutputExpr(i); + CGM.Error( + OutExpr->getExprLoc(), + "impossible constraint in asm: can't store value into a register"); + return; + } + Dest = MakeAddrLValue(A, Ty); + } + EmitStoreThroughLValue(RValue::get(Tmp), Dest); } } diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index e8fbca5108ade..6ece69d51daf0 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -120,12 +120,46 @@ public: class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) { CodeGenFunction::OMPMapVars PreCondVars; + llvm::DenseSet<const VarDecl *> EmittedAsPrivate; for (const auto *E : S.counters()) { const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + EmittedAsPrivate.insert(VD->getCanonicalDecl()); (void)PreCondVars.setVarAddr( CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType())); } + // Mark private vars as undefs. + for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { + for (const Expr *IRef : C->varlists()) { + const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); + if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { + (void)PreCondVars.setVarAddr( + CGF, OrigVD, + Address(llvm::UndefValue::get( + CGF.ConvertTypeForMem(CGF.getContext().getPointerType( + OrigVD->getType().getNonReferenceType()))), + CGF.getContext().getDeclAlign(OrigVD))); + } + } + } (void)PreCondVars.apply(CGF); + // Emit init, __range and __end variables for C++ range loops. + const Stmt *Body = + S.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); + for (unsigned Cnt = 0; Cnt < S.getCollapsedNumber(); ++Cnt) { + Body = Body->IgnoreContainers(); + if (auto *For = dyn_cast<ForStmt>(Body)) { + Body = For->getBody(); + } else { + assert(isa<CXXForRangeStmt>(Body) && + "Expected canonical for loop or range-based for loop."); + auto *CXXFor = cast<CXXForRangeStmt>(Body); + if (const Stmt *Init = CXXFor->getInit()) + CGF.EmitStmt(Init); + CGF.EmitStmt(CXXFor->getRangeStmt()); + CGF.EmitStmt(CXXFor->getEndStmt()); + Body = CXXFor->getBody(); + } + } if (const auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) { for (const auto *I : PreInits->decls()) CGF.EmitVarDecl(cast<VarDecl>(*I)); @@ -1324,6 +1358,31 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, // On a continue in the body, jump to the end. JumpDest Continue = getJumpDestInCurrentScope("omp.body.continue"); BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); + for (const Expr *E : D.finals_conditions()) { + if (!E) + continue; + // Check that loop counter in non-rectangular nest fits into the iteration + // space. + llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next"); + EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(), + getProfileCount(D.getBody())); + EmitBlock(NextBB); + } + // Emit loop variables for C++ range loops. 
+ const Stmt *Body = + D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); + for (unsigned Cnt = 0; Cnt < D.getCollapsedNumber(); ++Cnt) { + Body = Body->IgnoreContainers(); + if (auto *For = dyn_cast<ForStmt>(Body)) { + Body = For->getBody(); + } else { + assert(isa<CXXForRangeStmt>(Body) && + "Expected canonical for loop or range-based for loop."); + auto *CXXFor = cast<CXXForRangeStmt>(Body); + EmitStmt(CXXFor->getLoopVarStmt()); + Body = CXXFor->getBody(); + } + } // Emit loop body. EmitStmt(D.getBody()); // The end (updates/cleanups). @@ -1460,14 +1519,14 @@ static void emitAlignedClause(CodeGenFunction &CGF, if (!CGF.HaveInsertPoint()) return; for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) { - unsigned ClauseAlignment = 0; + llvm::APInt ClauseAlignment(64, 0); if (const Expr *AlignmentExpr = Clause->getAlignment()) { auto *AlignmentCI = cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr)); - ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue()); + ClauseAlignment = AlignmentCI->getValue(); } for (const Expr *E : Clause->varlists()) { - unsigned Alignment = ClauseAlignment; + llvm::APInt Alignment(ClauseAlignment); if (Alignment == 0) { // OpenMP [2.8.1, Description] // If no optional parameter is specified, implementation-defined default @@ -1478,12 +1537,13 @@ static void emitAlignedClause(CodeGenFunction &CGF, E->getType()->getPointeeType())) .getQuantity(); } - assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) && + assert((Alignment == 0 || Alignment.isPowerOf2()) && "alignment is not power of 2"); if (Alignment != 0) { llvm::Value *PtrValue = CGF.EmitScalarExpr(E); CGF.EmitAlignmentAssumption( - PtrValue, E, /*No second loc needed*/ SourceLocation(), Alignment); + PtrValue, E, /*No second loc needed*/ SourceLocation(), + llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment)); } } } @@ -1553,8 +1613,28 @@ static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, CGF.EmitIgnoredExpr(I); } } + // Create temp loop control variables with their init values to support + // non-rectangular loops. + CodeGenFunction::OMPMapVars PreCondVars; + for (const Expr * E: S.dependent_counters()) { + if (!E) + continue; + assert(!E->getType().getNonReferenceType()->isRecordType() && + "dependent counter must not be an iterator."); + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + Address CounterAddr = + CGF.CreateMemTemp(VD->getType().getNonReferenceType()); + (void)PreCondVars.setVarAddr(CGF, VD, CounterAddr); + } + (void)PreCondVars.apply(CGF); + for (const Expr *E : S.dependent_inits()) { + if (!E) + continue; + CGF.EmitIgnoredExpr(E); + } // Check that loop is executed at least one time. 
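Two loop shapes motivate the pre-initialization logic above: dependent_counters()/dependent_inits() describe non-rectangular nests, and the explicit emission of the implicit range variables enables OpenMP 5.0 range-based for loops. A sketch of both forms (assuming <vector>; not from this patch):

#include <vector>

void loops(int n, std::vector<int> &v) {
  // Non-rectangular nest: the inner loop's initializer depends on the
  // outer counter, so temporary copies of the counters are required to
  // evaluate the precondition.
#pragma omp parallel for collapse(2)
  for (int i = 0; i < n; ++i)
    for (int j = i; j < n; ++j)
      ;

  // OpenMP 5.0 canonical range-based for: the implicit __range/__begin/
  // __end variables must exist before the iteration count is computed.
#pragma omp parallel for
  for (int &x : v)
    x += 1;
}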
CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount); + PreCondVars.restore(CGF); } void CodeGenFunction::EmitOMPLinearClause( @@ -3044,7 +3124,8 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, Data.NumberOfParts); - OMPLexicalScope Scope(*this, S); + OMPLexicalScope Scope(*this, S, llvm::None, + !isOpenMPParallelDirective(S.getDirectiveKind())); TaskGen(*this, OutlinedFn, Data); } @@ -3112,7 +3193,7 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0); llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems); QualType BaseAndPointersType = getContext().getConstantArrayType( - getContext().VoidPtrTy, ArrSize, ArrayType::Normal, + getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); BPVD = createImplicitFirstprivateForType( getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc()); @@ -3120,7 +3201,7 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc()); QualType SizesType = getContext().getConstantArrayType( getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1), - ArrSize, ArrayType::Normal, + ArrSize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD, S.getBeginLoc()); @@ -3991,6 +4072,8 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, case OMPC_reverse_offload: case OMPC_dynamic_allocators: case OMPC_atomic_default_mem_order: + case OMPC_device_type: + case OMPC_match: llvm_unreachable("Clause is not allowed in 'omp atomic'."); } } @@ -4090,18 +4173,21 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, IsOffloadEntry, CodeGen); OMPLexicalScope Scope(CGF, S, OMPD_task); - auto &&SizeEmitter = [](CodeGenFunction &CGF, const OMPLoopDirective &D) { - OMPLoopScope(CGF, D); - // Emit calculation of the iterations count. - llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations()); - NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty, - /*isSigned=*/false); - return NumIterations; + auto &&SizeEmitter = + [IsOffloadEntry](CodeGenFunction &CGF, + const OMPLoopDirective &D) -> llvm::Value * { + if (IsOffloadEntry) { + OMPLoopScope(CGF, D); + // Emit calculation of the iterations count. 
+ llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations()); + NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty, + /*isSigned=*/false); + return NumIterations; + } + return nullptr; }; - if (IsOffloadEntry) - CGM.getOpenMPRuntime().emitTargetNumIterationsCall(CGF, S, Device, - SizeEmitter); - CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device); + CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device, + SizeEmitter); } static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, @@ -5025,6 +5111,42 @@ void CodeGenFunction::EmitOMPTaskLoopSimdDirective( EmitOMPTaskLoopBasedDirective(S); } +void CodeGenFunction::EmitOMPMasterTaskLoopDirective( + const OMPMasterTaskLoopDirective &S) { + auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + EmitOMPTaskLoopBasedDirective(S); + }; + OMPLexicalScope Scope(*this, S, llvm::None, /*EmitPreInitStmt=*/false); + CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc()); +} + +void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective( + const OMPMasterTaskLoopSimdDirective &S) { + auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + EmitOMPTaskLoopBasedDirective(S); + }; + OMPLexicalScope Scope(*this, S, llvm::None, /*EmitPreInitStmt=*/false); + CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc()); +} + +void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective( + const OMPParallelMasterTaskLoopDirective &S) { + auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { + auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF, + PrePostActionTy &Action) { + Action.Enter(CGF); + CGF.EmitOMPTaskLoopBasedDirective(S); + }; + OMPLexicalScope Scope(CGF, S, llvm::None, /*EmitPreInitStmt=*/false); + CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen, + S.getBeginLoc()); + }; + emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen, + emitEmptyBoundParameters); +} + // Generate the instructions for '#pragma omp target update' directive. void CodeGenFunction::EmitOMPTargetUpdateDirective( const OMPTargetUpdateDirective &S) { diff --git a/lib/CodeGen/CGVTables.cpp b/lib/CodeGen/CGVTables.cpp index 3cb3d35448389..f9f25e7e57adc 100644 --- a/lib/CodeGen/CGVTables.cpp +++ b/lib/CodeGen/CGVTables.cpp @@ -157,7 +157,7 @@ CodeGenFunction::GenerateVarArgsThunk(llvm::Function *Fn, const CGFunctionInfo &FnInfo, GlobalDecl GD, const ThunkInfo &Thunk) { const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl()); - const FunctionProtoType *FPT = MD->getType()->getAs<FunctionProtoType>(); + const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>(); QualType ResultType = FPT->getReturnType(); // Get the original function @@ -166,6 +166,15 @@ CodeGenFunction::GenerateVarArgsThunk(llvm::Function *Fn, llvm::Value *Callee = CGM.GetAddrOfFunction(GD, Ty, /*ForVTable=*/true); llvm::Function *BaseFn = cast<llvm::Function>(Callee); + // Cloning can't work if we don't have a definition. The Microsoft ABI may + // require thunks when a definition is not available. Emit an error in these + // cases. + if (!MD->isDefined()) { + CGM.ErrorUnsupported(MD, "return-adjusting thunk with variadic arguments"); + return Fn; + } + assert(!BaseFn->isDeclaration() && "cannot clone undefined variadic method"); + // Clone to thunk. 
llvm::ValueToValueMapTy VMap; @@ -201,6 +210,8 @@ CodeGenFunction::GenerateVarArgsThunk(llvm::Function *Fn, Builder.SetInsertPoint(&*ThisStore); llvm::Value *AdjustedThisPtr = CGM.getCXXABI().performThisAdjustment(*this, ThisPtr, Thunk.This); + AdjustedThisPtr = Builder.CreateBitCast(AdjustedThisPtr, + ThisStore->getOperand(0)->getType()); ThisStore->setOperand(0, AdjustedThisPtr); if (!Thunk.Return.isEmpty()) { @@ -231,7 +242,6 @@ void CodeGenFunction::StartThunk(llvm::Function *Fn, GlobalDecl GD, // Build FunctionArgs. const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl()); QualType ThisType = MD->getThisType(); - const FunctionProtoType *FPT = MD->getType()->getAs<FunctionProtoType>(); QualType ResultType; if (IsUnprototyped) ResultType = CGM.getContext().VoidTy; @@ -240,7 +250,7 @@ void CodeGenFunction::StartThunk(llvm::Function *Fn, GlobalDecl GD, else if (CGM.getCXXABI().hasMostDerivedReturn(GD)) ResultType = CGM.getContext().VoidPtrTy; else - ResultType = FPT->getReturnType(); + ResultType = MD->getType()->castAs<FunctionProtoType>()->getReturnType(); FunctionArgList FunctionArgs; // Create the implicit 'this' parameter declaration. @@ -291,14 +301,17 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::FunctionCallee Callee, *this, LoadCXXThisAddress(), Thunk->This) : LoadCXXThis(); - if (CurFnInfo->usesInAlloca() || IsUnprototyped) { - // We don't handle return adjusting thunks, because they require us to call - // the copy constructor. For now, fall through and pretend the return - // adjustment was empty so we don't crash. + // If perfect forwarding is required for a variadic method, a method using + // inalloca, or an unprototyped thunk, use musttail. Emit an error if this + // thunk requires a return adjustment, since that is impossible with musttail. + if (CurFnInfo->usesInAlloca() || CurFnInfo->isVariadic() || IsUnprototyped) { if (Thunk && !Thunk->Return.isEmpty()) { if (IsUnprototyped) CGM.ErrorUnsupported( MD, "return-adjusting thunk with incomplete parameter type"); + else if (CurFnInfo->isVariadic()) + llvm_unreachable("shouldn't try to emit musttail return-adjusting " + "thunks for variadic functions"); else CGM.ErrorUnsupported( MD, "non-trivial argument copy for return-adjusting thunk"); @@ -549,16 +562,32 @@ llvm::Constant *CodeGenVTables::maybeEmitThunk(GlobalDecl GD, CGM.SetLLVMFunctionAttributesForDefinition(GD.getDecl(), ThunkFn); + // Thunks for variadic methods are special because in general variadic + // arguments cannot be perfectly forwarded. In the general case, clang + // implements such thunks by cloning the original function body. However, for + // thunks with no return adjustment on targets that support musttail, we can + // use musttail to perfectly forward the variadic arguments. + bool ShouldCloneVarArgs = false; if (!IsUnprototyped && ThunkFn->isVarArg()) { - // Varargs thunks are special; we can't just generate a call because - // we can't copy the varargs. Our implementation is rather - // expensive/sucky at the moment, so don't generate the thunk unless - // we have to. - // FIXME: Do something better here; GenerateVarArgsThunk is extremely ugly. 
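The musttail strategy above applies to cases like this sketch (the hierarchy is illustrative, not from the patch): D::f overrides a variadic virtual in two bases, so the vtable for B-in-D needs a this-adjusting thunk; with no return adjustment, targets such as x86 and AArch64 can now forward the varargs with a musttail call instead of cloning the body.

struct A {
  virtual void f(int n, ...);
};
struct B {
  virtual void f(int n, ...);
};
// The thunk for B-in-D cannot repack its variable arguments; musttail
// forwards them perfectly where the target supports it.
struct D : A, B {
  void f(int n, ...) override;
};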
+ ShouldCloneVarArgs = true; + if (TI.Return.isEmpty()) { + switch (CGM.getTriple().getArch()) { + case llvm::Triple::x86_64: + case llvm::Triple::x86: + case llvm::Triple::aarch64: + ShouldCloneVarArgs = false; + break; + default: + break; + } + } + } + + if (ShouldCloneVarArgs) { if (UseAvailableExternallyLinkage) return ThunkFn; - ThunkFn = CodeGenFunction(CGM).GenerateVarArgsThunk(ThunkFn, FnInfo, GD, - TI); + ThunkFn = + CodeGenFunction(CGM).GenerateVarArgsThunk(ThunkFn, FnInfo, GD, TI); } else { // Normal thunk body generation. CodeGenFunction(CGM).generateThunk(ThunkFn, FnInfo, GD, TI, IsUnprototyped); @@ -779,7 +808,7 @@ CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD, assert(!VTable->isDeclaration() && "Shouldn't set properties on declaration"); CGM.setGVProperties(VTable, RD); - CGM.EmitVTableTypeMetadata(VTable, *VTLayout.get()); + CGM.EmitVTableTypeMetadata(RD, VTable, *VTLayout.get()); return VTable; } @@ -1010,7 +1039,32 @@ bool CodeGenModule::HasHiddenLTOVisibility(const CXXRecordDecl *RD) { return true; } -void CodeGenModule::EmitVTableTypeMetadata(llvm::GlobalVariable *VTable, +llvm::GlobalObject::VCallVisibility +CodeGenModule::GetVCallVisibilityLevel(const CXXRecordDecl *RD) { + LinkageInfo LV = RD->getLinkageAndVisibility(); + llvm::GlobalObject::VCallVisibility TypeVis; + if (!isExternallyVisible(LV.getLinkage())) + TypeVis = llvm::GlobalObject::VCallVisibilityTranslationUnit; + else if (HasHiddenLTOVisibility(RD)) + TypeVis = llvm::GlobalObject::VCallVisibilityLinkageUnit; + else + TypeVis = llvm::GlobalObject::VCallVisibilityPublic; + + for (auto B : RD->bases()) + if (B.getType()->getAsCXXRecordDecl()->isDynamicClass()) + TypeVis = std::min(TypeVis, + GetVCallVisibilityLevel(B.getType()->getAsCXXRecordDecl())); + + for (auto B : RD->vbases()) + if (B.getType()->getAsCXXRecordDecl()->isDynamicClass()) + TypeVis = std::min(TypeVis, + GetVCallVisibilityLevel(B.getType()->getAsCXXRecordDecl())); + + return TypeVis; +} + +void CodeGenModule::EmitVTableTypeMetadata(const CXXRecordDecl *RD, + llvm::GlobalVariable *VTable, const VTableLayout &VTLayout) { if (!getCodeGenOpts().LTOUnit) return; @@ -1070,4 +1124,10 @@ void CodeGenModule::EmitVTableTypeMetadata(llvm::GlobalVariable *VTable, VTable->addTypeMetadata((PointerWidth * I).getQuantity(), MD); } } + + if (getCodeGenOpts().VirtualFunctionElimination) { + llvm::GlobalObject::VCallVisibility TypeVis = GetVCallVisibilityLevel(RD); + if (TypeVis != llvm::GlobalObject::VCallVisibilityPublic) + VTable->addVCallVisibilityMetadata(TypeVis); + } } diff --git a/lib/CodeGen/CodeGenAction.cpp b/lib/CodeGen/CodeGenAction.cpp index 0ae9ea427d659..87bda4a0fc2cc 100644 --- a/lib/CodeGen/CodeGenAction.cpp +++ b/lib/CodeGen/CodeGenAction.cpp @@ -14,7 +14,9 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclGroup.h" +#include "clang/Basic/DiagnosticFrontend.h" #include "clang/Basic/FileManager.h" +#include "clang/Basic/LangStandard.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/BackendUtil.h" @@ -37,6 +39,7 @@ #include "llvm/Pass.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Support/Timer.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/YAMLTraits.h" @@ -228,6 +231,7 @@ namespace clang { void HandleTranslationUnit(ASTContext &C) override { { + llvm::TimeTraceScope TimeScope("Frontend", StringRef("")); 
PrettyStackTraceString CrashInfo("Per-file LLVM IR generation"); if (FrontendTimesIsEnabled) { LLVMIRGenerationRefCount += 1; @@ -260,7 +264,7 @@ namespace clang { std::unique_ptr<DiagnosticHandler> OldDiagnosticHandler = Ctx.getDiagnosticHandler(); - Ctx.setDiagnosticHandler(llvm::make_unique<ClangDiagnosticHandler>( + Ctx.setDiagnosticHandler(std::make_unique<ClangDiagnosticHandler>( CodeGenOpts, this)); Expected<std::unique_ptr<llvm::ToolOutputFile>> OptRecordFileOrErr = @@ -362,6 +366,9 @@ namespace clang { bool StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D); /// Specialized handler for unsupported backend feature diagnostic. void UnsupportedDiagHandler(const llvm::DiagnosticInfoUnsupported &D); + /// Specialized handler for misexpect warnings. + /// Note that misexpect remarks are emitted through ORE + void MisExpectDiagHandler(const llvm::DiagnosticInfoMisExpect &D); /// Specialized handlers for optimization remarks. /// Note that these handlers only accept remarks and they always handle /// them. @@ -561,13 +568,13 @@ const FullSourceLoc BackendConsumer::getBestLocationFromDebugLoc( if (D.isLocationAvailable()) { D.getLocation(Filename, Line, Column); if (Line > 0) { - const FileEntry *FE = FileMgr.getFile(Filename); + auto FE = FileMgr.getFile(Filename); if (!FE) FE = FileMgr.getFile(D.getAbsolutePath()); if (FE) { // If -gcolumn-info was not used, Column will be 0. This upsets the // source manager, so pass 1 if Column is not set. - DILoc = SourceMgr.translateFileLineCol(FE, Line, Column ? Column : 1); + DILoc = SourceMgr.translateFileLineCol(*FE, Line, Column ? Column : 1); } } BadDebugInfo = DILoc.isInvalid(); @@ -614,6 +621,25 @@ void BackendConsumer::UnsupportedDiagHandler( << Filename << Line << Column; } +void BackendConsumer::MisExpectDiagHandler( + const llvm::DiagnosticInfoMisExpect &D) { + StringRef Filename; + unsigned Line, Column; + bool BadDebugInfo = false; + FullSourceLoc Loc = + getBestLocationFromDebugLoc(D, BadDebugInfo, Filename, Line, Column); + + Diags.Report(Loc, diag::warn_profile_data_misexpect) << D.getMsg().str(); + + if (BadDebugInfo) + // If we were not able to translate the file:line:col information + // back to a SourceLocation, at least emit a note stating that + // we could not translate this location. This can happen in the + // case of #line directives. + Diags.Report(Loc, diag::note_fe_backend_invalid_loc) + << Filename << Line << Column; +} + void BackendConsumer::EmitOptimizationMessage( const llvm::DiagnosticInfoOptimizationBase &D, unsigned DiagID) { // We only support warnings and remarks. @@ -784,6 +810,9 @@ void BackendConsumer::DiagnosticHandlerImpl(const DiagnosticInfo &DI) { case llvm::DK_Unsupported: UnsupportedDiagHandler(cast<DiagnosticInfoUnsupported>(DI)); return; + case llvm::DK_MisExpect: + MisExpectDiagHandler(cast<DiagnosticInfoMisExpect>(DI)); + return; default: // Plugin IDs are not bound to any value as they are set dynamically. 
ComputeDiagRemarkID(Severity, backend_plugin, DiagID); @@ -914,7 +943,7 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { if (CI.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo && CI.getCodeGenOpts().MacroDebugInfo) { std::unique_ptr<PPCallbacks> Callbacks = - llvm::make_unique<MacroPPCallbacks>(BEConsumer->getCodeGenerator(), + std::make_unique<MacroPPCallbacks>(BEConsumer->getCodeGenerator(), CI.getPreprocessor()); CI.getPreprocessor().addPPCallbacks(std::move(Callbacks)); } @@ -975,7 +1004,7 @@ CodeGenAction::loadModule(MemoryBufferRef MBRef) { // the file was already processed by indexing and will be passed to the // linker using merged object file. if (!Bm) { - auto M = llvm::make_unique<llvm::Module>("empty", *VMContext); + auto M = std::make_unique<llvm::Module>("empty", *VMContext); M->setTargetTriple(CI.getTargetOpts().Triple); return M; } @@ -1014,7 +1043,7 @@ CodeGenAction::loadModule(MemoryBufferRef MBRef) { void CodeGenAction::ExecuteAction() { // If this is an IR file, we have to treat it specially. - if (getCurrentFileKind().getLanguage() == InputKind::LLVM_IR) { + if (getCurrentFileKind().getLanguage() == Language::LLVM_IR) { BackendAction BA = static_cast<BackendAction>(Act); CompilerInstance &CI = getCompilerInstance(); std::unique_ptr<raw_pwrite_stream> OS = diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp index eafe26674434f..3f9a52ab7638a 100644 --- a/lib/CodeGen/CodeGenFunction.cpp +++ b/lib/CodeGen/CodeGenFunction.cpp @@ -47,13 +47,10 @@ static bool shouldEmitLifetimeMarkers(const CodeGenOptions &CGOpts, if (CGOpts.DisableLifetimeMarkers) return false; - // Disable lifetime markers in msan builds. - // FIXME: Remove this when msan works with lifetime markers. - if (LangOpts.Sanitize.has(SanitizerKind::Memory)) - return false; - - // Asan uses markers for use-after-scope checks. - if (CGOpts.SanitizeAddressUseAfterScope) + // Sanitizers may use markers. + if (CGOpts.SanitizeAddressUseAfterScope || + LangOpts.Sanitize.has(SanitizerKind::HWAddress) || + LangOpts.Sanitize.has(SanitizerKind::Memory)) return true; // For now, only in optimized builds. @@ -197,7 +194,7 @@ TypeEvaluationKind CodeGenFunction::getEvaluationKind(QualType type) { #define NON_CANONICAL_TYPE(name, parent) case Type::name: #define DEPENDENT_TYPE(name, parent) case Type::name: #define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(name, parent) case Type::name: -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" llvm_unreachable("non-canonical or dependent type in IR-generation"); case Type::Auto: @@ -434,13 +431,13 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { // Scan function arguments for vector width. for (llvm::Argument &A : CurFn->args()) if (auto *VT = dyn_cast<llvm::VectorType>(A.getType())) - LargestVectorWidth = std::max(LargestVectorWidth, - VT->getPrimitiveSizeInBits()); + LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, + VT->getPrimitiveSizeInBits().getFixedSize()); // Update vector width based on return type. if (auto *VT = dyn_cast<llvm::VectorType>(CurFn->getReturnType())) - LargestVectorWidth = std::max(LargestVectorWidth, - VT->getPrimitiveSizeInBits()); + LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, + VT->getPrimitiveSizeInBits().getFixedSize()); // Add the required-vector-width attribute. This contains the max width from: // 1. min-vector-width attribute used in the source program. 
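The new DK_MisExpect handler above turns profile/annotation disagreements into warn_profile_data_misexpect diagnostics. A sketch of a trigger (function and usage illustrative): compiled with -fprofile-instr-use against a profile in which the branch is almost always taken, the unlikely hint below would be flagged.

int process(int err) {
  // Annotated unlikely but measured likely in the training profile: the
  // backend reports a misexpect diagnostic through the handler above.
  if (__builtin_expect(err != 0, 0))
    return -1;
  return 0;
}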
@@ -732,6 +729,15 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, SanOpts.Mask &= ~SanitizerKind::CFIUnrelatedCast; } + // Ignore null checks in coroutine functions since the coroutines passes + // are not aware of how to move the extra UBSan instructions across the split + // coroutine boundaries. + if (D && SanOpts.has(SanitizerKind::Null)) + if (const auto *FD = dyn_cast<FunctionDecl>(D)) + if (FD->getBody() && + FD->getBody()->getStmtClass() == Stmt::CoroutineBodyStmtClass) + SanOpts.Mask &= ~SanitizerKind::Null; + // Apply xray attributes to the function (as a string, for now) if (D) { if (const auto *XRayAttr = D->getAttr<XRayInstrumentAttr>()) { @@ -762,6 +768,9 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, if (CGM.getCodeGenOpts().ProfileSampleAccurate) Fn->addFnAttr("profile-sample-accurate"); + if (D && D->hasAttr<CFICanonicalJumpTableAttr>()) + Fn->addFnAttr("cfi-canonical-jump-table"); + if (getLangOpts().OpenCL) { // Add metadata for a kernel function. if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) @@ -1662,7 +1671,7 @@ CodeGenFunction::EmitNullInitialization(Address DestPtr, QualType Ty) { llvm::GlobalVariable::PrivateLinkage, NullConstant, Twine()); CharUnits NullAlign = DestPtr.getAlignment(); - NullVariable->setAlignment(NullAlign.getQuantity()); + NullVariable->setAlignment(NullAlign.getAsAlign()); Address SrcPtr(Builder.CreateBitCast(NullVariable, Builder.getInt8PtrTy()), NullAlign); @@ -1862,7 +1871,7 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) { #define NON_CANONICAL_TYPE(Class, Base) #define DEPENDENT_TYPE(Class, Base) case Type::Class: #define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" llvm_unreachable("unexpected dependent type!"); // These types are never variably-modified. @@ -2048,24 +2057,9 @@ void CodeGenFunction::EmitAlignmentAssumption(llvm::Value *PtrValue, } void CodeGenFunction::EmitAlignmentAssumption(llvm::Value *PtrValue, - QualType Ty, SourceLocation Loc, - SourceLocation AssumptionLoc, - unsigned Alignment, - llvm::Value *OffsetValue) { - llvm::Value *TheCheck; - llvm::Instruction *Assumption = Builder.CreateAlignmentAssumption( - CGM.getDataLayout(), PtrValue, Alignment, OffsetValue, &TheCheck); - if (SanOpts.has(SanitizerKind::Alignment)) { - llvm::Value *AlignmentVal = llvm::ConstantInt::get(IntPtrTy, Alignment); - EmitAlignmentAssumptionCheck(PtrValue, Ty, Loc, AssumptionLoc, AlignmentVal, - OffsetValue, TheCheck, Assumption); - } -} - -void CodeGenFunction::EmitAlignmentAssumption(llvm::Value *PtrValue, const Expr *E, SourceLocation AssumptionLoc, - unsigned Alignment, + llvm::Value *Alignment, llvm::Value *OffsetValue) { if (auto *CE = dyn_cast<CastExpr>(E)) E = CE->getSubExprAsWritten(); @@ -2194,7 +2188,7 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc, // Get the current enclosing function if it exists. If it doesn't // we can't check the target features anyhow. 
- const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurFuncDecl); + const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(CurCodeDecl); if (!FD) return; diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index c3060d1fb3514..99bc85ba3773a 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -1034,7 +1034,7 @@ public: assert(isInConditionalBranch()); llvm::BasicBlock *block = OutermostConditional->getStartingBlock(); auto store = new llvm::StoreInst(value, addr.getPointer(), &block->back()); - store->setAlignment(addr.getAlignment().getQuantity()); + store->setAlignment(addr.getAlignment().getAsAlign()); } /// An RAII object to record that we're evaluating a statement @@ -2829,13 +2829,8 @@ public: llvm::Value *Alignment, llvm::Value *OffsetValue = nullptr); - void EmitAlignmentAssumption(llvm::Value *PtrValue, QualType Ty, - SourceLocation Loc, SourceLocation AssumptionLoc, - unsigned Alignment, - llvm::Value *OffsetValue = nullptr); - void EmitAlignmentAssumption(llvm::Value *PtrValue, const Expr *E, - SourceLocation AssumptionLoc, unsigned Alignment, + SourceLocation AssumptionLoc, llvm::Value *Alignment, llvm::Value *OffsetValue = nullptr); //===--------------------------------------------------------------------===// @@ -3160,6 +3155,11 @@ public: void EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S); void EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S); void EmitOMPTaskLoopSimdDirective(const OMPTaskLoopSimdDirective &S); + void EmitOMPMasterTaskLoopDirective(const OMPMasterTaskLoopDirective &S); + void + EmitOMPMasterTaskLoopSimdDirective(const OMPMasterTaskLoopSimdDirective &S); + void EmitOMPParallelMasterTaskLoopDirective( + const OMPParallelMasterTaskLoopDirective &S); void EmitOMPDistributeDirective(const OMPDistributeDirective &S); void EmitOMPDistributeParallelForDirective( const OMPDistributeParallelForDirective &S); @@ -3760,6 +3760,7 @@ public: llvm::Value *vectorWrapScalar16(llvm::Value *Op); llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch); + llvm::Value *EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *BuildVector(ArrayRef<llvm::Value*> Ops); llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E); diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp index 6ff72ec045e62..b05a58848e824 100644 --- a/lib/CodeGen/CodeGenModule.cpp +++ b/lib/CodeGen/CodeGenModule.cpp @@ -414,12 +414,7 @@ void CodeGenModule::Release() { OpenMPRuntime->emitRequiresDirectiveRegFun()) { AddGlobalCtor(OpenMPRequiresDirectiveRegFun, 0); } - if (llvm::Function *OpenMPRegistrationFunction = - OpenMPRuntime->emitRegistrationFunction()) { - auto ComdatKey = OpenMPRegistrationFunction->hasComdat() ? - OpenMPRegistrationFunction : nullptr; - AddGlobalCtor(OpenMPRegistrationFunction, 0, ComdatKey); - } + OpenMPRuntime->createOffloadEntriesAndInfoMetadata(); OpenMPRuntime->clear(); } if (PGOReader) { @@ -535,6 +530,12 @@ void CodeGenModule::Release() { getModule().addModuleFlag(llvm::Module::Override, "Cross-DSO CFI", 1); } + if (LangOpts.Sanitize.has(SanitizerKind::CFIICall)) { + getModule().addModuleFlag(llvm::Module::Override, + "CFI Canonical Jump Tables", + CodeGenOpts.SanitizeCfiCanonicalJumpTables); + } + if (CodeGenOpts.CFProtectionReturn && Target.checkCFProtectionReturnSupported(getDiags())) { // Indicate that we want to instrument return control flow protection. 
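The EmitAlignmentAssumption overloads now take the alignment as an llvm::Value* rather than an unsigned, so large or expression-derived alignments survive intact. Both source forms in this sketch (names illustrative) rely on that assumption machinery:

float *assume_aligned(float *p) {
  // Builtin form: the second argument becomes the alignment operand of
  // the generated llvm.assume.
  return static_cast<float *>(__builtin_assume_aligned(p, 64));
}

void simd_add(float *p, int n) {
  // OpenMP aligned clause: the clause's constant expression is now passed
  // through as a ConstantInt rather than truncated to unsigned.
#pragma omp simd aligned(p : 32)
  for (int i = 0; i < n; ++i)
    p[i] += 1.0f;
}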
@@ -1176,7 +1177,7 @@ void CodeGenModule::EmitCtorList(CtorList &Fns, const char *GlobalName) { // The LTO linker doesn't seem to like it when we set an alignment // on appending variables. Take it off as a workaround. - list->setAlignment(0); + list->setAlignment(llvm::None); Fns.clear(); } @@ -1590,11 +1591,11 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, unsigned alignment = D->getMaxAlignment() / Context.getCharWidth(); if (alignment) - F->setAlignment(alignment); + F->setAlignment(llvm::Align(alignment)); if (!D->hasAttr<AlignedAttr>()) if (LangOpts.FunctionAlignment) - F->setAlignment(1 << LangOpts.FunctionAlignment); + F->setAlignment(llvm::Align(1ull << LangOpts.FunctionAlignment)); // Some C++ ABIs require 2-byte alignment for member functions, in order to // reserve a bit for differentiating between virtual and non-virtual member @@ -1602,13 +1603,20 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, // member function, set its alignment accordingly. if (getTarget().getCXXABI().areMemberFunctionsAligned()) { if (F->getAlignment() < 2 && isa<CXXMethodDecl>(D)) - F->setAlignment(2); + F->setAlignment(llvm::Align(2)); } - // In the cross-dso CFI mode, we want !type attributes on definitions only. - if (CodeGenOpts.SanitizeCfiCrossDso) - if (auto *FD = dyn_cast<FunctionDecl>(D)) - CreateFunctionTypeMetadataForIcall(FD, F); + // In the cross-dso CFI mode with canonical jump tables, we want !type + // attributes on definitions only. + if (CodeGenOpts.SanitizeCfiCrossDso && + CodeGenOpts.SanitizeCfiCanonicalJumpTables) { + if (auto *FD = dyn_cast<FunctionDecl>(D)) { + // Skip available_externally functions. They won't be codegen'ed in the + // current module anyway. + if (getContext().GetGVALinkageForFunction(FD) != GVA_AvailableExternally) + CreateFunctionTypeMetadataForIcall(FD, F); + } + } // Emit type metadata on member functions for member function pointer checks. // These are only ever necessary on definitions; we're guaranteed that the @@ -1704,6 +1712,8 @@ void CodeGenModule::setNonAliasAttributes(GlobalDecl GD, GV->addAttribute("data-section", SA->getName()); if (auto *SA = D->getAttr<PragmaClangRodataSectionAttr>()) GV->addAttribute("rodata-section", SA->getName()); + if (auto *SA = D->getAttr<PragmaClangRelroSectionAttr>()) + GV->addAttribute("relro-section", SA->getName()); } if (auto *F = dyn_cast<llvm::Function>(GO)) { @@ -1765,14 +1775,6 @@ void CodeGenModule::CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD, if (isa<CXXMethodDecl>(FD) && !cast<CXXMethodDecl>(FD)->isStatic()) return; - // Additionally, if building with cross-DSO support... - if (CodeGenOpts.SanitizeCfiCrossDso) { - // Skip available_externally functions. They won't be codegen'ed in the - // current module anyway. - if (getContext().GetGVALinkageForFunction(FD) == GVA_AvailableExternally) - return; - } - llvm::Metadata *MD = CreateMetadataIdentifierForType(FD->getType()); F->addTypeMetadata(0, MD); F->addTypeMetadata(0, CreateMetadataIdentifierGeneralized(FD->getType())); @@ -1849,8 +1851,11 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, F->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); // Don't emit entries for function declarations in the cross-DSO mode. This - // is handled with better precision by the receiving DSO. - if (!CodeGenOpts.SanitizeCfiCrossDso) + // is handled with better precision by the receiving DSO. 
But if jump tables // are non-canonical then we need type metadata in order to produce the local // jump table. + if (!CodeGenOpts.SanitizeCfiCrossDso || + !CodeGenOpts.SanitizeCfiCanonicalJumpTables) CreateFunctionTypeMetadataForIcall(FD, F); if (getLangOpts().OpenMP && FD->hasAttr<OMPDeclareSimdDeclAttr>()) @@ -2114,6 +2119,10 @@ void CodeGenModule::EmitDeferred() { if (!GV->isDeclaration()) continue; + // If this is OpenMP, check if it is legal to emit this global normally. + if (LangOpts.OpenMP && OpenMPRuntime && OpenMPRuntime->emitTargetGlobal(D)) + continue; + // Otherwise, emit the definition and move on to the next one. EmitGlobalDefinition(D, GV); @@ -2310,11 +2319,20 @@ bool CodeGenModule::MustBeEmitted(const ValueDecl *Global) { } bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) { - if (const auto *FD = dyn_cast<FunctionDecl>(Global)) + if (const auto *FD = dyn_cast<FunctionDecl>(Global)) { if (FD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation) // Implicit template instantiations may change linkage if they are later // explicitly instantiated, so they should not be emitted eagerly. return false; + // In OpenMP 5.0 a function may be marked as device_type(nohost) and we should + // not emit it eagerly unless we are sure that the function must be emitted on + // the host. + if (LangOpts.OpenMP >= 50 && !LangOpts.OpenMPSimd && + !LangOpts.OpenMPIsDevice && + !OMPDeclareTargetDeclAttr::getDeviceType(FD) && + !FD->isUsed(/*CheckUsedAttr=*/false) && !FD->isReferenced()) + return false; + } if (const auto *VD = dyn_cast<VarDecl>(Global)) if (Context.getInlineVariableDefinitionKind(VD) == ASTContext::InlineVariableDefinitionKind::WeakUnknown) @@ -2437,8 +2455,7 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { } if (LangOpts.OpenMP) { - // If this is OpenMP device, check if it is legal to emit this global - // normally. + // If this is OpenMP, check if it is legal to emit this global normally. if (OpenMPRuntime && OpenMPRuntime->emitTargetGlobal(GD)) return; if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(Global)) { @@ -2512,6 +2529,11 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { return; } + // Check if this must be emitted as declare variant. + if (LangOpts.OpenMP && isa<FunctionDecl>(Global) && OpenMPRuntime && + OpenMPRuntime->emitDeclareVariant(GD, /*IsForDefinition=*/false)) + return; + // If we're deferring emission of a C++ variable with an // initializer, remember the order in which it appeared in the file. if (getLangOpts().CPlusPlus && isa<VarDecl>(Global) && @@ -2717,6 +2739,50 @@ void CodeGenModule::EmitMultiVersionFunctionDefinition(GlobalDecl GD, EmitGlobalFunctionDefinition(GD, GV); } +void CodeGenModule::emitOpenMPDeviceFunctionRedefinition( + GlobalDecl OldGD, GlobalDecl NewGD, llvm::GlobalValue *GV) { + assert(getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice && + OpenMPRuntime && "Expected OpenMP device mode."); + const auto *D = cast<FunctionDecl>(OldGD.getDecl()); + + // Compute the function info and LLVM type. + const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(OldGD); + llvm::FunctionType *Ty = getTypes().GetFunctionType(FI); + + // Get or create the prototype for the function. 
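Two OpenMP 5.0 declarative features drive the changes here: MayBeEmittedEagerly now defers functions that may turn out to be device_type(nohost), and emitOpenMPDeviceFunctionRedefinition emits a base function with its variant's body because NVPTX lacks global aliases. A sketch of both (all names illustrative):

#pragma omp declare target device_type(nohost)
int device_only(void); // must not be emitted eagerly for the host
#pragma omp end declare target

int foo_gpu(void) { return 1; }

// When compiling for the device, foo's definition is emitted using
// foo_gpu's body instead of being aliased to it.
#pragma omp declare variant(foo_gpu) match(device = {kind(nohost)})
int foo(void) { return 0; }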
+ if (!GV || (GV->getType()->getElementType() != Ty)) { + GV = cast<llvm::GlobalValue>(GetOrCreateLLVMFunction( + getMangledName(OldGD), Ty, GlobalDecl(), /*ForVTable=*/false, + /*DontDefer=*/true, /*IsThunk=*/false, llvm::AttributeList(), + ForDefinition)); + SetFunctionAttributes(OldGD, cast<llvm::Function>(GV), + /*IsIncompleteFunction=*/false, + /*IsThunk=*/false); + } + // We need to set linkage and visibility on the function before + // generating code for it because various parts of IR generation + // want to propagate this information down (e.g. to local static + // declarations). + auto *Fn = cast<llvm::Function>(GV); + setFunctionLinkage(OldGD, Fn); + + // FIXME: this is redundant with part of + // setFunctionDefinitionAttributes + setGVProperties(Fn, OldGD); + + MaybeHandleStaticInExternC(D, Fn); + + maybeSetTrivialComdat(*D, *Fn); + + CodeGenFunction(*this).GenerateCode(NewGD, Fn, FI); + + setNonAliasAttributes(OldGD, Fn); + SetLLVMFunctionAttributesForDefinition(D, Fn); + + if (D->hasAttr<AnnotateAttr>()) + AddGlobalAnnotations(D, Fn); +} + void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { const auto *D = cast<ValueDecl>(GD.getDecl()); @@ -2816,11 +2882,13 @@ void CodeGenModule::emitMultiVersionFunctions() { llvm::Function *ResolverFunc; const TargetInfo &TI = getTarget(); - if (TI.supportsIFunc() || FD->isTargetMultiVersion()) + if (TI.supportsIFunc() || FD->isTargetMultiVersion()) { ResolverFunc = cast<llvm::Function>( GetGlobalValue((getMangledName(GD) + ".resolver").str())); - else + ResolverFunc->setLinkage(llvm::Function::WeakODRLinkage); + } else { ResolverFunc = cast<llvm::Function>(GetGlobalValue(getMangledName(GD))); + } if (supportsCOMDAT()) ResolverFunc->setComdat( @@ -2864,6 +2932,10 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) { auto *ResolverFunc = cast<llvm::Function>(GetOrCreateLLVMFunction( ResolverName, ResolverType, ResolverGD, /*ForVTable=*/false)); + ResolverFunc->setLinkage(llvm::Function::WeakODRLinkage); + if (supportsCOMDAT()) + ResolverFunc->setComdat( + getModule().getOrInsertComdat(ResolverFunc->getName())); SmallVector<CodeGenFunction::MultiVersionResolverOption, 10> Options; const TargetInfo &Target = getTarget(); @@ -2928,6 +3000,21 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) { CodeGenFunction CGF(*this); CGF.EmitMultiVersionResolver(ResolverFunc, Options); + + if (getTarget().supportsIFunc()) { + std::string AliasName = getMangledNameImpl( + *this, GD, FD, /*OmitMultiVersionMangling=*/true); + llvm::Constant *AliasFunc = GetGlobalValue(AliasName); + if (!AliasFunc) { + auto *IFunc = cast<llvm::GlobalIFunc>(GetOrCreateLLVMFunction( + AliasName, DeclTy, GD, /*ForVTable=*/false, /*DontDefer=*/true, + /*IsThunk=*/false, llvm::AttributeList(), NotForDefinition)); + auto *GA = llvm::GlobalAlias::create( + DeclTy, 0, getFunctionLinkage(GD), AliasName, IFunc, &getModule()); + GA->setLinkage(llvm::Function::WeakODRLinkage); + SetCommonAttributes(GD, GA); + } + } } /// If a dispatcher for the specified mangled name is not in the module, create @@ -2964,7 +3051,7 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver( MangledName + ".resolver", ResolverType, GlobalDecl{}, /*ForVTable=*/false); llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create( - DeclTy, 0, llvm::Function::ExternalLinkage, "", Resolver, &getModule()); + DeclTy, 0, llvm::Function::WeakODRLinkage, "", Resolver, &getModule()); GIF->setName(ResolverName); SetCommonAttributes(FD, GIF); @@ -3010,6 +3097,10 @@ 
llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( EmitGlobal(GDDef); } } + // Check if this must be emitted as declare variant and emit reference to + // the declare variant function. + if (LangOpts.OpenMP && OpenMPRuntime) + (void)OpenMPRuntime->emitDeclareVariant(GD, /*IsForDefinition=*/true); if (FD->isMultiVersion()) { const auto *TA = FD->getAttr<TargetAttr>(); @@ -3398,7 +3489,7 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, // handling. GV->setConstant(isTypeConstant(D->getType(), false)); - GV->setAlignment(getContext().getDeclAlign(D).getQuantity()); + GV->setAlignment(getContext().getDeclAlign(D).getAsAlign()); setLinkageForGV(GV, D); @@ -3455,7 +3546,8 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, // Make a new global with the correct type, this is now guaranteed // to work. auto *NewGV = cast<llvm::GlobalVariable>( - GetAddrOfGlobalVar(D, InitType, IsForDefinition)); + GetAddrOfGlobalVar(D, InitType, IsForDefinition) + ->stripPointerCasts()); // Erase the old global, since it is no longer used. GV->eraseFromParent(); @@ -3548,7 +3640,7 @@ llvm::GlobalVariable *CodeGenModule::CreateOrReplaceCXXRuntimeVariable( !GV->hasAvailableExternallyLinkage()) GV->setComdat(TheModule.getOrInsertComdat(GV->getName())); - GV->setAlignment(Alignment); + GV->setAlignment(llvm::MaybeAlign(Alignment)); return GV; } @@ -3768,9 +3860,9 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, return; llvm::Constant *Init = nullptr; - CXXRecordDecl *RD = ASTTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl(); bool NeedsGlobalCtor = false; - bool NeedsGlobalDtor = RD && !RD->hasTrivialDestructor(); + bool NeedsGlobalDtor = + D->needsDestruction(getContext()) == QualType::DK_cxx_destructor; const VarDecl *InitDecl; const Expr *InitExpr = D->getAnyInitializer(InitDecl); @@ -3837,14 +3929,8 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, llvm::Constant *Entry = GetAddrOfGlobalVar(D, InitType, ForDefinition_t(!IsTentative)); - // Strip off a bitcast if we got one back. - if (auto *CE = dyn_cast<llvm::ConstantExpr>(Entry)) { - assert(CE->getOpcode() == llvm::Instruction::BitCast || - CE->getOpcode() == llvm::Instruction::AddrSpaceCast || - // All zero index gep. - CE->getOpcode() == llvm::Instruction::GetElementPtr); - Entry = CE->getOperand(0); - } + // Strip off pointer casts if we got them. + Entry = Entry->stripPointerCasts(); // Entry is now either a Function or GlobalVariable. auto *GV = dyn_cast<llvm::GlobalVariable>(Entry); @@ -3867,7 +3953,8 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, // Make a new global with the correct type, this is now guaranteed to work. GV = cast<llvm::GlobalVariable>( - GetAddrOfGlobalVar(D, InitType, ForDefinition_t(!IsTentative))); + GetAddrOfGlobalVar(D, InitType, ForDefinition_t(!IsTentative)) + ->stripPointerCasts()); // Replace all uses of the old global with the new global llvm::Constant *NewPtrForOldDecl = @@ -3944,8 +4031,7 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, GV->setConstant(true); } - GV->setAlignment(getContext().getDeclAlign(D).getQuantity()); - + GV->setAlignment(getContext().getDeclAlign(D).getAsAlign()); // On Darwin, if the normal linkage of a C++ thread_local variable is // LinkOnce or Weak, we keep the normal linkage to prevent multiple @@ -4025,6 +4111,7 @@ static bool isVarDeclStrongDefinition(const ASTContext &Context, // If no specialized section name is applicable, it will resort to default.
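// (For instance, a global placed by `#pragma clang section bss = ".mybss"`
// must be emitted as a strong definition so the requested section is
// honored; the PragmaClangRelroSectionAttr check added here extends this to
// `#pragma clang section relro = "..."`.)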
if (D->hasAttr<PragmaClangBSSSectionAttr>() || D->hasAttr<PragmaClangDataSectionAttr>() || + D->hasAttr<PragmaClangRelroSectionAttr>() || D->hasAttr<PragmaClangRodataSectionAttr>()) return true; @@ -4286,6 +4373,11 @@ void CodeGenModule::HandleCXXStaticMemberVarInstantiation(VarDecl *VD) { void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { + // Check if this must be emitted as declare variant. + if (LangOpts.OpenMP && OpenMPRuntime && + OpenMPRuntime->emitDeclareVariant(GD, /*IsForDefinition=*/true)) + return; + const auto *D = cast<FunctionDecl>(GD.getDecl()); // Compute the function info and LLVM type. @@ -4355,17 +4447,22 @@ void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) { // Create a reference to the named value. This ensures that it is emitted // if a deferred decl. llvm::Constant *Aliasee; - if (isa<llvm::FunctionType>(DeclTy)) + llvm::GlobalValue::LinkageTypes LT; + if (isa<llvm::FunctionType>(DeclTy)) { Aliasee = GetOrCreateLLVMFunction(AA->getAliasee(), DeclTy, GD, /*ForVTable=*/false); - else + LT = getFunctionLinkage(GD); + } else { Aliasee = GetOrCreateLLVMGlobal(AA->getAliasee(), llvm::PointerType::getUnqual(DeclTy), /*D=*/nullptr); + LT = getLLVMLinkageVarDefinition(cast<VarDecl>(GD.getDecl()), + D->getType().isConstQualified()); + } // Create the new alias itself, but don't set a name yet. - auto *GA = llvm::GlobalAlias::create( - DeclTy, 0, llvm::Function::ExternalLinkage, "", Aliasee, &getModule()); + auto *GA = + llvm::GlobalAlias::create(DeclTy, 0, LT, "", Aliasee, &getModule()); if (Entry) { if (GA->getAliasee() == Entry) { @@ -4634,7 +4731,7 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { // of the string is via this class initializer. CharUnits Align = isUTF16 ? Context.getTypeAlignInChars(Context.ShortTy) : Context.getTypeAlignInChars(Context.CharTy); - GV->setAlignment(Align.getQuantity()); + GV->setAlignment(Align.getAsAlign()); // FIXME: We set the section explicitly to avoid a bug in ld64 224.1. // Without it LLVM can merge the string with a non unnamed_addr one during @@ -4669,7 +4766,10 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { } Fields.addInt(LengthTy, StringLength); - CharUnits Alignment = getPointerAlign(); + // Swift ABI requires 8-byte alignment to ensure that the _Atomic(uint64_t) is + // properly aligned on 32-bit platforms. + CharUnits Alignment = + IsSwiftABI ? Context.toCharUnitsFromBits(64) : getPointerAlign(); // The struct. 
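// (Layout sketch: the constant built here is the usual four-field CFString
// { header, flags, str, length }; under the Swift ABI the header holds the
// _Atomic(uint64_t) mentioned above, hence the 8-byte alignment even on
// 32-bit targets.)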
GV = Fields.finishAndCreateGlobal("_unnamed_cfstring_", Alignment, @@ -4709,7 +4809,7 @@ QualType CodeGenModule::getObjCFastEnumerationStateType() { Context.getPointerType(Context.getObjCIdType()), Context.getPointerType(Context.UnsignedLongTy), Context.getConstantArrayType(Context.UnsignedLongTy, - llvm::APInt(32, 5), ArrayType::Normal, 0) + llvm::APInt(32, 5), nullptr, ArrayType::Normal, 0) }; for (size_t i = 0; i < 4; ++i) { @@ -4784,7 +4884,7 @@ GenerateStringLiteral(llvm::Constant *C, llvm::GlobalValue::LinkageTypes LT, auto *GV = new llvm::GlobalVariable( M, C->getType(), !CGM.getLangOpts().WritableStrings, LT, C, GlobalName, nullptr, llvm::GlobalVariable::NotThreadLocal, AddrSpace); - GV->setAlignment(Alignment.getQuantity()); + GV->setAlignment(Alignment.getAsAlign()); GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); if (GV->isWeakForLinker()) { assert(CGM.supportsCOMDAT() && "Only COFF uses weak string literals"); @@ -4808,7 +4908,7 @@ CodeGenModule::GetAddrOfConstantStringFromLiteral(const StringLiteral *S, Entry = &ConstantStringMap[C]; if (auto GV = *Entry) { if (Alignment.getQuantity() > GV->getAlignment()) - GV->setAlignment(Alignment.getQuantity()); + GV->setAlignment(Alignment.getAsAlign()); return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV), Alignment); } @@ -4871,7 +4971,7 @@ ConstantAddress CodeGenModule::GetAddrOfConstantCString( Entry = &ConstantStringMap[C]; if (auto GV = *Entry) { if (Alignment.getQuantity() > GV->getAlignment()) - GV->setAlignment(Alignment.getQuantity()); + GV->setAlignment(Alignment.getAsAlign()); return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV), Alignment); } @@ -4916,14 +5016,13 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary( VD, E->getManglingNumber(), Out); APValue *Value = nullptr; - if (E->getStorageDuration() == SD_Static) { - // We might have a cached constant initializer for this temporary. Note - // that this might have a different value from the value computed by - // evaluating the initializer if the surrounding constant expression - // modifies the temporary. + if (E->getStorageDuration() == SD_Static && VD && VD->evaluateValue()) { + // If the initializer of the extending declaration is a constant + // initializer, we should have a cached constant initializer for this + // temporary. Note that this might have a different value from the value + // computed by evaluating the initializer if the surrounding constant + // expression modifies the temporary. Value = getContext().getMaterializedTemporaryValue(E, false); - if (Value && Value->isAbsent()) - Value = nullptr; } // Try evaluating it now, it might have a constant initializer. @@ -4974,7 +5073,7 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary( /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal, TargetAS); if (emitter) emitter->finalize(GV); setGVProperties(GV, VD); - GV->setAlignment(Align.getQuantity()); + GV->setAlignment(Align.getAsAlign()); if (supportsCOMDAT() && GV->isWeakForLinker()) GV->setComdat(TheModule.getOrInsertComdat(GV->getName())); if (VD->getTLSKind()) @@ -5083,7 +5182,9 @@ void CodeGenModule::EmitObjCIvarInitializations(ObjCImplementationDecl *D) { // EmitLinkageSpec - Emit all declarations in a linkage spec. 
void CodeGenModule::EmitLinkageSpec(const LinkageSpecDecl *LSD) { if (LSD->getLanguage() != LinkageSpecDecl::lang_c && - LSD->getLanguage() != LinkageSpecDecl::lang_cxx) { + LSD->getLanguage() != LinkageSpecDecl::lang_cxx && + LSD->getLanguage() != LinkageSpecDecl::lang_cxx_11 && + LSD->getLanguage() != LinkageSpecDecl::lang_cxx_14) { ErrorUnsupported(LSD, "linkage spec"); return; } @@ -5804,7 +5905,7 @@ void CodeGenModule::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap, llvm::SanitizerStatReport &CodeGenModule::getSanStats() { if (!SanStats) - SanStats = llvm::make_unique<llvm::SanitizerStatReport>(&getModule()); + SanStats = std::make_unique<llvm::SanitizerStatReport>(&getModule()); return *SanStats; } diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h index 95964afed4ec1..73f81adae35f2 100644 --- a/lib/CodeGen/CodeGenModule.h +++ b/lib/CodeGen/CodeGenModule.h @@ -1270,13 +1270,26 @@ public: /// \param D Requires declaration void EmitOMPRequiresDecl(const OMPRequiresDecl *D); + /// Emits the definition of \p OldGD function with body from \p NewGD. + /// Required for proper handling of declare variant directive on the GPU. + void emitOpenMPDeviceFunctionRedefinition(GlobalDecl OldGD, GlobalDecl NewGD, + llvm::GlobalValue *GV); + /// Returns whether the given record has hidden LTO visibility and therefore /// may participate in (single-module) CFI and whole-program vtable /// optimization. bool HasHiddenLTOVisibility(const CXXRecordDecl *RD); + /// Returns the vcall visibility of the given type. This is the scope in which + /// a virtual function call could be made which ends up being dispatched to a + /// member function of this class. This scope can be wider than the visibility + /// of the class itself when the class has a more-visible dynamic base class. + llvm::GlobalObject::VCallVisibility + GetVCallVisibilityLevel(const CXXRecordDecl *RD); + /// Emit type metadata for the given vtable using the given layout. - void EmitVTableTypeMetadata(llvm::GlobalVariable *VTable, + void EmitVTableTypeMetadata(const CXXRecordDecl *RD, + llvm::GlobalVariable *VTable, const VTableLayout &VTLayout); /// Generate a cross-DSO type identifier for MD. diff --git a/lib/CodeGen/CodeGenPGO.cpp b/lib/CodeGen/CodeGenPGO.cpp index d10a321dc3d7b..e525abe979e35 100644 --- a/lib/CodeGen/CodeGenPGO.cpp +++ b/lib/CodeGen/CodeGenPGO.cpp @@ -980,7 +980,7 @@ void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader, return; } ProfRecord = - llvm::make_unique<llvm::InstrProfRecord>(std::move(RecordExpected.get())); + std::make_unique<llvm::InstrProfRecord>(std::move(RecordExpected.get())); RegionCounts = ProfRecord->Counts; } diff --git a/lib/CodeGen/CodeGenPGO.h b/lib/CodeGen/CodeGenPGO.h index 2e740f7892431..a3778b549910b 100644 --- a/lib/CodeGen/CodeGenPGO.h +++ b/lib/CodeGen/CodeGenPGO.h @@ -41,8 +41,8 @@ private: public: CodeGenPGO(CodeGenModule &CGM) - : CGM(CGM), NumValueSites({{0}}), NumRegionCounters(0), FunctionHash(0), - CurrentRegionCount(0) {} + : CGM(CGM), FuncNameVar(nullptr), NumValueSites({{0}}), + NumRegionCounters(0), FunctionHash(0), CurrentRegionCount(0) {} /// Whether or not we have PGO region data for the current function. 
This is /// false both when we have no data at all and when our data has been diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp index 79b29b3d916f8..a458811d7a30c 100644 --- a/lib/CodeGen/CodeGenTypes.cpp +++ b/lib/CodeGen/CodeGenTypes.cpp @@ -135,8 +135,8 @@ isSafeToConvert(const RecordDecl *RD, CodeGenTypes &CGT, // the class. if (const CXXRecordDecl *CRD = dyn_cast<CXXRecordDecl>(RD)) { for (const auto &I : CRD->bases()) - if (!isSafeToConvert(I.getType()->getAs<RecordType>()->getDecl(), - CGT, AlreadyChecked)) + if (!isSafeToConvert(I.getType()->castAs<RecordType>()->getDecl(), CGT, + AlreadyChecked)) return false; } @@ -402,7 +402,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { #define NON_CANONICAL_TYPE(Class, Base) case Type::Class: #define DEPENDENT_TYPE(Class, Base) case Type::Class: #define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) case Type::Class: -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" llvm_unreachable("Non-canonical or dependent types aren't possible."); case Type::Builtin: { @@ -512,6 +512,22 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { ResultType = CGM.getOpenCLRuntime().convertOpenCLSpecificType(Ty); break; + // TODO: real CodeGen support for SVE types requires more infrastructure + // to be added first. Report an error until then. +#define SVE_TYPE(Name, Id, SingletonId) case BuiltinType::Id: +#include "clang/Basic/AArch64SVEACLETypes.def" + { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Error, + "cannot yet generate code for SVE type '%0'"); + auto *BT = cast<BuiltinType>(Ty); + auto Name = BT->getName(CGM.getContext().getPrintingPolicy()); + CGM.getDiags().Report(DiagID) << Name; + // Return something safe. + ResultType = llvm::IntegerType::get(getLLVMContext(), 32); + break; + } + case BuiltinType::Dependent: #define BUILTIN_TYPE(Id, SingletonId) #define PLACEHOLDER_TYPE(Id, SingletonId) \ @@ -728,8 +744,7 @@ llvm::StructType *CodeGenTypes::ConvertRecordDeclType(const RecordDecl *RD) { if (const CXXRecordDecl *CRD = dyn_cast<CXXRecordDecl>(RD)) { for (const auto &I : CRD->bases()) { if (I.isVirtual()) continue; - - ConvertRecordDeclType(I.getType()->getAs<RecordType>()->getDecl()); + ConvertRecordDeclType(I.getType()->castAs<RecordType>()->getDecl()); } } diff --git a/lib/CodeGen/ConstantInitBuilder.cpp b/lib/CodeGen/ConstantInitBuilder.cpp index 40b1607b56266..2d63d88020bed 100644 --- a/lib/CodeGen/ConstantInitBuilder.cpp +++ b/lib/CodeGen/ConstantInitBuilder.cpp @@ -79,7 +79,7 @@ ConstantInitBuilderBase::createGlobal(llvm::Constant *initializer, /*insert before*/ nullptr, llvm::GlobalValue::NotThreadLocal, addressSpace); - GV->setAlignment(alignment.getQuantity()); + GV->setAlignment(alignment.getAsAlign()); resolveSelfReferences(GV); return GV; } diff --git a/lib/CodeGen/CoverageMappingGen.cpp b/lib/CodeGen/CoverageMappingGen.cpp index 6d18027f16a80..a6f6e38d5f148 100644 --- a/lib/CodeGen/CoverageMappingGen.cpp +++ b/lib/CodeGen/CoverageMappingGen.cpp @@ -1278,13 +1278,6 @@ std::string getCoverageSection(const CodeGenModule &CGM) { CGM.getContext().getTargetInfo().getTriple().getObjectFormat()); } -std::string normalizeFilename(StringRef Filename) { - llvm::SmallString<256> Path(Filename); - llvm::sys::fs::make_absolute(Path); - llvm::sys::path::remove_dots(Path, /*remove_dot_dot=*/true); - return Path.str().str(); -} - } // end anonymous namespace static void dump(llvm::raw_ostream &OS, StringRef FunctionName, @@ -1317,6 +1310,24 @@ static void 
dump(llvm::raw_ostream &OS, StringRef FunctionName, } } +CoverageMappingModuleGen::CoverageMappingModuleGen( + CodeGenModule &CGM, CoverageSourceInfo &SourceInfo) + : CGM(CGM), SourceInfo(SourceInfo), FunctionRecordTy(nullptr) { + // Honor -fdebug-compilation-dir in paths in coverage data. Otherwise, use the + // regular working directory when normalizing paths. + if (!CGM.getCodeGenOpts().DebugCompilationDir.empty()) + CWD = CGM.getCodeGenOpts().DebugCompilationDir; + else + llvm::sys::fs::current_path(CWD); +} + +std::string CoverageMappingModuleGen::normalizeFilename(StringRef Filename) { + llvm::SmallString<256> Path(Filename); + llvm::sys::fs::make_absolute(CWD, Path); + llvm::sys::path::remove_dots(Path, /*remove_dot_dot=*/true); + return Path.str().str(); +} + void CoverageMappingModuleGen::addFunctionMappingRecord( llvm::GlobalVariable *NamePtr, StringRef NameValue, uint64_t FuncHash, const std::string &CoverageMapping, bool IsUsed) { @@ -1442,7 +1453,7 @@ void CoverageMappingModuleGen::emit() { CovDataVal, llvm::getCoverageMappingVarName()); CovData->setSection(getCoverageSection(CGM)); - CovData->setAlignment(8); + CovData->setAlignment(llvm::Align(8)); // Make sure the data doesn't get deleted. CGM.addUsedGlobal(CovData); diff --git a/lib/CodeGen/CoverageMappingGen.h b/lib/CodeGen/CoverageMappingGen.h index 3bf51f590479f..2bdc00e256689 100644 --- a/lib/CodeGen/CoverageMappingGen.h +++ b/lib/CodeGen/CoverageMappingGen.h @@ -54,10 +54,14 @@ class CoverageMappingModuleGen { std::vector<llvm::Constant *> FunctionNames; llvm::StructType *FunctionRecordTy; std::vector<std::string> CoverageMappings; + SmallString<256> CWD; + + /// Make the filename absolute, remove dots, and normalize slashes to local + /// path style. + std::string normalizeFilename(StringRef Filename); public: - CoverageMappingModuleGen(CodeGenModule &CGM, CoverageSourceInfo &SourceInfo) - : CGM(CGM), SourceInfo(SourceInfo), FunctionRecordTy(nullptr) {} + CoverageMappingModuleGen(CodeGenModule &CGM, CoverageSourceInfo &SourceInfo); CoverageSourceInfo &getSourceInfo() const { return SourceInfo; diff --git a/lib/CodeGen/EHScopeStack.h b/lib/CodeGen/EHScopeStack.h index 3b0db35d982ba..0ed67aabcd621 100644 --- a/lib/CodeGen/EHScopeStack.h +++ b/lib/CodeGen/EHScopeStack.h @@ -199,14 +199,14 @@ public: SavedTuple Saved; template <std::size_t... Is> - T restore(CodeGenFunction &CGF, llvm::index_sequence<Is...>) { + T restore(CodeGenFunction &CGF, std::index_sequence<Is...>) { // It's important that the restores are emitted in order. The braced init // list guarantees that. return T{DominatingValue<As>::restore(CGF, std::get<Is>(Saved))...}; } void Emit(CodeGenFunction &CGF, Flags flags) override { - restore(CGF, llvm::index_sequence_for<As...>()).Emit(CGF, flags); + restore(CGF, std::index_sequence_for<As...>()).Emit(CGF, flags); } public: diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp index 3b2413d960d63..8f9b16470b642 100644 --- a/lib/CodeGen/ItaniumCXXABI.cpp +++ b/lib/CodeGen/ItaniumCXXABI.cpp @@ -43,6 +43,10 @@ class ItaniumCXXABI : public CodeGen::CGCXXABI { /// VTables - All the vtables which have been defined. llvm::DenseMap<const CXXRecordDecl *, llvm::GlobalVariable *> VTables; + /// All the thread wrapper functions that have been used. 
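+  /// Populated lazily by getOrCreateThreadLocalWrapper; the wrappers are
+  /// given bodies (or demoted to discardable linkage) later, in
+  /// EmitThreadLocalInitFuncs.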
+ llvm::SmallVector<std::pair<const VarDecl *, llvm::Function *>, 8> + ThreadWrappers; + protected: bool UseARMMethodPtrABI; bool UseARMGuardVarABI; @@ -322,7 +326,43 @@ public: ArrayRef<llvm::Function *> CXXThreadLocalInits, ArrayRef<const VarDecl *> CXXThreadLocalInitVars) override; - bool usesThreadWrapperFunction() const override { return true; } + /// Determine whether we will definitely emit this variable with a constant + /// initializer, either because the language semantics demand it or because + /// we know that the initializer is a constant. + bool isEmittedWithConstantInitializer(const VarDecl *VD) const { + VD = VD->getMostRecentDecl(); + if (VD->hasAttr<ConstInitAttr>()) + return true; + + // All later checks examine the initializer specified on the variable. If + // the variable is weak, such examination would not be correct. + if (VD->isWeak() || VD->hasAttr<SelectAnyAttr>()) + return false; + + const VarDecl *InitDecl = VD->getInitializingDeclaration(); + if (!InitDecl) + return false; + + // If there's no initializer to run, this is constant initialization. + if (!InitDecl->hasInit()) + return true; + + // If we have the only definition, we don't need a thread wrapper if we + // will emit the value as a constant. + if (isUniqueGVALinkage(getContext().GetGVALinkageForVariable(VD))) + return !VD->needsDestruction(getContext()) && InitDecl->evaluateValue(); + + // Otherwise, we need a thread wrapper unless we know that every + // translation unit will emit the value as a constant. We rely on + // ICE-ness not varying between translation units, which isn't actually + // guaranteed by the standard but is necessary for sanity. + return InitDecl->isInitKnownICE() && InitDecl->isInitICE(); + } + + bool usesThreadWrapperFunction(const VarDecl *VD) const override { + return !isEmittedWithConstantInitializer(VD) || + VD->needsDestruction(getContext()); + } LValue EmitThreadLocalVarDeclLValue(CodeGenFunction &CGF, const VarDecl *VD, QualType LValType) override; @@ -415,8 +455,8 @@ public: class ARMCXXABI : public ItaniumCXXABI { public: ARMCXXABI(CodeGen::CodeGenModule &CGM) : - ItaniumCXXABI(CGM, /* UseARMMethodPtrABI = */ true, - /* UseARMGuardVarABI = */ true) {} + ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true, + /*UseARMGuardVarABI=*/true) {} bool HasThisReturn(GlobalDecl GD) const override { return (isa<CXXConstructorDecl>(GD.getDecl()) || ( @@ -480,11 +520,11 @@ CodeGen::CGCXXABI *CodeGen::CreateItaniumCXXABI(CodeGenModule &CGM) { // include the other 32-bit ARM oddities: constructor/destructor return values // and array cookies. case TargetCXXABI::GenericAArch64: - return new ItaniumCXXABI(CGM, /* UseARMMethodPtrABI = */ true, - /* UseARMGuardVarABI = */ true); + return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true, + /*UseARMGuardVarABI=*/true); case TargetCXXABI::GenericMIPS: - return new ItaniumCXXABI(CGM, /* UseARMMethodPtrABI = */ true); + return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true); case TargetCXXABI::WebAssembly: return new WebAssemblyCXXABI(CGM); @@ -495,8 +535,7 @@ CodeGen::CGCXXABI *CodeGen::CreateItaniumCXXABI(CodeGenModule &CGM) { // For PNaCl, use ARM-style method pointers so that PNaCl code // does not assume anything about the alignment of function // pointers. 
- return new ItaniumCXXABI(CGM, /* UseARMMethodPtrABI = */ true, - /* UseARMGuardVarABI = */ false); + return new ItaniumCXXABI(CGM, /*UseARMMethodPtrABI=*/true); } return new ItaniumCXXABI(CGM); @@ -541,8 +580,8 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( const FunctionProtoType *FPT = MPT->getPointeeType()->getAs<FunctionProtoType>(); - const CXXRecordDecl *RD = - cast<CXXRecordDecl>(MPT->getClass()->getAs<RecordType>()->getDecl()); + auto *RD = + cast<CXXRecordDecl>(MPT->getClass()->castAs<RecordType>()->getDecl()); llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType( CGM.getTypes().arrangeCXXMethodType(RD, FPT, /*FD=*/nullptr)); @@ -605,8 +644,6 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( VTableOffset = Builder.CreateTrunc(VTableOffset, CGF.Int32Ty); VTableOffset = Builder.CreateZExt(VTableOffset, CGM.PtrDiffTy); } - // Compute the address of the virtual function pointer. - llvm::Value *VFPAddr = Builder.CreateGEP(VTable, VTableOffset); // Check the address of the function pointer if CFI on member function // pointers is enabled. @@ -614,44 +651,81 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( llvm::Constant *CheckTypeDesc; bool ShouldEmitCFICheck = CGF.SanOpts.has(SanitizerKind::CFIMFCall) && CGM.HasHiddenLTOVisibility(RD); - if (ShouldEmitCFICheck) { - CodeGenFunction::SanitizerScope SanScope(&CGF); - - CheckSourceLocation = CGF.EmitCheckSourceLocation(E->getBeginLoc()); - CheckTypeDesc = CGF.EmitCheckTypeDescriptor(QualType(MPT, 0)); - llvm::Constant *StaticData[] = { - llvm::ConstantInt::get(CGF.Int8Ty, CodeGenFunction::CFITCK_VMFCall), - CheckSourceLocation, - CheckTypeDesc, - }; - - llvm::Metadata *MD = - CGM.CreateMetadataIdentifierForVirtualMemPtrType(QualType(MPT, 0)); - llvm::Value *TypeId = llvm::MetadataAsValue::get(CGF.getLLVMContext(), MD); + bool ShouldEmitVFEInfo = CGM.getCodeGenOpts().VirtualFunctionElimination && + CGM.HasHiddenLTOVisibility(RD); + llvm::Value *VirtualFn = nullptr; - llvm::Value *TypeTest = Builder.CreateCall( - CGM.getIntrinsic(llvm::Intrinsic::type_test), {VFPAddr, TypeId}); + { + CodeGenFunction::SanitizerScope SanScope(&CGF); + llvm::Value *TypeId = nullptr; + llvm::Value *CheckResult = nullptr; + + if (ShouldEmitCFICheck || ShouldEmitVFEInfo) { + // If doing CFI or VFE, we will need the metadata node to check against. + llvm::Metadata *MD = + CGM.CreateMetadataIdentifierForVirtualMemPtrType(QualType(MPT, 0)); + TypeId = llvm::MetadataAsValue::get(CGF.getLLVMContext(), MD); + } - if (CGM.getCodeGenOpts().SanitizeTrap.has(SanitizerKind::CFIMFCall)) { - CGF.EmitTrapCheck(TypeTest); + llvm::Value *VFPAddr = Builder.CreateGEP(VTable, VTableOffset); + + if (ShouldEmitVFEInfo) { + // If doing VFE, load from the vtable with a type.checked.load intrinsic + // call. Note that we use the GEP to calculate the address to load from + // and pass 0 as the offset to the intrinsic. This is because every + // vtable slot of the correct type is marked with matching metadata, and + // we know that the load must be from one of these slots. 
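+      // A sketch of the IR emitted here (the mangled type name is elided):
+      //   %pair = call { i8*, i1 } @llvm.type.checked.load(i8* %vfpaddr,
+      //                                                    i32 0, metadata !"...")
+      //   %fn   = extractvalue { i8*, i1 } %pair, 0
+      //   %ok   = extractvalue { i8*, i1 } %pair, 1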
+ llvm::Value *CheckedLoad = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::type_checked_load), + {VFPAddr, llvm::ConstantInt::get(CGM.Int32Ty, 0), TypeId}); + CheckResult = Builder.CreateExtractValue(CheckedLoad, 1); + VirtualFn = Builder.CreateExtractValue(CheckedLoad, 0); + VirtualFn = Builder.CreateBitCast(VirtualFn, FTy->getPointerTo(), + "memptr.virtualfn"); } else { - llvm::Value *AllVtables = llvm::MetadataAsValue::get( - CGM.getLLVMContext(), - llvm::MDString::get(CGM.getLLVMContext(), "all-vtables")); - llvm::Value *ValidVtable = Builder.CreateCall( - CGM.getIntrinsic(llvm::Intrinsic::type_test), {VTable, AllVtables}); - CGF.EmitCheck(std::make_pair(TypeTest, SanitizerKind::CFIMFCall), - SanitizerHandler::CFICheckFail, StaticData, - {VTable, ValidVtable}); + // When not doing VFE, emit a normal load, as it allows more + // optimisations than type.checked.load. + if (ShouldEmitCFICheck) { + CheckResult = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::type_test), + {Builder.CreateBitCast(VFPAddr, CGF.Int8PtrTy), TypeId}); + } + VFPAddr = + Builder.CreateBitCast(VFPAddr, FTy->getPointerTo()->getPointerTo()); + VirtualFn = Builder.CreateAlignedLoad(VFPAddr, CGF.getPointerAlign(), + "memptr.virtualfn"); } + assert(VirtualFn && "Virtual function pointer not created!"); + assert((!ShouldEmitCFICheck || !ShouldEmitVFEInfo || CheckResult) && + "Check result required but not created!"); + + if (ShouldEmitCFICheck) { + // If doing CFI, emit the check. + CheckSourceLocation = CGF.EmitCheckSourceLocation(E->getBeginLoc()); + CheckTypeDesc = CGF.EmitCheckTypeDescriptor(QualType(MPT, 0)); + llvm::Constant *StaticData[] = { + llvm::ConstantInt::get(CGF.Int8Ty, CodeGenFunction::CFITCK_VMFCall), + CheckSourceLocation, + CheckTypeDesc, + }; - FnVirtual = Builder.GetInsertBlock(); - } + if (CGM.getCodeGenOpts().SanitizeTrap.has(SanitizerKind::CFIMFCall)) { + CGF.EmitTrapCheck(CheckResult); + } else { + llvm::Value *AllVtables = llvm::MetadataAsValue::get( + CGM.getLLVMContext(), + llvm::MDString::get(CGM.getLLVMContext(), "all-vtables")); + llvm::Value *ValidVtable = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::type_test), {VTable, AllVtables}); + CGF.EmitCheck(std::make_pair(CheckResult, SanitizerKind::CFIMFCall), + SanitizerHandler::CFICheckFail, StaticData, + {VTable, ValidVtable}); + } + + FnVirtual = Builder.GetInsertBlock(); + } + } // End of sanitizer scope - // Load the virtual function to call. - VFPAddr = Builder.CreateBitCast(VFPAddr, FTy->getPointerTo()->getPointerTo()); - llvm::Value *VirtualFn = Builder.CreateAlignedLoad( - VFPAddr, CGF.getPointerAlign(), "memptr.virtualfn"); CGF.EmitBranch(FnEnd); // In the non-virtual path, the function pointer is actually a @@ -1104,7 +1178,7 @@ void ItaniumCXXABI::emitVirtualObjectDelete(CodeGenFunction &CGF, // Grab the vtable pointer as an intptr_t*.
auto *ClassDecl = - cast<CXXRecordDecl>(ElementType->getAs<RecordType>()->getDecl()); + cast<CXXRecordDecl>(ElementType->castAs<RecordType>()->getDecl()); llvm::Value *VTable = CGF.GetVTablePtr(Ptr, CGF.IntPtrTy->getPointerTo(), ClassDecl); @@ -1307,7 +1381,7 @@ llvm::Value *ItaniumCXXABI::EmitTypeid(CodeGenFunction &CGF, Address ThisPtr, llvm::Type *StdTypeInfoPtrTy) { auto *ClassDecl = - cast<CXXRecordDecl>(SrcRecordTy->getAs<RecordType>()->getDecl()); + cast<CXXRecordDecl>(SrcRecordTy->castAs<RecordType>()->getDecl()); llvm::Value *Value = CGF.GetVTablePtr(ThisPtr, StdTypeInfoPtrTy->getPointerTo(), ClassDecl); @@ -1373,7 +1447,7 @@ llvm::Value *ItaniumCXXABI::EmitDynamicCastToVoid(CodeGenFunction &CGF, llvm::Type *DestLTy = CGF.ConvertType(DestTy); auto *ClassDecl = - cast<CXXRecordDecl>(SrcRecordTy->getAs<RecordType>()->getDecl()); + cast<CXXRecordDecl>(SrcRecordTy->castAs<RecordType>()->getDecl()); // Get the vtable pointer. llvm::Value *VTable = CGF.GetVTablePtr(ThisAddr, PtrDiffLTy->getPointerTo(), ClassDecl); @@ -1595,7 +1669,7 @@ void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT, EmitFundamentalRTTIDescriptors(RD); if (!VTable->isDeclarationForLinker()) - CGM.EmitVTableTypeMetadata(VTable, VTLayout); + CGM.EmitVTableTypeMetadata(RD, VTable, VTLayout); } bool ItaniumCXXABI::isVirtualOffsetNeededForVTableField( @@ -1755,10 +1829,11 @@ llvm::Value *ItaniumCXXABI::EmitVirtualDestructorCall( CGCallee Callee = CGCallee::forVirtual(CE, GD, This, Ty); QualType ThisTy; - if (CE) - ThisTy = CE->getImplicitObjectArgument()->getType()->getPointeeType(); - else + if (CE) { + ThisTy = CE->getObjectType(); + } else { ThisTy = D->getDestroyedType(); + } CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(), ThisTy, nullptr, QualType(), nullptr); @@ -2154,7 +2229,7 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, guard->setVisibility(var->getVisibility()); // If the variable is thread-local, so is its guard variable. guard->setThreadLocalMode(var->getThreadLocalMode()); - guard->setAlignment(guardAlignment.getQuantity()); + guard->setAlignment(guardAlignment.getAsAlign()); // The ABI says: "It is suggested that it be emitted in the same COMDAT // group as the associated data object." In practice, this doesn't work for @@ -2456,9 +2531,6 @@ ItaniumCXXABI::getOrCreateThreadLocalWrapper(const VarDecl *VD, CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Wrapper); - if (VD->hasDefinition()) - CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Wrapper); - // Always resolve references to the wrapper at link time. if (!Wrapper->hasLocalLinkage()) if (!isThreadWrapperReplaceable(VD, CGM) || @@ -2471,6 +2543,8 @@ ItaniumCXXABI::getOrCreateThreadLocalWrapper(const VarDecl *VD, Wrapper->setCallingConv(llvm::CallingConv::CXX_FAST_TLS); Wrapper->addFnAttr(llvm::Attribute::NoUnwind); } + + ThreadWrappers.push_back({VD, Wrapper}); return Wrapper; } @@ -2508,7 +2582,7 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( Guard->setThreadLocal(true); CharUnits GuardAlign = CharUnits::One(); - Guard->setAlignment(GuardAlign.getQuantity()); + Guard->setAlignment(GuardAlign.getAsAlign()); CodeGenFunction(CGM).GenerateCXXGlobalInitFunc( InitFunc, OrderedInits, ConstantAddress(Guard, GuardAlign)); @@ -2519,20 +2593,40 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( } } - // Emit thread wrappers. + // Create declarations for thread wrappers for all thread-local variables + // with non-discardable definitions in this translation unit. 
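+  // (Another translation unit may call the wrapper of a non-discardable
+  // definition without emitting its own copy, so the wrapper symbol must be
+  // created here even if nothing in this TU references the variable.)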
for (const VarDecl *VD : CXXThreadLocals) { + if (VD->hasDefinition() && + !isDiscardableGVALinkage(getContext().GetGVALinkageForVariable(VD))) { + llvm::GlobalValue *GV = CGM.GetGlobalValue(CGM.getMangledName(VD)); + getOrCreateThreadLocalWrapper(VD, GV); + } + } + + // Emit all referenced thread wrappers. + for (auto VDAndWrapper : ThreadWrappers) { + const VarDecl *VD = VDAndWrapper.first; llvm::GlobalVariable *Var = cast<llvm::GlobalVariable>(CGM.GetGlobalValue(CGM.getMangledName(VD))); - llvm::Function *Wrapper = getOrCreateThreadLocalWrapper(VD, Var); + llvm::Function *Wrapper = VDAndWrapper.second; // Some targets require that all access to thread local variables go through // the thread wrapper. This means that we cannot attempt to create a thread // wrapper or a thread helper. - if (isThreadWrapperReplaceable(VD, CGM) && !VD->hasDefinition()) { - Wrapper->setLinkage(llvm::Function::ExternalLinkage); - continue; + if (!VD->hasDefinition()) { + if (isThreadWrapperReplaceable(VD, CGM)) { + Wrapper->setLinkage(llvm::Function::ExternalLinkage); + continue; + } + + // If this isn't a TU in which this variable is defined, the thread + // wrapper is discardable. + if (Wrapper->getLinkage() == llvm::Function::WeakODRLinkage) + Wrapper->setLinkage(llvm::Function::LinkOnceODRLinkage); } + CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Wrapper); + // Mangle the name for the thread_local initialization function. SmallString<256> InitFnName; { @@ -2547,7 +2641,10 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( // produce a declaration of the initialization function. llvm::GlobalValue *Init = nullptr; bool InitIsInitFunc = false; - if (VD->hasDefinition()) { + bool HasConstantInitialization = false; + if (!usesThreadWrapperFunction(VD)) { + HasConstantInitialization = true; + } else if (VD->hasDefinition()) { InitIsInitFunc = true; llvm::Function *InitFuncToUse = InitFunc; if (isTemplateInstantiation(VD->getTemplateSpecializationKind())) @@ -2576,7 +2673,9 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( llvm::LLVMContext &Context = CGM.getModule().getContext(); llvm::BasicBlock *Entry = llvm::BasicBlock::Create(Context, "", Wrapper); CGBuilderTy Builder(CGM, Entry); - if (InitIsInitFunc) { + if (HasConstantInitialization) { + // No dynamic initialization to invoke. + } else if (InitIsInitFunc) { if (Init) { llvm::CallInst *CallVal = Builder.CreateCall(InitFnTy, Init); if (isThreadWrapperReplaceable(VD, CGM)) { @@ -2860,6 +2959,9 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) { case BuiltinType::OCLClkEvent: case BuiltinType::OCLQueue: case BuiltinType::OCLReserveID: +#define SVE_TYPE(Name, Id, SingletonId) \ + case BuiltinType::Id: +#include "clang/Basic/AArch64SVEACLETypes.def" case BuiltinType::ShortAccum: case BuiltinType::Accum: case BuiltinType::LongAccum: @@ -3033,8 +3135,8 @@ static bool CanUseSingleInheritance(const CXXRecordDecl *RD) { return false; // Check that the class is dynamic iff the base is. 
- const CXXRecordDecl *BaseDecl = - cast<CXXRecordDecl>(Base->getType()->getAs<RecordType>()->getDecl()); + auto *BaseDecl = + cast<CXXRecordDecl>(Base->getType()->castAs<RecordType>()->getDecl()); if (!BaseDecl->isEmpty() && BaseDecl->isDynamicClass() != RD->isDynamicClass()) return false; @@ -3061,7 +3163,7 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) { #define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) case Type::Class: #define NON_CANONICAL_TYPE(Class, Base) case Type::Class: #define DEPENDENT_TYPE(Class, Base) case Type::Class: -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" llvm_unreachable("Non-canonical and dependent types shouldn't get here"); case Type::LValueReference: @@ -3307,7 +3409,7 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo( #define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) case Type::Class: #define NON_CANONICAL_TYPE(Class, Base) case Type::Class: #define DEPENDENT_TYPE(Class, Base) case Type::Class: -#include "clang/AST/TypeNodes.def" +#include "clang/AST/TypeNodes.inc" llvm_unreachable("Non-canonical and dependent types shouldn't get here"); // GCC treats vector types as fundamental types. @@ -3412,7 +3514,7 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo( CharUnits Align = CGM.getContext().toCharUnitsFromBits(CGM.getTarget().getPointerAlign(0)); - GV->setAlignment(Align.getQuantity()); + GV->setAlignment(Align.getAsAlign()); // The Itanium ABI specifies that type_info objects must be globally // unique, with one exception: if the type is an incomplete class @@ -3497,8 +3599,8 @@ static unsigned ComputeVMIClassTypeInfoFlags(const CXXBaseSpecifier *Base, unsigned Flags = 0; - const CXXRecordDecl *BaseDecl = - cast<CXXRecordDecl>(Base->getType()->getAs<RecordType>()->getDecl()); + auto *BaseDecl = + cast<CXXRecordDecl>(Base->getType()->castAs<RecordType>()->getDecl()); if (Base->isVirtual()) { // Mark the virtual base as seen. @@ -3596,8 +3698,8 @@ void ItaniumRTTIBuilder::BuildVMIClassTypeInfo(const CXXRecordDecl *RD) { // The __base_type member points to the RTTI for the base type. 
Fields.push_back(ItaniumRTTIBuilder(CXXABI).BuildTypeInfo(Base.getType())); - const CXXRecordDecl *BaseDecl = - cast<CXXRecordDecl>(Base.getType()->getAs<RecordType>()->getDecl()); + auto *BaseDecl = + cast<CXXRecordDecl>(Base.getType()->castAs<RecordType>()->getDecl()); int64_t OffsetFlags = 0; diff --git a/lib/CodeGen/MicrosoftCXXABI.cpp b/lib/CodeGen/MicrosoftCXXABI.cpp index fa34414de5da1..2d8b538bc2eec 100644 --- a/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/lib/CodeGen/MicrosoftCXXABI.cpp @@ -386,7 +386,9 @@ public: ArrayRef<llvm::Function *> CXXThreadLocalInits, ArrayRef<const VarDecl *> CXXThreadLocalInitVars) override; - bool usesThreadWrapperFunction() const override { return false; } + bool usesThreadWrapperFunction(const VarDecl *VD) const override { + return false; + } LValue EmitThreadLocalVarDeclLValue(CodeGenFunction &CGF, const VarDecl *VD, QualType LValType) override; @@ -1208,7 +1210,7 @@ static bool hasDefaultCXXMethodCC(ASTContext &Context, CallingConv ExpectedCallingConv = Context.getDefaultCallingConvention( /*IsVariadic=*/false, /*IsCXXMethod=*/true); CallingConv ActualCallingConv = - MD->getType()->getAs<FunctionProtoType>()->getCallConv(); + MD->getType()->castAs<FunctionProtoType>()->getCallConv(); return ExpectedCallingConv == ActualCallingConv; } @@ -1921,10 +1923,11 @@ llvm::Value *MicrosoftCXXABI::EmitVirtualDestructorCall( DtorType == Dtor_Deleting); QualType ThisTy; - if (CE) - ThisTy = CE->getImplicitObjectArgument()->getType()->getPointeeType(); - else + if (CE) { + ThisTy = CE->getObjectType(); + } else { ThisTy = D->getDestroyedType(); + } This = adjustThisArgumentForVirtualFunctionCall(CGF, GD, This, true); RValue RV = CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(), ThisTy, @@ -2352,7 +2355,7 @@ static ConstantAddress getInitThreadEpochPtr(CodeGenModule &CGM) { /*isConstant=*/false, llvm::GlobalVariable::ExternalLinkage, /*Initializer=*/nullptr, VarName, /*InsertBefore=*/nullptr, llvm::GlobalVariable::GeneralDynamicTLSModel); - GV->setAlignment(Align.getQuantity()); + GV->setAlignment(Align.getAsAlign()); return ConstantAddress(GV, Align); } @@ -2495,7 +2498,7 @@ void MicrosoftCXXABI::EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D, GV->getLinkage(), Zero, GuardName.str()); GuardVar->setVisibility(GV->getVisibility()); GuardVar->setDLLStorageClass(GV->getDLLStorageClass()); - GuardVar->setAlignment(GuardAlign.getQuantity()); + GuardVar->setAlignment(GuardAlign.getAsAlign()); if (GuardVar->isWeakForLinker()) GuardVar->setComdat( CGM.getModule().getOrInsertComdat(GuardVar->getName())); diff --git a/lib/CodeGen/ModuleBuilder.cpp b/lib/CodeGen/ModuleBuilder.cpp index 3b4e06045a37c..4154f6ebe736f 100644 --- a/lib/CodeGen/ModuleBuilder.cpp +++ b/lib/CodeGen/ModuleBuilder.cpp @@ -65,6 +65,13 @@ namespace { private: SmallVector<FunctionDecl *, 8> DeferredInlineMemberFuncDefs; + static llvm::StringRef ExpandModuleName(llvm::StringRef ModuleName, + const CodeGenOptions &CGO) { + if (ModuleName == "-" && !CGO.MainFileName.empty()) + return CGO.MainFileName; + return ModuleName; + } + public: CodeGeneratorImpl(DiagnosticsEngine &diags, llvm::StringRef ModuleName, const HeaderSearchOptions &HSO, @@ -73,7 +80,8 @@ namespace { CoverageSourceInfo *CoverageInfo = nullptr) : Diags(diags), Ctx(nullptr), HeaderSearchOpts(HSO), PreprocessorOpts(PPO), CodeGenOpts(CGO), HandlingTopLevelDecls(0), - CoverageInfo(CoverageInfo), M(new llvm::Module(ModuleName, C)) { + CoverageInfo(CoverageInfo), + M(new llvm::Module(ExpandModuleName(ModuleName, CGO), C)) { 
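+        // (ExpandModuleName matters when compiling from stdin: the module
+        // would otherwise be named "-", so CGO.MainFileName is used instead.)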
C.setDiscardValueNames(CGO.DiscardValueNames); } @@ -121,7 +129,7 @@ namespace { llvm::Module *StartModule(llvm::StringRef ModuleName, llvm::LLVMContext &C) { assert(!M && "Replacing existing Module?"); - M.reset(new llvm::Module(ModuleName, C)); + M.reset(new llvm::Module(ExpandModuleName(ModuleName, CodeGenOpts), C)); Initialize(*Ctx); return M.get(); } @@ -232,6 +240,9 @@ namespace { if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(Member)) { if (Ctx->DeclMustBeEmitted(DRD)) Builder->EmitGlobal(DRD); + } else if (auto *DMD = dyn_cast<OMPDeclareMapperDecl>(Member)) { + if (Ctx->DeclMustBeEmitted(DMD)) + Builder->EmitGlobal(DMD); } } } diff --git a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp index 15a2ab99fdac2..284e8022a3c46 100644 --- a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp +++ b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp @@ -279,7 +279,7 @@ public: *M, Ty, /*constant*/ true, llvm::GlobalVariable::InternalLinkage, Data, "__clang_ast"); // The on-disk hashtable needs to be aligned. - ASTSym->setAlignment(8); + ASTSym->setAlignment(llvm::Align(8)); // Mach-O also needs a segment name. if (Triple.isOSBinFormatMachO()) @@ -297,7 +297,7 @@ public: Diags, HeaderSearchOpts, CodeGenOpts, TargetOpts, LangOpts, Ctx.getTargetInfo().getDataLayout(), M.get(), BackendAction::Backend_EmitLL, - llvm::make_unique<llvm::raw_svector_ostream>(Buffer)); + std::make_unique<llvm::raw_svector_ostream>(Buffer)); llvm::dbgs() << Buffer; }); @@ -321,7 +321,7 @@ ObjectFilePCHContainerWriter::CreatePCHContainerGenerator( const std::string &OutputFileName, std::unique_ptr<llvm::raw_pwrite_stream> OS, std::shared_ptr<PCHBuffer> Buffer) const { - return llvm::make_unique<PCHContainerGenerator>( + return std::make_unique<PCHContainerGenerator>( CI, MainFileName, OutputFileName, std::move(OS), Buffer); } @@ -335,7 +335,11 @@ ObjectFilePCHContainerReader::ExtractPCH(llvm::MemoryBufferRef Buffer) const { // Find the clang AST section in the container. for (auto &Section : OF->sections()) { StringRef Name; - Section.getName(Name); + if (Expected<StringRef> NameOrErr = Section.getName()) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); + if ((!IsCOFF && Name == "__clangast") || (IsCOFF && Name == "clangast")) { if (Expected<StringRef> E = Section.getContents()) return *E; diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index 5da988fb8a3c5..c2c7b8bf653b9 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -833,10 +833,13 @@ ABIArgInfo WebAssemblyABIInfo::classifyReturnType(QualType RetTy) const { Address WebAssemblyABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { - return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*IsIndirect=*/ false, + bool IsIndirect = isAggregateTypeForABI(Ty) && + !isEmptyRecord(getContext(), Ty, true) && + !isSingleElementStruct(Ty, getContext()); + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, getContext().getTypeInfoInChars(Ty), CharUnits::fromQuantity(4), - /*AllowHigherAlign=*/ true); + /*AllowHigherAlign=*/true); } //===----------------------------------------------------------------------===// @@ -2177,6 +2180,17 @@ class X86_64ABIInfo : public SwiftABIInfo { return true; } + // GCC classifies vectors of __int128 as memory. + bool passInt128VectorsInMem() const { + // Clang <= 9.0 did not do this. 
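+  // (A sketch of an affected case: with
+  //   typedef __int128 v2ti __attribute__((vector_size(32)));
+  // a 256-bit v2ti argument is now passed in memory to match GCC, unless
+  // -fclang-abi-compat=9 or earlier is in effect.)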
+ if (getContext().getLangOpts().getClangABICompat() <= + LangOptions::ClangABI::Ver9) + return false; + + const llvm::Triple &T = getTarget().getTriple(); + return T.isOSLinux() || T.isOSNetBSD(); + } + X86AVXABILevel AVXLevel; // Some ABIs (e.g. X32 ABI and Native Client OS) use 32 bit pointers on // 64-bit hardware. @@ -2657,6 +2671,14 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Hi = Lo; } else if (Size == 128 || (isNamedArg && Size <= getNativeVectorSizeForAVXABI(AVXLevel))) { + QualType ElementType = VT->getElementType(); + + // gcc passes 256 and 512 bit <X x __int128> vectors in memory. :( + if (passInt128VectorsInMem() && Size != 128 && + (ElementType->isSpecificBuiltinType(BuiltinType::Int128) || + ElementType->isSpecificBuiltinType(BuiltinType::UInt128))) + return; + // Arguments of 256-bits are split into four eightbyte chunks. The // least significant one belongs to class SSE and all the others to class // SSEUP. The original Lo and Hi design considers that types can't be @@ -2787,8 +2809,8 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, for (const auto &I : CXXRD->bases()) { assert(!I.isVirtual() && !I.getType()->isDependentType() && "Unexpected base class!"); - const CXXRecordDecl *Base = - cast<CXXRecordDecl>(I.getType()->getAs<RecordType>()->getDecl()); + const auto *Base = + cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl()); // Classify this field. // @@ -2899,6 +2921,11 @@ bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const { unsigned LargestVector = getNativeVectorSizeForAVXABI(AVXLevel); if (Size <= 64 || Size > LargestVector) return true; + QualType EltTy = VecTy->getElementType(); + if (passInt128VectorsInMem() && + (EltTy->isSpecificBuiltinType(BuiltinType::Int128) || + EltTy->isSpecificBuiltinType(BuiltinType::UInt128))) + return true; } return false; @@ -2973,14 +3000,28 @@ llvm::Type *X86_64ABIInfo::GetByteVectorType(QualType Ty) const { Ty = QualType(InnerTy, 0); llvm::Type *IRType = CGT.ConvertType(Ty); - if (isa<llvm::VectorType>(IRType) || - IRType->getTypeID() == llvm::Type::FP128TyID) + if (isa<llvm::VectorType>(IRType)) { + // Don't pass vXi128 vectors in their native type, the backend can't + // legalize them. + if (passInt128VectorsInMem() && + IRType->getVectorElementType()->isIntegerTy(128)) { + // Use a vXi64 vector. + uint64_t Size = getContext().getTypeSize(Ty); + return llvm::VectorType::get(llvm::Type::getInt64Ty(getVMContext()), + Size / 64); + } + + return IRType; + } + + if (IRType->getTypeID() == llvm::Type::FP128TyID) return IRType; // We couldn't find the preferred IR vector type for 'Ty'. uint64_t Size = getContext().getTypeSize(Ty); assert((Size == 128 || Size == 256 || Size == 512) && "Invalid type found!"); + // Return a LLVM IR vector type based on the size of 'Ty'. return llvm::VectorType::get(llvm::Type::getDoubleTy(getVMContext()), Size / 64); @@ -3030,8 +3071,8 @@ static bool BitsContainNoUserData(QualType Ty, unsigned StartBit, for (const auto &I : CXXRD->bases()) { assert(!I.isVirtual() && !I.getType()->isDependentType() && "Unexpected base class!"); - const CXXRecordDecl *Base = - cast<CXXRecordDecl>(I.getType()->getAs<RecordType>()->getDecl()); + const auto *Base = + cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl()); // If the base is after the span we care about, ignore it. 
unsigned BaseOffset = Context.toBits(Layout.getBaseClassOffset(Base)); @@ -7909,8 +7950,12 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes( const auto *ReqdWGS = M.getLangOpts().OpenCL ? FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr; - if (((M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>()) || - (M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>())) && + + const bool IsOpenCLKernel = M.getLangOpts().OpenCL && + FD->hasAttr<OpenCLKernelAttr>(); + const bool IsHIPKernel = M.getLangOpts().HIP && + FD->hasAttr<CUDAGlobalAttr>(); + if ((IsOpenCLKernel || IsHIPKernel) && (M.getTriple().getOS() == llvm::Triple::AMDHSA)) F->addFnAttr("amdgpu-implicitarg-num-bytes", "56"); @@ -7936,6 +7981,9 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes( F->addFnAttr("amdgpu-flat-work-group-size", AttrVal); } else assert(Max == 0 && "Max must be zero"); + } else if (IsOpenCLKernel || IsHIPKernel) { + // By default, restrict the maximum size to 256. + F->addFnAttr("amdgpu-flat-work-group-size", "1,256"); } if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>()) { @@ -9188,25 +9236,45 @@ static bool getTypeString(SmallStringEnc &Enc, const Decl *D, namespace { class RISCVABIInfo : public DefaultABIInfo { private: - unsigned XLen; // Size of the integer ('x') registers in bits. + // Size of the integer ('x') registers in bits. + unsigned XLen; + // Size of the floating point ('f') registers in bits. Note that the target + // ISA might have a wider FLen than the selected ABI (e.g. an RV32IF target + // with soft float ABI has FLen==0). + unsigned FLen; static const int NumArgGPRs = 8; + static const int NumArgFPRs = 8; + bool detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, + llvm::Type *&Field1Ty, + CharUnits &Field1Off, + llvm::Type *&Field2Ty, + CharUnits &Field2Off) const; public: - RISCVABIInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen) - : DefaultABIInfo(CGT), XLen(XLen) {} + RISCVABIInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, unsigned FLen) + : DefaultABIInfo(CGT), XLen(XLen), FLen(FLen) {} // DefaultABIInfo's classifyReturnType and classifyArgumentType are // non-virtual, but computeInfo is virtual, so we overload it. void computeInfo(CGFunctionInfo &FI) const override; - ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed, - int &ArgGPRsLeft) const; + ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed, int &ArgGPRsLeft, + int &ArgFPRsLeft) const; ABIArgInfo classifyReturnType(QualType RetTy) const; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; ABIArgInfo extendType(QualType Ty) const; + + bool detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, + CharUnits &Field1Off, llvm::Type *&Field2Ty, + CharUnits &Field2Off, int &NeededArgGPRs, + int &NeededArgFPRs) const; + ABIArgInfo coerceAndExpandFPCCEligibleStruct(llvm::Type *Field1Ty, + CharUnits Field1Off, + llvm::Type *Field2Ty, + CharUnits Field2Off) const; }; } // end anonymous namespace @@ -9228,18 +9296,215 @@ void RISCVABIInfo::computeInfo(CGFunctionInfo &FI) const { // different for variadic arguments, we must also track whether we are // examining a vararg or not. int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs; + int ArgFPRsLeft = FLen ? 
NumArgFPRs : 0; int NumFixedArgs = FI.getNumRequiredArgs(); int ArgNum = 0; for (auto &ArgInfo : FI.arguments()) { bool IsFixed = ArgNum < NumFixedArgs; - ArgInfo.info = classifyArgumentType(ArgInfo.type, IsFixed, ArgGPRsLeft); + ArgInfo.info = + classifyArgumentType(ArgInfo.type, IsFixed, ArgGPRsLeft, ArgFPRsLeft); ArgNum++; } } +// Returns true if the struct is a potential candidate for the floating point +// calling convention. If this function returns true, the caller is +// responsible for checking that if there is only a single field then that +// field is a float. +bool RISCVABIInfo::detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, + llvm::Type *&Field1Ty, + CharUnits &Field1Off, + llvm::Type *&Field2Ty, + CharUnits &Field2Off) const { + bool IsInt = Ty->isIntegralOrEnumerationType(); + bool IsFloat = Ty->isRealFloatingType(); + + if (IsInt || IsFloat) { + uint64_t Size = getContext().getTypeSize(Ty); + if (IsInt && Size > XLen) + return false; + // Can't be eligible if larger than the FP registers. Half precision isn't + // currently supported on RISC-V and the ABI hasn't been confirmed, so + // default to the integer ABI in that case. + if (IsFloat && (Size > FLen || Size < 32)) + return false; + // Can't be eligible if an integer type was already found (int+int pairs + // are not eligible). + if (IsInt && Field1Ty && Field1Ty->isIntegerTy()) + return false; + if (!Field1Ty) { + Field1Ty = CGT.ConvertType(Ty); + Field1Off = CurOff; + return true; + } + if (!Field2Ty) { + Field2Ty = CGT.ConvertType(Ty); + Field2Off = CurOff; + return true; + } + return false; + } + + if (auto CTy = Ty->getAs<ComplexType>()) { + if (Field1Ty) + return false; + QualType EltTy = CTy->getElementType(); + if (getContext().getTypeSize(EltTy) > FLen) + return false; + Field1Ty = CGT.ConvertType(EltTy); + Field1Off = CurOff; + assert(CurOff.isZero() && "Unexpected offset for first field"); + Field2Ty = Field1Ty; + Field2Off = Field1Off + getContext().getTypeSizeInChars(EltTy); + return true; + } + + if (const ConstantArrayType *ATy = getContext().getAsConstantArrayType(Ty)) { + uint64_t ArraySize = ATy->getSize().getZExtValue(); + QualType EltTy = ATy->getElementType(); + CharUnits EltSize = getContext().getTypeSizeInChars(EltTy); + for (uint64_t i = 0; i < ArraySize; ++i) { + bool Ret = detectFPCCEligibleStructHelper(EltTy, CurOff, Field1Ty, + Field1Off, Field2Ty, Field2Off); + if (!Ret) + return false; + CurOff += EltSize; + } + return true; + } + + if (const auto *RTy = Ty->getAs<RecordType>()) { + // Structures with either a non-trivial destructor or a non-trivial + // copy constructor are not eligible for the FP calling convention. + if (getRecordArgABI(Ty, CGT.getCXXABI())) + return false; + if (isEmptyRecord(getContext(), Ty, true)) + return true; + const RecordDecl *RD = RTy->getDecl(); + // Unions aren't eligible unless they're empty (which is caught above). + if (RD->isUnion()) + return false; + int ZeroWidthBitFieldCount = 0; + for (const FieldDecl *FD : RD->fields()) { + const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); + uint64_t FieldOffInBits = Layout.getFieldOffset(FD->getFieldIndex()); + QualType QTy = FD->getType(); + if (FD->isBitField()) { + unsigned BitWidth = FD->getBitWidthValue(getContext()); + // Allow a bitfield with a type greater than XLen as long as the + // bitwidth is XLen or less. 
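+        // (e.g. on RV32, `struct { float f; long long x : 20; }` treats the
+        // bit-field as a 32-bit integer here, so the struct can still
+        // qualify as an fp+int candidate.)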
+ if (getContext().getTypeSize(QTy) > XLen && BitWidth <= XLen) + QTy = getContext().getIntTypeForBitwidth(XLen, false); + if (BitWidth == 0) { + ZeroWidthBitFieldCount++; + continue; + } + } + + bool Ret = detectFPCCEligibleStructHelper( + QTy, CurOff + getContext().toCharUnitsFromBits(FieldOffInBits), + Field1Ty, Field1Off, Field2Ty, Field2Off); + if (!Ret) + return false; + + // As a quirk of the ABI, zero-width bitfields aren't ignored for fp+fp + // or int+fp structs, but are ignored for a struct with an fp field and + // any number of zero-width bitfields. + if (Field2Ty && ZeroWidthBitFieldCount > 0) + return false; + } + return Field1Ty != nullptr; + } + + return false; +} + +// Determine if a struct is eligible for passing according to the floating +// point calling convention (i.e., when flattened it contains a single fp +// value, fp+fp, or int+fp of appropriate size). If so, NeededArgFPRs and +// NeededArgGPRs are incremented appropriately. +bool RISCVABIInfo::detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, + CharUnits &Field1Off, + llvm::Type *&Field2Ty, + CharUnits &Field2Off, + int &NeededArgGPRs, + int &NeededArgFPRs) const { + Field1Ty = nullptr; + Field2Ty = nullptr; + NeededArgGPRs = 0; + NeededArgFPRs = 0; + bool IsCandidate = detectFPCCEligibleStructHelper( + Ty, CharUnits::Zero(), Field1Ty, Field1Off, Field2Ty, Field2Off); + // Not really a candidate if we have a single int but no float. + if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy()) + return false; + if (!IsCandidate) + return false; + if (Field1Ty && Field1Ty->isFloatingPointTy()) + NeededArgFPRs++; + else if (Field1Ty) + NeededArgGPRs++; + if (Field2Ty && Field2Ty->isFloatingPointTy()) + NeededArgFPRs++; + else if (Field2Ty) + NeededArgGPRs++; + return IsCandidate; +} + +// Call getCoerceAndExpand for the two-element flattened struct described by +// Field1Ty, Field1Off, Field2Ty, Field2Off. This method will create an +// appropriate coerceToType and unpaddedCoerceToType. 
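+// Worked example (assuming the lp64d ABI): `struct { float f; double d; }`
+// arrives as Field1Ty=float at offset 0 and Field2Ty=double at offset 8. No
+// explicit padding element is needed because double's natural alignment
+// already places it at offset 8, so both coerceToType and
+// unpaddedCoerceToType come out as { float, double }, which
+// getCoerceAndExpand then splits into one FPR each.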
+ABIArgInfo RISCVABIInfo::coerceAndExpandFPCCEligibleStruct( + llvm::Type *Field1Ty, CharUnits Field1Off, llvm::Type *Field2Ty, + CharUnits Field2Off) const { + SmallVector<llvm::Type *, 3> CoerceElts; + SmallVector<llvm::Type *, 2> UnpaddedCoerceElts; + if (!Field1Off.isZero()) + CoerceElts.push_back(llvm::ArrayType::get( + llvm::Type::getInt8Ty(getVMContext()), Field1Off.getQuantity())); + + CoerceElts.push_back(Field1Ty); + UnpaddedCoerceElts.push_back(Field1Ty); + + if (!Field2Ty) { + return ABIArgInfo::getCoerceAndExpand( + llvm::StructType::get(getVMContext(), CoerceElts, !Field1Off.isZero()), + UnpaddedCoerceElts[0]); + } + + CharUnits Field2Align = + CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(Field2Ty)); + CharUnits Field1Size = + CharUnits::fromQuantity(getDataLayout().getTypeStoreSize(Field1Ty)); + CharUnits Field2OffNoPadNoPack = Field1Size.alignTo(Field2Align); + + CharUnits Padding = CharUnits::Zero(); + if (Field2Off > Field2OffNoPadNoPack) + Padding = Field2Off - Field2OffNoPadNoPack; + else if (Field2Off != Field2Align && Field2Off > Field1Size) + Padding = Field2Off - Field1Size; + + bool IsPacked = !Field2Off.isMultipleOf(Field2Align); + + if (!Padding.isZero()) + CoerceElts.push_back(llvm::ArrayType::get( + llvm::Type::getInt8Ty(getVMContext()), Padding.getQuantity())); + + CoerceElts.push_back(Field2Ty); + UnpaddedCoerceElts.push_back(Field2Ty); + + auto CoerceToType = + llvm::StructType::get(getVMContext(), CoerceElts, IsPacked); + auto UnpaddedCoerceToType = + llvm::StructType::get(getVMContext(), UnpaddedCoerceElts, IsPacked); + + return ABIArgInfo::getCoerceAndExpand(CoerceToType, UnpaddedCoerceToType); +} + ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, - int &ArgGPRsLeft) const { + int &ArgGPRsLeft, + int &ArgFPRsLeft) const { assert(ArgGPRsLeft <= NumArgGPRs && "Arg GPR tracking underflow"); Ty = useFirstFieldIfTransparentUnion(Ty); @@ -9257,6 +9522,42 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, return ABIArgInfo::getIgnore(); uint64_t Size = getContext().getTypeSize(Ty); + + // Pass floating point values via FPRs if possible. + if (IsFixed && Ty->isFloatingType() && FLen >= Size && ArgFPRsLeft) { + ArgFPRsLeft--; + return ABIArgInfo::getDirect(); + } + + // Complex types for the hard float ABI must be passed direct rather than + // using CoerceAndExpand. 
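+  // (e.g. a fixed `double _Complex` argument under lp64d consumes two FPRs
+  // and is passed directly, i.e. as the IR type { double, double }.)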
+ if (IsFixed && Ty->isComplexType() && FLen && ArgFPRsLeft >= 2) { + QualType EltTy = Ty->castAs<ComplexType>()->getElementType(); + if (getContext().getTypeSize(EltTy) <= FLen) { + ArgFPRsLeft -= 2; + return ABIArgInfo::getDirect(); + } + } + + if (IsFixed && FLen && Ty->isStructureOrClassType()) { + llvm::Type *Field1Ty = nullptr; + llvm::Type *Field2Ty = nullptr; + CharUnits Field1Off = CharUnits::Zero(); + CharUnits Field2Off = CharUnits::Zero(); + int NeededArgGPRs; + int NeededArgFPRs; + bool IsCandidate = + detectFPCCEligibleStruct(Ty, Field1Ty, Field1Off, Field2Ty, Field2Off, + NeededArgGPRs, NeededArgFPRs); + if (IsCandidate && NeededArgGPRs <= ArgGPRsLeft && + NeededArgFPRs <= ArgFPRsLeft) { + ArgGPRsLeft -= NeededArgGPRs; + ArgFPRsLeft -= NeededArgFPRs; + return coerceAndExpandFPCCEligibleStruct(Field1Ty, Field1Off, Field2Ty, + Field2Off); + } + } + uint64_t NeededAlign = getContext().getTypeAlign(Ty); bool MustUseStack = false; // Determine the number of GPRs needed to pass the current argument @@ -9315,10 +9616,12 @@ ABIArgInfo RISCVABIInfo::classifyReturnType(QualType RetTy) const { return ABIArgInfo::getIgnore(); int ArgGPRsLeft = 2; + int ArgFPRsLeft = FLen ? 2 : 0; // The rules for return and argument types are the same, so defer to // classifyArgumentType. - return classifyArgumentType(RetTy, /*IsFixed=*/true, ArgGPRsLeft); + return classifyArgumentType(RetTy, /*IsFixed=*/true, ArgGPRsLeft, + ArgFPRsLeft); } Address RISCVABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, @@ -9353,8 +9656,9 @@ ABIArgInfo RISCVABIInfo::extendType(QualType Ty) const { namespace { class RISCVTargetCodeGenInfo : public TargetCodeGenInfo { public: - RISCVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen) - : TargetCodeGenInfo(new RISCVABIInfo(CGT, XLen)) {} + RISCVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, + unsigned FLen) + : TargetCodeGenInfo(new RISCVABIInfo(CGT, XLen, FLen)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override { @@ -9460,7 +9764,8 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { case llvm::Triple::ppc: return SetCGInfo( - new PPC32TargetCodeGenInfo(Types, CodeGenOpts.FloatABI == "soft")); + new PPC32TargetCodeGenInfo(Types, CodeGenOpts.FloatABI == "soft" || + getTarget().hasFeature("spe"))); case llvm::Triple::ppc64: if (Triple.isOSBinFormatELF()) { PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv1; @@ -9493,9 +9798,16 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { return SetCGInfo(new MSP430TargetCodeGenInfo(Types)); case llvm::Triple::riscv32: - return SetCGInfo(new RISCVTargetCodeGenInfo(Types, 32)); - case llvm::Triple::riscv64: - return SetCGInfo(new RISCVTargetCodeGenInfo(Types, 64)); + case llvm::Triple::riscv64: { + StringRef ABIStr = getTarget().getABI(); + unsigned XLen = getTarget().getPointerWidth(0); + unsigned ABIFLen = 0; + if (ABIStr.endswith("f")) + ABIFLen = 32; + else if (ABIStr.endswith("d")) + ABIFLen = 64; + return SetCGInfo(new RISCVTargetCodeGenInfo(Types, XLen, ABIFLen)); + } case llvm::Triple::systemz: { bool HasVector = getTarget().getABI() == "vector"; @@ -9642,7 +9954,7 @@ llvm::Function *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel( Builder.SetInsertPoint(BB); unsigned BlockAlign = CGF.CGM.getDataLayout().getPrefTypeAlignment(BlockTy); auto *BlockPtr = Builder.CreateAlloca(BlockTy, nullptr); - BlockPtr->setAlignment(BlockAlign); + BlockPtr->setAlignment(llvm::MaybeAlign(BlockAlign)); 
Builder.CreateAlignedStore(F->arg_begin(), BlockPtr, BlockAlign);
  auto *Cast = Builder.CreatePointerCast(BlockPtr, InvokeFT->getParamType(0));
  llvm::SmallVector<llvm::Value *, 2> Args;
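Editorial note on the RISC-V hard-float ABI changes above: the sketch below
(hypothetical struct names, not code from the patch) illustrates how
detectFPCCEligibleStruct would classify a few fixed (non-variadic) C
arguments under the lp64d ABI, where XLen = 64 and FLen = 64:

    /* Illustrative sketch only -- assumes lp64d (XLen=64, FLen=64) and
       enough free argument registers for each case. */
    #include <stdint.h>

    struct FF  { float a; float b; };          /* fp+fp: eligible, 2 FPRs    */
    struct DI  { double d; int32_t i; };       /* fp+int: eligible,
                                                  1 FPR + 1 GPR              */
    struct FZ  { float f; int : 0; };          /* single fp plus a zero-width
                                                  bitfield: still eligible   */
    struct FZF { float f; int : 0; float g; }; /* fp+fp with a zero-width
                                                  bitfield: not eligible
                                                  (the ABI quirk noted above)*/
    struct II  { int32_t a; int32_t b; };      /* no fp field: not eligible  */
    struct DDD { double a, b, c; };            /* flattens to three values:
                                                  not eligible               */

Eligible structs are then lowered via coerceAndExpandFPCCEligibleStruct as
long as enough GPRs and FPRs remain; everything else (including all variadic
arguments, for which IsFixed is false) falls back to the integer calling
convention.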