Diffstat (limited to 'lib/CodeGen')
59 files changed, 7957 insertions, 3205 deletions
diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp
index 513896d98634..2c033e0f7c02 100644
--- a/lib/CodeGen/BackendUtil.cpp
+++ b/lib/CodeGen/BackendUtil.cpp
@@ -44,13 +44,14 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/Transforms/Coroutines.h"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/IPO/AlwaysInliner.h"
 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
 #include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"
 #include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Instrumentation/BoundsChecking.h"
 #include "llvm/Transforms/ObjCARC.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Scalar/GVN.h"
@@ -168,7 +169,7 @@ static void addAddDiscriminatorsPass(const PassManagerBuilder &Builder,
 static void addBoundsCheckingPass(const PassManagerBuilder &Builder,
                                   legacy::PassManagerBase &PM) {
-  PM.add(createBoundsCheckingPass());
+  PM.add(createBoundsCheckingLegacyPass());
 }
 
 static void addSanitizerCoveragePass(const PassManagerBuilder &Builder,
@@ -189,6 +190,8 @@ static void addSanitizerCoveragePass(const PassManagerBuilder &Builder,
   Opts.TracePCGuard = CGOpts.SanitizeCoverageTracePCGuard;
   Opts.NoPrune = CGOpts.SanitizeCoverageNoPrune;
   Opts.Inline8bitCounters = CGOpts.SanitizeCoverageInline8bitCounters;
+  Opts.PCTable = CGOpts.SanitizeCoveragePCTable;
+  Opts.StackDepth = CGOpts.SanitizeCoverageStackDepth;
   PM.add(createSanitizerCoverageModulePass(Opts));
 }
 
@@ -234,6 +237,11 @@ static void addKernelAddressSanitizerPasses(const PassManagerBuilder &Builder,
                                             /*Recover*/true));
 }
 
+static void addHWAddressSanitizerPasses(const PassManagerBuilder &Builder,
+                                        legacy::PassManagerBase &PM) {
+  PM.add(createHWAddressSanitizerPass());
+}
+
 static void addMemorySanitizerPass(const PassManagerBuilder &Builder,
                                    legacy::PassManagerBase &PM) {
   const PassManagerBuilderWrapper &BuilderWrapper =
@@ -334,16 +342,18 @@ static CodeGenOpt::Level getCGOptLevel(const CodeGenOptions &CodeGenOpts) {
   }
 }
 
-static llvm::CodeModel::Model getCodeModel(const CodeGenOptions &CodeGenOpts) {
-  unsigned CodeModel =
-      llvm::StringSwitch<unsigned>(CodeGenOpts.CodeModel)
-          .Case("small", llvm::CodeModel::Small)
-          .Case("kernel", llvm::CodeModel::Kernel)
-          .Case("medium", llvm::CodeModel::Medium)
-          .Case("large", llvm::CodeModel::Large)
-          .Case("default", llvm::CodeModel::Default)
-          .Default(~0u);
+static Optional<llvm::CodeModel::Model>
+getCodeModel(const CodeGenOptions &CodeGenOpts) {
+  unsigned CodeModel = llvm::StringSwitch<unsigned>(CodeGenOpts.CodeModel)
+                           .Case("small", llvm::CodeModel::Small)
+                           .Case("kernel", llvm::CodeModel::Kernel)
+                           .Case("medium", llvm::CodeModel::Medium)
+                           .Case("large", llvm::CodeModel::Large)
+                           .Case("default", ~1u)
+                           .Default(~0u);
   assert(CodeModel != ~0u && "invalid code model!");
+  if (CodeModel == ~1u)
+    return None;
   return static_cast<llvm::CodeModel::Model>(CodeModel);
 }
 
@@ -419,6 +429,10 @@ static void initTargetOptions(llvm::TargetOptions &Options,
   if (LangOpts.SjLjExceptions)
     Options.ExceptionModel = llvm::ExceptionHandling::SjLj;
+  if (LangOpts.SEHExceptions)
+    Options.ExceptionModel = llvm::ExceptionHandling::WinEH;
+  if (LangOpts.DWARFExceptions)
+    Options.ExceptionModel = llvm::ExceptionHandling::DwarfCFI;
 
   Options.NoInfsFPMath = CodeGenOpts.NoInfsFPMath;
   Options.NoNaNsFPMath = CodeGenOpts.NoNaNsFPMath;
@@ -547,6 +561,13 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
                            addKernelAddressSanitizerPasses);
   }
 
+  if (LangOpts.Sanitize.has(SanitizerKind::HWAddress)) {
+    PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast,
+                           addHWAddressSanitizerPasses);
+    PMBuilder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0,
+                           addHWAddressSanitizerPasses);
+  }
+
   if (LangOpts.Sanitize.has(SanitizerKind::Memory)) {
     PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast,
                            addMemorySanitizerPass);
@@ -657,7 +678,7 @@ void EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) {
     return;
   }
 
-  llvm::CodeModel::Model CM = getCodeModel(CodeGenOpts);
+  Optional<llvm::CodeModel::Model> CM = getCodeModel(CodeGenOpts);
   std::string FeaturesStr =
       llvm::join(TargetOpts.Features.begin(), TargetOpts.Features.end(), ",");
   llvm::Reloc::Model RM = getRelocModel(CodeGenOpts);
@@ -840,37 +861,44 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
     return;
   TheModule->setDataLayout(TM->createDataLayout());
 
-  PGOOptions PGOOpt;
-
-  // -fprofile-generate.
-  PGOOpt.RunProfileGen = CodeGenOpts.hasProfileIRInstr();
-  if (PGOOpt.RunProfileGen)
-    PGOOpt.ProfileGenFile = CodeGenOpts.InstrProfileOutput.empty() ?
-      DefaultProfileGenName : CodeGenOpts.InstrProfileOutput;
-
-  // -fprofile-use.
-  if (CodeGenOpts.hasProfileIRUse())
-    PGOOpt.ProfileUseFile = CodeGenOpts.ProfileInstrumentUsePath;
-
-  if (!CodeGenOpts.SampleProfileFile.empty())
-    PGOOpt.SampleProfileFile = CodeGenOpts.SampleProfileFile;
-
-  // Only pass a PGO options struct if -fprofile-generate or
-  // -fprofile-use were passed on the cmdline.
-  PassBuilder PB(TM.get(),
-    (PGOOpt.RunProfileGen ||
-      !PGOOpt.ProfileUseFile.empty() ||
-      !PGOOpt.SampleProfileFile.empty()) ?
-        Optional<PGOOptions>(PGOOpt) : None);
-
-  LoopAnalysisManager LAM;
-  FunctionAnalysisManager FAM;
-  CGSCCAnalysisManager CGAM;
-  ModuleAnalysisManager MAM;
+  Optional<PGOOptions> PGOOpt;
+
+  if (CodeGenOpts.hasProfileIRInstr())
+    // -fprofile-generate.
+    PGOOpt = PGOOptions(CodeGenOpts.InstrProfileOutput.empty()
+                            ? DefaultProfileGenName
+                            : CodeGenOpts.InstrProfileOutput,
+                        "", "", true, CodeGenOpts.DebugInfoForProfiling);
+  else if (CodeGenOpts.hasProfileIRUse())
+    // -fprofile-use.
+    PGOOpt = PGOOptions("", CodeGenOpts.ProfileInstrumentUsePath, "", false,
+                        CodeGenOpts.DebugInfoForProfiling);
+  else if (!CodeGenOpts.SampleProfileFile.empty())
+    // -fprofile-sample-use
+    PGOOpt = PGOOptions("", "", CodeGenOpts.SampleProfileFile, false,
+                        CodeGenOpts.DebugInfoForProfiling);
+  else if (CodeGenOpts.DebugInfoForProfiling)
+    // -fdebug-info-for-profiling
+    PGOOpt = PGOOptions("", "", "", false, true);
+
+  PassBuilder PB(TM.get(), PGOOpt);
+
+  LoopAnalysisManager LAM(CodeGenOpts.DebugPassManager);
+  FunctionAnalysisManager FAM(CodeGenOpts.DebugPassManager);
+  CGSCCAnalysisManager CGAM(CodeGenOpts.DebugPassManager);
+  ModuleAnalysisManager MAM(CodeGenOpts.DebugPassManager);
 
   // Register the AA manager first so that our version is the one used.
   FAM.registerPass([&] { return PB.buildDefaultAAPipeline(); });
 
+  // Register the target library analysis directly and give it a customized
+  // preset TLI.
+  Triple TargetTriple(TheModule->getTargetTriple());
+  std::unique_ptr<TargetLibraryInfoImpl> TLII(
+      createTLII(TargetTriple, CodeGenOpts));
+  FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
+  MAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
+
   // Register all the basic analyses with the managers.
   PB.registerModuleAnalyses(MAM);
   PB.registerCGSCCAnalyses(CGAM);
@@ -888,6 +916,12 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
     // Build a minimal pipeline based on the semantics required by Clang,
     // which is just that always inlining occurs.
     MPM.addPass(AlwaysInlinerPass());
+
+    // At -O0 we directly run necessary sanitizer passes.
+    if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds))
+      MPM.addPass(createModuleToFunctionPassAdaptor(BoundsCheckingPass()));
+
+    // Lastly, add a semantically necessary pass for ThinLTO.
     if (IsThinLTO)
       MPM.addPass(NameAnonGlobalPass());
   } else {
@@ -895,6 +929,14 @@
     // configure the pipeline.
     PassBuilder::OptimizationLevel Level = mapToLevel(CodeGenOpts);
 
+    // Register callbacks to schedule sanitizer passes at the appropriate part of
+    // the pipeline.
+    if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds))
+      PB.registerScalarOptimizerLateEPCallback(
+          [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
+            FPM.addPass(BoundsCheckingPass());
+          });
+
     if (IsThinLTO) {
       MPM = PB.buildThinLTOPreLinkDefaultPipeline(
           Level, CodeGenOpts.DebugPassManager);
@@ -1062,6 +1104,7 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
   initTargetOptions(Conf.Options, CGOpts, TOpts, LOpts, HeaderOpts);
   Conf.SampleProfile = std::move(SampleProfile);
   Conf.UseNewPM = CGOpts.ExperimentalNewPassManager;
+  Conf.DebugPassManager = CGOpts.DebugPassManager;
   switch (Action) {
   case Backend_EmitNothing:
     Conf.PreCodeGenModuleHook = [](size_t Task, const Module &Mod) {
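The BackendUtil.cpp changes register the new HWAddressSanitizer with the legacy PassManagerBuilder at two extension points, so the pass runs both in optimized builds and at -O0. Below is a minimal, hedged sketch of that mechanism, using the stock verifier pass as a stand-in since createHWAddressSanitizerPass and its header are specific to this patch:

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/IR/Verifier.h"
    #include "llvm/Transforms/IPO/PassManagerBuilder.h"

    // Callback invoked by PassManagerBuilder when the pipeline reaches the
    // chosen extension point; the patch's addHWAddressSanitizerPasses has
    // the same shape.
    static void addMyPass(const llvm::PassManagerBuilder &Builder,
                          llvm::legacy::PassManagerBase &PM) {
      PM.add(llvm::createVerifierPass()); // stand-in for the sanitizer pass
    }

    void hookPipeline(llvm::PassManagerBuilder &PMBuilder) {
      // Runs at the end of the optimization pipeline...
      PMBuilder.addExtension(llvm::PassManagerBuilder::EP_OptimizerLast,
                             addMyPass);
      // ...and also at -O0, where EP_OptimizerLast never fires.
      PMBuilder.addExtension(llvm::PassManagerBuilder::EP_EnabledOnOptLevel0,
                             addMyPass);
    }

Registering at both extension points is what makes the sanitizer unconditional: EP_OptimizerLast only fires when the optimizer runs, so EP_EnabledOnOptLevel0 covers unoptimized builds.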
diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp
index a6e6fec206d5..d90c3a53a635 100644
--- a/lib/CodeGen/CGAtomic.cpp
+++ b/lib/CodeGen/CGAtomic.cpp
@@ -15,8 +15,10 @@
 #include "CGRecordLayout.h"
 #include "CodeGenFunction.h"
 #include "CodeGenModule.h"
+#include "TargetInfo.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Operator.h"
@@ -94,9 +96,8 @@ namespace {
         BFI.StorageSize = AtomicSizeInBits;
         BFI.StorageOffset += OffsetInChars;
         LVal = LValue::MakeBitfield(Address(Addr, lvalue.getAlignment()),
-                                    BFI, lvalue.getType(),
-                                    lvalue.getBaseInfo());
-        LVal.setTBAAInfo(lvalue.getTBAAInfo());
+                                    BFI, lvalue.getType(), lvalue.getBaseInfo(),
+                                    lvalue.getTBAAInfo());
         AtomicTy = C.getIntTypeForBitwidth(AtomicSizeInBits, OrigBFI.IsSigned);
         if (AtomicTy.isNull()) {
           llvm::APInt Size(
@@ -359,13 +360,15 @@ static void emitAtomicCmpXchg(CodeGenFunction &CGF, AtomicExpr *E, bool IsWeak,
                               Address Val1, Address Val2,
                               uint64_t Size,
                               llvm::AtomicOrdering SuccessOrder,
-                              llvm::AtomicOrdering FailureOrder) {
+                              llvm::AtomicOrdering FailureOrder,
+                              llvm::SyncScope::ID Scope) {
   // Note that cmpxchg doesn't support weak cmpxchg, at least at the moment.
   llvm::Value *Expected = CGF.Builder.CreateLoad(Val1);
   llvm::Value *Desired = CGF.Builder.CreateLoad(Val2);
 
   llvm::AtomicCmpXchgInst *Pair = CGF.Builder.CreateAtomicCmpXchg(
-      Ptr.getPointer(), Expected, Desired, SuccessOrder, FailureOrder);
+      Ptr.getPointer(), Expected, Desired, SuccessOrder, FailureOrder,
+      Scope);
   Pair->setVolatile(E->isVolatile());
   Pair->setWeak(IsWeak);
@@ -407,7 +410,8 @@ static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E,
                                         Address Val1, Address Val2,
                                         llvm::Value *FailureOrderVal,
                                         uint64_t Size,
-                                        llvm::AtomicOrdering SuccessOrder) {
+                                        llvm::AtomicOrdering SuccessOrder,
+                                        llvm::SyncScope::ID Scope) {
   llvm::AtomicOrdering FailureOrder;
   if (llvm::ConstantInt *FO = dyn_cast<llvm::ConstantInt>(FailureOrderVal)) {
     auto FOS = FO->getSExtValue();
@@ -435,7 +439,7 @@ static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E,
           llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrder);
     }
     emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
-                      FailureOrder);
+                      FailureOrder, Scope);
     return;
   }
 
@@ -460,13 +464,13 @@ static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E,
   // doesn't fold to a constant for the ordering.
   CGF.Builder.SetInsertPoint(MonotonicBB);
   emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2,
-                    Size, SuccessOrder, llvm::AtomicOrdering::Monotonic);
+                    Size, SuccessOrder, llvm::AtomicOrdering::Monotonic, Scope);
   CGF.Builder.CreateBr(ContBB);
 
   if (AcquireBB) {
     CGF.Builder.SetInsertPoint(AcquireBB);
     emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2,
-                      Size, SuccessOrder, llvm::AtomicOrdering::Acquire);
+                      Size, SuccessOrder, llvm::AtomicOrdering::Acquire, Scope);
     CGF.Builder.CreateBr(ContBB);
     SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::consume),
                 AcquireBB);
@@ -476,7 +480,7 @@ static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E,
   if (SeqCstBB) {
     CGF.Builder.SetInsertPoint(SeqCstBB);
     emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
-                      llvm::AtomicOrdering::SequentiallyConsistent);
+                      llvm::AtomicOrdering::SequentiallyConsistent, Scope);
     CGF.Builder.CreateBr(ContBB);
     SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst),
                 SeqCstBB);
@@ -488,27 +492,31 @@ static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E,
 static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
                          Address Ptr, Address Val1, Address Val2,
                          llvm::Value *IsWeak, llvm::Value *FailureOrder,
-                         uint64_t Size, llvm::AtomicOrdering Order) {
+                         uint64_t Size, llvm::AtomicOrdering Order,
+                         llvm::SyncScope::ID Scope) {
   llvm::AtomicRMWInst::BinOp Op = llvm::AtomicRMWInst::Add;
   llvm::Instruction::BinaryOps PostOp = (llvm::Instruction::BinaryOps)0;
 
   switch (E->getOp()) {
   case AtomicExpr::AO__c11_atomic_init:
+  case AtomicExpr::AO__opencl_atomic_init:
     llvm_unreachable("Already handled!");
 
   case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
+  case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
     emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2,
-                                FailureOrder, Size, Order);
+                                FailureOrder, Size, Order, Scope);
     return;
   case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
+  case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
     emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2,
-                                FailureOrder, Size, Order);
+                                FailureOrder, Size, Order, Scope);
     return;
   case AtomicExpr::AO__atomic_compare_exchange:
   case AtomicExpr::AO__atomic_compare_exchange_n: {
     if (llvm::ConstantInt *IsWeakC = dyn_cast<llvm::ConstantInt>(IsWeak)) {
       emitAtomicCmpXchgFailureSet(CGF, E, IsWeakC->getZExtValue(), Dest, Ptr,
-                                  Val1, Val2, FailureOrder, Size, Order);
+                                  Val1, Val2, FailureOrder, Size, Order, Scope);
     } else {
       // Create all the relevant BB's
       llvm::BasicBlock *StrongBB =
@@ -522,12 +530,12 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
 
       CGF.Builder.SetInsertPoint(StrongBB);
       emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2,
-                                  FailureOrder, Size, Order);
+                                  FailureOrder, Size, Order, Scope);
       CGF.Builder.CreateBr(ContBB);
 
       CGF.Builder.SetInsertPoint(WeakBB);
       emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2,
-                                  FailureOrder, Size, Order);
+                                  FailureOrder, Size, Order, Scope);
       CGF.Builder.CreateBr(ContBB);
 
       CGF.Builder.SetInsertPoint(ContBB);
@@ -535,26 +543,29 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
     return;
   }
   case AtomicExpr::AO__c11_atomic_load:
+  case AtomicExpr::AO__opencl_atomic_load:
   case AtomicExpr::AO__atomic_load_n:
   case AtomicExpr::AO__atomic_load: {
     llvm::LoadInst *Load = CGF.Builder.CreateLoad(Ptr);
-    Load->setAtomic(Order);
+    Load->setAtomic(Order, Scope);
     Load->setVolatile(E->isVolatile());
     CGF.Builder.CreateStore(Load, Dest);
     return;
   }
 
   case AtomicExpr::AO__c11_atomic_store:
+  case AtomicExpr::AO__opencl_atomic_store:
   case AtomicExpr::AO__atomic_store:
   case AtomicExpr::AO__atomic_store_n: {
     llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
     llvm::StoreInst *Store = CGF.Builder.CreateStore(LoadVal1, Ptr);
-    Store->setAtomic(Order);
+    Store->setAtomic(Order, Scope);
     Store->setVolatile(E->isVolatile());
     return;
   }
 
   case AtomicExpr::AO__c11_atomic_exchange:
+  case AtomicExpr::AO__opencl_atomic_exchange:
   case AtomicExpr::AO__atomic_exchange_n:
   case AtomicExpr::AO__atomic_exchange:
     Op = llvm::AtomicRMWInst::Xchg;
@@ -564,6 +575,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
     PostOp = llvm::Instruction::Add;
     // Fall through.
   case AtomicExpr::AO__c11_atomic_fetch_add:
+  case AtomicExpr::AO__opencl_atomic_fetch_add:
   case AtomicExpr::AO__atomic_fetch_add:
     Op = llvm::AtomicRMWInst::Add;
     break;
@@ -572,14 +584,26 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
     PostOp = llvm::Instruction::Sub;
     // Fall through.
   case AtomicExpr::AO__c11_atomic_fetch_sub:
+  case AtomicExpr::AO__opencl_atomic_fetch_sub:
   case AtomicExpr::AO__atomic_fetch_sub:
     Op = llvm::AtomicRMWInst::Sub;
     break;
 
+  case AtomicExpr::AO__opencl_atomic_fetch_min:
+    Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Min
+                                                  : llvm::AtomicRMWInst::UMin;
+    break;
+
+  case AtomicExpr::AO__opencl_atomic_fetch_max:
+    Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Max
+                                                  : llvm::AtomicRMWInst::UMax;
+    break;
+
   case AtomicExpr::AO__atomic_and_fetch:
     PostOp = llvm::Instruction::And;
     // Fall through.
   case AtomicExpr::AO__c11_atomic_fetch_and:
+  case AtomicExpr::AO__opencl_atomic_fetch_and:
   case AtomicExpr::AO__atomic_fetch_and:
     Op = llvm::AtomicRMWInst::And;
     break;
@@ -588,6 +612,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
     PostOp = llvm::Instruction::Or;
     // Fall through.
   case AtomicExpr::AO__c11_atomic_fetch_or:
+  case AtomicExpr::AO__opencl_atomic_fetch_or:
   case AtomicExpr::AO__atomic_fetch_or:
     Op = llvm::AtomicRMWInst::Or;
     break;
@@ -596,6 +621,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
     PostOp = llvm::Instruction::Xor;
     // Fall through.
   case AtomicExpr::AO__c11_atomic_fetch_xor:
+  case AtomicExpr::AO__opencl_atomic_fetch_xor:
   case AtomicExpr::AO__atomic_fetch_xor:
     Op = llvm::AtomicRMWInst::Xor;
     break;
@@ -610,7 +636,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
 
   llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
   llvm::AtomicRMWInst *RMWI =
-      CGF.Builder.CreateAtomicRMW(Op, Ptr.getPointer(), LoadVal1, Order);
+      CGF.Builder.CreateAtomicRMW(Op, Ptr.getPointer(), LoadVal1, Order, Scope);
   RMWI->setVolatile(E->isVolatile());
 
   // For __atomic_*_fetch operations, perform the operation again to
@@ -633,6 +659,61 @@ EmitValToTemp(CodeGenFunction &CGF, Expr *E) {
   return DeclPtr;
 }
 
+static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest,
+                         Address Ptr, Address Val1, Address Val2,
+                         llvm::Value *IsWeak, llvm::Value *FailureOrder,
+                         uint64_t Size, llvm::AtomicOrdering Order,
+                         llvm::Value *Scope) {
+  auto ScopeModel = Expr->getScopeModel();
+
+  // LLVM atomic instructions always have synch scope. If clang atomic
+  // expression has no scope operand, use default LLVM synch scope.
+  if (!ScopeModel) {
+    EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size,
+                 Order, CGF.CGM.getLLVMContext().getOrInsertSyncScopeID(""));
+    return;
+  }
+
+  // Handle constant scope.
+  if (auto SC = dyn_cast<llvm::ConstantInt>(Scope)) {
+    auto SCID = CGF.getTargetHooks().getLLVMSyncScopeID(
+        ScopeModel->map(SC->getZExtValue()), CGF.CGM.getLLVMContext());
+    EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size,
+                 Order, SCID);
+    return;
+  }
+
+  // Handle non-constant scope.
+  auto &Builder = CGF.Builder;
+  auto Scopes = ScopeModel->getRuntimeValues();
+  llvm::DenseMap<unsigned, llvm::BasicBlock *> BB;
+  for (auto S : Scopes)
+    BB[S] = CGF.createBasicBlock(getAsString(ScopeModel->map(S)), CGF.CurFn);
+
+  llvm::BasicBlock *ContBB =
+      CGF.createBasicBlock("atomic.scope.continue", CGF.CurFn);
+
+  auto *SC = Builder.CreateIntCast(Scope, Builder.getInt32Ty(), false);
+  // If unsupported synch scope is encountered at run time, assume a fallback
+  // synch scope value.
+  auto FallBack = ScopeModel->getFallBackValue();
+  llvm::SwitchInst *SI = Builder.CreateSwitch(SC, BB[FallBack]);
+  for (auto S : Scopes) {
+    auto *B = BB[S];
+    if (S != FallBack)
+      SI->addCase(Builder.getInt32(S), B);
+
+    Builder.SetInsertPoint(B);
+    EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size,
+                 Order,
+                 CGF.getTargetHooks().getLLVMSyncScopeID(ScopeModel->map(S),
+                                                         CGF.getLLVMContext()));
+    Builder.CreateBr(ContBB);
+  }
+
+  Builder.SetInsertPoint(ContBB);
+}
+
 static void
 AddDirectArgument(CodeGenFunction &CGF, CallArgList &Args,
                   bool UseOptimizedLibcall, llvm::Value *Val, QualType ValTy,
@@ -663,33 +744,38 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
   QualType MemTy = AtomicTy;
   if (const AtomicType *AT = AtomicTy->getAs<AtomicType>())
     MemTy = AT->getValueType();
-  CharUnits sizeChars, alignChars;
-  std::tie(sizeChars, alignChars) = getContext().getTypeInfoInChars(AtomicTy);
-  uint64_t Size = sizeChars.getQuantity();
-  unsigned MaxInlineWidthInBits = getTarget().getMaxAtomicInlineWidth();
-  bool UseLibcall = (sizeChars != alignChars ||
-                     getContext().toBits(sizeChars) > MaxInlineWidthInBits);
-
   llvm::Value *IsWeak = nullptr, *OrderFail = nullptr;
 
   Address Val1 = Address::invalid();
   Address Val2 = Address::invalid();
   Address Dest = Address::invalid();
-  Address Ptr(EmitScalarExpr(E->getPtr()), alignChars);
+  Address Ptr = EmitPointerWithAlignment(E->getPtr());
+
+  CharUnits sizeChars, alignChars;
+  std::tie(sizeChars, alignChars) = getContext().getTypeInfoInChars(AtomicTy);
+  uint64_t Size = sizeChars.getQuantity();
+  unsigned MaxInlineWidthInBits = getTarget().getMaxAtomicInlineWidth();
+  bool UseLibcall = ((Ptr.getAlignment() % sizeChars) != 0 ||
+                     getContext().toBits(sizeChars) > MaxInlineWidthInBits);
 
-  if (E->getOp() == AtomicExpr::AO__c11_atomic_init) {
+  if (E->getOp() == AtomicExpr::AO__c11_atomic_init ||
+      E->getOp() == AtomicExpr::AO__opencl_atomic_init) {
     LValue lvalue = MakeAddrLValue(Ptr, AtomicTy);
     EmitAtomicInit(E->getVal1(), lvalue);
     return RValue::get(nullptr);
   }
 
   llvm::Value *Order = EmitScalarExpr(E->getOrder());
+  llvm::Value *Scope =
+      E->getScopeModel() ? EmitScalarExpr(E->getScope()) : nullptr;
 
   switch (E->getOp()) {
   case AtomicExpr::AO__c11_atomic_init:
+  case AtomicExpr::AO__opencl_atomic_init:
     llvm_unreachable("Already handled above with EmitAtomicInit!");
 
   case AtomicExpr::AO__c11_atomic_load:
+  case AtomicExpr::AO__opencl_atomic_load:
   case AtomicExpr::AO__atomic_load_n:
     break;
 
@@ -708,6 +794,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
 
   case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
   case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
+  case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
+  case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
   case AtomicExpr::AO__atomic_compare_exchange_n:
   case AtomicExpr::AO__atomic_compare_exchange:
     Val1 = EmitPointerWithAlignment(E->getVal1());
@@ -716,12 +804,15 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
     else
       Val2 = EmitValToTemp(*this, E->getVal2());
     OrderFail = EmitScalarExpr(E->getOrderFail());
-    if (E->getNumSubExprs() == 6)
+    if (E->getOp() == AtomicExpr::AO__atomic_compare_exchange_n ||
+        E->getOp() == AtomicExpr::AO__atomic_compare_exchange)
      IsWeak = EmitScalarExpr(E->getWeak());
     break;
 
   case AtomicExpr::AO__c11_atomic_fetch_add:
   case AtomicExpr::AO__c11_atomic_fetch_sub:
+  case AtomicExpr::AO__opencl_atomic_fetch_add:
+  case AtomicExpr::AO__opencl_atomic_fetch_sub:
     if (MemTy->isPointerType()) {
       // For pointer arithmetic, we're required to do a bit of math:
       // adding 1 to an int* is not the same as adding 1 to a uintptr_t.
@@ -744,11 +835,18 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
   case AtomicExpr::AO__atomic_sub_fetch:
   case AtomicExpr::AO__c11_atomic_store:
   case AtomicExpr::AO__c11_atomic_exchange:
+  case AtomicExpr::AO__opencl_atomic_store:
+  case AtomicExpr::AO__opencl_atomic_exchange:
   case AtomicExpr::AO__atomic_store_n:
   case AtomicExpr::AO__atomic_exchange_n:
   case AtomicExpr::AO__c11_atomic_fetch_and:
   case AtomicExpr::AO__c11_atomic_fetch_or:
   case AtomicExpr::AO__c11_atomic_fetch_xor:
+  case AtomicExpr::AO__opencl_atomic_fetch_and:
+  case AtomicExpr::AO__opencl_atomic_fetch_or:
+  case AtomicExpr::AO__opencl_atomic_fetch_xor:
+  case AtomicExpr::AO__opencl_atomic_fetch_min:
+  case AtomicExpr::AO__opencl_atomic_fetch_max:
   case AtomicExpr::AO__atomic_fetch_and:
   case AtomicExpr::AO__atomic_fetch_or:
   case AtomicExpr::AO__atomic_fetch_xor:
@@ -784,18 +882,26 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
   bool UseOptimizedLibcall = false;
   switch (E->getOp()) {
   case AtomicExpr::AO__c11_atomic_init:
+  case AtomicExpr::AO__opencl_atomic_init:
     llvm_unreachable("Already handled above with EmitAtomicInit!");
 
   case AtomicExpr::AO__c11_atomic_fetch_add:
+  case AtomicExpr::AO__opencl_atomic_fetch_add:
   case AtomicExpr::AO__atomic_fetch_add:
   case AtomicExpr::AO__c11_atomic_fetch_and:
+  case AtomicExpr::AO__opencl_atomic_fetch_and:
   case AtomicExpr::AO__atomic_fetch_and:
   case AtomicExpr::AO__c11_atomic_fetch_or:
+  case AtomicExpr::AO__opencl_atomic_fetch_or:
   case AtomicExpr::AO__atomic_fetch_or:
   case AtomicExpr::AO__atomic_fetch_nand:
   case AtomicExpr::AO__c11_atomic_fetch_sub:
+  case AtomicExpr::AO__opencl_atomic_fetch_sub:
   case AtomicExpr::AO__atomic_fetch_sub:
   case AtomicExpr::AO__c11_atomic_fetch_xor:
+  case AtomicExpr::AO__opencl_atomic_fetch_xor:
+  case AtomicExpr::AO__opencl_atomic_fetch_min:
+  case AtomicExpr::AO__opencl_atomic_fetch_max:
   case AtomicExpr::AO__atomic_fetch_xor:
   case AtomicExpr::AO__atomic_add_fetch:
   case AtomicExpr::AO__atomic_and_fetch:
@@ -812,6 +918,11 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
   case AtomicExpr::AO__c11_atomic_exchange:
   case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
   case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
+  case AtomicExpr::AO__opencl_atomic_load:
+  case AtomicExpr::AO__opencl_atomic_store:
+  case AtomicExpr::AO__opencl_atomic_exchange:
+  case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
+  case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
   case AtomicExpr::AO__atomic_load_n:
   case AtomicExpr::AO__atomic_load:
   case AtomicExpr::AO__atomic_store_n:
@@ -833,7 +944,24 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
                getContext().getSizeType());
     }
     // Atomic address is the first or second parameter
-    Args.add(RValue::get(EmitCastToVoidPtr(Ptr.getPointer())),
+    // The OpenCL atomic library functions only accept pointer arguments to
+    // generic address space.
+    auto CastToGenericAddrSpace = [&](llvm::Value *V, QualType PT) {
+      if (!E->isOpenCL())
+        return V;
+      auto AS = PT->getAs<PointerType>()->getPointeeType().getAddressSpace();
+      if (AS == LangAS::opencl_generic)
+        return V;
+      auto DestAS = getContext().getTargetAddressSpace(LangAS::opencl_generic);
+      auto T = V->getType();
+      auto *DestType = T->getPointerElementType()->getPointerTo(DestAS);
+
+      return getTargetHooks().performAddrSpaceCast(
+          *this, V, AS, LangAS::opencl_generic, DestType, false);
+    };
+
+    Args.add(RValue::get(CastToGenericAddrSpace(
+                 EmitCastToVoidPtr(Ptr.getPointer()), E->getPtr()->getType())),
              getContext().VoidPtrTy);
 
     std::string LibCallName;
@@ -844,6 +972,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
     llvm::Instruction::BinaryOps PostOp = (llvm::Instruction::BinaryOps)0;
     switch (E->getOp()) {
     case AtomicExpr::AO__c11_atomic_init:
+    case AtomicExpr::AO__opencl_atomic_init:
       llvm_unreachable("Already handled!");
 
     // There is only one libcall for compare an exchange, because there is no
@@ -855,13 +984,17 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
     //                                  int success, int failure)
     case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
     case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
+    case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
+    case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
     case AtomicExpr::AO__atomic_compare_exchange:
     case AtomicExpr::AO__atomic_compare_exchange_n:
       LibCallName = "__atomic_compare_exchange";
       RetTy = getContext().BoolTy;
       HaveRetTy = true;
-      Args.add(RValue::get(EmitCastToVoidPtr(Val1.getPointer())),
-               getContext().VoidPtrTy);
+      Args.add(
+          RValue::get(CastToGenericAddrSpace(
+              EmitCastToVoidPtr(Val1.getPointer()), E->getVal1()->getType())),
+          getContext().VoidPtrTy);
       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val2.getPointer(),
                         MemTy, E->getExprLoc(), sizeChars);
       Args.add(RValue::get(Order), getContext().IntTy);
@@ -871,6 +1004,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
     //                      int order)
     // T __atomic_exchange_N(T *mem, T val, int order)
     case AtomicExpr::AO__c11_atomic_exchange:
+    case AtomicExpr::AO__opencl_atomic_exchange:
    case AtomicExpr::AO__atomic_exchange_n:
     case AtomicExpr::AO__atomic_exchange:
       LibCallName = "__atomic_exchange";
@@ -880,6 +1014,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
     // void __atomic_store(size_t size, void *mem, void *val, int order)
     // void __atomic_store_N(T *mem, T val, int order)
     case AtomicExpr::AO__c11_atomic_store:
+    case AtomicExpr::AO__opencl_atomic_store:
     case AtomicExpr::AO__atomic_store:
     case AtomicExpr::AO__atomic_store_n:
       LibCallName = "__atomic_store";
@@ -891,6 +1026,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
     // void __atomic_load(size_t size, void *mem, void *return, int order)
     // T __atomic_load_N(T *mem, int order)
     case AtomicExpr::AO__c11_atomic_load:
+    case AtomicExpr::AO__opencl_atomic_load:
     case AtomicExpr::AO__atomic_load:
     case AtomicExpr::AO__atomic_load_n:
       LibCallName = "__atomic_load";
@@ -901,6 +1037,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
       PostOp = llvm::Instruction::Add;
     // Fall through.
     case AtomicExpr::AO__c11_atomic_fetch_add:
+    case AtomicExpr::AO__opencl_atomic_fetch_add:
     case AtomicExpr::AO__atomic_fetch_add:
       LibCallName = "__atomic_fetch_add";
       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
@@ -912,6 +1049,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
       PostOp = llvm::Instruction::And;
     // Fall through.
     case AtomicExpr::AO__c11_atomic_fetch_and:
+    case AtomicExpr::AO__opencl_atomic_fetch_and:
     case AtomicExpr::AO__atomic_fetch_and:
       LibCallName = "__atomic_fetch_and";
       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
@@ -923,6 +1061,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
       PostOp = llvm::Instruction::Or;
     // Fall through.
     case AtomicExpr::AO__c11_atomic_fetch_or:
+    case AtomicExpr::AO__opencl_atomic_fetch_or:
     case AtomicExpr::AO__atomic_fetch_or:
       LibCallName = "__atomic_fetch_or";
       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
@@ -934,6 +1073,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
       PostOp = llvm::Instruction::Sub;
     // Fall through.
     case AtomicExpr::AO__c11_atomic_fetch_sub:
+    case AtomicExpr::AO__opencl_atomic_fetch_sub:
     case AtomicExpr::AO__atomic_fetch_sub:
       LibCallName = "__atomic_fetch_sub";
       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
@@ -945,11 +1085,26 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
       PostOp = llvm::Instruction::Xor;
     // Fall through.
     case AtomicExpr::AO__c11_atomic_fetch_xor:
+    case AtomicExpr::AO__opencl_atomic_fetch_xor:
     case AtomicExpr::AO__atomic_fetch_xor:
       LibCallName = "__atomic_fetch_xor";
       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
                         MemTy, E->getExprLoc(), sizeChars);
       break;
+    case AtomicExpr::AO__opencl_atomic_fetch_min:
+      LibCallName = E->getValueType()->isSignedIntegerType()
+                        ? "__atomic_fetch_min"
+                        : "__atomic_fetch_umin";
+      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
+                        LoweredMemTy, E->getExprLoc(), sizeChars);
+      break;
+    case AtomicExpr::AO__opencl_atomic_fetch_max:
+      LibCallName = E->getValueType()->isSignedIntegerType()
+                        ? "__atomic_fetch_max"
+                        : "__atomic_fetch_umax";
+      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
+                        LoweredMemTy, E->getExprLoc(), sizeChars);
+      break;
     // T __atomic_nand_fetch_N(T *mem, T val, int order)
     // T __atomic_fetch_nand_N(T *mem, T val, int order)
     case AtomicExpr::AO__atomic_nand_fetch:
@@ -962,6 +1117,11 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
       break;
     }
 
+    if (E->isOpenCL()) {
+      LibCallName = std::string("__opencl") +
+          StringRef(LibCallName).drop_front(1).str();
+
+    }
     // Optimized functions have the size in their name.
     if (UseOptimizedLibcall)
       LibCallName += "_" + llvm::utostr(Size);
@@ -982,6 +1142,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
     // order is always the last parameter
     Args.add(RValue::get(Order),
              getContext().IntTy);
+    if (E->isOpenCL())
+      Args.add(RValue::get(Scope), getContext().IntTy);
 
     // PostOp is only needed for the atomic_*_fetch operations, and
     // thus is only needed for and implemented in the
@@ -1018,9 +1180,11 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
   }
 
   bool IsStore = E->getOp() == AtomicExpr::AO__c11_atomic_store ||
+                 E->getOp() == AtomicExpr::AO__opencl_atomic_store ||
                  E->getOp() == AtomicExpr::AO__atomic_store ||
                  E->getOp() == AtomicExpr::AO__atomic_store_n;
   bool IsLoad = E->getOp() == AtomicExpr::AO__c11_atomic_load ||
+                E->getOp() == AtomicExpr::AO__opencl_atomic_load ||
                 E->getOp() == AtomicExpr::AO__atomic_load ||
                 E->getOp() == AtomicExpr::AO__atomic_load_n;
 
@@ -1032,37 +1196,38 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
     switch ((llvm::AtomicOrderingCABI)ord) {
     case llvm::AtomicOrderingCABI::relaxed:
       EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                   llvm::AtomicOrdering::Monotonic);
+                   llvm::AtomicOrdering::Monotonic, Scope);
       break;
     case llvm::AtomicOrderingCABI::consume:
     case llvm::AtomicOrderingCABI::acquire:
      if (IsStore)
        break; // Avoid crashing on code with undefined behavior
       EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                   llvm::AtomicOrdering::Acquire);
+                   llvm::AtomicOrdering::Acquire, Scope);
      break;
     case llvm::AtomicOrderingCABI::release:
       if (IsLoad)
         break; // Avoid crashing on code with undefined behavior
       EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                   llvm::AtomicOrdering::Release);
+                   llvm::AtomicOrdering::Release, Scope);
       break;
     case llvm::AtomicOrderingCABI::acq_rel:
       if (IsLoad || IsStore)
         break; // Avoid crashing on code with undefined behavior
       EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                   llvm::AtomicOrdering::AcquireRelease);
+                   llvm::AtomicOrdering::AcquireRelease, Scope);
       break;
     case llvm::AtomicOrderingCABI::seq_cst:
       EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
-                   llvm::AtomicOrdering::SequentiallyConsistent);
+                   llvm::AtomicOrdering::SequentiallyConsistent, Scope);
       break;
     }
     if (RValTy->isVoidType())
       return RValue::get(nullptr);
 
     return convertTempToRValue(
-        Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo()),
+        Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo(
+                                        Dest.getAddressSpace())),
         RValTy, E->getExprLoc());
   }
 
@@ -1091,13 +1256,13 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
   // Emit all the different atomics
   Builder.SetInsertPoint(MonotonicBB);
-  EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
-               Size, llvm::AtomicOrdering::Monotonic);
+  EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+               llvm::AtomicOrdering::Monotonic, Scope);
   Builder.CreateBr(ContBB);
   if (!IsStore) {
     Builder.SetInsertPoint(AcquireBB);
-    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
-                 Size, llvm::AtomicOrdering::Acquire);
+    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+                 llvm::AtomicOrdering::Acquire, Scope);
     Builder.CreateBr(ContBB);
     SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::consume),
                 AcquireBB);
@@ -1106,23 +1271,23 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
   }
   if (!IsLoad) {
     Builder.SetInsertPoint(ReleaseBB);
-    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
-                 Size, llvm::AtomicOrdering::Release);
+    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+                 llvm::AtomicOrdering::Release, Scope);
     Builder.CreateBr(ContBB);
     SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::release),
                 ReleaseBB);
   }
   if (!IsLoad && !IsStore) {
     Builder.SetInsertPoint(AcqRelBB);
-    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
-                 Size, llvm::AtomicOrdering::AcquireRelease);
+    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+                 llvm::AtomicOrdering::AcquireRelease, Scope);
     Builder.CreateBr(ContBB);
     SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::acq_rel),
                 AcqRelBB);
   }
   Builder.SetInsertPoint(SeqCstBB);
-  EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail,
-               Size, llvm::AtomicOrdering::SequentiallyConsistent);
+  EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+               llvm::AtomicOrdering::SequentiallyConsistent, Scope);
   Builder.CreateBr(ContBB);
   SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst),
               SeqCstBB);
@@ -1134,7 +1299,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
   assert(Atomics.getValueSizeInBits() <= Atomics.getAtomicSizeInBits());
   return convertTempToRValue(
-      Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo()),
+      Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo(
+                                      Dest.getAddressSpace())),
       RValTy, E->getExprLoc());
 }
 
@@ -1181,15 +1347,15 @@ RValue AtomicInfo::convertAtomicTempToRValue(Address addr,
   if (LVal.isBitField())
     return CGF.EmitLoadOfBitfieldLValue(
         LValue::MakeBitfield(addr, LVal.getBitFieldInfo(), LVal.getType(),
-                             LVal.getBaseInfo()), loc);
+                             LVal.getBaseInfo(), TBAAAccessInfo()), loc);
   if (LVal.isVectorElt())
     return CGF.EmitLoadOfLValue(
         LValue::MakeVectorElt(addr, LVal.getVectorIdx(), LVal.getType(),
-                              LVal.getBaseInfo()), loc);
+                              LVal.getBaseInfo(), TBAAAccessInfo()), loc);
   assert(LVal.isExtVectorElt());
   return CGF.EmitLoadOfExtVectorElementLValue(LValue::MakeExtVectorElt(
       addr, LVal.getExtVectorElts(), LVal.getType(),
-      LVal.getBaseInfo()));
+      LVal.getBaseInfo(), TBAAAccessInfo()));
 }
 
 RValue AtomicInfo::ConvertIntToValueOrAtomic(llvm::Value *IntVal,
@@ -1260,8 +1426,7 @@ llvm::Value *AtomicInfo::EmitAtomicLoadOp(llvm::AtomicOrdering AO,
   // Other decoration.
   if (IsVolatile)
     Load->setVolatile(true);
-  if (LVal.getTBAAInfo())
-    CGF.CGM.DecorateInstructionWithTBAA(Load, LVal.getTBAAInfo());
+  CGF.CGM.DecorateInstructionWithTBAA(Load, LVal.getTBAAInfo());
   return Load;
 }
 
@@ -1506,29 +1671,30 @@ EmitAtomicUpdateValue(CodeGenFunction &CGF, AtomicInfo &Atomics, RValue OldRVal,
       UpdateLVal = LValue::MakeBitfield(Ptr, AtomicLVal.getBitFieldInfo(),
                                         AtomicLVal.getType(),
-                                        AtomicLVal.getBaseInfo());
+                                        AtomicLVal.getBaseInfo(),
+                                        AtomicLVal.getTBAAInfo());
       DesiredLVal =
           LValue::MakeBitfield(DesiredAddr, AtomicLVal.getBitFieldInfo(),
-                               AtomicLVal.getType(),
-                               AtomicLVal.getBaseInfo());
+                               AtomicLVal.getType(), AtomicLVal.getBaseInfo(),
+                               AtomicLVal.getTBAAInfo());
     } else if (AtomicLVal.isVectorElt()) {
       UpdateLVal = LValue::MakeVectorElt(Ptr, AtomicLVal.getVectorIdx(),
                                          AtomicLVal.getType(),
-                                         AtomicLVal.getBaseInfo());
+                                         AtomicLVal.getBaseInfo(),
+                                         AtomicLVal.getTBAAInfo());
       DesiredLVal = LValue::MakeVectorElt(
           DesiredAddr, AtomicLVal.getVectorIdx(), AtomicLVal.getType(),
-          AtomicLVal.getBaseInfo());
+          AtomicLVal.getBaseInfo(), AtomicLVal.getTBAAInfo());
    } else {
      assert(AtomicLVal.isExtVectorElt());
       UpdateLVal = LValue::MakeExtVectorElt(Ptr, AtomicLVal.getExtVectorElts(),
                                             AtomicLVal.getType(),
-                                            AtomicLVal.getBaseInfo());
+                                            AtomicLVal.getBaseInfo(),
+                                            AtomicLVal.getTBAAInfo());
       DesiredLVal = LValue::MakeExtVectorElt(
           DesiredAddr, AtomicLVal.getExtVectorElts(), AtomicLVal.getType(),
-          AtomicLVal.getBaseInfo());
+          AtomicLVal.getBaseInfo(), AtomicLVal.getTBAAInfo());
     }
-    UpdateLVal.setTBAAInfo(AtomicLVal.getTBAAInfo());
-    DesiredLVal.setTBAAInfo(AtomicLVal.getTBAAInfo());
     UpRVal = CGF.EmitLoadOfLValue(UpdateLVal, SourceLocation());
   }
   // Store new value in the corresponding memory area
@@ -1611,20 +1777,19 @@ static void EmitAtomicUpdateValue(CodeGenFunction &CGF, AtomicInfo &Atomics,
   if (AtomicLVal.isBitField()) {
     DesiredLVal =
         LValue::MakeBitfield(DesiredAddr, AtomicLVal.getBitFieldInfo(),
-                             AtomicLVal.getType(),
-                             AtomicLVal.getBaseInfo());
+                             AtomicLVal.getType(), AtomicLVal.getBaseInfo(),
+                             AtomicLVal.getTBAAInfo());
   } else if (AtomicLVal.isVectorElt()) {
     DesiredLVal =
        LValue::MakeVectorElt(DesiredAddr, AtomicLVal.getVectorIdx(),
-                              AtomicLVal.getType(),
-                              AtomicLVal.getBaseInfo());
+                              AtomicLVal.getType(), AtomicLVal.getBaseInfo(),
+                              AtomicLVal.getTBAAInfo());
  } else {
     assert(AtomicLVal.isExtVectorElt());
     DesiredLVal = LValue::MakeExtVectorElt(
         DesiredAddr, AtomicLVal.getExtVectorElts(), AtomicLVal.getType(),
-        AtomicLVal.getBaseInfo());
+        AtomicLVal.getBaseInfo(), AtomicLVal.getTBAAInfo());
   }
-  DesiredLVal.setTBAAInfo(AtomicLVal.getTBAAInfo());
   // Store new value in the corresponding memory area
   assert(UpdateRVal.isScalar());
   CGF.EmitStoreThroughLValue(UpdateRVal, DesiredLVal);
@@ -1777,8 +1942,7 @@ void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue dest,
   // Other decoration.
   if (IsVolatile)
     store->setVolatile(true);
-  if (dest.getTBAAInfo())
-    CGM.DecorateInstructionWithTBAA(store, dest.getTBAAInfo());
+  CGM.DecorateInstructionWithTBAA(store, dest.getTBAAInfo());
   return;
 }
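The CGAtomic.cpp changes thread an llvm::SyncScope::ID through every atomic emission path so OpenCL scoped atomics lower to scoped LLVM instructions; a non-constant scope becomes a switch over the possible scope values with a fallback default. As a hedged sketch of the underlying IRBuilder API from this era (the "agent" scope name is illustrative and target-specific, not defined by this patch):

    #include "llvm/IR/IRBuilder.h"

    // Build an acquire load restricted to a named sync scope, mirroring the
    // Load->setAtomic(Order, Scope) calls added in the diff above.
    llvm::LoadInst *emitScopedLoad(llvm::IRBuilder<> &B, llvm::Value *Ptr,
                                   llvm::LLVMContext &Ctx) {
      llvm::SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
      llvm::LoadInst *Load = B.CreateLoad(Ptr);
      Load->setAtomic(llvm::AtomicOrdering::Acquire, SSID);
      return Load;
    }

An empty scope name resolves to the cross-thread "system" scope, which is why the no-scope-operand path above calls getOrInsertSyncScopeID("").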
diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp
index 181048957879..5f73d4cf7913 100644
--- a/lib/CodeGen/CGBlocks.cpp
+++ b/lib/CodeGen/CGBlocks.cpp
@@ -14,10 +14,13 @@
 #include "CGBlocks.h"
 #include "CGDebugInfo.h"
 #include "CGObjCRuntime.h"
+#include "CGOpenCLRuntime.h"
 #include "CodeGenFunction.h"
 #include "CodeGenModule.h"
-#include "clang/CodeGen/ConstantInitBuilder.h"
+#include "ConstantEmitter.h"
+#include "TargetInfo.h"
 #include "clang/AST/DeclObjC.h"
+#include "clang/CodeGen/ConstantInitBuilder.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/DataLayout.h"
@@ -290,7 +293,7 @@ static llvm::Constant *tryCaptureAsConstant(CodeGenModule &CGM,
   const Expr *init = var->getInit();
   if (!init)
     return nullptr;
-  return CGM.EmitConstantInit(*var, CGF);
+  return ConstantEmitter(CGM, CGF).tryEmitAbstractForInitializer(*var);
 }
 
 /// Get the low bit of a nonzero character count. This is the
@@ -301,21 +304,57 @@ static CharUnits getLowBit(CharUnits v) {
 
 static void initializeForBlockHeader(CodeGenModule &CGM, CGBlockInfo &info,
                              SmallVectorImpl<llvm::Type*> &elementTypes) {
-  // The header is basically 'struct { void *; int; int; void *; void *; }'.
-  // Assert that that struct is packed.
-  assert(CGM.getIntSize() <= CGM.getPointerSize());
-  assert(CGM.getIntAlign() <= CGM.getPointerAlign());
-  assert((2 * CGM.getIntSize()).isMultipleOf(CGM.getPointerAlign()));
-
-  info.BlockAlign = CGM.getPointerAlign();
-  info.BlockSize = 3 * CGM.getPointerSize() + 2 * CGM.getIntSize();
 
   assert(elementTypes.empty());
-  elementTypes.push_back(CGM.VoidPtrTy);
-  elementTypes.push_back(CGM.IntTy);
-  elementTypes.push_back(CGM.IntTy);
-  elementTypes.push_back(CGM.VoidPtrTy);
-  elementTypes.push_back(CGM.getBlockDescriptorType());
+  if (CGM.getLangOpts().OpenCL) {
+    // The header is basically 'struct { int; int; generic void *;
+    // custom_fields; }'. Assert that struct is packed.
+    auto GenericAS =
+        CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic);
+    auto GenPtrAlign =
+        CharUnits::fromQuantity(CGM.getTarget().getPointerAlign(GenericAS) / 8);
+    auto GenPtrSize =
+        CharUnits::fromQuantity(CGM.getTarget().getPointerWidth(GenericAS) / 8);
+    assert(CGM.getIntSize() <= GenPtrSize);
+    assert(CGM.getIntAlign() <= GenPtrAlign);
+    assert((2 * CGM.getIntSize()).isMultipleOf(GenPtrAlign));
+    elementTypes.push_back(CGM.IntTy); /* total size */
+    elementTypes.push_back(CGM.IntTy); /* align */
+    elementTypes.push_back(
+        CGM.getOpenCLRuntime()
+            .getGenericVoidPointerType()); /* invoke function */
+    unsigned Offset =
+        2 * CGM.getIntSize().getQuantity() + GenPtrSize.getQuantity();
+    unsigned BlockAlign = GenPtrAlign.getQuantity();
+    if (auto *Helper =
+            CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
+      for (auto I : Helper->getCustomFieldTypes()) /* custom fields */ {
+        // TargetOpenCLBlockHelp needs to make sure the struct is packed.
+        // If necessary, add padding fields to the custom fields.
+        unsigned Align = CGM.getDataLayout().getABITypeAlignment(I);
+        if (BlockAlign < Align)
+          BlockAlign = Align;
+        assert(Offset % Align == 0);
+        Offset += CGM.getDataLayout().getTypeAllocSize(I);
+        elementTypes.push_back(I);
+      }
+    }
+    info.BlockAlign = CharUnits::fromQuantity(BlockAlign);
+    info.BlockSize = CharUnits::fromQuantity(Offset);
+  } else {
+    // The header is basically 'struct { void *; int; int; void *; void *; }'.
+    // Assert that that struct is packed.
+    assert(CGM.getIntSize() <= CGM.getPointerSize());
+    assert(CGM.getIntAlign() <= CGM.getPointerAlign());
+    assert((2 * CGM.getIntSize()).isMultipleOf(CGM.getPointerAlign()));
+    info.BlockAlign = CGM.getPointerAlign();
+    info.BlockSize = 3 * CGM.getPointerSize() + 2 * CGM.getIntSize();
+    elementTypes.push_back(CGM.VoidPtrTy);
+    elementTypes.push_back(CGM.IntTy);
+    elementTypes.push_back(CGM.IntTy);
+    elementTypes.push_back(CGM.VoidPtrTy);
+    elementTypes.push_back(CGM.getBlockDescriptorType());
+  }
 }
 
 static QualType getCaptureFieldType(const CodeGenFunction &CGF,
@@ -340,8 +379,12 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF,
   SmallVector<llvm::Type*, 8> elementTypes;
   initializeForBlockHeader(CGM, info, elementTypes);
-
-  if (!block->hasCaptures()) {
+  bool hasNonConstantCustomFields = false;
+  if (auto *OpenCLHelper =
+          CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper())
+    hasNonConstantCustomFields =
+        !OpenCLHelper->areAllCustomFieldValuesConstant(info);
+  if (!block->hasCaptures() && !hasNonConstantCustomFields) {
     info.StructureType =
       llvm::StructType::get(CGM.getLLVMContext(), elementTypes, true);
     info.CanBeGlobal = true;
@@ -697,16 +740,27 @@ void CodeGenFunction::destroyBlockInfos(CGBlockInfo *head) {
 }
 
 /// Emit a block literal expression in the current function.
-llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr) {
+llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr,
+                                               llvm::Function **InvokeF) {
   // If the block has no captures, we won't have a pre-computed
   // layout for it.
   if (!blockExpr->getBlockDecl()->hasCaptures()) {
-    if (llvm::Constant *Block = CGM.getAddrOfGlobalBlockIfEmitted(blockExpr))
+    // The block literal is emitted as a global variable, and the block invoke
+    // function has to be extracted from its initializer.
+    if (llvm::Constant *Block = CGM.getAddrOfGlobalBlockIfEmitted(blockExpr)) {
+      if (InvokeF) {
+        auto *GV = cast<llvm::GlobalVariable>(
+            cast<llvm::Constant>(Block)->stripPointerCasts());
+        auto *BlockInit = cast<llvm::ConstantStruct>(GV->getInitializer());
+        *InvokeF = cast<llvm::Function>(
+            BlockInit->getAggregateElement(2)->stripPointerCasts());
+      }
       return Block;
+    }
     CGBlockInfo blockInfo(blockExpr->getBlockDecl(), CurFn->getName());
     computeBlockInfo(CGM, this, blockInfo);
     blockInfo.BlockExpression = blockExpr;
-    return EmitBlockLiteral(blockInfo);
+    return EmitBlockLiteral(blockInfo, InvokeF);
   }
 
   // Find the block info for this block and take ownership of it.
@@ -715,44 +769,59 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr) {
                     blockExpr->getBlockDecl()));
   blockInfo->BlockExpression = blockExpr;
-  return EmitBlockLiteral(*blockInfo);
+  return EmitBlockLiteral(*blockInfo, InvokeF);
 }
 
-llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
+llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo,
+                                               llvm::Function **InvokeF) {
+  bool IsOpenCL = CGM.getContext().getLangOpts().OpenCL;
+  auto GenVoidPtrTy =
+      IsOpenCL ? CGM.getOpenCLRuntime().getGenericVoidPointerType() : VoidPtrTy;
+  LangAS GenVoidPtrAddr = IsOpenCL ? LangAS::opencl_generic : LangAS::Default;
+  auto GenVoidPtrSize = CharUnits::fromQuantity(
+      CGM.getTarget().getPointerWidth(
+          CGM.getContext().getTargetAddressSpace(GenVoidPtrAddr)) /
+      8);
   // Using the computed layout, generate the actual block function.
   bool isLambdaConv = blockInfo.getBlockDecl()->isConversionFromLambda();
-  llvm::Constant *blockFn
-    = CodeGenFunction(CGM, true).GenerateBlockFunction(CurGD, blockInfo,
-                                                       LocalDeclMap,
-                                                       isLambdaConv);
-  blockFn = llvm::ConstantExpr::getBitCast(blockFn, VoidPtrTy);
+  CodeGenFunction BlockCGF{CGM, true};
+  BlockCGF.SanOpts = SanOpts;
+  auto *InvokeFn = BlockCGF.GenerateBlockFunction(
+      CurGD, blockInfo, LocalDeclMap, isLambdaConv, blockInfo.CanBeGlobal);
+  if (InvokeF)
+    *InvokeF = InvokeFn;
+  auto *blockFn = llvm::ConstantExpr::getPointerCast(InvokeFn, GenVoidPtrTy);
 
   // If there is nothing to capture, we can emit this as a global block.
   if (blockInfo.CanBeGlobal)
-    return buildGlobalBlock(CGM, blockInfo, blockFn);
+    return CGM.getAddrOfGlobalBlockIfEmitted(blockInfo.BlockExpression);
 
   // Otherwise, we have to emit this as a local block.
 
-  llvm::Constant *isa =
-      (!CGM.getContext().getLangOpts().OpenCL)
-          ? CGM.getNSConcreteStackBlock()
-          : CGM.getNullPointer(VoidPtrPtrTy,
-                               CGM.getContext().getPointerType(
-                                   QualType(CGM.getContext().VoidPtrTy)));
-  isa = llvm::ConstantExpr::getBitCast(isa, VoidPtrTy);
-
-  // Build the block descriptor.
-  llvm::Constant *descriptor = buildBlockDescriptor(CGM, blockInfo);
-
   Address blockAddr = blockInfo.LocalAddress;
   assert(blockAddr.isValid() && "block has no address!");
 
-  // Compute the initial on-stack block flags.
-  BlockFlags flags = BLOCK_HAS_SIGNATURE;
-  if (blockInfo.HasCapturedVariableLayout) flags |= BLOCK_HAS_EXTENDED_LAYOUT;
-  if (blockInfo.NeedsCopyDispose) flags |= BLOCK_HAS_COPY_DISPOSE;
-  if (blockInfo.HasCXXObject) flags |= BLOCK_HAS_CXX_OBJ;
-  if (blockInfo.UsesStret) flags |= BLOCK_USE_STRET;
+  llvm::Constant *isa;
+  llvm::Constant *descriptor;
+  BlockFlags flags;
+  if (!IsOpenCL) {
+    isa = llvm::ConstantExpr::getBitCast(CGM.getNSConcreteStackBlock(),
+                                         VoidPtrTy);
+
+    // Build the block descriptor.
+    descriptor = buildBlockDescriptor(CGM, blockInfo);
+
+    // Compute the initial on-stack block flags.
+    flags = BLOCK_HAS_SIGNATURE;
+    if (blockInfo.HasCapturedVariableLayout)
+      flags |= BLOCK_HAS_EXTENDED_LAYOUT;
+    if (blockInfo.NeedsCopyDispose)
+      flags |= BLOCK_HAS_COPY_DISPOSE;
+    if (blockInfo.HasCXXObject)
+      flags |= BLOCK_HAS_CXX_OBJ;
+    if (blockInfo.UsesStret)
+      flags |= BLOCK_USE_STRET;
+  }
 
   auto projectField = [&](unsigned index, CharUnits offset,
                           const Twine &name) -> Address {
@@ -776,13 +845,33 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
       index++;
     };
 
-    addHeaderField(isa, getPointerSize(), "block.isa");
-    addHeaderField(llvm::ConstantInt::get(IntTy, flags.getBitMask()),
-                   getIntSize(), "block.flags");
-    addHeaderField(llvm::ConstantInt::get(IntTy, 0),
-                   getIntSize(), "block.reserved");
-    addHeaderField(blockFn, getPointerSize(), "block.invoke");
-    addHeaderField(descriptor, getPointerSize(), "block.descriptor");
+    if (!IsOpenCL) {
+      addHeaderField(isa, getPointerSize(), "block.isa");
+      addHeaderField(llvm::ConstantInt::get(IntTy, flags.getBitMask()),
+                     getIntSize(), "block.flags");
+      addHeaderField(llvm::ConstantInt::get(IntTy, 0), getIntSize(),
+                     "block.reserved");
+    } else {
+      addHeaderField(
+          llvm::ConstantInt::get(IntTy, blockInfo.BlockSize.getQuantity()),
+          getIntSize(), "block.size");
+      addHeaderField(
+          llvm::ConstantInt::get(IntTy, blockInfo.BlockAlign.getQuantity()),
+          getIntSize(), "block.align");
+    }
+    addHeaderField(blockFn, GenVoidPtrSize, "block.invoke");
+    if (!IsOpenCL)
+      addHeaderField(descriptor, getPointerSize(), "block.descriptor");
+    else if (auto *Helper =
+                 CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
+      for (auto I : Helper->getCustomFieldValues(*this, blockInfo)) {
+        addHeaderField(
+            I.first,
+            CharUnits::fromQuantity(
                CGM.getDataLayout().getTypeAllocSize(I.first->getType())),
+            I.second);
+      }
+    }
  }
 
   // Finally, capture all the values into the block.
@@ -917,9 +1006,8 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
       // FIXME: Pass a specific location for the expr init so that the store is
       // attributed to a reasonable location - otherwise it may be attributed to
       // locations of subexpressions in the initialization.
-      LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
       EmitExprAsInit(&l2r, &BlockFieldPseudoVar,
-                     MakeAddrLValue(blockField, type, BaseInfo),
+                     MakeAddrLValue(blockField, type, AlignmentSource::Decl),
                      /*captured by init*/ false);
     }
 
@@ -978,21 +1066,38 @@ llvm::Type *CodeGenModule::getGenericBlockLiteralType() {
 
   llvm::Type *BlockDescPtrTy = getBlockDescriptorType();
 
-  // struct __block_literal_generic {
-  //   void *__isa;
-  //   int __flags;
-  //   int __reserved;
-  //   void (*__invoke)(void *);
-  //   struct __block_descriptor *__descriptor;
-  // };
-  GenericBlockLiteralType =
-      llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy,
-                               IntTy, IntTy, VoidPtrTy, BlockDescPtrTy);
+  if (getLangOpts().OpenCL) {
+    // struct __opencl_block_literal_generic {
+    //   int __size;
+    //   int __align;
+    //   __generic void *__invoke;
+    //   /* custom fields */
+    // };
+    SmallVector<llvm::Type *, 8> StructFields(
+        {IntTy, IntTy, getOpenCLRuntime().getGenericVoidPointerType()});
+    if (auto *Helper = getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
+      for (auto I : Helper->getCustomFieldTypes())
+        StructFields.push_back(I);
+    }
+    GenericBlockLiteralType = llvm::StructType::create(
+        StructFields, "struct.__opencl_block_literal_generic");
+  } else {
+    // struct __block_literal_generic {
+    //   void *__isa;
+    //   int __flags;
+    //   int __reserved;
+    //   void (*__invoke)(void *);
+    //   struct __block_descriptor *__descriptor;
+    // };
+    GenericBlockLiteralType =
+        llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy,
+                                 IntTy, IntTy, VoidPtrTy, BlockDescPtrTy);
+  }
 
   return GenericBlockLiteralType;
 }
 
-RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
+RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
                                           ReturnValueSlot ReturnValue) {
   const BlockPointerType *BPT =
     E->getCallee()->getType()->getAs<BlockPointerType>();
@@ -1017,8 +1122,8 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
   // Get the function pointer from the literal.
   llvm::Value *FuncPtr =
-    Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, 3);
-
+      Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr,
+                              CGM.getLangOpts().OpenCL ? 2 : 3);
   // Add the block literal.
   CallArgList Args;
 
@@ -1026,8 +1131,7 @@
   QualType VoidPtrQualTy = getContext().VoidPtrTy;
   llvm::Type *GenericVoidPtrTy = VoidPtrTy;
   if (getLangOpts().OpenCL) {
-    GenericVoidPtrTy = Builder.getInt8PtrTy(
-        getContext().getTargetAddressSpace(LangAS::opencl_generic));
+    GenericVoidPtrTy = CGM.getOpenCLRuntime().getGenericVoidPointerType();
     VoidPtrQualTy =
        getContext().getPointerType(getContext().getAddrSpaceQualType(
            getContext().VoidTy, LangAS::opencl_generic));
@@ -1052,7 +1156,7 @@
   llvm::Type *BlockFTy = CGM.getTypes().GetFunctionType(FnInfo);
 
   llvm::Type *BlockFTyPtr = llvm::PointerType::getUnqual(BlockFTy);
-  Func = Builder.CreateBitCast(Func, BlockFTyPtr);
+  Func = Builder.CreatePointerCast(Func, BlockFTyPtr);
 
   // Prepare the callee.
   CGCallee Callee(CGCalleeInfo(), Func);
@@ -1087,8 +1191,8 @@ Address CodeGenFunction::GetAddrOfBlockDecl(const VarDecl *variable,
                             variable->getName());
   }
 
-  if (auto refType = capture.fieldType()->getAs<ReferenceType>())
-    addr = EmitLoadOfReference(addr, refType);
+  if (capture.fieldType()->isReferenceType())
+    addr = EmitLoadOfReference(MakeAddrLValue(addr, capture.fieldType()));
 
   return addr;
 }
@@ -1113,17 +1217,14 @@ CodeGenModule::GetAddrOfGlobalBlock(const BlockExpr *BE,
   computeBlockInfo(*this, nullptr, blockInfo);
 
   // Using that metadata, generate the actual block function.
-  llvm::Constant *blockFn;
   {
     CodeGenFunction::DeclMapTy LocalDeclMap;
-    blockFn = CodeGenFunction(*this).GenerateBlockFunction(GlobalDecl(),
-                                                           blockInfo,
-                                                           LocalDeclMap,
-                                                           false);
+    CodeGenFunction(*this).GenerateBlockFunction(
+        GlobalDecl(), blockInfo, LocalDeclMap,
+        /*IsLambdaConversionToBlock*/ false, /*BuildGlobalBlock*/ true);
   }
-  blockFn = llvm::ConstantExpr::getBitCast(blockFn, VoidPtrTy);
 
-  return buildGlobalBlock(*this, blockInfo, blockFn);
+  return getAddrOfGlobalBlockIfEmitted(BE);
 }
 
 static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,
@@ -1140,27 +1241,37 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,
   ConstantInitBuilder builder(CGM);
   auto fields = builder.beginStruct();
 
-  // isa
-  fields.add((!CGM.getContext().getLangOpts().OpenCL)
-                 ? CGM.getNSConcreteGlobalBlock()
-                 : CGM.getNullPointer(CGM.VoidPtrPtrTy,
-                                      CGM.getContext().getPointerType(QualType(
-                                          CGM.getContext().VoidPtrTy))));
+  bool IsOpenCL = CGM.getLangOpts().OpenCL;
+  if (!IsOpenCL) {
+    // isa
+    fields.add(CGM.getNSConcreteGlobalBlock());
 
-  // __flags
-  BlockFlags flags = BLOCK_IS_GLOBAL | BLOCK_HAS_SIGNATURE;
-  if (blockInfo.UsesStret) flags |= BLOCK_USE_STRET;
-
-  fields.addInt(CGM.IntTy, flags.getBitMask());
+    // __flags
+    BlockFlags flags = BLOCK_IS_GLOBAL | BLOCK_HAS_SIGNATURE;
+    if (blockInfo.UsesStret)
+      flags |= BLOCK_USE_STRET;
 
-  // Reserved
-  fields.addInt(CGM.IntTy, 0);
+    fields.addInt(CGM.IntTy, flags.getBitMask());
+
+    // Reserved
+    fields.addInt(CGM.IntTy, 0);
+  } else {
+    fields.addInt(CGM.IntTy, blockInfo.BlockSize.getQuantity());
+    fields.addInt(CGM.IntTy, blockInfo.BlockAlign.getQuantity());
+  }
 
   // Function
   fields.add(blockFn);
 
-  // Descriptor
-  fields.add(buildBlockDescriptor(CGM, blockInfo));
+  if (!IsOpenCL) {
+    // Descriptor
+    fields.add(buildBlockDescriptor(CGM, blockInfo));
+  } else if (auto *Helper =
+                 CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
+    for (auto I : Helper->getCustomFieldValues(CGM, blockInfo)) {
+      fields.add(I);
+    }
+  }
 
   unsigned AddrSpace = 0;
   if (CGM.getContext().getLangOpts().OpenCL)
@@ -1184,20 +1295,17 @@ void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D,
                                                llvm::Value *arg) {
   assert(BlockInfo && "not emitting prologue of block invocation function?!");
 
-  llvm::Value *localAddr = nullptr;
-  if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
-    // Allocate a stack slot to let the debug info survive the RA.
-    Address alloc = CreateMemTemp(D->getType(), D->getName() + ".addr");
-    Builder.CreateStore(arg, alloc);
-    localAddr = Builder.CreateLoad(alloc);
-  }
-
+  // Allocate a stack slot like for any local variable to guarantee optimal
+  // debug info at -O0. The mem2reg pass will eliminate it when optimizing.
+  Address alloc = CreateMemTemp(D->getType(), D->getName() + ".addr");
+  Builder.CreateStore(arg, alloc);
   if (CGDebugInfo *DI = getDebugInfo()) {
     if (CGM.getCodeGenOpts().getDebugInfo() >=
         codegenoptions::LimitedDebugInfo) {
       DI->setLocation(D->getLocation());
-      DI->EmitDeclareOfBlockLiteralArgVariable(*BlockInfo, arg, argNum,
-                                               localAddr, Builder);
+      DI->EmitDeclareOfBlockLiteralArgVariable(
+          *BlockInfo, D->getName(), argNum,
+          cast<llvm::AllocaInst>(alloc.getPointer()), Builder);
     }
   }
 
@@ -1225,7 +1333,8 @@ llvm::Function *
 CodeGenFunction::GenerateBlockFunction(GlobalDecl GD,
                                        const CGBlockInfo &blockInfo,
                                        const DeclMapTy &ldm,
-                                       bool IsLambdaConversionToBlock) {
+                                       bool IsLambdaConversionToBlock,
+                                       bool BuildGlobalBlock) {
   const BlockDecl *blockDecl = blockInfo.getBlockDecl();
 
   CurGD = GD;
@@ -1284,6 +1393,14 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD,
                            fnLLVMType, llvm::GlobalValue::InternalLinkage, name,
                            &CGM.getModule());
   CGM.SetInternalFunctionAttributes(blockDecl, fn, fnInfo);
+  if (BuildGlobalBlock) {
+    auto GenVoidPtrTy = getContext().getLangOpts().OpenCL
+                            ? CGM.getOpenCLRuntime().getGenericVoidPointerType()
+                            : VoidPtrTy;
+    buildGlobalBlock(CGM, blockInfo,
+                     llvm::ConstantExpr::getPointerCast(fn, GenVoidPtrTy));
+  }
+
   // Begin generating the function.
   StartFunction(blockDecl, fnType->getReturnType(), fn, fnInfo, args,
                 blockDecl->getLocation(),
@@ -1529,10 +1646,8 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
 
   CGM.SetInternalFunctionAttributes(nullptr, Fn, FI);
 
-  auto NL = ApplyDebugLocation::CreateEmpty(*this);
   StartFunction(FD, C.VoidTy, Fn, FI, args);
-  // Create a scope with an artificial location for the body of this function.
-  auto AL = ApplyDebugLocation::CreateArtificial(*this);
+  ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getLocStart()};
   llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo();
 
   Address src = GetAddrOfLocalVar(&SrcDecl);
@@ -1701,10 +1816,8 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) {
 
   CGM.SetInternalFunctionAttributes(nullptr, Fn, FI);
 
-  // Create a scope with an artificial location for the body of this function.
-  auto NL = ApplyDebugLocation::CreateEmpty(*this);
   StartFunction(FD, C.VoidTy, Fn, FI, args);
-  auto AL = ApplyDebugLocation::CreateArtificial(*this);
+  ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getLocStart()};
 
   llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo();
Address CreateElementBitCast(Address Addr, llvm::Type *Ty, diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index f3527b0f39d1..3ecd1c6697d7 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -16,6 +16,7 @@ #include "CGOpenCLRuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" +#include "ConstantEmitter.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" @@ -29,6 +30,9 @@ #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/ScopedPrinter.h" +#include "llvm/Support/TargetParser.h" #include <sstream> using namespace clang; @@ -641,6 +645,287 @@ struct CallObjCArcUse final : EHScopeStack::Cleanup { }; } +Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E, + BuiltinCheckKind Kind) { + assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero) + && "Unsupported builtin check kind"); + + Value *ArgValue = EmitScalarExpr(E); + if (!SanOpts.has(SanitizerKind::Builtin) || !getTarget().isCLZForZeroUndef()) + return ArgValue; + + SanitizerScope SanScope(this); + Value *Cond = Builder.CreateICmpNE( + ArgValue, llvm::Constant::getNullValue(ArgValue->getType())); + EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin), + SanitizerHandler::InvalidBuiltin, + {EmitCheckSourceLocation(E->getExprLoc()), + llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)}, + None); + return ArgValue; +} + +/// Get the argument type for arguments to os_log_helper. +static CanQualType getOSLogArgType(ASTContext &C, int Size) { + QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false); + return C.getCanonicalType(UnsignedTy); +} + +llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( + const analyze_os_log::OSLogBufferLayout &Layout, + CharUnits BufferAlignment) { + ASTContext &Ctx = getContext(); + + llvm::SmallString<64> Name; + { + raw_svector_ostream OS(Name); + OS << "__os_log_helper"; + OS << "_" << BufferAlignment.getQuantity(); + OS << "_" << int(Layout.getSummaryByte()); + OS << "_" << int(Layout.getNumArgsByte()); + for (const auto &Item : Layout.Items) + OS << "_" << int(Item.getSizeByte()) << "_" + << int(Item.getDescriptorByte()); + } + + if (llvm::Function *F = CGM.getModule().getFunction(Name)) + return F; + + llvm::SmallVector<ImplicitParamDecl, 4> Params; + Params.emplace_back(Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), + Ctx.VoidPtrTy, ImplicitParamDecl::Other); + + for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) { + char Size = Layout.Items[I].getSizeByte(); + if (!Size) + continue; + + Params.emplace_back( + Ctx, nullptr, SourceLocation(), + &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), + getOSLogArgType(Ctx, Size), ImplicitParamDecl::Other); + } + + FunctionArgList Args; + for (auto &P : Params) + Args.push_back(&P); + + // The helper function has linkonce_odr linkage to enable the linker to merge + // identical functions. To ensure the merging always happens, 'noinline' is + // attached to the function when compiling with -Oz. 
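+ // For illustration (values assumed, not from this patch): a layout with + // buffer alignment 1, summary byte 2, numArgs byte 2, and two items of + // (size, descriptor) = (4, 0) and (8, 0) yields the name + // "__os_log_helper_1_2_2_4_0_8_0", so every TU that sees the same layout + // emits the same helper and the linker keeps a single copy.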
+ const CGFunctionInfo &FI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args); + llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI); + llvm::Function *Fn = llvm::Function::Create( + FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule()); + Fn->setVisibility(llvm::GlobalValue::HiddenVisibility); + CGM.SetLLVMFunctionAttributes(nullptr, FI, Fn); + CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn); + + // Attach 'noinline' at -Oz. + if (CGM.getCodeGenOpts().OptimizeSize == 2) + Fn->addFnAttr(llvm::Attribute::NoInline); + + auto NL = ApplyDebugLocation::CreateEmpty(*this); + IdentifierInfo *II = &Ctx.Idents.get(Name); + FunctionDecl *FD = FunctionDecl::Create( + Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II, + Ctx.VoidTy, nullptr, SC_PrivateExtern, false, false); + + StartFunction(FD, Ctx.VoidTy, Fn, FI, Args); + + // Create a scope with an artificial location for the body of this function. + auto AL = ApplyDebugLocation::CreateArtificial(*this); + + CharUnits Offset; + Address BufAddr(Builder.CreateLoad(GetAddrOfLocalVar(&Params[0]), "buf"), + BufferAlignment); + Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()), + Builder.CreateConstByteGEP(BufAddr, Offset++, "summary")); + Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()), + Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs")); + + unsigned I = 1; + for (const auto &Item : Layout.Items) { + Builder.CreateStore( + Builder.getInt8(Item.getDescriptorByte()), + Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor")); + Builder.CreateStore( + Builder.getInt8(Item.getSizeByte()), + Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize")); + + CharUnits Size = Item.size(); + if (!Size.getQuantity()) + continue; + + Address Arg = GetAddrOfLocalVar(&Params[I]); + Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData"); + Addr = Builder.CreateBitCast(Addr, Arg.getPointer()->getType(), + "argDataCast"); + Builder.CreateStore(Builder.CreateLoad(Arg), Addr); + Offset += Size; + ++I; + } + + FinishFunction(); + + return Fn; +} + +RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) { + assert(E.getNumArgs() >= 2 && + "__builtin_os_log_format takes at least 2 arguments"); + ASTContext &Ctx = getContext(); + analyze_os_log::OSLogBufferLayout Layout; + analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout); + Address BufAddr = EmitPointerWithAlignment(E.getArg(0)); + llvm::SmallVector<llvm::Value *, 4> RetainableOperands; + + // Ignore argument 1, the format string. It is not currently used. + CallArgList Args; + Args.add(RValue::get(BufAddr.getPointer()), Ctx.VoidPtrTy); + + for (const auto &Item : Layout.Items) { + int Size = Item.getSizeByte(); + if (!Size) + continue; + + llvm::Value *ArgVal; + + if (const Expr *TheExpr = Item.getExpr()) { + ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false); + + // Check if this is a retainable type. + if (TheExpr->getType()->isObjCRetainableType()) { + assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar && + "Only a scalar can be an ObjC retainable type"); + // Check if the object is constant; if not, save it in + // RetainableOperands.
+ if (!isa<Constant>(ArgVal)) + RetainableOperands.push_back(ArgVal); + } + } else { + ArgVal = Builder.getInt32(Item.getConstValue().getQuantity()); + } + + unsigned ArgValSize = + CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType()); + llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(), + ArgValSize); + ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy); + CanQualType ArgTy = getOSLogArgType(Ctx, Size); + // If ArgVal has type x86_fp80, zero-extend ArgVal. + ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy)); + Args.add(RValue::get(ArgVal), ArgTy); + } + + const CGFunctionInfo &FI = + CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args); + llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction( + Layout, BufAddr.getAlignment()); + EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args); + + // Push a clang.arc.use cleanup for each object in RetainableOperands. The + // cleanup will cause the use to appear after the final log call, keeping + // the object valid while it's held in the log buffer. Note that if there's + // a release cleanup on the object, it will already be active; since + // cleanups are emitted in reverse order, the use will occur before the + // object is released. + if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount && + CGM.getCodeGenOpts().OptimizationLevel != 0) + for (llvm::Value *Object : RetainableOperands) + pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), Object); + + return RValue::get(BufAddr.getPointer()); +} + +/// Determine if a binop is a checked mixed-sign multiply we can specialize. +static bool isSpecialMixedSignMultiply(unsigned BuiltinID, + WidthAndSignedness Op1Info, + WidthAndSignedness Op2Info, + WidthAndSignedness ResultInfo) { + return BuiltinID == Builtin::BI__builtin_mul_overflow && + Op1Info.Width == Op2Info.Width && Op1Info.Width >= ResultInfo.Width && + Op1Info.Signed != Op2Info.Signed; +} + +/// Emit a checked mixed-sign multiply. This is a cheaper specialization of +/// the generic checked-binop irgen. +static RValue +EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, + WidthAndSignedness Op1Info, const clang::Expr *Op2, + WidthAndSignedness Op2Info, + const clang::Expr *ResultArg, QualType ResultQTy, + WidthAndSignedness ResultInfo) { + assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info, + Op2Info, ResultInfo) && + "Not a mixed-sign multiplication we can specialize"); + + // Emit the signed and unsigned operands. + const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2; + const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1; + llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp); + llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp); + + llvm::Type *OpTy = Signed->getType(); + llvm::Value *Zero = llvm::Constant::getNullValue(OpTy); + Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg); + llvm::Type *ResTy = ResultPtr.getElementType(); + + // Take the absolute value of the signed operand. + llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero); + llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed); + llvm::Value *AbsSigned = + CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed); + + // Perform a checked unsigned multiplication.
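+ // A minimal sketch of the IR for this step (value names are illustrative): + //   %pair = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %abs, i32 %b) + //   %prod = extractvalue { i32, i1 } %pair, 0 + //   %ovf  = extractvalue { i32, i1 } %pair, 1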
+ llvm::Value *UnsignedOverflow; + llvm::Value *UnsignedResult = + EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned, + Unsigned, UnsignedOverflow); + + llvm::Value *Overflow, *Result; + if (ResultInfo.Signed) { + // Signed overflow occurs if the result is greater than INT_MAX or less + // than INT_MIN, i.e. when |Result| > (INT_MAX + IsNegative). + auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width) + .zextOrSelf(Op1Info.Width); + llvm::Value *MaxResult = + CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax), + CGF.Builder.CreateZExt(IsNegative, OpTy)); + llvm::Value *SignedOverflow = + CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult); + Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow); + + // Prepare the signed result (possibly by negating it). + llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult); + llvm::Value *SignedResult = + CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult); + Result = CGF.Builder.CreateTrunc(SignedResult, ResTy); + } else { + // Unsigned overflow occurs if the true result is negative or greater than + // UINT_MAX. + llvm::Value *Underflow = CGF.Builder.CreateAnd( + IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult)); + Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow); + if (ResultInfo.Width < Op1Info.Width) { + auto IntMax = + llvm::APInt::getMaxValue(ResultInfo.Width).zext(Op1Info.Width); + llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT( + UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax)); + Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow); + } + + Result = CGF.Builder.CreateTrunc(UnsignedResult, ResTy); + } + assert(Overflow && Result && "Missing overflow or result"); + + bool isVolatile = + ResultArg->getType()->getPointeeType().isVolatileQualified(); + CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr, + isVolatile); + return RValue::get(Overflow); +} + RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue) { @@ -656,11 +941,196 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Result.Val.getFloat())); } + // There are LLVM math intrinsics/instructions corresponding to math library + // functions, except that the LLVM op will never set errno while the math + // library might. Also, math builtins have the same semantics as their math + // library twins. Thus, we can transform math library and builtin calls to + // their LLVM counterparts if the call is marked 'const' (known to never set + // errno).
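+ // For example (illustrative): with -fno-math-errno the sqrt builtin is + // marked 'const', so "return sqrt(x);" can lower to + // "call double @llvm.sqrt.f64(double %x)" instead of a libm call; without + // the attribute it must stay a library call, since a negative argument + // may set errno to EDOM.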
+ if (FD->hasAttr<ConstAttr>()) { + switch (BuiltinID) { + case Builtin::BIceil: + case Builtin::BIceilf: + case Builtin::BIceill: + case Builtin::BI__builtin_ceil: + case Builtin::BI__builtin_ceilf: + case Builtin::BI__builtin_ceill: + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil)); + + case Builtin::BIcopysign: + case Builtin::BIcopysignf: + case Builtin::BIcopysignl: + case Builtin::BI__builtin_copysign: + case Builtin::BI__builtin_copysignf: + case Builtin::BI__builtin_copysignl: + return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign)); + + case Builtin::BIcos: + case Builtin::BIcosf: + case Builtin::BIcosl: + case Builtin::BI__builtin_cos: + case Builtin::BI__builtin_cosf: + case Builtin::BI__builtin_cosl: + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::cos)); + + case Builtin::BIexp: + case Builtin::BIexpf: + case Builtin::BIexpl: + case Builtin::BI__builtin_exp: + case Builtin::BI__builtin_expf: + case Builtin::BI__builtin_expl: + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp)); + + case Builtin::BIexp2: + case Builtin::BIexp2f: + case Builtin::BIexp2l: + case Builtin::BI__builtin_exp2: + case Builtin::BI__builtin_exp2f: + case Builtin::BI__builtin_exp2l: + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp2)); + + case Builtin::BIfabs: + case Builtin::BIfabsf: + case Builtin::BIfabsl: + case Builtin::BI__builtin_fabs: + case Builtin::BI__builtin_fabsf: + case Builtin::BI__builtin_fabsl: + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs)); + + case Builtin::BIfloor: + case Builtin::BIfloorf: + case Builtin::BIfloorl: + case Builtin::BI__builtin_floor: + case Builtin::BI__builtin_floorf: + case Builtin::BI__builtin_floorl: + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor)); + + case Builtin::BIfma: + case Builtin::BIfmaf: + case Builtin::BIfmal: + case Builtin::BI__builtin_fma: + case Builtin::BI__builtin_fmaf: + case Builtin::BI__builtin_fmal: + return RValue::get(emitTernaryBuiltin(*this, E, Intrinsic::fma)); + + case Builtin::BIfmax: + case Builtin::BIfmaxf: + case Builtin::BIfmaxl: + case Builtin::BI__builtin_fmax: + case Builtin::BI__builtin_fmaxf: + case Builtin::BI__builtin_fmaxl: + return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum)); + + case Builtin::BIfmin: + case Builtin::BIfminf: + case Builtin::BIfminl: + case Builtin::BI__builtin_fmin: + case Builtin::BI__builtin_fminf: + case Builtin::BI__builtin_fminl: + return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum)); + + // fmod() is a special-case. It maps to the frem instruction rather than an + // LLVM intrinsic. 
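+ // Sketch: fmod(5.5, 2.0) becomes "%r = frem double 5.5, 2.0", yielding + // 1.5; frem takes the sign of the dividend, matching libm's fmod.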
+ case Builtin::BIfmod: + case Builtin::BIfmodf: + case Builtin::BIfmodl: + case Builtin::BI__builtin_fmod: + case Builtin::BI__builtin_fmodf: + case Builtin::BI__builtin_fmodl: { + Value *Arg1 = EmitScalarExpr(E->getArg(0)); + Value *Arg2 = EmitScalarExpr(E->getArg(1)); + return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod")); + } + + case Builtin::BIlog: + case Builtin::BIlogf: + case Builtin::BIlogl: + case Builtin::BI__builtin_log: + case Builtin::BI__builtin_logf: + case Builtin::BI__builtin_logl: + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log)); + + case Builtin::BIlog10: + case Builtin::BIlog10f: + case Builtin::BIlog10l: + case Builtin::BI__builtin_log10: + case Builtin::BI__builtin_log10f: + case Builtin::BI__builtin_log10l: + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log10)); + + case Builtin::BIlog2: + case Builtin::BIlog2f: + case Builtin::BIlog2l: + case Builtin::BI__builtin_log2: + case Builtin::BI__builtin_log2f: + case Builtin::BI__builtin_log2l: + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log2)); + + case Builtin::BInearbyint: + case Builtin::BInearbyintf: + case Builtin::BInearbyintl: + case Builtin::BI__builtin_nearbyint: + case Builtin::BI__builtin_nearbyintf: + case Builtin::BI__builtin_nearbyintl: + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint)); + + case Builtin::BIpow: + case Builtin::BIpowf: + case Builtin::BIpowl: + case Builtin::BI__builtin_pow: + case Builtin::BI__builtin_powf: + case Builtin::BI__builtin_powl: + return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::pow)); + + case Builtin::BIrint: + case Builtin::BIrintf: + case Builtin::BIrintl: + case Builtin::BI__builtin_rint: + case Builtin::BI__builtin_rintf: + case Builtin::BI__builtin_rintl: + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint)); + + case Builtin::BIround: + case Builtin::BIroundf: + case Builtin::BIroundl: + case Builtin::BI__builtin_round: + case Builtin::BI__builtin_roundf: + case Builtin::BI__builtin_roundl: + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round)); + + case Builtin::BIsin: + case Builtin::BIsinf: + case Builtin::BIsinl: + case Builtin::BI__builtin_sin: + case Builtin::BI__builtin_sinf: + case Builtin::BI__builtin_sinl: + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sin)); + + case Builtin::BIsqrt: + case Builtin::BIsqrtf: + case Builtin::BIsqrtl: + case Builtin::BI__builtin_sqrt: + case Builtin::BI__builtin_sqrtf: + case Builtin::BI__builtin_sqrtl: + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sqrt)); + + case Builtin::BItrunc: + case Builtin::BItruncf: + case Builtin::BItruncl: + case Builtin::BI__builtin_trunc: + case Builtin::BI__builtin_truncf: + case Builtin::BI__builtin_truncl: + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc)); + + default: + break; + } + } + switch (BuiltinID) { - default: break; // Handle intrinsics and libm functions below. 
+ default: break; case Builtin::BI__builtin___CFStringMakeConstantString: case Builtin::BI__builtin___NSStringMakeConstantString: - return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr)); + return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType())); case Builtin::BI__builtin_stdarg_start: case Builtin::BI__builtin_va_start: case Builtin::BI__va_start: @@ -696,64 +1166,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(Result); } - case Builtin::BI__builtin_fabs: - case Builtin::BI__builtin_fabsf: - case Builtin::BI__builtin_fabsl: { - return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs)); - } - case Builtin::BI__builtin_fmod: - case Builtin::BI__builtin_fmodf: - case Builtin::BI__builtin_fmodl: { - Value *Arg1 = EmitScalarExpr(E->getArg(0)); - Value *Arg2 = EmitScalarExpr(E->getArg(1)); - Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod"); - return RValue::get(Result); - } - case Builtin::BI__builtin_copysign: - case Builtin::BI__builtin_copysignf: - case Builtin::BI__builtin_copysignl: { - return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign)); - } - case Builtin::BI__builtin_ceil: - case Builtin::BI__builtin_ceilf: - case Builtin::BI__builtin_ceill: { - return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil)); - } - case Builtin::BI__builtin_floor: - case Builtin::BI__builtin_floorf: - case Builtin::BI__builtin_floorl: { - return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor)); - } - case Builtin::BI__builtin_trunc: - case Builtin::BI__builtin_truncf: - case Builtin::BI__builtin_truncl: { - return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc)); - } - case Builtin::BI__builtin_rint: - case Builtin::BI__builtin_rintf: - case Builtin::BI__builtin_rintl: { - return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint)); - } - case Builtin::BI__builtin_nearbyint: - case Builtin::BI__builtin_nearbyintf: - case Builtin::BI__builtin_nearbyintl: { - return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint)); - } - case Builtin::BI__builtin_round: - case Builtin::BI__builtin_roundf: - case Builtin::BI__builtin_roundl: { - return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round)); - } - case Builtin::BI__builtin_fmin: - case Builtin::BI__builtin_fminf: - case Builtin::BI__builtin_fminl: { - return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum)); - } - case Builtin::BI__builtin_fmax: - case Builtin::BI__builtin_fmaxf: - case Builtin::BI__builtin_fmaxl: { - return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum)); - } case Builtin::BI__builtin_conj: case Builtin::BI__builtin_conjf: case Builtin::BI__builtin_conjl: { @@ -792,7 +1204,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__builtin_ctz: case Builtin::BI__builtin_ctzl: case Builtin::BI__builtin_ctzll: { - Value *ArgValue = EmitScalarExpr(E->getArg(0)); + Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero); llvm::Type *ArgType = ArgValue->getType(); Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); @@ -809,7 +1221,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__builtin_clz: case Builtin::BI__builtin_clzl: case Builtin::BI__builtin_clzll: { - Value *ArgValue = EmitScalarExpr(E->getArg(0)); + Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero); llvm::Type *ArgType = ArgValue->getType(); Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); @@ -1234,7 +1646,7 @@ 
RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD, 0); Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false); - return RValue::get(Dest.getPointer()); + return RValue::get(nullptr); } case Builtin::BImemcpy: case Builtin::BI__builtin_memcpy: { @@ -1346,8 +1758,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, llvm::ConstantInt::get(Int32Ty, Offset))); } case Builtin::BI__builtin_return_address: { - Value *Depth = - CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this); + Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), + getContext().UnsignedIntTy); Value *F = CGM.getIntrinsic(Intrinsic::returnaddress); return RValue::get(Builder.CreateCall(F, Depth)); } @@ -1356,8 +1768,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(Builder.CreateCall(F, Builder.getInt32(0))); } case Builtin::BI__builtin_frame_address: { - Value *Depth = - CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this); + Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), + getContext().UnsignedIntTy); Value *F = CGM.getIntrinsic(Intrinsic::frameaddress); return RValue::get(Builder.CreateCall(F, Depth)); } @@ -1875,56 +2287,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(nullptr); } - // Library functions with special handling. - case Builtin::BIsqrt: - case Builtin::BIsqrtf: - case Builtin::BIsqrtl: { - // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only - // in finite- or unsafe-math mode (the intrinsic has different semantics - // for handling negative numbers compared to the library function, so - // -fmath-errno=0 is not enough). - if (!FD->hasAttr<ConstAttr>()) - break; - if (!(CGM.getCodeGenOpts().UnsafeFPMath || - CGM.getCodeGenOpts().NoNaNsFPMath)) - break; - Value *Arg0 = EmitScalarExpr(E->getArg(0)); - llvm::Type *ArgType = Arg0->getType(); - Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType); - return RValue::get(Builder.CreateCall(F, Arg0)); - } - - case Builtin::BI__builtin_pow: - case Builtin::BI__builtin_powf: - case Builtin::BI__builtin_powl: - case Builtin::BIpow: - case Builtin::BIpowf: - case Builtin::BIpowl: { - // Transform a call to pow* into a @llvm.pow.* intrinsic call. - if (!FD->hasAttr<ConstAttr>()) - break; - Value *Base = EmitScalarExpr(E->getArg(0)); - Value *Exponent = EmitScalarExpr(E->getArg(1)); - llvm::Type *ArgType = Base->getType(); - Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType); - return RValue::get(Builder.CreateCall(F, {Base, Exponent})); - } - - case Builtin::BIfma: - case Builtin::BIfmaf: - case Builtin::BIfmal: - case Builtin::BI__builtin_fma: - case Builtin::BI__builtin_fmaf: - case Builtin::BI__builtin_fmal: { - // Rewrite fma to intrinsic. 
- Value *FirstArg = EmitScalarExpr(E->getArg(0)); - llvm::Type *ArgType = FirstArg->getType(); - Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType); - return RValue::get( - Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)), - EmitScalarExpr(E->getArg(2))})); - } - case Builtin::BI__builtin_signbit: case Builtin::BI__builtin_signbitf: case Builtin::BI__builtin_signbitl: { @@ -1932,6 +2294,28 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))), ConvertType(E->getType()))); } + case Builtin::BI__annotation: { + // Re-encode each wide string to UTF8 and make an MDString. + SmallVector<Metadata *, 1> Strings; + for (const Expr *Arg : E->arguments()) { + const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts()); + assert(Str->getCharByteWidth() == 2); + StringRef WideBytes = Str->getBytes(); + std::string StrUtf8; + if (!convertUTF16ToUTF8String( + makeArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) { + CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument"); + continue; + } + Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8)); + } + + // Build an MDTuple of MDStrings and emit the intrinsic call. + llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {}); + MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings); + Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple)); + return RValue::getIgnored(); + } case Builtin::BI__builtin_annotation: { llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0)); llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation, @@ -2026,6 +2410,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType()); WidthAndSignedness ResultInfo = getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy); + + // Handle mixed-sign multiplication as a special case, because adding + // runtime or backend support for our generic irgen would be too expensive.
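+ // For example (illustrative): + //   int r; bool o = __builtin_mul_overflow(-3, 7u, &r); + // has two 32-bit operands of mixed signedness, so it takes the path below: + // |-3| * 7 via @llvm.umul.with.overflow.i32, followed by a negate, giving + // r == -21 and o == false.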
+ if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo)) + return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg, + RightInfo, ResultArg, ResultQTy, + ResultInfo); + WidthAndSignedness EncompassingInfo = EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo}); @@ -2560,12 +2952,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // The most basic form of the call with parameters: // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void) Name = "__enqueue_kernel_basic"; - llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy}; + llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy, + GenericVoidPtrTy}; llvm::FunctionType *FTy = llvm::FunctionType::get( - Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false); + Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); - llvm::Value *Block = Builder.CreatePointerCast( - EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy); + auto Info = + CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3)); + llvm::Value *Kernel = + Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); + llvm::Value *Block = + Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); AttrBuilder B; B.addAttribute(Attribute::ByVal); @@ -2574,33 +2971,58 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, auto RTCall = Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet), - {Queue, Flags, Range, Block}); + {Queue, Flags, Range, Kernel, Block}); RTCall->setAttributes(ByValAttrSet); return RValue::get(RTCall); } assert(NumArgs >= 5 && "Invalid enqueue_kernel signature"); + // Create a temporary array to hold the sizes of local pointer arguments + // for the block. \p First is the position of the first size argument. + auto CreateArrayForSizeVar = [=](unsigned First) { + auto *AT = llvm::ArrayType::get(SizeTy, NumArgs - First); + auto *Arr = Builder.CreateAlloca(AT); + llvm::Value *Ptr; + // Each of the following arguments specifies the size of the corresponding + // argument passed to the enqueued block. + auto *Zero = llvm::ConstantInt::get(IntTy, 0); + for (unsigned I = First; I < NumArgs; ++I) { + auto *Index = llvm::ConstantInt::get(IntTy, I - First); + auto *GEP = Builder.CreateGEP(Arr, {Zero, Index}); + if (I == First) + Ptr = GEP; + auto *V = + Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy); + Builder.CreateAlignedStore( + V, GEP, CGM.getDataLayout().getPrefTypeAlignment(SizeTy)); + } + return Ptr; + }; + // Could have events and/or vaargs. if (E->getArg(3)->getType()->isBlockPointerType()) { // No events passed, but has variadic arguments. Name = "__enqueue_kernel_vaargs"; - llvm::Value *Block = Builder.CreatePointerCast( - EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy); + auto Info = + CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3)); + llvm::Value *Kernel = + Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); + auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); + auto *PtrToSizeArray = CreateArrayForSizeVar(4); + // Create a vector of the arguments, as well as a constant value to // express to the runtime the number of variadic arguments. - std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block, - ConstantInt::get(IntTy, NumArgs - 4)}; - std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy, - GenericVoidPtrTy, IntTy}; - - // Each of the following arguments specifies the size of the corresponding - // argument passed to the enqueued block. 
- for (unsigned I = 4/*Position of the first size arg*/; I < NumArgs; ++I) - Args.push_back( - Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy)); + std::vector<llvm::Value *> Args = { + Queue, Flags, Range, + Kernel, Block, ConstantInt::get(IntTy, NumArgs - 4), + PtrToSizeArray}; + std::vector<llvm::Type *> ArgTys = { + QueueTy, IntTy, RangeTy, + GenericVoidPtrTy, GenericVoidPtrTy, IntTy, + PtrToSizeArray->getType()}; llvm::FunctionType *FTy = llvm::FunctionType::get( - Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true); + Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); return RValue::get( Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), llvm::ArrayRef<llvm::Value *>(Args))); @@ -2621,15 +3043,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // Convert to generic address space. EventList = Builder.CreatePointerCast(EventList, EventPtrTy); ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy); - llvm::Value *Block = Builder.CreatePointerCast( - EmitScalarExpr(E->getArg(6)), GenericVoidPtrTy); + auto Info = + CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6)); + llvm::Value *Kernel = + Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); + llvm::Value *Block = + Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); std::vector<llvm::Type *> ArgTys = { - QueueTy, Int32Ty, RangeTy, Int32Ty, - EventPtrTy, EventPtrTy, GenericVoidPtrTy}; + QueueTy, Int32Ty, RangeTy, Int32Ty, + EventPtrTy, EventPtrTy, GenericVoidPtrTy, GenericVoidPtrTy}; - std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents, - EventList, ClkEvent, Block}; + std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents, + EventList, ClkEvent, Kernel, Block}; if (NumArgs == 7) { // Has events but no variadics. @@ -2646,14 +3072,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, ArgTys.push_back(Int32Ty); Name = "__enqueue_kernel_events_vaargs"; - // Each of the following arguments specifies the size of the corresponding - // argument passed to the enqueued block. 
- for (unsigned I = 7/*Position of the first size arg*/; I < NumArgs; ++I) - Args.push_back( - Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy)); + auto *PtrToSizeArray = CreateArrayForSizeVar(7); + Args.push_back(PtrToSizeArray); + ArgTys.push_back(PtrToSizeArray->getType()); llvm::FunctionType *FTy = llvm::FunctionType::get( - Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true); + Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); return RValue::get( Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), llvm::ArrayRef<llvm::Value *>(Args))); @@ -2665,24 +3089,70 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BIget_kernel_work_group_size: { llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( getContext().getTargetAddressSpace(LangAS::opencl_generic)); - Value *Arg = EmitScalarExpr(E->getArg(0)); - Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy); + auto Info = + CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0)); + Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); + Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); return RValue::get(Builder.CreateCall( CGM.CreateRuntimeFunction( - llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false), + llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy}, + false), "__get_kernel_work_group_size_impl"), - Arg)); + {Kernel, Arg})); } case Builtin::BIget_kernel_preferred_work_group_size_multiple: { llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( getContext().getTargetAddressSpace(LangAS::opencl_generic)); - Value *Arg = EmitScalarExpr(E->getArg(0)); - Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy); + auto Info = + CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0)); + Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); + Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); return RValue::get(Builder.CreateCall( CGM.CreateRuntimeFunction( - llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false), + llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy}, + false), "__get_kernel_preferred_work_group_multiple_impl"), - Arg)); + {Kernel, Arg})); + } + case Builtin::BIget_kernel_max_sub_group_size_for_ndrange: + case Builtin::BIget_kernel_sub_group_count_for_ndrange: { + llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy( + getContext().getTargetAddressSpace(LangAS::opencl_generic)); + LValue NDRangeL = EmitAggExprToLValue(E->getArg(0)); + llvm::Value *NDRange = NDRangeL.getAddress().getPointer(); + auto Info = + CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1)); + Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); + Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); + const char *Name = + BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange + ? 
"__get_kernel_max_sub_group_size_for_ndrange_impl" + : "__get_kernel_sub_group_count_for_ndrange_impl"; + return RValue::get(Builder.CreateCall( + CGM.CreateRuntimeFunction( + llvm::FunctionType::get( + IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy}, + false), + Name), + {NDRange, Kernel, Block})); + } + + case Builtin::BI__builtin_store_half: + case Builtin::BI__builtin_store_halff: { + Value *Val = EmitScalarExpr(E->getArg(0)); + Address Address = EmitPointerWithAlignment(E->getArg(1)); + Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy()); + return RValue::get(Builder.CreateStore(HalfVal, Address)); + } + case Builtin::BI__builtin_load_half: { + Address Address = EmitPointerWithAlignment(E->getArg(0)); + Value *HalfVal = Builder.CreateLoad(Address); + return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy())); + } + case Builtin::BI__builtin_load_halff: { + Address Address = EmitPointerWithAlignment(E->getArg(0)); + Value *HalfVal = Builder.CreateLoad(Address); + return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy())); } case Builtin::BIprintf: if (getTarget().getTriple().isNVPTX()) @@ -2699,69 +3169,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // Fall through - it's already mapped to the intrinsic by GCCBuiltin. break; } - case Builtin::BI__builtin_os_log_format: { - assert(E->getNumArgs() >= 2 && - "__builtin_os_log_format takes at least 2 arguments"); - analyze_os_log::OSLogBufferLayout Layout; - analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout); - Address BufAddr = EmitPointerWithAlignment(E->getArg(0)); - // Ignore argument 1, the format string. It is not currently used. - CharUnits Offset; - Builder.CreateStore( - Builder.getInt8(Layout.getSummaryByte()), - Builder.CreateConstByteGEP(BufAddr, Offset++, "summary")); - Builder.CreateStore( - Builder.getInt8(Layout.getNumArgsByte()), - Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs")); - - llvm::SmallVector<llvm::Value *, 4> RetainableOperands; - for (const auto &Item : Layout.Items) { - Builder.CreateStore( - Builder.getInt8(Item.getDescriptorByte()), - Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor")); - Builder.CreateStore( - Builder.getInt8(Item.getSizeByte()), - Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize")); - Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset); - if (const Expr *TheExpr = Item.getExpr()) { - Addr = Builder.CreateElementBitCast( - Addr, ConvertTypeForMem(TheExpr->getType())); - // Check if this is a retainable type. - if (TheExpr->getType()->isObjCRetainableType()) { - assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar && - "Only scalar can be a ObjC retainable type"); - llvm::Value *SV = EmitScalarExpr(TheExpr, /*Ignore*/ false); - RValue RV = RValue::get(SV); - LValue LV = MakeAddrLValue(Addr, TheExpr->getType()); - EmitStoreThroughLValue(RV, LV); - // Check if the object is constant, if not, save it in - // RetainableOperands. - if (!isa<Constant>(SV)) - RetainableOperands.push_back(SV); - } else { - EmitAnyExprToMem(TheExpr, Addr, Qualifiers(), /*isInit*/ true); - } - } else { - Addr = Builder.CreateElementBitCast(Addr, Int32Ty); - Builder.CreateStore( - Builder.getInt32(Item.getConstValue().getQuantity()), Addr); - } - Offset += Item.size(); - } - - // Push a clang.arc.use cleanup for each object in RetainableOperands. The - // cleanup will cause the use to appear after the final log call, keeping - // the object valid while it's held in the log buffer. 
Note that if there's - // a release cleanup on the object, it will already be active; since - // cleanups are emitted in reverse order, the use will occur before the - // object is released. - if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount && - CGM.getCodeGenOpts().OptimizationLevel != 0) - for (llvm::Value *object : RetainableOperands) - pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), object); - - return RValue::get(BufAddr.getPointer()); - } + case Builtin::BI__builtin_os_log_format: + return emitBuiltinOSLogFormat(*E); case Builtin::BI__builtin_os_log_format_buffer_size: { analyze_os_log::OSLogBufferLayout Layout; @@ -2773,10 +3182,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__xray_customevent: { if (!ShouldXRayInstrumentFunction()) return RValue::getIgnored(); - if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) { - if (XRayAttr->neverXRayInstrument()) + if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) + if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents()) return RValue::getIgnored(); - } + Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent); auto FTy = F->getFunctionType(); auto Arg0 = E->getArg(0); @@ -2954,6 +3363,8 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, case llvm::Triple::wasm32: case llvm::Triple::wasm64: return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E); + case llvm::Triple::hexagon: + return CGF->EmitHexagonBuiltinExpr(BuiltinID, E); default: return nullptr; } @@ -4397,8 +4808,8 @@ static bool HasExtraNeonArgument(unsigned BuiltinID) { case NEON::BI__builtin_neon_vsha1cq_u32: case NEON::BI__builtin_neon_vsha1pq_u32: case NEON::BI__builtin_neon_vsha1mq_u32: - case ARM::BI_MoveToCoprocessor: - case ARM::BI_MoveToCoprocessor2: + case clang::ARM::BI_MoveToCoprocessor: + case clang::ARM::BI_MoveToCoprocessor2: return false; } return true; @@ -7153,6 +7564,19 @@ static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]); } +static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, + unsigned NumElts, SmallVectorImpl<Value *> &Ops, + bool InvertLHS = false) { + Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts); + Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts); + + if (InvertLHS) + LHS = CGF.Builder.CreateNot(LHS); + + return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS), + CGF.Builder.getIntNTy(std::max(NumElts, 8U))); +} + static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF, SmallVectorImpl<Value *> &Ops, llvm::Type *DstTy, @@ -7229,6 +7653,18 @@ static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, std::max(NumElts, 8U))); } +static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) { + + llvm::Type *Ty = Ops[0]->getType(); + Value *Zero = llvm::Constant::getNullValue(Ty); + Value *Sub = CGF.Builder.CreateSub(Zero, Ops[0]); + Value *Cmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Zero); + Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Sub); + if (Ops.size() == 1) + return Res; + return EmitX86Select(CGF, Ops[2], Res, Ops[1]); +} + static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred, ArrayRef<Value *> Ops) { Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); @@ -7248,8 +7684,118 @@ static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2"); } +Value 
*CodeGenFunction::EmitX86CpuIs(const CallExpr *E) { + const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); + StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString(); + return EmitX86CpuIs(CPUStr); +} + +Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) { + + llvm::Type *Int32Ty = Builder.getInt32Ty(); + + // Matching the struct layout from the compiler-rt/libgcc structure that is + // filled in: + // unsigned int __cpu_vendor; + // unsigned int __cpu_type; + // unsigned int __cpu_subtype; + // unsigned int __cpu_features[1]; + llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, + llvm::ArrayType::get(Int32Ty, 1)); + + // Grab the global __cpu_model. + llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model"); + + // Calculate the index needed to access the correct field based on the + // range. Also adjust the expected value. + unsigned Index; + unsigned Value; + std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr) +#define X86_VENDOR(ENUM, STRING) \ + .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)}) +#define X86_CPU_TYPE_COMPAT_WITH_ALIAS(ARCHNAME, ENUM, STR, ALIAS) \ + .Cases(STR, ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)}) +#define X86_CPU_TYPE_COMPAT(ARCHNAME, ENUM, STR) \ + .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)}) +#define X86_CPU_SUBTYPE_COMPAT(ARCHNAME, ENUM, STR) \ + .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)}) +#include "llvm/Support/X86TargetParser.def" + .Default({0, 0}); + assert(Value != 0 && "Invalid CPUStr passed to CpuIs"); + + // Grab the appropriate field from __cpu_model. + llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), + ConstantInt::get(Int32Ty, Index)}; + llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs); + CpuValue = Builder.CreateAlignedLoad(CpuValue, CharUnits::fromQuantity(4)); + + // Check the value of the field against the requested value. + return Builder.CreateICmpEQ(CpuValue, + llvm::ConstantInt::get(Int32Ty, Value)); +} + +Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) { + const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts(); + StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString(); + return EmitX86CpuSupports(FeatureStr); +} + +Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) { + // Processor features and mapping to processor feature value. + + uint32_t FeaturesMask = 0; + + for (const StringRef &FeatureStr : FeatureStrs) { + unsigned Feature = + StringSwitch<unsigned>(FeatureStr) +#define X86_FEATURE_COMPAT(VAL, ENUM, STR) .Case(STR, VAL) +#include "llvm/Support/X86TargetParser.def" + ; + FeaturesMask |= (1U << Feature); + } + + // Matching the struct layout from the compiler-rt/libgcc structure that is + // filled in: + // unsigned int __cpu_vendor; + // unsigned int __cpu_type; + // unsigned int __cpu_subtype; + // unsigned int __cpu_features[1]; + llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, + llvm::ArrayType::get(Int32Ty, 1)); + + // Grab the global __cpu_model. + llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model"); + + // Grab the first (0th) element from the field __cpu_features off of the + // global in the struct STy. 
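+ // Sketch of a use: __builtin_cpu_supports("avx2") loads + // __cpu_model.__cpu_features[0] and tests the avx2 bit, roughly + //   (__cpu_features[0] & (1u << FEATURE_AVX2)) != 0 + // with the feature index taken from X86TargetParser.def (the FEATURE_AVX2 + // name here is illustrative).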
+ Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 3), + ConstantInt::get(Int32Ty, 0)}; + Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs); + Value *Features = + Builder.CreateAlignedLoad(CpuFeatures, CharUnits::fromQuantity(4)); + + // Check the value of the bit corresponding to the feature requested. + Value *Bitset = Builder.CreateAnd( + Features, llvm::ConstantInt::get(Int32Ty, FeaturesMask)); + return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0)); +} + +Value *CodeGenFunction::EmitX86CpuInit() { + llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, + /*Variadic*/ false); + llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init"); + return Builder.CreateCall(Func); +} + Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E) { + if (BuiltinID == X86::BI__builtin_cpu_is) + return EmitX86CpuIs(E); + if (BuiltinID == X86::BI__builtin_cpu_supports) + return EmitX86CpuSupports(E); + if (BuiltinID == X86::BI__builtin_cpu_init) + return EmitX86CpuInit(); + SmallVector<Value*, 4> Ops; // Find out if any arguments are required to be integer constant expressions. @@ -7300,110 +7846,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, switch (BuiltinID) { default: return nullptr; - case X86::BI__builtin_cpu_supports: { - const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts(); - StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString(); - - // TODO: When/if this becomes more than x86 specific then use a TargetInfo - // based mapping. - // Processor features and mapping to processor feature value. - enum X86Features { - CMOV = 0, - MMX, - POPCNT, - SSE, - SSE2, - SSE3, - SSSE3, - SSE4_1, - SSE4_2, - AVX, - AVX2, - SSE4_A, - FMA4, - XOP, - FMA, - AVX512F, - BMI, - BMI2, - AES, - PCLMUL, - AVX512VL, - AVX512BW, - AVX512DQ, - AVX512CD, - AVX512ER, - AVX512PF, - AVX512VBMI, - AVX512IFMA, - AVX5124VNNIW, // TODO implement this fully - AVX5124FMAPS, // TODO implement this fully - AVX512VPOPCNTDQ, - MAX - }; - - X86Features Feature = - StringSwitch<X86Features>(FeatureStr) - .Case("cmov", X86Features::CMOV) - .Case("mmx", X86Features::MMX) - .Case("popcnt", X86Features::POPCNT) - .Case("sse", X86Features::SSE) - .Case("sse2", X86Features::SSE2) - .Case("sse3", X86Features::SSE3) - .Case("ssse3", X86Features::SSSE3) - .Case("sse4.1", X86Features::SSE4_1) - .Case("sse4.2", X86Features::SSE4_2) - .Case("avx", X86Features::AVX) - .Case("avx2", X86Features::AVX2) - .Case("sse4a", X86Features::SSE4_A) - .Case("fma4", X86Features::FMA4) - .Case("xop", X86Features::XOP) - .Case("fma", X86Features::FMA) - .Case("avx512f", X86Features::AVX512F) - .Case("bmi", X86Features::BMI) - .Case("bmi2", X86Features::BMI2) - .Case("aes", X86Features::AES) - .Case("pclmul", X86Features::PCLMUL) - .Case("avx512vl", X86Features::AVX512VL) - .Case("avx512bw", X86Features::AVX512BW) - .Case("avx512dq", X86Features::AVX512DQ) - .Case("avx512cd", X86Features::AVX512CD) - .Case("avx512er", X86Features::AVX512ER) - .Case("avx512pf", X86Features::AVX512PF) - .Case("avx512vbmi", X86Features::AVX512VBMI) - .Case("avx512ifma", X86Features::AVX512IFMA) - .Case("avx512vpopcntdq", X86Features::AVX512VPOPCNTDQ) - .Default(X86Features::MAX); - assert(Feature != X86Features::MAX && "Invalid feature!"); - - // Matching the struct layout from the compiler-rt/libgcc structure that is - // filled in: - // unsigned int __cpu_vendor; - // unsigned int __cpu_type; - // unsigned int __cpu_subtype; - // unsigned int 
__cpu_features[1]; - llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, - llvm::ArrayType::get(Int32Ty, 1)); - - // Grab the global __cpu_model. - llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model"); - - // Grab the first (0th) element from the field __cpu_features off of the - // global in the struct STy. - Value *Idxs[] = { - ConstantInt::get(Int32Ty, 0), - ConstantInt::get(Int32Ty, 3), - ConstantInt::get(Int32Ty, 0) - }; - Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs); - Value *Features = Builder.CreateAlignedLoad(CpuFeatures, - CharUnits::fromQuantity(4)); - - // Check the value of the bit corresponding to the feature requested. - Value *Bitset = Builder.CreateAnd( - Features, llvm::ConstantInt::get(Int32Ty, 1ULL << Feature)); - return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0)); - } case X86::BI_mm_prefetch: { Value *Address = Ops[0]; Value *RW = ConstantInt::get(Int32Ty, 0); @@ -7526,6 +7968,10 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_storesd128_mask: { return EmitX86MaskedStore(*this, Ops, 16); } + case X86::BI__builtin_ia32_vpopcntd_128: + case X86::BI__builtin_ia32_vpopcntq_128: + case X86::BI__builtin_ia32_vpopcntd_256: + case X86::BI__builtin_ia32_vpopcntq_256: case X86::BI__builtin_ia32_vpopcntd_512: case X86::BI__builtin_ia32_vpopcntq_512: { llvm::Type *ResultType = ConvertType(E->getType()); @@ -7669,6 +8115,45 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return EmitX86Select(*this, Ops[4], Align, Ops[3]); } + case X86::BI__builtin_ia32_vperm2f128_pd256: + case X86::BI__builtin_ia32_vperm2f128_ps256: + case X86::BI__builtin_ia32_vperm2f128_si256: + case X86::BI__builtin_ia32_permti256: { + unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); + unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); + + // This takes a very simple approach since there are two lanes and a + // shuffle can have 2 inputs. So we reserve the first input for the first + // lane and the second input for the second lane. This may result in + // duplicate sources, but this can be dealt with in the backend. + + Value *OutOps[2]; + uint32_t Indices[8]; + for (unsigned l = 0; l != 2; ++l) { + // Determine the source for this lane. + if (Imm & (1 << ((l * 4) + 3))) + OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType()); + else if (Imm & (1 << ((l * 4) + 1))) + OutOps[l] = Ops[1]; + else + OutOps[l] = Ops[0]; + + for (unsigned i = 0; i != NumElts/2; ++i) { + // Start with ith element of the source for this lane. + unsigned Idx = (l * NumElts) + i; + // If bit 0 of the immediate half is set, switch to the high half of + // the source. 
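+ // (Worked example, not from this patch: _mm256_permute2f128_pd(a, b, 0x31) + // has lane controls 0x1, the high half of a, and 0x3, the high half of b, + // so this loop builds the mask <2, 3, 6, 7> over the concatenated + // inputs {a, b}.)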
+ if (Imm & (1 << (l * 4))) + Idx += NumElts/2; + Indices[(l * (NumElts/2)) + i] = Idx; + } + } + + return Builder.CreateShuffleVector(OutOps[0], OutOps[1], + makeArrayRef(Indices, NumElts), + "vperm"); + } + case X86::BI__builtin_ia32_movnti: case X86::BI__builtin_ia32_movnti64: case X86::BI__builtin_ia32_movntsd: @@ -7714,32 +8199,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_selectpd_256: case X86::BI__builtin_ia32_selectpd_512: return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]); - case X86::BI__builtin_ia32_pcmpeqb128_mask: - case X86::BI__builtin_ia32_pcmpeqb256_mask: - case X86::BI__builtin_ia32_pcmpeqb512_mask: - case X86::BI__builtin_ia32_pcmpeqw128_mask: - case X86::BI__builtin_ia32_pcmpeqw256_mask: - case X86::BI__builtin_ia32_pcmpeqw512_mask: - case X86::BI__builtin_ia32_pcmpeqd128_mask: - case X86::BI__builtin_ia32_pcmpeqd256_mask: - case X86::BI__builtin_ia32_pcmpeqd512_mask: - case X86::BI__builtin_ia32_pcmpeqq128_mask: - case X86::BI__builtin_ia32_pcmpeqq256_mask: - case X86::BI__builtin_ia32_pcmpeqq512_mask: - return EmitX86MaskedCompare(*this, 0, false, Ops); - case X86::BI__builtin_ia32_pcmpgtb128_mask: - case X86::BI__builtin_ia32_pcmpgtb256_mask: - case X86::BI__builtin_ia32_pcmpgtb512_mask: - case X86::BI__builtin_ia32_pcmpgtw128_mask: - case X86::BI__builtin_ia32_pcmpgtw256_mask: - case X86::BI__builtin_ia32_pcmpgtw512_mask: - case X86::BI__builtin_ia32_pcmpgtd128_mask: - case X86::BI__builtin_ia32_pcmpgtd256_mask: - case X86::BI__builtin_ia32_pcmpgtd512_mask: - case X86::BI__builtin_ia32_pcmpgtq128_mask: - case X86::BI__builtin_ia32_pcmpgtq256_mask: - case X86::BI__builtin_ia32_pcmpgtq512_mask: - return EmitX86MaskedCompare(*this, 6, true, Ops); case X86::BI__builtin_ia32_cmpb128_mask: case X86::BI__builtin_ia32_cmpb256_mask: case X86::BI__builtin_ia32_cmpb512_mask: @@ -7771,6 +8230,22 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return EmitX86MaskedCompare(*this, CC, false, Ops); } + case X86::BI__builtin_ia32_kandhi: + return EmitX86MaskLogic(*this, Instruction::And, 16, Ops); + case X86::BI__builtin_ia32_kandnhi: + return EmitX86MaskLogic(*this, Instruction::And, 16, Ops, true); + case X86::BI__builtin_ia32_korhi: + return EmitX86MaskLogic(*this, Instruction::Or, 16, Ops); + case X86::BI__builtin_ia32_kxnorhi: + return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops, true); + case X86::BI__builtin_ia32_kxorhi: + return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops); + case X86::BI__builtin_ia32_knothi: { + Ops[0] = getMaskVecValue(*this, Ops[0], 16); + return Builder.CreateBitCast(Builder.CreateNot(Ops[0]), + Builder.getInt16Ty()); + } + case X86::BI__builtin_ia32_vplzcntd_128_mask: case X86::BI__builtin_ia32_vplzcntd_256_mask: case X86::BI__builtin_ia32_vplzcntd_512_mask: @@ -7783,6 +8258,20 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Ops[1]); } + case X86::BI__builtin_ia32_pabsb128: + case X86::BI__builtin_ia32_pabsw128: + case X86::BI__builtin_ia32_pabsd128: + case X86::BI__builtin_ia32_pabsb256: + case X86::BI__builtin_ia32_pabsw256: + case X86::BI__builtin_ia32_pabsd256: + case X86::BI__builtin_ia32_pabsq128_mask: + case X86::BI__builtin_ia32_pabsq256_mask: + case X86::BI__builtin_ia32_pabsb512_mask: + case X86::BI__builtin_ia32_pabsw512_mask: + case X86::BI__builtin_ia32_pabsd512_mask: + case X86::BI__builtin_ia32_pabsq512_mask: + return EmitX86Abs(*this, Ops); + case X86::BI__builtin_ia32_pmaxsb128: case X86::BI__builtin_ia32_pmaxsw128: case 
X86::BI__builtin_ia32_pmaxsd128: @@ -8071,6 +8560,45 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); case X86::BI_InterlockedIncrement64: return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); + case X86::BI_InterlockedCompareExchange128: { + // InterlockedCompareExchange128 doesn't directly refer to 128-bit ints; + // instead it takes pointers to 64-bit ints for Destination and + // ComparandResult, and the exchange value is passed as two 64-bit ints (high & low). + // The previous value is written to ComparandResult, and success is + // returned. + + llvm::Type *Int128Ty = Builder.getInt128Ty(); + llvm::Type *Int128PtrTy = Int128Ty->getPointerTo(); + + Value *Destination = + Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PtrTy); + Value *ExchangeHigh128 = + Builder.CreateZExt(EmitScalarExpr(E->getArg(1)), Int128Ty); + Value *ExchangeLow128 = + Builder.CreateZExt(EmitScalarExpr(E->getArg(2)), Int128Ty); + Address ComparandResult( + Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int128PtrTy), + getContext().toCharUnitsFromBits(128)); + + Value *Exchange = Builder.CreateOr( + Builder.CreateShl(ExchangeHigh128, 64, "", false, false), + ExchangeLow128); + + Value *Comparand = Builder.CreateLoad(ComparandResult); + + AtomicCmpXchgInst *CXI = + Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, + AtomicOrdering::SequentiallyConsistent, + AtomicOrdering::SequentiallyConsistent); + CXI->setVolatile(true); + + // Write the result back to the inout pointer. + Builder.CreateStore(Builder.CreateExtractValue(CXI, 0), ComparandResult); + + // Get the success boolean and zero extend it to i8. + Value *Success = Builder.CreateExtractValue(CXI, 1); + return Builder.CreateZExt(Success, ConvertType(E->getType())); + } case X86::BI_AddressOfReturnAddress: { Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); @@ -8680,6 +9208,15 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, CI->setConvergent(); return CI; } + case AMDGPU::BI__builtin_amdgcn_read_exec_lo: + case AMDGPU::BI__builtin_amdgcn_read_exec_hi: { + StringRef RegName = BuiltinID == AMDGPU::BI__builtin_amdgcn_read_exec_lo ? + "exec_lo" : "exec_hi"; + CallInst *CI = cast<CallInst>( + EmitSpecialRegisterBuiltin(*this, E, Int32Ty, Int32Ty, true, RegName)); + CI->setConvergent(); + return CI; + } // amdgcn workitem case AMDGPU::BI__builtin_amdgcn_workitem_id_x: @@ -9129,6 +9666,16 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(FnALAF32, {Ptr, Val}); } + case NVPTX::BI__nvvm_atom_add_gen_d: { + Value *Ptr = EmitScalarExpr(E->getArg(0)); + Value *Val = EmitScalarExpr(E->getArg(1)); + // atomicrmw only deals with integer arguments, so we need to use + // LLVM's nvvm_atomic_load_add_f64 intrinsic.
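// [Editor's sketch, not part of the patch] At the source level the builtin
// behaves like an atomic fetch-add on double, which portable C++ can only
// express as a compare-exchange loop (illustrative, assuming <atomic>):
//
//   double atom_add_gen_d(std::atomic<double> *p, double v) {
//     double old = p->load();
//     // compare_exchange_weak reloads 'old' on failure, so the loop
//     // retries with the freshly observed value.
//     while (!p->compare_exchange_weak(old, old + v)) {}
//     return old;
//   }
//
// The dedicated NVVM intrinsic below lets the backend emit a native PTX
// atomic-add instruction instead of such a loop.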
+ Value *FnALAF64 = + CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f64, Ptr->getType()); + return Builder.CreateCall(FnALAF64, {Ptr, Val}); + } + case NVPTX::BI__nvvm_atom_inc_gen_ui: { Value *Ptr = EmitScalarExpr(E->getArg(0)); Value *Val = EmitScalarExpr(E->getArg(1)); @@ -9282,6 +9829,219 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, {Ptr->getType()->getPointerElementType(), Ptr->getType()}), {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))}); } + case NVPTX::BI__nvvm_match_all_sync_i32p: + case NVPTX::BI__nvvm_match_all_sync_i64p: { + Value *Mask = EmitScalarExpr(E->getArg(0)); + Value *Val = EmitScalarExpr(E->getArg(1)); + Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2)); + Value *ResultPair = Builder.CreateCall( + CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p + ? Intrinsic::nvvm_match_all_sync_i32p + : Intrinsic::nvvm_match_all_sync_i64p), + {Mask, Val}); + Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1), + PredOutPtr.getElementType()); + Builder.CreateStore(Pred, PredOutPtr); + return Builder.CreateExtractValue(ResultPair, 0); + } + case NVPTX::BI__hmma_m16n16k16_ld_a: + case NVPTX::BI__hmma_m16n16k16_ld_b: + case NVPTX::BI__hmma_m16n16k16_ld_c_f16: + case NVPTX::BI__hmma_m16n16k16_ld_c_f32: { + Address Dst = EmitPointerWithAlignment(E->getArg(0)); + Value *Src = EmitScalarExpr(E->getArg(1)); + Value *Ldm = EmitScalarExpr(E->getArg(2)); + llvm::APSInt isColMajorArg; + if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext())) + return nullptr; + bool isColMajor = isColMajorArg.getSExtValue(); + unsigned IID; + unsigned NumResults; + switch (BuiltinID) { + case NVPTX::BI__hmma_m16n16k16_ld_a: + IID = isColMajor ? Intrinsic::nvvm_wmma_load_a_f16_col_stride + : Intrinsic::nvvm_wmma_load_a_f16_row_stride; + NumResults = 8; + break; + case NVPTX::BI__hmma_m16n16k16_ld_b: + IID = isColMajor ? Intrinsic::nvvm_wmma_load_b_f16_col_stride + : Intrinsic::nvvm_wmma_load_b_f16_row_stride; + NumResults = 8; + break; + case NVPTX::BI__hmma_m16n16k16_ld_c_f16: + IID = isColMajor ? Intrinsic::nvvm_wmma_load_c_f16_col_stride + : Intrinsic::nvvm_wmma_load_c_f16_row_stride; + NumResults = 4; + break; + case NVPTX::BI__hmma_m16n16k16_ld_c_f32: + IID = isColMajor ? Intrinsic::nvvm_wmma_load_c_f32_col_stride + : Intrinsic::nvvm_wmma_load_c_f32_row_stride; + NumResults = 8; + break; + default: + llvm_unreachable("Unexpected builtin ID."); + } + Value *Result = + Builder.CreateCall(CGM.getIntrinsic(IID), + {Builder.CreatePointerCast(Src, VoidPtrTy), Ldm}); + + // Save returned values. + for (unsigned i = 0; i < NumResults; ++i) { + Builder.CreateAlignedStore( + Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), + Dst.getElementType()), + Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)), + CharUnits::fromQuantity(4)); + } + return Result; + } + + case NVPTX::BI__hmma_m16n16k16_st_c_f16: + case NVPTX::BI__hmma_m16n16k16_st_c_f32: { + Value *Dst = EmitScalarExpr(E->getArg(0)); + Address Src = EmitPointerWithAlignment(E->getArg(1)); + Value *Ldm = EmitScalarExpr(E->getArg(2)); + llvm::APSInt isColMajorArg; + if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext())) + return nullptr; + bool isColMajor = isColMajorArg.getSExtValue(); + unsigned IID; + unsigned NumResults = 8; + // PTX instructions (and LLVM intrinsics) are defined for slice _d_, yet + // for some reason nvcc builtins use _c_.
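// [Editor's note, illustrative] Concretely, per the switch below,
// __hmma_m16n16k16_st_c_f32 with isColMajor == true maps to
// Intrinsic::nvvm_wmma_store_d_f32_col_stride: the slice letter changes
// from _c_ to _d_ while the rest of the name carries over.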
+ switch (BuiltinID) { + case NVPTX::BI__hmma_m16n16k16_st_c_f16: + IID = isColMajor ? Intrinsic::nvvm_wmma_store_d_f16_col_stride + : Intrinsic::nvvm_wmma_store_d_f16_row_stride; + NumResults = 4; + break; + case NVPTX::BI__hmma_m16n16k16_st_c_f32: + IID = isColMajor ? Intrinsic::nvvm_wmma_store_d_f32_col_stride + : Intrinsic::nvvm_wmma_store_d_f32_row_stride; + break; + default: + llvm_unreachable("Unexpected builtin ID."); + } + Function *Intrinsic = CGM.getIntrinsic(IID); + llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1); + SmallVector<Value *, 10> Values; + Values.push_back(Builder.CreatePointerCast(Dst, VoidPtrTy)); + for (unsigned i = 0; i < NumResults; ++i) { + Value *V = Builder.CreateAlignedLoad( + Builder.CreateGEP(Src.getPointer(), llvm::ConstantInt::get(IntTy, i)), + CharUnits::fromQuantity(4)); + Values.push_back(Builder.CreateBitCast(V, ParamType)); + } + Values.push_back(Ldm); + Value *Result = Builder.CreateCall(Intrinsic, Values); + return Result; + } + + // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) + // --> Intrinsic::nvvm_wmma_mma_sync<layout A,B><DType><CType><Satf> + case NVPTX::BI__hmma_m16n16k16_mma_f16f16: + case NVPTX::BI__hmma_m16n16k16_mma_f32f16: + case NVPTX::BI__hmma_m16n16k16_mma_f32f32: + case NVPTX::BI__hmma_m16n16k16_mma_f16f32: { + Address Dst = EmitPointerWithAlignment(E->getArg(0)); + Address SrcA = EmitPointerWithAlignment(E->getArg(1)); + Address SrcB = EmitPointerWithAlignment(E->getArg(2)); + Address SrcC = EmitPointerWithAlignment(E->getArg(3)); + llvm::APSInt LayoutArg; + if (!E->getArg(4)->isIntegerConstantExpr(LayoutArg, getContext())) + return nullptr; + int Layout = LayoutArg.getSExtValue(); + if (Layout < 0 || Layout > 3) + return nullptr; + llvm::APSInt SatfArg; + if (!E->getArg(5)->isIntegerConstantExpr(SatfArg, getContext())) + return nullptr; + bool Satf = SatfArg.getSExtValue(); + + // clang-format off +#define MMA_VARIANTS(type) {{ \ + Intrinsic::nvvm_wmma_mma_sync_row_row_##type, \ + Intrinsic::nvvm_wmma_mma_sync_row_row_##type##_satfinite, \ + Intrinsic::nvvm_wmma_mma_sync_row_col_##type, \ + Intrinsic::nvvm_wmma_mma_sync_row_col_##type##_satfinite, \ + Intrinsic::nvvm_wmma_mma_sync_col_row_##type, \ + Intrinsic::nvvm_wmma_mma_sync_col_row_##type##_satfinite, \ + Intrinsic::nvvm_wmma_mma_sync_col_col_##type, \ + Intrinsic::nvvm_wmma_mma_sync_col_col_##type##_satfinite \ + }} + // clang-format on + + auto getMMAIntrinsic = [Layout, Satf](std::array<unsigned, 8> Variants) { + unsigned Index = Layout * 2 + Satf; + assert(Index < 8); + return Variants[Index]; + }; + unsigned IID; + unsigned NumEltsC; + unsigned NumEltsD; + switch (BuiltinID) { + case NVPTX::BI__hmma_m16n16k16_mma_f16f16: + IID = getMMAIntrinsic(MMA_VARIANTS(f16_f16)); + NumEltsC = 4; + NumEltsD = 4; + break; + case NVPTX::BI__hmma_m16n16k16_mma_f32f16: + IID = getMMAIntrinsic(MMA_VARIANTS(f32_f16)); + NumEltsC = 4; + NumEltsD = 8; + break; + case NVPTX::BI__hmma_m16n16k16_mma_f16f32: + IID = getMMAIntrinsic(MMA_VARIANTS(f16_f32)); + NumEltsC = 8; + NumEltsD = 4; + break; + case NVPTX::BI__hmma_m16n16k16_mma_f32f32: + IID = getMMAIntrinsic(MMA_VARIANTS(f32_f32)); + NumEltsC = 8; + NumEltsD = 8; + break; + default: + llvm_unreachable("Unexpected builtin ID."); + } +#undef MMA_VARIANTS + + SmallVector<Value *, 24> Values; + Function *Intrinsic = CGM.getIntrinsic(IID); + llvm::Type *ABType = Intrinsic->getFunctionType()->getParamType(0); + // Load A + for (unsigned i = 0; i < 8; ++i) { + Value *V = Builder.CreateAlignedLoad( + 
Builder.CreateGEP(SrcA.getPointer(), + llvm::ConstantInt::get(IntTy, i)), + CharUnits::fromQuantity(4)); + Values.push_back(Builder.CreateBitCast(V, ABType)); + } + // Load B + for (unsigned i = 0; i < 8; ++i) { + Value *V = Builder.CreateAlignedLoad( + Builder.CreateGEP(SrcB.getPointer(), + llvm::ConstantInt::get(IntTy, i)), + CharUnits::fromQuantity(4)); + Values.push_back(Builder.CreateBitCast(V, ABType)); + } + // Load C + llvm::Type *CType = Intrinsic->getFunctionType()->getParamType(16); + for (unsigned i = 0; i < NumEltsC; ++i) { + Value *V = Builder.CreateAlignedLoad( + Builder.CreateGEP(SrcC.getPointer(), + llvm::ConstantInt::get(IntTy, i)), + CharUnits::fromQuantity(4)); + Values.push_back(Builder.CreateBitCast(V, CType)); + } + Value *Result = Builder.CreateCall(Intrinsic, Values); + llvm::Type *DType = Dst.getElementType(); + for (unsigned i = 0; i < NumEltsD; ++i) + Builder.CreateAlignedStore( + Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType), + Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)), + CharUnits::fromQuantity(4)); + return Result; + } default: return nullptr; } @@ -9315,3 +10075,58 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, return nullptr; } } + +Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID, + const CallExpr *E) { + SmallVector<llvm::Value *, 4> Ops; + Intrinsic::ID ID = Intrinsic::not_intrinsic; + + switch (BuiltinID) { + case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry: + case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B: { + Address Dest = EmitPointerWithAlignment(E->getArg(2)); + unsigned Size; + if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vaddcarry) { + Size = 512; + ID = Intrinsic::hexagon_V6_vaddcarry; + } else { + Size = 1024; + ID = Intrinsic::hexagon_V6_vaddcarry_128B; + } + Dest = Builder.CreateBitCast(Dest, + llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0)); + LoadInst *QLd = Builder.CreateLoad(Dest); + Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd }; + llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops); + llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1); + llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)), + Vprd->getType()->getPointerTo(0)); + Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment()); + return Builder.CreateExtractValue(Result, 0); + } + case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry: + case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: { + Address Dest = EmitPointerWithAlignment(E->getArg(2)); + unsigned Size; + if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vsubcarry) { + Size = 512; + ID = Intrinsic::hexagon_V6_vsubcarry; + } else { + Size = 1024; + ID = Intrinsic::hexagon_V6_vsubcarry_128B; + } + Dest = Builder.CreateBitCast(Dest, + llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0)); + LoadInst *QLd = Builder.CreateLoad(Dest); + Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd }; + llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops); + llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1); + llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)), + Vprd->getType()->getPointerTo(0)); + Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment()); + return Builder.CreateExtractValue(Result, 0); + } + } // switch + + return nullptr; +} diff --git a/lib/CodeGen/CGCXX.cpp b/lib/CodeGen/CGCXX.cpp index 0f3141ab76d0..5ef4dc45fba1 100644 --- a/lib/CodeGen/CGCXX.cpp +++ 
b/lib/CodeGen/CGCXX.cpp @@ -110,16 +110,14 @@ bool CodeGenModule::TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D) { return true; return TryEmitDefinitionAsAlias(GlobalDecl(D, Dtor_Base), - GlobalDecl(BaseD, Dtor_Base), - false); + GlobalDecl(BaseD, Dtor_Base)); } /// Try to emit a definition as a global alias for another definition. /// If \p InEveryTU is true, we know that an equivalent alias can be produced /// in every translation unit. bool CodeGenModule::TryEmitDefinitionAsAlias(GlobalDecl AliasDecl, - GlobalDecl TargetDecl, - bool InEveryTU) { + GlobalDecl TargetDecl) { if (!getCodeGenOpts().CXXCtorDtorAliases) return true; @@ -134,11 +132,6 @@ bool CodeGenModule::TryEmitDefinitionAsAlias(GlobalDecl AliasDecl, llvm::GlobalValue::LinkageTypes TargetLinkage = getFunctionLinkage(TargetDecl); - // available_externally definitions aren't real definitions, so we cannot - // create an alias to one. - if (TargetLinkage == llvm::GlobalValue::AvailableExternallyLinkage) - return true; - // Check if we have it already. StringRef MangledName = getMangledName(AliasDecl); llvm::GlobalValue *Entry = GetGlobalValue(MangledName); @@ -161,7 +154,14 @@ bool CodeGenModule::TryEmitDefinitionAsAlias(GlobalDecl AliasDecl, // Instead of creating as alias to a linkonce_odr, replace all of the uses // of the aliasee. - if (llvm::GlobalValue::isDiscardableIfUnused(Linkage)) { + if (llvm::GlobalValue::isDiscardableIfUnused(Linkage) && + !(TargetLinkage == llvm::GlobalValue::AvailableExternallyLinkage && + TargetDecl.getDecl()->hasAttr<AlwaysInlineAttr>())) { + // FIXME: An extern template instantiation will create functions with + // linkage "AvailableExternally". In libc++, some classes also define + // members with attribute "AlwaysInline" and expect no reference to + // be generated. It is desirable to reenable this optimisation after + // corresponding LLVM changes. addReplacement(MangledName, Aliasee); return false; } @@ -176,13 +176,11 @@ bool CodeGenModule::TryEmitDefinitionAsAlias(GlobalDecl AliasDecl, return true; } - if (!InEveryTU) { - // If we don't have a definition for the destructor yet, don't - // emit. We can't emit aliases to declarations; that's just not - // how aliases work. - if (Ref->isDeclaration()) - return true; - } + // If we don't have a definition for the destructor yet or the definition is + // available_externally, don't emit an alias. We can't emit aliases to + // declarations; that's just not how aliases work. + if (Ref->isDeclarationForLinker()) + return true; // Don't create an alias to a linker weak symbol. This avoids producing // different COMDATs in different TUs. Another option would be to diff --git a/lib/CodeGen/CGCXXABI.cpp b/lib/CodeGen/CGCXXABI.cpp index 033258643ddf..a27c3e9d27e3 100644 --- a/lib/CodeGen/CGCXXABI.cpp +++ b/lib/CodeGen/CGCXXABI.cpp @@ -149,12 +149,15 @@ void CGCXXABI::buildThisParam(CodeGenFunction &CGF, FunctionArgList &params) { } } -void CGCXXABI::EmitThisParam(CodeGenFunction &CGF) { +llvm::Value *CGCXXABI::loadIncomingCXXThis(CodeGenFunction &CGF) { + return CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(getThisDecl(CGF)), + "this"); +} + +void CGCXXABI::setCXXABIThisValue(CodeGenFunction &CGF, llvm::Value *ThisPtr) { /// Initialize the 'this' slot.
assert(getThisDecl(CGF) && "no 'this' variable for function"); - CGF.CXXABIThisValue - = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(getThisDecl(CGF)), - "this"); + CGF.CXXABIThisValue = ThisPtr; } void CGCXXABI::EmitReturnFromThunk(CodeGenFunction &CGF, diff --git a/lib/CodeGen/CGCXXABI.h b/lib/CodeGen/CGCXXABI.h index 7b912e3aca57..83426dc3a03c 100644 --- a/lib/CodeGen/CGCXXABI.h +++ b/lib/CodeGen/CGCXXABI.h @@ -73,9 +73,10 @@ protected: return CGF.CXXStructorImplicitParamValue; } - /// Perform prolog initialization of the parameter variable suitable - /// for 'this' emitted by buildThisParam. - void EmitThisParam(CodeGenFunction &CGF); + /// Loads the incoming C++ this pointer as it was passed by the caller. + llvm::Value *loadIncomingCXXThis(CodeGenFunction &CGF); + + void setCXXABIThisValue(CodeGenFunction &CGF, llvm::Value *ThisPtr); ASTContext &getContext() const { return CGM.getContext(); } @@ -358,13 +359,6 @@ public: return CharUnits::Zero(); } - /// Perform ABI-specific "this" parameter adjustment in a virtual function - /// prologue. - virtual llvm::Value *adjustThisParameterInVirtualFunctionPrologue( - CodeGenFunction &CGF, GlobalDecl GD, llvm::Value *This) { - return This; - } - /// Emit the ABI-specific prolog for the function. virtual void EmitInstanceFunctionProlog(CodeGenFunction &CGF) = 0; @@ -588,6 +582,13 @@ public: /// Emit a single constructor/destructor with the given type from a C++ /// constructor Decl. virtual void emitCXXStructor(const CXXMethodDecl *MD, StructorType Type) = 0; + + /// Load a vtable from This, an object of polymorphic type RD, or from one of + /// its virtual bases if it does not have its own vtable. Returns the vtable + /// and the class from which the vtable was loaded. + virtual std::pair<llvm::Value *, const CXXRecordDecl *> + LoadVTablePtr(CodeGenFunction &CGF, Address This, + const CXXRecordDecl *RD) = 0; }; // Create an instance of a C++ ABI class: diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp index 316bf44cb1c3..c3709bf2e447 100644 --- a/lib/CodeGen/CGCall.cpp +++ b/lib/CodeGen/CGCall.cpp @@ -455,11 +455,15 @@ const CGFunctionInfo & CodeGenTypes::arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD, QualType receiverType) { SmallVector<CanQualType, 16> argTys; + SmallVector<FunctionProtoType::ExtParameterInfo, 4> extParamInfos(2); argTys.push_back(Context.getCanonicalParamType(receiverType)); argTys.push_back(Context.getCanonicalParamType(Context.getObjCSelType())); // FIXME: Kill copy? for (const auto *I : MD->parameters()) { argTys.push_back(Context.getCanonicalParamType(I->getType())); + auto extParamInfo = FunctionProtoType::ExtParameterInfo().withIsNoEscape( + I->hasAttr<NoEscapeAttr>()); + extParamInfos.push_back(extParamInfo); } FunctionType::ExtInfo einfo; @@ -475,7 +479,7 @@ CodeGenTypes::arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD, return arrangeLLVMFunctionInfo( GetReturnType(MD->getReturnType()), /*instanceMethod=*/false, - /*chainCall=*/false, argTys, einfo, {}, required); + /*chainCall=*/false, argTys, einfo, extParamInfos, required); } const CGFunctionInfo & @@ -1223,14 +1227,15 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty, // // FIXME: Assert that we aren't truncating non-padding bits when have access // to that information. - Src = CGF.Builder.CreateBitCast(Src, llvm::PointerType::getUnqual(Ty)); + Src = CGF.Builder.CreateBitCast(Src, + Ty->getPointerTo(Src.getAddressSpace())); return CGF.Builder.CreateLoad(Src); } // Otherwise do coercion through memory. 
This is stupid, but simple. Address Tmp = CreateTempAllocaForCoercion(CGF, Ty, Src.getAlignment()); - Address Casted = CGF.Builder.CreateBitCast(Tmp, CGF.Int8PtrTy); - Address SrcCasted = CGF.Builder.CreateBitCast(Src, CGF.Int8PtrTy); + Address Casted = CGF.Builder.CreateBitCast(Tmp, CGF.AllocaInt8PtrTy); + Address SrcCasted = CGF.Builder.CreateBitCast(Src, CGF.AllocaInt8PtrTy); CGF.Builder.CreateMemCpy(Casted, SrcCasted, llvm::ConstantInt::get(CGF.IntPtrTy, SrcSize), false); @@ -1311,8 +1316,8 @@ static void CreateCoercedStore(llvm::Value *Src, // to that information. Address Tmp = CreateTempAllocaForCoercion(CGF, SrcTy, Dst.getAlignment()); CGF.Builder.CreateStore(Src, Tmp); - Address Casted = CGF.Builder.CreateBitCast(Tmp, CGF.Int8PtrTy); - Address DstCasted = CGF.Builder.CreateBitCast(Dst, CGF.Int8PtrTy); + Address Casted = CGF.Builder.CreateBitCast(Tmp, CGF.AllocaInt8PtrTy); + Address DstCasted = CGF.Builder.CreateBitCast(Dst, CGF.AllocaInt8PtrTy); CGF.Builder.CreateMemCpy(DstCasted, Casted, llvm::ConstantInt::get(CGF.IntPtrTy, DstSize), false); @@ -1734,10 +1739,15 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, llvm::toStringRef(CodeGenOpts.CorrectlyRoundedDivSqrt)); // TODO: Reciprocal estimate codegen options should apply to instructions? - std::vector<std::string> &Recips = getTarget().getTargetOpts().Reciprocals; + const std::vector<std::string> &Recips = CodeGenOpts.Reciprocals; if (!Recips.empty()) FuncAttrs.addAttribute("reciprocal-estimates", - llvm::join(Recips.begin(), Recips.end(), ",")); + llvm::join(Recips, ",")); + + if (!CodeGenOpts.PreferVectorWidth.empty() && + CodeGenOpts.PreferVectorWidth != "none") + FuncAttrs.addAttribute("prefer-vector-width", + CodeGenOpts.PreferVectorWidth); if (CodeGenOpts.StackRealignment) FuncAttrs.addAttribute("stackrealign"); @@ -1745,13 +1755,16 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, FuncAttrs.addAttribute("backchain"); } - if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { - // Conservatively, mark all functions and calls in CUDA as convergent - // (meaning, they may call an intrinsically convergent op, such as - // __syncthreads(), and so can't have certain optimizations applied around - // them). LLVM will remove this attribute where it safely can. + if (getLangOpts().assumeFunctionsAreConvergent()) { + // Conservatively, mark all functions and calls in CUDA and OpenCL as + // convergent (meaning, they may call an intrinsically convergent op, such + // as __syncthreads() / barrier(), and so can't have certain optimizations + // applied around them). LLVM will remove this attribute where it safely + // can. FuncAttrs.addAttribute(llvm::Attribute::Convergent); + } + if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { // Exceptions aren't supported in CUDA device code. FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); @@ -1847,6 +1860,16 @@ void CodeGenModule::ConstructAttributeList( !(TargetDecl && TargetDecl->hasAttr<NoSplitStackAttr>())) FuncAttrs.addAttribute("split-stack"); + // Add NonLazyBind attribute to function declarations when -fno-plt + // is used. 
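// [Editor's illustration, assumed IR spelling] With -fno-plt, a call to an
// undefined function such as
//
//   extern int puts(const char *s);
//   int f(void) { return puts("hi"); }
//
// should yield a declaration carrying the attribute, roughly
//
//   declare i32 @puts(i8*) #0
//   attributes #0 = { nonlazybind ... }
//
// which directs the backend to call through the GOT instead of the PLT.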
+ if (TargetDecl && CodeGenOpts.NoPLT) { + if (auto *Fn = dyn_cast<FunctionDecl>(TargetDecl)) { + if (!Fn->isDefined() && !AttrOnCallSite) { + FuncAttrs.addAttribute(llvm::Attribute::NonLazyBind); + } + } + } + if (!AttrOnCallSite) { bool DisableTailCalls = CodeGenOpts.DisableTailCalls || @@ -1859,13 +1882,13 @@ void CodeGenModule::ConstructAttributeList( // we have a decl for the function and it has a target attribute then // parse that and add it to the feature set. StringRef TargetCPU = getTarget().getTargetOpts().CPU; + std::vector<std::string> Features; const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl); if (FD && FD->hasAttr<TargetAttr>()) { llvm::StringMap<bool> FeatureMap; getFunctionFeatureMap(FeatureMap, FD); // Produce the canonical string for this set of features. - std::vector<std::string> Features; for (llvm::StringMap<bool>::const_iterator it = FeatureMap.begin(), ie = FeatureMap.end(); it != ie; ++it) @@ -1877,28 +1900,22 @@ void CodeGenModule::ConstructAttributeList( // the function. const auto *TD = FD->getAttr<TargetAttr>(); TargetAttr::ParsedTargetAttr ParsedAttr = TD->parse(); - if (ParsedAttr.Architecture != "") + if (ParsedAttr.Architecture != "" && + getTarget().isValidCPUName(ParsedAttr.Architecture)) TargetCPU = ParsedAttr.Architecture; - if (TargetCPU != "") - FuncAttrs.addAttribute("target-cpu", TargetCPU); - if (!Features.empty()) { - std::sort(Features.begin(), Features.end()); - FuncAttrs.addAttribute( - "target-features", - llvm::join(Features.begin(), Features.end(), ",")); - } } else { // Otherwise just add the existing target cpu and target features to the // function. - std::vector<std::string> &Features = getTarget().getTargetOpts().Features; - if (TargetCPU != "") - FuncAttrs.addAttribute("target-cpu", TargetCPU); - if (!Features.empty()) { - std::sort(Features.begin(), Features.end()); - FuncAttrs.addAttribute( - "target-features", - llvm::join(Features.begin(), Features.end(), ",")); - } + Features = getTarget().getTargetOpts().Features; + } + + if (TargetCPU != "") + FuncAttrs.addAttribute("target-cpu", TargetCPU); + if (!Features.empty()) { + std::sort(Features.begin(), Features.end()); + FuncAttrs.addAttribute( + "target-features", + llvm::join(Features, ",")); } } @@ -2092,6 +2109,9 @@ void CodeGenModule::ConstructAttributeList( break; } + if (FI.getExtParameterInfo(ArgNo).isNoEscape()) + Attrs.addAttribute(llvm::Attribute::NoCapture); + if (Attrs.hasAttributes()) { unsigned FirstIRArg, NumIRArgs; std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo); @@ -3054,7 +3074,8 @@ static void emitWriteback(CodeGenFunction &CGF, // If the argument wasn't provably non-null, we need to null check // before doing the store. - bool provablyNonNull = llvm::isKnownNonNull(srcAddr.getPointer()); + bool provablyNonNull = llvm::isKnownNonZero(srcAddr.getPointer(), + CGF.CGM.getDataLayout()); if (!provablyNonNull) { llvm::BasicBlock *writebackBB = CGF.createBasicBlock("icr.writeback"); contBB = CGF.createBasicBlock("icr.done"); @@ -3194,7 +3215,8 @@ static void emitWritebackArg(CodeGenFunction &CGF, CallArgList &args, // If the address is *not* known to be non-null, we need to switch. 
llvm::Value *finalArgument; - bool provablyNonNull = llvm::isKnownNonNull(srcAddr.getPointer()); + bool provablyNonNull = llvm::isKnownNonZero(srcAddr.getPointer(), + CGF.CGM.getDataLayout()); if (provablyNonNull) { finalArgument = temp.getPointer(); } else { @@ -3946,7 +3968,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Builder.CreateMemCpy(TempAlloca, Src, SrcSize); Src = TempAlloca; } else { - Src = Builder.CreateBitCast(Src, llvm::PointerType::getUnqual(STy)); + Src = Builder.CreateBitCast(Src, + STy->getPointerTo(Src.getAddressSpace())); } auto SrcLayout = CGM.getDataLayout().getStructLayout(STy); diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp index 50d702c62268..a6915071ec17 100644 --- a/lib/CodeGen/CGClass.cpp +++ b/lib/CodeGen/CGClass.cpp @@ -129,14 +129,16 @@ Address CodeGenFunction::EmitCXXMemberDataPointerAddress(const Expr *E, Address base, llvm::Value *memberPtr, const MemberPointerType *memberPtrType, - LValueBaseInfo *BaseInfo) { + LValueBaseInfo *BaseInfo, + TBAAAccessInfo *TBAAInfo) { // Ask the ABI to compute the actual address. llvm::Value *ptr = CGM.getCXXABI().EmitMemberDataPointerAddress(*this, E, base, memberPtr, memberPtrType); QualType memberType = memberPtrType->getPointeeType(); - CharUnits memberAlign = getNaturalTypeAlignment(memberType, BaseInfo); + CharUnits memberAlign = getNaturalTypeAlignment(memberType, BaseInfo, + TBAAInfo); memberAlign = CGM.getDynamicOffsetAlignment(base.getAlignment(), memberPtrType->getClass()->getAsCXXRecordDecl(), @@ -1413,10 +1415,11 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) { // possible to delegate the destructor body to the complete // destructor. Do so. if (DtorType == Dtor_Deleting) { + RunCleanupsScope DtorEpilogue(*this); EnterDtorCleanups(Dtor, Dtor_Deleting); - EmitCXXDestructorCall(Dtor, Dtor_Complete, /*ForVirtualBase=*/false, - /*Delegating=*/false, LoadCXXThisAddress()); - PopCleanupBlock(); + if (HaveInsertPoint()) + EmitCXXDestructorCall(Dtor, Dtor_Complete, /*ForVirtualBase=*/false, + /*Delegating=*/false, LoadCXXThisAddress()); return; } @@ -1512,6 +1515,13 @@ void CodeGenFunction::emitImplicitAssignmentOperatorBody(FunctionArgList &Args) } namespace { + llvm::Value *LoadThisForDtorDelete(CodeGenFunction &CGF, + const CXXDestructorDecl *DD) { + if (Expr *ThisArg = DD->getOperatorDeleteThisArg()) + return CGF.EmitScalarExpr(ThisArg); + return CGF.LoadCXXThis(); + } + /// Call the operator delete associated with the current destructor. 
struct CallDtorDelete final : EHScopeStack::Cleanup { CallDtorDelete() {} @@ -1519,11 +1529,38 @@ namespace { void Emit(CodeGenFunction &CGF, Flags flags) override { const CXXDestructorDecl *Dtor = cast<CXXDestructorDecl>(CGF.CurCodeDecl); const CXXRecordDecl *ClassDecl = Dtor->getParent(); - CGF.EmitDeleteCall(Dtor->getOperatorDelete(), CGF.LoadCXXThis(), + CGF.EmitDeleteCall(Dtor->getOperatorDelete(), + LoadThisForDtorDelete(CGF, Dtor), CGF.getContext().getTagDeclType(ClassDecl)); } }; + void EmitConditionalDtorDeleteCall(CodeGenFunction &CGF, + llvm::Value *ShouldDeleteCondition, + bool ReturnAfterDelete) { + llvm::BasicBlock *callDeleteBB = CGF.createBasicBlock("dtor.call_delete"); + llvm::BasicBlock *continueBB = CGF.createBasicBlock("dtor.continue"); + llvm::Value *ShouldCallDelete + = CGF.Builder.CreateIsNull(ShouldDeleteCondition); + CGF.Builder.CreateCondBr(ShouldCallDelete, continueBB, callDeleteBB); + + CGF.EmitBlock(callDeleteBB); + const CXXDestructorDecl *Dtor = cast<CXXDestructorDecl>(CGF.CurCodeDecl); + const CXXRecordDecl *ClassDecl = Dtor->getParent(); + CGF.EmitDeleteCall(Dtor->getOperatorDelete(), + LoadThisForDtorDelete(CGF, Dtor), + CGF.getContext().getTagDeclType(ClassDecl)); + assert(Dtor->getOperatorDelete()->isDestroyingOperatorDelete() == + ReturnAfterDelete && + "unexpected value for ReturnAfterDelete"); + if (ReturnAfterDelete) + CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); + else + CGF.Builder.CreateBr(continueBB); + + CGF.EmitBlock(continueBB); + } + struct CallDtorDeleteConditional final : EHScopeStack::Cleanup { llvm::Value *ShouldDeleteCondition; @@ -1534,20 +1571,8 @@ namespace { } void Emit(CodeGenFunction &CGF, Flags flags) override { - llvm::BasicBlock *callDeleteBB = CGF.createBasicBlock("dtor.call_delete"); - llvm::BasicBlock *continueBB = CGF.createBasicBlock("dtor.continue"); - llvm::Value *ShouldCallDelete - = CGF.Builder.CreateIsNull(ShouldDeleteCondition); - CGF.Builder.CreateCondBr(ShouldCallDelete, continueBB, callDeleteBB); - - CGF.EmitBlock(callDeleteBB); - const CXXDestructorDecl *Dtor = cast<CXXDestructorDecl>(CGF.CurCodeDecl); - const CXXRecordDecl *ClassDecl = Dtor->getParent(); - CGF.EmitDeleteCall(Dtor->getOperatorDelete(), CGF.LoadCXXThis(), - CGF.getContext().getTagDeclType(ClassDecl)); - CGF.Builder.CreateBr(continueBB); - - CGF.EmitBlock(continueBB); + EmitConditionalDtorDeleteCall(CGF, ShouldDeleteCondition, + /*ReturnAfterDelete*/false); } }; @@ -1577,6 +1602,7 @@ namespace { static void EmitSanitizerDtorCallback(CodeGenFunction &CGF, llvm::Value *Ptr, CharUnits::QuantityType PoisonSize) { + CodeGenFunction::SanitizerScope SanScope(&CGF); // Pass in void pointer and size of region as arguments to runtime // function llvm::Value *Args[] = {CGF.Builder.CreateBitCast(Ptr, CGF.VoidPtrTy), @@ -1705,6 +1731,9 @@ namespace { /// \brief Emit all code that comes at the end of class's /// destructor. This is to call destructors on members and base classes /// in reverse order of their construction. +/// +/// For a deleting destructor, this also handles the case where a destroying +/// operator delete completely overrides the definition. 
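// [Editor's illustration, not part of the patch] A "destroying" operator
// delete (C++2a P0722) takes over responsibility for running the
// destructor itself, e.g.
//
//   struct S {
//     ~S();
//     void operator delete(S *p, std::destroying_delete_t);
//   };
//
// so the deleting destructor for S must call the operator and return,
// skipping the member/base cleanups that are otherwise pushed below.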
void CodeGenFunction::EnterDtorCleanups(const CXXDestructorDecl *DD, CXXDtorType DtorType) { assert((!DD->isTrivial() || DD->hasAttr<DLLExportAttr>()) && @@ -1717,11 +1746,23 @@ void CodeGenFunction::EnterDtorCleanups(const CXXDestructorDecl *DD, "operator delete missing - EnterDtorCleanups"); if (CXXStructorImplicitParamValue) { // If there is an implicit param to the deleting dtor, it's a boolean - // telling whether we should call delete at the end of the dtor. - EHStack.pushCleanup<CallDtorDeleteConditional>( - NormalAndEHCleanup, CXXStructorImplicitParamValue); + // telling whether this is a deleting destructor. + if (DD->getOperatorDelete()->isDestroyingOperatorDelete()) + EmitConditionalDtorDeleteCall(*this, CXXStructorImplicitParamValue, + /*ReturnAfterDelete*/true); + else + EHStack.pushCleanup<CallDtorDeleteConditional>( + NormalAndEHCleanup, CXXStructorImplicitParamValue); } else { - EHStack.pushCleanup<CallDtorDelete>(NormalAndEHCleanup); + if (DD->getOperatorDelete()->isDestroyingOperatorDelete()) { + const CXXRecordDecl *ClassDecl = DD->getParent(); + EmitDeleteCall(DD->getOperatorDelete(), + LoadThisForDtorDelete(*this, DD), + getContext().getTagDeclType(ClassDecl)); + EmitBranchThroughCleanup(ReturnBlock); + } else { + EHStack.pushCleanup<CallDtorDelete>(NormalAndEHCleanup); + } } return; } @@ -2382,7 +2423,8 @@ void CodeGenFunction::InitializeVTablePointer(const VPtr &Vptr) { VTableAddressPoint = Builder.CreateBitCast(VTableAddressPoint, VTablePtrTy); llvm::StoreInst *Store = Builder.CreateStore(VTableAddressPoint, VTableField); - CGM.DecorateInstructionWithTBAA(Store, CGM.getTBAAInfoForVTablePtr()); + TBAAAccessInfo TBAAInfo = CGM.getTBAAVTablePtrAccessInfo(VTablePtrTy); + CGM.DecorateInstructionWithTBAA(Store, TBAAInfo); if (CGM.getCodeGenOpts().OptimizationLevel > 0 && CGM.getCodeGenOpts().StrictVTablePointers) CGM.DecorateInstructionWithInvariantGroup(Store, Vptr.VTableClass); @@ -2476,7 +2518,8 @@ llvm::Value *CodeGenFunction::GetVTablePtr(Address This, const CXXRecordDecl *RD) { Address VTablePtrSrc = Builder.CreateElementBitCast(This, VTableTy); llvm::Instruction *VTable = Builder.CreateLoad(VTablePtrSrc, "vtable"); - CGM.DecorateInstructionWithTBAA(VTable, CGM.getTBAAInfoForVTablePtr()); + TBAAAccessInfo TBAAInfo = CGM.getTBAAVTablePtrAccessInfo(VTableTy); + CGM.DecorateInstructionWithTBAA(VTable, TBAAInfo); if (CGM.getCodeGenOpts().OptimizationLevel > 0 && CGM.getCodeGenOpts().StrictVTablePointers) @@ -2523,8 +2566,10 @@ LeastDerivedClassWithSameLayout(const CXXRecordDecl *RD) { void CodeGenFunction::EmitTypeMetadataCodeForVCall(const CXXRecordDecl *RD, llvm::Value *VTable, SourceLocation Loc) { - if (CGM.getCodeGenOpts().WholeProgramVTables && - CGM.HasHiddenLTOVisibility(RD)) { + if (SanOpts.has(SanitizerKind::CFIVCall)) + EmitVTablePtrCheckForCall(RD, VTable, CodeGenFunction::CFITCK_VCall, Loc); + else if (CGM.getCodeGenOpts().WholeProgramVTables && + CGM.HasHiddenLTOVisibility(RD)) { llvm::Metadata *MD = CGM.CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0)); llvm::Value *TypeId = @@ -2536,9 +2581,6 @@ void CodeGenFunction::EmitTypeMetadataCodeForVCall(const CXXRecordDecl *RD, {CastedVTable, TypeId}); Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::assume), TypeTest); } - - if (SanOpts.has(SanitizerKind::CFIVCall)) - EmitVTablePtrCheckForCall(RD, VTable, CodeGenFunction::CFITCK_VCall, Loc); } void CodeGenFunction::EmitVTablePtrCheckForCall(const CXXRecordDecl *RD, @@ -2585,8 +2627,9 @@ void 
CodeGenFunction::EmitVTablePtrCheckForCast(QualType T, EmitBlock(CheckBlock); } - llvm::Value *VTable = - GetVTablePtr(Address(Derived, getPointerAlign()), Int8PtrTy, ClassDecl); + llvm::Value *VTable; + std::tie(VTable, ClassDecl) = CGM.getCXXABI().LoadVTablePtr( + *this, Address(Derived, getPointerAlign()), ClassDecl); EmitVTablePtrCheck(ClassDecl, VTable, TCK, Loc); @@ -2604,28 +2647,34 @@ void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD, !CGM.HasHiddenLTOVisibility(RD)) return; - std::string TypeName = RD->getQualifiedNameAsString(); - if (getContext().getSanitizerBlacklist().isBlacklistedType(TypeName)) - return; - - SanitizerScope SanScope(this); + SanitizerMask M; llvm::SanitizerStatKind SSK; switch (TCK) { case CFITCK_VCall: + M = SanitizerKind::CFIVCall; SSK = llvm::SanStat_CFI_VCall; break; case CFITCK_NVCall: + M = SanitizerKind::CFINVCall; SSK = llvm::SanStat_CFI_NVCall; break; case CFITCK_DerivedCast: + M = SanitizerKind::CFIDerivedCast; SSK = llvm::SanStat_CFI_DerivedCast; break; case CFITCK_UnrelatedCast: + M = SanitizerKind::CFIUnrelatedCast; SSK = llvm::SanStat_CFI_UnrelatedCast; break; case CFITCK_ICall: llvm_unreachable("not expecting CFITCK_ICall"); } + + std::string TypeName = RD->getQualifiedNameAsString(); + if (getContext().getSanitizerBlacklist().isBlacklistedType(M, TypeName)) + return; + + SanitizerScope SanScope(this); EmitSanitizerStatReport(SSK); llvm::Metadata *MD = @@ -2636,24 +2685,6 @@ void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD, llvm::Value *TypeTest = Builder.CreateCall( CGM.getIntrinsic(llvm::Intrinsic::type_test), {CastedVTable, TypeId}); - SanitizerMask M; - switch (TCK) { - case CFITCK_VCall: - M = SanitizerKind::CFIVCall; - break; - case CFITCK_NVCall: - M = SanitizerKind::CFINVCall; - break; - case CFITCK_DerivedCast: - M = SanitizerKind::CFIDerivedCast; - break; - case CFITCK_UnrelatedCast: - M = SanitizerKind::CFIUnrelatedCast; - break; - case CFITCK_ICall: - llvm_unreachable("not expecting CFITCK_ICall"); - } - llvm::Constant *StaticData[] = { llvm::ConstantInt::get(Int8Ty, TCK), EmitCheckSourceLocation(Loc), @@ -2688,7 +2719,8 @@ bool CodeGenFunction::ShouldEmitVTableTypeCheckedLoad(const CXXRecordDecl *RD) { return false; std::string TypeName = RD->getQualifiedNameAsString(); - return !getContext().getSanitizerBlacklist().isBlacklistedType(TypeName); + return !getContext().getSanitizerBlacklist().isBlacklistedType( + SanitizerKind::CFIVCall, TypeName); } llvm::Value *CodeGenFunction::EmitVTableTypeCheckedLoad( @@ -2745,9 +2777,12 @@ void CodeGenFunction::EmitForwardingCallToLambda( RValue RV = EmitCall(calleeFnInfo, callee, returnSlot, callArgs); // If necessary, copy the returned value into the slot. 
- if (!resultType->isVoidType() && returnSlot.isNull()) + if (!resultType->isVoidType() && returnSlot.isNull()) { + if (getLangOpts().ObjCAutoRefCount && resultType->isObjCRetainableType()) { + RV = RValue::get(EmitARCRetainAutoreleasedReturnValue(RV.getScalarVal())); + } EmitReturnOfRValue(RV, resultType); - else + } else EmitBranchThroughCleanup(ReturnBlock); } @@ -2755,6 +2790,15 @@ void CodeGenFunction::EmitLambdaBlockInvokeBody() { const BlockDecl *BD = BlockInfo->getBlockDecl(); const VarDecl *variable = BD->capture_begin()->getVariable(); const CXXRecordDecl *Lambda = variable->getType()->getAsCXXRecordDecl(); + const CXXMethodDecl *CallOp = Lambda->getLambdaCallOperator(); + + if (CallOp->isVariadic()) { + // FIXME: Making this work correctly is nasty because it requires either + // cloning the body of the call operator or making the call operator + // forward. + CGM.ErrorUnsupported(CurCodeDecl, "lambda conversion to variadic function"); + return; + } // Start building arguments for forwarding call CallArgList CallArgs; @@ -2769,18 +2813,7 @@ void CodeGenFunction::EmitLambdaBlockInvokeBody() { assert(!Lambda->isGenericLambda() && "generic lambda interconversion to block not implemented"); - EmitForwardingCallToLambda(Lambda->getLambdaCallOperator(), CallArgs); -} - -void CodeGenFunction::EmitLambdaToBlockPointerBody(FunctionArgList &Args) { - if (cast<CXXMethodDecl>(CurCodeDecl)->isVariadic()) { - // FIXME: Making this work correctly is nasty because it requires either - // cloning the body of the call operator or making the call operator forward. - CGM.ErrorUnsupported(CurCodeDecl, "lambda conversion to variadic function"); - return; - } - - EmitFunctionBody(Args, cast<FunctionDecl>(CurGD.getDecl())->getBody()); + EmitForwardingCallToLambda(CallOp, CallArgs); } void CodeGenFunction::EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD) { @@ -2813,7 +2846,7 @@ void CodeGenFunction::EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD) { EmitForwardingCallToLambda(CallOp, CallArgs); } -void CodeGenFunction::EmitLambdaStaticInvokeFunction(const CXXMethodDecl *MD) { +void CodeGenFunction::EmitLambdaStaticInvokeBody(const CXXMethodDecl *MD) { if (MD->isVariadic()) { // FIXME: Making this work correctly is nasty because it requires either // cloning the body of the call operator or making the call operator forward. diff --git a/lib/CodeGen/CGCleanup.cpp b/lib/CodeGen/CGCleanup.cpp index b5453bc11e30..22055b2cb902 100644 --- a/lib/CodeGen/CGCleanup.cpp +++ b/lib/CodeGen/CGCleanup.cpp @@ -1096,7 +1096,7 @@ void CodeGenFunction::EmitBranchThroughCleanup(JumpDest Dest) { break; } - // Otherwise, tell the scope that there's a jump propoagating + // Otherwise, tell the scope that there's a jump propagating // through it. If this isn't new information, all the rest of // the work has been done before. if (!Scope.addBranchThrough(Dest.getBlock())) diff --git a/lib/CodeGen/CGCoroutine.cpp b/lib/CodeGen/CGCoroutine.cpp index a65faa602b33..5842e7b3ff93 100644 --- a/lib/CodeGen/CGCoroutine.cpp +++ b/lib/CodeGen/CGCoroutine.cpp @@ -181,10 +181,8 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co auto *SaveCall = Builder.CreateCall(CoroSave, {NullPtr}); auto *SuspendRet = CGF.EmitScalarExpr(S.getSuspendExpr()); - if (SuspendRet != nullptr) { + if (SuspendRet != nullptr && SuspendRet->getType()->isIntegerTy(1)) { // Veto suspension if requested by bool returning await_suspend. 
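// [Editor's illustration] A bool-returning await_suspend lets an awaitable
// veto suspension at run time, e.g.
//
//   struct MaybeSuspend {
//     bool await_ready() { return false; }
//     bool await_suspend(std::experimental::coroutine_handle<>) {
//       return should_block(); // placeholder for any runtime condition
//     }
//     void await_resume() {}
//   };
//
// When await_suspend returns false, the conditional branch emitted here
// jumps straight to ReadyBlock and the coroutine continues running.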
- assert(SuspendRet->getType()->isIntegerTy(1) && - "Sema should have already checked that it is void or bool"); BasicBlock *RealSuspendBlock = CGF.createBasicBlock(Prefix + Twine(".suspend.bool")); CGF.Builder.CreateCondBr(SuspendRet, RealSuspendBlock, ReadyBlock); @@ -234,6 +232,13 @@ RValue CodeGenFunction::EmitCoyieldExpr(const CoyieldExpr &E, void CodeGenFunction::EmitCoreturnStmt(CoreturnStmt const &S) { ++CurCoro.Data->CoreturnCount; + const Expr *RV = S.getOperand(); + if (RV && RV->getType()->isVoidType()) { + // Make sure to evaluate the expression of a co_return with a void + // expression for side effects. + RunCleanupsScope cleanupScope(*this); + EmitIgnoredExpr(RV); + } EmitStmt(S.getPromiseCall()); EmitBranchThroughCleanup(CurCoro.Data->FinalJD); } diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp index 18b1d10a921d..caea41ec0e03 100644 --- a/lib/CodeGen/CGDebugInfo.cpp +++ b/lib/CodeGen/CGDebugInfo.cpp @@ -18,6 +18,7 @@ #include "CGRecordLayout.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" +#include "ConstantEmitter.h" #include "clang/AST/ASTContext.h" #include "clang/AST/DeclFriend.h" #include "clang/AST/DeclObjC.h" @@ -28,6 +29,7 @@ #include "clang/Basic/SourceManager.h" #include "clang/Basic/Version.h" #include "clang/Frontend/CodeGenOptions.h" +#include "clang/Frontend/FrontendOptions.h" #include "clang/Lex/HeaderSearchOptions.h" #include "clang/Lex/ModuleMap.h" #include "clang/Lex/PreprocessorOptions.h" @@ -95,6 +97,10 @@ void ApplyDebugLocation::init(SourceLocation TemporaryLocation, } OriginalLocation = CGF->Builder.getCurrentDebugLocation(); + + if (OriginalLocation && !DI->CGM.getExpressionLocationsEnabled()) + return; + if (TemporaryLocation.isValid()) { DI->EmitLocation(CGF->Builder, TemporaryLocation); return; @@ -218,6 +224,19 @@ llvm::DIScope *CGDebugInfo::getContextDescriptor(const Decl *Context, return Default; } +PrintingPolicy CGDebugInfo::getPrintingPolicy() const { + PrintingPolicy PP = CGM.getContext().getPrintingPolicy(); + + // If we're emitting codeview, it's important to try to match MSVC's naming so + // that visualizers written for MSVC will trigger for our class names. In + // particular, we can't have spaces between arguments of standard templates + // like basic_string and vector. + if (CGM.getCodeGenOpts().EmitCodeView) + PP.MSVCFormatting = true; + + return PP; +} + StringRef CGDebugInfo::getFunctionName(const FunctionDecl *FD) { assert(FD && "Invalid FunctionDecl!"); IdentifierInfo *FII = FD->getIdentifier(); @@ -238,18 +257,15 @@ StringRef CGDebugInfo::getFunctionName(const FunctionDecl *FD) { SmallString<128> NS; llvm::raw_svector_ostream OS(NS); - PrintingPolicy Policy(CGM.getLangOpts()); - Policy.MSVCFormatting = CGM.getCodeGenOpts().EmitCodeView; if (!UseQualifiedName) FD->printName(OS); else - FD->printQualifiedName(OS, Policy); + FD->printQualifiedName(OS, getPrintingPolicy()); // Add any template specialization args. if (Info) { const TemplateArgumentList *TArgs = Info->TemplateArguments; - TemplateSpecializationType::PrintTemplateArgumentList(OS, TArgs->asArray(), - Policy); + printTemplateArgumentList(OS, TArgs->asArray(), getPrintingPolicy()); } // Copy this name on the side and use its reference. 
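// [Editor's note, illustrative] The getPrintingPolicy() change above means
// that under -gcodeview a specialization such as
// std::basic_string<char, std::char_traits<char>, std::allocator<char>>
// is rendered with MSVC's spelling (in particular, no spaces between
// template arguments), so visualizers keyed to MSVC's names still match;
// the exact text depends on the printing policy.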
@@ -296,7 +312,7 @@ StringRef CGDebugInfo::getClassName(const RecordDecl *RD) { if (isa<ClassTemplateSpecializationDecl>(RD)) { SmallString<128> Name; llvm::raw_svector_ostream OS(Name); - RD->getNameForDiagnostic(OS, CGM.getContext().getPrintingPolicy(), + RD->getNameForDiagnostic(OS, getPrintingPolicy(), /*Qualified*/ false); // Copy this name on the side and use its reference. @@ -483,6 +499,16 @@ void CGDebugInfo::CreateCompileUnit() { llvm::sys::path::append(MainFileDirSS, MainFileName); MainFileName = MainFileDirSS.str(); } + // If the main file name provided is identical to the input file name, and + // if the input file is a preprocessed source, use the module name for + // debug info. The module name comes from the name specified in the first + // linemarker if the input is a preprocessed source. + if (MainFile->getName() == MainFileName && + FrontendOptions::getInputKindForExtension( + MainFile->getName().rsplit('.').second) + .isPreprocessed()) + MainFileName = CGM.getModule().getName().str(); + CSKind = computeChecksum(SM.getMainFileID(), Checksum); } @@ -527,16 +553,16 @@ void CGDebugInfo::CreateCompileUnit() { // Create new compile unit. // FIXME - Eliminate TheCU. + auto &CGOpts = CGM.getCodeGenOpts(); TheCU = DBuilder.createCompileUnit( LangTag, DBuilder.createFile(remapDIPath(MainFileName), remapDIPath(getCurrentDirname()), CSKind, Checksum), - Producer, LO.Optimize, CGM.getCodeGenOpts().DwarfDebugFlags, RuntimeVers, - CGM.getCodeGenOpts().EnableSplitDwarf - ? "" - : CGM.getCodeGenOpts().SplitDwarfFile, - EmissionKind, 0 /* DWOid */, CGM.getCodeGenOpts().SplitDwarfInlining, - CGM.getCodeGenOpts().DebugInfoForProfiling); + Producer, LO.Optimize || CGOpts.PrepareForLTO || CGOpts.EmitSummaryIndex, + CGOpts.DwarfDebugFlags, RuntimeVers, + CGOpts.EnableSplitDwarf ? 
"" : CGOpts.SplitDwarfFile, EmissionKind, + 0 /* DWOid */, CGOpts.SplitDwarfInlining, CGOpts.DebugInfoForProfiling, + CGOpts.GnuPubnames); } llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { @@ -645,6 +671,7 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { case BuiltinType::Half: case BuiltinType::Float: case BuiltinType::LongDouble: + case BuiltinType::Float16: case BuiltinType::Float128: case BuiltinType::Double: // FIXME: For targets where long double and __float128 have the same size, @@ -805,6 +832,10 @@ CGDebugInfo::getOrCreateRecordFwdDecl(const RecordType *Ty, llvm::DICompositeType *RetTy = DBuilder.createReplaceableCompositeType( getTagForRecord(RD), RDName, Ctx, DefUnit, Line, 0, Size, Align, llvm::DINode::FlagFwdDecl, FullName); + if (CGM.getCodeGenOpts().DebugFwdTemplateParams) + if (auto *TSpecial = dyn_cast<ClassTemplateSpecializationDecl>(RD)) + DBuilder.replaceArrays(RetTy, llvm::DINodeArray(), + CollectCXXTemplateParams(TSpecial, DefUnit)); ReplaceMap.emplace_back( std::piecewise_construct, std::make_tuple(Ty), std::make_tuple(static_cast<llvm::Metadata *>(RetTy))); @@ -909,12 +940,8 @@ llvm::DIType *CGDebugInfo::CreateType(const TemplateSpecializationType *Ty, SmallString<128> NS; llvm::raw_svector_ostream OS(NS); - Ty->getTemplateName().print(OS, CGM.getContext().getPrintingPolicy(), - /*qualified*/ false); - - TemplateSpecializationType::PrintTemplateArgumentList( - OS, Ty->template_arguments(), - CGM.getContext().getPrintingPolicy()); + Ty->getTemplateName().print(OS, getPrintingPolicy(), /*qualified*/ false); + printTemplateArgumentList(OS, Ty->template_arguments(), getPrintingPolicy()); auto *AliasDecl = cast<TypeAliasTemplateDecl>( Ty->getTemplateName().getAsTemplateDecl())->getTemplatedDecl(); @@ -1174,13 +1201,13 @@ void CGDebugInfo::CollectRecordNormalField( elements.push_back(FieldType); } -void CGDebugInfo::CollectRecordNestedRecord( - const RecordDecl *RD, SmallVectorImpl<llvm::Metadata *> &elements) { - QualType Ty = CGM.getContext().getTypeDeclType(RD); +void CGDebugInfo::CollectRecordNestedType( + const TypeDecl *TD, SmallVectorImpl<llvm::Metadata *> &elements) { + QualType Ty = CGM.getContext().getTypeDeclType(TD); // Injected class names are not considered nested records. if (isa<InjectedClassNameType>(Ty)) return; - SourceLocation Loc = RD->getLocation(); + SourceLocation Loc = TD->getLocation(); llvm::DIType *nestedType = getOrCreateType(Ty, getOrCreateFile(Loc)); elements.push_back(nestedType); } @@ -1196,9 +1223,9 @@ void CGDebugInfo::CollectRecordFields( else { const ASTRecordLayout &layout = CGM.getContext().getASTRecordLayout(record); - // Debug info for nested records is included in the member list only for + // Debug info for nested types is included in the member list only for // CodeView. - bool IncludeNestedRecords = CGM.getCodeGenOpts().EmitCodeView; + bool IncludeNestedTypes = CGM.getCodeGenOpts().EmitCodeView; // Field number for non-static fields. unsigned fieldNo = 0; @@ -1225,10 +1252,12 @@ void CGDebugInfo::CollectRecordFields( // Bump field number for next field. 
++fieldNo; - } else if (const auto *nestedRec = dyn_cast<CXXRecordDecl>(I)) - if (IncludeNestedRecords && !nestedRec->isImplicit() && - nestedRec->getDeclContext() == record) - CollectRecordNestedRecord(nestedRec, elements); + } else if (IncludeNestedTypes) { + if (const auto *nestedType = dyn_cast<TypeDecl>(I)) + if (!nestedType->isImplicit() && + nestedType->getDeclContext() == record) + CollectRecordNestedType(nestedType, elements); + } } } @@ -1366,7 +1395,7 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction( // C++ ABI does not include all virtual methods from non-primary bases in // the vtable for the most derived class. For example, if C inherits from // A and B, C's primary vftable will not include B's virtual methods. - if (Method->begin_overridden_methods() == Method->end_overridden_methods()) + if (Method->size_overridden_methods() == 0) Flags |= llvm::DINode::FlagIntroducedVirtual; // The 'this' adjustment accounts for both the virtual and non-virtual @@ -1379,6 +1408,8 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction( ContainingType = RecordTy; } + if (Method->isStatic()) + Flags |= llvm::DINode::FlagStaticMember; if (Method->isImplicit()) Flags |= llvm::DINode::FlagArtificial; Flags |= getAccessFlag(Method->getAccess(), Method->getParent()); @@ -1590,7 +1621,7 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList, QualType T = E->getType(); if (E->isGLValue()) T = CGM.getContext().getLValueReferenceType(T); - llvm::Constant *V = CGM.EmitConstantExpr(E, T); + llvm::Constant *V = ConstantEmitter(CGM).emitAbstract(E, T); assert(V && "Expression in template argument isn't constant"); llvm::DIType *TTy = getOrCreateType(T, Unit); TemplateParams.push_back(DBuilder.createTemplateValueParameter( @@ -1766,6 +1797,29 @@ static bool isClassOrMethodDLLImport(const CXXRecordDecl *RD) { return false; } +/// Does a type definition exist in an imported clang module? +static bool isDefinedInClangModule(const RecordDecl *RD) { + // Only definitions that were imported from an AST file come from a module. + if (!RD || !RD->isFromASTFile()) + return false; + // Anonymous entities cannot be addressed. Treat them as not from module. + if (!RD->isExternallyVisible() && RD->getName().empty()) + return false; + if (auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD)) { + if (!CXXDecl->isCompleteDefinition()) + return false; + auto TemplateKind = CXXDecl->getTemplateSpecializationKind(); + if (TemplateKind != TSK_Undeclared) { + // This is a template, check the origin of the first member. + if (CXXDecl->field_begin() == CXXDecl->field_end()) + return TemplateKind == TSK_ExplicitInstantiationDeclaration; + if (!CXXDecl->field_begin()->isFromASTFile()) + return false; + } + } + return true; +} + void CGDebugInfo::completeClassData(const RecordDecl *RD) { if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) if (CXXRD->isDynamicClass() && @@ -1773,6 +1827,10 @@ void CGDebugInfo::completeClassData(const RecordDecl *RD) { llvm::GlobalValue::AvailableExternallyLinkage && !isClassOrMethodDLLImport(CXXRD)) return; + + if (DebugTypeExtRefs && isDefinedInClangModule(RD->getDefinition())) + return; + completeClass(RD); } @@ -1799,29 +1857,6 @@ static bool hasExplicitMemberDefinition(CXXRecordDecl::method_iterator I, return false; } -/// Does a type definition exist in an imported clang module? -static bool isDefinedInClangModule(const RecordDecl *RD) { - // Only definitions that where imported from an AST file come from a module.
- if (!RD || !RD->isFromASTFile()) - return false; - // Anonymous entities cannot be addressed. Treat them as not from module. - if (!RD->isExternallyVisible() && RD->getName().empty()) - return false; - if (auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD)) { - if (!CXXDecl->isCompleteDefinition()) - return false; - auto TemplateKind = CXXDecl->getTemplateSpecializationKind(); - if (TemplateKind != TSK_Undeclared) { - // This is a template, check the origin of the first member. - if (CXXDecl->field_begin() == CXXDecl->field_end()) - return TemplateKind == TSK_ExplicitInstantiationDeclaration; - if (!CXXDecl->field_begin()->isFromASTFile()) - return false; - } - } - return true; -} - static bool shouldOmitDefinition(codegenoptions::DebugInfoKind DebugKind, bool DebugTypeExtRefs, const RecordDecl *RD, const LangOptions &LangOpts) { @@ -3655,9 +3690,9 @@ bool operator<(const BlockLayoutChunk &l, const BlockLayoutChunk &r) { } void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, - llvm::Value *Arg, + StringRef Name, unsigned ArgNo, - llvm::Value *LocalAddr, + llvm::AllocaInst *Alloca, CGBuilderTy &Builder) { assert(DebugKind >= codegenoptions::LimitedDebugInfo); ASTContext &C = CGM.getContext(); @@ -3789,19 +3824,11 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, // Create the descriptor for the parameter. auto *debugVar = DBuilder.createParameterVariable( - scope, Arg->getName(), ArgNo, tunit, line, type, + scope, Name, ArgNo, tunit, line, type, CGM.getLangOpts().Optimize, flags); - if (LocalAddr) { - // Insert an llvm.dbg.value into the current block. - DBuilder.insertDbgValueIntrinsic( - LocalAddr, 0, debugVar, DBuilder.createExpression(), - llvm::DebugLoc::get(line, column, scope, CurInlinedAt), - Builder.GetInsertBlock()); - } - // Insert an llvm.dbg.declare into the current block. - DBuilder.insertDeclare(Arg, debugVar, DBuilder.createExpression(), + DBuilder.insertDeclare(Alloca, debugVar, DBuilder.createExpression(), llvm::DebugLoc::get(line, column, scope, CurInlinedAt), Builder.GetInsertBlock()); } diff --git a/lib/CodeGen/CGDebugInfo.h b/lib/CodeGen/CGDebugInfo.h index 39249c7cf4da..4f7b7f2a0d9c 100644 --- a/lib/CodeGen/CGDebugInfo.h +++ b/lib/CodeGen/CGDebugInfo.h @@ -278,8 +278,8 @@ class CGDebugInfo { llvm::DIFile *F, SmallVectorImpl<llvm::Metadata *> &E, llvm::DIType *RecordTy, const RecordDecl *RD); - void CollectRecordNestedRecord(const RecordDecl *RD, - SmallVectorImpl<llvm::Metadata *> &E); + void CollectRecordNestedType(const TypeDecl *RD, + SmallVectorImpl<llvm::Metadata *> &E); void CollectRecordFields(const RecordDecl *Decl, llvm::DIFile *F, SmallVectorImpl<llvm::Metadata *> &E, llvm::DICompositeType *RecordTy); @@ -398,8 +398,8 @@ public: /// Emit call to \c llvm.dbg.declare for the block-literal argument /// to a block invocation function. void EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, - llvm::Value *Arg, unsigned ArgNo, - llvm::Value *LocalAddr, + StringRef Name, unsigned ArgNo, + llvm::AllocaInst *LocalAddr, CGBuilderTy &Builder); /// Emit information about a global variable. @@ -558,6 +558,9 @@ private: unsigned LineNo, StringRef LinkageName, llvm::GlobalVariable *Var, llvm::DIScope *DContext); + /// Get the printing policy for producing names for debug info. + PrintingPolicy getPrintingPolicy() const; + /// Get function name for the given FunctionDecl. If the name is /// constructed on demand (e.g., C++ destructor) then the name is /// stored on the side. 
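// [Editor's sketch, based on the CGDecl.cpp hunks below] The new
// ConstantEmitter is used in a two-phase pattern: try to emit the
// initializer, install it on the global, then finalize the emitter
// against the global that ended up owning the constant:
//
//   ConstantEmitter emitter(*this);
//   llvm::Constant *Init = emitter.tryEmitForInitializer(D);
//   if (Init) {
//     GV->setInitializer(Init);
//     emitter.finalize(GV); // complete emission now that GV exists
//   }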
diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp
index 23517867437c..04585a8afbb6 100644
--- a/lib/CodeGen/CGDecl.cpp
+++ b/lib/CodeGen/CGDecl.cpp
@@ -19,6 +19,7 @@
 #include "CGOpenMPRuntime.h"
 #include "CodeGenFunction.h"
 #include "CodeGenModule.h"
+#include "ConstantEmitter.h"
 #include "TargetInfo.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/CharUnits.h"
@@ -161,6 +162,10 @@ void CodeGenFunction::EmitVarDecl(const VarDecl &D) {
   // needs to be emitted like a static variable, e.g. a function-scope
   // variable in constant address space in OpenCL.
   if (D.getStorageDuration() != SD_Automatic) {
+    // Static sampler variables are translated to function calls.
+    if (D.getType()->isSamplerT())
+      return;
+
     llvm::GlobalValue::LinkageTypes Linkage =
         CGM.getLLVMLinkageVarDefinition(&D, /*isConstant=*/false);
 
@@ -221,7 +226,7 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl(
     Name = getStaticDeclName(*this, D);
 
   llvm::Type *LTy = getTypes().ConvertTypeForMem(Ty);
-  unsigned AS = GetGlobalVarAddressSpace(&D);
+  LangAS AS = GetGlobalVarAddressSpace(&D);
   unsigned TargetAS = getContext().getTargetAddressSpace(AS);
 
   // Local address space cannot have an initializer.
@@ -235,7 +240,7 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl(
       getModule(), LTy, Ty.isConstant(getContext()), Linkage, Init, Name,
       nullptr, llvm::GlobalVariable::NotThreadLocal, TargetAS);
   GV->setAlignment(getContext().getDeclAlign(&D).getQuantity());
-  setGlobalVisibility(GV, &D);
+  setGlobalVisibility(GV, &D, ForDefinition);
 
   if (supportsCOMDAT() && GV->isWeakForLinker())
     GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));
@@ -251,7 +256,7 @@ llvm::Constant *CodeGenModule::getOrCreateStaticVarDecl(
   }
 
   // Make sure the result is of the correct type.
-  unsigned ExpectedAS = Ty.getAddressSpace();
+  LangAS ExpectedAS = Ty.getAddressSpace();
   llvm::Constant *Addr = GV;
   if (AS != ExpectedAS) {
     Addr = getTargetCodeGenInfo().performAddrSpaceCast(
@@ -307,7 +312,8 @@ static bool hasNontrivialDestruction(QualType T) {
 llvm::GlobalVariable *
 CodeGenFunction::AddInitializerToStaticVarDecl(const VarDecl &D,
                                                llvm::GlobalVariable *GV) {
-  llvm::Constant *Init = CGM.EmitConstantInit(D, this);
+  ConstantEmitter emitter(*this);
+  llvm::Constant *Init = emitter.tryEmitForInitializer(D);
 
   // If constant emission failed, then this should be a C++ static
   // initializer.
@@ -355,6 +361,8 @@ CodeGenFunction::AddInitializerToStaticVarDecl(const VarDecl &D,
   GV->setConstant(CGM.isTypeConstant(D.getType(), true));
   GV->setInitializer(Init);
 
+  emitter.finalize(GV);
+
   if (hasNontrivialDestruction(D.getType()) && HaveInsertPoint()) {
     // We have a constant initializer, but a nontrivial destructor.
We still // need to perform a guarded "initialization" in order to register the @@ -952,7 +960,9 @@ void CodeGenFunction::EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr) { CodeGenFunction::AutoVarEmission CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { QualType Ty = D.getType(); - assert(Ty.getAddressSpace() == LangAS::Default); + assert( + Ty.getAddressSpace() == LangAS::Default || + (Ty.getAddressSpace() == LangAS::opencl_private && getLangOpts().OpenCL)); AutoVarEmission emission(D); @@ -1236,7 +1246,7 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { llvm::Constant *constant = nullptr; if (emission.IsConstantAggregate || D.isConstexpr()) { assert(!capturedByInit && "constant init contains a capturing block?"); - constant = CGM.EmitConstantInit(D, this); + constant = ConstantEmitter(*this).tryEmitAbstractForInitializer(D); } if (!constant) { @@ -1260,7 +1270,7 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { llvm::ConstantInt::get(IntPtrTy, getContext().getTypeSizeInChars(type).getQuantity()); - llvm::Type *BP = Int8PtrTy; + llvm::Type *BP = AllocaInt8PtrTy; if (Loc.getType() != BP) Loc = Builder.CreateBitCast(Loc, BP); @@ -1786,24 +1796,6 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, setBlockContextParameter(IPD, ArgNo, Arg.getDirectValue()); return; } - - // Apply any prologue 'this' adjustments required by the ABI. Be careful to - // handle the case where 'this' is passed indirectly as part of an inalloca - // struct. - if (const CXXMethodDecl *MD = - dyn_cast_or_null<CXXMethodDecl>(CurCodeDecl)) { - if (MD->isVirtual() && IPD == CXXABIThisDecl) { - llvm::Value *This = Arg.isIndirect() - ? Builder.CreateLoad(Arg.getIndirectAddress()) - : Arg.getDirectValue(); - This = CGM.getCXXABI().adjustThisParameterInVirtualFunctionPrologue( - *this, CurGD, This); - if (Arg.isIndirect()) - Builder.CreateStore(This, Arg.getIndirectAddress()); - else - Arg = ParamValue::forDirect(This); - } - } } Address DeclPtr = Address::invalid(); diff --git a/lib/CodeGen/CGDeclCXX.cpp b/lib/CodeGen/CGDeclCXX.cpp index d8768bee2cdf..042997831702 100644 --- a/lib/CodeGen/CGDeclCXX.cpp +++ b/lib/CodeGen/CGDeclCXX.cpp @@ -18,6 +18,7 @@ #include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/Support/Path.h" using namespace clang; @@ -259,6 +260,43 @@ void CodeGenFunction::EmitCXXGuardedInit(const VarDecl &D, CGM.getCXXABI().EmitGuardedInit(*this, D, DeclPtr, PerformInit); } +void CodeGenFunction::EmitCXXGuardedInitBranch(llvm::Value *NeedsInit, + llvm::BasicBlock *InitBlock, + llvm::BasicBlock *NoInitBlock, + GuardKind Kind, + const VarDecl *D) { + assert((Kind == GuardKind::TlsGuard || D) && "no guarded variable"); + + // A guess at how many times we will enter the initialization of a + // variable, depending on the kind of variable. + static const uint64_t InitsPerTLSVar = 1024; + static const uint64_t InitsPerLocalVar = 1024 * 1024; + + llvm::MDNode *Weights; + if (Kind == GuardKind::VariableGuard && !D->isLocalVarDecl()) { + // For non-local variables, don't apply any weighting for now. Due to our + // use of COMDATs, we expect there to be at most one initialization of the + // variable per DSO, but we have no way to know how many DSOs will try to + // initialize the variable. 
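// Aside (illustration, not from this patch): for an ordinary function-local
// static, NumInits below ends up as InitsPerLocalVar, so the guard branch is
// annotated as if the init path is taken once in 2^20 attempts; the metadata
// attached to the conditional branch is equivalent to:
//
//   llvm::MDBuilder MDHelper(CGM.getLLVMContext());
//   llvm::MDNode *W = MDHelper.createBranchWeights(1, 1024 * 1024 - 1);
//
// which LLVM's BranchProbabilityInfo reads back as a ~2^-20 edge probability.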
+    Weights = nullptr;
+  } else {
+    uint64_t NumInits;
+    // FIXME: For the TLS case, collect and use profiling information to
+    // determine a more accurate branch weight.
+    if (Kind == GuardKind::TlsGuard || D->getTLSKind())
+      NumInits = InitsPerTLSVar;
+    else
+      NumInits = InitsPerLocalVar;
+
+    // The probability of us entering the initializer is
+    // 1 / (total number of times we attempt to initialize the variable).
+    llvm::MDBuilder MDHelper(CGM.getLLVMContext());
+    Weights = MDHelper.createBranchWeights(1, NumInits - 1);
+  }
+
+  Builder.CreateCondBr(NeedsInit, InitBlock, NoInitBlock, Weights);
+}
+
 llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction(
     llvm::FunctionType *FTy, const Twine &Name, const CGFunctionInfo &FI,
     SourceLocation Loc, bool TLS) {
@@ -278,17 +316,29 @@ llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction(
   if (!getLangOpts().Exceptions)
     Fn->setDoesNotThrow();
 
-  if (!isInSanitizerBlacklist(Fn, Loc)) {
-    if (getLangOpts().Sanitize.hasOneOf(SanitizerKind::Address |
-                                        SanitizerKind::KernelAddress))
-      Fn->addFnAttr(llvm::Attribute::SanitizeAddress);
-    if (getLangOpts().Sanitize.has(SanitizerKind::Thread))
-      Fn->addFnAttr(llvm::Attribute::SanitizeThread);
-    if (getLangOpts().Sanitize.has(SanitizerKind::Memory))
-      Fn->addFnAttr(llvm::Attribute::SanitizeMemory);
-    if (getLangOpts().Sanitize.has(SanitizerKind::SafeStack))
-      Fn->addFnAttr(llvm::Attribute::SafeStack);
-  }
+  if (getLangOpts().Sanitize.has(SanitizerKind::Address) &&
+      !isInSanitizerBlacklist(SanitizerKind::Address, Fn, Loc))
+    Fn->addFnAttr(llvm::Attribute::SanitizeAddress);
+
+  if (getLangOpts().Sanitize.has(SanitizerKind::KernelAddress) &&
+      !isInSanitizerBlacklist(SanitizerKind::KernelAddress, Fn, Loc))
+    Fn->addFnAttr(llvm::Attribute::SanitizeAddress);
+
+  if (getLangOpts().Sanitize.has(SanitizerKind::HWAddress) &&
+      !isInSanitizerBlacklist(SanitizerKind::HWAddress, Fn, Loc))
+    Fn->addFnAttr(llvm::Attribute::SanitizeHWAddress);
+
+  if (getLangOpts().Sanitize.has(SanitizerKind::Thread) &&
+      !isInSanitizerBlacklist(SanitizerKind::Thread, Fn, Loc))
+    Fn->addFnAttr(llvm::Attribute::SanitizeThread);
+
+  if (getLangOpts().Sanitize.has(SanitizerKind::Memory) &&
+      !isInSanitizerBlacklist(SanitizerKind::Memory, Fn, Loc))
+    Fn->addFnAttr(llvm::Attribute::SanitizeMemory);
+
+  if (getLangOpts().Sanitize.has(SanitizerKind::SafeStack) &&
+      !isInSanitizerBlacklist(SanitizerKind::SafeStack, Fn, Loc))
+    Fn->addFnAttr(llvm::Attribute::SafeStack);
 
   return Fn;
 }
@@ -449,16 +499,12 @@ CodeGenModule::EmitCXXGlobalInitFunc() {
     PrioritizedCXXGlobalInits.clear();
   }
 
-  SmallString<128> FileName;
-  SourceManager &SM = Context.getSourceManager();
-  if (const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID())) {
-    // Include the filename in the symbol name. Including "sub_" matches gcc and
-    // makes sure these symbols appear lexicographically behind the symbols with
-    // priority emitted above.
-    FileName = llvm::sys::path::filename(MainFile->getName());
-  } else {
+  // Include the filename in the symbol name. Including "sub_" matches gcc and
+  // makes sure these symbols appear lexicographically behind the symbols with
+  // priority emitted above.
+  SmallString<128> FileName = llvm::sys::path::filename(getModule().getName());
+  if (FileName.empty())
     FileName = "<null>";
-  }
 
   for (size_t i = 0; i < FileName.size(); ++i) {
     // Replace everything that's not [a-zA-Z0-9._] with a _.
This set happens @@ -539,7 +585,8 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn, "guard.uninitialized"); llvm::BasicBlock *InitBlock = createBasicBlock("init"); ExitBlock = createBasicBlock("exit"); - Builder.CreateCondBr(Uninit, InitBlock, ExitBlock); + EmitCXXGuardedInitBranch(Uninit, InitBlock, ExitBlock, + GuardKind::TlsGuard, nullptr); EmitBlock(InitBlock); // Mark as initialized before initializing anything else. If the // initializers use previously-initialized thread_local vars, that's diff --git a/lib/CodeGen/CGException.cpp b/lib/CodeGen/CGException.cpp index 40ae0921098c..6c9d9f170ace 100644 --- a/lib/CodeGen/CGException.cpp +++ b/lib/CodeGen/CGException.cpp @@ -15,6 +15,7 @@ #include "CGCXXABI.h" #include "CGCleanup.h" #include "CGObjCRuntime.h" +#include "ConstantEmitter.h" #include "TargetInfo.h" #include "clang/AST/Mangle.h" #include "clang/AST/StmtCXX.h" @@ -111,17 +112,11 @@ EHPersonality::MSVC_C_specific_handler = { "__C_specific_handler", nullptr }; const EHPersonality EHPersonality::MSVC_CxxFrameHandler3 = { "__CxxFrameHandler3", nullptr }; -/// On Win64, use libgcc's SEH personality function. We fall back to dwarf on -/// other platforms, unless the user asked for SjLj exceptions. -static bool useLibGCCSEHPersonality(const llvm::Triple &T) { - return T.isOSWindows() && T.getArch() == llvm::Triple::x86_64; -} - static const EHPersonality &getCPersonality(const llvm::Triple &T, const LangOptions &L) { if (L.SjLjExceptions) return EHPersonality::GNU_C_SJLJ; - else if (useLibGCCSEHPersonality(T)) + if (L.SEHExceptions) return EHPersonality::GNU_C_SEH; return EHPersonality::GNU_C; } @@ -143,7 +138,7 @@ static const EHPersonality &getObjCPersonality(const llvm::Triple &T, case ObjCRuntime::ObjFW: if (L.SjLjExceptions) return EHPersonality::GNU_ObjC_SJLJ; - else if (useLibGCCSEHPersonality(T)) + if (L.SEHExceptions) return EHPersonality::GNU_ObjC_SEH; return EHPersonality::GNU_ObjC; } @@ -154,7 +149,7 @@ static const EHPersonality &getCXXPersonality(const llvm::Triple &T, const LangOptions &L) { if (L.SjLjExceptions) return EHPersonality::GNU_CPlusPlus_SJLJ; - else if (useLibGCCSEHPersonality(T)) + if (L.SEHExceptions) return EHPersonality::GNU_CPlusPlus_SEH; return EHPersonality::GNU_CPlusPlus; } @@ -164,26 +159,27 @@ static const EHPersonality &getCXXPersonality(const llvm::Triple &T, static const EHPersonality &getObjCXXPersonality(const llvm::Triple &T, const LangOptions &L) { switch (L.ObjCRuntime.getKind()) { + // In the fragile ABI, just use C++ exception handling and hope + // they're not doing crazy exception mixing. + case ObjCRuntime::FragileMacOSX: + return getCXXPersonality(T, L); + // The ObjC personality defers to the C++ personality for non-ObjC // handlers. Unlike the C++ case, we use the same personality // function on targets using (backend-driven) SJLJ EH. case ObjCRuntime::MacOSX: case ObjCRuntime::iOS: case ObjCRuntime::WatchOS: - return EHPersonality::NeXT_ObjC; + return getObjCPersonality(T, L); - // In the fragile ABI, just use C++ exception handling and hope - // they're not doing crazy exception mixing. - case ObjCRuntime::FragileMacOSX: - return getCXXPersonality(T, L); + case ObjCRuntime::GNUstep: + return EHPersonality::GNU_ObjCXX; // The GCC runtime's personality function inherently doesn't support - // mixed EH. Use the C++ personality just to avoid returning null. + // mixed EH. Use the ObjC personality just to avoid returning null. 
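// Aside (illustration, not from this patch): personality selection is now
// driven by the exception model recorded in LangOptions instead of sniffing
// the target triple. Assuming the matching driver flags, a plain C++ TU
// resolves roughly as:
//   -fsjlj-exceptions   -> EHPersonality::GNU_CPlusPlus_SJLJ (__gxx_personality_sj0)
//   -fseh-exceptions    -> EHPersonality::GNU_CPlusPlus_SEH  (__gxx_personality_seh0)
//   DWARF CFI (default) -> EHPersonality::GNU_CPlusPlus      (__gxx_personality_v0)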
case ObjCRuntime::GCC: case ObjCRuntime::ObjFW: return getObjCPersonality(T, L); - case ObjCRuntime::GNUstep: - return EHPersonality::GNU_ObjCXX; } llvm_unreachable("bad runtime kind"); } @@ -209,8 +205,9 @@ const EHPersonality &EHPersonality::get(CodeGenModule &CGM, if (T.isWindowsMSVCEnvironment() && !L.ObjC1) { if (L.SjLjExceptions) return EHPersonality::GNU_CPlusPlus_SJLJ; - else - return EHPersonality::MSVC_CxxFrameHandler3; + if (L.DWARFExceptions) + return EHPersonality::GNU_CPlusPlus; + return EHPersonality::MSVC_CxxFrameHandler3; } if (L.CPlusPlus && L.ObjC1) @@ -224,7 +221,12 @@ const EHPersonality &EHPersonality::get(CodeGenModule &CGM, } const EHPersonality &EHPersonality::get(CodeGenFunction &CGF) { - return get(CGF.CGM, dyn_cast_or_null<FunctionDecl>(CGF.CurCodeDecl)); + const auto *FD = CGF.CurCodeDecl; + // For outlined finallys and filters, use the SEH personality in case they + // contain more SEH. This mostly only affects finallys. Filters could + // hypothetically use gnu statement expressions to sneak in nested SEH. + FD = FD ? FD : CGF.CurSEHParent; + return get(CGF.CGM, dyn_cast_or_null<FunctionDecl>(FD)); } static llvm::Constant *getPersonalityFn(CodeGenModule &CGM, @@ -1800,7 +1802,8 @@ void CodeGenFunction::EnterSEHTryStmt(const SEHTryStmt &S) { // "catch i8* null". We can't do this on x86 because the filter has to save // the exception code. llvm::Constant *C = - CGM.EmitConstantExpr(Except->getFilterExpr(), getContext().IntTy, this); + ConstantEmitter(*this).tryEmitAbstract(Except->getFilterExpr(), + getContext().IntTy); if (CGM.getTarget().getTriple().getArch() != llvm::Triple::x86 && C && C->isOneValue()) { CatchScope->setCatchAllHandler(0, createBasicBlock("__except")); diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp index 63c7b3d10bf9..98740e8f9aab 100644 --- a/lib/CodeGen/CGExpr.cpp +++ b/lib/CodeGen/CGExpr.cpp @@ -20,6 +20,7 @@ #include "CGRecordLayout.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" +#include "ConstantEmitter.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" @@ -48,7 +49,7 @@ using namespace CodeGen; llvm::Value *CodeGenFunction::EmitCastToVoidPtr(llvm::Value *value) { unsigned addressSpace = - cast<llvm::PointerType>(value->getType())->getAddressSpace(); + cast<llvm::PointerType>(value->getType())->getAddressSpace(); llvm::PointerType *destType = Int8PtrTy; if (addressSpace) @@ -73,12 +74,15 @@ Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align, // cast alloca to the default address space when necessary. if (CastToDefaultAddrSpace && getASTAllocaAddressSpace() != LangAS::Default) { auto DestAddrSpace = getContext().getTargetAddressSpace(LangAS::Default); - auto CurIP = Builder.saveIP(); - Builder.SetInsertPoint(AllocaInsertPt); + llvm::IRBuilderBase::InsertPointGuard IPG(Builder); + // When ArraySize is nullptr, alloca is inserted at AllocaInsertPt, + // otherwise alloca is inserted at the current insertion point of the + // builder. 
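// Aside (illustration, not from this patch): InsertPointGuard is the RAII
// replacement for the manual saveIP()/restoreIP() pair deleted above; the
// builder's insertion point is restored on scope exit, even on early return:
//
//   {
//     llvm::IRBuilderBase::InsertPointGuard IPG(Builder);
//     Builder.SetInsertPoint(AllocaInsertPt);
//     // ... emit instructions at the alloca insertion point ...
//   } // insertion point restored here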
+ if (!ArraySize) + Builder.SetInsertPoint(AllocaInsertPt); V = getTargetHooks().performAddrSpaceCast( *this, V, getASTAllocaAddressSpace(), LangAS::Default, Ty->getPointerTo(DestAddrSpace), /*non-null*/ true); - Builder.restoreIP(CurIP); } return Address(V, Align); @@ -356,7 +360,7 @@ static Address createReferenceTemporary(CodeGenFunction &CGF, if (CGF.CGM.getCodeGenOpts().MergeAllConstants && (Ty->isArrayType() || Ty->isRecordType()) && CGF.CGM.isTypeConstant(Ty, true)) - if (llvm::Constant *Init = CGF.CGM.EmitConstantExpr(Inner, Ty, &CGF)) { + if (auto Init = ConstantEmitter(CGF).tryEmitAbstract(Inner, Ty)) { if (auto AddrSpace = CGF.getTarget().getConstantAddressSpace()) { auto AS = AddrSpace.getValue(); auto *GV = new llvm::GlobalVariable( @@ -411,14 +415,12 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { // dynamic initialization or a cleanup and we can just return the address // of the temporary. if (Var->hasInitializer()) - return MakeAddrLValue(Object, M->getType(), - LValueBaseInfo(AlignmentSource::Decl, false)); + return MakeAddrLValue(Object, M->getType(), AlignmentSource::Decl); Var->setInitializer(CGM.EmitNullConstant(E->getType())); } LValue RefTempDst = MakeAddrLValue(Object, M->getType(), - LValueBaseInfo(AlignmentSource::Decl, - false)); + AlignmentSource::Decl); switch (getEvaluationKind(E->getType())) { default: llvm_unreachable("expected scalar or aggregate expression"); @@ -505,8 +507,7 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { break; case SubobjectAdjustment::FieldAdjustment: { - LValue LV = MakeAddrLValue(Object, E->getType(), - LValueBaseInfo(AlignmentSource::Decl, false)); + LValue LV = MakeAddrLValue(Object, E->getType(), AlignmentSource::Decl); LV = EmitLValueForField(LV, Adjustment.Field); assert(LV.isSimple() && "materialized temporary field is not a simple lvalue"); @@ -523,8 +524,7 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { } } - return MakeAddrLValue(Object, M->getType(), - LValueBaseInfo(AlignmentSource::Decl, false)); + return MakeAddrLValue(Object, M->getType(), AlignmentSource::Decl); } RValue @@ -568,6 +568,19 @@ static llvm::Value *emitHash16Bytes(CGBuilderTy &Builder, llvm::Value *Low, return Builder.CreateMul(B1, KMul); } +bool CodeGenFunction::isNullPointerAllowed(TypeCheckKind TCK) { + return TCK == TCK_DowncastPointer || TCK == TCK_Upcast || + TCK == TCK_UpcastToVirtualBase; +} + +bool CodeGenFunction::isVptrCheckRequired(TypeCheckKind TCK, QualType Ty) { + CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); + return (RD && RD->hasDefinition() && RD->isDynamicClass()) && + (TCK == TCK_MemberAccess || TCK == TCK_MemberCall || + TCK == TCK_DowncastPointer || TCK == TCK_DowncastReference || + TCK == TCK_UpcastToVirtualBase); +} + bool CodeGenFunction::sanitizePerformTypeCheck() const { return SanOpts.has(SanitizerKind::Null) | SanOpts.has(SanitizerKind::Alignment) | @@ -604,20 +617,22 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, auto PtrToAlloca = dyn_cast<llvm::AllocaInst>(Ptr->stripPointerCastsNoFollowAliases()); - bool AllowNullPointers = TCK == TCK_DowncastPointer || TCK == TCK_Upcast || - TCK == TCK_UpcastToVirtualBase; + llvm::Value *True = llvm::ConstantInt::getTrue(getLLVMContext()); + llvm::Value *IsNonNull = nullptr; + bool IsGuaranteedNonNull = + SkippedChecks.has(SanitizerKind::Null) || PtrToAlloca; + bool AllowNullPointers = isNullPointerAllowed(TCK); if ((SanOpts.has(SanitizerKind::Null) || AllowNullPointers) && - 
!SkippedChecks.has(SanitizerKind::Null) && !PtrToAlloca) { + !IsGuaranteedNonNull) { // The glvalue must not be an empty glvalue. - llvm::Value *IsNonNull = Builder.CreateIsNotNull(Ptr); + IsNonNull = Builder.CreateIsNotNull(Ptr); // The IR builder can constant-fold the null check if the pointer points to // a constant. - bool PtrIsNonNull = - IsNonNull == llvm::ConstantInt::getTrue(getLLVMContext()); + IsGuaranteedNonNull = IsNonNull == True; // Skip the null check if the pointer is known to be non-null. - if (!PtrIsNonNull) { + if (!IsGuaranteedNonNull) { if (AllowNullPointers) { // When performing pointer casts, it's OK if the value is null. // Skip the remaining checks in that case. @@ -652,6 +667,7 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, } uint64_t AlignVal = 0; + llvm::Value *PtrAsInt = nullptr; if (SanOpts.has(SanitizerKind::Alignment) && !SkippedChecks.has(SanitizerKind::Alignment)) { @@ -662,12 +678,13 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, // The glvalue must be suitably aligned. if (AlignVal > 1 && (!PtrToAlloca || PtrToAlloca->getAlignment() < AlignVal)) { - llvm::Value *Align = - Builder.CreateAnd(Builder.CreatePtrToInt(Ptr, IntPtrTy), - llvm::ConstantInt::get(IntPtrTy, AlignVal - 1)); + PtrAsInt = Builder.CreatePtrToInt(Ptr, IntPtrTy); + llvm::Value *Align = Builder.CreateAnd( + PtrAsInt, llvm::ConstantInt::get(IntPtrTy, AlignVal - 1)); llvm::Value *Aligned = - Builder.CreateICmpEQ(Align, llvm::ConstantInt::get(IntPtrTy, 0)); - Checks.push_back(std::make_pair(Aligned, SanitizerKind::Alignment)); + Builder.CreateICmpEQ(Align, llvm::ConstantInt::get(IntPtrTy, 0)); + if (Aligned != True) + Checks.push_back(std::make_pair(Aligned, SanitizerKind::Alignment)); } } @@ -679,7 +696,8 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, EmitCheckSourceLocation(Loc), EmitCheckTypeDescriptor(Ty), llvm::ConstantInt::get(Int8Ty, AlignVal ? llvm::Log2_64(AlignVal) : 1), llvm::ConstantInt::get(Int8Ty, TCK)}; - EmitCheck(Checks, SanitizerHandler::TypeMismatch, StaticData, Ptr); + EmitCheck(Checks, SanitizerHandler::TypeMismatch, StaticData, + PtrAsInt ? PtrAsInt : Ptr); } // If possible, check that the vptr indicates that there is a subobject of @@ -690,13 +708,20 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, // The program has undefined behavior if: // -- the [pointer or glvalue] is used to access a non-static data member // or call a non-static member function - CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); if (SanOpts.has(SanitizerKind::Vptr) && - !SkippedChecks.has(SanitizerKind::Vptr) && - (TCK == TCK_MemberAccess || TCK == TCK_MemberCall || - TCK == TCK_DowncastPointer || TCK == TCK_DowncastReference || - TCK == TCK_UpcastToVirtualBase) && - RD && RD->hasDefinition() && RD->isDynamicClass()) { + !SkippedChecks.has(SanitizerKind::Vptr) && isVptrCheckRequired(TCK, Ty)) { + // Ensure that the pointer is non-null before loading it. If there is no + // compile-time guarantee, reuse the run-time null check or emit a new one. + if (!IsGuaranteedNonNull) { + if (!IsNonNull) + IsNonNull = Builder.CreateIsNotNull(Ptr); + if (!Done) + Done = createBasicBlock("vptr.null"); + llvm::BasicBlock *VptrNotNull = createBasicBlock("vptr.not.null"); + Builder.CreateCondBr(IsNonNull, VptrNotNull, Done); + EmitBlock(VptrNotNull); + } + // Compute a hash of the mangled name of the type. // // FIXME: This is not guaranteed to be deterministic! 
Move to a @@ -709,7 +734,7 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, // Blacklist based on the mangled type. if (!CGM.getContext().getSanitizerBlacklist().isBlacklistedType( - Out.str())) { + SanitizerKind::Vptr, Out.str())) { llvm::hash_code TypeHash = hash_value(Out.str()); // Load the vptr, and compute hash_16_bytes(TypeHash, vptr). @@ -789,6 +814,45 @@ static bool isFlexibleArrayMemberExpr(const Expr *E) { return false; } +llvm::Value *CodeGenFunction::LoadPassedObjectSize(const Expr *E, + QualType EltTy) { + ASTContext &C = getContext(); + uint64_t EltSize = C.getTypeSizeInChars(EltTy).getQuantity(); + if (!EltSize) + return nullptr; + + auto *ArrayDeclRef = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts()); + if (!ArrayDeclRef) + return nullptr; + + auto *ParamDecl = dyn_cast<ParmVarDecl>(ArrayDeclRef->getDecl()); + if (!ParamDecl) + return nullptr; + + auto *POSAttr = ParamDecl->getAttr<PassObjectSizeAttr>(); + if (!POSAttr) + return nullptr; + + // Don't load the size if it's a lower bound. + int POSType = POSAttr->getType(); + if (POSType != 0 && POSType != 1) + return nullptr; + + // Find the implicit size parameter. + auto PassedSizeIt = SizeArguments.find(ParamDecl); + if (PassedSizeIt == SizeArguments.end()) + return nullptr; + + const ImplicitParamDecl *PassedSizeDecl = PassedSizeIt->second; + assert(LocalDeclMap.count(PassedSizeDecl) && "Passed size not loadable"); + Address AddrOfSize = LocalDeclMap.find(PassedSizeDecl)->second; + llvm::Value *SizeInBytes = EmitLoadOfScalar(AddrOfSize, /*Volatile=*/false, + C.getSizeType(), E->getExprLoc()); + llvm::Value *SizeOfElement = + llvm::ConstantInt::get(SizeInBytes->getType(), EltSize); + return Builder.CreateUDiv(SizeInBytes, SizeOfElement); +} + /// If Base is known to point to the start of an array, return the length of /// that array. Return 0 if the length cannot be determined. static llvm::Value *getArrayIndexingBound( @@ -810,9 +874,16 @@ static llvm::Value *getArrayIndexingBound( return CGF.Builder.getInt(CAT->getSize()); else if (const auto *VAT = dyn_cast<VariableArrayType>(AT)) return CGF.getVLASize(VAT).first; + // Ignore pass_object_size here. It's not applicable on decayed pointers. } } + QualType EltTy{Base->getType()->getPointeeOrArrayElementType(), 0}; + if (llvm::Value *POS = CGF.LoadPassedObjectSize(Base, EltTy)) { + IndexedType = Base->getType(); + return POS; + } + return nullptr; } @@ -894,7 +965,8 @@ void CodeGenModule::EmitExplicitCastExprType(const ExplicitCastExpr *E, /// EmitPointerWithAlignment - Given an expression of pointer type, try to /// derive a more accurate bound on the alignment of the pointer. Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E, - LValueBaseInfo *BaseInfo) { + LValueBaseInfo *BaseInfo, + TBAAAccessInfo *TBAAInfo) { // We allow this with ObjC object pointers because of fragile ABIs. assert(E->getType()->isPointerType() || E->getType()->isObjCObjectPointerType()); @@ -909,24 +981,35 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E, // Non-converting casts (but not C's implicit conversion from void*). 
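// Aside (illustration, not from this patch): LoadPassedObjectSize gives
// -fsanitize=array-bounds an upper bound for a decayed pointer parameter that
// carries pass_object_size, as in this hypothetical function:
//
//   void fill(int *p __attribute__((pass_object_size(0))), int n) {
//     for (int i = 0; i < n; ++i)
//       p[i] = 0;  // bound = implicit size argument / sizeof(int)
//   }
//
// Only types 0 and 1 are consulted; types 2 and 3 yield lower bounds, which
// are useless for an upper-bound check, so they are skipped above.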
case CK_BitCast: case CK_NoOp: + case CK_AddressSpaceConversion: if (auto PtrTy = CE->getSubExpr()->getType()->getAs<PointerType>()) { if (PtrTy->getPointeeType()->isVoidType()) break; - LValueBaseInfo InnerInfo; - Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), &InnerInfo); - if (BaseInfo) *BaseInfo = InnerInfo; - - // If this is an explicit bitcast, and the source l-value is - // opaque, honor the alignment of the casted-to type. - if (isa<ExplicitCastExpr>(CE) && - InnerInfo.getAlignmentSource() != AlignmentSource::Decl) { - LValueBaseInfo ExpInfo; + LValueBaseInfo InnerBaseInfo; + TBAAAccessInfo InnerTBAAInfo; + Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), + &InnerBaseInfo, + &InnerTBAAInfo); + if (BaseInfo) *BaseInfo = InnerBaseInfo; + if (TBAAInfo) *TBAAInfo = InnerTBAAInfo; + + if (isa<ExplicitCastExpr>(CE)) { + LValueBaseInfo TargetTypeBaseInfo; + TBAAAccessInfo TargetTypeTBAAInfo; CharUnits Align = getNaturalPointeeTypeAlignment(E->getType(), - &ExpInfo); - if (BaseInfo) - BaseInfo->mergeForCast(ExpInfo); - Addr = Address(Addr.getPointer(), Align); + &TargetTypeBaseInfo, + &TargetTypeTBAAInfo); + if (TBAAInfo) + *TBAAInfo = CGM.mergeTBAAInfoForCast(*TBAAInfo, + TargetTypeTBAAInfo); + // If the source l-value is opaque, honor the alignment of the + // casted-to type. + if (InnerBaseInfo.getAlignmentSource() != AlignmentSource::Decl) { + if (BaseInfo) + BaseInfo->mergeForCast(TargetTypeBaseInfo); + Addr = Address(Addr.getPointer(), Align); + } } if (SanOpts.has(SanitizerKind::CFIUnrelatedCast) && @@ -937,19 +1020,22 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E, CodeGenFunction::CFITCK_UnrelatedCast, CE->getLocStart()); } - - return Builder.CreateBitCast(Addr, ConvertType(E->getType())); + return CE->getCastKind() != CK_AddressSpaceConversion + ? Builder.CreateBitCast(Addr, ConvertType(E->getType())) + : Builder.CreateAddrSpaceCast(Addr, + ConvertType(E->getType())); } break; // Array-to-pointer decay. case CK_ArrayToPointerDecay: - return EmitArrayToPointerDecay(CE->getSubExpr(), BaseInfo); + return EmitArrayToPointerDecay(CE->getSubExpr(), BaseInfo, TBAAInfo); // Derived-to-base conversions. case CK_UncheckedDerivedToBase: case CK_DerivedToBase: { - Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), BaseInfo); + Address Addr = EmitPointerWithAlignment(CE->getSubExpr(), BaseInfo, + TBAAInfo); auto Derived = CE->getSubExpr()->getType()->getPointeeCXXRecordDecl(); return GetAddressOfBaseClass(Addr, Derived, CE->path_begin(), CE->path_end(), @@ -969,6 +1055,7 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E, if (UO->getOpcode() == UO_AddrOf) { LValue LV = EmitLValue(UO->getSubExpr()); if (BaseInfo) *BaseInfo = LV.getBaseInfo(); + if (TBAAInfo) *TBAAInfo = LV.getTBAAInfo(); return LV.getAddress(); } } @@ -976,7 +1063,8 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E, // TODO: conditional operators, comma. // Otherwise, use the alignment of the type. 
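// Aside (illustration, not from this patch): with the extra out-parameter,
// callers can now recover alignment provenance and TBAA information in a
// single walk, in the pattern the lvalue emitters later in this file use:
//
//   LValueBaseInfo BaseInfo;
//   TBAAAccessInfo TBAAInfo;
//   Address Addr = EmitPointerWithAlignment(E->getSubExpr(), &BaseInfo, &TBAAInfo);
//   LValue LV = MakeAddrLValue(Addr, T, BaseInfo, TBAAInfo);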
- CharUnits Align = getNaturalPointeeTypeAlignment(E->getType(), BaseInfo); + CharUnits Align = getNaturalPointeeTypeAlignment(E->getType(), BaseInfo, + TBAAInfo); return Address(EmitScalarExpr(E), Align); } @@ -1145,8 +1233,7 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) { llvm::Value *V = LV.getPointer(); Scope.ForceCleanup({&V}); return LValue::MakeAddr(Address(V, LV.getAlignment()), LV.getType(), - getContext(), LV.getBaseInfo(), - LV.getTBAAInfo()); + getContext(), LV.getBaseInfo(), LV.getTBAAInfo()); } // FIXME: Is it possible to create an ExprWithCleanups that produces a // bitfield lvalue or some other non-simple lvalue? @@ -1303,7 +1390,8 @@ CodeGenFunction::tryEmitAsConstant(DeclRefExpr *refExpr) { return ConstantEmission(); // Emit as a constant. - llvm::Constant *C = CGM.EmitConstantValue(result.Val, resultType, this); + auto C = ConstantEmitter(*this).emitAbstract(refExpr->getLocation(), + result.Val, resultType); // Make sure we emit a debug reference to the global variable. // This should probably fire even for @@ -1322,13 +1410,30 @@ CodeGenFunction::tryEmitAsConstant(DeclRefExpr *refExpr) { return ConstantEmission::forValue(C); } +static DeclRefExpr *tryToConvertMemberExprToDeclRefExpr(CodeGenFunction &CGF, + const MemberExpr *ME) { + if (auto *VD = dyn_cast<VarDecl>(ME->getMemberDecl())) { + // Try to emit static variable member expressions as DREs. + return DeclRefExpr::Create( + CGF.getContext(), NestedNameSpecifierLoc(), SourceLocation(), VD, + /*RefersToEnclosingVariableOrCapture=*/false, ME->getExprLoc(), + ME->getType(), ME->getValueKind()); + } + return nullptr; +} + +CodeGenFunction::ConstantEmission +CodeGenFunction::tryEmitAsConstant(const MemberExpr *ME) { + if (DeclRefExpr *DRE = tryToConvertMemberExprToDeclRefExpr(*this, ME)) + return tryEmitAsConstant(DRE); + return ConstantEmission(); +} + llvm::Value *CodeGenFunction::EmitLoadOfScalar(LValue lvalue, SourceLocation Loc) { return EmitLoadOfScalar(lvalue.getAddress(), lvalue.isVolatile(), lvalue.getType(), Loc, lvalue.getBaseInfo(), - lvalue.getTBAAInfo(), - lvalue.getTBAABaseType(), lvalue.getTBAAOffset(), - lvalue.isNontemporal()); + lvalue.getTBAAInfo(), lvalue.isNontemporal()); } static bool hasBooleanRepresentation(QualType Ty) { @@ -1412,17 +1517,17 @@ bool CodeGenFunction::EmitScalarRangeCheck(llvm::Value *Value, QualType Ty, if (!getRangeForType(*this, Ty, Min, End, /*StrictEnums=*/true, IsBool)) return true; + auto &Ctx = getLLVMContext(); SanitizerScope SanScope(this); llvm::Value *Check; --End; if (!Min) { - Check = Builder.CreateICmpULE( - Value, llvm::ConstantInt::get(getLLVMContext(), End)); + Check = Builder.CreateICmpULE(Value, llvm::ConstantInt::get(Ctx, End)); } else { - llvm::Value *Upper = Builder.CreateICmpSLE( - Value, llvm::ConstantInt::get(getLLVMContext(), End)); - llvm::Value *Lower = Builder.CreateICmpSGE( - Value, llvm::ConstantInt::get(getLLVMContext(), Min)); + llvm::Value *Upper = + Builder.CreateICmpSLE(Value, llvm::ConstantInt::get(Ctx, End)); + llvm::Value *Lower = + Builder.CreateICmpSGE(Value, llvm::ConstantInt::get(Ctx, Min)); Check = Builder.CreateAnd(Upper, Lower); } llvm::Constant *StaticArgs[] = {EmitCheckSourceLocation(Loc), @@ -1438,9 +1543,7 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, LValueBaseInfo BaseInfo, - llvm::MDNode *TBAAInfo, - QualType TBAABaseType, - uint64_t TBAAOffset, + TBAAAccessInfo TBAAInfo, bool isNontemporal) { if (!CGM.getCodeGenOpts().PreserveVec3Type) { // For better 
performance, handle vector loads differently. @@ -1480,14 +1583,8 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile, Load->getContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); Load->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); } - if (TBAAInfo) { - bool MayAlias = BaseInfo.getMayAlias(); - llvm::MDNode *TBAA = MayAlias - ? CGM.getTBAAInfo(getContext().CharTy) - : CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo, TBAAOffset); - if (TBAA) - CGM.DecorateInstructionWithTBAA(Load, TBAA, MayAlias); - } + + CGM.DecorateInstructionWithTBAA(Load, TBAAInfo); if (EmitScalarRangeCheck(Load, Ty, Loc)) { // In order to prevent the optimizer from throwing away the check, don't @@ -1527,11 +1624,8 @@ llvm::Value *CodeGenFunction::EmitFromMemory(llvm::Value *Value, QualType Ty) { void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, LValueBaseInfo BaseInfo, - llvm::MDNode *TBAAInfo, - bool isInit, QualType TBAABaseType, - uint64_t TBAAOffset, - bool isNontemporal) { - + TBAAAccessInfo TBAAInfo, + bool isInit, bool isNontemporal) { if (!CGM.getCodeGenOpts().PreserveVec3Type) { // Handle vectors differently to get better performance. if (Ty->isVectorType()) { @@ -1571,22 +1665,15 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr, llvm::ConstantAsMetadata::get(Builder.getInt32(1))); Store->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); } - if (TBAAInfo) { - bool MayAlias = BaseInfo.getMayAlias(); - llvm::MDNode *TBAA = MayAlias - ? CGM.getTBAAInfo(getContext().CharTy) - : CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo, TBAAOffset); - if (TBAA) - CGM.DecorateInstructionWithTBAA(Store, TBAA, MayAlias); - } + + CGM.DecorateInstructionWithTBAA(Store, TBAAInfo); } void CodeGenFunction::EmitStoreOfScalar(llvm::Value *value, LValue lvalue, bool isInit) { EmitStoreOfScalar(value, lvalue.getAddress(), lvalue.isVolatile(), lvalue.getType(), lvalue.getBaseInfo(), - lvalue.getTBAAInfo(), isInit, lvalue.getTBAABaseType(), - lvalue.getTBAAOffset(), lvalue.isNontemporal()); + lvalue.getTBAAInfo(), isInit, lvalue.isNontemporal()); } /// EmitLoadOfLValue - Given an expression that represents a value lvalue, this @@ -2116,39 +2203,48 @@ static LValue EmitThreadPrivateVarDeclLValue( llvm::Type *RealVarTy, SourceLocation Loc) { Addr = CGF.CGM.getOpenMPRuntime().getAddrOfThreadPrivate(CGF, VD, Addr, Loc); Addr = CGF.Builder.CreateElementBitCast(Addr, RealVarTy); - LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); - return CGF.MakeAddrLValue(Addr, T, BaseInfo); + return CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl); } -Address CodeGenFunction::EmitLoadOfReference(Address Addr, - const ReferenceType *RefTy, - LValueBaseInfo *BaseInfo) { - llvm::Value *Ptr = Builder.CreateLoad(Addr); - return Address(Ptr, getNaturalTypeAlignment(RefTy->getPointeeType(), - BaseInfo, /*forPointee*/ true)); +Address +CodeGenFunction::EmitLoadOfReference(LValue RefLVal, + LValueBaseInfo *PointeeBaseInfo, + TBAAAccessInfo *PointeeTBAAInfo) { + llvm::LoadInst *Load = Builder.CreateLoad(RefLVal.getAddress(), + RefLVal.isVolatile()); + CGM.DecorateInstructionWithTBAA(Load, RefLVal.getTBAAInfo()); + + CharUnits Align = getNaturalTypeAlignment(RefLVal.getType()->getPointeeType(), + PointeeBaseInfo, PointeeTBAAInfo, + /* forPointeeType= */ true); + return Address(Load, Align); } -LValue CodeGenFunction::EmitLoadOfReferenceLValue(Address RefAddr, - const ReferenceType *RefTy) { - LValueBaseInfo BaseInfo; 
- Address Addr = EmitLoadOfReference(RefAddr, RefTy, &BaseInfo); - return MakeAddrLValue(Addr, RefTy->getPointeeType(), BaseInfo); +LValue CodeGenFunction::EmitLoadOfReferenceLValue(LValue RefLVal) { + LValueBaseInfo PointeeBaseInfo; + TBAAAccessInfo PointeeTBAAInfo; + Address PointeeAddr = EmitLoadOfReference(RefLVal, &PointeeBaseInfo, + &PointeeTBAAInfo); + return MakeAddrLValue(PointeeAddr, RefLVal.getType()->getPointeeType(), + PointeeBaseInfo, PointeeTBAAInfo); } Address CodeGenFunction::EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, - LValueBaseInfo *BaseInfo) { + LValueBaseInfo *BaseInfo, + TBAAAccessInfo *TBAAInfo) { llvm::Value *Addr = Builder.CreateLoad(Ptr); return Address(Addr, getNaturalTypeAlignment(PtrTy->getPointeeType(), - BaseInfo, + BaseInfo, TBAAInfo, /*forPointeeType=*/true)); } LValue CodeGenFunction::EmitLoadOfPointerLValue(Address PtrAddr, const PointerType *PtrTy) { LValueBaseInfo BaseInfo; - Address Addr = EmitLoadOfPointer(PtrAddr, PtrTy, &BaseInfo); - return MakeAddrLValue(Addr, PtrTy->getPointeeType(), BaseInfo); + TBAAAccessInfo TBAAInfo; + Address Addr = EmitLoadOfPointer(PtrAddr, PtrTy, &BaseInfo, &TBAAInfo); + return MakeAddrLValue(Addr, PtrTy->getPointeeType(), BaseInfo, TBAAInfo); } static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF, @@ -2165,18 +2261,15 @@ static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF, V = EmitBitCastOfLValueToProperType(CGF, V, RealVarTy); CharUnits Alignment = CGF.getContext().getDeclAlign(VD); Address Addr(V, Alignment); - LValue LV; // Emit reference to the private copy of the variable if it is an OpenMP // threadprivate variable. if (CGF.getLangOpts().OpenMP && VD->hasAttr<OMPThreadPrivateDeclAttr>()) return EmitThreadPrivateVarDeclLValue(CGF, VD, T, Addr, RealVarTy, E->getExprLoc()); - if (auto RefTy = VD->getType()->getAs<ReferenceType>()) { - LV = CGF.EmitLoadOfReferenceLValue(Addr, RefTy); - } else { - LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); - LV = CGF.MakeAddrLValue(Addr, T, BaseInfo); - } + LValue LV = VD->getType()->isReferenceType() ? + CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), + AlignmentSource::Decl) : + CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl); setObjCGCLValueClass(CGF.getContext(), E, LV); return LV; } @@ -2209,8 +2302,8 @@ static LValue EmitFunctionDeclLValue(CodeGenFunction &CGF, const Expr *E, const FunctionDecl *FD) { llvm::Value *V = EmitFunctionDeclPointer(CGF.CGM, FD); CharUnits Alignment = CGF.getContext().getDeclAlign(FD); - LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); - return CGF.MakeAddrLValue(V, E->getType(), Alignment, BaseInfo); + return CGF.MakeAddrLValue(V, E->getType(), Alignment, + AlignmentSource::Decl); } static LValue EmitCapturedFieldLValue(CodeGenFunction &CGF, const FieldDecl *FD, @@ -2265,44 +2358,52 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { VD->isUsableInConstantExpressions(getContext()) && VD->checkInitIsICE() && // Do not emit if it is private OpenMP variable. 
- !(E->refersToEnclosingVariableOrCapture() && CapturedStmtInfo && - LocalDeclMap.count(VD))) { + !(E->refersToEnclosingVariableOrCapture() && + ((CapturedStmtInfo && + (LocalDeclMap.count(VD->getCanonicalDecl()) || + CapturedStmtInfo->lookup(VD->getCanonicalDecl()))) || + LambdaCaptureFields.lookup(VD->getCanonicalDecl()) || + isa<BlockDecl>(CurCodeDecl)))) { llvm::Constant *Val = - CGM.EmitConstantValue(*VD->evaluateValue(), VD->getType(), this); + ConstantEmitter(*this).emitAbstract(E->getLocation(), + *VD->evaluateValue(), + VD->getType()); assert(Val && "failed to emit reference constant expression"); // FIXME: Eventually we will want to emit vector element references. // Should we be using the alignment of the constant pointer we emitted? - CharUnits Alignment = getNaturalTypeAlignment(E->getType(), nullptr, - /*pointee*/ true); - LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); - return MakeAddrLValue(Address(Val, Alignment), T, BaseInfo); + CharUnits Alignment = getNaturalTypeAlignment(E->getType(), + /* BaseInfo= */ nullptr, + /* TBAAInfo= */ nullptr, + /* forPointeeType= */ true); + return MakeAddrLValue(Address(Val, Alignment), T, AlignmentSource::Decl); } // Check for captured variables. if (E->refersToEnclosingVariableOrCapture()) { + VD = VD->getCanonicalDecl(); if (auto *FD = LambdaCaptureFields.lookup(VD)) return EmitCapturedFieldLValue(*this, FD, CXXABIThisValue); else if (CapturedStmtInfo) { auto I = LocalDeclMap.find(VD); if (I != LocalDeclMap.end()) { - if (auto RefTy = VD->getType()->getAs<ReferenceType>()) - return EmitLoadOfReferenceLValue(I->second, RefTy); + if (VD->getType()->isReferenceType()) + return EmitLoadOfReferenceLValue(I->second, VD->getType(), + AlignmentSource::Decl); return MakeAddrLValue(I->second, T); } LValue CapLVal = EmitCapturedFieldLValue(*this, CapturedStmtInfo->lookup(VD), CapturedStmtInfo->getContextValue()); - bool MayAlias = CapLVal.getBaseInfo().getMayAlias(); return MakeAddrLValue( Address(CapLVal.getPointer(), getContext().getDeclAlign(VD)), - CapLVal.getType(), LValueBaseInfo(AlignmentSource::Decl, MayAlias)); + CapLVal.getType(), LValueBaseInfo(AlignmentSource::Decl), + CapLVal.getTBAAInfo()); } assert(isa<BlockDecl>(CurCodeDecl)); Address addr = GetAddrOfBlockDecl(VD, VD->hasAttr<BlocksAttr>()); - LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); - return MakeAddrLValue(addr, T, BaseInfo); + return MakeAddrLValue(addr, T, AlignmentSource::Decl); } } @@ -2316,8 +2417,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { if (ND->hasAttr<WeakRefAttr>()) { const auto *VD = cast<ValueDecl>(ND); ConstantAddress Aliasee = CGM.GetWeakRefReference(VD); - return MakeAddrLValue(Aliasee, T, - LValueBaseInfo(AlignmentSource::Decl, false)); + return MakeAddrLValue(Aliasee, T, AlignmentSource::Decl); } if (const auto *VD = dyn_cast<VarDecl>(ND)) { @@ -2359,13 +2459,9 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { } // Drill into reference types. - LValue LV; - if (auto RefTy = VD->getType()->getAs<ReferenceType>()) { - LV = EmitLoadOfReferenceLValue(addr, RefTy); - } else { - LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); - LV = MakeAddrLValue(addr, T, BaseInfo); - } + LValue LV = VD->getType()->isReferenceType() ? 
+ EmitLoadOfReferenceLValue(addr, VD->getType(), AlignmentSource::Decl) : + MakeAddrLValue(addr, T, AlignmentSource::Decl); bool isLocalStorage = VD->hasLocalStorage(); @@ -2410,8 +2506,10 @@ LValue CodeGenFunction::EmitUnaryOpLValue(const UnaryOperator *E) { assert(!T.isNull() && "CodeGenFunction::EmitUnaryOpLValue: Illegal type"); LValueBaseInfo BaseInfo; - Address Addr = EmitPointerWithAlignment(E->getSubExpr(), &BaseInfo); - LValue LV = MakeAddrLValue(Addr, T, BaseInfo); + TBAAAccessInfo TBAAInfo; + Address Addr = EmitPointerWithAlignment(E->getSubExpr(), &BaseInfo, + &TBAAInfo); + LValue LV = MakeAddrLValue(Addr, T, BaseInfo, TBAAInfo); LV.getQuals().setAddressSpace(ExprTy.getAddressSpace()); // We should not generate __weak write barrier on indirect reference @@ -2443,7 +2541,8 @@ LValue CodeGenFunction::EmitUnaryOpLValue(const UnaryOperator *E) { (E->getOpcode() == UO_Real ? emitAddrOfRealComponent(LV.getAddress(), LV.getType()) : emitAddrOfImagComponent(LV.getAddress(), LV.getType())); - LValue ElemLV = MakeAddrLValue(Component, T, LV.getBaseInfo()); + LValue ElemLV = MakeAddrLValue(Component, T, LV.getBaseInfo(), + CGM.getTBAAInfoForSubobject(LV, T)); ElemLV.getQuals().addQualifiers(LV.getQuals()); return ElemLV; } @@ -2463,14 +2562,12 @@ LValue CodeGenFunction::EmitUnaryOpLValue(const UnaryOperator *E) { LValue CodeGenFunction::EmitStringLiteralLValue(const StringLiteral *E) { return MakeAddrLValue(CGM.GetAddrOfConstantStringFromLiteral(E), - E->getType(), - LValueBaseInfo(AlignmentSource::Decl, false)); + E->getType(), AlignmentSource::Decl); } LValue CodeGenFunction::EmitObjCEncodeExprLValue(const ObjCEncodeExpr *E) { return MakeAddrLValue(CGM.GetAddrOfConstantStringFromObjCEncode(E), - E->getType(), - LValueBaseInfo(AlignmentSource::Decl, false)); + E->getType(), AlignmentSource::Decl); } LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) { @@ -2482,7 +2579,6 @@ LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) { StringRef NameItems[] = { PredefinedExpr::getIdentTypeName(E->getIdentType()), FnName}; std::string GVName = llvm::join(NameItems, NameItems + 2, "."); - LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); if (auto *BD = dyn_cast<BlockDecl>(CurCodeDecl)) { std::string Name = SL->getString(); if (!Name.empty()) { @@ -2491,14 +2587,14 @@ LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) { if (Discriminator) Name += "_" + Twine(Discriminator + 1).str(); auto C = CGM.GetAddrOfConstantCString(Name, GVName.c_str()); - return MakeAddrLValue(C, E->getType(), BaseInfo); + return MakeAddrLValue(C, E->getType(), AlignmentSource::Decl); } else { auto C = CGM.GetAddrOfConstantCString(FnName, GVName.c_str()); - return MakeAddrLValue(C, E->getType(), BaseInfo); + return MakeAddrLValue(C, E->getType(), AlignmentSource::Decl); } } auto C = CGM.GetAddrOfConstantStringFromLiteral(SL, GVName); - return MakeAddrLValue(C, E->getType(), BaseInfo); + return MakeAddrLValue(C, E->getType(), AlignmentSource::Decl); } /// Emit a type description suitable for use by a runtime sanitizer library. The @@ -2556,6 +2652,9 @@ llvm::Constant *CodeGenFunction::EmitCheckTypeDescriptor(QualType T) { llvm::Value *CodeGenFunction::EmitCheckValue(llvm::Value *V) { llvm::Type *TargetTy = IntPtrTy; + if (V->getType() == TargetTy) + return V; + // Floating-point types which fit into intptr_t are bitcast to integers // and then passed directly (after zero-extension, if necessary). 
if (V->getType()->isFloatingPointTy()) { @@ -2685,13 +2784,16 @@ static void emitCheckHandlerCall(CodeGenFunction &CGF, assert(IsFatal || RecoverKind != CheckRecoverableKind::Unrecoverable); bool NeedsAbortSuffix = IsFatal && RecoverKind != CheckRecoverableKind::Unrecoverable; + bool MinimalRuntime = CGF.CGM.getCodeGenOpts().SanitizeMinimalRuntime; const SanitizerHandlerInfo &CheckInfo = SanitizerHandlers[CheckHandler]; const StringRef CheckName = CheckInfo.Name; - std::string FnName = - ("__ubsan_handle_" + CheckName + - (CheckInfo.Version ? "_v" + llvm::utostr(CheckInfo.Version) : "") + - (NeedsAbortSuffix ? "_abort" : "")) - .str(); + std::string FnName = "__ubsan_handle_" + CheckName.str(); + if (CheckInfo.Version && !MinimalRuntime) + FnName += "_v" + llvm::utostr(CheckInfo.Version); + if (MinimalRuntime) + FnName += "_minimal"; + if (NeedsAbortSuffix) + FnName += "_abort"; bool MayReturn = !IsFatal || RecoverKind == CheckRecoverableKind::AlwaysRecoverable; @@ -2723,7 +2825,7 @@ void CodeGenFunction::EmitCheck( assert(IsSanitizerScope); assert(Checked.size() > 0); assert(CheckHandler >= 0 && - CheckHandler < sizeof(SanitizerHandlers) / sizeof(*SanitizerHandlers)); + size_t(CheckHandler) < llvm::array_lengthof(SanitizerHandlers)); const StringRef CheckName = SanitizerHandlers[CheckHandler].Name; llvm::Value *FatalCond = nullptr; @@ -2778,24 +2880,26 @@ void CodeGenFunction::EmitCheck( // representing operand values. SmallVector<llvm::Value *, 4> Args; SmallVector<llvm::Type *, 4> ArgTypes; - Args.reserve(DynamicArgs.size() + 1); - ArgTypes.reserve(DynamicArgs.size() + 1); - - // Emit handler arguments and create handler function type. - if (!StaticArgs.empty()) { - llvm::Constant *Info = llvm::ConstantStruct::getAnon(StaticArgs); - auto *InfoPtr = - new llvm::GlobalVariable(CGM.getModule(), Info->getType(), false, - llvm::GlobalVariable::PrivateLinkage, Info); - InfoPtr->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - CGM.getSanitizerMetadata()->disableSanitizerForGlobal(InfoPtr); - Args.push_back(Builder.CreateBitCast(InfoPtr, Int8PtrTy)); - ArgTypes.push_back(Int8PtrTy); - } + if (!CGM.getCodeGenOpts().SanitizeMinimalRuntime) { + Args.reserve(DynamicArgs.size() + 1); + ArgTypes.reserve(DynamicArgs.size() + 1); + + // Emit handler arguments and create handler function type. + if (!StaticArgs.empty()) { + llvm::Constant *Info = llvm::ConstantStruct::getAnon(StaticArgs); + auto *InfoPtr = + new llvm::GlobalVariable(CGM.getModule(), Info->getType(), false, + llvm::GlobalVariable::PrivateLinkage, Info); + InfoPtr->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + CGM.getSanitizerMetadata()->disableSanitizerForGlobal(InfoPtr); + Args.push_back(Builder.CreateBitCast(InfoPtr, Int8PtrTy)); + ArgTypes.push_back(Int8PtrTy); + } - for (size_t i = 0, n = DynamicArgs.size(); i != n; ++i) { - Args.push_back(EmitCheckValue(DynamicArgs[i])); - ArgTypes.push_back(IntPtrTy); + for (size_t i = 0, n = DynamicArgs.size(); i != n; ++i) { + Args.push_back(EmitCheckValue(DynamicArgs[i])); + ArgTypes.push_back(IntPtrTy); + } } llvm::FunctionType *FnType = @@ -3005,14 +3109,14 @@ llvm::CallInst *CodeGenFunction::EmitTrapCall(llvm::Intrinsic::ID IntrID) { } Address CodeGenFunction::EmitArrayToPointerDecay(const Expr *E, - LValueBaseInfo *BaseInfo) { + LValueBaseInfo *BaseInfo, + TBAAAccessInfo *TBAAInfo) { assert(E->getType()->isArrayType() && "Array to pointer decay must have array source type!"); // Expressions of array type can't be bitfields or vector elements. 
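// Aside (illustration, not from this patch): under
// -fsanitize-minimal-runtime the handler-name assembly above drops the
// version suffix and appends "_minimal", and EmitCheck passes no static data
// or operands. For a fatal type-mismatch check the callee becomes
//   __ubsan_handle_type_mismatch_minimal_abort()
// rather than
//   __ubsan_handle_type_mismatch_v1_abort(StaticData, Ptr)
// so the minimal runtime only learns which kind of check fired, not where.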
  LValue LV = EmitLValue(E);
  Address Addr = LV.getAddress();
-  if (BaseInfo) *BaseInfo = LV.getBaseInfo();
 
   // If the array type was an incomplete type, we need to make sure
   // the decay ends up being the right type.
@@ -3027,7 +3131,15 @@ Address CodeGenFunction::EmitArrayToPointerDecay(const Expr *E,
     Addr = Builder.CreateStructGEP(Addr, 0, CharUnits::Zero(), "arraydecay");
   }
 
+  // The result of this decay conversion points to an array element within the
+  // base lvalue. However, since TBAA currently does not support representing
+  // accesses to elements of member arrays, we conservatively represent accesses
+  // to the pointee object as if it had no base lvalue specified.
+  // TODO: Support TBAA for member arrays.
   QualType EltType = E->getType()->castAsArrayTypeUnsafe()->getElementType();
+  if (BaseInfo) *BaseInfo = LV.getBaseInfo();
+  if (TBAAInfo) *TBAAInfo = CGM.getTBAAAccessInfo(EltType);
+
   return Builder.CreateElementBitCast(Addr, ConvertTypeForMem(EltType));
 }
@@ -3152,9 +3264,8 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
     LValue LHS = EmitLValue(E->getBase());
     auto *Idx = EmitIdxAfterBase(/*Promote*/false);
     assert(LHS.isSimple() && "Can only subscript lvalue vectors here!");
-    return LValue::MakeVectorElt(LHS.getAddress(), Idx,
-                                 E->getBase()->getType(),
-                                 LHS.getBaseInfo());
+    return LValue::MakeVectorElt(LHS.getAddress(), Idx, E->getBase()->getType(),
+                                 LHS.getBaseInfo(), TBAAAccessInfo());
   }
 
   // All the other cases basically behave like simple offsetting.
@@ -3168,17 +3279,19 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
     QualType EltType = LV.getType()->castAs<VectorType>()->getElementType();
     Addr = emitArraySubscriptGEP(*this, Addr, Idx, EltType, /*inbounds*/ true,
                                  SignedIndices, E->getExprLoc());
-    return MakeAddrLValue(Addr, EltType, LV.getBaseInfo());
+    return MakeAddrLValue(Addr, EltType, LV.getBaseInfo(),
+                          CGM.getTBAAInfoForSubobject(LV, EltType));
   }
 
-  LValueBaseInfo BaseInfo;
+  LValueBaseInfo EltBaseInfo;
+  TBAAAccessInfo EltTBAAInfo;
   Address Addr = Address::invalid();
   if (const VariableArrayType *vla =
           getContext().getAsVariableArrayType(E->getType())) {
     // The base must be a pointer, which is not an aggregate. Emit
     // it. It needs to be emitted first in case it's what captures
     // the VLA bounds.
-    Addr = EmitPointerWithAlignment(E->getBase(), &BaseInfo);
+    Addr = EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo);
     auto *Idx = EmitIdxAfterBase(/*Promote*/true);
 
     // The element count here is the total number of non-VLA elements.
@@ -3202,7 +3315,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
 
     // Indexing over an interface, as in "NSString *P; P[4];"
 
     // Emit the base pointer.
-    Addr = EmitPointerWithAlignment(E->getBase(), &BaseInfo);
+    Addr = EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo);
     auto *Idx = EmitIdxAfterBase(/*Promote*/true);
 
     CharUnits InterfaceSize = getContext().getTypeSizeInChars(OIT);
@@ -3249,19 +3362,18 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
         *this, ArrayLV.getAddress(), {CGM.getSize(CharUnits::Zero()), Idx},
         E->getType(), !getLangOpts().isSignedOverflowDefined(), SignedIndices,
         E->getExprLoc());
-    BaseInfo = ArrayLV.getBaseInfo();
+    EltBaseInfo = ArrayLV.getBaseInfo();
+    EltTBAAInfo = CGM.getTBAAInfoForSubobject(ArrayLV, E->getType());
   } else {
     // The base must be a pointer; emit it with an estimate of its alignment.
- Addr = EmitPointerWithAlignment(E->getBase(), &BaseInfo); + Addr = EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo); auto *Idx = EmitIdxAfterBase(/*Promote*/true); Addr = emitArraySubscriptGEP(*this, Addr, Idx, E->getType(), !getLangOpts().isSignedOverflowDefined(), SignedIndices, E->getExprLoc()); } - LValue LV = MakeAddrLValue(Addr, E->getType(), BaseInfo); - - // TODO: Preserve/extend path TBAA metadata? + LValue LV = MakeAddrLValue(Addr, E->getType(), EltBaseInfo, EltTBAAInfo); if (getLangOpts().ObjC1 && getLangOpts().getGC() != LangOptions::NonGC) { @@ -3273,6 +3385,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base, LValueBaseInfo &BaseInfo, + TBAAAccessInfo &TBAAInfo, QualType BaseTy, QualType ElTy, bool IsLowerBound) { LValue BaseLVal; @@ -3299,12 +3412,15 @@ static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base, return CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(ElTy)); } - LValueBaseInfo TypeInfo; - CharUnits Align = CGF.getNaturalTypeAlignment(ElTy, &TypeInfo); - BaseInfo.mergeForCast(TypeInfo); + LValueBaseInfo TypeBaseInfo; + TBAAAccessInfo TypeTBAAInfo; + CharUnits Align = CGF.getNaturalTypeAlignment(ElTy, &TypeBaseInfo, + &TypeTBAAInfo); + BaseInfo.mergeForCast(TypeBaseInfo); + TBAAInfo = CGF.CGM.mergeTBAAInfoForCast(TBAAInfo, TypeTBAAInfo); return Address(CGF.Builder.CreateLoad(BaseLVal.getAddress()), Align); } - return CGF.EmitPointerWithAlignment(Base, &BaseInfo); + return CGF.EmitPointerWithAlignment(Base, &BaseInfo, &TBAAInfo); } LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, @@ -3404,13 +3520,14 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, Address EltPtr = Address::invalid(); LValueBaseInfo BaseInfo; + TBAAAccessInfo TBAAInfo; if (auto *VLA = getContext().getAsVariableArrayType(ResultExprTy)) { // The base must be a pointer, which is not an aggregate. Emit // it. It needs to be emitted first in case it's what captures // the VLA bounds. Address Base = - emitOMPArraySectionBase(*this, E->getBase(), BaseInfo, BaseTy, - VLA->getElementType(), IsLowerBound); + emitOMPArraySectionBase(*this, E->getBase(), BaseInfo, TBAAInfo, + BaseTy, VLA->getElementType(), IsLowerBound); // The element count here is the total number of non-VLA elements. llvm::Value *NumElements = getVLASize(VLA).first; @@ -3446,15 +3563,17 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, ResultExprTy, !getLangOpts().isSignedOverflowDefined(), /*SignedIndices=*/false, E->getExprLoc()); BaseInfo = ArrayLV.getBaseInfo(); + TBAAInfo = CGM.getTBAAInfoForSubobject(ArrayLV, ResultExprTy); } else { Address Base = emitOMPArraySectionBase(*this, E->getBase(), BaseInfo, - BaseTy, ResultExprTy, IsLowerBound); + TBAAInfo, BaseTy, ResultExprTy, + IsLowerBound); EltPtr = emitArraySubscriptGEP(*this, Base, Idx, ResultExprTy, !getLangOpts().isSignedOverflowDefined(), /*SignedIndices=*/false, E->getExprLoc()); } - return MakeAddrLValue(EltPtr, ResultExprTy, BaseInfo); + return MakeAddrLValue(EltPtr, ResultExprTy, BaseInfo, TBAAInfo); } LValue CodeGenFunction:: @@ -3467,9 +3586,10 @@ EmitExtVectorElementExpr(const ExtVectorElementExpr *E) { // If it is a pointer to a vector, emit the address and form an lvalue with // it. 
LValueBaseInfo BaseInfo; - Address Ptr = EmitPointerWithAlignment(E->getBase(), &BaseInfo); + TBAAAccessInfo TBAAInfo; + Address Ptr = EmitPointerWithAlignment(E->getBase(), &BaseInfo, &TBAAInfo); const PointerType *PT = E->getBase()->getType()->getAs<PointerType>(); - Base = MakeAddrLValue(Ptr, PT->getPointeeType(), BaseInfo); + Base = MakeAddrLValue(Ptr, PT->getPointeeType(), BaseInfo, TBAAInfo); Base.getQuals().removeObjCGCAttr(); } else if (E->getBase()->isGLValue()) { // Otherwise, if the base is an lvalue ( as in the case of foo.x.x), @@ -3486,7 +3606,7 @@ EmitExtVectorElementExpr(const ExtVectorElementExpr *E) { Address VecMem = CreateMemTemp(E->getBase()->getType()); Builder.CreateStore(Vec, VecMem); Base = MakeAddrLValue(VecMem, E->getBase()->getType(), - LValueBaseInfo(AlignmentSource::Decl, false)); + AlignmentSource::Decl); } QualType type = @@ -3500,7 +3620,7 @@ EmitExtVectorElementExpr(const ExtVectorElementExpr *E) { llvm::Constant *CV = llvm::ConstantDataVector::get(getLLVMContext(), Indices); return LValue::MakeExtVectorElt(Base.getAddress(), CV, type, - Base.getBaseInfo()); + Base.getBaseInfo(), TBAAAccessInfo()); } assert(Base.isExtVectorElt() && "Can only subscript lvalue vec elts here!"); @@ -3511,16 +3631,22 @@ EmitExtVectorElementExpr(const ExtVectorElementExpr *E) { CElts.push_back(BaseElts->getAggregateElement(Indices[i])); llvm::Constant *CV = llvm::ConstantVector::get(CElts); return LValue::MakeExtVectorElt(Base.getExtVectorAddress(), CV, type, - Base.getBaseInfo()); + Base.getBaseInfo(), TBAAAccessInfo()); } LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) { + if (DeclRefExpr *DRE = tryToConvertMemberExprToDeclRefExpr(*this, E)) { + EmitIgnoredExpr(E->getBase()); + return EmitDeclRefLValue(DRE); + } + Expr *BaseExpr = E->getBase(); // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a scalar. 
LValue BaseLV; if (E->isArrow()) { LValueBaseInfo BaseInfo; - Address Addr = EmitPointerWithAlignment(BaseExpr, &BaseInfo); + TBAAAccessInfo TBAAInfo; + Address Addr = EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); QualType PtrTy = BaseExpr->getType()->getPointeeType(); SanitizerSet SkippedChecks; bool IsBaseCXXThis = IsWrappedCXXThis(BaseExpr); @@ -3530,7 +3656,7 @@ LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) { SkippedChecks.set(SanitizerKind::Null, true); EmitTypeCheck(TCK_MemberAccess, E->getExprLoc(), Addr.getPointer(), PtrTy, /*Alignment=*/CharUnits::Zero(), SkippedChecks); - BaseLV = MakeAddrLValue(Addr, PtrTy, BaseInfo); + BaseLV = MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); } else BaseLV = EmitCheckedLValue(BaseExpr, TCK_MemberAccess); @@ -3541,9 +3667,6 @@ LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) { return LV; } - if (auto *VD = dyn_cast<VarDecl>(ND)) - return EmitGlobalVarDeclLValue(*this, E, VD); - if (const auto *FD = dyn_cast<FunctionDecl>(ND)) return EmitFunctionDeclLValue(*this, E, FD); @@ -3610,15 +3733,6 @@ static bool hasAnyVptr(const QualType Type, const ASTContext &Context) { LValue CodeGenFunction::EmitLValueForField(LValue base, const FieldDecl *field) { LValueBaseInfo BaseInfo = base.getBaseInfo(); - AlignmentSource fieldAlignSource = - getFieldAlignmentSource(BaseInfo.getAlignmentSource()); - LValueBaseInfo FieldBaseInfo(fieldAlignSource, BaseInfo.getMayAlias()); - - QualType type = field->getType(); - const RecordDecl *rec = field->getParent(); - if (rec->isUnion() || rec->hasAttr<MayAliasAttr>() || type->isVectorType()) - FieldBaseInfo.setMayAlias(true); - bool mayAlias = FieldBaseInfo.getMayAlias(); if (field->isBitField()) { const CGRecordLayout &RL = @@ -3638,19 +3752,53 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, QualType fieldType = field->getType().withCVRQualifiers(base.getVRQualifiers()); - return LValue::MakeBitfield(Addr, Info, fieldType, FieldBaseInfo); + // TODO: Support TBAA for bit fields. + LValueBaseInfo FieldBaseInfo(BaseInfo.getAlignmentSource()); + return LValue::MakeBitfield(Addr, Info, fieldType, FieldBaseInfo, + TBAAAccessInfo()); + } + + // Fields of may-alias structures are may-alias themselves. + // FIXME: this should get propagated down through anonymous structs + // and unions. + QualType FieldType = field->getType(); + const RecordDecl *rec = field->getParent(); + AlignmentSource BaseAlignSource = BaseInfo.getAlignmentSource(); + LValueBaseInfo FieldBaseInfo(getFieldAlignmentSource(BaseAlignSource)); + TBAAAccessInfo FieldTBAAInfo; + if (base.getTBAAInfo().isMayAlias() || + rec->hasAttr<MayAliasAttr>() || FieldType->isVectorType()) { + FieldTBAAInfo = TBAAAccessInfo::getMayAliasInfo(); + } else if (rec->isUnion()) { + // TODO: Support TBAA for unions. + FieldTBAAInfo = TBAAAccessInfo::getMayAliasInfo(); + } else { + // If no base type been assigned for the base access, then try to generate + // one for this base lvalue. + FieldTBAAInfo = base.getTBAAInfo(); + if (!FieldTBAAInfo.BaseType) { + FieldTBAAInfo.BaseType = CGM.getTBAABaseTypeInfo(base.getType()); + assert(!FieldTBAAInfo.Offset && + "Nonzero offset for an access with no base type!"); + } + + // Adjust offset to be relative to the base type. 
+ const ASTRecordLayout &Layout = + getContext().getASTRecordLayout(field->getParent()); + unsigned CharWidth = getContext().getCharWidth(); + if (FieldTBAAInfo.BaseType) + FieldTBAAInfo.Offset += + Layout.getFieldOffset(field->getFieldIndex()) / CharWidth; + + // Update the final access type. + FieldTBAAInfo.AccessType = CGM.getTBAATypeInfo(FieldType); } Address addr = base.getAddress(); - unsigned cvr = base.getVRQualifiers(); - bool TBAAPath = CGM.getCodeGenOpts().StructPathTBAA; + unsigned RecordCVR = base.getVRQualifiers(); if (rec->isUnion()) { // For unions, there is no pointer adjustment. - assert(!type->isReferenceType() && "union has reference member"); - // TODO: handle path-aware TBAA for union. - TBAAPath = false; - - const auto FieldType = field->getType(); + assert(!FieldType->isReferenceType() && "union has reference member"); if (CGM.getCodeGenOpts().StrictVTablePointers && hasAnyVptr(FieldType, getContext())) // Because unions can easily skip invariant.barriers, we need to add @@ -3662,34 +3810,16 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, addr = emitAddrOfFieldStorage(*this, addr, field); // If this is a reference field, load the reference right now. - if (const ReferenceType *refType = type->getAs<ReferenceType>()) { - llvm::LoadInst *load = Builder.CreateLoad(addr, "ref"); - if (cvr & Qualifiers::Volatile) load->setVolatile(true); - - // Loading the reference will disable path-aware TBAA. - TBAAPath = false; - if (CGM.shouldUseTBAA()) { - llvm::MDNode *tbaa; - if (mayAlias) - tbaa = CGM.getTBAAInfo(getContext().CharTy); - else - tbaa = CGM.getTBAAInfo(type); - if (tbaa) - CGM.DecorateInstructionWithTBAA(load, tbaa); - } - - mayAlias = false; - type = refType->getPointeeType(); - - CharUnits alignment = - getNaturalTypeAlignment(type, &FieldBaseInfo, /*pointee*/ true); - FieldBaseInfo.setMayAlias(false); - addr = Address(load, alignment); - - // Qualifiers on the struct don't apply to the referencee, and - // we'll pick up CVR from the actual type later, so reset these - // additional qualifiers now. - cvr = 0; + if (FieldType->isReferenceType()) { + LValue RefLVal = MakeAddrLValue(addr, FieldType, FieldBaseInfo, + FieldTBAAInfo); + if (RecordCVR & Qualifiers::Volatile) + RefLVal.getQuals().setVolatile(true); + addr = EmitLoadOfReference(RefLVal, &FieldBaseInfo, &FieldTBAAInfo); + + // Qualifiers on the struct don't apply to the referencee. + RecordCVR = 0; + FieldType = FieldType->getPointeeType(); } } @@ -3697,36 +3827,19 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, // for both unions and structs. A union needs a bitcast, a struct element // will need a bitcast if the LLVM type laid out doesn't match the desired // type. - addr = Builder.CreateElementBitCast(addr, - CGM.getTypes().ConvertTypeForMem(type), - field->getName()); + addr = Builder.CreateElementBitCast( + addr, CGM.getTypes().ConvertTypeForMem(FieldType), field->getName()); if (field->hasAttr<AnnotateAttr>()) addr = EmitFieldAnnotations(field, addr); - LValue LV = MakeAddrLValue(addr, type, FieldBaseInfo); - LV.getQuals().addCVRQualifiers(cvr); - if (TBAAPath) { - const ASTRecordLayout &Layout = - getContext().getASTRecordLayout(field->getParent()); - // Set the base type to be the base type of the base LValue and - // update offset to be relative to the base type. - LV.setTBAABaseType(mayAlias ? getContext().CharTy : base.getTBAABaseType()); - LV.setTBAAOffset(mayAlias ? 
0 : base.getTBAAOffset() + - Layout.getFieldOffset(field->getFieldIndex()) / - getContext().getCharWidth()); - } + LValue LV = MakeAddrLValue(addr, FieldType, FieldBaseInfo, FieldTBAAInfo); + LV.getQuals().addCVRQualifiers(RecordCVR); // __weak attribute on a field is ignored. if (LV.getQuals().getObjCGCAttr() == Qualifiers::Weak) LV.getQuals().removeObjCGCAttr(); - // Fields of may_alias structs act like 'char' for TBAA purposes. - // FIXME: this should get propagated down through anonymous structs - // and unions. - if (mayAlias && LV.getTBAAInfo()) - LV.setTBAAInfo(CGM.getTBAAInfo(getContext().CharTy)); - return LV; } @@ -3744,19 +3857,20 @@ CodeGenFunction::EmitLValueForFieldInitialization(LValue Base, llvm::Type *llvmType = ConvertTypeForMem(FieldType); V = Builder.CreateElementBitCast(V, llvmType, Field->getName()); - // TODO: access-path TBAA? + // TODO: Generate TBAA information that describes this access as a structure + // member access and not just an access to an object of the field's type. This + // should be similar to what we do in EmitLValueForField(). LValueBaseInfo BaseInfo = Base.getBaseInfo(); - LValueBaseInfo FieldBaseInfo( - getFieldAlignmentSource(BaseInfo.getAlignmentSource()), - BaseInfo.getMayAlias()); - return MakeAddrLValue(V, FieldType, FieldBaseInfo); + AlignmentSource FieldAlignSource = BaseInfo.getAlignmentSource(); + LValueBaseInfo FieldBaseInfo(getFieldAlignmentSource(FieldAlignSource)); + return MakeAddrLValue(V, FieldType, FieldBaseInfo, + CGM.getTBAAInfoForSubobject(Base, FieldType)); } LValue CodeGenFunction::EmitCompoundLiteralLValue(const CompoundLiteralExpr *E){ - LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); if (E->isFileScope()) { ConstantAddress GlobalPtr = CGM.GetAddrOfConstantCompoundLiteral(E); - return MakeAddrLValue(GlobalPtr, E->getType(), BaseInfo); + return MakeAddrLValue(GlobalPtr, E->getType(), AlignmentSource::Decl); } if (E->getType()->isVariablyModifiedType()) // make sure to emit the VLA size. @@ -3764,7 +3878,7 @@ LValue CodeGenFunction::EmitCompoundLiteralLValue(const CompoundLiteralExpr *E){ Address DeclPtr = CreateMemTemp(E->getType(), ".compoundliteral"); const Expr *InitExpr = E->getInitializer(); - LValue Result = MakeAddrLValue(DeclPtr, E->getType(), BaseInfo); + LValue Result = MakeAddrLValue(DeclPtr, E->getType(), AlignmentSource::Decl); EmitAnyExprToMem(InitExpr, DeclPtr, E->getType().getQualifiers(), /*Init*/ true); @@ -3863,10 +3977,10 @@ EmitConditionalOperatorLValue(const AbstractConditionalOperator *expr) { AlignmentSource alignSource = std::max(lhs->getBaseInfo().getAlignmentSource(), rhs->getBaseInfo().getAlignmentSource()); - bool MayAlias = lhs->getBaseInfo().getMayAlias() || - rhs->getBaseInfo().getMayAlias(); - return MakeAddrLValue(result, expr->getType(), - LValueBaseInfo(alignSource, MayAlias)); + TBAAAccessInfo TBAAInfo = CGM.mergeTBAAInfoForConditionalOperator( + lhs->getTBAAInfo(), rhs->getTBAAInfo()); + return MakeAddrLValue(result, expr->getType(), LValueBaseInfo(alignSource), + TBAAInfo); } else { assert((lhs || rhs) && "both operands of glvalue conditional are throw-expressions?"); @@ -3964,7 +4078,11 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { This, DerivedClassDecl, E->path_begin(), E->path_end(), /*NullCheckValue=*/false, E->getExprLoc()); - return MakeAddrLValue(Base, E->getType(), LV.getBaseInfo()); + // TODO: Support accesses to members of base classes in TBAA. For now, we + // conservatively pretend that the complete object is of the base class + // type. 
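Summarizing the new EmitLValueForField rules: union members, fields of may_alias records, and vector-typed fields fall back to the conservative may-alias descriptor, while ordinary fields extend the base access with a byte offset taken from the AST record layout. A source-level illustration (the concrete offset assumes a typical LP64 layout):

    struct P { int a; long b; };
    union  U { int i; float f; };
    struct __attribute__((may_alias)) M { int i; };

    long f1(P *p) { return p->b; } // BaseType 'P', AccessType 'long',
                                   // Offset = offsetof(P, b), i.e. 8 on LP64
    int  f2(U *u) { return u->i; } // may-alias (unions are still a TODO)
    int  f3(M *m) { return m->i; } // may-alias (forced by the attribute)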
+ return MakeAddrLValue(Base, E->getType(), LV.getBaseInfo(), + CGM.getTBAAInfoForSubobject(LV, E->getType())); } case CK_ToUnion: return EmitAggExprToLValue(E); @@ -3991,7 +4109,8 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { /*MayBeNull=*/false, CFITCK_DerivedCast, E->getLocStart()); - return MakeAddrLValue(Derived, E->getType(), LV.getBaseInfo()); + return MakeAddrLValue(Derived, E->getType(), LV.getBaseInfo(), + CGM.getTBAAInfoForSubobject(LV, E->getType())); } case CK_LValueBitCast: { // This must be a reinterpret_cast (or c-style equivalent). @@ -4007,13 +4126,15 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { /*MayBeNull=*/false, CFITCK_UnrelatedCast, E->getLocStart()); - return MakeAddrLValue(V, E->getType(), LV.getBaseInfo()); + return MakeAddrLValue(V, E->getType(), LV.getBaseInfo(), + CGM.getTBAAInfoForSubobject(LV, E->getType())); } case CK_ObjCObjectLValueCast: { LValue LV = EmitLValue(E->getSubExpr()); Address V = Builder.CreateElementBitCast(LV.getAddress(), ConvertType(E->getType())); - return MakeAddrLValue(V, E->getType(), LV.getBaseInfo()); + return MakeAddrLValue(V, E->getType(), LV.getBaseInfo(), + CGM.getTBAAInfoForSubobject(LV, E->getType())); } case CK_ZeroToOCLQueue: llvm_unreachable("NULL to OpenCL queue lvalue cast is not valid"); @@ -4202,7 +4323,7 @@ LValue CodeGenFunction::EmitCallExprLValue(const CallExpr *E) { if (!RV.isScalar()) return MakeAddrLValue(RV.getAggregateAddress(), E->getType(), - LValueBaseInfo(AlignmentSource::Decl, false)); + AlignmentSource::Decl); assert(E->getCallReturnType(getContext())->isReferenceType() && "Can't have a scalar return unless the return type is a " @@ -4221,8 +4342,7 @@ LValue CodeGenFunction::EmitCXXConstructLValue(const CXXConstructExpr *E) { && "binding l-value to type which needs a temporary"); AggValueSlot Slot = CreateAggTemp(E->getType()); EmitCXXConstructExpr(E, Slot); - return MakeAddrLValue(Slot.getAddress(), E->getType(), - LValueBaseInfo(AlignmentSource::Decl, false)); + return MakeAddrLValue(Slot.getAddress(), E->getType(), AlignmentSource::Decl); } LValue @@ -4237,7 +4357,7 @@ Address CodeGenFunction::EmitCXXUuidofExpr(const CXXUuidofExpr *E) { LValue CodeGenFunction::EmitCXXUuidofLValue(const CXXUuidofExpr *E) { return MakeAddrLValue(EmitCXXUuidofExpr(E), E->getType(), - LValueBaseInfo(AlignmentSource::Decl, false)); + AlignmentSource::Decl); } LValue @@ -4246,16 +4366,14 @@ CodeGenFunction::EmitCXXBindTemporaryLValue(const CXXBindTemporaryExpr *E) { Slot.setExternallyDestructed(); EmitAggExpr(E->getSubExpr(), Slot); EmitCXXTemporary(E->getTemporary(), E->getType(), Slot.getAddress()); - return MakeAddrLValue(Slot.getAddress(), E->getType(), - LValueBaseInfo(AlignmentSource::Decl, false)); + return MakeAddrLValue(Slot.getAddress(), E->getType(), AlignmentSource::Decl); } LValue CodeGenFunction::EmitLambdaLValue(const LambdaExpr *E) { AggValueSlot Slot = CreateAggTemp(E->getType(), "temp.lvalue"); EmitLambdaExpr(E, Slot); - return MakeAddrLValue(Slot.getAddress(), E->getType(), - LValueBaseInfo(AlignmentSource::Decl, false)); + return MakeAddrLValue(Slot.getAddress(), E->getType(), AlignmentSource::Decl); } LValue CodeGenFunction::EmitObjCMessageExprLValue(const ObjCMessageExpr *E) { @@ -4263,7 +4381,7 @@ LValue CodeGenFunction::EmitObjCMessageExprLValue(const ObjCMessageExpr *E) { if (!RV.isScalar()) return MakeAddrLValue(RV.getAggregateAddress(), E->getType(), - LValueBaseInfo(AlignmentSource::Decl, false)); + AlignmentSource::Decl); 
assert(E->getMethodDecl()->getReturnType()->isReferenceType() && "Can't have a scalar return unless the return type is a " @@ -4275,8 +4393,7 @@ LValue CodeGenFunction::EmitObjCMessageExprLValue(const ObjCMessageExpr *E) { LValue CodeGenFunction::EmitObjCSelectorLValue(const ObjCSelectorExpr *E) { Address V = CGM.getObjCRuntime().GetAddrOfSelector(*this, E->getSelector()); - return MakeAddrLValue(V, E->getType(), - LValueBaseInfo(AlignmentSource::Decl, false)); + return MakeAddrLValue(V, E->getType(), AlignmentSource::Decl); } llvm::Value *CodeGenFunction::EmitIvarOffset(const ObjCInterfaceDecl *Interface, @@ -4320,7 +4437,7 @@ LValue CodeGenFunction::EmitStmtExprLValue(const StmtExpr *E) { // Can only get l-value for message expression returning aggregate type RValue RV = EmitAnyExprToTemp(E); return MakeAddrLValue(RV.getAggregateAddress(), E->getType(), - LValueBaseInfo(AlignmentSource::Decl, false)); + AlignmentSource::Decl); } RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee, @@ -4358,10 +4475,7 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee SanitizerScope SanScope(this); llvm::Constant *FTRTTIConst = CGM.GetAddrOfRTTIDescriptor(QualType(FnType, 0), /*ForEH=*/true); - llvm::Type *PrefixStructTyElems[] = { - PrefixSig->getType(), - FTRTTIConst->getType() - }; + llvm::Type *PrefixStructTyElems[] = {PrefixSig->getType(), Int32Ty}; llvm::StructType *PrefixStructTy = llvm::StructType::get( CGM.getLLVMContext(), PrefixStructTyElems, /*isPacked=*/true); @@ -4382,8 +4496,10 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee EmitBlock(TypeCheck); llvm::Value *CalleeRTTIPtr = Builder.CreateConstGEP2_32(PrefixStructTy, CalleePrefixStruct, 0, 1); - llvm::Value *CalleeRTTI = + llvm::Value *CalleeRTTIEncoded = Builder.CreateAlignedLoad(CalleeRTTIPtr, getPointerAlign()); + llvm::Value *CalleeRTTI = + DecodeAddrUsedInPrologue(CalleePtr, CalleeRTTIEncoded); llvm::Value *CalleeRTTIMatch = Builder.CreateICmpEQ(CalleeRTTI, FTRTTIConst); llvm::Constant *StaticData[] = { @@ -4405,7 +4521,12 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee SanitizerScope SanScope(this); EmitSanitizerStatReport(llvm::SanStat_CFI_ICall); - llvm::Metadata *MD = CGM.CreateMetadataIdentifierForType(QualType(FnType, 0)); + llvm::Metadata *MD; + if (CGM.getCodeGenOpts().SanitizeCfiICallGeneralizePointers) + MD = CGM.CreateMetadataIdentifierGeneralized(QualType(FnType, 0)); + else + MD = CGM.CreateMetadataIdentifierForType(QualType(FnType, 0)); + llvm::Value *TypeId = llvm::MetadataAsValue::get(getLLVMContext(), MD); llvm::Value *CalleePtr = Callee.getFunctionPointer(); @@ -4513,10 +4634,12 @@ EmitPointerToDataMemberBinaryExpr(const BinaryOperator *E) { = E->getRHS()->getType()->getAs<MemberPointerType>(); LValueBaseInfo BaseInfo; + TBAAAccessInfo TBAAInfo; Address MemberAddr = - EmitCXXMemberDataPointerAddress(E, BaseAddr, OffsetV, MPT, &BaseInfo); + EmitCXXMemberDataPointerAddress(E, BaseAddr, OffsetV, MPT, &BaseInfo, + &TBAAInfo); - return MakeAddrLValue(MemberAddr, MPT->getPointeeType(), BaseInfo); + return MakeAddrLValue(MemberAddr, MPT->getPointeeType(), BaseInfo, TBAAInfo); } /// Given the address of a temporary variable, produce an r-value of @@ -4524,8 +4647,7 @@ EmitPointerToDataMemberBinaryExpr(const BinaryOperator *E) { RValue CodeGenFunction::convertTempToRValue(Address addr, QualType type, SourceLocation loc) { - LValue lvalue = MakeAddrLValue(addr, type, - 
LValueBaseInfo(AlignmentSource::Decl, false)); + LValue lvalue = MakeAddrLValue(addr, type, AlignmentSource::Decl); switch (getEvaluationKind(type)) { case TEK_Complex: return RValue::getComplex(EmitLoadOfComplex(lvalue, loc)); @@ -4580,9 +4702,8 @@ static LValueOrRValue emitPseudoObjectExpr(CodeGenFunction &CGF, if (ov == resultExpr && ov->isRValue() && !forLValue && CodeGenFunction::hasAggregateEvaluationKind(ov->getType())) { CGF.EmitAggExpr(ov->getSourceExpr(), slot); - LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); LValue LV = CGF.MakeAddrLValue(slot.getAddress(), ov->getType(), - BaseInfo); + AlignmentSource::Decl); opaqueData = OVMA::bind(CGF, ov, LV); result.RV = slot.asRValue(); diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp index a05a088f0919..1ab8433864c4 100644 --- a/lib/CodeGen/CGExprAgg.cpp +++ b/lib/CodeGen/CGExprAgg.cpp @@ -124,24 +124,7 @@ public: } // l-values. - void VisitDeclRefExpr(DeclRefExpr *E) { - // For aggregates, we should always be able to emit the variable - // as an l-value unless it's a reference. This is due to the fact - // that we can't actually ever see a normal l2r conversion on an - // aggregate in C++, and in C there's no language standard - // actively preventing us from listing variables in the captures - // list of a block. - if (E->getDecl()->getType()->isReferenceType()) { - if (CodeGenFunction::ConstantEmission result - = CGF.tryEmitAsConstant(E)) { - EmitFinalDestCopy(E->getType(), result.getReferenceLValue(CGF, E)); - return; - } - } - - EmitAggLoadOfLValue(E); - } - + void VisitDeclRefExpr(DeclRefExpr *E) { EmitAggLoadOfLValue(E); } void VisitMemberExpr(MemberExpr *ME) { EmitAggLoadOfLValue(ME); } void VisitUnaryDeref(UnaryOperator *E) { EmitAggLoadOfLValue(E); } void VisitStringLiteral(StringLiteral *E) { EmitAggLoadOfLValue(E); } diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp index ab170245284c..41bb199ffde7 100644 --- a/lib/CodeGen/CGExprCXX.cpp +++ b/lib/CodeGen/CGExprCXX.cpp @@ -16,6 +16,7 @@ #include "CGCXXABI.h" #include "CGDebugInfo.h" #include "CGObjCRuntime.h" +#include "ConstantEmitter.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "clang/Frontend/CodeGenOptions.h" #include "llvm/IR/CallSite.h" @@ -367,9 +368,11 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( } else { if (SanOpts.has(SanitizerKind::CFINVCall) && MD->getParent()->isDynamicClass()) { - llvm::Value *VTable = GetVTablePtr(This, Int8PtrTy, MD->getParent()); - EmitVTablePtrCheckForCall(MD->getParent(), VTable, CFITCK_NVCall, - CE->getLocStart()); + llvm::Value *VTable; + const CXXRecordDecl *RD; + std::tie(VTable, RD) = + CGM.getCXXABI().LoadVTablePtr(*this, This, MD->getParent()); + EmitVTablePtrCheckForCall(RD, VTable, CFITCK_NVCall, CE->getLocStart()); } if (getLangOpts().AppleKext && MD->isVirtual() && HasQualifier) @@ -681,8 +684,8 @@ static llvm::Value *EmitCXXNewAllocSize(CodeGenFunction &CGF, // Emit the array size expression. // We multiply the size of all dimensions for NumElements. // e.g for 'int[2][3]', ElemType is 'int' and NumElements is 6. 
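Since only the outermost dimension of an array-new may be non-constant, the inner dimensions fold into a single multiplier, and tryEmitAbstract now covers the fully constant case. A worked example, not from the patch:

    void mk(int n) {
      int (*a)[3] = new int[2][3]; // NumElements folds to the constant 6
      int (*b)[3] = new int[n][3]; // NumElements = n * 3, computed at run
                                   // time (with an overflow check as needed)
      delete[] a;
      delete[] b;
    }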
- numElements = CGF.CGM.EmitConstantExpr(e->getArraySize(), - CGF.getContext().getSizeType(), &CGF); + numElements = + ConstantEmitter(CGF).tryEmitAbstract(e->getArraySize(), e->getType()); if (!numElements) numElements = CGF.EmitScalarExpr(e->getArraySize()); assert(isa<llvm::IntegerType>(numElements->getType())); @@ -1310,29 +1313,44 @@ RValue CodeGenFunction::EmitBuiltinNewDeleteCall(const FunctionProtoType *Type, llvm_unreachable("predeclared global operator new/delete is missing"); } -static std::pair<bool, bool> -shouldPassSizeAndAlignToUsualDelete(const FunctionProtoType *FPT) { +namespace { +/// The parameters to pass to a usual operator delete. +struct UsualDeleteParams { + bool DestroyingDelete = false; + bool Size = false; + bool Alignment = false; +}; +} + +static UsualDeleteParams getUsualDeleteParams(const FunctionDecl *FD) { + UsualDeleteParams Params; + + const FunctionProtoType *FPT = FD->getType()->castAs<FunctionProtoType>(); auto AI = FPT->param_type_begin(), AE = FPT->param_type_end(); // The first argument is always a void*. ++AI; - // Figure out what other parameters we should be implicitly passing. - bool PassSize = false; - bool PassAlignment = false; + // The next parameter may be a std::destroying_delete_t. + if (FD->isDestroyingOperatorDelete()) { + Params.DestroyingDelete = true; + assert(AI != AE); + ++AI; + } + // Figure out what other parameters we should be implicitly passing. if (AI != AE && (*AI)->isIntegerType()) { - PassSize = true; + Params.Size = true; ++AI; } if (AI != AE && (*AI)->isAlignValT()) { - PassAlignment = true; + Params.Alignment = true; ++AI; } assert(AI == AE && "unexpected usual deallocation function parameter"); - return {PassSize, PassAlignment}; + return Params; } namespace { @@ -1385,25 +1403,27 @@ namespace { OperatorDelete->getType()->getAs<FunctionProtoType>(); CallArgList DeleteArgs; - // The first argument is always a void*. + // The first argument is always a void* (or C* for a destroying operator + // delete for class type C). DeleteArgs.add(Traits::get(CGF, Ptr), FPT->getParamType(0)); // Figure out what other parameters we should be implicitly passing. - bool PassSize = false; - bool PassAlignment = false; + UsualDeleteParams Params; if (NumPlacementArgs) { // A placement deallocation function is implicitly passed an alignment // if the placement allocation function was, but is never passed a size. - PassAlignment = PassAlignmentToPlacementDelete; + Params.Alignment = PassAlignmentToPlacementDelete; } else { // For a non-placement new-expression, 'operator delete' can take a // size and/or an alignment if it has the right parameters. - std::tie(PassSize, PassAlignment) = - shouldPassSizeAndAlignToUsualDelete(FPT); + Params = getUsualDeleteParams(OperatorDelete); } + assert(!Params.DestroyingDelete && + "should not call destroying delete in a new-expression"); + // The second argument can be a std::size_t (for non-placement delete). - if (PassSize) + if (Params.Size) DeleteArgs.add(Traits::get(CGF, AllocSize), CGF.getContext().getSizeType()); @@ -1411,7 +1431,7 @@ namespace { // is an enum whose underlying type is std::size_t. // FIXME: Use the right type as the parameter type. Note that in a call // to operator delete(size_t, ...), we may not have it available. 
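getUsualDeleteParams consumes the parameter list in a fixed order: the pointer, then an optional std::destroying_delete_t tag, then an optional size, then an optional alignment. Illustrative declarations of the usual deallocation functions it classifies (std::destroying_delete_t is the C++2a tag type from P0722, declared in <new>):

    #include <cstddef>
    #include <new>

    struct S {
      ~S();
      // pointer, tag, size, alignment: the order the classifier expects.
      void operator delete(S *p, std::destroying_delete_t,
                           std::size_t sz, std::align_val_t al);
    };

    // Non-destroying usual forms, for comparison:
    //   void operator delete(void *p);
    //   void operator delete(void *p, std::size_t sz);
    //   void operator delete(void *p, std::align_val_t al);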
- if (PassAlignment) + if (Params.Alignment) DeleteArgs.add(RValue::get(llvm::ConstantInt::get( CGF.SizeTy, AllocAlign.getQuantity())), CGF.getContext().getSizeType()); @@ -1714,9 +1734,7 @@ void CodeGenFunction::EmitDeleteCall(const FunctionDecl *DeleteFD, CallArgList DeleteArgs; - std::pair<bool, bool> PassSizeAndAlign = - shouldPassSizeAndAlignToUsualDelete(DeleteFTy); - + auto Params = getUsualDeleteParams(DeleteFD); auto ParamTypeIt = DeleteFTy->param_type_begin(); // Pass the pointer itself. @@ -1724,8 +1742,16 @@ void CodeGenFunction::EmitDeleteCall(const FunctionDecl *DeleteFD, llvm::Value *DeletePtr = Builder.CreateBitCast(Ptr, ConvertType(ArgTy)); DeleteArgs.add(RValue::get(DeletePtr), ArgTy); + // Pass the std::destroying_delete tag if present. + if (Params.DestroyingDelete) { + QualType DDTag = *ParamTypeIt++; + // Just pass an 'undef'. We expect the tag type to be an empty struct. + auto *V = llvm::UndefValue::get(getTypes().ConvertType(DDTag)); + DeleteArgs.add(RValue::get(V), DDTag); + } + // Pass the size if the delete function has a size_t parameter. - if (PassSizeAndAlign.first) { + if (Params.Size) { QualType SizeType = *ParamTypeIt++; CharUnits DeleteTypeSize = getContext().getTypeSizeInChars(DeleteTy); llvm::Value *Size = llvm::ConstantInt::get(ConvertType(SizeType), @@ -1744,7 +1770,7 @@ void CodeGenFunction::EmitDeleteCall(const FunctionDecl *DeleteFD, } // Pass the alignment if the delete function has an align_val_t parameter. - if (PassSizeAndAlign.second) { + if (Params.Alignment) { QualType AlignValType = *ParamTypeIt++; CharUnits DeleteTypeAlign = getContext().toCharUnitsFromBits( getContext().getTypeAlignIfKnown(DeleteTy)); @@ -1786,6 +1812,21 @@ CodeGenFunction::pushCallObjectDeleteCleanup(const FunctionDecl *OperatorDelete, OperatorDelete, ElementType); } +/// Emit the code for deleting a single object with a destroying operator +/// delete. If the element type has a non-virtual destructor, Ptr has already +/// been converted to the type of the parameter of 'operator delete'. Otherwise +/// Ptr points to an object of the static type. +static void EmitDestroyingObjectDelete(CodeGenFunction &CGF, + const CXXDeleteExpr *DE, Address Ptr, + QualType ElementType) { + auto *Dtor = ElementType->getAsCXXRecordDecl()->getDestructor(); + if (Dtor && Dtor->isVirtual()) + CGF.CGM.getCXXABI().emitVirtualObjectDelete(CGF, DE, Ptr, ElementType, + Dtor); + else + CGF.EmitDeleteCall(DE->getOperatorDelete(), Ptr.getPointer(), ElementType); +} + /// Emit the code for deleting a single object. static void EmitObjectDelete(CodeGenFunction &CGF, const CXXDeleteExpr *DE, @@ -1800,6 +1841,9 @@ static void EmitObjectDelete(CodeGenFunction &CGF, DE->getExprLoc(), Ptr.getPointer(), ElementType); + const FunctionDecl *OperatorDelete = DE->getOperatorDelete(); + assert(!OperatorDelete->isDestroyingOperatorDelete()); + // Find the destructor for the type, if applicable. If the // destructor is virtual, we'll just emit the vcall and return. const CXXDestructorDecl *Dtor = nullptr; @@ -1819,7 +1863,6 @@ static void EmitObjectDelete(CodeGenFunction &CGF, // Make sure that we call delete even if the dtor throws. // This doesn't have to be a conditional cleanup because we're going // to pop it off in a second.
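EmitDestroyingObjectDelete reflects the core semantic change of P0722: with a destroying operator delete, the delete-expression itself calls no destructor; the operator receives a pointer to the still-alive object and is responsible for both destruction and deallocation. A minimal sketch (C++2a):

    #include <new>

    struct S {
      ~S() {}
      void operator delete(S *p, std::destroying_delete_t) {
        p->~S();               // the callee destroys the object...
        ::operator delete(p);  // ...and then releases the storage
      }
    };

    // 'delete p' emits only the call to S::operator delete (or the
    // virtual-delete path when the destructor is virtual).
    void f(S *p) { delete p; }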
- const FunctionDecl *OperatorDelete = DE->getOperatorDelete(); CGF.EHStack.pushCleanup<CallObjectDelete>(NormalAndEHCleanup, Ptr.getPointer(), OperatorDelete, ElementType); @@ -1931,10 +1974,19 @@ void CodeGenFunction::EmitCXXDeleteExpr(const CXXDeleteExpr *E) { Builder.CreateCondBr(IsNull, DeleteEnd, DeleteNotNull); EmitBlock(DeleteNotNull); + QualType DeleteTy = E->getDestroyedType(); + + // A destroying operator delete overrides the entire operation of the + // delete expression. + if (E->getOperatorDelete()->isDestroyingOperatorDelete()) { + EmitDestroyingObjectDelete(*this, E, Ptr, DeleteTy); + EmitBlock(DeleteEnd); + return; + } + // We might be deleting a pointer to array. If so, GEP down to the // first non-array element. // (this assumes that A(*)[3][7] is converted to [3 x [7 x %A]]*) - QualType DeleteTy = Arg->getType()->getAs<PointerType>()->getPointeeType(); if (DeleteTy->isConstantArrayType()) { llvm::Value *Zero = Builder.getInt32(0); SmallVector<llvm::Value*,8> GEP; diff --git a/lib/CodeGen/CGExprComplex.cpp b/lib/CodeGen/CGExprComplex.cpp index 980972370dc2..e860b3045f0e 100644 --- a/lib/CodeGen/CGExprComplex.cpp +++ b/lib/CodeGen/CGExprComplex.cpp @@ -120,18 +120,22 @@ public: return Visit(E->getSubExpr()); } + ComplexPairTy emitConstant(const CodeGenFunction::ConstantEmission &Constant, + Expr *E) { + assert(Constant && "not a constant"); + if (Constant.isReference()) + return EmitLoadOfLValue(Constant.getReferenceLValue(CGF, E), + E->getExprLoc()); + + llvm::Constant *pair = Constant.getValue(); + return ComplexPairTy(pair->getAggregateElement(0U), + pair->getAggregateElement(1U)); + } // l-values. ComplexPairTy VisitDeclRefExpr(DeclRefExpr *E) { - if (CodeGenFunction::ConstantEmission result = CGF.tryEmitAsConstant(E)) { - if (result.isReference()) - return EmitLoadOfLValue(result.getReferenceLValue(CGF, E), - E->getExprLoc()); - - llvm::Constant *pair = result.getValue(); - return ComplexPairTy(pair->getAggregateElement(0U), - pair->getAggregateElement(1U)); - } + if (CodeGenFunction::ConstantEmission Constant = CGF.tryEmitAsConstant(E)) + return emitConstant(Constant, E); return EmitLoadOfLValue(E); } ComplexPairTy VisitObjCIvarRefExpr(ObjCIvarRefExpr *E) { @@ -141,7 +145,14 @@ public: return CGF.EmitObjCMessageExpr(E).getComplexVal(); } ComplexPairTy VisitArraySubscriptExpr(Expr *E) { return EmitLoadOfLValue(E); } - ComplexPairTy VisitMemberExpr(const Expr *E) { return EmitLoadOfLValue(E); } + ComplexPairTy VisitMemberExpr(MemberExpr *ME) { + if (CodeGenFunction::ConstantEmission Constant = + CGF.tryEmitAsConstant(ME)) { + CGF.EmitIgnoredExpr(ME->getBase()); + return emitConstant(Constant, ME); + } + return EmitLoadOfLValue(ME); + } ComplexPairTy VisitOpaqueValueExpr(OpaqueValueExpr *E) { if (E->isGLValue()) return EmitLoadOfLValue(CGF.getOpaqueLValueMapping(E), E->getExprLoc()); @@ -764,7 +775,6 @@ ComplexPairTy ComplexExprEmitter::EmitBinDiv(const BinOpInfo &Op) { if (!LHSi) LibCallOp.LHS.second = llvm::Constant::getNullValue(LHSr->getType()); - StringRef LibCallName; switch (LHSr->getType()->getTypeID()) { default: llvm_unreachable("Unsupported floating point type!"); diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp index 6b72774c10a5..d1b9e13a6f93 100644 --- a/lib/CodeGen/CGExprConstant.cpp +++ b/lib/CodeGen/CGExprConstant.cpp @@ -16,6 +16,7 @@ #include "CGObjCRuntime.h" #include "CGRecordLayout.h" #include "CodeGenModule.h" +#include "ConstantEmitter.h" #include "TargetInfo.h" #include "clang/AST/APValue.h" #include 
"clang/AST/ASTContext.h" @@ -37,25 +38,26 @@ namespace { class ConstExprEmitter; class ConstStructBuilder { CodeGenModule &CGM; - CodeGenFunction *CGF; + ConstantEmitter &Emitter; bool Packed; CharUnits NextFieldOffsetInChars; CharUnits LLVMStructAlignment; SmallVector<llvm::Constant *, 32> Elements; public: - static llvm::Constant *BuildStruct(CodeGenModule &CGM, CodeGenFunction *CFG, - ConstExprEmitter *Emitter, + static llvm::Constant *BuildStruct(ConstantEmitter &Emitter, + ConstExprEmitter *ExprEmitter, llvm::ConstantStruct *Base, - InitListExpr *Updater); - static llvm::Constant *BuildStruct(CodeGenModule &CGM, CodeGenFunction *CGF, - InitListExpr *ILE); - static llvm::Constant *BuildStruct(CodeGenModule &CGM, CodeGenFunction *CGF, + InitListExpr *Updater, + QualType ValTy); + static llvm::Constant *BuildStruct(ConstantEmitter &Emitter, + InitListExpr *ILE, QualType StructTy); + static llvm::Constant *BuildStruct(ConstantEmitter &Emitter, const APValue &Value, QualType ValTy); private: - ConstStructBuilder(CodeGenModule &CGM, CodeGenFunction *CGF) - : CGM(CGM), CGF(CGF), Packed(false), + ConstStructBuilder(ConstantEmitter &emitter) + : CGM(emitter.CGM), Emitter(emitter), Packed(false), NextFieldOffsetInChars(CharUnits::Zero()), LLVMStructAlignment(CharUnits::One()) { } @@ -76,7 +78,7 @@ private: bool Build(InitListExpr *ILE); bool Build(ConstExprEmitter *Emitter, llvm::ConstantStruct *Base, InitListExpr *Updater); - void Build(const APValue &Val, const RecordDecl *RD, bool IsPrimaryBase, + bool Build(const APValue &Val, const RecordDecl *RD, bool IsPrimaryBase, const CXXRecordDecl *VTableClass, CharUnits BaseOffset); llvm::Constant *Finalize(QualType Ty); @@ -391,10 +393,10 @@ bool ConstStructBuilder::Build(InitListExpr *ILE) { // we just use explicit null values for them. llvm::Constant *EltInit; if (ElementNo < ILE->getNumInits()) - EltInit = CGM.EmitConstantExpr(ILE->getInit(ElementNo++), - Field->getType(), CGF); + EltInit = Emitter.tryEmitPrivateForMemory(ILE->getInit(ElementNo++), + Field->getType()); else - EltInit = CGM.EmitNullConstant(Field->getType()); + EltInit = Emitter.emitNullForMemory(Field->getType()); if (!EltInit) return false; @@ -431,7 +433,7 @@ struct BaseInfo { }; } -void ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, +bool ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, bool IsPrimaryBase, const CXXRecordDecl *VTableClass, CharUnits Offset) { @@ -486,8 +488,9 @@ void ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, const APValue &FieldValue = RD->isUnion() ? Val.getUnionValue() : Val.getStructField(FieldNo); llvm::Constant *EltInit = - CGM.EmitConstantValueForMemory(FieldValue, Field->getType(), CGF); - assert(EltInit && "EmitConstantValue can't fail"); + Emitter.tryEmitPrivateForMemory(FieldValue, Field->getType()); + if (!EltInit) + return false; if (!Field->isBitField()) { // Handle non-bitfield members. 
@@ -498,6 +501,8 @@ void ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, cast<llvm::ConstantInt>(EltInit)); } } + + return true; } llvm::Constant *ConstStructBuilder::Finalize(QualType Ty) { @@ -559,37 +564,37 @@ llvm::Constant *ConstStructBuilder::Finalize(QualType Ty) { return Result; } -llvm::Constant *ConstStructBuilder::BuildStruct(CodeGenModule &CGM, - CodeGenFunction *CGF, - ConstExprEmitter *Emitter, +llvm::Constant *ConstStructBuilder::BuildStruct(ConstantEmitter &Emitter, + ConstExprEmitter *ExprEmitter, llvm::ConstantStruct *Base, - InitListExpr *Updater) { - ConstStructBuilder Builder(CGM, CGF); - if (!Builder.Build(Emitter, Base, Updater)) + InitListExpr *Updater, + QualType ValTy) { + ConstStructBuilder Builder(Emitter); + if (!Builder.Build(ExprEmitter, Base, Updater)) return nullptr; - return Builder.Finalize(Updater->getType()); + return Builder.Finalize(ValTy); } -llvm::Constant *ConstStructBuilder::BuildStruct(CodeGenModule &CGM, - CodeGenFunction *CGF, - InitListExpr *ILE) { - ConstStructBuilder Builder(CGM, CGF); +llvm::Constant *ConstStructBuilder::BuildStruct(ConstantEmitter &Emitter, + InitListExpr *ILE, + QualType ValTy) { + ConstStructBuilder Builder(Emitter); if (!Builder.Build(ILE)) return nullptr; - return Builder.Finalize(ILE->getType()); + return Builder.Finalize(ValTy); } -llvm::Constant *ConstStructBuilder::BuildStruct(CodeGenModule &CGM, - CodeGenFunction *CGF, +llvm::Constant *ConstStructBuilder::BuildStruct(ConstantEmitter &Emitter, const APValue &Val, QualType ValTy) { - ConstStructBuilder Builder(CGM, CGF); + ConstStructBuilder Builder(Emitter); const RecordDecl *RD = ValTy->castAs<RecordType>()->getDecl(); const CXXRecordDecl *CD = dyn_cast<CXXRecordDecl>(RD); - Builder.Build(Val, RD, false, CD, CharUnits::Zero()); + if (!Builder.Build(Val, RD, false, CD, CharUnits::Zero())) + return nullptr; return Builder.Finalize(ValTy); } @@ -599,57 +604,86 @@ llvm::Constant *ConstStructBuilder::BuildStruct(CodeGenModule &CGM, // ConstExprEmitter //===----------------------------------------------------------------------===// +static ConstantAddress tryEmitGlobalCompoundLiteral(CodeGenModule &CGM, + CodeGenFunction *CGF, + const CompoundLiteralExpr *E) { + CharUnits Align = CGM.getContext().getTypeAlignInChars(E->getType()); + if (llvm::GlobalVariable *Addr = + CGM.getAddrOfConstantCompoundLiteralIfEmitted(E)) + return ConstantAddress(Addr, Align); + + LangAS addressSpace = E->getType().getAddressSpace(); + + ConstantEmitter emitter(CGM, CGF); + llvm::Constant *C = emitter.tryEmitForInitializer(E->getInitializer(), + addressSpace, E->getType()); + if (!C) { + assert(!E->isFileScope() && + "file-scope compound literal did not have constant initializer!"); + return ConstantAddress::invalid(); + } + + auto GV = new llvm::GlobalVariable(CGM.getModule(), C->getType(), + CGM.isTypeConstant(E->getType(), true), + llvm::GlobalValue::InternalLinkage, + C, ".compoundliteral", nullptr, + llvm::GlobalVariable::NotThreadLocal, + CGM.getContext().getTargetAddressSpace(addressSpace)); + emitter.finalize(GV); + GV->setAlignment(Align.getQuantity()); + CGM.setAddrOfConstantCompoundLiteral(E, GV); + return ConstantAddress(GV, Align); +} + /// This class only needs to handle two cases: /// 1) Literals (this is used by APValue emission to emit literals). /// 2) Arrays, structs and unions (outside C++11 mode, we don't currently /// constant fold these types). 
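For the two cases named in the comment above, the rewritten visitor now carries the destination type explicitly. Aggregates it still folds directly (think C, where C++11-style constant evaluation does not apply); illustrative inputs:

    struct P { int x, y; };
    struct P g    = {1, 2}; // InitListExpr -> EmitRecordInitialization(ILE, T)
    int      a[4] = {1, 2}; // InitListExpr -> EmitArrayInitialization(ILE, T);
                            // the two trailing elements come from the filler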
class ConstExprEmitter : - public StmtVisitor<ConstExprEmitter, llvm::Constant*> { + public StmtVisitor<ConstExprEmitter, llvm::Constant*, QualType> { CodeGenModule &CGM; - CodeGenFunction *CGF; + ConstantEmitter &Emitter; llvm::LLVMContext &VMContext; public: - ConstExprEmitter(CodeGenModule &cgm, CodeGenFunction *cgf) - : CGM(cgm), CGF(cgf), VMContext(cgm.getLLVMContext()) { + ConstExprEmitter(ConstantEmitter &emitter) + : CGM(emitter.CGM), Emitter(emitter), VMContext(CGM.getLLVMContext()) { } //===--------------------------------------------------------------------===// // Visitor Methods //===--------------------------------------------------------------------===// - llvm::Constant *VisitStmt(Stmt *S) { + llvm::Constant *VisitStmt(Stmt *S, QualType T) { return nullptr; } - llvm::Constant *VisitParenExpr(ParenExpr *PE) { - return Visit(PE->getSubExpr()); + llvm::Constant *VisitParenExpr(ParenExpr *PE, QualType T) { + return Visit(PE->getSubExpr(), T); } llvm::Constant * - VisitSubstNonTypeTemplateParmExpr(SubstNonTypeTemplateParmExpr *PE) { - return Visit(PE->getReplacement()); + VisitSubstNonTypeTemplateParmExpr(SubstNonTypeTemplateParmExpr *PE, + QualType T) { + return Visit(PE->getReplacement(), T); } - llvm::Constant *VisitGenericSelectionExpr(GenericSelectionExpr *GE) { - return Visit(GE->getResultExpr()); + llvm::Constant *VisitGenericSelectionExpr(GenericSelectionExpr *GE, + QualType T) { + return Visit(GE->getResultExpr(), T); } - llvm::Constant *VisitChooseExpr(ChooseExpr *CE) { - return Visit(CE->getChosenSubExpr()); + llvm::Constant *VisitChooseExpr(ChooseExpr *CE, QualType T) { + return Visit(CE->getChosenSubExpr(), T); } - llvm::Constant *VisitCompoundLiteralExpr(CompoundLiteralExpr *E) { - return Visit(E->getInitializer()); + llvm::Constant *VisitCompoundLiteralExpr(CompoundLiteralExpr *E, QualType T) { + return Visit(E->getInitializer(), T); } - llvm::Constant *VisitCastExpr(CastExpr* E) { + llvm::Constant *VisitCastExpr(CastExpr *E, QualType destType) { if (const auto *ECE = dyn_cast<ExplicitCastExpr>(E)) - CGM.EmitExplicitCastExprType(ECE, CGF); + CGM.EmitExplicitCastExprType(ECE, Emitter.CGF); Expr *subExpr = E->getSubExpr(); - llvm::Constant *C = CGM.EmitConstantExpr(subExpr, subExpr->getType(), CGF); - if (!C) return nullptr; - - llvm::Type *destType = ConvertType(E->getType()); switch (E->getCastKind()) { case CK_ToUnion: { @@ -657,14 +691,22 @@ public: assert(E->getType()->isUnionType() && "Destination type is not union type!"); + auto field = E->getTargetUnionField(); + + auto C = Emitter.tryEmitPrivateForMemory(subExpr, field->getType()); + if (!C) return nullptr; + + auto destTy = ConvertType(destType); + if (C->getType() == destTy) return C; + // Build a struct with the union sub-element as the first member, - // and padded to the appropriate size + // and padded to the appropriate size. 
SmallVector<llvm::Constant*, 2> Elts; SmallVector<llvm::Type*, 2> Types; Elts.push_back(C); Types.push_back(C->getType()); unsigned CurSize = CGM.getDataLayout().getTypeAllocSize(C->getType()); - unsigned TotalSize = CGM.getDataLayout().getTypeAllocSize(destType); + unsigned TotalSize = CGM.getDataLayout().getTypeAllocSize(destTy); assert(CurSize <= TotalSize && "Union size mismatch!"); if (unsigned NumPadBytes = TotalSize - CurSize) { @@ -676,20 +718,26 @@ public: Types.push_back(Ty); } - llvm::StructType* STy = - llvm::StructType::get(C->getType()->getContext(), Types, false); + llvm::StructType *STy = llvm::StructType::get(VMContext, Types, false); return llvm::ConstantStruct::get(STy, Elts); } - case CK_AddressSpaceConversion: - return llvm::ConstantExpr::getAddrSpaceCast(C, destType); + case CK_AddressSpaceConversion: { + auto C = Emitter.tryEmitPrivate(subExpr, subExpr->getType()); + if (!C) return nullptr; + LangAS destAS = E->getType()->getPointeeType().getAddressSpace(); + LangAS srcAS = subExpr->getType()->getPointeeType().getAddressSpace(); + llvm::Type *destTy = ConvertType(E->getType()); + return CGM.getTargetCodeGenInfo().performAddrSpaceCast(CGM, C, srcAS, + destAS, destTy); + } case CK_LValueToRValue: case CK_AtomicToNonAtomic: case CK_NonAtomicToAtomic: case CK_NoOp: case CK_ConstructorConversion: - return C; + return Visit(subExpr, destType); case CK_IntToOCLSampler: llvm_unreachable("global sampler variables are not generated"); @@ -701,8 +749,11 @@ public: case CK_ReinterpretMemberPointer: case CK_DerivedToBaseMemberPointer: - case CK_BaseToDerivedMemberPointer: + case CK_BaseToDerivedMemberPointer: { + auto C = Emitter.tryEmitPrivate(subExpr, subExpr->getType()); + if (!C) return nullptr; return CGM.getCXXABI().EmitMemberPointerConversion(E, C); + } // These will never be supported. case CK_ObjCObjectLValueCast: @@ -759,27 +810,28 @@ public: llvm_unreachable("Invalid CastKind"); } - llvm::Constant *VisitCXXDefaultArgExpr(CXXDefaultArgExpr *DAE) { - return Visit(DAE->getExpr()); + llvm::Constant *VisitCXXDefaultArgExpr(CXXDefaultArgExpr *DAE, QualType T) { + return Visit(DAE->getExpr(), T); } - llvm::Constant *VisitCXXDefaultInitExpr(CXXDefaultInitExpr *DIE) { + llvm::Constant *VisitCXXDefaultInitExpr(CXXDefaultInitExpr *DIE, QualType T) { // No need for a DefaultInitExprScope: we don't handle 'this' in a // constant expression. - return Visit(DIE->getExpr()); + return Visit(DIE->getExpr(), T); } - llvm::Constant *VisitExprWithCleanups(ExprWithCleanups *E) { + llvm::Constant *VisitExprWithCleanups(ExprWithCleanups *E, QualType T) { if (!E->cleanupsHaveSideEffects()) - return Visit(E->getSubExpr()); + return Visit(E->getSubExpr(), T); return nullptr; } - llvm::Constant *VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *E) { - return Visit(E->GetTemporaryExpr()); + llvm::Constant *VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *E, + QualType T) { + return Visit(E->GetTemporaryExpr(), T); } - llvm::Constant *EmitArrayInitialization(InitListExpr *ILE) { + llvm::Constant *EmitArrayInitialization(InitListExpr *ILE, QualType T) { llvm::ArrayType *AType = cast<llvm::ArrayType>(ConvertType(ILE->getType())); llvm::Type *ElemTy = AType->getElementType(); @@ -790,13 +842,14 @@ public: // initialise any elements that have not been initialised explicitly unsigned NumInitableElts = std::min(NumInitElements, NumElements); + QualType EltType = CGM.getContext().getAsArrayType(T)->getElementType(); + // Initialize remaining array elements. 
- // FIXME: This doesn't handle member pointers correctly! llvm::Constant *fillC; if (Expr *filler = ILE->getArrayFiller()) - fillC = CGM.EmitConstantExpr(filler, filler->getType(), CGF); + fillC = Emitter.tryEmitAbstractForMemory(filler, EltType); else - fillC = llvm::Constant::getNullValue(ElemTy); + fillC = Emitter.emitNullForMemory(EltType); if (!fillC) return nullptr; @@ -805,13 +858,13 @@ public: return llvm::ConstantAggregateZero::get(AType); // Copy initializer elements. - std::vector<llvm::Constant*> Elts; + SmallVector<llvm::Constant*, 16> Elts; Elts.reserve(NumInitableElts + NumElements); bool RewriteType = false; for (unsigned i = 0; i < NumInitableElts; ++i) { Expr *Init = ILE->getInit(i); - llvm::Constant *C = CGM.EmitConstantExpr(Init, Init->getType(), CGF); + llvm::Constant *C = Emitter.tryEmitPrivateForMemory(Init, EltType); if (!C) return nullptr; RewriteType |= (C->getType() != ElemTy); @@ -835,33 +888,33 @@ public: return llvm::ConstantArray::get(AType, Elts); } - llvm::Constant *EmitRecordInitialization(InitListExpr *ILE) { - return ConstStructBuilder::BuildStruct(CGM, CGF, ILE); + llvm::Constant *EmitRecordInitialization(InitListExpr *ILE, QualType T) { + return ConstStructBuilder::BuildStruct(Emitter, ILE, T); } - llvm::Constant *VisitImplicitValueInitExpr(ImplicitValueInitExpr* E) { - return CGM.EmitNullConstant(E->getType()); + llvm::Constant *VisitImplicitValueInitExpr(ImplicitValueInitExpr* E, + QualType T) { + return CGM.EmitNullConstant(T); } - llvm::Constant *VisitInitListExpr(InitListExpr *ILE) { + llvm::Constant *VisitInitListExpr(InitListExpr *ILE, QualType T) { if (ILE->isTransparent()) - return Visit(ILE->getInit(0)); + return Visit(ILE->getInit(0), T); if (ILE->getType()->isArrayType()) - return EmitArrayInitialization(ILE); + return EmitArrayInitialization(ILE, T); if (ILE->getType()->isRecordType()) - return EmitRecordInitialization(ILE); + return EmitRecordInitialization(ILE, T); return nullptr; } llvm::Constant *EmitDesignatedInitUpdater(llvm::Constant *Base, - InitListExpr *Updater) { - QualType ExprType = Updater->getType(); - - if (ExprType->isArrayType()) { - llvm::ArrayType *AType = cast<llvm::ArrayType>(ConvertType(ExprType)); + InitListExpr *Updater, + QualType destType) { + if (auto destAT = CGM.getContext().getAsArrayType(destType)) { + llvm::ArrayType *AType = cast<llvm::ArrayType>(ConvertType(destType)); llvm::Type *ElemType = AType->getElementType(); unsigned NumInitElements = Updater->getNumInits(); @@ -870,12 +923,12 @@ public: std::vector<llvm::Constant *> Elts; Elts.reserve(NumElements); - if (llvm::ConstantDataArray *DataArray = - dyn_cast<llvm::ConstantDataArray>(Base)) + QualType destElemType = destAT->getElementType(); + + if (auto DataArray = dyn_cast<llvm::ConstantDataArray>(Base)) for (unsigned i = 0; i != NumElements; ++i) Elts.push_back(DataArray->getElementAsConstant(i)); - else if (llvm::ConstantArray *Array = - dyn_cast<llvm::ConstantArray>(Base)) + else if (auto Array = dyn_cast<llvm::ConstantArray>(Base)) for (unsigned i = 0; i != NumElements; ++i) Elts.push_back(Array->getOperand(i)); else @@ -884,7 +937,7 @@ public: llvm::Constant *fillC = nullptr; if (Expr *filler = Updater->getArrayFiller()) if (!isa<NoInitExpr>(filler)) - fillC = CGM.EmitConstantExpr(filler, filler->getType(), CGF); + fillC = Emitter.tryEmitAbstractForMemory(filler, destElemType); bool RewriteType = (fillC && fillC->getType() != ElemType); for (unsigned i = 0; i != NumElements; ++i) { @@ -897,9 +950,9 @@ public: else if (!Init || 
isa<NoInitExpr>(Init)) ; // Do nothing. else if (InitListExpr *ChildILE = dyn_cast<InitListExpr>(Init)) - Elts[i] = EmitDesignatedInitUpdater(Elts[i], ChildILE); + Elts[i] = EmitDesignatedInitUpdater(Elts[i], ChildILE, destElemType); else - Elts[i] = CGM.EmitConstantExpr(Init, Init->getType(), CGF); + Elts[i] = Emitter.tryEmitPrivateForMemory(Init, destElemType); if (!Elts[i]) return nullptr; @@ -919,25 +972,24 @@ public: return llvm::ConstantArray::get(AType, Elts); } - if (ExprType->isRecordType()) - return ConstStructBuilder::BuildStruct(CGM, CGF, this, - dyn_cast<llvm::ConstantStruct>(Base), Updater); + if (destType->isRecordType()) + return ConstStructBuilder::BuildStruct(Emitter, this, + dyn_cast<llvm::ConstantStruct>(Base), Updater, destType); return nullptr; } - llvm::Constant *VisitDesignatedInitUpdateExpr(DesignatedInitUpdateExpr *E) { - return EmitDesignatedInitUpdater( - CGM.EmitConstantExpr(E->getBase(), E->getType(), CGF), - E->getUpdater()); + llvm::Constant *VisitDesignatedInitUpdateExpr(DesignatedInitUpdateExpr *E, + QualType destType) { + auto C = Visit(E->getBase(), destType); + if (!C) return nullptr; + return EmitDesignatedInitUpdater(C, E->getUpdater(), destType); } - llvm::Constant *VisitCXXConstructExpr(CXXConstructExpr *E) { + llvm::Constant *VisitCXXConstructExpr(CXXConstructExpr *E, QualType Ty) { if (!E->getConstructor()->isTrivial()) return nullptr; - QualType Ty = E->getType(); - // FIXME: We should not have to call getBaseElementType here. const RecordType *RT = CGM.getContext().getBaseElementType(Ty)->getAs<RecordType>(); @@ -960,26 +1012,23 @@ public: assert(CGM.getContext().hasSameUnqualifiedType(Ty, Arg->getType()) && "argument to copy ctor is of wrong type"); - return Visit(Arg); + return Visit(Arg, Ty); } return CGM.EmitNullConstant(Ty); } - llvm::Constant *VisitStringLiteral(StringLiteral *E) { + llvm::Constant *VisitStringLiteral(StringLiteral *E, QualType T) { return CGM.GetConstantArrayFromStringLiteral(E); } - llvm::Constant *VisitObjCEncodeExpr(ObjCEncodeExpr *E) { + llvm::Constant *VisitObjCEncodeExpr(ObjCEncodeExpr *E, QualType T) { // This must be an @encode initializing an array in a static initializer. // Don't emit it as the address of the string, emit the string data itself // as an inline array. std::string Str; CGM.getContext().getObjCEncodingForType(E->getEncodedType(), Str); - QualType T = E->getType(); - if (T->getTypeClass() == Type::TypeOfExpr) - T = cast<TypeOfExprType>(T)->getUnderlyingExpr()->getType(); - const ConstantArrayType *CAT = cast<ConstantArrayType>(T); + const ConstantArrayType *CAT = CGM.getContext().getAsConstantArrayType(T); // Resize the string to the right size, adding zeros at the end, or // truncating as needed. 
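The VisitCXXConstructExpr hunk above folds only trivial constructors: a trivial default construction becomes EmitNullConstant, and a trivial copy/move construction simply visits the argument with the destination type. Illustrative inputs:

    struct T { int x; };   // trivial

    const T t0{};          // no-argument case: EmitNullConstant(T)
    const T t1(t0);        // trivial copy: Visit(Arg, Ty) emits t0's value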
@@ -987,151 +1036,19 @@ public: return llvm::ConstantDataArray::getString(VMContext, Str, false); } - llvm::Constant *VisitUnaryExtension(const UnaryOperator *E) { - return Visit(E->getSubExpr()); + llvm::Constant *VisitUnaryExtension(const UnaryOperator *E, QualType T) { + return Visit(E->getSubExpr(), T); } // Utility methods llvm::Type *ConvertType(QualType T) { return CGM.getTypes().ConvertType(T); } - -public: - ConstantAddress EmitLValue(APValue::LValueBase LVBase) { - if (const ValueDecl *Decl = LVBase.dyn_cast<const ValueDecl*>()) { - if (Decl->hasAttr<WeakRefAttr>()) - return CGM.GetWeakRefReference(Decl); - if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(Decl)) - return ConstantAddress(CGM.GetAddrOfFunction(FD), CharUnits::One()); - if (const VarDecl* VD = dyn_cast<VarDecl>(Decl)) { - // We can never refer to a variable with local storage. - if (!VD->hasLocalStorage()) { - CharUnits Align = CGM.getContext().getDeclAlign(VD); - if (VD->isFileVarDecl() || VD->hasExternalStorage()) - return ConstantAddress(CGM.GetAddrOfGlobalVar(VD), Align); - else if (VD->isLocalVarDecl()) { - auto Ptr = CGM.getOrCreateStaticVarDecl( - *VD, CGM.getLLVMLinkageVarDefinition(VD, /*isConstant=*/false)); - return ConstantAddress(Ptr, Align); - } - } - } - return ConstantAddress::invalid(); - } - - Expr *E = const_cast<Expr*>(LVBase.get<const Expr*>()); - switch (E->getStmtClass()) { - default: break; - case Expr::CompoundLiteralExprClass: { - CompoundLiteralExpr *CLE = cast<CompoundLiteralExpr>(E); - CharUnits Align = CGM.getContext().getTypeAlignInChars(E->getType()); - if (llvm::GlobalVariable *Addr = - CGM.getAddrOfConstantCompoundLiteralIfEmitted(CLE)) - return ConstantAddress(Addr, Align); - - llvm::Constant* C = CGM.EmitConstantExpr(CLE->getInitializer(), - CLE->getType(), CGF); - // FIXME: "Leaked" on failure. 
- if (!C) return ConstantAddress::invalid(); - - auto GV = new llvm::GlobalVariable(CGM.getModule(), C->getType(), - E->getType().isConstant(CGM.getContext()), - llvm::GlobalValue::InternalLinkage, - C, ".compoundliteral", nullptr, - llvm::GlobalVariable::NotThreadLocal, - CGM.getContext().getTargetAddressSpace(E->getType())); - GV->setAlignment(Align.getQuantity()); - CGM.setAddrOfConstantCompoundLiteral(CLE, GV); - return ConstantAddress(GV, Align); - } - case Expr::StringLiteralClass: - return CGM.GetAddrOfConstantStringFromLiteral(cast<StringLiteral>(E)); - case Expr::ObjCEncodeExprClass: - return CGM.GetAddrOfConstantStringFromObjCEncode(cast<ObjCEncodeExpr>(E)); - case Expr::ObjCStringLiteralClass: { - ObjCStringLiteral* SL = cast<ObjCStringLiteral>(E); - ConstantAddress C = - CGM.getObjCRuntime().GenerateConstantString(SL->getString()); - return C.getElementBitCast(ConvertType(E->getType())); - } - case Expr::PredefinedExprClass: { - unsigned Type = cast<PredefinedExpr>(E)->getIdentType(); - if (CGF) { - LValue Res = CGF->EmitPredefinedLValue(cast<PredefinedExpr>(E)); - return cast<ConstantAddress>(Res.getAddress()); - } else if (Type == PredefinedExpr::PrettyFunction) { - return CGM.GetAddrOfConstantCString("top level", ".tmp"); - } - - return CGM.GetAddrOfConstantCString("", ".tmp"); - } - case Expr::AddrLabelExprClass: { - assert(CGF && "Invalid address of label expression outside function."); - llvm::Constant *Ptr = - CGF->GetAddrOfLabel(cast<AddrLabelExpr>(E)->getLabel()); - Ptr = llvm::ConstantExpr::getBitCast(Ptr, ConvertType(E->getType())); - return ConstantAddress(Ptr, CharUnits::One()); - } - case Expr::CallExprClass: { - CallExpr* CE = cast<CallExpr>(E); - unsigned builtin = CE->getBuiltinCallee(); - if (builtin != - Builtin::BI__builtin___CFStringMakeConstantString && - builtin != - Builtin::BI__builtin___NSStringMakeConstantString) - break; - const Expr *Arg = CE->getArg(0)->IgnoreParenCasts(); - const StringLiteral *Literal = cast<StringLiteral>(Arg); - if (builtin == - Builtin::BI__builtin___NSStringMakeConstantString) { - return CGM.getObjCRuntime().GenerateConstantString(Literal); - } - // FIXME: need to deal with UCN conversion issues. - return CGM.GetAddrOfConstantCFString(Literal); - } - case Expr::BlockExprClass: { - StringRef FunctionName; - if (CGF) - FunctionName = CGF->CurFn->getName(); - else - FunctionName = "global"; - - // This is not really an l-value. - llvm::Constant *Ptr = - CGM.GetAddrOfGlobalBlock(cast<BlockExpr>(E), FunctionName); - return ConstantAddress(Ptr, CGM.getPointerAlign()); - } - case Expr::CXXTypeidExprClass: { - CXXTypeidExpr *Typeid = cast<CXXTypeidExpr>(E); - QualType T; - if (Typeid->isTypeOperand()) - T = Typeid->getTypeOperand(CGM.getContext()); - else - T = Typeid->getExprOperand()->getType(); - return ConstantAddress(CGM.GetAddrOfRTTIDescriptor(T), - CGM.getPointerAlign()); - } - case Expr::CXXUuidofExprClass: { - return CGM.GetAddrOfUuidDescriptor(cast<CXXUuidofExpr>(E)); - } - case Expr::MaterializeTemporaryExprClass: { - MaterializeTemporaryExpr *MTE = cast<MaterializeTemporaryExpr>(E); - assert(MTE->getStorageDuration() == SD_Static); - SmallVector<const Expr *, 2> CommaLHSs; - SmallVector<SubobjectAdjustment, 2> Adjustments; - const Expr *Inner = MTE->GetTemporaryExpr() - ->skipRValueSubobjectAdjustments(CommaLHSs, Adjustments); - return CGM.GetAddrOfGlobalTemporary(MTE, Inner); - } - } - - return ConstantAddress::invalid(); - } }; } // end anonymous namespace. 
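The ConstantEmitter methods that follow split emission into an 'abstract' mode (no placeholder globals may be created, and emission may fail) and a non-abstract initializer mode that must be finalized. A hedged sketch of the calling sequence, assuming CGM, CGF, a VarDecl D, and an expression E with destination type DestType are in scope; createGlobal is a hypothetical helper, not part of the patch:

    ConstantEmitter emitter(CGM, CGF);
    if (llvm::Constant *init = emitter.tryEmitForInitializer(D)) {
      llvm::GlobalVariable *GV = createGlobal(CGM, init); // hypothetical
      GV->setInitializer(init);
      emitter.finalize(GV); // mandatory: rewrites placeholder addresses into
                            // GEPs relative to GV (see ReplacePlaceholders
                            // further down)
    }

    // Abstract emission, e.g. for an array-new size: placeholders are
    // forbidden, so a null result simply means 'fall back to runtime code'.
    llvm::Constant *C = ConstantEmitter(CGF).tryEmitAbstract(E, DestType);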
-bool ConstStructBuilder::Build(ConstExprEmitter *Emitter, +bool ConstStructBuilder::Build(ConstExprEmitter *ExprEmitter, llvm::ConstantStruct *Base, InitListExpr *Updater) { assert(Base && "base expression should not be empty"); @@ -1179,9 +1096,10 @@ bool ConstStructBuilder::Build(ConstExprEmitter *Emitter, if (!Init || isa<NoInitExpr>(Init)) ; // Do nothing. else if (InitListExpr *ChildILE = dyn_cast<InitListExpr>(Init)) - EltInit = Emitter->EmitDesignatedInitUpdater(EltInit, ChildILE); + EltInit = ExprEmitter->EmitDesignatedInitUpdater(EltInit, ChildILE, + Field->getType()); else - EltInit = CGM.EmitConstantExpr(Init, Field->getType(), CGF); + EltInit = Emitter.tryEmitPrivateForMemory(Init, Field->getType()); ++ElementNo; @@ -1200,26 +1118,294 @@ bool ConstStructBuilder::Build(ConstExprEmitter *Emitter, return true; } -llvm::Constant *CodeGenModule::EmitConstantInit(const VarDecl &D, - CodeGenFunction *CGF) { +llvm::Constant *ConstantEmitter::validateAndPopAbstract(llvm::Constant *C, + AbstractState saved) { + Abstract = saved.OldValue; + + assert(saved.OldPlaceholdersSize == PlaceholderAddresses.size() && + "created a placeholder while doing an abstract emission?"); + + // No validation necessary for now. + // No cleanup to do for now. + return C; +} + +llvm::Constant * +ConstantEmitter::tryEmitAbstractForInitializer(const VarDecl &D) { + auto state = pushAbstract(); + auto C = tryEmitPrivateForVarInit(D); + return validateAndPopAbstract(C, state); +} + +llvm::Constant * +ConstantEmitter::tryEmitAbstract(const Expr *E, QualType destType) { + auto state = pushAbstract(); + auto C = tryEmitPrivate(E, destType); + return validateAndPopAbstract(C, state); +} + +llvm::Constant * +ConstantEmitter::tryEmitAbstract(const APValue &value, QualType destType) { + auto state = pushAbstract(); + auto C = tryEmitPrivate(value, destType); + return validateAndPopAbstract(C, state); +} + +llvm::Constant * +ConstantEmitter::emitAbstract(const Expr *E, QualType destType) { + auto state = pushAbstract(); + auto C = tryEmitPrivate(E, destType); + C = validateAndPopAbstract(C, state); + if (!C) { + CGM.Error(E->getExprLoc(), + "internal error: could not emit constant value \"abstractly\""); + C = CGM.EmitNullConstant(destType); + } + return C; +} + +llvm::Constant * +ConstantEmitter::emitAbstract(SourceLocation loc, const APValue &value, + QualType destType) { + auto state = pushAbstract(); + auto C = tryEmitPrivate(value, destType); + C = validateAndPopAbstract(C, state); + if (!C) { + CGM.Error(loc, + "internal error: could not emit constant value \"abstractly\""); + C = CGM.EmitNullConstant(destType); + } + return C; +} + +llvm::Constant *ConstantEmitter::tryEmitForInitializer(const VarDecl &D) { + initializeNonAbstract(D.getType().getAddressSpace()); + return markIfFailed(tryEmitPrivateForVarInit(D)); +} + +llvm::Constant *ConstantEmitter::tryEmitForInitializer(const Expr *E, + LangAS destAddrSpace, + QualType destType) { + initializeNonAbstract(destAddrSpace); + return markIfFailed(tryEmitPrivateForMemory(E, destType)); +} + +llvm::Constant *ConstantEmitter::emitForInitializer(const APValue &value, + LangAS destAddrSpace, + QualType destType) { + initializeNonAbstract(destAddrSpace); + auto C = tryEmitPrivateForMemory(value, destType); + assert(C && "couldn't emit constant value non-abstractly?"); + return C; +} + +llvm::GlobalValue *ConstantEmitter::getCurrentAddrPrivate() { + assert(!Abstract && "cannot get current address for abstract constant"); + + + + // Make an obviously ill-formed global that 
should blow up compilation + // if it survives. + auto global = new llvm::GlobalVariable(CGM.getModule(), CGM.Int8Ty, true, + llvm::GlobalValue::PrivateLinkage, + /*init*/ nullptr, + /*name*/ "", + /*before*/ nullptr, + llvm::GlobalVariable::NotThreadLocal, + CGM.getContext().getTargetAddressSpace(DestAddressSpace)); + + PlaceholderAddresses.push_back(std::make_pair(nullptr, global)); + + return global; +} + +void ConstantEmitter::registerCurrentAddrPrivate(llvm::Constant *signal, + llvm::GlobalValue *placeholder) { + assert(!PlaceholderAddresses.empty()); + assert(PlaceholderAddresses.back().first == nullptr); + assert(PlaceholderAddresses.back().second == placeholder); + PlaceholderAddresses.back().first = signal; +} + +namespace { + struct ReplacePlaceholders { + CodeGenModule &CGM; + + /// The base address of the global. + llvm::Constant *Base; + llvm::Type *BaseValueTy = nullptr; + + /// The placeholder addresses that were registered during emission. + llvm::DenseMap<llvm::Constant*, llvm::GlobalVariable*> PlaceholderAddresses; + + /// The locations of the placeholder signals. + llvm::DenseMap<llvm::GlobalVariable*, llvm::Constant*> Locations; + + /// The current index stack. We use a simple unsigned stack because + /// we assume that placeholders will be relatively sparse in the + /// initializer, but we cache the index values we find just in case. + llvm::SmallVector<unsigned, 8> Indices; + llvm::SmallVector<llvm::Constant*, 8> IndexValues; + + ReplacePlaceholders(CodeGenModule &CGM, llvm::Constant *base, + ArrayRef<std::pair<llvm::Constant*, + llvm::GlobalVariable*>> addresses) + : CGM(CGM), Base(base), + PlaceholderAddresses(addresses.begin(), addresses.end()) { + } + + void replaceInInitializer(llvm::Constant *init) { + // Remember the type of the top-most initializer. + BaseValueTy = init->getType(); + + // Initialize the stack. + Indices.push_back(0); + IndexValues.push_back(nullptr); + + // Recurse into the initializer. + findLocations(init); + + // Check invariants. + assert(IndexValues.size() == Indices.size() && "mismatch"); + assert(Indices.size() == 1 && "didn't pop all indices"); + + // Do the replacement; this basically invalidates 'init'. + assert(Locations.size() == PlaceholderAddresses.size() && + "missed a placeholder?"); + + // We're iterating over a hashtable, so this would be a source of + // non-determinism in compiler output *except* that we're just + // messing around with llvm::Constant structures, which never itself + // does anything that should be visible in compiler output. + for (auto &entry : Locations) { + assert(entry.first->getParent() == nullptr && "not a placeholder!"); + entry.first->replaceAllUsesWith(entry.second); + entry.first->eraseFromParent(); + } + } + + private: + void findLocations(llvm::Constant *init) { + // Recurse into aggregates. + if (auto agg = dyn_cast<llvm::ConstantAggregate>(init)) { + for (unsigned i = 0, e = agg->getNumOperands(); i != e; ++i) { + Indices.push_back(i); + IndexValues.push_back(nullptr); + + findLocations(agg->getOperand(i)); + + IndexValues.pop_back(); + Indices.pop_back(); + } + return; + } + + // Otherwise, check for registered constants. + while (true) { + auto it = PlaceholderAddresses.find(init); + if (it != PlaceholderAddresses.end()) { + setLocation(it->second); + break; + } + + // Look through bitcasts or other expressions. 
+ if (auto expr = dyn_cast<llvm::ConstantExpr>(init)) { + init = expr->getOperand(0); + } else { + break; + } + } + } + + void setLocation(llvm::GlobalVariable *placeholder) { + assert(Locations.find(placeholder) == Locations.end() && + "already found location for placeholder!"); + + // Lazily fill in IndexValues with the values from Indices. + // We do this in reverse because we should always have a strict + // prefix of indices from the start. + assert(Indices.size() == IndexValues.size()); + for (size_t i = Indices.size() - 1; i != size_t(-1); --i) { + if (IndexValues[i]) { +#ifndef NDEBUG + for (size_t j = 0; j != i + 1; ++j) { + assert(IndexValues[j] && + isa<llvm::ConstantInt>(IndexValues[j]) && + cast<llvm::ConstantInt>(IndexValues[j])->getZExtValue() + == Indices[j]); + } +#endif + break; + } + + IndexValues[i] = llvm::ConstantInt::get(CGM.Int32Ty, Indices[i]); + } + + // Form a GEP and then bitcast to the placeholder type so that the + // replacement will succeed. + llvm::Constant *location = + llvm::ConstantExpr::getInBoundsGetElementPtr(BaseValueTy, + Base, IndexValues); + location = llvm::ConstantExpr::getBitCast(location, + placeholder->getType()); + + Locations.insert({placeholder, location}); + } + }; +} + +void ConstantEmitter::finalize(llvm::GlobalVariable *global) { + assert(InitializedNonAbstract && + "finalizing emitter that was used for abstract emission?"); + assert(!Finalized && "finalizing emitter multiple times"); + assert(global->getInitializer()); + + // Note that we might also be Failed. + Finalized = true; + + if (!PlaceholderAddresses.empty()) { + ReplacePlaceholders(CGM, global, PlaceholderAddresses) + .replaceInInitializer(global->getInitializer()); + PlaceholderAddresses.clear(); // satisfy + } +} + +ConstantEmitter::~ConstantEmitter() { + assert((!InitializedNonAbstract || Finalized || Failed) && + "not finalized after being initialized for non-abstract emission"); + assert(PlaceholderAddresses.empty() && "unhandled placeholders"); +} + +static QualType getNonMemoryType(CodeGenModule &CGM, QualType type) { + if (auto AT = type->getAs<AtomicType>()) { + return CGM.getContext().getQualifiedType(AT->getValueType(), + type.getQualifiers()); + } + return type; +} + +llvm::Constant *ConstantEmitter::tryEmitPrivateForVarInit(const VarDecl &D) { // Make a quick check if variable can be default NULL initialized // and avoid going through rest of code which may do, for c++11, // initialization of memory to all NULLs. if (!D.hasLocalStorage()) { - QualType Ty = D.getType(); - if (Ty->isArrayType()) - Ty = Context.getBaseElementType(Ty); + QualType Ty = CGM.getContext().getBaseElementType(D.getType()); if (Ty->isRecordType()) if (const CXXConstructExpr *E = dyn_cast_or_null<CXXConstructExpr>(D.getInit())) { const CXXConstructorDecl *CD = E->getConstructor(); if (CD->isTrivial() && CD->isDefaultConstructor()) - return EmitNullConstant(D.getType()); + return CGM.EmitNullConstant(D.getType()); } } - - if (const APValue *Value = D.evaluateValue()) - return EmitConstantValueForMemory(*Value, D.getType(), CGF); + + QualType destType = D.getType(); + + // Try to emit the initializer. Note that this can allow some things that + // are not allowed by tryEmitPrivateForMemory alone. 
+ if (auto value = D.evaluateValue()) { + return tryEmitPrivateForMemory(*value, destType); + } // FIXME: Implement C++11 [basic.start.init]p2: if the initializer of a // reference is a constant expression, and the reference binds to a temporary, @@ -1227,42 +1413,95 @@ llvm::Constant *CodeGenModule::EmitConstantInit(const VarDecl &D, // incorrectly emit a prvalue constant in this case, and the calling code // interprets that as the (pointer) value of the reference, rather than the // desired value of the referee. - if (D.getType()->isReferenceType()) + if (destType->isReferenceType()) return nullptr; const Expr *E = D.getInit(); assert(E && "No initializer to emit"); - llvm::Constant* C = ConstExprEmitter(*this, CGF).Visit(const_cast<Expr*>(E)); - if (C && C->getType()->isIntegerTy(1)) { - llvm::Type *BoolTy = getTypes().ConvertTypeForMem(E->getType()); - C = llvm::ConstantExpr::getZExt(C, BoolTy); + auto nonMemoryDestType = getNonMemoryType(CGM, destType); + auto C = + ConstExprEmitter(*this).Visit(const_cast<Expr*>(E), nonMemoryDestType); + return (C ? emitForMemory(C, destType) : nullptr); +} + +llvm::Constant * +ConstantEmitter::tryEmitAbstractForMemory(const Expr *E, QualType destType) { + auto nonMemoryDestType = getNonMemoryType(CGM, destType); + auto C = tryEmitAbstract(E, nonMemoryDestType); + return (C ? emitForMemory(C, destType) : nullptr); +} + +llvm::Constant * +ConstantEmitter::tryEmitAbstractForMemory(const APValue &value, + QualType destType) { + auto nonMemoryDestType = getNonMemoryType(CGM, destType); + auto C = tryEmitAbstract(value, nonMemoryDestType); + return (C ? emitForMemory(C, destType) : nullptr); +} + +llvm::Constant *ConstantEmitter::tryEmitPrivateForMemory(const Expr *E, + QualType destType) { + auto nonMemoryDestType = getNonMemoryType(CGM, destType); + llvm::Constant *C = tryEmitPrivate(E, nonMemoryDestType); + return (C ? emitForMemory(C, destType) : nullptr); +} + +llvm::Constant *ConstantEmitter::tryEmitPrivateForMemory(const APValue &value, + QualType destType) { + auto nonMemoryDestType = getNonMemoryType(CGM, destType); + auto C = tryEmitPrivate(value, nonMemoryDestType); + return (C ? emitForMemory(C, destType) : nullptr); +} + +llvm::Constant *ConstantEmitter::emitForMemory(CodeGenModule &CGM, + llvm::Constant *C, + QualType destType) { + // For an _Atomic-qualified constant, we may need to add tail padding. + if (auto AT = destType->getAs<AtomicType>()) { + QualType destValueType = AT->getValueType(); + C = emitForMemory(CGM, C, destValueType); + + uint64_t innerSize = CGM.getContext().getTypeSize(destValueType); + uint64_t outerSize = CGM.getContext().getTypeSize(destType); + if (innerSize == outerSize) + return C; + + assert(innerSize < outerSize && "emitted over-large constant for atomic"); + llvm::Constant *elts[] = { + C, + llvm::ConstantAggregateZero::get( + llvm::ArrayType::get(CGM.Int8Ty, (outerSize - innerSize) / 8)) + }; + return llvm::ConstantStruct::getAnon(elts); + } + + // Zero-extend bool. 
+  if (C->getType()->isIntegerTy(1)) {
+    llvm::Type *boolTy = CGM.getTypes().ConvertTypeForMem(destType);
+    return llvm::ConstantExpr::getZExt(C, boolTy);
   }
+
   return C;
 }
 
-llvm::Constant *CodeGenModule::EmitConstantExpr(const Expr *E,
-                                                QualType DestType,
-                                                CodeGenFunction *CGF) {
+llvm::Constant *ConstantEmitter::tryEmitPrivate(const Expr *E,
+                                                QualType destType) {
   Expr::EvalResult Result;
 
   bool Success = false;
 
-  if (DestType->isReferenceType())
-    Success = E->EvaluateAsLValue(Result, Context);
+  if (destType->isReferenceType())
+    Success = E->EvaluateAsLValue(Result, CGM.getContext());
   else
-    Success = E->EvaluateAsRValue(Result, Context);
+    Success = E->EvaluateAsRValue(Result, CGM.getContext());
 
-  llvm::Constant *C = nullptr;
+  llvm::Constant *C;
   if (Success && !Result.HasSideEffects)
-    C = EmitConstantValue(Result.Val, DestType, CGF);
+    C = tryEmitPrivate(Result.Val, destType);
   else
-    C = ConstExprEmitter(*this, CGF).Visit(const_cast<Expr*>(E));
+    C = ConstExprEmitter(*this).Visit(const_cast<Expr*>(E), destType);
 
-  if (C && C->getType()->isIntegerTy(1)) {
-    llvm::Type *BoolTy = getTypes().ConvertTypeForMem(E->getType());
-    C = llvm::ConstantExpr::getZExt(C, BoolTy);
-  }
   return C;
 }
 
@@ -1270,94 +1509,311 @@ llvm::Constant *CodeGenModule::getNullPointer(llvm::PointerType *T, QualType QT)
   return getTargetCodeGenInfo().getNullPointer(*this, T, QT);
 }
 
-llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value,
-                                                 QualType DestType,
-                                                 CodeGenFunction *CGF) {
-  // For an _Atomic-qualified constant, we may need to add tail padding.
-  if (auto *AT = DestType->getAs<AtomicType>()) {
-    QualType InnerType = AT->getValueType();
-    auto *Inner = EmitConstantValue(Value, InnerType, CGF);
-
-    uint64_t InnerSize = Context.getTypeSize(InnerType);
-    uint64_t OuterSize = Context.getTypeSize(DestType);
-    if (InnerSize == OuterSize)
-      return Inner;
-
-    assert(InnerSize < OuterSize && "emitted over-large constant for atomic");
-    llvm::Constant *Elts[] = {
-      Inner,
-      llvm::ConstantAggregateZero::get(
-          llvm::ArrayType::get(Int8Ty, (OuterSize - InnerSize) / 8))
-    };
-    return llvm::ConstantStruct::getAnon(Elts);
-  }
+namespace {
+/// A struct which can be used to peephole certain kinds of finalization
+/// that normally happen during l-value emission.
+struct ConstantLValue {
+  llvm::Constant *Value;
+  bool HasOffsetApplied;
+
+  /*implicit*/ ConstantLValue(llvm::Constant *value,
+                              bool hasOffsetApplied = false)
+    : Value(value), HasOffsetApplied(hasOffsetApplied) {}
+
+  /*implicit*/ ConstantLValue(ConstantAddress address)
+    : ConstantLValue(address.getPointer()) {}
+};
 
-  switch (Value.getKind()) {
-  case APValue::Uninitialized:
-    llvm_unreachable("Constant expressions should be initialized.");
-  case APValue::LValue: {
-    llvm::Type *DestTy = getTypes().ConvertTypeForMem(DestType);
-    llvm::Constant *Offset =
-      llvm::ConstantInt::get(Int64Ty, Value.getLValueOffset().getQuantity());
-
-    llvm::Constant *C = nullptr;
-
-    if (APValue::LValueBase LVBase = Value.getLValueBase()) {
-      // An array can be represented as an lvalue referring to the base.
-      if (isa<llvm::ArrayType>(DestTy)) {
-        assert(Offset->isNullValue() && "offset on array initializer");
-        return ConstExprEmitter(*this, CGF).Visit(
-          const_cast<Expr*>(LVBase.get<const Expr*>()));
-      }
+/// A helper class for emitting constant l-values.
+class ConstantLValueEmitter : public ConstStmtVisitor<ConstantLValueEmitter, + ConstantLValue> { + CodeGenModule &CGM; + ConstantEmitter &Emitter; + const APValue &Value; + QualType DestType; - C = ConstExprEmitter(*this, CGF).EmitLValue(LVBase).getPointer(); + // Befriend StmtVisitorBase so that we don't have to expose Visit*. + friend StmtVisitorBase; - // Apply offset if necessary. - if (!Offset->isNullValue()) { - unsigned AS = C->getType()->getPointerAddressSpace(); - llvm::Type *CharPtrTy = Int8Ty->getPointerTo(AS); - llvm::Constant *Casted = llvm::ConstantExpr::getBitCast(C, CharPtrTy); - Casted = llvm::ConstantExpr::getGetElementPtr(Int8Ty, Casted, Offset); - C = llvm::ConstantExpr::getPointerCast(Casted, C->getType()); - } +public: + ConstantLValueEmitter(ConstantEmitter &emitter, const APValue &value, + QualType destType) + : CGM(emitter.CGM), Emitter(emitter), Value(value), DestType(destType) {} - // Convert to the appropriate type; this could be an lvalue for - // an integer. - if (isa<llvm::PointerType>(DestTy)) - return llvm::ConstantExpr::getPointerCast(C, DestTy); + llvm::Constant *tryEmit(); - return llvm::ConstantExpr::getPtrToInt(C, DestTy); - } else { - C = Offset; - - // Convert to the appropriate type; this could be an lvalue for - // an integer. - if (auto PT = dyn_cast<llvm::PointerType>(DestTy)) { - if (Value.isNullPointer()) - return getNullPointer(PT, DestType); - // Convert the integer to a pointer-sized integer before converting it - // to a pointer. - C = llvm::ConstantExpr::getIntegerCast( - C, getDataLayout().getIntPtrType(DestTy), - /*isSigned=*/false); - return llvm::ConstantExpr::getIntToPtr(C, DestTy); - } +private: + llvm::Constant *tryEmitAbsolute(llvm::Type *destTy); + ConstantLValue tryEmitBase(const APValue::LValueBase &base); + + ConstantLValue VisitStmt(const Stmt *S) { return nullptr; } + ConstantLValue VisitCompoundLiteralExpr(const CompoundLiteralExpr *E); + ConstantLValue VisitStringLiteral(const StringLiteral *E); + ConstantLValue VisitObjCEncodeExpr(const ObjCEncodeExpr *E); + ConstantLValue VisitObjCStringLiteral(const ObjCStringLiteral *E); + ConstantLValue VisitPredefinedExpr(const PredefinedExpr *E); + ConstantLValue VisitAddrLabelExpr(const AddrLabelExpr *E); + ConstantLValue VisitCallExpr(const CallExpr *E); + ConstantLValue VisitBlockExpr(const BlockExpr *E); + ConstantLValue VisitCXXTypeidExpr(const CXXTypeidExpr *E); + ConstantLValue VisitCXXUuidofExpr(const CXXUuidofExpr *E); + ConstantLValue VisitMaterializeTemporaryExpr( + const MaterializeTemporaryExpr *E); + + bool hasNonZeroOffset() const { + return !Value.getLValueOffset().isZero(); + } - // If the types don't match this should only be a truncate. - if (C->getType() != DestTy) - return llvm::ConstantExpr::getTrunc(C, DestTy); + /// Return the value offset. + llvm::Constant *getOffset() { + return llvm::ConstantInt::get(CGM.Int64Ty, + Value.getLValueOffset().getQuantity()); + } + /// Apply the value offset to the given constant. 
+  llvm::Constant *applyOffset(llvm::Constant *C) {
+    if (!hasNonZeroOffset()) return C;
+
+    llvm::Type *origPtrTy = C->getType();
+    unsigned AS = origPtrTy->getPointerAddressSpace();
+    llvm::Type *charPtrTy = CGM.Int8Ty->getPointerTo(AS);
+    C = llvm::ConstantExpr::getBitCast(C, charPtrTy);
+    C = llvm::ConstantExpr::getGetElementPtr(CGM.Int8Ty, C, getOffset());
+    C = llvm::ConstantExpr::getPointerCast(C, origPtrTy);
+    return C;
+  }
+};
+
+}
+
+llvm::Constant *ConstantLValueEmitter::tryEmit() {
+  const APValue::LValueBase &base = Value.getLValueBase();
+
+  // Certain special array initializers are represented in APValue
+  // as l-values referring to the base expression which generates the
+  // array. This happens with e.g. string literals. These should
+  // probably just get their own representation kind in APValue.
+  if (DestType->isArrayType()) {
+    assert(!hasNonZeroOffset() && "offset on array initializer");
+    auto expr = const_cast<Expr*>(base.get<const Expr*>());
+    return ConstExprEmitter(Emitter).Visit(expr, DestType);
+  }
+
+  // Otherwise, the destination type should be a pointer or reference
+  // type, but it might also be a cast thereof.
+  //
+  // FIXME: the chain of casts required should be reflected in the APValue.
+  // We need this in order to correctly handle things like a ptrtoint of a
+  // non-zero null pointer and addrspace casts that aren't trivially
+  // represented in LLVM IR.
+  auto destTy = CGM.getTypes().ConvertTypeForMem(DestType);
+  assert(isa<llvm::IntegerType>(destTy) || isa<llvm::PointerType>(destTy));
+
+  // If there's no base at all, this is a null or absolute pointer,
+  // possibly cast back to an integer type.
+  if (!base) {
+    return tryEmitAbsolute(destTy);
+  }
+
+  // Otherwise, try to emit the base.
+  ConstantLValue result = tryEmitBase(base);
+
+  // If that failed, we're done.
+  llvm::Constant *value = result.Value;
+  if (!value) return nullptr;
+
+  // Apply the offset if necessary and not already done.
+  if (!result.HasOffsetApplied) {
+    value = applyOffset(value);
+  }
+
+  // Convert to the appropriate type; this could be an lvalue for
+  // an integer. FIXME: performAddrSpaceCast
+  if (isa<llvm::PointerType>(destTy))
+    return llvm::ConstantExpr::getPointerCast(value, destTy);
+
+  return llvm::ConstantExpr::getPtrToInt(value, destTy);
+}
+
+/// Try to emit an absolute l-value, such as a null pointer or an integer
+/// bitcast to pointer type.
+llvm::Constant *
+ConstantLValueEmitter::tryEmitAbsolute(llvm::Type *destTy) {
+  auto offset = getOffset();
+
+  // If we're producing a pointer, this is easy.
+  if (auto destPtrTy = dyn_cast<llvm::PointerType>(destTy)) {
+    if (Value.isNullPointer()) {
+      // FIXME: integer offsets from non-zero null pointers.
+      return CGM.getNullPointer(destPtrTy, DestType);
+    }
+
+    // Convert the integer to a pointer-sized integer before converting it
+    // to a pointer.
+    // FIXME: signedness depends on the original integer type.
+    auto intptrTy = CGM.getDataLayout().getIntPtrType(destPtrTy);
+    llvm::Constant *C = offset;
+    C = llvm::ConstantExpr::getIntegerCast(C, intptrTy,
+                                           /*isSigned*/ false);
+    C = llvm::ConstantExpr::getIntToPtr(C, destPtrTy);
+    return C;
+  }
+
+  // Otherwise, we're basically returning an integer constant.
+
+  // FIXME: this does the wrong thing with ptrtoint of a null pointer,
+  // but since we don't know the original pointer type, there's not much
+  // we can do about it.
+ + auto C = getOffset(); + C = llvm::ConstantExpr::getIntegerCast(C, destTy, /*isSigned*/ false); + return C; +} + +ConstantLValue +ConstantLValueEmitter::tryEmitBase(const APValue::LValueBase &base) { + // Handle values. + if (const ValueDecl *D = base.dyn_cast<const ValueDecl*>()) { + if (D->hasAttr<WeakRefAttr>()) + return CGM.GetWeakRefReference(D).getPointer(); + + if (auto FD = dyn_cast<FunctionDecl>(D)) + return CGM.GetAddrOfFunction(FD); + + if (auto VD = dyn_cast<VarDecl>(D)) { + // We can never refer to a variable with local storage. + if (!VD->hasLocalStorage()) { + if (VD->isFileVarDecl() || VD->hasExternalStorage()) + return CGM.GetAddrOfGlobalVar(VD); + + if (VD->isLocalVarDecl()) { + return CGM.getOrCreateStaticVarDecl( + *VD, CGM.getLLVMLinkageVarDefinition(VD, /*isConstant=*/false)); + } + } + } + + return nullptr; + } + + // Otherwise, it must be an expression. + return Visit(base.get<const Expr*>()); +} + +ConstantLValue +ConstantLValueEmitter::VisitCompoundLiteralExpr(const CompoundLiteralExpr *E) { + return tryEmitGlobalCompoundLiteral(CGM, Emitter.CGF, E); +} + +ConstantLValue +ConstantLValueEmitter::VisitStringLiteral(const StringLiteral *E) { + return CGM.GetAddrOfConstantStringFromLiteral(E); +} + +ConstantLValue +ConstantLValueEmitter::VisitObjCEncodeExpr(const ObjCEncodeExpr *E) { + return CGM.GetAddrOfConstantStringFromObjCEncode(E); +} + +ConstantLValue +ConstantLValueEmitter::VisitObjCStringLiteral(const ObjCStringLiteral *E) { + auto C = CGM.getObjCRuntime().GenerateConstantString(E->getString()); + return C.getElementBitCast(CGM.getTypes().ConvertTypeForMem(E->getType())); +} + +ConstantLValue +ConstantLValueEmitter::VisitPredefinedExpr(const PredefinedExpr *E) { + if (auto CGF = Emitter.CGF) { + LValue Res = CGF->EmitPredefinedLValue(E); + return cast<ConstantAddress>(Res.getAddress()); + } + + auto kind = E->getIdentType(); + if (kind == PredefinedExpr::PrettyFunction) { + return CGM.GetAddrOfConstantCString("top level", ".tmp"); + } + + return CGM.GetAddrOfConstantCString("", ".tmp"); +} + +ConstantLValue +ConstantLValueEmitter::VisitAddrLabelExpr(const AddrLabelExpr *E) { + assert(Emitter.CGF && "Invalid address of label expression outside function"); + llvm::Constant *Ptr = Emitter.CGF->GetAddrOfLabel(E->getLabel()); + Ptr = llvm::ConstantExpr::getBitCast(Ptr, + CGM.getTypes().ConvertType(E->getType())); + return Ptr; +} + +ConstantLValue +ConstantLValueEmitter::VisitCallExpr(const CallExpr *E) { + unsigned builtin = E->getBuiltinCallee(); + if (builtin != Builtin::BI__builtin___CFStringMakeConstantString && + builtin != Builtin::BI__builtin___NSStringMakeConstantString) + return nullptr; + + auto literal = cast<StringLiteral>(E->getArg(0)->IgnoreParenCasts()); + if (builtin == Builtin::BI__builtin___NSStringMakeConstantString) { + return CGM.getObjCRuntime().GenerateConstantString(literal); + } else { + // FIXME: need to deal with UCN conversion issues. 
+ return CGM.GetAddrOfConstantCFString(literal); } +} + +ConstantLValue +ConstantLValueEmitter::VisitBlockExpr(const BlockExpr *E) { + StringRef functionName; + if (auto CGF = Emitter.CGF) + functionName = CGF->CurFn->getName(); + else + functionName = "global"; + + return CGM.GetAddrOfGlobalBlock(E, functionName); +} + +ConstantLValue +ConstantLValueEmitter::VisitCXXTypeidExpr(const CXXTypeidExpr *E) { + QualType T; + if (E->isTypeOperand()) + T = E->getTypeOperand(CGM.getContext()); + else + T = E->getExprOperand()->getType(); + return CGM.GetAddrOfRTTIDescriptor(T); +} + +ConstantLValue +ConstantLValueEmitter::VisitCXXUuidofExpr(const CXXUuidofExpr *E) { + return CGM.GetAddrOfUuidDescriptor(E); +} + +ConstantLValue +ConstantLValueEmitter::VisitMaterializeTemporaryExpr( + const MaterializeTemporaryExpr *E) { + assert(E->getStorageDuration() == SD_Static); + SmallVector<const Expr *, 2> CommaLHSs; + SmallVector<SubobjectAdjustment, 2> Adjustments; + const Expr *Inner = E->GetTemporaryExpr() + ->skipRValueSubobjectAdjustments(CommaLHSs, Adjustments); + return CGM.GetAddrOfGlobalTemporary(E, Inner); +} + +llvm::Constant *ConstantEmitter::tryEmitPrivate(const APValue &Value, + QualType DestType) { + switch (Value.getKind()) { + case APValue::Uninitialized: + llvm_unreachable("Constant expressions should be initialized."); + case APValue::LValue: + return ConstantLValueEmitter(*this, Value, DestType).tryEmit(); case APValue::Int: - return llvm::ConstantInt::get(VMContext, Value.getInt()); + return llvm::ConstantInt::get(CGM.getLLVMContext(), Value.getInt()); case APValue::ComplexInt: { llvm::Constant *Complex[2]; - Complex[0] = llvm::ConstantInt::get(VMContext, + Complex[0] = llvm::ConstantInt::get(CGM.getLLVMContext(), Value.getComplexIntReal()); - Complex[1] = llvm::ConstantInt::get(VMContext, + Complex[1] = llvm::ConstantInt::get(CGM.getLLVMContext(), Value.getComplexIntImag()); // FIXME: the target may want to specify that this is packed. @@ -1368,18 +1824,19 @@ llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value, case APValue::Float: { const llvm::APFloat &Init = Value.getFloat(); if (&Init.getSemantics() == &llvm::APFloat::IEEEhalf() && - !Context.getLangOpts().NativeHalfType && - !Context.getLangOpts().HalfArgsAndReturns) - return llvm::ConstantInt::get(VMContext, Init.bitcastToAPInt()); + !CGM.getContext().getLangOpts().NativeHalfType && + CGM.getContext().getTargetInfo().useFP16ConversionIntrinsics()) + return llvm::ConstantInt::get(CGM.getLLVMContext(), + Init.bitcastToAPInt()); else - return llvm::ConstantFP::get(VMContext, Init); + return llvm::ConstantFP::get(CGM.getLLVMContext(), Init); } case APValue::ComplexFloat: { llvm::Constant *Complex[2]; - Complex[0] = llvm::ConstantFP::get(VMContext, + Complex[0] = llvm::ConstantFP::get(CGM.getLLVMContext(), Value.getComplexFloatReal()); - Complex[1] = llvm::ConstantFP::get(VMContext, + Complex[1] = llvm::ConstantFP::get(CGM.getLLVMContext(), Value.getComplexFloatImag()); // FIXME: the target may want to specify that this is packed. 
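Note how the ComplexInt and ComplexFloat cases above lower a complex constant to an anonymous two-element struct holding the real and imaginary parts. A minimal sketch of the same lowering against the LLVM C++ API (the free function and the constant value are invented for illustration):

#include "llvm/ADT/APFloat.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"

// Sketch: emit (3.0 + 4.0i) the way the ComplexFloat case above does,
// as an anonymous { double, double } struct constant.
llvm::Constant *emitComplexDouble(llvm::LLVMContext &Ctx) {
  llvm::Constant *Parts[2] = {
      llvm::ConstantFP::get(Ctx, llvm::APFloat(3.0)), // real part
      llvm::ConstantFP::get(Ctx, llvm::APFloat(4.0)), // imaginary part
  };
  // Unpacked for now; the FIXME above notes a target may want it packed.
  return llvm::ConstantStruct::getAnon(Parts);
}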
@@ -1394,9 +1851,9 @@ llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value, for (unsigned I = 0; I != NumElts; ++I) { const APValue &Elt = Value.getVectorElt(I); if (Elt.isInt()) - Inits[I] = llvm::ConstantInt::get(VMContext, Elt.getInt()); + Inits[I] = llvm::ConstantInt::get(CGM.getLLVMContext(), Elt.getInt()); else if (Elt.isFloat()) - Inits[I] = llvm::ConstantFP::get(VMContext, Elt.getFloat()); + Inits[I] = llvm::ConstantFP::get(CGM.getLLVMContext(), Elt.getFloat()); else llvm_unreachable("unsupported vector element type"); } @@ -1405,13 +1862,14 @@ llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value, case APValue::AddrLabelDiff: { const AddrLabelExpr *LHSExpr = Value.getAddrLabelDiffLHS(); const AddrLabelExpr *RHSExpr = Value.getAddrLabelDiffRHS(); - llvm::Constant *LHS = EmitConstantExpr(LHSExpr, LHSExpr->getType(), CGF); - llvm::Constant *RHS = EmitConstantExpr(RHSExpr, RHSExpr->getType(), CGF); + llvm::Constant *LHS = tryEmitPrivate(LHSExpr, LHSExpr->getType()); + llvm::Constant *RHS = tryEmitPrivate(RHSExpr, RHSExpr->getType()); + if (!LHS || !RHS) return nullptr; // Compute difference - llvm::Type *ResultType = getTypes().ConvertType(DestType); - LHS = llvm::ConstantExpr::getPtrToInt(LHS, IntPtrTy); - RHS = llvm::ConstantExpr::getPtrToInt(RHS, IntPtrTy); + llvm::Type *ResultType = CGM.getTypes().ConvertType(DestType); + LHS = llvm::ConstantExpr::getPtrToInt(LHS, CGM.IntPtrTy); + RHS = llvm::ConstantExpr::getPtrToInt(RHS, CGM.IntPtrTy); llvm::Constant *AddrLabelDiff = llvm::ConstantExpr::getSub(LHS, RHS); // LLVM is a bit sensitive about the exact format of the @@ -1421,21 +1879,21 @@ llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value, } case APValue::Struct: case APValue::Union: - return ConstStructBuilder::BuildStruct(*this, CGF, Value, DestType); + return ConstStructBuilder::BuildStruct(*this, Value, DestType); case APValue::Array: { - const ArrayType *CAT = Context.getAsArrayType(DestType); + const ArrayType *CAT = CGM.getContext().getAsArrayType(DestType); unsigned NumElements = Value.getArraySize(); unsigned NumInitElts = Value.getArrayInitializedElts(); // Emit array filler, if there is one. llvm::Constant *Filler = nullptr; if (Value.hasArrayFiller()) - Filler = EmitConstantValueForMemory(Value.getArrayFiller(), - CAT->getElementType(), CGF); + Filler = tryEmitAbstractForMemory(Value.getArrayFiller(), + CAT->getElementType()); // Emit initializer elements. llvm::Type *CommonElementType = - getTypes().ConvertType(CAT->getElementType()); + CGM.getTypes().ConvertType(CAT->getElementType()); // Try to use a ConstantAggregateZero if we can. 
if (Filler && Filler->isNullValue() && !NumInitElts) { @@ -1444,15 +1902,21 @@ llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value, return llvm::ConstantAggregateZero::get(AType); } - std::vector<llvm::Constant*> Elts; + SmallVector<llvm::Constant*, 16> Elts; Elts.reserve(NumElements); for (unsigned I = 0; I < NumElements; ++I) { llvm::Constant *C = Filler; - if (I < NumInitElts) - C = EmitConstantValueForMemory(Value.getArrayInitializedElt(I), - CAT->getElementType(), CGF); - else - assert(Filler && "Missing filler for implicit elements of initializer"); + if (I < NumInitElts) { + C = tryEmitPrivateForMemory(Value.getArrayInitializedElt(I), + CAT->getElementType()); + } else if (!Filler) { + assert(Value.hasArrayFiller() && + "Missing filler for implicit elements of initializer"); + C = tryEmitPrivateForMemory(Value.getArrayFiller(), + CAT->getElementType()); + } + if (!C) return nullptr; + if (I == 0) CommonElementType = C->getType(); else if (C->getType() != CommonElementType) @@ -1466,7 +1930,8 @@ llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value, Types.reserve(NumElements); for (unsigned i = 0, e = Elts.size(); i < e; ++i) Types.push_back(Elts[i]->getType()); - llvm::StructType *SType = llvm::StructType::get(VMContext, Types, true); + llvm::StructType *SType = + llvm::StructType::get(CGM.getLLVMContext(), Types, true); return llvm::ConstantStruct::get(SType, Elts); } @@ -1475,23 +1940,11 @@ llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value, return llvm::ConstantArray::get(AType, Elts); } case APValue::MemberPointer: - return getCXXABI().EmitMemberPointer(Value, DestType); + return CGM.getCXXABI().EmitMemberPointer(Value, DestType); } llvm_unreachable("Unknown APValue kind"); } -llvm::Constant * -CodeGenModule::EmitConstantValueForMemory(const APValue &Value, - QualType DestType, - CodeGenFunction *CGF) { - llvm::Constant *C = EmitConstantValue(Value, DestType, CGF); - if (C->getType()->isIntegerTy(1)) { - llvm::Type *BoolTy = getTypes().ConvertTypeForMem(DestType); - C = llvm::ConstantExpr::getZExt(C, BoolTy); - } - return C; -} - llvm::GlobalVariable *CodeGenModule::getAddrOfConstantCompoundLiteralIfEmitted( const CompoundLiteralExpr *E) { return EmittedCompoundLiterals.lookup(E); @@ -1507,7 +1960,7 @@ void CodeGenModule::setAddrOfConstantCompoundLiteral( ConstantAddress CodeGenModule::GetAddrOfConstantCompoundLiteral(const CompoundLiteralExpr *E) { assert(E->isFileScope() && "not a file-scope compound literal expr"); - return ConstExprEmitter(*this, nullptr).EmitLValue(E); + return tryEmitGlobalCompoundLiteral(*this, nullptr, E); } llvm::Constant * @@ -1629,6 +2082,11 @@ static llvm::Constant *EmitNullConstantForBase(CodeGenModule &CGM, return EmitNullConstant(CGM, base, /*asCompleteObject=*/false); } +llvm::Constant *ConstantEmitter::emitNullForMemory(CodeGenModule &CGM, + QualType T) { + return emitForMemory(CGM, CGM.EmitNullConstant(T), T); +} + llvm::Constant *CodeGenModule::EmitNullConstant(QualType T) { if (T->getAs<PointerType>()) return getNullPointer( @@ -1643,7 +2101,8 @@ llvm::Constant *CodeGenModule::EmitNullConstant(QualType T) { QualType ElementTy = CAT->getElementType(); - llvm::Constant *Element = EmitNullConstant(ElementTy); + llvm::Constant *Element = + ConstantEmitter::emitNullForMemory(*this, ElementTy); unsigned NumElements = CAT->getSize().getZExtValue(); SmallVector<llvm::Constant *, 8> Array(NumElements, Element); return llvm::ConstantArray::get(ATy, Array); diff --git a/lib/CodeGen/CGExprScalar.cpp 
b/lib/CodeGen/CGExprScalar.cpp index 1170b014ec7f..c46215067a68 100644 --- a/lib/CodeGen/CGExprScalar.cpp +++ b/lib/CodeGen/CGExprScalar.cpp @@ -428,14 +428,19 @@ public: return CGF.getOpaqueRValueMapping(E).getScalarVal(); } + Value *emitConstant(const CodeGenFunction::ConstantEmission &Constant, + Expr *E) { + assert(Constant && "not a constant"); + if (Constant.isReference()) + return EmitLoadOfLValue(Constant.getReferenceLValue(CGF, E), + E->getExprLoc()); + return Constant.getValue(); + } + // l-values. Value *VisitDeclRefExpr(DeclRefExpr *E) { - if (CodeGenFunction::ConstantEmission result = CGF.tryEmitAsConstant(E)) { - if (result.isReference()) - return EmitLoadOfLValue(result.getReferenceLValue(CGF, E), - E->getExprLoc()); - return result.getValue(); - } + if (CodeGenFunction::ConstantEmission Constant = CGF.tryEmitAsConstant(E)) + return emitConstant(Constant, E); return EmitLoadOfLValue(E); } @@ -946,7 +951,7 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, if (SrcType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) { // Cast to FP using the intrinsic if the half type itself isn't supported. if (DstTy->isFloatingPointTy()) { - if (!CGF.getContext().getLangOpts().HalfArgsAndReturns) + if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) return Builder.CreateCall( CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16, DstTy), Src); @@ -954,7 +959,7 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, // Cast to other types through float, using either the intrinsic or FPExt, // depending on whether the half type itself is supported // (as opposed to operations on half, available with NativeHalfType). - if (!CGF.getContext().getLangOpts().HalfArgsAndReturns) { + if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) { Src = Builder.CreateCall( CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16, CGF.CGM.FloatTy), @@ -1009,10 +1014,42 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, return Builder.CreateVectorSplat(NumElements, Src, "splat"); } - // Allow bitcast from vector to integer/fp of the same size. - if (isa<llvm::VectorType>(SrcTy) || - isa<llvm::VectorType>(DstTy)) - return Builder.CreateBitCast(Src, DstTy, "conv"); + if (isa<llvm::VectorType>(SrcTy) || isa<llvm::VectorType>(DstTy)) { + // Allow bitcast from vector to integer/fp of the same size. + unsigned SrcSize = SrcTy->getPrimitiveSizeInBits(); + unsigned DstSize = DstTy->getPrimitiveSizeInBits(); + if (SrcSize == DstSize) + return Builder.CreateBitCast(Src, DstTy, "conv"); + + // Conversions between vectors of different sizes are not allowed except + // when vectors of half are involved. Operations on storage-only half + // vectors require promoting half vector operands to float vectors and + // truncating the result, which is either an int or float vector, to a + // short or half vector. + + // Source and destination are both expected to be vectors. + llvm::Type *SrcElementTy = SrcTy->getVectorElementType(); + llvm::Type *DstElementTy = DstTy->getVectorElementType(); + (void)DstElementTy; + + assert(((SrcElementTy->isIntegerTy() && + DstElementTy->isIntegerTy()) || + (SrcElementTy->isFloatingPointTy() && + DstElementTy->isFloatingPointTy())) && + "unexpected conversion between a floating-point vector and an " + "integer vector"); + + // Truncate an i32 vector to an i16 vector. 
+ if (SrcElementTy->isIntegerTy()) + return Builder.CreateIntCast(Src, DstTy, false, "conv"); + + // Truncate a float vector to a half vector. + if (SrcSize > DstSize) + return Builder.CreateFPTrunc(Src, DstTy, "conv"); + + // Promote a half vector to a float vector. + return Builder.CreateFPExt(Src, DstTy, "conv"); + } // Finally, we have the arithmetic types: real int/float. Value *Res = nullptr; @@ -1031,7 +1068,7 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, if (SrcTy->isFloatingPointTy()) { // Use the intrinsic if the half type itself isn't supported // (as opposed to operations on half, available with NativeHalfType). - if (!CGF.getContext().getLangOpts().HalfArgsAndReturns) + if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) return Builder.CreateCall( CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16, SrcTy), Src); // If the half type is supported, just use an fptrunc. @@ -1067,7 +1104,7 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, } if (DstTy != ResTy) { - if (!CGF.getContext().getLangOpts().HalfArgsAndReturns) { + if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) { assert(ResTy->isIntegerTy(16) && "Only half FP requires extra conversion"); Res = Builder.CreateCall( CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16, CGF.CGM.FloatTy), @@ -1299,13 +1336,15 @@ Value *ScalarExprEmitter::VisitConvertVectorExpr(ConvertVectorExpr *E) { } Value *ScalarExprEmitter::VisitMemberExpr(MemberExpr *E) { - llvm::APSInt Value; - if (E->EvaluateAsInt(Value, CGF.getContext(), Expr::SE_AllowSideEffects)) { - if (E->isArrow()) - CGF.EmitScalarExpr(E->getBase()); - else - EmitLValue(E->getBase()); - return Builder.getInt(Value); + if (CodeGenFunction::ConstantEmission Constant = CGF.tryEmitAsConstant(E)) { + CGF.EmitIgnoredExpr(E->getBase()); + return emitConstant(Constant, E); + } else { + llvm::APSInt Value; + if (E->EvaluateAsInt(Value, CGF.getContext(), Expr::SE_AllowSideEffects)) { + CGF.EmitIgnoredExpr(E->getBase()); + return Builder.getInt(Value); + } } return EmitLoadOfLValue(E); @@ -1778,7 +1817,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { } case CK_IntToOCLSampler: - return CGF.CGM.createOpenCLIntToSamplerConversion(E, CGF);
+ return CGF.CGM.createOpenCLIntToSamplerConversion(E, CGF); } // end of switch @@ -1989,7 +2028,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) { // Another special case: half FP increment should be done via float - if (!CGF.getContext().getLangOpts().HalfArgsAndReturns) { + if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) { value = Builder.CreateCall( CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16, CGF.CGM.FloatTy), @@ -2024,7 +2063,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, value = Builder.CreateFAdd(value, amt, isInc ? "inc" : "dec"); if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) { - if (!CGF.getContext().getLangOpts().HalfArgsAndReturns) { + if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) { value = Builder.CreateCall( CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16, CGF.CGM.FloatTy), @@ -2671,6 +2710,30 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, unsigned width = cast<llvm::IntegerType>(index->getType())->getBitWidth(); auto &DL = CGF.CGM.getDataLayout(); auto PtrTy = cast<llvm::PointerType>(pointer->getType()); + + // Some versions of glibc and gcc use idioms (particularly in their malloc + // routines) that add a pointer-sized integer (known to be a pointer value) + // to a null pointer in order to cast the value back to an integer or as + // part of a pointer alignment algorithm. This is undefined behavior, but + // we'd like to be able to compile programs that use it. + // + // Normally, we'd generate a GEP with a null-pointer base here in response + // to that code, but it's also UB to dereference a pointer created that + // way. Instead (as an acknowledged hack to tolerate the idiom) we will + // generate a direct cast of the integer value to a pointer. + // + // The idiom (p = nullptr + N) is not met if any of the following are true: + // + // The operation is subtraction. + // The index is not pointer-sized. + // The pointer type is not byte-sized. + // + if (BinaryOperator::isNullPointerArithmeticExtension(CGF.getContext(), + op.Opcode, + expr->getLHS(), + expr->getRHS())) + return CGF.Builder.CreateIntToPtr(index, pointer->getType()); + if (width != DL.getTypeSizeInBits(PtrTy)) { // Zero-extend or sign-extend the pointer value according to // whether the index is signed or not. @@ -3057,16 +3120,25 @@ static llvm::Intrinsic::ID GetIntrinsic(IntrinsicType IT, return (IT == VCMPEQ) ? llvm::Intrinsic::ppc_altivec_vcmpequh_p : llvm::Intrinsic::ppc_altivec_vcmpgtsh_p; case BuiltinType::UInt: - case BuiltinType::ULong: return (IT == VCMPEQ) ? llvm::Intrinsic::ppc_altivec_vcmpequw_p : llvm::Intrinsic::ppc_altivec_vcmpgtuw_p; case BuiltinType::Int: - case BuiltinType::Long: return (IT == VCMPEQ) ? llvm::Intrinsic::ppc_altivec_vcmpequw_p : llvm::Intrinsic::ppc_altivec_vcmpgtsw_p; + case BuiltinType::ULong: + case BuiltinType::ULongLong: + return (IT == VCMPEQ) ? llvm::Intrinsic::ppc_altivec_vcmpequd_p : + llvm::Intrinsic::ppc_altivec_vcmpgtud_p; + case BuiltinType::Long: + case BuiltinType::LongLong: + return (IT == VCMPEQ) ? llvm::Intrinsic::ppc_altivec_vcmpequd_p : + llvm::Intrinsic::ppc_altivec_vcmpgtsd_p; case BuiltinType::Float: return (IT == VCMPEQ) ? llvm::Intrinsic::ppc_altivec_vcmpeqfp_p : llvm::Intrinsic::ppc_altivec_vcmpgtfp_p; + case BuiltinType::Double: + return (IT == VCMPEQ) ? 
llvm::Intrinsic::ppc_vsx_xvcmpeqdp_p :
+                            llvm::Intrinsic::ppc_vsx_xvcmpgtdp_p;
   }
 }
 
@@ -3151,6 +3223,16 @@ Value *ScalarExprEmitter::EmitCompare(const BinaryOperator *E,
       Value *CR6Param = Builder.getInt32(CR6);
       llvm::Function *F = CGF.CGM.getIntrinsic(ID);
       Result = Builder.CreateCall(F, {CR6Param, FirstVecArg, SecondVecArg});
+
+      // The result type of the intrinsic may not be the same as
+      // E->getType(). If E->getType() is not BoolTy, EmitScalarConversion
+      // will do the conversion work. If E->getType() is BoolTy,
+      // EmitScalarConversion will do nothing; in that case, a ResultTy
+      // that is not i1 would cause a crash later.
+      llvm::IntegerType *ResultTy = cast<llvm::IntegerType>(Result->getType());
+      if (ResultTy->getBitWidth() > 1 &&
+          E->getType() == CGF.getContext().BoolTy)
+        Result = Builder.CreateTrunc(Result, Builder.getInt1Ty());
       return EmitScalarConversion(Result, CGF.getContext().BoolTy, E->getType(),
                                   E->getExprLoc());
     }
 
@@ -3840,6 +3922,7 @@ LValue CodeGenFunction::EmitCompoundAssignmentLValue(
   case BO_GE:
   case BO_EQ:
   case BO_NE:
+  case BO_Cmp:
   case BO_And:
   case BO_Xor:
   case BO_Or:
diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp
index 90fcad261415..f26263d9472d 100644
--- a/lib/CodeGen/CGObjC.cpp
+++ b/lib/CodeGen/CGObjC.cpp
@@ -162,7 +162,7 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E,
     const Expr *Rhs = ALE->getElement(i);
     LValue LV = MakeAddrLValue(
         Builder.CreateConstArrayGEP(Objects, i, getPointerSize()),
-        ElementType, LValueBaseInfo(AlignmentSource::Decl, false));
+        ElementType, AlignmentSource::Decl);
     llvm::Value *value = EmitScalarExpr(Rhs);
     EmitStoreThroughLValue(RValue::get(value), LV, true);
@@ -174,7 +174,7 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E,
     const Expr *Key = DLE->getKeyValueElement(i).Key;
     LValue KeyLV = MakeAddrLValue(
         Builder.CreateConstArrayGEP(Keys, i, getPointerSize()),
-        ElementType, LValueBaseInfo(AlignmentSource::Decl, false));
+        ElementType, AlignmentSource::Decl);
     llvm::Value *keyValue = EmitScalarExpr(Key);
     EmitStoreThroughLValue(RValue::get(keyValue), KeyLV, /*isInit=*/true);
@@ -182,7 +182,7 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E,
     const Expr *Value = DLE->getKeyValueElement(i).Value;
     LValue ValueLV = MakeAddrLValue(
         Builder.CreateConstArrayGEP(Objects, i, getPointerSize()),
-        ElementType, LValueBaseInfo(AlignmentSource::Decl, false));
+        ElementType, AlignmentSource::Decl);
     llvm::Value *valueValue = EmitScalarExpr(Value);
     EmitStoreThroughLValue(RValue::get(valueValue), ValueLV, /*isInit=*/true);
     if (TrackNeededObjects) {
@@ -1546,16 +1546,15 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){
                 getContext().getPointerType(ItemsTy));
 
   // The third argument is the capacity of that temporary array.
-  llvm::Type *UnsignedLongLTy = ConvertType(getContext().UnsignedLongTy);
-  llvm::Constant *Count = llvm::ConstantInt::get(UnsignedLongLTy, NumItems);
-  Args.add(RValue::get(Count), getContext().UnsignedLongTy);
+  llvm::Type *NSUIntegerTy = ConvertType(getContext().getNSUIntegerType());
+  llvm::Constant *Count = llvm::ConstantInt::get(NSUIntegerTy, NumItems);
+  Args.add(RValue::get(Count), getContext().getNSUIntegerType());
 
   // Start the enumeration.
RValue CountRV = - CGM.getObjCRuntime().GenerateMessageSend(*this, ReturnValueSlot(), - getContext().UnsignedLongTy, - FastEnumSel, - Collection, Args); + CGM.getObjCRuntime().GenerateMessageSend(*this, ReturnValueSlot(), + getContext().getNSUIntegerType(), + FastEnumSel, Collection, Args); // The initial number of objects that were returned in the buffer. llvm::Value *initialBufferLimit = CountRV.getScalarVal(); @@ -1563,7 +1562,7 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ llvm::BasicBlock *EmptyBB = createBasicBlock("forcoll.empty"); llvm::BasicBlock *LoopInitBB = createBasicBlock("forcoll.loopinit"); - llvm::Value *zero = llvm::Constant::getNullValue(UnsignedLongLTy); + llvm::Value *zero = llvm::Constant::getNullValue(NSUIntegerTy); // If the limit pointer was zero to begin with, the collection is // empty; skip all this. Set the branch weight assuming this has the same @@ -1595,11 +1594,11 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ EmitBlock(LoopBodyBB); // The current index into the buffer. - llvm::PHINode *index = Builder.CreatePHI(UnsignedLongLTy, 3, "forcoll.index"); + llvm::PHINode *index = Builder.CreatePHI(NSUIntegerTy, 3, "forcoll.index"); index->addIncoming(zero, LoopInitBB); // The current buffer size. - llvm::PHINode *count = Builder.CreatePHI(UnsignedLongLTy, 3, "forcoll.count"); + llvm::PHINode *count = Builder.CreatePHI(NSUIntegerTy, 3, "forcoll.count"); count->addIncoming(initialBufferLimit, LoopInitBB); incrementProfileCounter(&S); @@ -1709,8 +1708,8 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ llvm::BasicBlock *FetchMoreBB = createBasicBlock("forcoll.refetch"); // First we check in the local buffer. - llvm::Value *indexPlusOne - = Builder.CreateAdd(index, llvm::ConstantInt::get(UnsignedLongLTy, 1)); + llvm::Value *indexPlusOne = + Builder.CreateAdd(index, llvm::ConstantInt::get(NSUIntegerTy, 1)); // If we haven't overrun the buffer yet, we can continue. // Set the branch weights based on the simplifying assumption that this is @@ -1727,10 +1726,9 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ EmitBlock(FetchMoreBB); CountRV = - CGM.getObjCRuntime().GenerateMessageSend(*this, ReturnValueSlot(), - getContext().UnsignedLongTy, - FastEnumSel, - Collection, Args); + CGM.getObjCRuntime().GenerateMessageSend(*this, ReturnValueSlot(), + getContext().getNSUIntegerType(), + FastEnumSel, Collection, Args); // If we got a zero count, we're done. llvm::Value *refetchCount = CountRV.getScalarVal(); diff --git a/lib/CodeGen/CGObjCMac.cpp b/lib/CodeGen/CGObjCMac.cpp index 98435fefbd2e..ef4e6cd4f01b 100644 --- a/lib/CodeGen/CGObjCMac.cpp +++ b/lib/CodeGen/CGObjCMac.cpp @@ -4885,10 +4885,7 @@ void CGObjCCommonMac::EmitImageInfo() { } // Indicate whether we're compiling this to run on a simulator. - const llvm::Triple &Triple = CGM.getTarget().getTriple(); - if ((Triple.isiOS() || Triple.isWatchOS()) && - (Triple.getArch() == llvm::Triple::x86 || - Triple.getArch() == llvm::Triple::x86_64)) + if (CGM.getTarget().getTriple().isSimulatorEnvironment()) Mod.addModuleFlag(llvm::Module::Error, "Objective-C Is Simulated", eImageInfo_ImageIsSimulated); @@ -5084,6 +5081,11 @@ void IvarLayoutBuilder::visitField(const FieldDecl *field, // Drill down into arrays. 
uint64_t numElts = 1; + if (auto arrayType = CGM.getContext().getAsIncompleteArrayType(fieldType)) { + numElts = 0; + fieldType = arrayType->getElementType(); + } + // Unlike incomplete arrays, constant arrays can be nested. while (auto arrayType = CGM.getContext().getAsConstantArrayType(fieldType)) { numElts *= arrayType->getSize().getZExtValue(); fieldType = arrayType->getElementType(); @@ -6615,10 +6617,14 @@ CGObjCNonFragileABIMac::ObjCIvarOffsetVariable(const ObjCInterfaceDecl *ID, Ivar->getAccessControl() == ObjCIvarDecl::Private || Ivar->getAccessControl() == ObjCIvarDecl::Package; - if (ID->hasAttr<DLLExportAttr>() && !IsPrivateOrPackage) - IvarOffsetGV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); - else if (ID->hasAttr<DLLImportAttr>()) - IvarOffsetGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + const ObjCInterfaceDecl *ContainingID = Ivar->getContainingInterface(); + + if (ContainingID->hasAttr<DLLImportAttr>()) + IvarOffsetGV + ->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + else if (ContainingID->hasAttr<DLLExportAttr>() && !IsPrivateOrPackage) + IvarOffsetGV + ->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); } } return IvarOffsetGV; @@ -7549,8 +7555,9 @@ CGObjCNonFragileABIMac::GetInterfaceEHType(const ObjCInterfaceDecl *ID, llvm::Value *VTableIdx = llvm::ConstantInt::get(CGM.Int32Ty, 2); ConstantInitBuilder builder(CGM); auto values = builder.beginStruct(ObjCTypes.EHTypeTy); - values.add(llvm::ConstantExpr::getGetElementPtr(VTableGV->getValueType(), - VTableGV, VTableIdx)); + values.add( + llvm::ConstantExpr::getInBoundsGetElementPtr(VTableGV->getValueType(), + VTableGV, VTableIdx)); values.add(GetClassName(ClassName)); values.add(GetClassGlobal(ID, /*metaclass*/ false, NotForDefinition)); diff --git a/lib/CodeGen/CGObjCRuntime.cpp b/lib/CodeGen/CGObjCRuntime.cpp index 4cfddcb107cb..2f886fd82caa 100644 --- a/lib/CodeGen/CGObjCRuntime.cpp +++ b/lib/CodeGen/CGObjCRuntime.cpp @@ -110,7 +110,8 @@ LValue CGObjCRuntime::EmitValueForIvarAtOffset(CodeGen::CodeGenFunction &CGF, llvm::Type::getIntNTy(CGF.getLLVMContext(), Info->StorageSize)); return LValue::MakeBitfield(Addr, *Info, IvarTy, - LValueBaseInfo(AlignmentSource::Decl, false)); + LValueBaseInfo(AlignmentSource::Decl), + TBAAAccessInfo()); } namespace { diff --git a/lib/CodeGen/CGOpenCLRuntime.cpp b/lib/CodeGen/CGOpenCLRuntime.cpp index db02c631c9e6..d140e7f09e9a 100644 --- a/lib/CodeGen/CGOpenCLRuntime.cpp +++ b/lib/CodeGen/CGOpenCLRuntime.cpp @@ -16,6 +16,7 @@ #include "CGOpenCLRuntime.h" #include "CodeGenFunction.h" #include "TargetInfo.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalValue.h" #include <assert.h> @@ -35,8 +36,8 @@ llvm::Type *CGOpenCLRuntime::convertOpenCLSpecificType(const Type *T) { "Not an OpenCL specific type!"); llvm::LLVMContext& Ctx = CGM.getLLVMContext(); - uint32_t ImgAddrSpc = CGM.getContext().getTargetAddressSpace( - CGM.getTarget().getOpenCLImageAddrSpace()); + uint32_t AddrSpc = CGM.getContext().getTargetAddressSpace( + CGM.getContext().getOpenCLTypeAddrSpace(T)); switch (cast<BuiltinType>(T)->getKind()) { default: llvm_unreachable("Unexpected opencl builtin type!"); @@ -45,29 +46,29 @@ llvm::Type *CGOpenCLRuntime::convertOpenCLSpecificType(const Type *T) { case BuiltinType::Id: \ return llvm::PointerType::get( \ llvm::StructType::create(Ctx, "opencl." 
#ImgType "_" #Suffix "_t"), \ - ImgAddrSpc); + AddrSpc); #include "clang/Basic/OpenCLImageTypes.def" case BuiltinType::OCLSampler: - return getSamplerType(); + return getSamplerType(T); case BuiltinType::OCLEvent: - return llvm::PointerType::get(llvm::StructType::create( - Ctx, "opencl.event_t"), 0); + return llvm::PointerType::get( + llvm::StructType::create(Ctx, "opencl.event_t"), AddrSpc); case BuiltinType::OCLClkEvent: return llvm::PointerType::get( - llvm::StructType::create(Ctx, "opencl.clk_event_t"), 0); + llvm::StructType::create(Ctx, "opencl.clk_event_t"), AddrSpc); case BuiltinType::OCLQueue: return llvm::PointerType::get( - llvm::StructType::create(Ctx, "opencl.queue_t"), 0); + llvm::StructType::create(Ctx, "opencl.queue_t"), AddrSpc); case BuiltinType::OCLReserveID: return llvm::PointerType::get( - llvm::StructType::create(Ctx, "opencl.reserve_id_t"), 0); + llvm::StructType::create(Ctx, "opencl.reserve_id_t"), AddrSpc); } } -llvm::Type *CGOpenCLRuntime::getPipeType() { +llvm::Type *CGOpenCLRuntime::getPipeType(const PipeType *T) { if (!PipeTy){ - uint32_t PipeAddrSpc = - CGM.getContext().getTargetAddressSpace(LangAS::opencl_global); + uint32_t PipeAddrSpc = CGM.getContext().getTargetAddressSpace( + CGM.getContext().getOpenCLTypeAddrSpace(T)); PipeTy = llvm::PointerType::get(llvm::StructType::create( CGM.getLLVMContext(), "opencl.pipe_t"), PipeAddrSpc); } @@ -75,12 +76,12 @@ llvm::Type *CGOpenCLRuntime::getPipeType() { return PipeTy; } -llvm::PointerType *CGOpenCLRuntime::getSamplerType() { +llvm::PointerType *CGOpenCLRuntime::getSamplerType(const Type *T) { if (!SamplerTy) SamplerTy = llvm::PointerType::get(llvm::StructType::create( CGM.getLLVMContext(), "opencl.sampler_t"), CGM.getContext().getTargetAddressSpace( - LangAS::opencl_constant)); + CGM.getContext().getOpenCLTypeAddrSpace(T))); return SamplerTy; } @@ -103,3 +104,45 @@ llvm::Value *CGOpenCLRuntime::getPipeElemAlign(const Expr *PipeArg) { .getQuantity(); return llvm::ConstantInt::get(Int32Ty, TypeSize, false); } + +llvm::PointerType *CGOpenCLRuntime::getGenericVoidPointerType() { + assert(CGM.getLangOpts().OpenCL); + return llvm::IntegerType::getInt8PtrTy( + CGM.getLLVMContext(), + CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); +} + +CGOpenCLRuntime::EnqueuedBlockInfo +CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) { + // The block literal may be assigned to a const variable. Chasing down + // to get the block literal. + if (auto DR = dyn_cast<DeclRefExpr>(E)) { + E = cast<VarDecl>(DR->getDecl())->getInit(); + } + if (auto Cast = dyn_cast<CastExpr>(E)) { + E = Cast->getSubExpr(); + } + auto *Block = cast<BlockExpr>(E); + + // The same block literal may be enqueued multiple times. Cache it if + // possible. + auto Loc = EnqueuedBlockMap.find(Block); + if (Loc != EnqueuedBlockMap.end()) { + return Loc->second; + } + + // Emit block literal as a common block expression and get the block invoke + // function. + llvm::Function *Invoke; + auto *V = CGF.EmitBlockLiteral(cast<BlockExpr>(Block), &Invoke); + auto *F = CGF.getTargetHooks().createEnqueuedBlockKernel( + CGF, Invoke, V->stripPointerCasts()); + + // The common part of the post-processing of the kernel goes here. 
+  F->addFnAttr(llvm::Attribute::NoUnwind);
+  F->setCallingConv(
+      CGF.getTypes().ClangCallConvToLLVMCallConv(CallingConv::CC_OpenCLKernel));
+  EnqueuedBlockInfo Info{F, V};
+  EnqueuedBlockMap[Block] = Info;
+  return Info;
+}
diff --git a/lib/CodeGen/CGOpenCLRuntime.h b/lib/CodeGen/CGOpenCLRuntime.h
index ee3cb3dda063..ead303d1d0d5 100644
--- a/lib/CodeGen/CGOpenCLRuntime.h
+++ b/lib/CodeGen/CGOpenCLRuntime.h
@@ -17,11 +17,13 @@
 #define LLVM_CLANG_LIB_CODEGEN_CGOPENCLRUNTIME_H
 
 #include "clang/AST/Type.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
 
 namespace clang {
 
+class Expr;
 class VarDecl;
 
 namespace CodeGen {
@@ -35,6 +37,14 @@ protected:
   llvm::Type *PipeTy;
   llvm::PointerType *SamplerTy;
 
+  /// Structure for enqueued block information.
+  struct EnqueuedBlockInfo {
+    llvm::Function *Kernel; ///< Enqueued block kernel.
+    llvm::Value *BlockArg;  ///< The first argument to the enqueued block kernel.
+  };
+  /// Maps block expression to block information.
+  llvm::DenseMap<const Expr *, EnqueuedBlockInfo> EnqueuedBlockMap;
+
 public:
   CGOpenCLRuntime(CodeGenModule &CGM) : CGM(CGM), PipeTy(nullptr),
     SamplerTy(nullptr) {}
@@ -48,9 +58,9 @@ public:
 
   virtual llvm::Type *convertOpenCLSpecificType(const Type *T);
 
-  virtual llvm::Type *getPipeType();
+  virtual llvm::Type *getPipeType(const PipeType *T);
 
-  llvm::PointerType *getSamplerType();
+  llvm::PointerType *getSamplerType(const Type *T);
 
   // \brief Returns a value which indicates the size in bytes of the pipe
   // element.
@@ -59,6 +69,13 @@ public:
   // \brief Returns a value which indicates the alignment in bytes of the pipe
   // element.
   virtual llvm::Value *getPipeElemAlign(const Expr *PipeArg);
+
+  /// \return __generic void* type.
+  llvm::PointerType *getGenericVoidPointerType();
+
+  /// \return enqueued block information for the given block expression.
+  EnqueuedBlockInfo emitOpenCLEnqueuedBlock(CodeGenFunction &CGF,
+                                            const Expr *E);
 };
 
 }
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp
index 9f8aa6c8d964..5db29eb6004d 100644
--- a/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -19,6 +19,7 @@
 #include "clang/AST/Decl.h"
 #include "clang/AST/StmtOpenMP.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitmaskEnum.h"
 #include "llvm/Bitcode/BitcodeReader.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -427,7 +428,7 @@ public:
 /// \brief Values for bit flags used in the ident_t to describe the fields.
 /// All enumerated elements are named and described in accordance with the code
 /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
-enum OpenMPLocationFlags {
+enum OpenMPLocationFlags : unsigned {
   /// \brief Use trampoline for internal microtask.
   OMP_IDENT_IMD = 0x01,
   /// \brief Use c-style ident structure.
   OMP_IDENT_KMPC = 0x02,
@@ -443,7 +444,14 @@
   /// \brief Implicit barrier in 'sections' directive.
   OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
   /// \brief Implicit barrier in 'single' directive.
-  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140
+  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
+  /// Call of __kmp_for_static_init for static loop.
+  OMP_IDENT_WORK_LOOP = 0x200,
+  /// Call of __kmp_for_static_init for sections.
+  OMP_IDENT_WORK_SECTIONS = 0x400,
+  /// Call of __kmp_for_static_init for distribute.
+  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
+  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
 };
 
 /// \brief Describes ident structure that describes a source location.
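The LLVM_MARK_AS_BITMASK_ENUM marker added to OpenMPLocationFlags above tags the enum's largest individual bit so that llvm/ADT/BitmaskEnum.h can supply type-safe overloaded bitwise operators for the enum. A minimal sketch of the same pattern in isolation (the demo namespace and flag names are invented):

#include "llvm/ADT/BitmaskEnum.h"

namespace demo {

LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

enum DemoFlags : unsigned {
  DF_Loop = 0x1,
  DF_Sections = 0x2,
  DF_Distribute = 0x4,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/DF_Distribute)
};

// Combining flags keeps the enum type with no casts, which is what lets
// the runtime code above OR the new OMP_IDENT_WORK_* bits into ident_t flags.
inline DemoFlags workKind() { return DF_Loop | DF_Distribute; }

} // namespace demo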
@@ -660,27 +668,47 @@ enum OpenMPRTLFunction { // // Offloading related calls // - // Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t - // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t + // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t + // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t // *arg_types); OMPRTL__tgt_target, - // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, - // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, - // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); + // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, + // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t + // *arg_types); + OMPRTL__tgt_target_nowait, + // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, + // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t + // *arg_types, int32_t num_teams, int32_t thread_limit); OMPRTL__tgt_target_teams, + // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void + // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t + // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); + OMPRTL__tgt_target_teams_nowait, // Call to void __tgt_register_lib(__tgt_bin_desc *desc); OMPRTL__tgt_register_lib, // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); OMPRTL__tgt_unregister_lib, - // Call to void __tgt_target_data_begin(int32_t device_id, int32_t arg_num, - // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); + // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); OMPRTL__tgt_target_data_begin, - // Call to void __tgt_target_data_end(int32_t device_id, int32_t arg_num, - // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); + // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t + // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t + // *arg_types); + OMPRTL__tgt_target_data_begin_nowait, + // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); OMPRTL__tgt_target_data_end, - // Call to void __tgt_target_data_update(int32_t device_id, int32_t arg_num, - // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); + // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t + // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t + // *arg_types); + OMPRTL__tgt_target_data_end_nowait, + // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); OMPRTL__tgt_target_data_update, + // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t + // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t + // *arg_types); + OMPRTL__tgt_target_data_update_nowait, }; /// A basic class for pre|post-action for advanced codegen sequence for OpenMP @@ -862,18 +890,7 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, } LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { - if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) - return CGF.EmitOMPArraySectionExpr(OASE); - if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E)) - 
return CGF.EmitLValue(ASE); - auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); - DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), - CGF.CapturedStmtInfo && - CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr, - E->getType(), VK_LValue, E->getExprLoc()); - // Store the address of the original variable associated with the LHS - // implicit variable. - return CGF.EmitLValue(&DRE); + return CGF.EmitOMPSharedLValue(E); } LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, @@ -919,8 +936,9 @@ ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) { assert(SharedAddresses.size() == N && "Number of generated lvalues must be exactly N."); - SharedAddresses.emplace_back(emitSharedLValue(CGF, ClausesData[N].Ref), - emitSharedLValueUB(CGF, ClausesData[N].Ref)); + LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); + LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); + SharedAddresses.emplace_back(First, Second); } void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { @@ -928,7 +946,7 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); QualType PrivateType = PrivateVD->getType(); bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); - if (!AsArraySection && !PrivateType->isVariablyModifiedType()) { + if (!PrivateType->isVariablyModifiedType()) { Sizes.emplace_back( CGF.getTypeSize( SharedAddresses[N].first.getType().getNonReferenceType()), @@ -966,10 +984,9 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); QualType PrivateType = PrivateVD->getType(); - bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); - if (!AsArraySection && !PrivateType->isVariablyModifiedType()) { + if (!PrivateType->isVariablyModifiedType()) { assert(!Size && !Sizes[N].second && - "Size should be nullptr for non-variably modified redution " + "Size should be nullptr for non-variably modified reduction " "items."); return; } @@ -995,9 +1012,9 @@ void ReductionCodeGen::emitInitialization( SharedLVal = CGF.MakeAddrLValue( CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(), CGF.ConvertTypeForMem(SharedType)), - SharedType, SharedAddresses[N].first.getBaseInfo()); - if (isa<OMPArraySectionExpr>(ClausesData[N].Ref) || - CGF.getContext().getAsArrayType(PrivateVD->getType())) { + SharedType, SharedAddresses[N].first.getBaseInfo(), + CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); + if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, @@ -1040,15 +1057,16 @@ static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, if (auto *PtrTy = BaseTy->getAs<PointerType>()) BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); else { - BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(), - BaseTy->castAs<ReferenceType>()); + LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy); + BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); } BaseTy = BaseTy->getPointeeType(); } return CGF.MakeAddrLValue( CGF.Builder.CreateElementBitCast(BaseLV.getAddress(), 
CGF.ConvertTypeForMem(ElTy)), - BaseLV.getType(), BaseLV.getBaseInfo()); + BaseLV.getType(), BaseLV.getBaseInfo(), + CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); } static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, @@ -1106,11 +1124,14 @@ Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, OriginalBaseLValue); llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( BaseLValue.getPointer(), SharedAddresses[N].first.getPointer()); - llvm::Value *Ptr = - CGF.Builder.CreateGEP(PrivateAddr.getPointer(), Adjustment); + llvm::Value *PrivatePointer = + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + PrivateAddr.getPointer(), + SharedAddresses[N].first.getAddress().getType()); + llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); return castToBase(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), - OriginalBaseLValue.getPointer()->getType(), + OriginalBaseLValue.getAddress().getType(), OriginalBaseLValue.getAlignment(), Ptr); } BaseDecls.emplace_back( @@ -1146,7 +1167,7 @@ LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( CodeGenFunction &CGF) { return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), getThreadIDVariable()->getType(), - LValueBaseInfo(AlignmentSource::Decl, false)); + AlignmentSource::Decl); } CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) @@ -1204,7 +1225,14 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, .getAddress(); }); (void)Scope.Privatize(); - CGF.EmitIgnoredExpr(CombinerInitializer); + if (!IsCombiner && Out->hasInit() && + !CGF.isTrivialInitializer(Out->getInit())) { + CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), + Out->getType().getQualifiers(), + /*IsInitializer=*/true); + } + if (CombinerInitializer) + CGF.EmitIgnoredExpr(CombinerInitializer); Scope.ForceCleanup(); CGF.FinishFunction(); return Fn; @@ -1230,7 +1258,10 @@ void CGOpenMPRuntime::emitUserDefinedReduction( Orig = &C.Idents.get("omp_orig"); } Initializer = emitCombinerOrInitializer( - CGM, D->getType(), Init, cast<VarDecl>(D->lookup(Orig).front()), + CGM, D->getType(), + D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init + : nullptr, + cast<VarDecl>(D->lookup(Orig).front()), cast<VarDecl>(D->lookup(Priv).front()), /*IsCombiner=*/false); } @@ -1283,6 +1314,15 @@ static llvm::Value *emitParallelOrTeamsOutlinedFunction( HasCancel = OPSD->hasCancel(); else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) HasCancel = OPFD->hasCancel(); + else if (auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) + HasCancel = OPFD->hasCancel(); + else if (auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) + HasCancel = OPFD->hasCancel(); + else if (auto *OPFD = dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) + HasCancel = OPFD->hasCancel(); + else if (auto *OPFD = + dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) + HasCancel = OPFD->hasCancel(); CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, HasCancel, OutlinedHelperName); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); @@ -1442,19 +1482,24 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, if (ThreadID != nullptr) return ThreadID; } - if (auto *OMPRegionInfo = - dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { - if (OMPRegionInfo->getThreadIDVariable()) { - // Check if this an outlined function with thread id passed as argument. 
-      auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
-      ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
-      // If value loaded in entry block, cache it and use it everywhere in
-      // function.
-      if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
-        auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
-        Elem.second.ThreadID = ThreadID;
+  // If exceptions are enabled, do not use the parameter to avoid a possible
+  // crash.
+  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
+      !CGF.getLangOpts().CXXExceptions ||
+      CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
+    if (auto *OMPRegionInfo =
+            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
+      if (OMPRegionInfo->getThreadIDVariable()) {
+        // Check if this is an outlined function with the thread id passed as
+        // an argument.
+        auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
+        ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
+        // If the value is loaded in the entry block, cache it and use it
+        // everywhere in the function.
+        if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
+          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
+          Elem.second.ThreadID = ThreadID;
+        }
+        return ThreadID;
       }
-      return ThreadID;
     }
   }

@@ -1464,12 +1509,13 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
   // function.
   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
   CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
-  ThreadID =
-      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
-                          emitUpdateLocation(CGF, Loc));
+  auto *Call = CGF.Builder.CreateCall(
+      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
+      emitUpdateLocation(CGF, Loc));
+  Call->setCallingConv(CGF.getRuntimeCC());
   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
-  Elem.second.ThreadID = ThreadID;
-  return ThreadID;
+  Elem.second.ThreadID = Call;
+  return Call;
 }

 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
@@ -2001,32 +2047,48 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
     break;
   }
   case OMPRTL__tgt_target: {
-    // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
-    // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
+    // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
+    // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
     // *arg_types);
-    llvm::Type *TypeParams[] = {CGM.Int32Ty,
+    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                 CGM.VoidPtrTy,
                                 CGM.Int32Ty,
                                 CGM.VoidPtrPtrTy,
                                 CGM.VoidPtrPtrTy,
                                 CGM.SizeTy->getPointerTo(),
-                                CGM.Int32Ty->getPointerTo()};
+                                CGM.Int64Ty->getPointerTo()};
     llvm::FunctionType *FnTy =
         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
     break;
   }
+  case OMPRTL__tgt_target_nowait: {
+    // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
+    // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
+    // int64_t *arg_types);
+    llvm::Type *TypeParams[] = {CGM.Int64Ty,
+                                CGM.VoidPtrTy,
+                                CGM.Int32Ty,
+                                CGM.VoidPtrPtrTy,
+                                CGM.VoidPtrPtrTy,
+                                CGM.SizeTy->getPointerTo(),
+                                CGM.Int64Ty->getPointerTo()};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
+    break;
+  }
   case OMPRTL__tgt_target_teams: {
-    // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
+    // Build int32_t
__tgt_target_teams(int64_t device_id, void *host_ptr, // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, - // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); - llvm::Type *TypeParams[] = {CGM.Int32Ty, + // int64_t *arg_types, int32_t num_teams, int32_t thread_limit); + llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrPtrTy, CGM.VoidPtrPtrTy, CGM.SizeTy->getPointerTo(), - CGM.Int32Ty->getPointerTo(), + CGM.Int64Ty->getPointerTo(), CGM.Int32Ty, CGM.Int32Ty}; llvm::FunctionType *FnTy = @@ -2034,6 +2096,24 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); break; } + case OMPRTL__tgt_target_teams_nowait: { + // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void + // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t + // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); + llvm::Type *TypeParams[] = {CGM.Int64Ty, + CGM.VoidPtrTy, + CGM.Int32Ty, + CGM.VoidPtrPtrTy, + CGM.VoidPtrPtrTy, + CGM.SizeTy->getPointerTo(), + CGM.Int64Ty->getPointerTo(), + CGM.Int32Ty, + CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); + break; + } case OMPRTL__tgt_register_lib: { // Build void __tgt_register_lib(__tgt_bin_desc *desc); QualType ParamTy = @@ -2055,47 +2135,92 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { break; } case OMPRTL__tgt_target_data_begin: { - // Build void __tgt_target_data_begin(int32_t device_id, int32_t arg_num, - // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); - llvm::Type *TypeParams[] = {CGM.Int32Ty, + // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); + llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int32Ty, CGM.VoidPtrPtrTy, CGM.VoidPtrPtrTy, CGM.SizeTy->getPointerTo(), - CGM.Int32Ty->getPointerTo()}; + CGM.Int64Ty->getPointerTo()}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); break; } + case OMPRTL__tgt_target_data_begin_nowait: { + // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t + // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t + // *arg_types); + llvm::Type *TypeParams[] = {CGM.Int64Ty, + CGM.Int32Ty, + CGM.VoidPtrPtrTy, + CGM.VoidPtrPtrTy, + CGM.SizeTy->getPointerTo(), + CGM.Int64Ty->getPointerTo()}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait"); + break; + } case OMPRTL__tgt_target_data_end: { - // Build void __tgt_target_data_end(int32_t device_id, int32_t arg_num, - // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); - llvm::Type *TypeParams[] = {CGM.Int32Ty, + // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); + llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int32Ty, CGM.VoidPtrPtrTy, CGM.VoidPtrPtrTy, CGM.SizeTy->getPointerTo(), - CGM.Int32Ty->getPointerTo()}; + CGM.Int64Ty->getPointerTo()}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, 
"__tgt_target_data_end"); break; } + case OMPRTL__tgt_target_data_end_nowait: { + // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t + // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t + // *arg_types); + llvm::Type *TypeParams[] = {CGM.Int64Ty, + CGM.Int32Ty, + CGM.VoidPtrPtrTy, + CGM.VoidPtrPtrTy, + CGM.SizeTy->getPointerTo(), + CGM.Int64Ty->getPointerTo()}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait"); + break; + } case OMPRTL__tgt_target_data_update: { - // Build void __tgt_target_data_update(int32_t device_id, int32_t arg_num, - // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); - llvm::Type *TypeParams[] = {CGM.Int32Ty, + // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); + llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int32Ty, CGM.VoidPtrPtrTy, CGM.VoidPtrPtrTy, CGM.SizeTy->getPointerTo(), - CGM.Int32Ty->getPointerTo()}; + CGM.Int64Ty->getPointerTo()}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); break; } + case OMPRTL__tgt_target_data_update_nowait: { + // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t + // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t + // *arg_types); + llvm::Type *TypeParams[] = {CGM.Int64Ty, + CGM.Int32Ty, + CGM.VoidPtrPtrTy, + CGM.VoidPtrPtrTy, + CGM.SizeTy->getPointerTo(), + CGM.Int64Ty->getPointerTo()}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); + break; + } } assert(RTLFn && "Unable to find OpenMP runtime function"); return RTLFn; @@ -2459,7 +2584,7 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); OutlinedFnArgs.push_back(ZeroAddr.getPointer()); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); - CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); + RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); // __kmpc_end_serialized_parallel(&Loc, GTid); llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; @@ -2968,87 +3093,101 @@ static void emitForStaticInitCall( CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, - unsigned IVSize, bool Ordered, Address IL, Address LB, Address UB, - Address ST, llvm::Value *Chunk) { + const CGOpenMPRuntime::StaticRTInput &Values) { if (!CGF.HaveInsertPoint()) - return; - - assert(!Ordered); - assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || - Schedule == OMP_sch_static_balanced_chunked || - Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || - Schedule == OMP_dist_sch_static || - Schedule == OMP_dist_sch_static_chunked); - - // Call __kmpc_for_static_init( - // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, - // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, - // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, - // kmp_int[32|64] incr, kmp_int[32|64] chunk); - if (Chunk == nullptr) { - assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static 
|| - Schedule == OMP_dist_sch_static) && - "expected static non-chunked schedule"); - // If the Chunk was not specified in the clause - use default value 1. - Chunk = CGF.Builder.getIntN(IVSize, 1); - } else { - assert((Schedule == OMP_sch_static_chunked || - Schedule == OMP_sch_static_balanced_chunked || - Schedule == OMP_ord_static_chunked || - Schedule == OMP_dist_sch_static_chunked) && - "expected static chunked schedule"); - } - llvm::Value *Args[] = { - UpdateLocation, ThreadId, CGF.Builder.getInt32(addMonoNonMonoModifier( - Schedule, M1, M2)), // Schedule type - IL.getPointer(), // &isLastIter - LB.getPointer(), // &LB - UB.getPointer(), // &UB - ST.getPointer(), // &Stride - CGF.Builder.getIntN(IVSize, 1), // Incr - Chunk // Chunk - }; - CGF.EmitRuntimeCall(ForStaticInitFunction, Args); + return; + + assert(!Values.Ordered); + assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || + Schedule == OMP_sch_static_balanced_chunked || + Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || + Schedule == OMP_dist_sch_static || + Schedule == OMP_dist_sch_static_chunked); + + // Call __kmpc_for_static_init( + // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, + // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, + // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, + // kmp_int[32|64] incr, kmp_int[32|64] chunk); + llvm::Value *Chunk = Values.Chunk; + if (Chunk == nullptr) { + assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || + Schedule == OMP_dist_sch_static) && + "expected static non-chunked schedule"); + // If the Chunk was not specified in the clause - use default value 1. + Chunk = CGF.Builder.getIntN(Values.IVSize, 1); + } else { + assert((Schedule == OMP_sch_static_chunked || + Schedule == OMP_sch_static_balanced_chunked || + Schedule == OMP_ord_static_chunked || + Schedule == OMP_dist_sch_static_chunked) && + "expected static chunked schedule"); + } + llvm::Value *Args[] = { + UpdateLocation, + ThreadId, + CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1, + M2)), // Schedule type + Values.IL.getPointer(), // &isLastIter + Values.LB.getPointer(), // &LB + Values.UB.getPointer(), // &UB + Values.ST.getPointer(), // &Stride + CGF.Builder.getIntN(Values.IVSize, 1), // Incr + Chunk // Chunk + }; + CGF.EmitRuntimeCall(ForStaticInitFunction, Args); } void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, + OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, - unsigned IVSize, bool IVSigned, - bool Ordered, Address IL, Address LB, - Address UB, Address ST, - llvm::Value *Chunk) { - OpenMPSchedType ScheduleNum = - getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered); - auto *UpdatedLocation = emitUpdateLocation(CGF, Loc); + const StaticRTInput &Values) { + OpenMPSchedType ScheduleNum = getRuntimeSchedule( + ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); + assert(isOpenMPWorksharingDirective(DKind) && + "Expected loop-based or sections-based directive."); + auto *UpdatedLocation = emitUpdateLocation(CGF, Loc, + isOpenMPLoopDirective(DKind) + ? 
OMP_IDENT_WORK_LOOP + : OMP_IDENT_WORK_SECTIONS); auto *ThreadId = getThreadID(CGF, Loc); - auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned); + auto *StaticInitFunction = + createForStaticInitFunction(Values.IVSize, Values.IVSigned); emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, - ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, IVSize, - Ordered, IL, LB, UB, ST, Chunk); + ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); } void CGOpenMPRuntime::emitDistributeStaticInit( CodeGenFunction &CGF, SourceLocation Loc, - OpenMPDistScheduleClauseKind SchedKind, unsigned IVSize, bool IVSigned, - bool Ordered, Address IL, Address LB, Address UB, Address ST, - llvm::Value *Chunk) { - OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != nullptr); - auto *UpdatedLocation = emitUpdateLocation(CGF, Loc); + OpenMPDistScheduleClauseKind SchedKind, + const CGOpenMPRuntime::StaticRTInput &Values) { + OpenMPSchedType ScheduleNum = + getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); + auto *UpdatedLocation = + emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); auto *ThreadId = getThreadID(CGF, Loc); - auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned); + auto *StaticInitFunction = + createForStaticInitFunction(Values.IVSize, Values.IVSigned); emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, - OMPC_SCHEDULE_MODIFIER_unknown, IVSize, Ordered, IL, LB, - UB, ST, Chunk); + OMPC_SCHEDULE_MODIFIER_unknown, Values); } void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, - SourceLocation Loc) { + SourceLocation Loc, + OpenMPDirectiveKind DKind) { if (!CGF.HaveInsertPoint()) return; // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); - llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; + llvm::Value *Args[] = { + emitUpdateLocation(CGF, Loc, + isOpenMPDistributeDirective(DKind) + ? OMP_IDENT_WORK_DISTRIBUTE + : isOpenMPLoopDirective(DKind) + ? 
OMP_IDENT_WORK_LOOP + : OMP_IDENT_WORK_SECTIONS), + getThreadID(CGF, Loc)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), Args); } @@ -3360,14 +3499,14 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { auto *UnRegFn = createOffloadingBinaryDescriptorFunction( CGM, ".omp_offloading.descriptor_unreg", [&](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib), - Desc); + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib), + Desc); }); auto *RegFn = createOffloadingBinaryDescriptorFunction( CGM, ".omp_offloading.descriptor_reg", [&](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib), - Desc); + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), + Desc); CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); }); if (CGM.supportsCOMDAT()) { @@ -3802,7 +3941,6 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, ".omp_task_entry.", &CGM.getModule()); CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo); CodeGenFunction CGF(CGM); - CGF.disableDebugInfo(); CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args); // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, @@ -3871,7 +4009,8 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, } CallArgs.push_back(SharedsParam); - CGF.EmitCallOrInvoke(TaskFunction, CallArgs); + CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, + CallArgs); CGF.EmitStoreThroughLValue( RValue::get(CGF.Builder.getInt32(/*C=*/0)), CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); @@ -3893,7 +4032,6 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, ImplicitParamDecl::Other); Args.push_back(&GtidArg); Args.push_back(&TaskTypeArg); - FunctionType::ExtInfo Info; auto &DestructorFnInfo = CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo); @@ -4020,9 +4158,9 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, return TaskPrivatesMap; } -static int array_pod_sort_comparator(const PrivateDataTy *P1, - const PrivateDataTy *P2) { - return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0); +static bool stable_sort_comparator(const PrivateDataTy P1, + const PrivateDataTy P2) { + return P1.first > P2.first; } /// Emit initialization for private variables in task-based directives. @@ -4059,8 +4197,8 @@ static void emitPrivatesInit(CodeGenFunction &CGF, SharedRefLValue = CGF.MakeAddrLValue( Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), SharedRefLValue.getType(), - LValueBaseInfo(AlignmentSource::Decl, - SharedRefLValue.getBaseInfo().getMayAlias())); + LValueBaseInfo(AlignmentSource::Decl), + SharedRefLValue.getTBAAInfo()); QualType Type = OriginalVD->getType(); if (Type->isArrayType()) { // Initialize firstprivate array. @@ -4250,8 +4388,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, /*PrivateElemInit=*/nullptr))); ++I; } - llvm::array_pod_sort(Privates.begin(), Privates.end(), - array_pod_sort_comparator); + std::stable_sort(Privates.begin(), Privates.end(), stable_sort_comparator); auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); // Build type kmp_routine_entry_t (if not built yet). 
emitKmpRoutineEntryT(KmpInt32Ty); @@ -4262,7 +4399,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); } KmpTaskTQTy = SavedKmpTaskloopTQTy; - } else if (D.getDirectiveKind() == OMPD_task) { + } else { assert(D.getDirectiveKind() == OMPD_task && "Expected taskloop or task directive"); if (SavedKmpTaskTQTy.isNull()) { @@ -4557,8 +4694,8 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, - NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF, - PrePostActionTy &) { + NumDependencies, &DepWaitTaskArgs, + Loc](CodeGenFunction &CGF, PrePostActionTy &) { auto &RT = CGF.CGM.getOpenMPRuntime(); CodeGenFunction::RunCleanupsScope LocalScope(CGF); // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, @@ -4569,11 +4706,12 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), DepWaitTaskArgs); // Call proxy_task_entry(gtid, new_task); - auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy]( - CodeGenFunction &CGF, PrePostActionTy &Action) { + auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, + Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; - CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs); + CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, + OutlinedFnArgs); }; // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, @@ -5805,21 +5943,21 @@ emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime, const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); - // FIXME: Accommodate other combined directives with teams when they become - // available. - if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>( + if (auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>( ignoreCompoundStmts(CS.getCapturedStmt()))) { - if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) { - CGOpenMPInnerExprInfo CGInfo(CGF, CS); - CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams()); - return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, - /*IsSigned=*/true); - } + if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) { + if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) { + CGOpenMPInnerExprInfo CGInfo(CGF, CS); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams()); + return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, + /*IsSigned=*/true); + } - // If we have an enclosed teams directive but no num_teams clause we use - // the default value 0. - return Bld.getInt32(0); + // If we have an enclosed teams directive but no num_teams clause we use + // the default value 0. + return Bld.getInt32(0); + } } // No teams associated with the directive. @@ -5908,21 +6046,21 @@ emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime, const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); - // FIXME: Accommodate other combined directives with teams when they become - // available. 
-  if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>(
+  if (auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(
           ignoreCompoundStmts(CS.getCapturedStmt()))) {
-    if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
-      CGOpenMPInnerExprInfo CGInfo(CGF, CS);
-      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
-      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
-      return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
-                                       /*IsSigned=*/true);
-    }
+    if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) {
+      if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
+        CGOpenMPInnerExprInfo CGInfo(CGF, CS);
+        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
+        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
+        return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
+                                         /*IsSigned=*/true);
+      }

-    // If we have an enclosed teams directive but no thread_limit clause we use
-    // the default value 0.
-    return CGF.Builder.getInt32(0);
+      // If we have an enclosed teams directive but no thread_limit clause we
+      // use the default value 0.
+      return CGF.Builder.getInt32(0);
+    }
   }

   // No teams associated with the directive.
@@ -5949,22 +6087,23 @@ public:
   /// \brief Delete the element from the device environment, ignoring the
   /// current reference count associated with the element.
   OMP_MAP_DELETE = 0x08,
-  /// \brief The element being mapped is a pointer, therefore the pointee
-  /// should be mapped as well.
-  OMP_MAP_IS_PTR = 0x10,
-  /// \brief This flags signals that an argument is the first one relating to
-  /// a map/private clause expression. For some cases a single
-  /// map/privatization results in multiple arguments passed to the runtime
-  /// library.
-  OMP_MAP_FIRST_REF = 0x20,
+  /// \brief The element being mapped is a pointer-pointee pair; both the
+  /// pointer and the pointee should be mapped.
+  OMP_MAP_PTR_AND_OBJ = 0x10,
+  /// \brief This flag signals that the base address of an entry should be
+  /// passed to the target kernel as an argument.
+  OMP_MAP_TARGET_PARAM = 0x20,
   /// \brief Signal that the runtime library has to return the device pointer
-  /// in the current position for the data being mapped.
-  OMP_MAP_RETURN_PTR = 0x40,
+  /// in the current position for the data being mapped. Used when we have the
+  /// use_device_ptr clause.
+  OMP_MAP_RETURN_PARAM = 0x40,
   /// \brief This flag signals that the reference being passed is a pointer to
   /// private data.
-  OMP_MAP_PRIVATE_PTR = 0x80,
+  OMP_MAP_PRIVATE = 0x80,
   /// \brief Pass the element to the device by value.
-  OMP_MAP_PRIVATE_VAL = 0x100,
+  OMP_MAP_LITERAL = 0x100,
+  /// Implicit map
+  OMP_MAP_IMPLICIT = 0x200,
 };

 /// Class that associates information with a base pointer to be passed to the
@@ -5986,7 +6125,7 @@ public:
   typedef SmallVector<BasePointerInfo, 16> MapBaseValuesArrayTy;
   typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy;
-  typedef SmallVector<unsigned, 16> MapFlagsArrayTy;
+  typedef SmallVector<uint64_t, 16> MapFlagsArrayTy;

 private:
   /// \brief Directive from where the map clauses were extracted.
@@ -5997,6 +6136,8 @@ private:
   /// \brief Set of all first private variables in the current directive.
   llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls;
+  /// Set of all reduction variables in the current directive.
+  llvm::SmallPtrSet<const VarDecl *, 8> ReductionDecls;

   /// Map between device pointer declarations and their expression components.
   /// The key value for declarations in 'this' is null.
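Two changes run through MappableExprsHandler in this hunk: the map-type flags are renamed to say what they mean to the runtime (OMP_MAP_PTR_AND_OBJ, OMP_MAP_TARGET_PARAM, OMP_MAP_LITERAL, plus the new OMP_MAP_IMPLICIT), and MapFlagsArrayTy widens from unsigned to uint64_t to match the int64_t *arg_types parameter of the libomptarget entry points. A hedged sketch of how one entry's 64-bit map type composes; the helper below is hypothetical, and the OMP_MAP_TO/OMP_MAP_FROM values are assumed from the part of the enum this hunk does not show:

#include <cstdint>

enum : uint64_t {
  OMP_MAP_TO = 0x01,
  OMP_MAP_FROM = 0x02,
  OMP_MAP_PTR_AND_OBJ = 0x10,
  OMP_MAP_TARGET_PARAM = 0x20,
  OMP_MAP_IMPLICIT = 0x200,
};

// Hypothetical helper: compose the 64-bit map type for one captured
// aggregate that defaults to 'tofrom', mirroring how the default flags are
// ORed into the result of getMapTypeBits() above.
uint64_t composeMapType(bool PtrAndObj, bool TargetParam, bool Implicit) {
  uint64_t Bits = OMP_MAP_TO | OMP_MAP_FROM;
  if (PtrAndObj)
    Bits |= OMP_MAP_PTR_AND_OBJ;  // pointer and pointee are both mapped
  if (TargetParam)
    Bits |= OMP_MAP_TARGET_PARAM; // base address becomes a kernel argument
  if (Implicit)
    Bits |= OMP_MAP_IMPLICIT;     // mapping was not written by the user
  return Bits;
}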
@@ -6051,10 +6192,10 @@ private: /// a flag marking the map as a pointer if requested. Add a flag marking the /// map as the first one of a series of maps that relate to the same map /// expression. - unsigned getMapTypeBits(OpenMPMapClauseKind MapType, + uint64_t getMapTypeBits(OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag, - bool AddIsFirstFlag) const { - unsigned Bits = 0u; + bool AddIsTargetParamFlag) const { + uint64_t Bits = 0u; switch (MapType) { case OMPC_MAP_alloc: case OMPC_MAP_release: @@ -6080,9 +6221,9 @@ private: break; } if (AddPtrFlag) - Bits |= OMP_MAP_IS_PTR; - if (AddIsFirstFlag) - Bits |= OMP_MAP_FIRST_REF; + Bits |= OMP_MAP_PTR_AND_OBJ; + if (AddIsTargetParamFlag) + Bits |= OMP_MAP_TARGET_PARAM; if (MapTypeModifier == OMPC_MAP_always) Bits |= OMP_MAP_ALWAYS; return Bits; @@ -6135,7 +6276,7 @@ private: OMPClauseMappableExprCommon::MappableExprComponentListRef Components, MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, - bool IsFirstComponentList) const { + bool IsFirstComponentList, bool IsImplicit) const { // The following summarizes what has to be generated for each map and the // types bellow. The generated information is expressed in this order: @@ -6189,28 +6330,28 @@ private: // // map(s.p[:22], s.a s.b) // &s, &(s.p), sizeof(double*), noflags - // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag + extra_flag + // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag // // map(s.ps) // &s, &(s.ps), sizeof(S2*), noflags // // map(s.ps->s.i) // &s, &(s.ps), sizeof(S2*), noflags - // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag + extra_flag + // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag // // map(s.ps->ps) // &s, &(s.ps), sizeof(S2*), noflags - // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag + // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag // // map(s.ps->ps->ps) // &s, &(s.ps), sizeof(S2*), noflags - // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag - // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag + // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag // // map(s.ps->ps->s.f[:22]) // &s, &(s.ps), sizeof(S2*), noflags - // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag - // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + extra_flag + // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag // // map(ps) // &ps, &ps, sizeof(S2*), noflags @@ -6226,29 +6367,28 @@ private: // // map(ps->p[:22]) // ps, &(ps->p), sizeof(double*), noflags - // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag + extra_flag + // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag // // map(ps->ps) // ps, &(ps->ps), sizeof(S2*), noflags // // map(ps->ps->s.i) // ps, &(ps->ps), sizeof(S2*), noflags - // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag + extra_flag + // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag // // map(ps->ps->ps) // ps, &(ps->ps), sizeof(S2*), noflags - // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag + // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag // // map(ps->ps->ps->ps) // ps, &(ps->ps), sizeof(S2*), noflags - // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag - // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag + // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag // // 
map(ps->ps->ps->s.f[:22]) // ps, &(ps->ps), sizeof(S2*), noflags - // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag - // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + - // extra_flag + // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag // Track if the map information being generated is the first for a capture. bool IsCaptureFirstInfo = IsFirstComponentList; @@ -6270,8 +6410,7 @@ private: } else { // The base is the reference to the variable. // BP = &Var. - BP = CGF.EmitLValue(cast<DeclRefExpr>(I->getAssociatedExpression())) - .getPointer(); + BP = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer(); // If the variable is a pointer and is being dereferenced (i.e. is not // the last component), the base has to be the pointer itself, not its @@ -6290,6 +6429,7 @@ private: } } + uint64_t DefaultFlags = IsImplicit ? OMP_MAP_IMPLICIT : 0; for (; I != CE; ++I) { auto Next = std::next(I); @@ -6324,7 +6464,8 @@ private: isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) && "Unexpected expression"); - auto *LB = CGF.EmitLValue(I->getAssociatedExpression()).getPointer(); + llvm::Value *LB = + CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer(); auto *Size = getExprTypeSize(I->getAssociatedExpression()); // If we have a member expression and the current component is a @@ -6339,9 +6480,11 @@ private: BasePointers.push_back(BP); Pointers.push_back(RefAddr); Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); - Types.push_back(getMapTypeBits( - /*MapType*/ OMPC_MAP_alloc, /*MapTypeModifier=*/OMPC_MAP_unknown, - !IsExpressionFirstInfo, IsCaptureFirstInfo)); + Types.push_back(DefaultFlags | + getMapTypeBits( + /*MapType*/ OMPC_MAP_alloc, + /*MapTypeModifier=*/OMPC_MAP_unknown, + !IsExpressionFirstInfo, IsCaptureFirstInfo)); IsExpressionFirstInfo = false; IsCaptureFirstInfo = false; // The reference will be the next base address. @@ -6356,9 +6499,9 @@ private: // same expression except for the first one. We also need to signal // this map is the first one that relates with the current capture // (there is a set of entries for each capture). - Types.push_back(getMapTypeBits(MapType, MapTypeModifier, - !IsExpressionFirstInfo, - IsCaptureFirstInfo)); + Types.push_back(DefaultFlags | getMapTypeBits(MapType, MapTypeModifier, + !IsExpressionFirstInfo, + IsCaptureFirstInfo)); // If we have a final array section, we are done with this expression. if (IsFinalArraySection) @@ -6370,7 +6513,6 @@ private: IsExpressionFirstInfo = false; IsCaptureFirstInfo = false; - continue; } } } @@ -6386,8 +6528,14 @@ private: // 'private ptr' and 'map to' flag. Return the right flags if the captured // declaration is known as first-private in this handler. if (FirstPrivateDecls.count(Cap.getCapturedVar())) - return MappableExprsHandler::OMP_MAP_PRIVATE_PTR | + return MappableExprsHandler::OMP_MAP_PRIVATE | MappableExprsHandler::OMP_MAP_TO; + // Reduction variable will use only the 'private ptr' and 'map to_from' + // flag. + if (ReductionDecls.count(Cap.getCapturedVar())) { + return MappableExprsHandler::OMP_MAP_TO | + MappableExprsHandler::OMP_MAP_FROM; + } // We didn't modify anything. 
return CurrentModifiers; @@ -6401,6 +6549,12 @@ public: for (const auto *D : C->varlists()) FirstPrivateDecls.insert( cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); + for (const auto *C : Dir.getClausesOfKind<OMPReductionClause>()) { + for (const auto *D : C->varlists()) { + ReductionDecls.insert( + cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); + } + } // Extract device pointer clause information. for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) for (auto L : C->component_lists()) @@ -6432,20 +6586,19 @@ public: RPK_MemberReference, }; OMPClauseMappableExprCommon::MappableExprComponentListRef Components; - OpenMPMapClauseKind MapType; - OpenMPMapClauseKind MapTypeModifier; - ReturnPointerKind ReturnDevicePointer; + OpenMPMapClauseKind MapType = OMPC_MAP_unknown; + OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown; + ReturnPointerKind ReturnDevicePointer = RPK_None; + bool IsImplicit = false; - MapInfo() - : MapType(OMPC_MAP_unknown), MapTypeModifier(OMPC_MAP_unknown), - ReturnDevicePointer(RPK_None) {} + MapInfo() = default; MapInfo( OMPClauseMappableExprCommon::MappableExprComponentListRef Components, OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, - ReturnPointerKind ReturnDevicePointer) + ReturnPointerKind ReturnDevicePointer, bool IsImplicit) : Components(Components), MapType(MapType), MapTypeModifier(MapTypeModifier), - ReturnDevicePointer(ReturnDevicePointer) {} + ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {} }; // We have to process the component lists that relate with the same @@ -6459,25 +6612,29 @@ public: const ValueDecl *D, OMPClauseMappableExprCommon::MappableExprComponentListRef L, OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier, - MapInfo::ReturnPointerKind ReturnDevicePointer) { + MapInfo::ReturnPointerKind ReturnDevicePointer, bool IsImplicit) { const ValueDecl *VD = D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; - Info[VD].push_back({L, MapType, MapModifier, ReturnDevicePointer}); + Info[VD].emplace_back(L, MapType, MapModifier, ReturnDevicePointer, + IsImplicit); }; // FIXME: MSVC 2013 seems to require this-> to find member CurDir. for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) - for (auto L : C->component_lists()) + for (auto L : C->component_lists()) { InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(), - MapInfo::RPK_None); + MapInfo::RPK_None, C->isImplicit()); + } for (auto *C : this->CurDir.getClausesOfKind<OMPToClause>()) - for (auto L : C->component_lists()) + for (auto L : C->component_lists()) { InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown, - MapInfo::RPK_None); + MapInfo::RPK_None, C->isImplicit()); + } for (auto *C : this->CurDir.getClausesOfKind<OMPFromClause>()) - for (auto L : C->component_lists()) + for (auto L : C->component_lists()) { InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown, - MapInfo::RPK_None); + MapInfo::RPK_None, C->isImplicit()); + } // Look at the use_device_ptr clause information and mark the existing map // entries as such. If there is no map information for an entry in the @@ -6524,7 +6681,7 @@ public: BasePointers.push_back({Ptr, VD}); Pointers.push_back(Ptr); Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy)); - Types.push_back(OMP_MAP_RETURN_PTR | OMP_MAP_FIRST_REF); + Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); } for (auto &M : Info) { @@ -6538,9 +6695,9 @@ public: // Remember the current base pointer index. 
unsigned CurrentBasePointersIdx = BasePointers.size(); // FIXME: MSVC 2013 seems to require this-> to find the member method. - this->generateInfoForComponentList(L.MapType, L.MapTypeModifier, - L.Components, BasePointers, Pointers, - Sizes, Types, IsFirstComponentList); + this->generateInfoForComponentList( + L.MapType, L.MapTypeModifier, L.Components, BasePointers, Pointers, + Sizes, Types, IsFirstComponentList, L.IsImplicit); // If this entry relates with a device pointer, set the relevant // declaration and add the 'return pointer' flag. @@ -6562,7 +6719,7 @@ public: "No relevant declaration related with device pointer??"); BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD); - Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PTR; + Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; } IsFirstComponentList = false; } @@ -6604,7 +6761,8 @@ public: for (auto L : It->second) { generateInfoForComponentList( /*MapType=*/OMPC_MAP_to, /*MapTypeModifier=*/OMPC_MAP_unknown, L, - BasePointers, Pointers, Sizes, Types, IsFirstComponentList); + BasePointers, Pointers, Sizes, Types, IsFirstComponentList, + /*IsImplicit=*/false); IsFirstComponentList = false; } return; @@ -6613,7 +6771,7 @@ public: BasePointers.push_back({Arg, VD}); Pointers.push_back(Arg); Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); - Types.push_back(OMP_MAP_PRIVATE_VAL | OMP_MAP_FIRST_REF); + Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM); return; } @@ -6624,9 +6782,9 @@ public: "We got information for the wrong declaration??"); assert(!L.second.empty() && "Not expecting declaration with no component lists."); - generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(), - L.second, BasePointers, Pointers, Sizes, - Types, IsFirstComponentList); + generateInfoForComponentList( + C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers, + Pointers, Sizes, Types, IsFirstComponentList, C->isImplicit()); IsFirstComponentList = false; } @@ -6656,7 +6814,7 @@ public: if (!RI.getType()->isAnyPointerType()) { // We have to signal to the runtime captures passed by value that are // not pointers. - CurMapTypes.push_back(OMP_MAP_PRIVATE_VAL); + CurMapTypes.push_back(OMP_MAP_LITERAL); CurSizes.push_back(CGF.getTypeSize(RI.getType())); } else { // Pointers are implicitly mapped with a zero size and no flags @@ -6676,19 +6834,12 @@ public: // The default map type for a scalar/complex type is 'to' because by // default the value doesn't have to be retrieved. For an aggregate // type, the default is 'tofrom'. - CurMapTypes.push_back(ElementType->isAggregateType() - ? (OMP_MAP_TO | OMP_MAP_FROM) - : OMP_MAP_TO); - - // If we have a capture by reference we may need to add the private - // pointer flag if the base declaration shows in some first-private - // clause. - CurMapTypes.back() = - adjustMapModifiersForPrivateClauses(CI, CurMapTypes.back()); + CurMapTypes.emplace_back(adjustMapModifiersForPrivateClauses( + CI, ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM) + : OMP_MAP_TO)); } - // Every default map produces a single argument, so, it is always the - // first one. - CurMapTypes.back() |= OMP_MAP_FIRST_REF; + // Every default map produces a single argument which is a target parameter. 
+ CurMapTypes.back() |= OMP_MAP_TARGET_PARAM; } }; @@ -6831,7 +6982,7 @@ static void emitOffloadingArraysArgument( llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray, /*Idx0=*/0, /*Idx1=*/0); MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.Int32Ty, Info.NumberOfPtrs), + llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.MapTypesArray, /*Idx0=*/0, /*Idx1=*/0); @@ -6840,7 +6991,7 @@ static void emitOffloadingArraysArgument( PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo()); MapTypesArrayArg = - llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()); + llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); } } @@ -6855,8 +7006,6 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, assert(OutlinedFn && "Invalid outlined function!"); - auto &Ctx = CGF.getContext(); - // Fill up the arrays with all the captured variables. MappableExprsHandler::MapValuesArrayTy KernelArgs; MappableExprsHandler::MapBaseValuesArrayTy BasePointers; @@ -6878,9 +7027,6 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), CE = CS.capture_end(); CI != CE; ++CI, ++RI, ++CV) { - StringRef Name; - QualType Ty; - CurBasePointers.clear(); CurPointers.clear(); CurSizes.clear(); @@ -6893,8 +7039,8 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, CurPointers.push_back(*CV); CurSizes.push_back(CGF.getTypeSize(RI->getType())); // Copy to the device as an argument. No need to retrieve it. - CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL | - MappableExprsHandler::OMP_MAP_FIRST_REF); + CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL | + MappableExprsHandler::OMP_MAP_TARGET_PARAM); } else { // If we have any information in the map clause, we use it, otherwise we // just do a default mapping. @@ -6921,19 +7067,10 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); } - // Keep track on whether the host function has to be executed. - auto OffloadErrorQType = - Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true); - auto OffloadError = CGF.MakeAddrLValue( - CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"), - OffloadErrorQType); - CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty), - OffloadError); - // Fill up the pointer arrays and transfer execution to the device. - auto &&ThenGen = [&BasePointers, &Pointers, &Sizes, &MapTypes, Device, - OutlinedFnID, OffloadError, - &D](CodeGenFunction &CGF, PrePostActionTy &) { + auto &&ThenGen = [this, &BasePointers, &Pointers, &Sizes, &MapTypes, Device, + OutlinedFn, OutlinedFnID, &D, + &KernelArgs](CodeGenFunction &CGF, PrePostActionTy &) { auto &RT = CGF.CGM.getOpenMPRuntime(); // Emit the offloading arrays. TargetDataInfo Info; @@ -6956,11 +7093,12 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, // Emit device ID if any. llvm::Value *DeviceID; - if (Device) + if (Device) { DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), - CGF.Int32Ty, /*isSigned=*/true); - else - DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); + CGF.Int64Ty, /*isSigned=*/true); + } else { + DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); + } // Emit the number of elements in the offloading arrays. 
llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size()); @@ -6971,6 +7109,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, auto *NumTeams = emitNumTeamsForTargetDirective(RT, CGF, D); auto *NumThreads = emitNumThreadsForTargetDirective(RT, CGF, D); + bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); // The target region is an outlined function launched by the runtime // via calls __tgt_target() or __tgt_target_teams(). // @@ -7013,24 +7152,41 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, Info.MapTypesArray, NumTeams, NumThreads}; Return = CGF.EmitRuntimeCall( - RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs); + RT.createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait + : OMPRTL__tgt_target_teams), + OffloadingArgs); } else { llvm::Value *OffloadingArgs[] = { DeviceID, OutlinedFnID, PointerNum, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, Info.MapTypesArray}; - Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target), - OffloadingArgs); + Return = CGF.EmitRuntimeCall( + RT.createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait + : OMPRTL__tgt_target), + OffloadingArgs); } - CGF.EmitStoreOfScalar(Return, OffloadError); + // Check the error code and execute the host version if required. + llvm::BasicBlock *OffloadFailedBlock = + CGF.createBasicBlock("omp_offload.failed"); + llvm::BasicBlock *OffloadContBlock = + CGF.createBasicBlock("omp_offload.cont"); + llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); + CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); + + CGF.EmitBlock(OffloadFailedBlock); + emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, KernelArgs); + CGF.EmitBranch(OffloadContBlock); + + CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); }; // Notify that the host version must be executed. - auto &&ElseGen = [OffloadError](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/-1u), - OffloadError); + auto &&ElseGen = [this, &D, OutlinedFn, &KernelArgs](CodeGenFunction &CGF, + PrePostActionTy &) { + emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, + KernelArgs); }; // If we have a target function ID it means that we need to support @@ -7048,19 +7204,6 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, RegionCodeGenTy ElseRCG(ElseGen); ElseRCG(CGF); } - - // Check the error code and execute the host version if required. 
- auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed"); - auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont"); - auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation()); - auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal); - CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); - - CGF.EmitBlock(OffloadFailedBlock); - CGF.Builder.CreateCall(OutlinedFn, KernelArgs); - CGF.EmitBranch(OffloadContBlock); - - CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); } void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, @@ -7101,6 +7244,26 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( CGM, ParentName, cast<OMPTargetTeamsDirective>(*S)); break; + case Stmt::OMPTargetTeamsDistributeDirectiveClass: + CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( + CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(*S)); + break; + case Stmt::OMPTargetTeamsDistributeSimdDirectiveClass: + CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( + CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(*S)); + break; + case Stmt::OMPTargetParallelForDirectiveClass: + CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( + CGM, ParentName, cast<OMPTargetParallelForDirective>(*S)); + break; + case Stmt::OMPTargetParallelForSimdDirectiveClass: + CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( + CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(*S)); + break; + case Stmt::OMPTargetSimdDirectiveClass: + CodeGenFunction::EmitOMPTargetSimdDeviceFunction( + CGM, ParentName, cast<OMPTargetSimdDirective>(*S)); + break; default: llvm_unreachable("Unknown target directive for OpenMP device codegen."); } @@ -7278,11 +7441,12 @@ void CGOpenMPRuntime::emitTargetDataCalls( // Emit device ID if any. llvm::Value *DeviceID = nullptr; - if (Device) + if (Device) { DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), - CGF.Int32Ty, /*isSigned=*/true); - else - DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); + CGF.Int64Ty, /*isSigned=*/true); + } else { + DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); + } // Emit the number of elements in the offloading arrays. auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); @@ -7313,11 +7477,12 @@ void CGOpenMPRuntime::emitTargetDataCalls( // Emit device ID if any. llvm::Value *DeviceID = nullptr; - if (Device) + if (Device) { DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), - CGF.Int32Ty, /*isSigned=*/true); - else - DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); + CGF.Int64Ty, /*isSigned=*/true); + } else { + DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); + } // Emit the number of elements in the offloading arrays. auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); @@ -7399,11 +7564,12 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( // Emit device ID if any. llvm::Value *DeviceID = nullptr; - if (Device) + if (Device) { DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), - CGF.Int32Ty, /*isSigned=*/true); - else - DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); + CGF.Int64Ty, /*isSigned=*/true); + } else { + DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); + } // Emit the number of elements in the offloading arrays. 
auto *PointerNum = CGF.Builder.getInt32(BasePointers.size()); @@ -7415,19 +7581,23 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( auto &RT = CGF.CGM.getOpenMPRuntime(); // Select the right runtime function call for each expected standalone // directive. + const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); OpenMPRTLFunction RTLFn; switch (D.getDirectiveKind()) { default: llvm_unreachable("Unexpected standalone target data directive."); break; case OMPD_target_enter_data: - RTLFn = OMPRTL__tgt_target_data_begin; + RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait + : OMPRTL__tgt_target_data_begin; break; case OMPD_target_exit_data: - RTLFn = OMPRTL__tgt_target_data_end; + RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait + : OMPRTL__tgt_target_data_end; break; case OMPD_target_update: - RTLFn = OMPRTL__tgt_target_data_update; + RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait + : OMPRTL__tgt_target_data_update; break; } CGF.EmitRuntimeCall(RT.createRuntimeFunction(RTLFn), OffloadingArgs); @@ -7777,3 +7947,29 @@ void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, CGF.EmitRuntimeCall(RTLFn, Args); } +void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, llvm::Value *Callee, + ArrayRef<llvm::Value *> Args, + SourceLocation Loc) const { + auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); + + if (auto *Fn = dyn_cast<llvm::Function>(Callee)) { + if (Fn->doesNotThrow()) { + CGF.EmitNounwindRuntimeCall(Fn, Args); + return; + } + } + CGF.EmitRuntimeCall(Callee, Args); +} + +void CGOpenMPRuntime::emitOutlinedFunctionCall( + CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, + ArrayRef<llvm::Value *> Args) const { + assert(Loc.isValid() && "Outlined function call location must be valid."); + emitCall(CGF, OutlinedFn, Args, Loc); +} + +Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, + const VarDecl *NativeParam, + const VarDecl *TargetParam) const { + return CGF.GetAddrOfLocalVar(NativeParam); +} diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h index 185c01d5e540..94a143841373 100644 --- a/lib/CodeGen/CGOpenMPRuntime.h +++ b/lib/CodeGen/CGOpenMPRuntime.h @@ -250,6 +250,11 @@ protected: // virtual StringRef getOutlinedHelperName() const { return ".omp_outlined."; } + /// Emits \p Callee function call with arguments \p Args with location \p Loc. + void emitCall(CodeGenFunction &CGF, llvm::Value *Callee, + ArrayRef<llvm::Value *> Args = llvm::None, + SourceLocation Loc = SourceLocation()) const; + private: /// \brief Default const ident_t object used for initialization of all other /// ident_t objects. @@ -805,6 +810,35 @@ public: unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues); + /// Struct with the values to be passed to the static runtime function + struct StaticRTInput { + /// Size of the iteration variable in bits. + unsigned IVSize = 0; + /// Sign of the iteration variable. + bool IVSigned = false; + /// true if loop is ordered, false otherwise. + bool Ordered = false; + /// Address of the output variable in which the flag of the last iteration + /// is returned. + Address IL = Address::invalid(); + /// Address of the output variable in which the lower iteration number is + /// returned. + Address LB = Address::invalid(); + /// Address of the output variable in which the upper iteration number is + /// returned. 
+ Address UB = Address::invalid();
+ /// Address of the output variable in which the stride value is returned
+ /// necessary to generate the static_chunked scheduled loop.
+ Address ST = Address::invalid();
+ /// Value of the chunk for the static_chunked scheduled loop. For the
+ /// default (nullptr) value, the chunk 1 will be used.
+ llvm::Value *Chunk = nullptr;
+ StaticRTInput(unsigned IVSize, bool IVSigned, bool Ordered, Address IL,
+ Address LB, Address UB, Address ST,
+ llvm::Value *Chunk = nullptr)
+ : IVSize(IVSize), IVSigned(IVSigned), Ordered(Ordered), IL(IL), LB(LB),
+ UB(UB), ST(ST), Chunk(Chunk) {}
+ };
/// \brief Call the appropriate runtime routine to initialize it before start
/// of loop.
///
@@ -812,55 +846,29 @@ public:
/// specify a ordered clause on the loop construct.
/// Depending on the loop schedule, it is necessary to call some runtime
/// routine before start of the OpenMP loop to get the loop upper / lower
- /// bounds \a LB and \a UB and stride \a ST.
+ /// bounds LB and UB and stride ST.
///
/// \param CGF Reference to current CodeGenFunction.
/// \param Loc Clang source location.
+ /// \param DKind Kind of the directive.
/// \param ScheduleKind Schedule kind, specified by the 'schedule' clause.
- /// \param IVSize Size of the iteration variable in bits.
- /// \param IVSigned Sign of the iteration variable.
- /// \param Ordered true if loop is ordered, false otherwise.
- /// \param IL Address of the output variable in which the flag of the
- /// last iteration is returned.
- /// \param LB Address of the output variable in which the lower iteration
- /// number is returned.
- /// \param UB Address of the output variable in which the upper iteration
- /// number is returned.
- /// \param ST Address of the output variable in which the stride value is
- /// returned necessary to generated the static_chunked scheduled loop.
- /// \param Chunk Value of the chunk for the static_chunked scheduled loop.
- /// For the default (nullptr) value, the chunk 1 will be used.
+ /// \param Values Input arguments for the construct.
///
virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc,
+ OpenMPDirectiveKind DKind,
const OpenMPScheduleTy &ScheduleKind,
- unsigned IVSize, bool IVSigned, bool Ordered,
- Address IL, Address LB, Address UB, Address ST,
- llvm::Value *Chunk = nullptr);
+ const StaticRTInput &Values);
///
/// \param CGF Reference to current CodeGenFunction.
/// \param Loc Clang source location.
/// \param SchedKind Schedule kind, specified by the 'dist_schedule' clause.
- /// \param IVSize Size of the iteration variable in bits.
- /// \param IVSigned Sign of the iteration variable.
- /// \param Ordered true if loop is ordered, false otherwise.
- /// \param IL Address of the output variable in which the flag of the
- /// last iteration is returned.
- /// \param LB Address of the output variable in which the lower iteration
- /// number is returned.
- /// \param UB Address of the output variable in which the upper iteration
- /// number is returned.
- /// \param ST Address of the output variable in which the stride value is
- /// returned necessary to generated the static_chunked scheduled loop.
- /// \param Chunk Value of the chunk for the static_chunked scheduled loop.
- /// For the default (nullptr) value, the chunk 1 will be used.
+ /// \param Values Input arguments for the construct.
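Bundling the bounds into StaticRTInput shrinks every call site to constructing one value and passing it along with the directive kind; the worksharing-loop hunk later in this patch does exactly that:

    CGOpenMPRuntime::StaticRTInput StaticInit(
        IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
        LoopArgs.ST, LoopArgs.Chunk);
    RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
                         ScheduleKind, StaticInit);

Passing the directive kind separately lets the runtime pick the matching init/fini entry per construct rather than deducing it from the schedule.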
///
- virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc,
+ virtual void emitDistributeStaticInit(CodeGenFunction &CGF,
+ SourceLocation Loc,
OpenMPDistScheduleClauseKind SchedKind,
- unsigned IVSize, bool IVSigned,
- bool Ordered, Address IL, Address LB,
- Address UB, Address ST,
- llvm::Value *Chunk = nullptr);
+ const StaticRTInput &Values);
/// \brief Call the appropriate runtime routine to notify that we finished
/// iteration of the ordered loop with the dynamic scheduling.
@@ -879,8 +887,10 @@ public:
///
/// \param CGF Reference to current CodeGenFunction.
/// \param Loc Clang source location.
+ /// \param DKind Kind of the directive for which the static finish is emitted.
///
- virtual void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc);
+ virtual void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc,
+ OpenMPDirectiveKind DKind);
/// Call __kmpc_dispatch_next(
/// ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
@@ -1328,6 +1338,30 @@ public:
/// \param C 'depend' clause with 'sink|source' dependency kind.
virtual void emitDoacrossOrdered(CodeGenFunction &CGF,
const OMPDependClause *C);
+
+ /// Translates the native parameter of outlined function if this is required
+ /// for target.
+ /// \param FD Field decl from captured record for the parameter.
+ /// \param NativeParam Parameter itself.
+ virtual const VarDecl *translateParameter(const FieldDecl *FD,
+ const VarDecl *NativeParam) const {
+ return NativeParam;
+ }
+
+ /// Gets the address of the native argument based on the address of the
+ /// target-specific parameter.
+ /// \param NativeParam Parameter itself.
+ /// \param TargetParam Corresponding target-specific parameter.
+ virtual Address getParameterAddress(CodeGenFunction &CGF,
+ const VarDecl *NativeParam,
+ const VarDecl *TargetParam) const;
+
+ /// Emits call of the outlined function with the provided arguments,
+ /// translating these arguments to correct target-specific arguments.
+ virtual void + emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, + llvm::Value *OutlinedFn, + ArrayRef<llvm::Value *> Args = llvm::None) const; }; } // namespace CodeGen diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 3ced05d08a47..b5fc8d308067 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -22,19 +22,21 @@ using namespace CodeGen; namespace { enum OpenMPRTLFunctionNVPTX { - /// \brief Call to void __kmpc_kernel_init(kmp_int32 thread_limit); + /// \brief Call to void __kmpc_kernel_init(kmp_int32 thread_limit, + /// int16_t RequiresOMPRuntime); OMPRTL_NVPTX__kmpc_kernel_init, - /// \brief Call to void __kmpc_kernel_deinit(); + /// \brief Call to void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized); OMPRTL_NVPTX__kmpc_kernel_deinit, /// \brief Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit, - /// short RequiresOMPRuntime, short RequiresDataSharing); + /// int16_t RequiresOMPRuntime, int16_t RequiresDataSharing); OMPRTL_NVPTX__kmpc_spmd_kernel_init, /// \brief Call to void __kmpc_spmd_kernel_deinit(); OMPRTL_NVPTX__kmpc_spmd_kernel_deinit, /// \brief Call to void __kmpc_kernel_prepare_parallel(void - /// *outlined_function); + /// *outlined_function, void ***args, kmp_int32 nArgs); OMPRTL_NVPTX__kmpc_kernel_prepare_parallel, - /// \brief Call to bool __kmpc_kernel_parallel(void **outlined_function); + /// \brief Call to bool __kmpc_kernel_parallel(void **outlined_function, void + /// ***args); OMPRTL_NVPTX__kmpc_kernel_parallel, /// \brief Call to void __kmpc_kernel_end_parallel(); OMPRTL_NVPTX__kmpc_kernel_end_parallel, @@ -150,20 +152,18 @@ enum NamedBarrier : unsigned { /// Get the GPU warp size. static llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF) { - CGBuilderTy &Bld = CGF.Builder; - return Bld.CreateCall( + return CGF.EmitRuntimeCall( llvm::Intrinsic::getDeclaration( &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize), - llvm::None, "nvptx_warp_size"); + "nvptx_warp_size"); } /// Get the id of the current thread on the GPU. static llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF) { - CGBuilderTy &Bld = CGF.Builder; - return Bld.CreateCall( + return CGF.EmitRuntimeCall( llvm::Intrinsic::getDeclaration( &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x), - llvm::None, "nvptx_tid"); + "nvptx_tid"); } /// Get the id of the warp in the block. @@ -185,17 +185,15 @@ static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) { /// Get the maximum number of threads in a block of the GPU. static llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF) { - CGBuilderTy &Bld = CGF.Builder; - return Bld.CreateCall( + return CGF.EmitRuntimeCall( llvm::Intrinsic::getDeclaration( &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x), - llvm::None, "nvptx_num_threads"); + "nvptx_num_threads"); } /// Get barrier to synchronize all threads in a block. 
static void getNVPTXCTABarrier(CodeGenFunction &CGF) { - CGBuilderTy &Bld = CGF.Builder; - Bld.CreateCall(llvm::Intrinsic::getDeclaration( + CGF.EmitRuntimeCall(llvm::Intrinsic::getDeclaration( &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier0)); } @@ -205,9 +203,9 @@ static void getNVPTXBarrier(CodeGenFunction &CGF, int ID, llvm::Value *NumThreads) { CGBuilderTy &Bld = CGF.Builder; llvm::Value *Args[] = {Bld.getInt32(ID), NumThreads}; - Bld.CreateCall(llvm::Intrinsic::getDeclaration(&CGF.CGM.getModule(), - llvm::Intrinsic::nvvm_barrier), - Args); + CGF.EmitRuntimeCall(llvm::Intrinsic::getDeclaration( + &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier), + Args); } /// Synchronize all GPU threads in a block. @@ -280,6 +278,8 @@ getExecutionModeForDirective(CodeGenModule &CGM, case OMPD_target_teams: return CGOpenMPRuntimeNVPTX::ExecutionMode::Generic; case OMPD_target_parallel: + case OMPD_target_parallel_for: + case OMPD_target_parallel_for_simd: return CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd; default: llvm_unreachable("Unsupported directive on NVPTX device."); @@ -298,6 +298,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericKernel(const OMPExecutableDirective &D, EntryFunctionState EST; WorkerFunctionState WST(CGM); Work.clear(); + WrapperFunctionsMap.clear(); // Emit target region as a standalone region. class NVPTXPrePostActionTy : public PrePostActionTy { @@ -345,7 +346,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericEntryHeader(CodeGenFunction &CGF, Bld.CreateCondBr(IsWorker, WorkerBB, MasterCheckBB); CGF.EmitBlock(WorkerBB); - CGF.EmitCallOrInvoke(WST.WorkerFn, llvm::None); + emitCall(CGF, WST.WorkerFn); CGF.EmitBranch(EST.ExitBB); CGF.EmitBlock(MasterCheckBB); @@ -356,7 +357,9 @@ void CGOpenMPRuntimeNVPTX::emitGenericEntryHeader(CodeGenFunction &CGF, CGF.EmitBlock(MasterBB); // First action in sequential region: // Initialize the state of the OpenMP runtime library on the GPU. - llvm::Value *Args[] = {getThreadLimit(CGF)}; + // TODO: Optimize runtime initialization and pass in correct value. + llvm::Value *Args[] = {getThreadLimit(CGF), + Bld.getInt16(/*RequiresOMPRuntime=*/1)}; CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), Args); } @@ -371,8 +374,10 @@ void CGOpenMPRuntimeNVPTX::emitGenericEntryFooter(CodeGenFunction &CGF, CGF.EmitBlock(TerminateBB); // Signal termination condition. + // TODO: Optimize runtime initialization and pass in correct value. + llvm::Value *Args[] = {CGF.Builder.getInt16(/*IsOMPRuntimeInitialized=*/1)}; CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_deinit), None); + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_deinit), Args); // Barrier to terminate worker threads. syncCTAThreads(CGF); // Master thread jumps to exit point. 
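Collecting the signature changes spelled out in the \brief comments above, the NVPTX runtime interface this patch codegens against now looks as follows (declarations reconstructed from those comments; the kmp_int32 typedef is assumed to be the usual 32-bit integer):

    #include <cstdint>
    using kmp_int32 = std::int32_t;

    extern "C" {
    void __kmpc_kernel_init(kmp_int32 thread_limit, int16_t RequiresOMPRuntime);
    void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized);
    void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
                                 int16_t RequiresOMPRuntime,
                                 int16_t RequiresDataSharing);
    void __kmpc_spmd_kernel_deinit();
    void __kmpc_kernel_prepare_parallel(void *outlined_function, void ***args,
                                        kmp_int32 nArgs);
    bool __kmpc_kernel_parallel(void **outlined_function, void ***args);
    void __kmpc_kernel_end_parallel();
    }

Note that both init entries currently receive a hard-coded 1 for the runtime flags; the TODOs in the hunks above leave computing the real values to a later change.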
@@ -413,7 +418,6 @@ void CGOpenMPRuntimeNVPTX::emitSpmdKernel(const OMPExecutableDirective &D,
CodeGen.setAction(Action);
emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
IsOffloadEntry, CodeGen);
- return;
}
void CGOpenMPRuntimeNVPTX::emitSpmdEntryHeader(
@@ -471,7 +475,7 @@ static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name,
}
void CGOpenMPRuntimeNVPTX::emitWorkerFunction(WorkerFunctionState &WST) {
- auto &Ctx = CGM.getContext();
+ ASTContext &Ctx = CGM.getContext();
CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
CGF.disableDebugInfo();
@@ -514,7 +518,10 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF,
CGF.InitTempAlloca(ExecStatus, Bld.getInt8(/*C=*/0));
CGF.InitTempAlloca(WorkFn, llvm::Constant::getNullValue(CGF.Int8PtrTy));
- llvm::Value *Args[] = {WorkFn.getPointer()};
+ // Set up shared arguments
+ Address SharedArgs =
+ CGF.CreateDefaultAlignTempAlloca(CGF.Int8PtrPtrTy, "shared_args");
+ llvm::Value *Args[] = {WorkFn.getPointer(), SharedArgs.getPointer()};
llvm::Value *Ret = CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args);
Bld.CreateStore(Bld.CreateZExt(Ret, CGF.Int8Ty), ExecStatus);
@@ -533,6 +540,9 @@
// Signal start of parallel region.
CGF.EmitBlock(ExecuteBB);
+ // Current context
+ ASTContext &Ctx = CGF.getContext();
+
// Process work items: outlined parallel functions.
for (auto *W : Work) {
// Try to match this outlined function.
@@ -548,14 +558,18 @@
// Execute this outlined function.
CGF.EmitBlock(ExecuteFNBB);
- // Insert call to work function.
- // FIXME: Pass arguments to outlined function from master thread.
- auto *Fn = cast<llvm::Function>(W);
- Address ZeroAddr =
- CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, /*Name=*/".zero.addr");
- CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C=*/0));
- llvm::Value *FnArgs[] = {ZeroAddr.getPointer(), ZeroAddr.getPointer()};
- CGF.EmitCallOrInvoke(Fn, FnArgs);
+ // Insert call to work function via shared wrapper. The shared
+ // wrapper takes exactly three arguments:
+ // - the parallelism level;
+ // - the master thread ID;
+ // - the list of references to shared arguments.
+ //
+ // TODO: Assert that the function is a wrapper function.
+ Address Capture = CGF.EmitLoadOfPointer(SharedArgs,
+ Ctx.getPointerType(
+ Ctx.getPointerType(Ctx.VoidPtrTy)).castAs<PointerType>());
+ emitCall(CGF, W, {Bld.getInt16(/*ParallelLevel=*/0),
+ getMasterThreadID(CGF), Capture.getPointer()});
// Go to end of parallel region.
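Taken together, the worker loop now implements a small hand-off protocol: park in __kmpc_kernel_parallel, receive both the wrapper to execute and the shared-argument list, and invoke the wrapper with the fixed three-argument shape described in the comment above. A rough pseudo-C++ rendering of one worker iteration (a sketch of the emitted control flow, not actual builder code; master_tid and matched_wrapper are stand-in names):

    void *WorkFn = nullptr;
    void **SharedArgs = nullptr;
    bool Active = __kmpc_kernel_parallel(&WorkFn, &SharedArgs);
    if (Active && WorkFn == matched_wrapper) {
      // ParallelLevel is currently hard-coded to 0.
      ((void (*)(int16_t, int32_t, void **))WorkFn)(0, master_tid, SharedArgs);
      __kmpc_kernel_end_parallel();
    }

The void** list is the same storage the master fills in emitGenericParallelCall below.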
CGF.EmitBranch(TerminateBB); @@ -589,23 +603,25 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { llvm::Constant *RTLFn = nullptr; switch (static_cast<OpenMPRTLFunctionNVPTX>(Function)) { case OMPRTL_NVPTX__kmpc_kernel_init: { - // Build void __kmpc_kernel_init(kmp_int32 thread_limit); - llvm::Type *TypeParams[] = {CGM.Int32Ty}; + // Build void __kmpc_kernel_init(kmp_int32 thread_limit, int16_t + // RequiresOMPRuntime); + llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_init"); break; } case OMPRTL_NVPTX__kmpc_kernel_deinit: { - // Build void __kmpc_kernel_deinit(); + // Build void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized); + llvm::Type *TypeParams[] = {CGM.Int16Ty}; llvm::FunctionType *FnTy = - llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_deinit"); break; } case OMPRTL_NVPTX__kmpc_spmd_kernel_init: { // Build void __kmpc_spmd_kernel_init(kmp_int32 thread_limit, - // short RequiresOMPRuntime, short RequiresDataSharing); + // int16_t RequiresOMPRuntime, int16_t RequiresDataSharing); llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); @@ -621,16 +637,18 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { } case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: { /// Build void __kmpc_kernel_prepare_parallel( - /// void *outlined_function); - llvm::Type *TypeParams[] = {CGM.Int8PtrTy}; + /// void *outlined_function, void ***args, kmp_int32 nArgs); + llvm::Type *TypeParams[] = {CGM.Int8PtrTy, + CGM.Int8PtrPtrTy->getPointerTo(0), CGM.Int32Ty}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel"); break; } case OMPRTL_NVPTX__kmpc_kernel_parallel: { - /// Build bool __kmpc_kernel_parallel(void **outlined_function); - llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy}; + /// Build bool __kmpc_kernel_parallel(void **outlined_function, void ***args); + llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy, + CGM.Int8PtrPtrTy->getPointerTo(0)}; llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy); llvm::FunctionType *FnTy = llvm::FunctionType::get(RetTy, TypeParams, /*isVarArg*/ false); @@ -849,8 +867,17 @@ void CGOpenMPRuntimeNVPTX::emitNumTeamsClause(CodeGenFunction &CGF, llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { - return CGOpenMPRuntime::emitParallelOutlinedFunction(D, ThreadIDVar, - InnermostKind, CodeGen); + + auto *OutlinedFun = cast<llvm::Function>( + CGOpenMPRuntime::emitParallelOutlinedFunction( + D, ThreadIDVar, InnermostKind, CodeGen)); + if (!isInSpmdExecutionMode()) { + llvm::Function *WrapperFun = + createDataSharingWrapper(OutlinedFun, D); + WrapperFunctionsMap[OutlinedFun] = WrapperFun; + } + + return OutlinedFun; } llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction( @@ -883,7 +910,7 @@ void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF, OutlinedFnArgs.push_back(ZeroAddr.getPointer()); OutlinedFnArgs.push_back(ZeroAddr.getPointer()); 
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
- CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
+ emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
}
void CGOpenMPRuntimeNVPTX::emitParallelCall(
@@ -902,15 +929,54 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall(
CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) {
llvm::Function *Fn = cast<llvm::Function>(OutlinedFn);
+ llvm::Function *WFn = WrapperFunctionsMap[Fn];
+ assert(WFn && "Wrapper function does not exist!");
+
+ // Force inline this outlined function at its call site.
+ Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
- auto &&L0ParallelGen = [this, Fn](CodeGenFunction &CGF, PrePostActionTy &) {
+ auto &&L0ParallelGen = [this, WFn, &CapturedVars](CodeGenFunction &CGF,
+ PrePostActionTy &) {
CGBuilderTy &Bld = CGF.Builder;
- // Prepare for parallel region. Indicate the outlined function.
- llvm::Value *Args[] = {Bld.CreateBitOrPointerCast(Fn, CGM.Int8PtrTy)};
- CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel),
- Args);
+ llvm::Value *ID = Bld.CreateBitOrPointerCast(WFn, CGM.Int8PtrTy);
+
+ if (!CapturedVars.empty()) {
+ // There's something to share, add the attribute
+ CGF.CurFn->addFnAttr("has-nvptx-shared-depot");
+ // Prepare for parallel region. Indicate the outlined function.
+ Address SharedArgs =
+ CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy,
+ "shared_args");
+ llvm::Value *SharedArgsPtr = SharedArgs.getPointer();
+ llvm::Value *Args[] = {ID, SharedArgsPtr,
+ Bld.getInt32(CapturedVars.size())};
+
+ CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel),
+ Args);
+
+ unsigned Idx = 0;
+ ASTContext &Ctx = CGF.getContext();
+ for (llvm::Value *V : CapturedVars) {
+ Address Dst = Bld.CreateConstInBoundsGEP(
+ CGF.EmitLoadOfPointer(SharedArgs,
+ Ctx.getPointerType(
+ Ctx.getPointerType(Ctx.VoidPtrTy)).castAs<PointerType>()),
+ Idx, CGF.getPointerSize());
+ llvm::Value *PtrV = Bld.CreateBitCast(V, CGF.VoidPtrTy);
+ CGF.EmitStoreOfScalar(PtrV, Dst, /*Volatile=*/false,
+ Ctx.getPointerType(Ctx.VoidPtrTy));
+ Idx++;
+ }
+ } else {
+ llvm::Value *Args[] = {ID,
+ llvm::ConstantPointerNull::get(CGF.VoidPtrPtrTy->getPointerTo(0)),
+ /*nArgs=*/Bld.getInt32(0)};
+ CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel),
+ Args);
+ }
// Activate workers. This barrier is used by the master to signal
// work for the workers.
@@ -925,17 +991,17 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall(
syncCTAThreads(CGF);
// Remember for post-processing in worker loop.
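This is the master half of the protocol: allocate a void** slot, let __kmpc_kernel_prepare_parallel point it at runtime-managed storage sized for nArgs entries, then store each captured variable's address into consecutive slots before releasing the workers. Reduced to its logic (an illustrative sketch; wrapper_id, n_captures and capture_addr are stand-in names):

    void **shared_args;  // filled in by the runtime
    __kmpc_kernel_prepare_parallel(wrapper_id, &shared_args, n_captures);
    for (unsigned i = 0; i < n_captures; ++i)
      shared_args[i] = capture_addr[i];  // every capture passed as a void*
    // syncCTAThreads(): one barrier releases the workers, a second joins them

When there is nothing to share, a null list and nArgs = 0 are passed instead, as the else branch shows.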
- Work.push_back(Fn); + Work.emplace_back(WFn); }; auto *RTLoc = emitUpdateLocation(CGF, Loc); auto *ThreadID = getThreadID(CGF, Loc); llvm::Value *Args[] = {RTLoc, ThreadID}; - auto &&SeqGen = [this, Fn, &CapturedVars, &Args](CodeGenFunction &CGF, - PrePostActionTy &) { - auto &&CodeGen = [this, Fn, &CapturedVars](CodeGenFunction &CGF, - PrePostActionTy &Action) { + auto &&SeqGen = [this, Fn, &CapturedVars, &Args, Loc](CodeGenFunction &CGF, + PrePostActionTy &) { + auto &&CodeGen = [this, Fn, &CapturedVars, Loc](CodeGenFunction &CGF, + PrePostActionTy &Action) { Action.Enter(CGF); llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; @@ -944,7 +1010,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall( OutlinedFnArgs.push_back( llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo())); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); - CGF.EmitCallOrInvoke(Fn, OutlinedFnArgs); + emitOutlinedFunctionCall(CGF, Loc, Fn, OutlinedFnArgs); }; RegionCodeGenTy RCG(CodeGen); @@ -980,7 +1046,7 @@ void CGOpenMPRuntimeNVPTX::emitSpmdParallelCall( OutlinedFnArgs.push_back( llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo())); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); - CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); + emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); } /// This function creates calls to one of two shuffle functions to copy @@ -2238,3 +2304,183 @@ void CGOpenMPRuntimeNVPTX::emitReduction( CGF.EmitBranch(DefaultBB); CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); } + +const VarDecl * +CGOpenMPRuntimeNVPTX::translateParameter(const FieldDecl *FD, + const VarDecl *NativeParam) const { + if (!NativeParam->getType()->isReferenceType()) + return NativeParam; + QualType ArgType = NativeParam->getType(); + QualifierCollector QC; + const Type *NonQualTy = QC.strip(ArgType); + QualType PointeeTy = cast<ReferenceType>(NonQualTy)->getPointeeType(); + if (const auto *Attr = FD->getAttr<OMPCaptureKindAttr>()) { + if (Attr->getCaptureKind() == OMPC_map) { + PointeeTy = CGM.getContext().getAddrSpaceQualType(PointeeTy, + LangAS::opencl_global); + } + } + ArgType = CGM.getContext().getPointerType(PointeeTy); + QC.addRestrict(); + enum { NVPTX_local_addr = 5 }; + QC.addAddressSpace(getLangASFromTargetAS(NVPTX_local_addr)); + ArgType = QC.apply(CGM.getContext(), ArgType); + if (isa<ImplicitParamDecl>(NativeParam)) { + return ImplicitParamDecl::Create( + CGM.getContext(), /*DC=*/nullptr, NativeParam->getLocation(), + NativeParam->getIdentifier(), ArgType, ImplicitParamDecl::Other); + } + return ParmVarDecl::Create( + CGM.getContext(), + const_cast<DeclContext *>(NativeParam->getDeclContext()), + NativeParam->getLocStart(), NativeParam->getLocation(), + NativeParam->getIdentifier(), ArgType, + /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr); +} + +Address +CGOpenMPRuntimeNVPTX::getParameterAddress(CodeGenFunction &CGF, + const VarDecl *NativeParam, + const VarDecl *TargetParam) const { + assert(NativeParam != TargetParam && + NativeParam->getType()->isReferenceType() && + "Native arg must not be the same as target arg."); + Address LocalAddr = CGF.GetAddrOfLocalVar(TargetParam); + QualType NativeParamType = NativeParam->getType(); + QualifierCollector QC; + const Type *NonQualTy = QC.strip(NativeParamType); + QualType NativePointeeTy = cast<ReferenceType>(NonQualTy)->getPointeeType(); + unsigned NativePointeeAddrSpace = + CGF.getContext().getTargetAddressSpace(NativePointeeTy); + QualType TargetTy = TargetParam->getType(); + llvm::Value 
*TargetAddr = CGF.EmitLoadOfScalar(
+ LocalAddr, /*Volatile=*/false, TargetTy, SourceLocation());
+ // First cast to generic.
+ TargetAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ TargetAddr, TargetAddr->getType()->getPointerElementType()->getPointerTo(
+ /*AddrSpace=*/0));
+ // Cast from generic to native address space.
+ TargetAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ TargetAddr, TargetAddr->getType()->getPointerElementType()->getPointerTo(
+ NativePointeeAddrSpace));
+ Address NativeParamAddr = CGF.CreateMemTemp(NativeParamType);
+ CGF.EmitStoreOfScalar(TargetAddr, NativeParamAddr, /*Volatile=*/false,
+ NativeParamType);
+ return NativeParamAddr;
+}
+
+void CGOpenMPRuntimeNVPTX::emitOutlinedFunctionCall(
+ CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
+ ArrayRef<llvm::Value *> Args) const {
+ SmallVector<llvm::Value *, 4> TargetArgs;
+ TargetArgs.reserve(Args.size());
+ auto *FnType =
+ cast<llvm::FunctionType>(OutlinedFn->getType()->getPointerElementType());
+ for (unsigned I = 0, E = Args.size(); I < E; ++I) {
+ if (FnType->isVarArg() && FnType->getNumParams() <= I) {
+ TargetArgs.append(std::next(Args.begin(), I), Args.end());
+ break;
+ }
+ llvm::Type *TargetType = FnType->getParamType(I);
+ llvm::Value *NativeArg = Args[I];
+ if (!TargetType->isPointerTy()) {
+ TargetArgs.emplace_back(NativeArg);
+ continue;
+ }
+ llvm::Value *TargetArg = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ NativeArg, NativeArg->getType()->getPointerElementType()->getPointerTo(
+ /*AddrSpace=*/0));
+ TargetArgs.emplace_back(
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TargetArg, TargetType));
+ }
+ CGOpenMPRuntime::emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, TargetArgs);
+}
+
+/// Emit function which wraps the outlined parallel region
+/// and controls the arguments which are passed to this function.
+/// The wrapper ensures that the outlined function is called
+/// with the correct arguments when data is shared.
+llvm::Function *CGOpenMPRuntimeNVPTX::createDataSharingWrapper(
+ llvm::Function *OutlinedParallelFn, const OMPExecutableDirective &D) {
+ ASTContext &Ctx = CGM.getContext();
+ const auto &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
+
+ // Create a function that takes as argument the source thread.
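These overrides exist because NVPTX distinguishes address spaces that the generic OpenMP codegen flattens. In rough outline (hypothetical signatures, for illustration only):

    void omp_outlined_native(int &x);      // native view: what the region body uses
    void omp_outlined_target(int *__restrict x); // after translateParameter: an
                                                 // explicit pointer, global AS for
                                                 // map'd data, the parameter itself
                                                 // in the NVPTX local space (AS 5)

getParameterAddress bridges the two views by loading the target pointer, casting it through the generic address space into the native pointee space, and spilling it to a temporary the body can use, while emitOutlinedFunctionCall performs the matching generic-AS round trip on every pointer argument at call sites.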
+ FunctionArgList WrapperArgs;
+ QualType Int16QTy =
+ Ctx.getIntTypeForBitwidth(/*DestWidth=*/16, /*Signed=*/false);
+ QualType Int32QTy =
+ Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false);
+ QualType Int32PtrQTy = Ctx.getPointerType(Int32QTy);
+ QualType VoidPtrPtrQTy = Ctx.getPointerType(Ctx.VoidPtrTy);
+ ImplicitParamDecl ParallelLevelArg(Ctx, Int16QTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl WrapperArg(Ctx, Int32QTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl SharedArgsList(Ctx, VoidPtrPtrQTy,
+ ImplicitParamDecl::Other);
+ WrapperArgs.emplace_back(&ParallelLevelArg);
+ WrapperArgs.emplace_back(&WrapperArg);
+ WrapperArgs.emplace_back(&SharedArgsList);
+
+ auto &CGFI =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, WrapperArgs);
+
+ auto *Fn = llvm::Function::Create(
+ CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
+ OutlinedParallelFn->getName() + "_wrapper", &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
+ Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
+
+ CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
+ CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, Fn, CGFI, WrapperArgs);
+
+ const auto *RD = CS.getCapturedRecordDecl();
+ auto CurField = RD->field_begin();
+
+ // Get the array of arguments.
+ SmallVector<llvm::Value *, 8> Args;
+
+ // TODO: support SIMD and pass actual values
+ Args.emplace_back(llvm::ConstantPointerNull::get(
+ CGM.Int32Ty->getPointerTo()));
+ Args.emplace_back(llvm::ConstantPointerNull::get(
+ CGM.Int32Ty->getPointerTo()));
+
+ CGBuilderTy &Bld = CGF.Builder;
+ auto CI = CS.capture_begin();
+
+ // Load the start of the array
+ auto SharedArgs =
+ CGF.EmitLoadOfPointer(CGF.GetAddrOfLocalVar(&SharedArgsList),
+ VoidPtrPtrQTy->castAs<PointerType>());
+
+ // For each captured variable
+ for (unsigned I = 0; I < CS.capture_size(); ++I, ++CI, ++CurField) {
+ // Name of captured variable
+ StringRef Name;
+ if (CI->capturesThis())
+ Name = "this";
+ else
+ Name = CI->getCapturedVar()->getName();
+
+ // We retrieve the CLANG type of the argument. We use it to create
+ // an alloca which will give us the LLVM type.
+ QualType ElemTy = CurField->getType();
+ // If this is a capture by copy the element type has to be the pointer to
+ // the data.
+ if (CI->capturesVariableByCopy())
+ ElemTy = Ctx.getPointerType(ElemTy);
+
+ // Get shared address of the captured variable.
+ Address ArgAddress = Bld.CreateConstInBoundsGEP(
+ SharedArgs, I, CGF.getPointerSize());
+ Address TypedArgAddress = Bld.CreateBitCast(
+ ArgAddress, CGF.ConvertTypeForMem(Ctx.getPointerType(ElemTy)));
+ llvm::Value *Arg = CGF.EmitLoadOfScalar(TypedArgAddress,
+ /*Volatile=*/false, Int32PtrQTy, SourceLocation());
+ Args.emplace_back(Arg);
+ }
+
+ emitCall(CGF, OutlinedParallelFn, Args);
+ CGF.FinishFunction();
+ return Fn;
+}
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
index ae25e94759e6..5d13408318a5 100644
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -268,6 +268,26 @@ public:
/// \return Specified function.
llvm::Constant *createNVPTXRuntimeFunction(unsigned Function);
+ /// Translates the native parameter of outlined function if this is required
+ /// for target.
+ /// \param FD Field decl from captured record for the parameter.
+ /// \param NativeParam Parameter itself.
+ const VarDecl *translateParameter(const FieldDecl *FD,
+ const VarDecl *NativeParam) const override;
+
+ /// Gets the address of the native argument based on the address of the
+ /// target-specific parameter.
+ /// \param NativeParam Parameter itself.
+ /// \param TargetParam Corresponding target-specific parameter.
+ Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam,
+ const VarDecl *TargetParam) const override;
+
+ /// Emits call of the outlined function with the provided arguments,
+ /// translating these arguments to correct target-specific arguments.
+ void emitOutlinedFunctionCall(
+ CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
+ ArrayRef<llvm::Value *> Args = llvm::None) const override;
+
/// Target codegen is specialized based on two programming models: the
/// 'generic' fork-join model of OpenMP, and a more GPU efficient 'spmd'
/// model for constructs like 'target parallel' that support it.
@@ -285,6 +305,17 @@ private:
// target region and used by containing directives such as 'parallel'
// to emit optimized code.
ExecutionMode CurrentExecutionMode;
+
+ /// Map between an outlined function and its wrapper.
+ llvm::DenseMap<llvm::Function *, llvm::Function *> WrapperFunctionsMap;
+
+ /// Emit function which wraps the outlined parallel region
+ /// and controls the parameters which are passed to this function.
+ /// The wrapper ensures that the outlined function is called
+ /// with the correct arguments when data is shared.
+ llvm::Function *
+ createDataSharingWrapper(llvm::Function *OutlinedParallelFn,
+ const OMPExecutableDirective &D);
};
} // CodeGen namespace.
diff --git a/lib/CodeGen/CGRecordLayoutBuilder.cpp b/lib/CodeGen/CGRecordLayoutBuilder.cpp
index 7d530a278fbf..1644ab4c0725 100644
--- a/lib/CodeGen/CGRecordLayoutBuilder.cpp
+++ b/lib/CodeGen/CGRecordLayoutBuilder.cpp
@@ -403,6 +403,27 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field,
}
return;
}
+
+ // Check if current Field is better as a single field run. When current field
+ // has legal integer width, and its bitfield offset is naturally aligned, it
+ // is better to make the bitfield a separate storage component so that it can
+ // be accessed directly with lower cost.
+ auto IsBetterAsSingleFieldRun = [&](RecordDecl::field_iterator Field) {
+ if (!Types.getCodeGenOpts().FineGrainedBitfieldAccesses)
+ return false;
+ unsigned Width = Field->getBitWidthValue(Context);
+ if (!DataLayout.isLegalInteger(Width))
+ return false;
+ // Make sure Field is naturally aligned if it is treated as an IType integer.
+ if (getFieldBitOffset(*Field) %
+ Context.toBits(getAlignment(getIntNType(Width))) !=
+ 0)
+ return false;
+ return true;
+ };
+
+ // The start field is better as a single field run.
+ bool StartFieldAsSingleRun = false;
for (;;) {
// Check to see if we need to start a new run.
if (Run == FieldEnd) {
@@ -414,17 +435,28 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field,
Run = Field;
StartBitOffset = getFieldBitOffset(*Field);
Tail = StartBitOffset + Field->getBitWidthValue(Context);
+ StartFieldAsSingleRun = IsBetterAsSingleFieldRun(Run);
}
++Field;
continue;
}
- // Add bitfields to the run as long as they qualify.
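For the CGRecordLayoutBuilder change: the new IsBetterAsSingleFieldRun predicate ends a bitfield run whenever a member has a legal integer width and sits at a naturally aligned offset, so that member gets its own storage unit and can be loaded and stored directly. Under the option reflected by FineGrainedBitfieldAccesses (the driver spelling is believed to be -ffine-grained-bitfield-accesses), an illustrative record splits like this:

    struct Flags {
      unsigned mode : 16;  // legal i16 at bit 0: becomes its own storage unit
      unsigned a : 3;      // 3- and 13-bit members still share one 16-bit run
      unsigned b : 13;
      unsigned count : 32; // legal i32 at bit 32: becomes its own storage unit
    };

so accesses to mode or count no longer go through a wide combined load, at the cost of more storage units in the record layout.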
- if (Field != FieldEnd && Field->getBitWidthValue(Context) != 0 && + + // If the start field of a new run is better as a single run, or + // if current field is better as a single run, or + // if current field has zero width bitfield, or + // if the offset of current field is inconsistent with the offset of + // previous field plus its offset, + // skip the block below and go ahead to emit the storage. + // Otherwise, try to add bitfields to the run. + if (!StartFieldAsSingleRun && Field != FieldEnd && + !IsBetterAsSingleFieldRun(Field) && + Field->getBitWidthValue(Context) != 0 && Tail == getFieldBitOffset(*Field)) { Tail += Field->getBitWidthValue(Context); ++Field; continue; } + // We've hit a break-point in the run and need to emit a storage field. llvm::Type *Type = getIntNType(Tail - StartBitOffset); // Add the storage member to the record and set the bitfield info for all of @@ -435,6 +467,7 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field, Members.push_back(MemberInfo(bitsToCharUnits(StartBitOffset), MemberInfo::Field, nullptr, *Run)); Run = FieldEnd; + StartFieldAsSingleRun = false; } } diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp index a13c38646164..91fa49a46ef1 100644 --- a/lib/CodeGen/CGStmt.cpp +++ b/lib/CodeGen/CGStmt.cpp @@ -45,7 +45,7 @@ void CodeGenFunction::EmitStopPoint(const Stmt *S) { } } -void CodeGenFunction::EmitStmt(const Stmt *S) { +void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) { assert(S && "Null statement?"); PGO.setCurrentStmt(S); @@ -131,16 +131,16 @@ void CodeGenFunction::EmitStmt(const Stmt *S) { case Stmt::IndirectGotoStmtClass: EmitIndirectGotoStmt(cast<IndirectGotoStmt>(*S)); break; - case Stmt::IfStmtClass: EmitIfStmt(cast<IfStmt>(*S)); break; - case Stmt::WhileStmtClass: EmitWhileStmt(cast<WhileStmt>(*S)); break; - case Stmt::DoStmtClass: EmitDoStmt(cast<DoStmt>(*S)); break; - case Stmt::ForStmtClass: EmitForStmt(cast<ForStmt>(*S)); break; + case Stmt::IfStmtClass: EmitIfStmt(cast<IfStmt>(*S)); break; + case Stmt::WhileStmtClass: EmitWhileStmt(cast<WhileStmt>(*S), Attrs); break; + case Stmt::DoStmtClass: EmitDoStmt(cast<DoStmt>(*S), Attrs); break; + case Stmt::ForStmtClass: EmitForStmt(cast<ForStmt>(*S), Attrs); break; - case Stmt::ReturnStmtClass: EmitReturnStmt(cast<ReturnStmt>(*S)); break; + case Stmt::ReturnStmtClass: EmitReturnStmt(cast<ReturnStmt>(*S)); break; - case Stmt::SwitchStmtClass: EmitSwitchStmt(cast<SwitchStmt>(*S)); break; - case Stmt::GCCAsmStmtClass: // Intentional fall-through. - case Stmt::MSAsmStmtClass: EmitAsmStmt(cast<AsmStmt>(*S)); break; + case Stmt::SwitchStmtClass: EmitSwitchStmt(cast<SwitchStmt>(*S)); break; + case Stmt::GCCAsmStmtClass: // Intentional fall-through. 
+ case Stmt::MSAsmStmtClass: EmitAsmStmt(cast<AsmStmt>(*S)); break; case Stmt::CoroutineBodyStmtClass: EmitCoroutineBody(cast<CoroutineBodyStmt>(*S)); break; @@ -178,7 +178,7 @@ void CodeGenFunction::EmitStmt(const Stmt *S) { EmitCXXTryStmt(cast<CXXTryStmt>(*S)); break; case Stmt::CXXForRangeStmtClass: - EmitCXXForRangeStmt(cast<CXXForRangeStmt>(*S)); + EmitCXXForRangeStmt(cast<CXXForRangeStmt>(*S), Attrs); break; case Stmt::SEHTryStmtClass: EmitSEHTryStmt(cast<SEHTryStmt>(*S)); @@ -555,23 +555,7 @@ void CodeGenFunction::EmitLabelStmt(const LabelStmt &S) { } void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) { - const Stmt *SubStmt = S.getSubStmt(); - switch (SubStmt->getStmtClass()) { - case Stmt::DoStmtClass: - EmitDoStmt(cast<DoStmt>(*SubStmt), S.getAttrs()); - break; - case Stmt::ForStmtClass: - EmitForStmt(cast<ForStmt>(*SubStmt), S.getAttrs()); - break; - case Stmt::WhileStmtClass: - EmitWhileStmt(cast<WhileStmt>(*SubStmt), S.getAttrs()); - break; - case Stmt::CXXForRangeStmtClass: - EmitCXXForRangeStmt(cast<CXXForRangeStmt>(*SubStmt), S.getAttrs()); - break; - default: - EmitStmt(SubStmt); - } + EmitStmt(S.getSubStmt(), S.getAttrs()); } void CodeGenFunction::EmitGotoStmt(const GotoStmt &S) { @@ -2165,10 +2149,11 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { llvm::ConstantAsMetadata::get(Loc))); } - if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { - // Conservatively, mark all inline asm blocks in CUDA as convergent - // (meaning, they may call an intrinsically convergent op, such as bar.sync, - // and so can't have certain optimizations applied around them). + if (getLangOpts().assumeFunctionsAreConvergent()) { + // Conservatively, mark all inline asm blocks in CUDA or OpenCL as + // convergent (meaning, they may call an intrinsically convergent op, such + // as bar.sync, and so can't have certain optimizations applied around + // them). Result->addAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::Convergent); } @@ -2210,7 +2195,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { llvm::IntegerType::get(getLLVMContext(), (unsigned)TmpSize)); Tmp = Builder.CreateTrunc(Tmp, TruncTy); } else if (TruncTy->isIntegerTy()) { - Tmp = Builder.CreateTrunc(Tmp, TruncTy); + Tmp = Builder.CreateZExtOrTrunc(Tmp, TruncTy); } else if (TruncTy->isVectorTy()) { Tmp = Builder.CreateBitCast(Tmp, TruncTy); } @@ -2283,7 +2268,6 @@ CodeGenFunction::GenerateCapturedStmtFunction(const CapturedStmt &S) { Args.append(CD->param_begin(), CD->param_end()); // Create the function declaration. - FunctionType::ExtInfo ExtInfo; const CGFunctionInfo &FuncInfo = CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args); llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index cf430f860fd8..f04d28ed0d4a 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -65,6 +65,8 @@ public: for (auto &C : CS->captures()) { if (C.capturesVariable() || C.capturesVariableByCopy()) { auto *VD = C.getCapturedVar(); + assert(VD == VD->getCanonicalDecl() && + "Canonical decl must be captured."); DeclRefExpr DRE(const_cast<VarDecl *>(VD), isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo && @@ -119,6 +121,14 @@ public: /// of used expression from loop statement. 
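Threading Attrs through EmitStmt lets EmitAttributedStmt collapse to a one-liner instead of re-dispatching over the four loop classes. The usual producer of an AttributedStmt around a loop is a loop pragma; illustrative user code:

    void scale(float *a, int n) {
    #pragma clang loop vectorize(enable) interleave(enable)
      for (int i = 0; i < n; ++i)
        a[i] *= 2.0f;
    }

Here the for loop reaches EmitForStmt with the loop-hint attributes already attached via the new parameter, so any statement class that later learns to consume attributes gets them for free.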
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) { + CodeGenFunction::OMPPrivateScope PreCondScope(CGF); + for (auto *E : S.counters()) { + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + (void)PreCondScope.addPrivate(VD, [&CGF, VD]() { + return CGF.CreateMemTemp(VD->getType().getNonReferenceType()); + }); + } + (void)PreCondScope.Privatize(); if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) { if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) { for (const auto *I : PreInits->decls()) @@ -136,6 +146,26 @@ public: } // namespace +static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, + const OMPExecutableDirective &S, + const RegionCodeGenTy &CodeGen); + +LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) { + if (auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) { + if (auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) { + OrigVD = OrigVD->getCanonicalDecl(); + bool IsCaptured = + LambdaCaptureFields.lookup(OrigVD) || + (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) || + (CurCodeDecl && isa<BlockDecl>(CurCodeDecl)); + DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), IsCaptured, + OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc()); + return EmitLValue(&DRE); + } + } + return EmitLValue(E); +} + llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) { auto &C = getContext(); llvm::Value *Size = nullptr; @@ -236,6 +266,12 @@ static QualType getCanonicalParamType(ASTContext &C, QualType T) { } if (T->isPointerType()) return C.getPointerType(getCanonicalParamType(C, T->getPointeeType())); + if (auto *A = T->getAsArrayTypeUnsafe()) { + if (auto *VLA = dyn_cast<VariableArrayType>(A)) + return getCanonicalParamType(C, VLA->getElementType()); + else if (!A->isVariablyModifiedType()) + return C.getCanonicalType(T); + } return C.getCanonicalParamType(T); } @@ -246,12 +282,12 @@ namespace { const CapturedStmt *S = nullptr; /// true if cast to/from UIntPtr is required for variables captured by /// value. - bool UIntPtrCastRequired = true; - /// true if only casted argumefnts must be registered as local args or VLA + const bool UIntPtrCastRequired = true; + /// true if only casted arguments must be registered as local args or VLA /// sizes. - bool RegisterCastedArgsOnly = false; + const bool RegisterCastedArgsOnly = false; /// Name of the generated function. - StringRef FunctionName; + const StringRef FunctionName; explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired, bool RegisterCastedArgsOnly, StringRef FunctionName) @@ -261,9 +297,9 @@ namespace { }; } -static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue( +static llvm::Function *emitOutlinedFunctionPrologue( CodeGenFunction &CGF, FunctionArgList &Args, - llvm::DenseMap<const Decl *, std::pair<const VarDecl *, Address>> + llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> &LocalAddrs, llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> &VLASizes, @@ -276,10 +312,23 @@ static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue( // Build the argument list. 
CodeGenModule &CGM = CGF.CGM; ASTContext &Ctx = CGM.getContext(); - bool HasUIntPtrArgs = false; + FunctionArgList TargetArgs; Args.append(CD->param_begin(), std::next(CD->param_begin(), CD->getContextParamPosition())); + TargetArgs.append( + CD->param_begin(), + std::next(CD->param_begin(), CD->getContextParamPosition())); auto I = FO.S->captures().begin(); + FunctionDecl *DebugFunctionDecl = nullptr; + if (!FO.UIntPtrCastRequired) { + FunctionProtoType::ExtProtoInfo EPI; + DebugFunctionDecl = FunctionDecl::Create( + Ctx, Ctx.getTranslationUnitDecl(), FO.S->getLocStart(), + SourceLocation(), DeclarationName(), Ctx.VoidTy, + Ctx.getTrivialTypeSourceInfo( + Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI)), + SC_Static, /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false); + } for (auto *FD : RD->fields()) { QualType ArgType = FD->getType(); IdentifierInfo *II = nullptr; @@ -292,7 +341,6 @@ static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue( // outlined function. if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) || I->capturesVariableArrayType()) { - HasUIntPtrArgs = true; if (FO.UIntPtrCastRequired) ArgType = Ctx.getUIntPtrType(); } @@ -307,20 +355,36 @@ static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue( II = &Ctx.Idents.get("vla"); } if (ArgType->isVariablyModifiedType()) - ArgType = getCanonicalParamType(Ctx, ArgType.getNonReferenceType()); - Args.push_back(ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, - FD->getLocation(), II, ArgType, - ImplicitParamDecl::Other)); + ArgType = getCanonicalParamType(Ctx, ArgType); + VarDecl *Arg; + if (DebugFunctionDecl && (CapVar || I->capturesThis())) { + Arg = ParmVarDecl::Create( + Ctx, DebugFunctionDecl, + CapVar ? CapVar->getLocStart() : FD->getLocStart(), + CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType, + /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr); + } else { + Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(), + II, ArgType, ImplicitParamDecl::Other); + } + Args.emplace_back(Arg); + // Do not cast arguments if we emit function with non-original types. + TargetArgs.emplace_back( + FO.UIntPtrCastRequired + ? Arg + : CGM.getOpenMPRuntime().translateParameter(FD, Arg)); ++I; } Args.append( std::next(CD->param_begin(), CD->getContextParamPosition() + 1), CD->param_end()); + TargetArgs.append( + std::next(CD->param_begin(), CD->getContextParamPosition() + 1), + CD->param_end()); // Create the function declaration. - FunctionType::ExtInfo ExtInfo; const CGFunctionInfo &FuncInfo = - CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args); + CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs); llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); llvm::Function *F = @@ -328,19 +392,26 @@ static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue( FO.FunctionName, &CGM.getModule()); CGM.SetInternalFunctionAttributes(CD, F, FuncInfo); if (CD->isNothrow()) - F->addFnAttr(llvm::Attribute::NoUnwind); + F->setDoesNotThrow(); // Generate the function. - CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(), - CD->getBody()->getLocStart()); + CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs, + FO.S->getLocStart(), CD->getBody()->getLocStart()); unsigned Cnt = CD->getContextParamPosition(); I = FO.S->captures().begin(); for (auto *FD : RD->fields()) { + // Do not map arguments if we emit function with non-original types. 
+ Address LocalAddr(Address::invalid()); + if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) { + LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt], + TargetArgs[Cnt]); + } else { + LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]); + } // If we are capturing a pointer by copy we don't need to do anything, just // use the value that we get from the arguments. if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) { const VarDecl *CurVD = I->getCapturedVar(); - Address LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]); // If the variable is a reference we need to materialize it here. if (CurVD->getType()->isReferenceType()) { Address RefAddr = CGF.CreateMemTemp( @@ -356,15 +427,14 @@ static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue( continue; } - LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); - LValue ArgLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(Args[Cnt]), - Args[Cnt]->getType(), BaseInfo); + LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(), + AlignmentSource::Decl); if (FD->hasCapturedVLAType()) { if (FO.UIntPtrCastRequired) { ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(), Args[Cnt]->getName(), ArgLVal), - FD->getType(), BaseInfo); + FD->getType(), AlignmentSource::Decl); } auto *ExprArg = CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal(); @@ -376,8 +446,7 @@ static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue( Address ArgAddr = ArgLVal.getAddress(); if (!VarTy->isReferenceType()) { if (ArgLVal.getType()->isLValueReferenceType()) { - ArgAddr = CGF.EmitLoadOfReference( - ArgAddr, ArgLVal.getType()->castAs<ReferenceType>()); + ArgAddr = CGF.EmitLoadOfReference(ArgLVal); } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) { assert(ArgLVal.getType()->isPointerType()); ArgAddr = CGF.EmitLoadOfPointer( @@ -412,7 +481,7 @@ static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue( ++I; } - return {F, HasUIntPtrArgs}; + return F; } llvm::Function * @@ -426,14 +495,17 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { getDebugInfo() && CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo; FunctionArgList Args; - llvm::DenseMap<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs; + llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs; llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes; + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + Out << CapturedStmtInfo->getHelperName(); + if (NeedWrapperFunction) + Out << "_debug__"; FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false, - CapturedStmtInfo->getHelperName()); - llvm::Function *F; - bool HasUIntPtrArgs; - std::tie(F, HasUIntPtrArgs) = emitOutlinedFunctionPrologue( - *this, Args, LocalAddrs, VLASizes, CXXThisValue, FO); + Out.str()); + llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs, + VLASizes, CXXThisValue, FO); for (const auto &LocalAddrPair : LocalAddrs) { if (LocalAddrPair.second.first) { setAddrOfLocalVar(LocalAddrPair.second.first, @@ -445,28 +517,28 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { PGO.assignRegionCounters(GlobalDecl(CD), F); CapturedStmtInfo->EmitBody(*this, CD->getBody()); FinishFunction(CD->getBodyRBrace()); - if (!NeedWrapperFunction || !HasUIntPtrArgs) + if (!NeedWrapperFunction) return F; FunctionOptions WrapperFO(&S, 
/*UIntPtrCastRequired=*/true, /*RegisterCastedArgsOnly=*/true, - ".nondebug_wrapper."); + CapturedStmtInfo->getHelperName()); CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true); - WrapperCGF.disableDebugInfo(); Args.clear(); LocalAddrs.clear(); VLASizes.clear(); llvm::Function *WrapperF = emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes, - WrapperCGF.CXXThisValue, WrapperFO).first; - LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); + WrapperCGF.CXXThisValue, WrapperFO); llvm::SmallVector<llvm::Value *, 4> CallArgs; for (const auto *Arg : Args) { llvm::Value *CallArg; auto I = LocalAddrs.find(Arg); if (I != LocalAddrs.end()) { - LValue LV = - WrapperCGF.MakeAddrLValue(I->second.second, Arg->getType(), BaseInfo); + LValue LV = WrapperCGF.MakeAddrLValue( + I->second.second, + I->second.first ? I->second.first->getType() : Arg->getType(), + AlignmentSource::Decl); CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation()); } else { auto EI = VLASizes.find(Arg); @@ -474,13 +546,15 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { CallArg = EI->second.second; else { LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg), - Arg->getType(), BaseInfo); + Arg->getType(), + AlignmentSource::Decl); CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation()); } } - CallArgs.emplace_back(CallArg); + CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType())); } - WrapperCGF.Builder.CreateCall(F, CallArgs); + CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getLocStart(), + F, CallArgs); WrapperCGF.FinishFunction(); return WrapperF; } @@ -956,7 +1030,9 @@ void CodeGenFunction::EmitOMPReductionClauseInit( auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); - if (isa<OMPArraySectionExpr>(IRef)) { + QualType Type = PrivateVD->getType(); + bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef); + if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) { // Store the address of the original variable associated with the LHS // implicit variable. PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address { @@ -965,7 +1041,8 @@ void CodeGenFunction::EmitOMPReductionClauseInit( PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { return GetAddrOfLocalVar(PrivateVD); }); - } else if (isa<ArraySubscriptExpr>(IRef)) { + } else if ((isaOMPArraySectionExpr && Type->isScalarType()) || + isa<ArraySubscriptExpr>(IRef)) { // Store the address of the original variable associated with the LHS // implicit variable. PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address { @@ -1024,7 +1101,8 @@ void CodeGenFunction::EmitOMPReductionClauseFinal( bool WithNowait = D.getSingleClause<OMPNowaitClause>() || isOpenMPParallelDirective(D.getDirectiveKind()) || D.getDirectiveKind() == OMPD_simd; - bool SimpleReduction = D.getDirectiveKind() == OMPD_simd; + bool SimpleReduction = D.getDirectiveKind() == OMPD_simd || + D.getDirectiveKind() == OMPD_distribute_simd; // Emit nowait reduction if nowait clause is present or directive is a // parallel directive (it always has implicit barrier). CGM.getOpenMPRuntime().emitReduction( @@ -1146,9 +1224,13 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, EmitIgnoredExpr(I); } // Update the linear variables. 
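Net effect of the renaming in GenerateOpenMPCapturedStmtFunction: when debug info is requested, the function carrying the original parameter types is now emitted under the helper name plus a "_debug__" suffix, while the thin uintptr-cast wrapper takes over the plain helper name and forwards to it through emitOutlinedFunctionCall, so target argument translation still applies. For a typical region the symbol pair would look roughly like this (assumed names, for illustration):

    .omp_outlined._debug__   // body, original parameter types, debugger-friendly
    .omp_outlined.           // wrapper, uintptr-cast parameters, calls the above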
- for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { - for (auto *U : C->updates()) - EmitIgnoredExpr(U); + // In distribute directives only loop counters may be marked as linear, no + // need to generate the code for them. + if (!isOpenMPDistributeDirective(D.getDirectiveKind())) { + for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { + for (auto *U : C->updates()) + EmitIgnoredExpr(U); + } } // On a continue in the body, jump to the end. @@ -1488,83 +1570,90 @@ static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, CGF.EmitStopPoint(&S); } -void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { - auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { - OMPLoopScope PreInitScope(CGF, S); - // if (PreCond) { - // for (IV in 0..LastIteration) BODY; - // <Final counter/linear vars updates>; - // } - // +static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, + PrePostActionTy &Action) { + Action.Enter(CGF); + assert(isOpenMPSimdDirective(S.getDirectiveKind()) && + "Expected simd directive"); + OMPLoopScope PreInitScope(CGF, S); + // if (PreCond) { + // for (IV in 0..LastIteration) BODY; + // <Final counter/linear vars updates>; + // } + // - // Emit: if (PreCond) - begin. - // If the condition constant folds and can be elided, avoid emitting the - // whole loop. - bool CondConstant; - llvm::BasicBlock *ContBlock = nullptr; - if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { - if (!CondConstant) - return; - } else { - auto *ThenBlock = CGF.createBasicBlock("simd.if.then"); - ContBlock = CGF.createBasicBlock("simd.if.end"); - emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, - CGF.getProfileCount(&S)); - CGF.EmitBlock(ThenBlock); - CGF.incrementProfileCounter(&S); - } + // Emit: if (PreCond) - begin. + // If the condition constant folds and can be elided, avoid emitting the + // whole loop. + bool CondConstant; + llvm::BasicBlock *ContBlock = nullptr; + if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { + if (!CondConstant) + return; + } else { + auto *ThenBlock = CGF.createBasicBlock("simd.if.then"); + ContBlock = CGF.createBasicBlock("simd.if.end"); + emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, + CGF.getProfileCount(&S)); + CGF.EmitBlock(ThenBlock); + CGF.incrementProfileCounter(&S); + } - // Emit the loop iteration variable. - const Expr *IVExpr = S.getIterationVariable(); - const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); - CGF.EmitVarDecl(*IVDecl); - CGF.EmitIgnoredExpr(S.getInit()); + // Emit the loop iteration variable. + const Expr *IVExpr = S.getIterationVariable(); + const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); + CGF.EmitVarDecl(*IVDecl); + CGF.EmitIgnoredExpr(S.getInit()); - // Emit the iterations count variable. - // If it is not a variable, Sema decided to calculate iterations count on - // each iteration (e.g., it is foldable into a constant). - if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { - CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); - // Emit calculation of the iterations count. - CGF.EmitIgnoredExpr(S.getCalcLastIteration()); - } + // Emit the iterations count variable. + // If it is not a variable, Sema decided to calculate iterations count on + // each iteration (e.g., it is foldable into a constant). 
+ if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { + CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); + // Emit calculation of the iterations count. + CGF.EmitIgnoredExpr(S.getCalcLastIteration()); + } - CGF.EmitOMPSimdInit(S); + CGF.EmitOMPSimdInit(S); - emitAlignedClause(CGF, S); - (void)CGF.EmitOMPLinearClauseInit(S); - { - OMPPrivateScope LoopScope(CGF); - CGF.EmitOMPPrivateLoopCounters(S, LoopScope); - CGF.EmitOMPLinearClause(S, LoopScope); - CGF.EmitOMPPrivateClause(S, LoopScope); - CGF.EmitOMPReductionClauseInit(S, LoopScope); - bool HasLastprivateClause = - CGF.EmitOMPLastprivateClauseInit(S, LoopScope); - (void)LoopScope.Privatize(); - CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), - S.getInc(), - [&S](CodeGenFunction &CGF) { - CGF.EmitOMPLoopBody(S, JumpDest()); - CGF.EmitStopPoint(&S); - }, - [](CodeGenFunction &) {}); - CGF.EmitOMPSimdFinal( - S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); - // Emit final copy of the lastprivate variables at the end of loops. - if (HasLastprivateClause) - CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); - CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); - emitPostUpdateForReductionClause( - CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); - } - CGF.EmitOMPLinearClauseFinal( + emitAlignedClause(CGF, S); + (void)CGF.EmitOMPLinearClauseInit(S); + { + CodeGenFunction::OMPPrivateScope LoopScope(CGF); + CGF.EmitOMPPrivateLoopCounters(S, LoopScope); + CGF.EmitOMPLinearClause(S, LoopScope); + CGF.EmitOMPPrivateClause(S, LoopScope); + CGF.EmitOMPReductionClauseInit(S, LoopScope); + bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); + (void)LoopScope.Privatize(); + CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), + S.getInc(), + [&S](CodeGenFunction &CGF) { + CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest()); + CGF.EmitStopPoint(&S); + }, + [](CodeGenFunction &) {}); + CGF.EmitOMPSimdFinal( S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); - // Emit: if (PreCond) - end. - if (ContBlock) { - CGF.EmitBranch(ContBlock); - CGF.EmitBlock(ContBlock, true); - } + // Emit final copy of the lastprivate variables at the end of loops. + if (HasLastprivateClause) + CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); + emitPostUpdateForReductionClause( + CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); + } + CGF.EmitOMPLinearClauseFinal( + S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); + // Emit: if (PreCond) - end. + if (ContBlock) { + CGF.EmitBranch(ContBlock); + CGF.EmitBlock(ContBlock, true); + } +} + +void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitOMPSimdRegion(CGF, S, Action); }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); @@ -1669,7 +1758,8 @@ void CodeGenFunction::EmitOMPOuterLoop( // Tell the runtime we are done. 
auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) { if (!DynamicOrOrdered) - CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); + CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(), + S.getDirectiveKind()); }; OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); } @@ -1753,9 +1843,11 @@ void CodeGenFunction::EmitOMPForOuterLoop( RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, Ordered, DipatchRTInputValues); } else { - RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, - Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB, - LoopArgs.ST, LoopArgs.Chunk); + CGOpenMPRuntime::StaticRTInput StaticInit( + IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB, + LoopArgs.ST, LoopArgs.Chunk); + RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(), + ScheduleKind, StaticInit); } auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc, @@ -1797,10 +1889,10 @@ void CodeGenFunction::EmitOMPDistributeOuterLoop( const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); - RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, - IVSigned, /* Ordered = */ false, LoopArgs.IL, - LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, - LoopArgs.Chunk); + CGOpenMPRuntime::StaticRTInput StaticInit( + IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB, + LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk); + RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, StaticInit); // for combined 'distribute' and 'for' the increment expression of distribute // is stored in DistInc. For 'distribute' alone, it is in Inc. @@ -1929,13 +2021,27 @@ emitInnerParallelForWhenCombined(CodeGenFunction &CGF, CodeGenFunction::JumpDest LoopExit) { auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + bool HasCancel = false; + if (!isOpenMPSimdDirective(S.getDirectiveKind())) { + if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S)) + HasCancel = D->hasCancel(); + else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S)) + HasCancel = D->hasCancel(); + else if (const auto *D = + dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S)) + HasCancel = D->hasCancel(); + } + CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(), + HasCancel); CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(), emitDistributeParallelForInnerBounds, emitDistributeParallelForDispatchBounds); }; emitCommonOMPParallelDirective( - CGF, S, OMPD_for, CGInlinedWorksharingLoop, + CGF, S, + isOpenMPSimdDirective(S.getDirectiveKind()) ?
OMPD_for_simd : OMPD_for, + CGInlinedWorksharingLoop, emitDistributeParallelForDistributeInnerBoundParams); } @@ -1946,119 +2052,53 @@ void CodeGenFunction::EmitOMPDistributeParallelForDirective( S.getDistInc()); }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for, - /*HasCancel=*/false); - CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, - /*HasCancel=*/false); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); } void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( const OMPDistributeParallelForSimdDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, + S.getDistInc()); + }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_distribute_parallel_for_simd, - [&S](CodeGenFunction &CGF, PrePostActionTy &) { - OMPLoopScope PreInitScope(CGF, S); - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); } void CodeGenFunction::EmitOMPDistributeSimdDirective( const OMPDistributeSimdDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); + }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_distribute_simd, - [&S](CodeGenFunction &CGF, PrePostActionTy &) { - OMPLoopScope PreInitScope(CGF, S); - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); } -void CodeGenFunction::EmitOMPTargetParallelForSimdDirective( - const OMPTargetParallelForSimdDirective &S) { - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_target_parallel_for_simd, - [&S](CodeGenFunction &CGF, PrePostActionTy &) { - OMPLoopScope PreInitScope(CGF, S); - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }); +void CodeGenFunction::EmitOMPTargetSimdDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) { + // Emit target simd region as a standalone region. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitOMPSimdRegion(CGF, S, Action); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region.
+ CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); } void CodeGenFunction::EmitOMPTargetSimdDirective( const OMPTargetSimdDirective &S) { - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_target_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) { - OMPLoopScope PreInitScope(CGF, S); - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }); -} - -void CodeGenFunction::EmitOMPTeamsDistributeDirective( - const OMPTeamsDistributeDirective &S) { - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_teams_distribute, - [&S](CodeGenFunction &CGF, PrePostActionTy &) { - OMPLoopScope PreInitScope(CGF, S); - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }); -} - -void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective( - const OMPTeamsDistributeSimdDirective &S) { - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_teams_distribute_simd, - [&S](CodeGenFunction &CGF, PrePostActionTy &) { - OMPLoopScope PreInitScope(CGF, S); - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }); -} - -void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective( - const OMPTeamsDistributeParallelForSimdDirective &S) { - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_teams_distribute_parallel_for_simd, - [&S](CodeGenFunction &CGF, PrePostActionTy &) { - OMPLoopScope PreInitScope(CGF, S); - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }); -} - -void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( - const OMPTeamsDistributeParallelForDirective &S) { - OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_teams_distribute_parallel_for, - [&S](CodeGenFunction &CGF, PrePostActionTy &) { - OMPLoopScope PreInitScope(CGF, S); - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }); -} - -void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( - const OMPTargetTeamsDistributeDirective &S) { - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_target_teams_distribute, - [&S](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }); + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitOMPSimdRegion(CGF, S, Action); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); } void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective( const OMPTargetTeamsDistributeParallelForDirective &S) { + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitInlinedDirective( *this, OMPD_target_teams_distribute_parallel_for, [&S](CodeGenFunction &CGF, PrePostActionTy &) { @@ -2069,6 +2109,7 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective( void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective( const OMPTargetTeamsDistributeParallelForSimdDirective &S) { + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitInlinedDirective( *this, OMPD_target_teams_distribute_parallel_for_simd, [&S](CodeGenFunction 
&CGF, PrePostActionTy &) { @@ -2077,16 +2118,6 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective( }); } -void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( - const OMPTargetTeamsDistributeSimdDirective &S) { - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_target_teams_distribute_simd, - [&S](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }); -} - namespace { struct ScheduleKindModifiersTy { OpenMPScheduleClauseKind Kind; @@ -2209,10 +2240,11 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( // chunks that are approximately equal in size, and at most one chunk is // distributed to each thread. Note that the size of the chunks is // unspecified in this case. - RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, - IVSize, IVSigned, Ordered, - IL.getAddress(), LB.getAddress(), - UB.getAddress(), ST.getAddress()); + CGOpenMPRuntime::StaticRTInput StaticInit( + IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(), + UB.getAddress(), ST.getAddress()); + RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(), + ScheduleKind, StaticInit); auto LoopExit = getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); // UB = min(UB, GlobalUB); @@ -2230,7 +2262,8 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( EmitBlock(LoopExit.getBlock()); // Tell the runtime we are done. auto &&CodeGen = [&S](CodeGenFunction &CGF) { - CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); + CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(), + S.getDirectiveKind()); }; OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); } else { @@ -2444,10 +2477,11 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { // Emit static non-chunked loop. OpenMPScheduleTy ScheduleKind; ScheduleKind.Schedule = OMPC_SCHEDULE_static; + CGOpenMPRuntime::StaticRTInput StaticInit( + /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), + LB.getAddress(), UB.getAddress(), ST.getAddress()); CGF.CGM.getOpenMPRuntime().emitForStaticInit( - CGF, S.getLocStart(), ScheduleKind, /*IVSize=*/32, - /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(), - UB.getAddress(), ST.getAddress()); + CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit); // UB = min(UB, GlobalUB); auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart()); auto *MinUBGlobalUB = CGF.Builder.CreateSelect( @@ -2460,7 +2494,8 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { [](CodeGenFunction &) {}); // Tell the runtime we are done. 
auto &&CodeGen = [&S](CodeGenFunction &CGF) { - CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); + CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(), + S.getDirectiveKind()); }; CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); @@ -2731,6 +2766,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, OMPPrivateScope Scope(CGF); if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || !Data.LastprivateVars.empty()) { + enum { PrivatesParam = 2, CopyFnParam = 3 }; auto *CopyFn = CGF.Builder.CreateLoad( CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); auto *PrivatesPtr = CGF.Builder.CreateLoad( @@ -2762,7 +2798,8 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); CallArgs.push_back(PrivatePtr.getPointer()); } - CGF.EmitRuntimeCall(CopyFn, CallArgs); + CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(), + CopyFn, CallArgs); for (auto &&Pair : LastprivateDstsOrigs) { auto *OrigVD = cast<VarDecl>(Pair.second->getDecl()); DeclRefExpr DRE( @@ -2808,7 +2845,57 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, RedCG, Cnt); } } + // Privatize all private variables except for in_reduction items. (void)Scope.Privatize(); + SmallVector<const Expr *, 4> InRedVars; + SmallVector<const Expr *, 4> InRedPrivs; + SmallVector<const Expr *, 4> InRedOps; + SmallVector<const Expr *, 4> TaskgroupDescriptors; + for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) { + auto IPriv = C->privates().begin(); + auto IRed = C->reduction_ops().begin(); + auto ITD = C->taskgroup_descriptors().begin(); + for (const auto *Ref : C->varlists()) { + InRedVars.emplace_back(Ref); + InRedPrivs.emplace_back(*IPriv); + InRedOps.emplace_back(*IRed); + TaskgroupDescriptors.emplace_back(*ITD); + std::advance(IPriv, 1); + std::advance(IRed, 1); + std::advance(ITD, 1); + } + } + // Privatize in_reduction items here, because taskgroup descriptors must be + // privatized earlier. + OMPPrivateScope InRedScope(CGF); + if (!InRedVars.empty()) { + ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps); + for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) { + RedCG.emitSharedLValue(CGF, Cnt); + RedCG.emitAggregateType(CGF, Cnt); + // The taskgroup descriptor variable is always implicit firstprivate and + // privatized already during processing of the firstprivates. + llvm::Value *ReductionsPtr = CGF.EmitLoadOfScalar( + CGF.EmitLValue(TaskgroupDescriptors[Cnt]), SourceLocation()); + Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( + CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); + Replacement = Address( + CGF.EmitScalarConversion( + Replacement.getPointer(), CGF.getContext().VoidPtrTy, + CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()), + SourceLocation()), + Replacement.getAlignment()); + Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); + InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), + [Replacement]() { return Replacement; }); + // FIXME: This must be removed once the runtime library is fixed. + // Emit required threadprivate variables for + // initializer/combiner/finalizer.
+ CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(), + RedCG, Cnt); + } + } + (void)InRedScope.Privatize(); Action.Enter(CGF); BodyGen(CGF); @@ -2867,6 +2954,35 @@ void CodeGenFunction::EmitOMPTaskgroupDirective( const OMPTaskgroupDirective &S) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); + if (const Expr *E = S.getReductionRef()) { + SmallVector<const Expr *, 4> LHSs; + SmallVector<const Expr *, 4> RHSs; + OMPTaskDataTy Data; + for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) { + auto IPriv = C->privates().begin(); + auto IRed = C->reduction_ops().begin(); + auto ILHS = C->lhs_exprs().begin(); + auto IRHS = C->rhs_exprs().begin(); + for (const auto *Ref : C->varlists()) { + Data.ReductionVars.emplace_back(Ref); + Data.ReductionCopies.emplace_back(*IPriv); + Data.ReductionOps.emplace_back(*IRed); + LHSs.emplace_back(*ILHS); + RHSs.emplace_back(*IRHS); + std::advance(IPriv, 1); + std::advance(IRed, 1); + std::advance(ILHS, 1); + std::advance(IRHS, 1); + } + } + llvm::Value *ReductionDesc = + CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getLocStart(), + LHSs, RHSs, Data); + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + CGF.EmitVarDecl(*VD); + CGF.EmitStoreOfScalar(ReductionDesc, CGF.GetAddrOfLocalVar(VD), + /*Volatile=*/false, E->getType()); + } CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); @@ -2923,6 +3039,7 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, incrementProfileCounter(&S); } + emitAlignedClause(*this, S); // Emit 'then' code. { // Emit helper vars inits. @@ -2944,14 +3061,18 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, OMPPrivateScope LoopScope(*this); if (EmitOMPFirstprivateClause(S, LoopScope)) { - // Emit implicit barrier to synchronize threads and avoid data races on - // initialization of firstprivate variables and post-update of + // Emit implicit barrier to synchronize threads and avoid data races + // on initialization of firstprivate variables and post-update of // lastprivate variables. CGM.getOpenMPRuntime().emitBarrierCall( - *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, - /*ForceSimpleCall=*/true); + *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, + /*ForceSimpleCall=*/true); } EmitOMPPrivateClause(S, LoopScope); + if (isOpenMPSimdDirective(S.getDirectiveKind()) && + !isOpenMPParallelDirective(S.getDirectiveKind()) && + !isOpenMPTeamsDirective(S.getDirectiveKind())) + EmitOMPReductionClauseInit(S, LoopScope); HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); EmitOMPPrivateLoopCounters(S, LoopScope); (void)LoopScope.Privatize(); @@ -2964,8 +3085,8 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, if (const auto *Ch = C->getChunkSize()) { Chunk = EmitScalarExpr(Ch); Chunk = EmitScalarConversion(Chunk, Ch->getType(), - S.getIterationVariable()->getType(), - S.getLocStart()); + S.getIterationVariable()->getType(), + S.getLocStart()); } } const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); @@ -2981,10 +3102,13 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, // league. The size of the chunks is unspecified in this case. 
if (RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr)) { + if (isOpenMPSimdDirective(S.getDirectiveKind())) + EmitOMPSimdInit(S, /*IsMonotonic=*/true); + CGOpenMPRuntime::StaticRTInput StaticInit( + IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(), + LB.getAddress(), UB.getAddress(), ST.getAddress()); RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, - IVSize, IVSigned, /* Ordered = */ false, - IL.getAddress(), LB.getAddress(), - UB.getAddress(), ST.getAddress()); + StaticInit); auto LoopExit = getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); // UB = min(UB, GlobalUB); @@ -3011,7 +3135,7 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, [](CodeGenFunction &) {}); EmitBlock(LoopExit.getBlock()); // Tell the runtime we are done. - RT.emitForStaticFinish(*this, S.getLocStart()); + RT.emitForStaticFinish(*this, S.getLocStart(), S.getDirectiveKind()); } else { // Emit the outer loop, which requests its work chunk [LB..UB] from // runtime and runs the inner loop to process it. @@ -3021,13 +3145,38 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments, CodeGenLoop); } - + if (isOpenMPSimdDirective(S.getDirectiveKind())) { + EmitOMPSimdFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * { + return CGF.Builder.CreateIsNotNull( + CGF.EmitLoadOfScalar(IL, S.getLocStart())); + }); + } + OpenMPDirectiveKind ReductionKind = OMPD_unknown; + if (isOpenMPParallelDirective(S.getDirectiveKind()) && + isOpenMPSimdDirective(S.getDirectiveKind())) { + ReductionKind = OMPD_parallel_for_simd; + } else if (isOpenMPParallelDirective(S.getDirectiveKind())) { + ReductionKind = OMPD_parallel_for; + } else if (isOpenMPSimdDirective(S.getDirectiveKind())) { + ReductionKind = OMPD_simd; + } else if (!isOpenMPTeamsDirective(S.getDirectiveKind()) && + S.hasClausesOfKind<OMPReductionClause>()) { + llvm_unreachable( + "No reduction clauses are allowed in distribute directive."); + } + EmitOMPReductionClauseFinal(S, ReductionKind); + // Emit post-update of the reduction variables if IsLastIter != 0. + emitPostUpdateForReductionClause( + *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * { + return CGF.Builder.CreateIsNotNull( + CGF.EmitLoadOfScalar(IL, S.getLocStart())); + }); // Emit final copy of the lastprivate variables if IsLastIter != 0. - if (HasLastprivateClause) + if (HasLastprivateClause) { EmitOMPLastprivateClauseFinal( S, /*NoFinals=*/false, - Builder.CreateIsNotNull( - EmitLoadOfScalar(IL, S.getLocStart()))); + Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); + } } // We're now done with the loop, so jump to the continuation block.
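[Note: the EmitOMPDistributeLoop changes above make the distribute codegen itself perform simd initialization/finalization and reduction emission. A minimal source-level sketch of the kind of construct that now flows through this path; hypothetical user code, not part of the patch:]

    // Hypothetical example: a combined directive whose distribute loop
    // carries simd semantics and a reduction, exercising the
    // EmitOMPSimdInit/EmitOMPSimdFinal calls and the OMPD_simd
    // reduction kind selected above.
    float sum(const float *Data, int N) {
      float Sum = 0.0f;
    #pragma omp target teams distribute simd map(to : Data[0:N]) \
        map(tofrom : Sum) reduction(+ : Sum)
      for (int I = 0; I < N; ++I)
        Sum += Data[I];
      return Sum;
    }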
@@ -3045,8 +3194,7 @@ void CodeGenFunction::EmitOMPDistributeDirective( CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, - false); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); } static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, @@ -3073,7 +3221,8 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { llvm::SmallVector<llvm::Value *, 16> CapturedVars; CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); - CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars); + CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(), + OutlinedFn, CapturedVars); } else { Action.Enter(CGF); CGF.EmitStmt( @@ -3259,6 +3408,7 @@ static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, case BO_GE: case BO_EQ: case BO_NE: + case BO_Cmp: case BO_AddAssign: case BO_SubAssign: case BO_AndAssign: @@ -3470,6 +3620,7 @@ static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, case OMPC_lastprivate: case OMPC_reduction: case OMPC_task_reduction: + case OMPC_in_reduction: case OMPC_safelen: case OMPC_simdlen: case OMPC_collapse: @@ -3552,7 +3703,7 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen) { assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind())); CodeGenModule &CGM = CGF.CGM; - const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt()); + const CapturedStmt &CS = *S.getCapturedStmt(OMPD_target); llvm::Function *Fn = nullptr; llvm::Constant *FnID = nullptr; @@ -3675,7 +3826,7 @@ void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); }; - emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen); + emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen); emitPostUpdateForReductionClause( *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); } @@ -3684,11 +3835,20 @@ static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action, const OMPTargetTeamsDirective &S) { auto *CS = S.getCapturedStmt(OMPD_teams); Action.Enter(CGF); - auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { - // TODO: Add support for clauses. + // Emit teams region as a standalone region. 
+ auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) { + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); + CGF.EmitOMPPrivateClause(S, PrivateScope); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + Action.Enter(CGF); CGF.EmitStmt(CS->getCapturedStmt()); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); }; emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen); + emitPostUpdateForReductionClause( + CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); } void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( @@ -3713,6 +3873,183 @@ void CodeGenFunction::EmitOMPTargetTeamsDirective( emitCommonOMPTargetDirective(*this, S, CodeGen); } +static void +emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action, + const OMPTargetTeamsDistributeDirective &S) { + Action.Enter(CGF); + auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); + }; + + // Emit teams region as a standalone region. + auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, + PrePostActionTy &) { + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, + CodeGenDistribute); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); + }; + emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen); + emitPostUpdateForReductionClause(CGF, S, + [](CodeGenFunction &) { return nullptr; }); +} + +void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetTeamsDistributeDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsDistributeRegion(CGF, Action, S); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + +void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( + const OMPTargetTeamsDistributeDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsDistributeRegion(CGF, Action, S); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); +} + +static void emitTargetTeamsDistributeSimdRegion( + CodeGenFunction &CGF, PrePostActionTy &Action, + const OMPTargetTeamsDistributeSimdDirective &S) { + Action.Enter(CGF); + auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); + }; + + // Emit teams region as a standalone region. 
+ auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, + PrePostActionTy &) { + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, + CodeGenDistribute); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); + }; + emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen); + emitPostUpdateForReductionClause(CGF, S, + [](CodeGenFunction &) { return nullptr; }); +} + +void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetTeamsDistributeSimdDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsDistributeSimdRegion(CGF, Action, S); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + +void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( + const OMPTargetTeamsDistributeSimdDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsDistributeSimdRegion(CGF, Action, S); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); +} + +void CodeGenFunction::EmitOMPTeamsDistributeDirective( + const OMPTeamsDistributeDirective &S) { + + auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); + }; + + // Emit teams region as a standalone region. + auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, + PrePostActionTy &) { + OMPPrivateScope PrivateScope(CGF); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, + CodeGenDistribute); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); + }; + emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen); + emitPostUpdateForReductionClause(*this, S, + [](CodeGenFunction &) { return nullptr; }); +} + +void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective( + const OMPTeamsDistributeSimdDirective &S) { + auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); + }; + + // Emit teams region as a standalone region. + auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, + PrePostActionTy &) { + OMPPrivateScope PrivateScope(CGF); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd, + CodeGenDistribute); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); + }; + emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen); + emitPostUpdateForReductionClause(*this, S, + [](CodeGenFunction &) { return nullptr; }); +} + +void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( + const OMPTeamsDistributeParallelForDirective &S) { + auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, + S.getDistInc()); + }; + + // Emit teams region as a standalone region. 
+ auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, + PrePostActionTy &) { + OMPPrivateScope PrivateScope(CGF); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, + CodeGenDistribute); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); + }; + emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen); + emitPostUpdateForReductionClause(*this, S, + [](CodeGenFunction &) { return nullptr; }); +} + +void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective( + const OMPTeamsDistributeParallelForSimdDirective &S) { + auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, + S.getDistInc()); + }; + + // Emit teams region as a standalone region. + auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, + PrePostActionTy &) { + OMPPrivateScope PrivateScope(CGF); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.CGM.getOpenMPRuntime().emitInlinedDirective( + CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); + }; + emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen); + emitPostUpdateForReductionClause(*this, S, + [](CodeGenFunction &) { return nullptr; }); +} + void CodeGenFunction::EmitOMPCancellationPointDirective( const OMPCancellationPointDirective &S) { CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(), @@ -3740,7 +4077,9 @@ CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for || Kind == OMPD_distribute_parallel_for || - Kind == OMPD_target_parallel_for); + Kind == OMPD_target_parallel_for || + Kind == OMPD_teams_distribute_parallel_for || + Kind == OMPD_target_teams_distribute_parallel_for); return OMPCancelStack.getExitBlock(); } @@ -3913,7 +4252,14 @@ void CodeGenFunction::EmitOMPTargetEnterDataDirective( if (auto *C = S.getSingleClause<OMPDeviceClause>()) Device = C->getDevice(); - CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); + auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF, + PrePostActionTy &) { + CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond, + Device); + }; + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_enter_data, + CodeGen); } void CodeGenFunction::EmitOMPTargetExitDataDirective( @@ -3933,7 +4279,14 @@ void CodeGenFunction::EmitOMPTargetExitDataDirective( if (auto *C = S.getSingleClause<OMPDeviceClause>()) Device = C->getDevice(); - CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); + auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF, + PrePostActionTy &) { + CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond, + Device); + }; + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_exit_data, + CodeGen); } static void emitTargetParallelRegion(CodeGenFunction &CGF, @@ -3980,9 +4333,81 @@ void CodeGenFunction::EmitOMPTargetParallelDirective( emitCommonOMPTargetDirective(*this, S, CodeGen); } +static void emitTargetParallelForRegion(CodeGenFunction &CGF, + 
const OMPTargetParallelForDirective &S, + PrePostActionTy &Action) { + Action.Enter(CGF); + // Emit directive as a combined directive that consists of two implicit + // directives: 'parallel' with 'for' directive. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CodeGenFunction::OMPCancelStackRAII CancelRegion( + CGF, OMPD_target_parallel_for, S.hasCancel()); + CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, + emitDispatchForLoopBounds); + }; + emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen, + emitEmptyBoundParameters); +} + +void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetParallelForDirective &S) { + // Emit SPMD target parallel for region as a standalone region. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetParallelForRegion(CGF, S, Action); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + void CodeGenFunction::EmitOMPTargetParallelForDirective( const OMPTargetParallelForDirective &S) { - // TODO: codegen for target parallel for. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetParallelForRegion(CGF, S, Action); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); +} + +static void +emitTargetParallelForSimdRegion(CodeGenFunction &CGF, + const OMPTargetParallelForSimdDirective &S, + PrePostActionTy &Action) { + Action.Enter(CGF); + // Emit directive as a combined directive that consists of two implicit + // directives: 'parallel' with 'for' directive. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, + emitDispatchForLoopBounds); + }; + emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen, + emitEmptyBoundParameters); +} + +void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetParallelForSimdDirective &S) { + // Emit SPMD target parallel for region as a standalone region. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetParallelForSimdRegion(CGF, S, Action); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + +void CodeGenFunction::EmitOMPTargetParallelForSimdDirective( + const OMPTargetParallelForSimdDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetParallelForSimdRegion(CGF, S, Action); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); } /// Emit a helper variable and return corresponding lvalue. 
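[Note: with the device-function/host pairs above, 'target parallel for' and 'target parallel for simd' are emitted as genuine combined constructs, an outlined target region running a parallel worksharing loop, rather than the earlier inlined stubs. A source-level sketch of what they compile; hypothetical user code, not part of the patch:]

    // Hypothetical example for the target parallel for path.
    void saxpy(int N, float A, const float *X, float *Y) {
    #pragma omp target parallel for map(to : X[0:N]) map(tofrom : Y[0:N])
      for (int I = 0; I < N; ++I)
        Y[I] = A * X[I] + Y[I];
    }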
@@ -4160,5 +4585,12 @@ void CodeGenFunction::EmitOMPTargetUpdateDirective( if (auto *C = S.getSingleClause<OMPDeviceClause>()) Device = C->getDevice(); - CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); + auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF, + PrePostActionTy &) { + CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond, + Device); + }; + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_update, + CodeGen); } diff --git a/lib/CodeGen/CGVTT.cpp b/lib/CodeGen/CGVTT.cpp index 92fd93b5ca38..78928d04220d 100644 --- a/lib/CodeGen/CGVTT.cpp +++ b/lib/CodeGen/CGVTT.cpp @@ -100,7 +100,7 @@ CodeGenVTables::EmitVTTDefinition(llvm::GlobalVariable *VTT, VTT->setComdat(CGM.getModule().getOrInsertComdat(VTT->getName())); // Set the right visibility. - CGM.setGlobalVisibility(VTT, RD); + CGM.setGlobalVisibility(VTT, RD, ForDefinition); } llvm::GlobalVariable *CodeGenVTables::GetAddrOfVTT(const CXXRecordDecl *RD) { diff --git a/lib/CodeGen/CGVTables.cpp b/lib/CodeGen/CGVTables.cpp index 64b6d0d3fe9f..2d9bf3bce926 100644 --- a/lib/CodeGen/CGVTables.cpp +++ b/lib/CodeGen/CGVTables.cpp @@ -14,11 +14,12 @@ #include "CGCXXABI.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" -#include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/AST/CXXInheritance.h" #include "clang/AST/RecordLayout.h" #include "clang/CodeGen/CGFunctionInfo.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/Frontend/CodeGenOptions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/Format.h" #include "llvm/Transforms/Utils/Cloning.h" #include <algorithm> @@ -50,7 +51,7 @@ llvm::Constant *CodeGenModule::GetAddrOfThunk(GlobalDecl GD, static void setThunkVisibility(CodeGenModule &CGM, const CXXMethodDecl *MD, const ThunkInfo &Thunk, llvm::Function *Fn) { - CGM.setGlobalVisibility(Fn, MD); + CGM.setGlobalVisibility(Fn, MD, ForDefinition); } static void setThunkProperties(CodeGenModule &CGM, const ThunkInfo &Thunk, @@ -122,6 +123,33 @@ static RValue PerformReturnAdjustment(CodeGenFunction &CGF, return RValue::get(ReturnValue); } +/// This function clones a function's DISubprogram node and enters it into +/// a value map with the intent that the map can be utilized by the cloner +/// to short-circuit Metadata node mapping. +/// Furthermore, the function resolves any DILocalVariable nodes referenced +/// by dbg.value intrinsics so they can be properly mapped during cloning. +static void resolveTopLevelMetadata(llvm::Function *Fn, + llvm::ValueToValueMapTy &VMap) { + // Clone the DISubprogram node and put it into the Value map. + auto *DIS = Fn->getSubprogram(); + if (!DIS) + return; + auto *NewDIS = DIS->replaceWithDistinct(DIS->clone()); + VMap.MD()[DIS].reset(NewDIS); + + // Find all llvm.dbg.declare intrinsics and resolve the DILocalVariable nodes + // they are referencing. + for (auto &BB : Fn->getBasicBlockList()) { + for (auto &I : BB) { + if (auto *DII = dyn_cast<llvm::DbgInfoIntrinsic>(&I)) { + auto *DILocal = DII->getVariable(); + if (!DILocal->isResolved()) + DILocal->resolve(); + } + } + } +} + // This function does roughly the same thing as GenerateThunk, but in a // very different way, so that va_start and va_end work correctly. // FIXME: This function assumes "this" is the first non-sret LLVM argument of @@ -154,6 +182,10 @@ CodeGenFunction::GenerateVarArgsThunk(llvm::Function *Fn, // Clone to thunk. 
llvm::ValueToValueMapTy VMap; + + // We are cloning a function while some Metadata nodes are still unresolved. + // Ensure that the value mapper does not encounter any of them. + resolveTopLevelMetadata(BaseFn, VMap); llvm::Function *NewFn = llvm::CloneFunction(BaseFn, VMap); Fn->replaceAllUsesWith(NewFn); NewFn->takeName(Fn); @@ -698,7 +730,7 @@ CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD, // Create the variable that will hold the construction vtable. llvm::GlobalVariable *VTable = CGM.CreateOrReplaceCXXRuntimeVariable(Name, VTType, Linkage); - CGM.setGlobalVisibility(VTable, RD); + CGM.setGlobalVisibility(VTable, RD, ForDefinition); // V-tables are always unnamed_addr. VTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); diff --git a/lib/CodeGen/CGValue.h b/lib/CodeGen/CGValue.h index b768eb86367b..7d07ea4516c9 100644 --- a/lib/CodeGen/CGValue.h +++ b/lib/CodeGen/CGValue.h @@ -20,6 +20,7 @@ #include "llvm/IR/Value.h" #include "llvm/IR/Type.h" #include "Address.h" +#include "CodeGenTBAA.h" namespace llvm { class Constant; @@ -148,20 +149,15 @@ static inline AlignmentSource getFieldAlignmentSource(AlignmentSource Source) { class LValueBaseInfo { AlignmentSource AlignSource; - bool MayAlias; public: - explicit LValueBaseInfo(AlignmentSource Source = AlignmentSource::Type, - bool Alias = false) - : AlignSource(Source), MayAlias(Alias) {} + explicit LValueBaseInfo(AlignmentSource Source = AlignmentSource::Type) + : AlignSource(Source) {} AlignmentSource getAlignmentSource() const { return AlignSource; } void setAlignmentSource(AlignmentSource Source) { AlignSource = Source; } - bool getMayAlias() const { return MayAlias; } - void setMayAlias(bool Alias) { MayAlias = Alias; } void mergeForCast(const LValueBaseInfo &Info) { setAlignmentSource(Info.getAlignmentSource()); - setMayAlias(getMayAlias() || Info.getMayAlias()); } }; @@ -220,6 +216,7 @@ class LValue { bool ImpreciseLifetime : 1; LValueBaseInfo BaseInfo; + TBAAAccessInfo TBAAInfo; // This flag shows if a nontemporal load/stores should be used when accessing // this lvalue. @@ -227,18 +224,9 @@ class LValue { Expr *BaseIvarExp; - /// Used by struct-path-aware TBAA. - QualType TBAABaseType; - /// Offset relative to the base type. - uint64_t TBAAOffset; - - /// TBAAInfo - TBAA information to attach to dereferences of this LValue. - llvm::MDNode *TBAAInfo; - private: - void Initialize(QualType Type, Qualifiers Quals, - CharUnits Alignment, LValueBaseInfo BaseInfo, - llvm::MDNode *TBAAInfo = nullptr) { + void Initialize(QualType Type, Qualifiers Quals, CharUnits Alignment, + LValueBaseInfo BaseInfo, TBAAAccessInfo TBAAInfo) { assert((!Alignment.isZero() || Type->isIncompleteType()) && "initializing l-value with zero alignment!"); this->Type = Type; @@ -247,6 +235,7 @@ private: assert(this->Alignment == Alignment.getQuantity() && "Alignment exceeds allowed max!"); this->BaseInfo = BaseInfo; + this->TBAAInfo = TBAAInfo; // Initialize Objective-C flags. this->Ivar = this->ObjIsArray = this->NonGC = this->GlobalObjCRef = false; @@ -254,11 +243,6 @@ private: this->Nontemporal = false; this->ThreadLocalRef = false; this->BaseIvarExp = nullptr; - - // Initialize fields for TBAA. 
- this->TBAABaseType = Type; - this->TBAAOffset = 0; - this->TBAAInfo = TBAAInfo; } public: @@ -318,19 +302,13 @@ public: Expr *getBaseIvarExp() const { return BaseIvarExp; } void setBaseIvarExp(Expr *V) { BaseIvarExp = V; } - QualType getTBAABaseType() const { return TBAABaseType; } - void setTBAABaseType(QualType T) { TBAABaseType = T; } - - uint64_t getTBAAOffset() const { return TBAAOffset; } - void setTBAAOffset(uint64_t O) { TBAAOffset = O; } - - llvm::MDNode *getTBAAInfo() const { return TBAAInfo; } - void setTBAAInfo(llvm::MDNode *N) { TBAAInfo = N; } + TBAAAccessInfo getTBAAInfo() const { return TBAAInfo; } + void setTBAAInfo(TBAAAccessInfo Info) { TBAAInfo = Info; } const Qualifiers &getQuals() const { return Quals; } Qualifiers &getQuals() { return Quals; } - unsigned getAddressSpace() const { return Quals.getAddressSpace(); } + LangAS getAddressSpace() const { return Quals.getAddressSpace(); } CharUnits getAlignment() const { return CharUnits::fromQuantity(Alignment); } void setAlignment(CharUnits A) { Alignment = A.getQuantity(); } @@ -383,10 +361,8 @@ public: // global register lvalue llvm::Value *getGlobalReg() const { assert(isGlobalReg()); return V; } - static LValue MakeAddr(Address address, QualType type, - ASTContext &Context, - LValueBaseInfo BaseInfo, - llvm::MDNode *TBAAInfo = nullptr) { + static LValue MakeAddr(Address address, QualType type, ASTContext &Context, + LValueBaseInfo BaseInfo, TBAAAccessInfo TBAAInfo) { Qualifiers qs = type.getQualifiers(); qs.setObjCGCAttr(Context.getObjCGCAttrKind(type)); @@ -399,24 +375,26 @@ public: } static LValue MakeVectorElt(Address vecAddress, llvm::Value *Idx, - QualType type, LValueBaseInfo BaseInfo) { + QualType type, LValueBaseInfo BaseInfo, + TBAAAccessInfo TBAAInfo) { LValue R; R.LVType = VectorElt; R.V = vecAddress.getPointer(); R.VectorIdx = Idx; R.Initialize(type, type.getQualifiers(), vecAddress.getAlignment(), - BaseInfo); + BaseInfo, TBAAInfo); return R; } static LValue MakeExtVectorElt(Address vecAddress, llvm::Constant *Elts, - QualType type, LValueBaseInfo BaseInfo) { + QualType type, LValueBaseInfo BaseInfo, + TBAAAccessInfo TBAAInfo) { LValue R; R.LVType = ExtVectorElt; R.V = vecAddress.getPointer(); R.VectorElts = Elts; R.Initialize(type, type.getQualifiers(), vecAddress.getAlignment(), - BaseInfo); + BaseInfo, TBAAInfo); return R; } @@ -426,15 +404,15 @@ public: /// bit-field refers to. /// \param Info - The information describing how to perform the bit-field /// access. 
- static LValue MakeBitfield(Address Addr, - const CGBitFieldInfo &Info, - QualType type, - LValueBaseInfo BaseInfo) { + static LValue MakeBitfield(Address Addr, const CGBitFieldInfo &Info, + QualType type, LValueBaseInfo BaseInfo, + TBAAAccessInfo TBAAInfo) { LValue R; R.LVType = BitField; R.V = Addr.getPointer(); R.BitFieldInfo = &Info; - R.Initialize(type, type.getQualifiers(), Addr.getAlignment(), BaseInfo); + R.Initialize(type, type.getQualifiers(), Addr.getAlignment(), BaseInfo, + TBAAInfo); return R; } @@ -443,7 +421,7 @@ public: R.LVType = GlobalReg; R.V = Reg.getPointer(); R.Initialize(type, type.getQualifiers(), Reg.getAlignment(), - LValueBaseInfo(AlignmentSource::Decl, false)); + LValueBaseInfo(AlignmentSource::Decl), TBAAAccessInfo()); return R; } diff --git a/lib/CodeGen/CodeGenABITypes.cpp b/lib/CodeGen/CodeGenABITypes.cpp index 0735a9c3dfbc..c152291b15b9 100644 --- a/lib/CodeGen/CodeGenABITypes.cpp +++ b/lib/CodeGen/CodeGenABITypes.cpp @@ -17,6 +17,7 @@ //===----------------------------------------------------------------------===// #include "clang/CodeGen/CodeGenABITypes.h" +#include "CGRecordLayout.h" #include "CodeGenModule.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "clang/Frontend/CodeGenOptions.h" @@ -80,3 +81,9 @@ llvm::Type * CodeGen::convertTypeForMemory(CodeGenModule &CGM, QualType T) { return CGM.getTypes().ConvertTypeForMem(T); } + +unsigned CodeGen::getLLVMFieldNumber(CodeGenModule &CGM, + const RecordDecl *RD, + const FieldDecl *FD) { + return CGM.getTypes().getCGRecordLayout(RD).getLLVMFieldNo(FD); +} diff --git a/lib/CodeGen/CodeGenAction.cpp b/lib/CodeGen/CodeGenAction.cpp index 4f03de55149b..6ca69d63cdce 100644 --- a/lib/CodeGen/CodeGenAction.cpp +++ b/lib/CodeGen/CodeGenAction.cpp @@ -46,6 +46,38 @@ using namespace clang; using namespace llvm; namespace clang { + class BackendConsumer; + class ClangDiagnosticHandler final : public DiagnosticHandler { + public: + ClangDiagnosticHandler(const CodeGenOptions &CGOpts, BackendConsumer *BCon) + : CodeGenOpts(CGOpts), BackendCon(BCon) {} + + bool handleDiagnostics(const DiagnosticInfo &DI) override; + + bool isAnalysisRemarkEnabled(StringRef PassName) const override { + return (CodeGenOpts.OptimizationRemarkAnalysisPattern && + CodeGenOpts.OptimizationRemarkAnalysisPattern->match(PassName)); + } + bool isMissedOptRemarkEnabled(StringRef PassName) const override { + return (CodeGenOpts.OptimizationRemarkMissedPattern && + CodeGenOpts.OptimizationRemarkMissedPattern->match(PassName)); + } + bool isPassedOptRemarkEnabled(StringRef PassName) const override { + return (CodeGenOpts.OptimizationRemarkPattern && + CodeGenOpts.OptimizationRemarkPattern->match(PassName)); + } + + bool isAnyRemarkEnabled() const override { + return (CodeGenOpts.OptimizationRemarkAnalysisPattern || + CodeGenOpts.OptimizationRemarkMissedPattern || + CodeGenOpts.OptimizationRemarkPattern); + } + + private: + const CodeGenOptions &CodeGenOpts; + BackendConsumer *BackendCon; + }; + class BackendConsumer : public ASTConsumer { using LinkModule = CodeGenAction::LinkModule; @@ -224,21 +256,20 @@ namespace clang { void *OldContext = Ctx.getInlineAsmDiagnosticContext(); Ctx.setInlineAsmDiagnosticHandler(InlineAsmDiagHandler, this); - LLVMContext::DiagnosticHandlerTy OldDiagnosticHandler = + std::unique_ptr<DiagnosticHandler> OldDiagnosticHandler = Ctx.getDiagnosticHandler(); - void *OldDiagnosticContext = Ctx.getDiagnosticContext(); - Ctx.setDiagnosticHandler(DiagnosticHandler, this); + 
Ctx.setDiagnosticHandler(llvm::make_unique<ClangDiagnosticHandler>( + CodeGenOpts, this)); Ctx.setDiagnosticsHotnessRequested(CodeGenOpts.DiagnosticsWithHotness); if (CodeGenOpts.DiagnosticsHotnessThreshold != 0) Ctx.setDiagnosticsHotnessThreshold( CodeGenOpts.DiagnosticsHotnessThreshold); - std::unique_ptr<llvm::tool_output_file> OptRecordFile; + std::unique_ptr<llvm::ToolOutputFile> OptRecordFile; if (!CodeGenOpts.OptRecordFile.empty()) { std::error_code EC; - OptRecordFile = - llvm::make_unique<llvm::tool_output_file>(CodeGenOpts.OptRecordFile, - EC, sys::fs::F_None); + OptRecordFile = llvm::make_unique<llvm::ToolOutputFile>( + CodeGenOpts.OptRecordFile, EC, sys::fs::F_None); if (EC) { Diags.Report(diag::err_cannot_open_file) << CodeGenOpts.OptRecordFile << EC.message(); @@ -264,7 +295,7 @@ namespace clang { Ctx.setInlineAsmDiagnosticHandler(OldHandler, OldContext); - Ctx.setDiagnosticHandler(OldDiagnosticHandler, OldDiagnosticContext); + Ctx.setDiagnosticHandler(std::move(OldDiagnosticHandler)); if (OptRecordFile) OptRecordFile->keep(); @@ -299,11 +330,6 @@ namespace clang { ((BackendConsumer*)Context)->InlineAsmDiagHandler2(SM, Loc); } - static void DiagnosticHandler(const llvm::DiagnosticInfo &DI, - void *Context) { - ((BackendConsumer *)Context)->DiagnosticHandlerImpl(DI); - } - /// Get the best possible source location to represent a diagnostic that /// may have associated debug info. const FullSourceLoc @@ -343,6 +369,11 @@ namespace clang { void BackendConsumer::anchor() {} } +bool ClangDiagnosticHandler::handleDiagnostics(const DiagnosticInfo &DI) { + BackendCon->DiagnosticHandlerImpl(DI); + return true; +} + /// ConvertBackendLocation - Convert a location in a temporary llvm::SourceMgr /// buffer to be a valid FullSourceLoc. static FullSourceLoc ConvertBackendLocation(const llvm::SMDiagnostic &D, @@ -402,6 +433,8 @@ void BackendConsumer::InlineAsmDiagHandler2(const llvm::SMDiagnostic &D, case llvm::SourceMgr::DK_Note: DiagID = diag::note_fe_inline_asm; break; + case llvm::SourceMgr::DK_Remark: + llvm_unreachable("remarks unexpected"); } // If this problem has clang-level source location information, report the // issue in the source with a note showing the instantiated @@ -600,6 +633,10 @@ void BackendConsumer::EmitOptimizationMessage( void BackendConsumer::OptimizationRemarkHandler( const llvm::DiagnosticInfoOptimizationBase &D) { + // Without hotness information, don't show noisy remarks. + if (D.isVerbose() && !D.getHotness()) + return; + if (D.isPassed()) { // Optimization remarks are active only if the -Rpass flag has a regular // expression that matches the name of the pass name in \p D. 
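[Note: the ClangDiagnosticHandler hunks above follow LLVM's reworked diagnostic interface: instead of a function pointer plus a void* context, an owning DiagnosticHandler subclass is installed, and its is*RemarkEnabled() predicates let the context filter remarks before they are constructed. A minimal standalone sketch of the same pattern; names are illustrative, and it assumes only the post-refactor API visible in this hunk:]

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/IR/DiagnosticHandler.h"
    #include "llvm/IR/DiagnosticInfo.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/Support/raw_ostream.h"

    namespace {
    struct LoggingDiagnosticHandler final : llvm::DiagnosticHandler {
      // Returning true marks the diagnostic as handled, so the context
      // does not fall back to its default printing.
      bool handleDiagnostics(const llvm::DiagnosticInfo &DI) override {
        llvm::errs() << "backend diagnostic, severity "
                     << static_cast<int>(DI.getSeverity()) << '\n';
        return true;
      }
      // Remark construction is skipped entirely when this returns false.
      bool isAnyRemarkEnabled() const override { return false; }
    };
    } // namespace

    void installLoggingHandler(llvm::LLVMContext &Ctx) {
      Ctx.setDiagnosticHandler(llvm::make_unique<LoggingDiagnosticHandler>());
    }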
@@ -884,6 +921,8 @@ static void BitcodeInlineAsmDiagHandler(const llvm::SMDiagnostic &SM, case llvm::SourceMgr::DK_Note: DiagID = diag::note_fe_inline_asm; break; + case llvm::SourceMgr::DK_Remark: + llvm_unreachable("remarks unexpected"); } Diags->Report(DiagID).AddString("cannot compile inline asm"); diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp index c23b25ea461f..9dbd7cc3fcbf 100644 --- a/lib/CodeGen/CodeGenFunction.cpp +++ b/lib/CodeGen/CodeGenFunction.cpp @@ -33,9 +33,11 @@ #include "clang/Frontend/CodeGenOptions.h" #include "clang/Sema/SemaDiagnostic.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Operator.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" using namespace clang; using namespace CodeGen; @@ -87,7 +89,7 @@ CodeGenFunction::CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext) llvm::FastMathFlags FMF; if (CGM.getLangOpts().FastMath) - FMF.setUnsafeAlgebra(); + FMF.setFast(); if (CGM.getLangOpts().FiniteMathOnly) { FMF.setNoNaNs(); FMF.setNoInfs(); @@ -101,6 +103,9 @@ CodeGenFunction::CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext) if (CGM.getCodeGenOpts().ReciprocalMath) { FMF.setAllowReciprocal(); } + if (CGM.getCodeGenOpts().Reassociate) { + FMF.setAllowReassoc(); + } Builder.setFastMathFlags(FMF); } @@ -118,27 +123,32 @@ CodeGenFunction::~CodeGenFunction() { } CharUnits CodeGenFunction::getNaturalPointeeTypeAlignment(QualType T, - LValueBaseInfo *BaseInfo) { - return getNaturalTypeAlignment(T->getPointeeType(), BaseInfo, - /*forPointee*/ true); + LValueBaseInfo *BaseInfo, + TBAAAccessInfo *TBAAInfo) { + return getNaturalTypeAlignment(T->getPointeeType(), BaseInfo, TBAAInfo, + /* forPointeeType= */ true); } CharUnits CodeGenFunction::getNaturalTypeAlignment(QualType T, LValueBaseInfo *BaseInfo, + TBAAAccessInfo *TBAAInfo, bool forPointeeType) { + if (TBAAInfo) + *TBAAInfo = CGM.getTBAAAccessInfo(T); + // Honor alignment typedef attributes even on incomplete types. // We also honor them straight for C++ class types, even as pointees; // there's an expressivity gap here. 
   if (auto TT = T->getAs<TypedefType>()) {
     if (auto Align = TT->getDecl()->getMaxAlignment()) {
       if (BaseInfo)
-        *BaseInfo = LValueBaseInfo(AlignmentSource::AttributedType, false);
+        *BaseInfo = LValueBaseInfo(AlignmentSource::AttributedType);
       return getContext().toCharUnitsFromBits(Align);
     }
   }

   if (BaseInfo)
-    *BaseInfo = LValueBaseInfo(AlignmentSource::Type, false);
+    *BaseInfo = LValueBaseInfo(AlignmentSource::Type);

   CharUnits Alignment;
   if (T->isIncompleteType()) {
@@ -169,9 +179,10 @@ CharUnits CodeGenFunction::getNaturalTypeAlignment(QualType T,

 LValue CodeGenFunction::MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T) {
   LValueBaseInfo BaseInfo;
-  CharUnits Alignment = getNaturalTypeAlignment(T, &BaseInfo);
+  TBAAAccessInfo TBAAInfo;
+  CharUnits Alignment = getNaturalTypeAlignment(T, &BaseInfo, &TBAAInfo);
   return LValue::MakeAddr(Address(V, Alignment), T, getContext(), BaseInfo,
-                          CGM.getTBAAInfo(T));
+                          TBAAInfo);
 }

 /// Given a value of type T* that may not be to a complete object,
@@ -179,8 +190,10 @@ LValue CodeGenFunction::MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T) {
 LValue CodeGenFunction::MakeNaturalAlignPointeeAddrLValue(llvm::Value *V,
                                                           QualType T) {
   LValueBaseInfo BaseInfo;
-  CharUnits Align = getNaturalTypeAlignment(T, &BaseInfo, /*pointee*/ true);
-  return MakeAddrLValue(Address(V, Align), T, BaseInfo);
+  TBAAAccessInfo TBAAInfo;
+  CharUnits Align = getNaturalTypeAlignment(T, &BaseInfo, &TBAAInfo,
+                                            /* forPointeeType= */ true);
+  return MakeAddrLValue(Address(V, Align), T, BaseInfo, TBAAInfo);
 }


@@ -344,8 +357,13 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
   // Emit function epilog (to return).
   llvm::DebugLoc Loc = EmitReturnBlock();

-  if (ShouldInstrumentFunction())
-    EmitFunctionInstrumentation("__cyg_profile_func_exit");
+  if (ShouldInstrumentFunction()) {
+    if (CGM.getCodeGenOpts().InstrumentFunctions)
+      CurFn->addFnAttr("instrument-function-exit", "__cyg_profile_func_exit");
+    if (CGM.getCodeGenOpts().InstrumentFunctionsAfterInlining)
+      CurFn->addFnAttr("instrument-function-exit-inlined",
+                       "__cyg_profile_func_exit");
+  }

   // Emit debug descriptor for function end.
   if (CGDebugInfo *DI = getDebugInfo())
@@ -411,12 +429,26 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
     I->first->replaceAllUsesWith(I->second);
     I->first->eraseFromParent();
   }
+
+  // Eliminate CleanupDestSlot alloca by replacing it with SSA values and
+  // PHIs if the current function is a coroutine. We don't do it for all
+  // functions as it may result in slight increase in numbers of instructions
+  // if compiled with no optimizations. We do it for coroutine as the lifetime
+  // of CleanupDestSlot alloca make correct coroutine frame building very
+  // difficult.
+  if (NormalCleanupDest && isCoroutine()) {
+    llvm::DominatorTree DT(*CurFn);
+    llvm::PromoteMemToReg(NormalCleanupDest, DT);
+    NormalCleanupDest = nullptr;
+  }
 }

 /// ShouldInstrumentFunction - Return true if the current function should be
 /// instrumented with __cyg_profile_func_* calls
 bool CodeGenFunction::ShouldInstrumentFunction() {
-  if (!CGM.getCodeGenOpts().InstrumentFunctions)
+  if (!CGM.getCodeGenOpts().InstrumentFunctions &&
+      !CGM.getCodeGenOpts().InstrumentFunctionsAfterInlining &&
+      !CGM.getCodeGenOpts().InstrumentFunctionEntryBare)
     return false;
   if (!CurFuncDecl || CurFuncDecl->hasAttr<NoInstrumentFunctionAttr>())
     return false;
@@ -429,29 +461,47 @@ bool CodeGenFunction::ShouldXRayInstrumentFunction() const {
   return CGM.getCodeGenOpts().XRayInstrumentFunctions;
 }

-/// EmitFunctionInstrumentation - Emit LLVM code to call the specified
-/// instrumentation function with the current function and the call site, if
-/// function instrumentation is enabled.
-void CodeGenFunction::EmitFunctionInstrumentation(const char *Fn) {
-  auto NL = ApplyDebugLocation::CreateArtificial(*this);
-  // void __cyg_profile_func_{enter,exit} (void *this_fn, void *call_site);
-  llvm::PointerType *PointerTy = Int8PtrTy;
-  llvm::Type *ProfileFuncArgs[] = { PointerTy, PointerTy };
-  llvm::FunctionType *FunctionTy =
-      llvm::FunctionType::get(VoidTy, ProfileFuncArgs, false);
-
-  llvm::Constant *F = CGM.CreateRuntimeFunction(FunctionTy, Fn);
-  llvm::CallInst *CallSite = Builder.CreateCall(
-      CGM.getIntrinsic(llvm::Intrinsic::returnaddress),
-      llvm::ConstantInt::get(Int32Ty, 0),
-      "callsite");
-
-  llvm::Value *args[] = {
-    llvm::ConstantExpr::getBitCast(CurFn, PointerTy),
-    CallSite
-  };
+/// AlwaysEmitXRayCustomEvents - Return true if we should emit IR for calls to
+/// the __xray_customevent(...) builin calls, when doing XRay instrumentation.
+bool CodeGenFunction::AlwaysEmitXRayCustomEvents() const {
+  return CGM.getCodeGenOpts().XRayAlwaysEmitCustomEvents;
+}

-  EmitNounwindRuntimeCall(F, args);
+llvm::Constant *
+CodeGenFunction::EncodeAddrForUseInPrologue(llvm::Function *F,
+                                            llvm::Constant *Addr) {
+  // Addresses stored in prologue data can't require run-time fixups and must
+  // be PC-relative. Run-time fixups are undesirable because they necessitate
+  // writable text segments, which are unsafe. And absolute addresses are
+  // undesirable because they break PIE mode.
+
+  // Add a layer of indirection through a private global. Taking its address
+  // won't result in a run-time fixup, even if Addr has linkonce_odr linkage.
+  auto *GV = new llvm::GlobalVariable(CGM.getModule(), Addr->getType(),
+                                      /*isConstant=*/true,
+                                      llvm::GlobalValue::PrivateLinkage, Addr);
+
+  // Create a PC-relative address.
+  auto *GOTAsInt = llvm::ConstantExpr::getPtrToInt(GV, IntPtrTy);
+  auto *FuncAsInt = llvm::ConstantExpr::getPtrToInt(F, IntPtrTy);
+  auto *PCRelAsInt = llvm::ConstantExpr::getSub(GOTAsInt, FuncAsInt);
+  return (IntPtrTy == Int32Ty)
+             ? PCRelAsInt
+             : llvm::ConstantExpr::getTrunc(PCRelAsInt, Int32Ty);
+}
+
+llvm::Value *
+CodeGenFunction::DecodeAddrUsedInPrologue(llvm::Value *F,
+                                          llvm::Value *EncodedAddr) {
+  // Reconstruct the address of the global.
+  auto *PCRelAsInt = Builder.CreateSExt(EncodedAddr, IntPtrTy);
+  auto *FuncAsInt = Builder.CreatePtrToInt(F, IntPtrTy, "func_addr.int");
+  auto *GOTAsInt = Builder.CreateAdd(PCRelAsInt, FuncAsInt, "global_addr.int");
+  auto *GOTAddr = Builder.CreateIntToPtr(GOTAsInt, Int8PtrPtrTy, "global_addr");
+
+  // Load the original pointer through the global.
+  return Builder.CreateLoad(Address(GOTAddr, getPointerAlign()),
+                            "decoded_addr");
 }

 static void removeImageAccessQualifier(std::string& TyName) {
@@ -480,8 +530,8 @@ static void removeImageAccessQualifier(std::string& TyName) {
 // for example in clGetKernelArgInfo() implementation between the address
 // spaces with targets without unique mapping to the OpenCL address spaces
 // (basically all single AS CPUs).
-static unsigned ArgInfoAddressSpace(unsigned LangAS) {
-  switch (LangAS) {
+static unsigned ArgInfoAddressSpace(LangAS AS) {
+  switch (AS) {
   case LangAS::opencl_global:   return 1;
   case LangAS::opencl_constant: return 2;
   case LangAS::opencl_local:    return 3;
@@ -621,7 +671,10 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn,

       // Get image and pipe access qualifier:
       if (ty->isImageType()|| ty->isPipeType()) {
-        const OpenCLAccessAttr *A = parm->getAttr<OpenCLAccessAttr>();
+        const Decl *PDecl = parm;
+        if (auto *TD = dyn_cast<TypedefType>(ty))
+          PDecl = TD->getDecl();
+        const OpenCLAccessAttr *A = PDecl->getAttr<OpenCLAccessAttr>();
         if (A && A->isWriteOnly())
           accessQuals.push_back(llvm::MDString::get(Context, "write_only"));
         else if (A && A->isReadWrite())
@@ -721,6 +774,35 @@ static void markAsIgnoreThreadCheckingAtRuntime(llvm::Function *Fn) {
   Fn->removeFnAttr(llvm::Attribute::SanitizeThread);
 }

+static bool matchesStlAllocatorFn(const Decl *D, const ASTContext &Ctx) {
+  auto *MD = dyn_cast_or_null<CXXMethodDecl>(D);
+  if (!MD || !MD->getDeclName().getAsIdentifierInfo() ||
+      !MD->getDeclName().getAsIdentifierInfo()->isStr("allocate") ||
+      (MD->getNumParams() != 1 && MD->getNumParams() != 2))
+    return false;
+
+  if (MD->parameters()[0]->getType().getCanonicalType() != Ctx.getSizeType())
+    return false;
+
+  if (MD->getNumParams() == 2) {
+    auto *PT = MD->parameters()[1]->getType()->getAs<PointerType>();
+    if (!PT || !PT->isVoidPointerType() ||
+        !PT->getPointeeType().isConstQualified())
+      return false;
+  }
+
+  return true;
+}
+
+/// Return the UBSan prologue signature for \p FD if one is available.
+static llvm::Constant *getPrologueSignature(CodeGenModule &CGM,
+                                            const FunctionDecl *FD) {
+  if (const auto *MD = dyn_cast<CXXMethodDecl>(FD))
+    if (!MD->isStatic())
+      return nullptr;
+  return CGM.getTargetCodeGenInfo().getUBSanFunctionSignature(CGM);
+}
+
 void CodeGenFunction::StartFunction(GlobalDecl GD,
                                     QualType RetTy,
                                     llvm::Function *Fn,
@@ -744,8 +826,19 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
   CurFnInfo = &FnInfo;
   assert(CurFn->isDeclaration() && "Function already has body?");

-  if (CGM.isInSanitizerBlacklist(Fn, Loc))
-    SanOpts.clear();
+  // If this function has been blacklisted for any of the enabled sanitizers,
+  // disable the sanitizer for the function.
+  do {
+#define SANITIZER(NAME, ID)                                                    \
+  if (SanOpts.empty())                                                         \
+    break;                                                                     \
+  if (SanOpts.has(SanitizerKind::ID))                                         \
+    if (CGM.isInSanitizerBlacklist(SanitizerKind::ID, Fn, Loc))                \
+      SanOpts.set(SanitizerKind::ID, false);
+
+#include "clang/Basic/Sanitizers.def"
+#undef SANITIZER
+  } while (0);

   if (D) {
     // Apply the no_sanitize* attributes to SanOpts.
@@ -756,6 +849,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,

   // Apply sanitizer attributes to the function.
   if (SanOpts.hasOneOf(SanitizerKind::Address | SanitizerKind::KernelAddress))
     Fn->addFnAttr(llvm::Attribute::SanitizeAddress);
+  if (SanOpts.hasOneOf(SanitizerKind::HWAddress))
+    Fn->addFnAttr(llvm::Attribute::SanitizeHWAddress);
   if (SanOpts.has(SanitizerKind::Thread))
     Fn->addFnAttr(llvm::Attribute::SanitizeThread);
   if (SanOpts.has(SanitizerKind::Memory))
@@ -780,6 +875,14 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
     }
   }

+  // Ignore unrelated casts in STL allocate() since the allocator must cast
+  // from void* to T* before object initialization completes. Don't match on the
+  // namespace because not all allocators are in std::
+  if (D && SanOpts.has(SanitizerKind::CFIUnrelatedCast)) {
+    if (matchesStlAllocatorFn(D, getContext()))
+      SanOpts.Mask &= ~SanitizerKind::CFIUnrelatedCast;
+  }
+
   // Apply xray attributes to the function (as a string, for now)
   if (D && ShouldXRayInstrumentFunction()) {
     if (const auto *XRayAttr = D->getAttr<XRayInstrumentAttr>()) {
@@ -799,14 +902,14 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
     }
   }

-  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
-    if (CGM.getLangOpts().OpenMP && FD->hasAttr<OMPDeclareSimdDeclAttr>())
-      CGM.getOpenMPRuntime().emitDeclareSimdFunction(FD, Fn);
-
   // Add no-jump-tables value.
   Fn->addFnAttr("no-jump-tables",
                 llvm::toStringRef(CGM.getCodeGenOpts().NoUseJumpTables));

+  // Add profile-sample-accurate value.
+  if (CGM.getCodeGenOpts().ProfileSampleAccurate)
+    Fn->addFnAttr("profile-sample-accurate");
+
   if (getLangOpts().OpenCL) {
     // Add metadata for a kernel function.
     if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
@@ -817,11 +920,13 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
   // prologue data.
   if (getLangOpts().CPlusPlus && SanOpts.has(SanitizerKind::Function)) {
     if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
-      if (llvm::Constant *PrologueSig =
-              CGM.getTargetCodeGenInfo().getUBSanFunctionSignature(CGM)) {
+      if (llvm::Constant *PrologueSig = getPrologueSignature(CGM, FD)) {
         llvm::Constant *FTRTTIConst =
             CGM.GetAddrOfRTTIDescriptor(FD->getType(), /*ForEH=*/true);
-        llvm::Constant *PrologueStructElems[] = { PrologueSig, FTRTTIConst };
+        llvm::Constant *FTRTTIConstEncoded =
+            EncodeAddrForUseInPrologue(Fn, FTRTTIConst);
+        llvm::Constant *PrologueStructElems[] = {PrologueSig,
+                                                 FTRTTIConstEncoded};
         llvm::Constant *PrologueStructConst =
             llvm::ConstantStruct::getAnon(PrologueStructElems, /*Packed=*/true);
         Fn->setPrologueData(PrologueStructConst);
@@ -885,8 +990,16 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
     DI->EmitFunctionStart(GD, Loc, StartLoc, FnType, CurFn, Builder);
   }

-  if (ShouldInstrumentFunction())
-    EmitFunctionInstrumentation("__cyg_profile_func_enter");
+  if (ShouldInstrumentFunction()) {
+    if (CGM.getCodeGenOpts().InstrumentFunctions)
+      CurFn->addFnAttr("instrument-function-entry", "__cyg_profile_func_enter");
+    if (CGM.getCodeGenOpts().InstrumentFunctionsAfterInlining)
+      CurFn->addFnAttr("instrument-function-entry-inlined",
+                       "__cyg_profile_func_enter");
+    if (CGM.getCodeGenOpts().InstrumentFunctionEntryBare)
+      CurFn->addFnAttr("instrument-function-entry-inlined",
+                       "__cyg_profile_func_enter_bare");
+  }

   // Since emitting the mcount call here impacts optimizations such as function
   // inlining, we just add an attribute to insert a mcount call in backend.
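The prologue-data change above stores the RTTI pointer as a 32-bit PC-relative offset through a private global rather than as an absolute address, so -fsanitize=function needs neither text relocations nor a non-PIE layout. Reduced to a standalone sketch of just the offset arithmetic (encode/decode are hypothetical names, and the indirection through the private global is elided):

    #include <cstdint>

    // Emit time: record the global's address relative to the function itself,
    // so the stored constant is position-independent.
    int32_t encode(uintptr_t Global, uintptr_t Func) {
      return static_cast<int32_t>(Global - Func);
    }

    // Check time: sign-extend the stored offset and add the function's
    // runtime address back to recover the global, wherever the image loaded.
    uintptr_t decode(int32_t Encoded, uintptr_t Func) {
      return Func + static_cast<uintptr_t>(static_cast<intptr_t>(Encoded));
    }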
@@ -896,8 +1009,10 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
     if (CGM.getCodeGenOpts().CallFEntry)
       Fn->addFnAttr("fentry-call", "true");
     else {
-      if (!CurFuncDecl || !CurFuncDecl->hasAttr<NoInstrumentFunctionAttr>())
-        Fn->addFnAttr("counting-function", getTarget().getMCountName());
+      if (!CurFuncDecl || !CurFuncDecl->hasAttr<NoInstrumentFunctionAttr>()) {
+        Fn->addFnAttr("instrument-function-entry-inlined",
+                      getTarget().getMCountName());
+      }
     }
   }

@@ -1185,16 +1300,11 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
            !getLangOpts().CUDAIsDevice &&
            FD->hasAttr<CUDAGlobalAttr>())
     CGM.getCUDARuntime().emitDeviceStub(*this, Args);
-  else if (isa<CXXConversionDecl>(FD) &&
-           cast<CXXConversionDecl>(FD)->isLambdaToBlockPointerConversion()) {
-    // The lambda conversion to block pointer is special; the semantics can't be
-    // expressed in the AST, so IRGen needs to special-case it.
-    EmitLambdaToBlockPointerBody(Args);
-  } else if (isa<CXXMethodDecl>(FD) &&
-             cast<CXXMethodDecl>(FD)->isLambdaStaticInvoker()) {
+  else if (isa<CXXMethodDecl>(FD) &&
+           cast<CXXMethodDecl>(FD)->isLambdaStaticInvoker()) {
     // The lambda static invoker function is special, because it forwards or
     // clones the body of the function call operator (but is actually static).
-    EmitLambdaStaticInvokeFunction(cast<CXXMethodDecl>(FD));
+    EmitLambdaStaticInvokeBody(cast<CXXMethodDecl>(FD));
   } else if (FD->isDefaulted() && isa<CXXMethodDecl>(FD) &&
              (cast<CXXMethodDecl>(FD)->isCopyAssignmentOperator() ||
               cast<CXXMethodDecl>(FD)->isMoveAssignmentOperator())) {
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index 6a1fa487ed14..ab5bbc03db95 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -76,6 +76,10 @@ class ObjCAtThrowStmt;
 class ObjCAtSynchronizedStmt;
 class ObjCAutoreleasePoolStmt;

+namespace analyze_os_log {
+class OSLogBufferLayout;
+}
+
 namespace CodeGen {
 class CodeGenTypes;
 class CGCallee;
@@ -111,6 +115,7 @@ enum TypeEvaluationKind {
   SANITIZER_CHECK(DynamicTypeCacheMiss, dynamic_type_cache_miss, 0)          \
   SANITIZER_CHECK(FloatCastOverflow, float_cast_overflow, 0)                 \
   SANITIZER_CHECK(FunctionTypeMismatch, function_type_mismatch, 0)           \
+  SANITIZER_CHECK(InvalidBuiltin, invalid_builtin, 0)                        \
   SANITIZER_CHECK(LoadInvalidValue, load_invalid_value, 0)                   \
   SANITIZER_CHECK(MissingReturn, missing_return, 0)                         \
   SANITIZER_CHECK(MulOverflow, mul_overflow, 0)                              \
@@ -220,6 +225,10 @@ public:
   };
   CGCoroInfo CurCoro;

+  bool isCoroutine() const {
+    return CurCoro.Data != nullptr;
+  }
+
   /// CurGD - The GlobalDecl for the current function being compiled.
   GlobalDecl CurGD;

@@ -262,9 +271,9 @@ public:
         if (I->capturesThis())
           CXXThisFieldDecl = *Field;
         else if (I->capturesVariable())
-          CaptureFields[I->getCapturedVar()] = *Field;
+          CaptureFields[I->getCapturedVar()->getCanonicalDecl()] = *Field;
         else if (I->capturesVariableByCopy())
-          CaptureFields[I->getCapturedVar()] = *Field;
+          CaptureFields[I->getCapturedVar()->getCanonicalDecl()] = *Field;
       }
     }

@@ -278,7 +287,7 @@ public:

     /// \brief Lookup the captured field decl for a variable.
     virtual const FieldDecl *lookup(const VarDecl *VD) const {
-      return CaptureFields.lookup(VD);
+      return CaptureFields.lookup(VD->getCanonicalDecl());
     }

     bool isCXXThisExprCaptured() const { return getThisFieldDecl() != nullptr; }
@@ -708,6 +717,7 @@ public:
                  llvm::function_ref<Address()> PrivateGen) {
       assert(PerformCleanup && "adding private to dead scope");

+      LocalVD = LocalVD->getCanonicalDecl();
       // Only save it once.
       if (SavedLocals.count(LocalVD)) return false;

@@ -758,8 +768,9 @@ public:
       ForceCleanup();
     }

-    /// Checks if the global variable is captured in current function. 
+    /// Checks if the global variable is captured in current function.
     bool isGlobalVarCaptured(const VarDecl *VD) const {
+      VD = VD->getCanonicalDecl();
       return !VD->isLocalVarDeclOrParm() && CGF.LocalDeclMap.count(VD) > 0;
     }

@@ -819,7 +830,7 @@ public:
   /// block through the normal cleanup handling code (if any) and then
   /// on to \arg Dest.
   void EmitBranchThroughCleanup(JumpDest Dest);
-  
+
   /// isObviouslyBranchWithoutCleanups - Return true if a branch to the
   /// specified destination obviously has no cleanups to run. 'false' is always
   /// a conservatively correct answer for this method.
@@ -1038,7 +1049,7 @@ public:
       if (Data.isValid()) Data.unbind(CGF);
     }
   };
-  
+
private:
   CGDebugInfo *DebugInfo;
   bool DisableDebugInfo;
@@ -1156,19 +1167,6 @@ private:
   };
   OpenMPCancelExitStack OMPCancelStack;

-  /// Controls insertion of cancellation exit blocks in worksharing constructs.
-  class OMPCancelStackRAII {
-    CodeGenFunction &CGF;
-
-  public:
-    OMPCancelStackRAII(CodeGenFunction &CGF, OpenMPDirectiveKind Kind,
-                       bool HasCancel)
-        : CGF(CGF) {
-      CGF.OMPCancelStack.enter(CGF, Kind, HasCancel);
-    }
-    ~OMPCancelStackRAII() { CGF.OMPCancelStack.exit(CGF); }
-  };
-
   CodeGenPGO PGO;

   /// Calculate branch weights appropriate for PGO data
@@ -1427,7 +1425,7 @@ private:

   /// Add OpenCL kernel arg metadata and the kernel attribute meatadata to
   /// the function metadata.
-  void EmitOpenCLKernelMetadata(const FunctionDecl *FD, 
+  void EmitOpenCLKernelMetadata(const FunctionDecl *FD,
                                 llvm::Function *Fn);

public:
@@ -1436,10 +1434,10 @@ public:
   CodeGenTypes &getTypes() const { return CGM.getTypes(); }
   ASTContext &getContext() const { return CGM.getContext(); }
-  CGDebugInfo *getDebugInfo() { 
-    if (DisableDebugInfo) 
+  CGDebugInfo *getDebugInfo() {
+    if (DisableDebugInfo)
       return nullptr;
-    return DebugInfo; 
+    return DebugInfo;
   }
   void disableDebugInfo() { DisableDebugInfo = true; }
   void enableDebugInfo() { DisableDebugInfo = false; }
@@ -1577,13 +1575,21 @@ public:
   // Block Bits
   //===--------------------------------------------------------------------===//

-  llvm::Value *EmitBlockLiteral(const BlockExpr *);
+  /// Emit block literal.
+  /// \return an LLVM value which is a pointer to a struct which contains
+  /// information about the block, including the block invoke function, the
+  /// captured variables, etc.
+  /// \param InvokeF will contain the block invoke function if it is not
+  /// nullptr.
+  llvm::Value *EmitBlockLiteral(const BlockExpr *,
+                                llvm::Function **InvokeF = nullptr);
   static void destroyBlockInfos(CGBlockInfo *info);

   llvm::Function *GenerateBlockFunction(GlobalDecl GD,
                                         const CGBlockInfo &Info,
                                         const DeclMapTy &ldm,
-                                        bool IsLambdaConversionToBlock);
+                                        bool IsLambdaConversionToBlock,
+                                        bool BuildGlobalBlock);

   llvm::Constant *GenerateCopyHelperFunction(const CGBlockInfo &blockInfo);
   llvm::Constant *GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo);
@@ -1642,10 +1648,9 @@ public:
   void EmitForwardingCallToLambda(const CXXMethodDecl *LambdaCallOperator,
                                   CallArgList &CallArgs);
-  void EmitLambdaToBlockPointerBody(FunctionArgList &Args);
   void EmitLambdaBlockInvokeBody();
   void EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD);
-  void EmitLambdaStaticInvokeFunction(const CXXMethodDecl *MD);
+  void EmitLambdaStaticInvokeBody(const CXXMethodDecl *MD);
   void EmitAsanPrologueOrEpilogue(bool Prologue);

   /// \brief Emit the unified return block, trying to avoid its emission when
@@ -1766,13 +1771,18 @@ public:
   /// instrumented with XRay nop sleds.
   bool ShouldXRayInstrumentFunction() const;

-  /// EmitFunctionInstrumentation - Emit LLVM code to call the specified
-  /// instrumentation function with the current function and the call site, if
-  /// function instrumentation is enabled.
-  void EmitFunctionInstrumentation(const char *Fn);
+  /// AlwaysEmitXRayCustomEvents - Return true if we must unconditionally emit
+  /// XRay custom event handling calls.
+  bool AlwaysEmitXRayCustomEvents() const;

-  /// EmitMCountInstrumentation - Emit call to .mcount.
-  void EmitMCountInstrumentation();
+  /// Encode an address into a form suitable for use in a function prologue.
+  llvm::Constant *EncodeAddrForUseInPrologue(llvm::Function *F,
+                                             llvm::Constant *Addr);
+
+  /// Decode an address used in a function prologue, encoded by \c
+  /// EncodeAddrForUseInPrologue.
+  llvm::Value *DecodeAddrUsedInPrologue(llvm::Value *F,
+                                        llvm::Value *EncodedAddr);

   /// EmitFunctionProlog - Emit the target specific LLVM code to load the
   /// arguments for the given function. This is also responsible for naming the
@@ -1816,8 +1826,7 @@ public:
   /// TypeOfSelfObject - Return type of object that this self represents.
   QualType TypeOfSelfObject();

-  /// hasAggregateLLVMType - Return true if the specified AST type will map into
-  /// an aggregate LLVM type or is void.
+  /// getEvaluationKind - Return the TypeEvaluationKind of QualType \c T.
   static TypeEvaluationKind getEvaluationKind(QualType T);

   static bool hasScalarEvaluationKind(QualType T) {
@@ -1896,33 +1905,53 @@ public:
   //===--------------------------------------------------------------------===//

   LValue MakeAddrLValue(Address Addr, QualType T,
-                        LValueBaseInfo BaseInfo =
-                            LValueBaseInfo(AlignmentSource::Type)) {
-    return LValue::MakeAddr(Addr, T, getContext(), BaseInfo,
-                            CGM.getTBAAInfo(T));
+                        AlignmentSource Source = AlignmentSource::Type) {
+    return LValue::MakeAddr(Addr, T, getContext(), LValueBaseInfo(Source),
+                            CGM.getTBAAAccessInfo(T));
+  }
+
+  LValue MakeAddrLValue(Address Addr, QualType T, LValueBaseInfo BaseInfo,
+                        TBAAAccessInfo TBAAInfo) {
+    return LValue::MakeAddr(Addr, T, getContext(), BaseInfo, TBAAInfo);
   }

   LValue MakeAddrLValue(llvm::Value *V, QualType T, CharUnits Alignment,
-                        LValueBaseInfo BaseInfo =
-                            LValueBaseInfo(AlignmentSource::Type)) {
+                        AlignmentSource Source = AlignmentSource::Type) {
     return LValue::MakeAddr(Address(V, Alignment), T, getContext(),
-                            BaseInfo, CGM.getTBAAInfo(T));
+                            LValueBaseInfo(Source), CGM.getTBAAAccessInfo(T));
+  }
+
+  LValue MakeAddrLValue(llvm::Value *V, QualType T, CharUnits Alignment,
+                        LValueBaseInfo BaseInfo, TBAAAccessInfo TBAAInfo) {
+    return LValue::MakeAddr(Address(V, Alignment), T, getContext(),
+                            BaseInfo, TBAAInfo);
   }

   LValue MakeNaturalAlignPointeeAddrLValue(llvm::Value *V, QualType T);
   LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T);
   CharUnits getNaturalTypeAlignment(QualType T,
                                     LValueBaseInfo *BaseInfo = nullptr,
+                                    TBAAAccessInfo *TBAAInfo = nullptr,
                                     bool forPointeeType = false);
   CharUnits getNaturalPointeeTypeAlignment(QualType T,
-                                           LValueBaseInfo *BaseInfo = nullptr);
-
-  Address EmitLoadOfReference(Address Ref, const ReferenceType *RefTy,
-                              LValueBaseInfo *BaseInfo = nullptr);
-  LValue EmitLoadOfReferenceLValue(Address Ref, const ReferenceType *RefTy);
+                                           LValueBaseInfo *BaseInfo = nullptr,
+                                           TBAAAccessInfo *TBAAInfo = nullptr);
+
+  Address EmitLoadOfReference(LValue RefLVal,
+                              LValueBaseInfo *PointeeBaseInfo = nullptr,
+                              TBAAAccessInfo *PointeeTBAAInfo = nullptr);
+  LValue EmitLoadOfReferenceLValue(LValue RefLVal);
+  LValue EmitLoadOfReferenceLValue(Address RefAddr, QualType RefTy,
+                                   AlignmentSource Source =
+                                       AlignmentSource::Type) {
+    LValue RefLVal = MakeAddrLValue(RefAddr, RefTy, LValueBaseInfo(Source),
+                                    CGM.getTBAAAccessInfo(RefTy));
+    return EmitLoadOfReferenceLValue(RefLVal);
+  }

   Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy,
-                            LValueBaseInfo *BaseInfo = nullptr);
+                            LValueBaseInfo *BaseInfo = nullptr,
+                            TBAAAccessInfo *TBAAInfo = nullptr);
   LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy);

   /// CreateTempAlloca - This creates an alloca and inserts it into the entry
@@ -2345,6 +2374,12 @@ public:
     TCK_NonnullAssign
   };

+  /// Determine whether the pointer type check \p TCK permits null pointers.
+  static bool isNullPointerAllowed(TypeCheckKind TCK);
+
+  /// Determine whether the pointer type check \p TCK requires a vptr check.
+  static bool isVptrCheckRequired(TypeCheckKind TCK, QualType Ty);
+
   /// \brief Whether any type-checking sanitizers are enabled. If \c false,
   /// calls to EmitTypeCheck can be skipped.
   bool sanitizePerformTypeCheck() const;

@@ -2464,7 +2499,7 @@ public:
   };
   AutoVarEmission EmitAutoVarAlloca(const VarDecl &var);
   void EmitAutoVarInit(const AutoVarEmission &emission);
-  void EmitAutoVarCleanups(const AutoVarEmission &emission);  
+  void EmitAutoVarCleanups(const AutoVarEmission &emission);
   void emitAutoVarTypeCleanup(const AutoVarEmission &emission,
                               QualType::DestructionKind dtorKind);

@@ -2486,7 +2521,7 @@ public:
     bool isIndirect() const { return Alignment != 0; }
     llvm::Value *getAnyValue() const { return Value; }
-    
+
     llvm::Value *getDirectValue() const {
       assert(!isIndirect());
       return Value;
@@ -2532,7 +2567,7 @@ public:
   /// This function may clear the current insertion point; callers should use
   /// EnsureInsertPoint if they wish to subsequently generate code without first
   /// calling EmitBlock, EmitBranch, or EmitStmt.
-  void EmitStmt(const Stmt *S);
+  void EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs = None);

   /// EmitSimpleStmt - Try to emit a "simple" statement which does not
   /// necessarily require an insertion point or debug information; typically
@@ -2635,6 +2670,19 @@ public:
   void EmitCXXForRangeStmt(const CXXForRangeStmt &S,
                            ArrayRef<const Attr *> Attrs = None);

+  /// Controls insertion of cancellation exit blocks in worksharing constructs.
+  class OMPCancelStackRAII {
+    CodeGenFunction &CGF;
+
+  public:
+    OMPCancelStackRAII(CodeGenFunction &CGF, OpenMPDirectiveKind Kind,
+                       bool HasCancel)
+        : CGF(CGF) {
+      CGF.OMPCancelStack.enter(CGF, Kind, HasCancel);
+    }
+    ~OMPCancelStackRAII() { CGF.OMPCancelStack.exit(CGF); }
+  };
+
   /// Returns calculated size of the specified type.
   llvm::Value *getTypeSize(QualType Ty);
   LValue InitCapturedStruct(const CapturedStmt &S);
@@ -2841,9 +2889,30 @@ public:
   static void EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM,
                                                   StringRef ParentName,
                                                   const OMPTargetParallelDirective &S);
+  /// Emit device code for the target parallel for directive.
+  static void EmitOMPTargetParallelForDeviceFunction(
+      CodeGenModule &CGM, StringRef ParentName,
+      const OMPTargetParallelForDirective &S);
+  /// Emit device code for the target parallel for simd directive.
+  static void EmitOMPTargetParallelForSimdDeviceFunction(
+      CodeGenModule &CGM, StringRef ParentName,
+      const OMPTargetParallelForSimdDirective &S);
+  /// Emit device code for the target teams directive.
   static void EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM,
                                                StringRef ParentName,
                                                const OMPTargetTeamsDirective &S);
+  /// Emit device code for the target teams distribute directive.
+  static void EmitOMPTargetTeamsDistributeDeviceFunction(
+      CodeGenModule &CGM, StringRef ParentName,
+      const OMPTargetTeamsDistributeDirective &S);
+  /// Emit device code for the target teams distribute simd directive.
+  static void EmitOMPTargetTeamsDistributeSimdDeviceFunction(
+      CodeGenModule &CGM, StringRef ParentName,
+      const OMPTargetTeamsDistributeSimdDirective &S);
+  /// Emit device code for the target simd directive.
+  static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM,
+                                              StringRef ParentName,
+                                              const OMPTargetSimdDirective &S);
   /// \brief Emit inner loop of the worksharing/simd construct.
   ///
   /// \param S Directive, for which the inner loop must be emitted.
@@ -2875,9 +2944,9 @@ public:
                            const CodeGenLoopBoundsTy &CodeGenLoopBounds,
                            const CodeGenDispatchBoundsTy &CGDispatchBounds);

-private:
-  /// Helpers for blocks
-  llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info);
+  /// Emit code for the distribute loop-based directive.
+  void EmitOMPDistributeLoop(const OMPLoopDirective &S,
+                             const CodeGenLoopTy &CodeGenLoop, Expr *IncExpr);

   /// Helpers for the OpenMP loop directives.
   void EmitOMPSimdInit(const OMPLoopDirective &D, bool IsMonotonic = false);
@@ -2885,8 +2954,15 @@ private:
       const OMPLoopDirective &D,
       const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen);

-  void EmitOMPDistributeLoop(const OMPLoopDirective &S,
-                             const CodeGenLoopTy &CodeGenLoop, Expr *IncExpr);
+  /// Emits the lvalue for the expression with possibly captured variable.
+  LValue EmitOMPSharedLValue(const Expr *E);
+
+private:
+  /// Helpers for blocks. Returns invoke function by \p InvokeF if it is not
+  /// nullptr. It should be called without \p InvokeF if the caller does not
+  /// need invoke function to be returned.
+  llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info,
+                                llvm::Function **InvokeF = nullptr);

   /// struct with the values to be passed to the OpenMP loop-related functions
   struct OMPLoopArguments {
@@ -3034,11 +3110,15 @@ public:
   /// the LLVM value representation.
   llvm::Value *EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty,
                                 SourceLocation Loc,
-                                LValueBaseInfo BaseInfo =
-                                    LValueBaseInfo(AlignmentSource::Type),
-                                llvm::MDNode *TBAAInfo = nullptr,
-                                QualType TBAABaseTy = QualType(),
-                                uint64_t TBAAOffset = 0,
+                                AlignmentSource Source = AlignmentSource::Type,
+                                bool isNontemporal = false) {
+    return EmitLoadOfScalar(Addr, Volatile, Ty, Loc, LValueBaseInfo(Source),
+                            CGM.getTBAAAccessInfo(Ty), isNontemporal);
+  }
+
+  llvm::Value *EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty,
+                                SourceLocation Loc, LValueBaseInfo BaseInfo,
+                                TBAAAccessInfo TBAAInfo,
                                 bool isNontemporal = false);

   /// EmitLoadOfScalar - Load a scalar value from an address, taking
@@ -3052,11 +3132,16 @@ public:
   /// the LLVM value representation.
   void EmitStoreOfScalar(llvm::Value *Value, Address Addr,
                          bool Volatile, QualType Ty,
-                         LValueBaseInfo BaseInfo =
-                             LValueBaseInfo(AlignmentSource::Type),
-                         llvm::MDNode *TBAAInfo = nullptr, bool isInit = false,
-                         QualType TBAABaseTy = QualType(),
-                         uint64_t TBAAOffset = 0, bool isNontemporal = false);
+                         AlignmentSource Source = AlignmentSource::Type,
+                         bool isInit = false, bool isNontemporal = false) {
+    EmitStoreOfScalar(Value, Addr, Volatile, Ty, LValueBaseInfo(Source),
+                      CGM.getTBAAAccessInfo(Ty), isInit, isNontemporal);
+  }
+
+  void EmitStoreOfScalar(llvm::Value *Value, Address Addr,
+                         bool Volatile, QualType Ty,
+                         LValueBaseInfo BaseInfo, TBAAAccessInfo TBAAInfo,
+                         bool isInit = false, bool isNontemporal = false);

   /// EmitStoreOfScalar - Store a scalar value to an address, taking
   /// care to appropriately convert from the memory representation to
@@ -3120,13 +3205,14 @@ public:
   LValue EmitCastLValue(const CastExpr *E);
   LValue EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *E);
   LValue EmitOpaqueValueLValue(const OpaqueValueExpr *e);
-  
+
   Address EmitExtVectorElementLValue(LValue V);

   RValue EmitRValueForField(LValue LV, const FieldDecl *FD, SourceLocation Loc);

   Address EmitArrayToPointerDecay(const Expr *Array,
-                                  LValueBaseInfo *BaseInfo = nullptr);
+                                  LValueBaseInfo *BaseInfo = nullptr,
+                                  TBAAAccessInfo *TBAAInfo = nullptr);

   class ConstantEmission {
     llvm::PointerIntPair<llvm::Constant*, 1, bool> ValueAndIsReference;
@@ -3159,6 +3245,7 @@ public:
   };

   ConstantEmission tryEmitAsConstant(DeclRefExpr *refExpr);
+  ConstantEmission tryEmitAsConstant(const MemberExpr *ME);

   RValue EmitPseudoObjectRValue(const PseudoObjectExpr *e,
                                 AggValueSlot slot = AggValueSlot::ignored());
@@ -3235,12 +3322,12 @@ public:
   void EmitNoreturnRuntimeCallOrInvoke(llvm::Value *callee,
                                        ArrayRef<llvm::Value*> args);

-  CGCallee BuildAppleKextVirtualCall(const CXXMethodDecl *MD, 
+  CGCallee BuildAppleKextVirtualCall(const CXXMethodDecl *MD,
                                      NestedNameSpecifier *Qual,
                                      llvm::Type *Ty);
-  
+
   CGCallee BuildAppleKextVirtualDestructorCall(const CXXDestructorDecl *DD,
-                                               CXXDtorType Type, 
+                                               CXXDtorType Type,
                                                const CXXRecordDecl *RD);

   RValue
@@ -3267,7 +3354,8 @@ public:
   Address EmitCXXMemberDataPointerAddress(const Expr *E, Address base,
                                           llvm::Value *memberPtr,
                                           const MemberPointerType *memberPtrType,
-                                          LValueBaseInfo *BaseInfo = nullptr);
+                                          LValueBaseInfo *BaseInfo = nullptr,
+                                          TBAAAccessInfo *TBAAInfo = nullptr);
   RValue EmitCXXMemberPointerCallExpr(const CXXMemberCallExpr *E,
                                       ReturnValueSlot ReturnValue);

@@ -3286,6 +3374,13 @@ public:
                                        unsigned BuiltinID, const CallExpr *E,
                                        ReturnValueSlot ReturnValue);

+  /// Emit IR for __builtin_os_log_format.
+  RValue emitBuiltinOSLogFormat(const CallExpr &E);
+
+  llvm::Function *generateBuiltinOSLogHelperFunction(
+      const analyze_os_log::OSLogBufferLayout &Layout,
+      CharUnits BufferAlignment);
+
   RValue EmitBlockCallExpr(const CallExpr *E, ReturnValueSlot ReturnValue);
   /// EmitTargetBuiltinExpr - Emit the given builtin call. Returns 0 if the call
@@ -3329,6 +3424,7 @@ public:
   llvm::Value *EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   llvm::Value *EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E);
+  llvm::Value *EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E);

private:
   enum class MSVCIntrin;
@@ -3406,11 +3502,11 @@ public:
   static Destroyer destroyARCWeak;
   static Destroyer emitARCIntrinsicUse;

-  void EmitObjCAutoreleasePoolPop(llvm::Value *Ptr); 
+  void EmitObjCAutoreleasePoolPop(llvm::Value *Ptr);
   llvm::Value *EmitObjCAutoreleasePoolPush();
   llvm::Value *EmitObjCMRRAutoreleasePoolPush();
   void EmitObjCAutoreleasePoolCleanup(llvm::Value *Ptr);
-  void EmitObjCMRRAutoreleasePoolPop(llvm::Value *Ptr); 
+  void EmitObjCMRRAutoreleasePoolPop(llvm::Value *Ptr);

   /// \brief Emits a reference binding to the passed in expression.
   RValue EmitReferenceBindingToExpr(const Expr *E);
@@ -3498,6 +3594,14 @@ public:
   void EmitCXXGuardedInit(const VarDecl &D, llvm::GlobalVariable *DeclPtr,
                           bool PerformInit);

+  enum class GuardKind { VariableGuard, TlsGuard };
+
+  /// Emit a branch to select whether or not to perform guarded initialization.
+  void EmitCXXGuardedInitBranch(llvm::Value *NeedsInit,
+                                llvm::BasicBlock *InitBlock,
+                                llvm::BasicBlock *NoInitBlock,
+                                GuardKind Kind, const VarDecl *D);
+
   /// GenerateCXXGlobalInitFunc - Generates code for initializing global
   /// variables.
   void GenerateCXXGlobalInitFunc(llvm::Function *Fn,
@@ -3517,7 +3621,7 @@ public:
                                  bool PerformInit);

   void EmitCXXConstructExpr(const CXXConstructExpr *E, AggValueSlot Dest);
-  
+
   void EmitSynthesizedCXXCopyCtor(Address Dest, Address Src, const Expr *Exp);

   void enterFullExpression(const ExprWithCleanups *E) {
@@ -3566,7 +3670,7 @@ public:
   /// Determine if the given statement might introduce a declaration into the
   /// current scope, by being a (possibly-labelled) DeclStmt.
   static bool mightAddDeclToScope(const Stmt *S);
-  
+
   /// ConstantFoldsToSimpleInteger - If the specified expression does not fold
   /// to a constant, or if it does but contains a label, return false. If it
   /// constant folds return true and set the boolean result in Result.
@@ -3607,6 +3711,17 @@ public:
                                      SourceLocation Loc,
                                      const Twine &Name = "");

+  /// Specifies which type of sanitizer check to apply when handling a
+  /// particular builtin.
+  enum BuiltinCheckKind {
+    BCK_CTZPassedZero,
+    BCK_CLZPassedZero,
+  };
+
+  /// Emits an argument for a call to a builtin. If the builtin sanitizer is
+  /// enabled, a runtime check specified by \p Kind is also emitted.
+  llvm::Value *EmitCheckedArgForBuiltin(const Expr *E, BuiltinCheckKind Kind);
+
   /// \brief Emit a description of a type in a format suitable for passing to
   /// a runtime sanitizer handler.
   llvm::Constant *EmitCheckTypeDescriptor(QualType T);
@@ -3820,7 +3935,13 @@ public:
   /// reasonable to just ignore the returned alignment when it isn't from an
   /// explicit source.
   Address EmitPointerWithAlignment(const Expr *Addr,
-                                   LValueBaseInfo *BaseInfo = nullptr);
+                                   LValueBaseInfo *BaseInfo = nullptr,
+                                   TBAAAccessInfo *TBAAInfo = nullptr);
+
+  /// If \p E references a parameter with pass_object_size info or a constant
+  /// array size modifier, emit the object size divided by the size of \p EltTy.
+  /// Otherwise return null.
+  llvm::Value *LoadPassedObjectSize(const Expr *E, QualType EltTy);

   void EmitSanitizerStatReport(llvm::SanitizerStatKind SSK);

@@ -3835,6 +3956,11 @@ private:
   void AddObjCARCExceptionMetadata(llvm::Instruction *Inst);

   llvm::Value *GetValueForARMHint(unsigned BuiltinID);
+  llvm::Value *EmitX86CpuIs(const CallExpr *E);
+  llvm::Value *EmitX86CpuIs(StringRef CPUStr);
+  llvm::Value *EmitX86CpuSupports(const CallExpr *E);
+  llvm::Value *EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs);
+  llvm::Value *EmitX86CpuInit();
 };

 /// Helper class with most of the code for saving a value for a
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp
index 5561d4520cc8..c59dc71da596 100644
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -23,7 +23,7 @@
 #include "CGOpenMPRuntimeNVPTX.h"
 #include "CodeGenFunction.h"
 #include "CodeGenPGO.h"
-#include "CodeGenTBAA.h"
+#include "ConstantEmitter.h"
 #include "CoverageMappingGen.h"
 #include "TargetInfo.h"
 #include "clang/AST/ASTContext.h"
@@ -60,6 +60,11 @@
 using namespace clang;
 using namespace CodeGen;

+static llvm::cl::opt<bool> LimitedCoverage(
+    "limited-coverage-experimental", llvm::cl::ZeroOrMore, llvm::cl::Hidden,
+    llvm::cl::desc("Emit limited coverage mapping information (experimental)"),
+    llvm::cl::init(false));
+
 static const char AnnotationSection[] = "llvm.metadata";

 static CGCXXABI *createCXXABI(CodeGenModule &CGM) {
@@ -131,7 +136,7 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO,
   // Enable TBAA unless it's suppressed. ThreadSanitizer needs TBAA even at O0.
   if (LangOpts.Sanitize.has(SanitizerKind::Thread) ||
       (!CodeGenOpts.RelaxedAliasing && CodeGenOpts.OptimizationLevel > 0))
-    TBAA.reset(new CodeGenTBAA(Context, VMContext, CodeGenOpts, getLangOpts(),
+    TBAA.reset(new CodeGenTBAA(Context, TheModule, CodeGenOpts, getLangOpts(),
                               getCXXABI().getMangleContext()));

   // If debug info or coverage generation is enabled, create the CGDebugInfo
@@ -436,7 +441,7 @@ void CodeGenModule::Release() {
   if (Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86)
     getModule().addModuleFlag(llvm::Module::Error, "NumRegisterParameters",
                               CodeGenOpts.NumRegisterParameters);
-  
+
   if (CodeGenOpts.DwarfVersion) {
     // We actually want the latest version when there are conflicts.
     // We can change from Warning to Latest if such mode is supported.
@@ -470,17 +475,11 @@ void CodeGenModule::Release() {
     getModule().addModuleFlag(llvm::Module::Warning, "Debug Info Version",
                               llvm::DEBUG_METADATA_VERSION);

-  // Width of wchar_t in bytes
-  uint64_t WCharWidth =
-      Context.getTypeSizeInChars(Context.getWideCharType()).getQuantity();
-  assert((LangOpts.ShortWChar ||
-          llvm::TargetLibraryInfoImpl::getTargetWCharSize(Target.getTriple()) ==
-              Target.getWCharWidth() / 8) &&
-         "LLVM wchar_t size out of sync");
-
   // We need to record the widths of enums and wchar_t, so that we can generate
   // the correct build attributes in the ARM backend. wchar_size is also used by
   // TargetLibraryInfo.
+  uint64_t WCharWidth =
+      Context.getTypeSizeInChars(Context.getWideCharType()).getQuantity();
   getModule().addModuleFlag(llvm::Module::Error, "wchar_size", WCharWidth);

   llvm::Triple::ArchType Arch = Context.getTargetInfo().getTriple().getArch();
@@ -573,16 +572,27 @@ void CodeGenModule::RefreshTypeCacheForClass(const CXXRecordDecl *RD) {
   Types.RefreshTypeCacheForClass(RD);
 }

-llvm::MDNode *CodeGenModule::getTBAAInfo(QualType QTy) {
+llvm::MDNode *CodeGenModule::getTBAATypeInfo(QualType QTy) {
   if (!TBAA)
     return nullptr;
-  return TBAA->getTBAAInfo(QTy);
+  return TBAA->getTypeInfo(QTy);
 }

-llvm::MDNode *CodeGenModule::getTBAAInfoForVTablePtr() {
+TBAAAccessInfo CodeGenModule::getTBAAAccessInfo(QualType AccessType) {
+  // Pointee values may have incomplete types, but they shall never be
+  // dereferenced.
+  if (AccessType->isIncompleteType())
+    return TBAAAccessInfo::getIncompleteInfo();
+
+  uint64_t Size = Context.getTypeSizeInChars(AccessType).getQuantity();
+  return TBAAAccessInfo(getTBAATypeInfo(AccessType), Size);
+}
+
+TBAAAccessInfo
+CodeGenModule::getTBAAVTablePtrAccessInfo(llvm::Type *VTablePtrType) {
   if (!TBAA)
-    return nullptr;
-  return TBAA->getTBAAInfoForVTablePtr();
+    return TBAAAccessInfo();
+  return TBAA->getVTablePtrAccessInfo(VTablePtrType);
 }

 llvm::MDNode *CodeGenModule::getTBAAStructInfo(QualType QTy) {
@@ -591,26 +601,37 @@ llvm::MDNode *CodeGenModule::getTBAAStructInfo(QualType QTy) {
   return TBAA->getTBAAStructInfo(QTy);
 }

-llvm::MDNode *CodeGenModule::getTBAAStructTagInfo(QualType BaseTy,
-                                                  llvm::MDNode *AccessN,
-                                                  uint64_t O) {
+llvm::MDNode *CodeGenModule::getTBAABaseTypeInfo(QualType QTy) {
+  if (!TBAA)
+    return nullptr;
+  return TBAA->getBaseTypeInfo(QTy);
+}
+
+llvm::MDNode *CodeGenModule::getTBAAAccessTagInfo(TBAAAccessInfo Info) {
   if (!TBAA)
     return nullptr;
-  return TBAA->getTBAAStructTagInfo(BaseTy, AccessN, O);
+  return TBAA->getAccessTagInfo(Info);
+}
+
+TBAAAccessInfo CodeGenModule::mergeTBAAInfoForCast(TBAAAccessInfo SourceInfo,
+                                                   TBAAAccessInfo TargetInfo) {
+  if (!TBAA)
+    return TBAAAccessInfo();
+  return TBAA->mergeTBAAInfoForCast(SourceInfo, TargetInfo);
+}
+
+TBAAAccessInfo
+CodeGenModule::mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA,
+                                                   TBAAAccessInfo InfoB) {
+  if (!TBAA)
+    return TBAAAccessInfo();
+  return TBAA->mergeTBAAInfoForConditionalOperator(InfoA, InfoB);
 }

-/// Decorate the instruction with a TBAA tag. For both scalar TBAA
-/// and struct-path aware TBAA, the tag has the same format:
-/// base type, access type and offset.
-/// When ConvertTypeToTag is true, we create a tag based on the scalar type.
 void CodeGenModule::DecorateInstructionWithTBAA(llvm::Instruction *Inst,
-                                                llvm::MDNode *TBAAInfo,
-                                                bool ConvertTypeToTag) {
-  if (ConvertTypeToTag && TBAA)
-    Inst->setMetadata(llvm::LLVMContext::MD_tbaa,
-                      TBAA->getTBAAScalarTagInfo(TBAAInfo));
-  else
-    Inst->setMetadata(llvm::LLVMContext::MD_tbaa, TBAAInfo);
+                                                TBAAAccessInfo TBAAInfo) {
+  if (llvm::MDNode *Tag = getTBAAAccessTagInfo(TBAAInfo))
+    Inst->setMetadata(llvm::LLVMContext::MD_tbaa, Tag);
 }

 void CodeGenModule::DecorateInstructionWithInvariantGroup(
@@ -648,7 +669,8 @@ llvm::ConstantInt *CodeGenModule::getSize(CharUnits size) {
 }

 void CodeGenModule::setGlobalVisibility(llvm::GlobalValue *GV,
-                                        const NamedDecl *D) const {
+                                        const NamedDecl *D,
+                                        ForDefinition_t IsForDefinition) const {
   // Internal definitions always have default visibility.
   if (GV->hasLocalLinkage()) {
     GV->setVisibility(llvm::GlobalValue::DefaultVisibility);
     return;
   }

   // Set visibility for definitions.
   LinkageInfo LV = D->getLinkageAndVisibility();
-  if (LV.isVisibilityExplicit() || !GV->hasAvailableExternallyLinkage())
+  if (LV.isVisibilityExplicit() ||
+      (IsForDefinition && !GV->hasAvailableExternallyLinkage()))
     GV->setVisibility(GetLLVMVisibility(LV.getVisibility()));
 }

@@ -712,9 +735,9 @@ StringRef CodeGenModule::getMangledName(GlobalDecl GD) {
     }
   }

-  StringRef &FoundStr = MangledDeclNames[CanonicalGD];
-  if (!FoundStr.empty())
-    return FoundStr;
+  auto FoundName = MangledDeclNames.find(CanonicalGD);
+  if (FoundName != MangledDeclNames.end())
+    return FoundName->second;

   const auto *ND = cast<NamedDecl>(GD.getDecl());
   SmallString<256> Buffer;
@@ -745,7 +768,7 @@ StringRef CodeGenModule::getMangledName(GlobalDecl GD) {

   // Keep the first result in the case of a mangling collision.
   auto Result = Manglings.insert(std::make_pair(Str, GD));
-  return FoundStr = Result.first->first();
+  return MangledDeclNames[CanonicalGD] = Result.first->first();
 }

 StringRef CodeGenModule::getBlockMangledName(GlobalDecl GD,
@@ -756,7 +779,7 @@ StringRef CodeGenModule::getBlockMangledName(GlobalDecl GD,
   SmallString<256> Buffer;
   llvm::raw_svector_ostream Out(Buffer);
   if (!D)
-    MangleCtx.mangleGlobalBlock(BD, 
+    MangleCtx.mangleGlobalBlock(BD,
       dyn_cast_or_null<VarDecl>(initializedGlobalDecl.getDecl()), Out);
   else if (const auto *CD = dyn_cast<CXXConstructorDecl>(D))
     MangleCtx.mangleCtorBlock(CD, GD.getCtorType(), BD, Out);
@@ -1038,7 +1061,7 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,

 void CodeGenModule::SetCommonAttributes(const Decl *D,
                                         llvm::GlobalValue *GV) {
   if (const auto *ND = dyn_cast_or_null<NamedDecl>(D))
-    setGlobalVisibility(GV, ND);
+    setGlobalVisibility(GV, ND, ForDefinition);
   else
     GV->setVisibility(llvm::GlobalValue::DefaultVisibility);

@@ -1080,7 +1103,7 @@ void CodeGenModule::setNonAliasAttributes(const Decl *D,
       GO->setSection(SA->getName());
   }

-  getTargetCodeGenInfo().setTargetAttributes(D, GO, *this);
+  getTargetCodeGenInfo().setTargetAttributes(D, GO, *this, ForDefinition);
 }

 void CodeGenModule::SetInternalFunctionAttributes(const Decl *D,
@@ -1094,8 +1117,8 @@ void CodeGenModule::SetInternalFunctionAttributes(const Decl *D,
   setNonAliasAttributes(D, F);
 }

-static void setLinkageAndVisibilityForGV(llvm::GlobalValue *GV,
-                                         const NamedDecl *ND) {
+static void setLinkageForGV(llvm::GlobalValue *GV,
+                            const NamedDecl *ND) {
   // Set linkage and visibility in case we never see a definition.
   LinkageInfo LV = ND->getLinkageAndVisibility();
   if (!isExternallyVisible(LV.getLinkage())) {
@@ -1111,10 +1134,6 @@ static void setLinkageAndVisibilityForGV(llvm::GlobalValue *GV,
     // separate linkage types for this.
     GV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage);
   }
-
-  // Set visibility on a declaration only if it's explicit.
-  if (LV.isVisibilityExplicit())
-    GV->setVisibility(CodeGenModule::GetLLVMVisibility(LV.getVisibility()));
  }
 }

@@ -1138,6 +1157,7 @@ void CodeGenModule::CreateFunctionTypeMetadata(const FunctionDecl *FD,

   llvm::Metadata *MD = CreateMetadataIdentifierForType(FD->getType());
   F->addTypeMetadata(0, MD);
+  F->addTypeMetadata(0, CreateMetadataIdentifierGeneralized(FD->getType()));

   // Emit a hash-based bit set entry for cross-DSO calls.
   if (CodeGenOpts.SanitizeCfiCrossDso)
@@ -1147,7 +1167,9 @@ void CodeGenModule::CreateFunctionTypeMetadata(const FunctionDecl *FD,
 void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
                                           bool IsIncompleteFunction,
-                                          bool IsThunk) {
+                                          bool IsThunk,
+                                          ForDefinition_t IsForDefinition) {
+
   if (llvm::Intrinsic::ID IID = F->getIntrinsicID()) {
     // If this is an intrinsic function, set the function's attributes
     // to the intrinsic's attributes.
@@ -1157,8 +1179,13 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,

   const auto *FD = cast<FunctionDecl>(GD.getDecl());

-  if (!IsIncompleteFunction)
+  if (!IsIncompleteFunction) {
     SetLLVMFunctionAttributes(FD, getTypes().arrangeGlobalDeclaration(GD), F);
+    // Setup target-specific attributes.
+    if (!IsForDefinition)
+      getTargetCodeGenInfo().setTargetAttributes(FD, F, *this,
+                                                 NotForDefinition);
+  }

   // Add the Returned attribute for "this", except for iOS 5 and earlier
   // where substantial code, including the libstdc++ dylib, was compiled with
@@ -1175,7 +1202,8 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,

   // Only a few attributes are set on declarations; these may later be
   // overridden by a definition.
-  setLinkageAndVisibilityForGV(F, FD);
+  setLinkageForGV(F, FD);
+  setGlobalVisibility(F, FD, NotForDefinition);

   if (FD->getAttr<PragmaClangTextSectionAttr>()) {
     F->addFnAttr("implicit-section-name");
@@ -1210,6 +1238,9 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
   // is handled with better precision by the receiving DSO.
   if (!CodeGenOpts.SanitizeCfiCrossDso)
     CreateFunctionTypeMetadata(FD, F);
+
+  if (getLangOpts().OpenMP && FD->hasAttr<OMPDeclareSimdDeclAttr>())
+    getOpenMPRuntime().emitDeclareSimdFunction(FD, F);
 }

 void CodeGenModule::addUsedGlobal(llvm::GlobalValue *GV) {
@@ -1530,20 +1561,21 @@ void CodeGenModule::AddGlobalAnnotations(const ValueDecl *D,
     Annotations.push_back(EmitAnnotateAttr(GV, I, D->getLocation()));
 }

-bool CodeGenModule::isInSanitizerBlacklist(llvm::Function *Fn,
+bool CodeGenModule::isInSanitizerBlacklist(SanitizerMask Kind,
+                                           llvm::Function *Fn,
                                            SourceLocation Loc) const {
   const auto &SanitizerBL = getContext().getSanitizerBlacklist();
   // Blacklist by function name.
-  if (SanitizerBL.isBlacklistedFunction(Fn->getName()))
+  if (SanitizerBL.isBlacklistedFunction(Kind, Fn->getName()))
     return true;
   // Blacklist by location.
   if (Loc.isValid())
-    return SanitizerBL.isBlacklistedLocation(Loc);
+    return SanitizerBL.isBlacklistedLocation(Kind, Loc);
   // If location is unknown, this may be a compiler-generated function. Assume
   // it's located in the main file.
   auto &SM = Context.getSourceManager();
   if (const auto *MainFile = SM.getFileEntryForID(SM.getMainFileID())) {
-    return SanitizerBL.isBlacklistedFile(MainFile->getName());
+    return SanitizerBL.isBlacklistedFile(Kind, MainFile->getName());
   }
   return false;
 }
@@ -1552,13 +1584,14 @@ bool CodeGenModule::isInSanitizerBlacklist(llvm::GlobalVariable *GV,
                                            SourceLocation Loc, QualType Ty,
                                            StringRef Category) const {
   // For now globals can be blacklisted only in ASan and KASan.
-  if (!LangOpts.Sanitize.hasOneOf(
-          SanitizerKind::Address | SanitizerKind::KernelAddress))
+  const SanitizerMask EnabledAsanMask = LangOpts.Sanitize.Mask &
+      (SanitizerKind::Address | SanitizerKind::KernelAddress | SanitizerKind::HWAddress);
+  if (!EnabledAsanMask)
     return false;
   const auto &SanitizerBL = getContext().getSanitizerBlacklist();
-  if (SanitizerBL.isBlacklistedGlobal(GV->getName(), Category))
+  if (SanitizerBL.isBlacklistedGlobal(EnabledAsanMask, GV->getName(), Category))
     return true;
-  if (SanitizerBL.isBlacklistedLocation(Loc, Category))
+  if (SanitizerBL.isBlacklistedLocation(EnabledAsanMask, Loc, Category))
     return true;
   // Check global type.
   if (!Ty.isNull()) {
@@ -1570,7 +1603,7 @@ bool CodeGenModule::isInSanitizerBlacklist(llvm::GlobalVariable *GV,
     // We allow to blacklist only record types (classes, structs etc.)
     if (Ty->isRecordType()) {
       std::string TypeStr = Ty.getAsString(getContext().getPrintingPolicy());
-      if (SanitizerBL.isBlacklistedType(TypeStr, Category))
+      if (SanitizerBL.isBlacklistedType(EnabledAsanMask, TypeStr, Category))
         return true;
     }
   }
@@ -1986,12 +2019,12 @@ bool CodeGenModule::shouldOpportunisticallyEmitVTables() {
 void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
   const auto *D = cast<ValueDecl>(GD.getDecl());

-  PrettyStackTraceDecl CrashInfo(const_cast<ValueDecl *>(D), D->getLocation(), 
+  PrettyStackTraceDecl CrashInfo(const_cast<ValueDecl *>(D), D->getLocation(),
                                  Context.getSourceManager(),
                                  "Generating code for declaration");
-  
+
   if (isa<FunctionDecl>(D)) {
-    // At -O0, don't generate IR for functions with available_externally 
+    // At -O0, don't generate IR for functions with available_externally
     // linkage.
     if (!shouldEmitFunction(GD))
       return;
@@ -2017,7 +2050,7 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {

   if (const auto *VD = dyn_cast<VarDecl>(D))
     return EmitGlobalVarDefinition(VD, !VD->hasDefinition());
-  
+
   llvm_unreachable("Invalid argument to EmitGlobalDefinition()");
 }

@@ -2123,7 +2156,8 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
     assert(F->getName() == MangledName && "name was uniqued!");
     if (D)
-      SetFunctionAttributes(GD, F, IsIncompleteFunction, IsThunk);
+      SetFunctionAttributes(GD, F, IsIncompleteFunction, IsThunk,
+                            IsForDefinition);
     if (ExtraAttrs.hasAttributes(llvm::AttributeList::FunctionIndex)) {
       llvm::AttrBuilder B(ExtraAttrs, llvm::AttributeList::FunctionIndex);
       F->addAttributes(llvm::AttributeList::FunctionIndex, B);
@@ -2259,7 +2293,8 @@ CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name,
       F->setCallingConv(getRuntimeCC());

       if (!Local && getTriple().isOSBinFormatCOFF() &&
-          !getCodeGenOpts().LTOVisibilityPublicStd) {
+          !getCodeGenOpts().LTOVisibilityPublicStd &&
+          !getTriple().isWindowsGNUEnvironment()) {
         const FunctionDecl *FD = GetRuntimeFunctionDecl(Context, Name);
         if (!FD || FD->hasAttr<DLLImportAttr>()) {
           F->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
@@ -2408,7 +2443,8 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName,

     GV->setAlignment(getContext().getDeclAlign(D).getQuantity());

-    setLinkageAndVisibilityForGV(GV, D);
+    setLinkageForGV(GV, D);
+    setGlobalVisibility(GV, D, NotForDefinition);

     if (D->getTLSKind()) {
       if (D->getTLSKind() == VarDecl::TLS_Dynamic)
@@ -2422,18 +2458,65 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName,
         EmitGlobalVarDefinition(D);
     }

+    // Emit section information for extern variables.
+    if (D->hasExternalStorage()) {
+      if (const SectionAttr *SA = D->getAttr<SectionAttr>())
+        GV->setSection(SA->getName());
+    }
+
     // Handle XCore specific ABI requirements.
     if (getTriple().getArch() == llvm::Triple::xcore &&
         D->getLanguageLinkage() == CLanguageLinkage &&
         D->getType().isConstant(Context) &&
         isExternallyVisible(D->getLinkageAndVisibility().getLinkage()))
       GV->setSection(".cp.rodata");
+
+    // Check if we a have a const declaration with an initializer, we may be
+    // able to emit it as available_externally to expose it's value to the
+    // optimizer.
+    if (Context.getLangOpts().CPlusPlus && GV->hasExternalLinkage() &&
+        D->getType().isConstQualified() && !GV->hasInitializer() &&
+        !D->hasDefinition() && D->hasInit() && !D->hasAttr<DLLImportAttr>()) {
+      const auto *Record =
+          Context.getBaseElementType(D->getType())->getAsCXXRecordDecl();
+      bool HasMutableFields = Record && Record->hasMutableFields();
+      if (!HasMutableFields) {
+        const VarDecl *InitDecl;
+        const Expr *InitExpr = D->getAnyInitializer(InitDecl);
+        if (InitExpr) {
+          ConstantEmitter emitter(*this);
+          llvm::Constant *Init = emitter.tryEmitForInitializer(*InitDecl);
+          if (Init) {
+            auto *InitType = Init->getType();
+            if (GV->getType()->getElementType() != InitType) {
+              // The type of the initializer does not match the definition.
+              // This happens when an initializer has a different type from
+              // the type of the global (because of padding at the end of a
+              // structure for instance).
+              GV->setName(StringRef());
+              // Make a new global with the correct type, this is now guaranteed
+              // to work.
+              auto *NewGV = cast<llvm::GlobalVariable>(
+                  GetAddrOfGlobalVar(D, InitType, IsForDefinition));
+
+              // Erase the old global, since it is no longer used.
+              cast<llvm::GlobalValue>(GV)->eraseFromParent();
+              GV = NewGV;
+            } else {
+              GV->setInitializer(Init);
+              GV->setConstant(true);
+              GV->setLinkage(llvm::GlobalValue::AvailableExternallyLinkage);
+            }
+            emitter.finalize(GV);
+          }
+        }
+      }
+    }
   }

-  auto ExpectedAS =
+  LangAS ExpectedAS =
       D ? D->getType().getAddressSpace()
-        : static_cast<unsigned>(LangOpts.OpenCL ? LangAS::opencl_global
-                                                : LangAS::Default);
+        : (LangOpts.OpenCL ? LangAS::opencl_global : LangAS::Default);
   assert(getContext().getTargetAddressSpace(ExpectedAS) ==
          Ty->getPointerAddressSpace());
   if (AddrSpace != ExpectedAS)
@@ -2474,7 +2557,7 @@ CodeGenModule::GetAddrOfGlobal(GlobalDecl GD,
 }

 llvm::GlobalVariable *
-CodeGenModule::CreateOrReplaceCXXRuntimeVariable(StringRef Name, 
+CodeGenModule::CreateOrReplaceCXXRuntimeVariable(StringRef Name,
                                                  llvm::Type *Ty,
                                                  llvm::GlobalValue::LinkageTypes Linkage) {
   llvm::GlobalVariable *GV = getModule().getNamedGlobal(Name);
@@ -2490,7 +2573,7 @@ CodeGenModule::CreateOrReplaceCXXRuntimeVariable(StringRef Name,
     assert(GV->isDeclaration() && "Declaration has wrong type!");
     OldGV = GV;
   }
-  
+
   // Create a new variable.
   GV = new llvm::GlobalVariable(getModule(), Ty, /*isConstant=*/true,
                                 Linkage, nullptr, Name);

   if (OldGV) {
     // Replace occurrences of the old variable if needed.
     GV->takeName(OldGV);
-    
+
     if (!OldGV->use_empty()) {
       llvm::Constant *NewPtrForOldDecl =
       llvm::ConstantExpr::getBitCast(GV, OldGV->getType());
       OldGV->replaceAllUsesWith(NewPtrForOldDecl);
     }
-    
+
     OldGV->eraseFromParent();
   }

@@ -2572,11 +2655,10 @@ CharUnits CodeGenModule::GetTargetTypeStoreSize(llvm::Type *Ty) const {
       getDataLayout().getTypeStoreSizeInBits(Ty));
 }

-unsigned CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) {
-  unsigned AddrSpace;
+LangAS CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) {
+  LangAS AddrSpace = LangAS::Default;
   if (LangOpts.OpenCL) {
-    AddrSpace = D ? D->getType().getAddressSpace()
-                  : static_cast<unsigned>(LangAS::opencl_global);
+    AddrSpace = D ? D->getType().getAddressSpace() : LangAS::opencl_global;
     assert(AddrSpace == LangAS::opencl_global ||
            AddrSpace == LangAS::opencl_constant ||
            AddrSpace == LangAS::opencl_local ||
@@ -2678,6 +2760,8 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
   const VarDecl *InitDecl;
   const Expr *InitExpr = D->getAnyInitializer(InitDecl);

+  Optional<ConstantEmitter> emitter;
+
   // CUDA E.2.4.1 "__shared__ variables cannot have an initialization
   // as part of their declaration."  Sema has already checked for
   // error cases, so we just need to set Init to UndefValue.
@@ -2698,7 +2782,8 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
       Init = EmitNullConstant(D->getType());
   } else {
     initializedGlobalDecl = GlobalDecl(D);
-    Init = EmitConstantInit(*InitDecl);
+    emitter.emplace(*this);
+    Init = emitter->tryEmitForInitializer(*InitDecl);

     if (!Init) {
       QualType T = InitExpr->getType();
@@ -2811,7 +2896,9 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
       Linkage = llvm::GlobalValue::InternalLinkage;
     }
   }
+
   GV->setInitializer(Init);
+  if (emitter) emitter->finalize(GV);

   // If it is safe to mark the global 'constant', do so now.
GV->setConstant(!NeedsGlobalCtor && !NeedsGlobalDtor && @@ -3176,7 +3263,7 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD, setFunctionDLLStorageClass(GD, Fn); // FIXME: this is redundant with part of setFunctionDefinitionAttributes - setGlobalVisibility(Fn, D); + setGlobalVisibility(Fn, D, ForDefinition); MaybeHandleStaticInExternC(D, Fn); @@ -3497,11 +3584,15 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { return ConstantAddress(GV, Alignment); } +bool CodeGenModule::getExpressionLocationsEnabled() const { + return !CodeGenOpts.EmitCodeView || CodeGenOpts.DebugColumnInfo; +} + QualType CodeGenModule::getObjCFastEnumerationStateType() { if (ObjCFastEnumerationStateType.isNull()) { RecordDecl *D = Context.buildImplicitRecord("__objcFastEnumerationState"); D->startDefinition(); - + QualType FieldTypes[] = { Context.UnsignedLongTy, Context.getPointerType(Context.getObjCIdType()), @@ -3509,7 +3600,7 @@ QualType CodeGenModule::getObjCFastEnumerationStateType() { Context.getConstantArrayType(Context.UnsignedLongTy, llvm::APInt(32, 5), ArrayType::Normal, 0) }; - + for (size_t i = 0; i < 4; ++i) { FieldDecl *Field = FieldDecl::Create(Context, D, @@ -3522,18 +3613,18 @@ QualType CodeGenModule::getObjCFastEnumerationStateType() { Field->setAccess(AS_public); D->addDecl(Field); } - + D->completeDefinition(); ObjCFastEnumerationStateType = Context.getTagDeclType(D); } - + return ObjCFastEnumerationStateType; } llvm::Constant * CodeGenModule::GetConstantArrayFromStringLiteral(const StringLiteral *E) { assert(!E->getType()->isPointerType() && "Strings are always arrays"); - + // Don't emit it as the address of the string, emit the string data itself // as an inline array. if (E->getCharByteWidth() == 1) { @@ -3559,11 +3650,11 @@ CodeGenModule::GetConstantArrayFromStringLiteral(const StringLiteral *E) { Elements.resize(NumElements); return llvm::ConstantDataArray::get(VMContext, Elements); } - + assert(ElemTy->getPrimitiveSizeInBits() == 32); SmallVector<uint32_t, 32> Elements; Elements.reserve(NumElements); - + for(unsigned i = 0, e = E->getLength(); i != e; ++i) Elements.push_back(E->getCodeUnit(i)); Elements.resize(NumElements); @@ -3727,12 +3818,18 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary( !EvalResult.hasSideEffects()) Value = &EvalResult.Val; + LangAS AddrSpace = + VD ? GetGlobalVarAddressSpace(VD) : MaterializedType.getAddressSpace(); + + Optional<ConstantEmitter> emitter; llvm::Constant *InitialValue = nullptr; bool Constant = false; llvm::Type *Type; if (Value) { // The temporary has a constant initializer, use it. - InitialValue = EmitConstantValue(*Value, MaterializedType, nullptr); + emitter.emplace(*this); + InitialValue = emitter->emitForInitializer(*Value, AddrSpace, + MaterializedType); Constant = isTypeConstant(MaterializedType, /*ExcludeCtor*/Value); Type = InitialValue->getType(); } else { @@ -3757,13 +3854,12 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary( Linkage = llvm::GlobalVariable::InternalLinkage; } } - unsigned AddrSpace = - VD ? 
GetGlobalVarAddressSpace(VD) : MaterializedType.getAddressSpace(); auto TargetAS = getContext().getTargetAddressSpace(AddrSpace); auto *GV = new llvm::GlobalVariable( getModule(), Type, Constant, Linkage, InitialValue, Name.c_str(), /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal, TargetAS); - setGlobalVisibility(GV, VD); + if (emitter) emitter->finalize(GV); + setGlobalVisibility(GV, VD, ForDefinition); GV->setAlignment(Align.getQuantity()); if (supportsCOMDAT() && GV->isWeakForLinker()) GV->setComdat(TheModule.getOrInsertComdat(GV->getName())); @@ -3850,11 +3946,11 @@ void CodeGenModule::EmitObjCIvarInitializations(ObjCImplementationDecl *D) { if (D->getNumIvarInitializers() == 0 || AllTrivialInitializers(*this, D)) return; - + IdentifierInfo *II = &getContext().Idents.get(".cxx_construct"); Selector cxxSelector = getContext().Selectors.getSelector(0, &II); // The constructor returns 'self'. - ObjCMethodDecl *CTORMethod = ObjCMethodDecl::Create(getContext(), + ObjCMethodDecl *CTORMethod = ObjCMethodDecl::Create(getContext(), D->getLocation(), D->getLocation(), cxxSelector, @@ -3945,6 +4041,13 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { case Decl::Namespace: EmitDeclContext(cast<NamespaceDecl>(D)); break; + case Decl::ClassTemplateSpecialization: { + const auto *Spec = cast<ClassTemplateSpecializationDecl>(D); + if (DebugInfo && + Spec->getSpecializationKind() == TSK_ExplicitInstantiationDefinition && + Spec->hasDefinition()) + DebugInfo->completeTemplateDefinition(*Spec); + } LLVM_FALLTHROUGH; case Decl::CXXRecord: if (DebugInfo) { if (auto *ES = D->getASTContext().getExternalSource()) @@ -3983,7 +4086,7 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { if (cast<FunctionDecl>(D)->getDescribedFunctionTemplate() || cast<FunctionDecl>(D)->isLateTemplateParsed()) return; - + getCXXABI().EmitCXXConstructors(cast<CXXConstructorDecl>(D)); break; case Decl::CXXDestructor: @@ -4009,7 +4112,7 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { ObjCRuntime->GenerateProtocol(Proto); break; } - + case Decl::ObjCCategoryImpl: // Categories have properties but don't support synthesize so we // can ignore them here. 
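Stepping back to the GetAddrOfGlobalTemporary hunk above, the construct being materialized looks like this (the IR in the comments is approximate):

    // A lifetime-extended temporary bound to a reference at namespace scope:
    const int &Ref = 21 * 2;

    // The temporary becomes its own global (mangled with the _ZGR prefix):
    //   @_ZGR3Ref_ = internal constant i32 42
    //   @Ref = constant i32* @_ZGR3Ref_
    // With the change above, its address space now comes from
    // GetGlobalVarAddressSpace(VD) and the initializer is produced by
    // ConstantEmitter::emitForInitializer, finalized against the new global.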
@@ -4131,15 +4234,6 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { EmitOMPThreadPrivateDecl(cast<OMPThreadPrivateDecl>(D)); break; - case Decl::ClassTemplateSpecialization: { - const auto *Spec = cast<ClassTemplateSpecializationDecl>(D); - if (DebugInfo && - Spec->getSpecializationKind() == TSK_ExplicitInstantiationDefinition && - Spec->hasDefinition()) - DebugInfo->completeTemplateDefinition(*Spec); - break; - } - case Decl::OMPDeclareReduction: EmitOMPDeclareReduction(cast<OMPDeclareReductionDecl>(D)); break; @@ -4166,6 +4260,9 @@ void CodeGenModule::AddDeferredUnusedCoverageMapping(Decl *D) { case Decl::CXXDestructor: { if (!cast<FunctionDecl>(D)->doesThisDeclarationHaveABody()) return; + SourceManager &SM = getContext().getSourceManager(); + if (LimitedCoverage && SM.getMainFileID() != SM.getFileID(D->getLocStart())) + return; auto I = DeferredEmptyCoverageMappingDecls.find(D); if (I == DeferredEmptyCoverageMappingDecls.end()) DeferredEmptyCoverageMappingDecls[D] = true; @@ -4192,20 +4289,10 @@ void CodeGenModule::ClearUnusedCoverageMapping(const Decl *D) { } void CodeGenModule::EmitDeferredUnusedCoverageMappings() { - std::vector<const Decl *> DeferredDecls; - for (const auto &I : DeferredEmptyCoverageMappingDecls) { - if (!I.second) + for (const auto &Entry : DeferredEmptyCoverageMappingDecls) { + if (!Entry.second) continue; - DeferredDecls.push_back(I.first); - } - // Sort the declarations by their location to make sure that the tests get a - // predictable order for the coverage mapping for the unused declarations. - if (CodeGenOpts.DumpCoverageMapping) - std::sort(DeferredDecls.begin(), DeferredDecls.end(), - [] (const Decl *LHS, const Decl *RHS) { - return LHS->getLocStart() < RHS->getLocStart(); - }); - for (const auto *D : DeferredDecls) { + const Decl *D = Entry.first; switch (D->getKind()) { case Decl::CXXConversion: case Decl::CXXMethod: @@ -4414,7 +4501,7 @@ llvm::Constant *CodeGenModule::GetAddrOfRTTIDescriptor(QualType Ty, // and it's not for EH? if (!ForEH && !getLangOpts().RTTI) return llvm::Constant::getNullValue(Int8PtrTy); - + if (ForEH && Ty->isObjCObjectPointerType() && LangOpts.ObjCRuntime.isGNUFamily()) return ObjCRuntime->GetEHType(Ty); @@ -4456,6 +4543,60 @@ llvm::Metadata *CodeGenModule::CreateMetadataIdentifierForType(QualType T) { return InternalId; } +// Generalize pointer types to a void pointer with the qualifiers of the +// originally pointed-to type, e.g. 'const char *' and 'char * const *' +// generalize to 'const void *' while 'char *' and 'const char **' generalize to +// 'void *'. 
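The rule this comment describes can be mirrored on a toy type model; a standalone sketch (not the Clang AST, and tracking only const for brevity):

    #include <iostream>
    #include <memory>
    #include <string>

    // Toy type: either a named scalar or a pointer to another type.
    struct Type {
      std::string Name;              // used when Pointee is null
      bool Const = false;
      std::shared_ptr<Type> Pointee; // non-null for pointer types
    };

    std::shared_ptr<Type> makePtr(std::shared_ptr<Type> To) {
      auto T = std::make_shared<Type>();
      T->Pointee = std::move(To);
      return T;
    }

    // Mirrors GeneralizeType: a pointer becomes a pointer to void that keeps
    // only the immediate pointee's qualifiers; deeper levels are discarded.
    std::shared_ptr<Type> generalize(const std::shared_ptr<Type> &T) {
      if (!T->Pointee)
        return T;
      auto V = std::make_shared<Type>();
      V->Name = "void";
      V->Const = T->Pointee->Const;
      return makePtr(V);
    }

    std::string spell(const std::shared_ptr<Type> &T) {
      if (T->Pointee)
        return spell(T->Pointee) + " *";
      return (T->Const ? "const " : "") + T->Name;
    }

    int main() {
      auto Char = std::make_shared<Type>();
      Char->Name = "char";
      auto CChar = std::make_shared<Type>();
      CChar->Name = "char";
      CChar->Const = true;
      std::cout << spell(generalize(makePtr(Char))) << '\n';           // void *
      std::cout << spell(generalize(makePtr(CChar))) << '\n';          // const void *
      std::cout << spell(generalize(makePtr(makePtr(CChar)))) << '\n'; // void *
    }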
+static QualType GeneralizeType(ASTContext &Ctx, QualType Ty) { + if (!Ty->isPointerType()) + return Ty; + + return Ctx.getPointerType( + QualType(Ctx.VoidTy).withCVRQualifiers( + Ty->getPointeeType().getCVRQualifiers())); +} + +// Apply type generalization to a FunctionType's return and argument types +static QualType GeneralizeFunctionType(ASTContext &Ctx, QualType Ty) { + if (auto *FnType = Ty->getAs<FunctionProtoType>()) { + SmallVector<QualType, 8> GeneralizedParams; + for (auto &Param : FnType->param_types()) + GeneralizedParams.push_back(GeneralizeType(Ctx, Param)); + + return Ctx.getFunctionType( + GeneralizeType(Ctx, FnType->getReturnType()), + GeneralizedParams, FnType->getExtProtoInfo()); + } + + if (auto *FnType = Ty->getAs<FunctionNoProtoType>()) + return Ctx.getFunctionNoProtoType( + GeneralizeType(Ctx, FnType->getReturnType())); + + llvm_unreachable("Encountered unknown FunctionType"); +} + +llvm::Metadata *CodeGenModule::CreateMetadataIdentifierGeneralized(QualType T) { + T = GeneralizeFunctionType(getContext(), T); + + llvm::Metadata *&InternalId = GeneralizedMetadataIdMap[T.getCanonicalType()]; + if (InternalId) + return InternalId; + + if (isExternallyVisible(T->getLinkage())) { + std::string OutName; + llvm::raw_string_ostream Out(OutName); + getCXXABI().getMangleContext().mangleTypeName(T, Out); + Out << ".generalized"; + + InternalId = llvm::MDString::get(getLLVMContext(), Out.str()); + } else { + InternalId = llvm::MDNode::getDistinct(getLLVMContext(), + llvm::ArrayRef<llvm::Metadata *>()); + } + + return InternalId; +} + /// Returns whether this module needs the "all-vtables" type identifier. bool CodeGenModule::NeedAllVtablesTypeId() const { // Returns true if at least one of vtable-based CFI checkers is enabled and @@ -4497,14 +4638,23 @@ void CodeGenModule::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap, // If we have a TargetAttr build up the feature map based on that. TargetAttr::ParsedTargetAttr ParsedAttr = TD->parse(); + ParsedAttr.Features.erase( + llvm::remove_if(ParsedAttr.Features, + [&](const std::string &Feat) { + return !Target.isValidFeatureName( + StringRef{Feat}.substr(1)); + }), + ParsedAttr.Features.end()); + // Make a copy of the features as passed on the command line into the // beginning of the additional features from the function to override. ParsedAttr.Features.insert(ParsedAttr.Features.begin(), Target.getTargetOpts().FeaturesAsWritten.begin(), Target.getTargetOpts().FeaturesAsWritten.end()); - if (ParsedAttr.Architecture != "") - TargetCPU = ParsedAttr.Architecture ; + if (ParsedAttr.Architecture != "" && + Target.isValidCPUName(ParsedAttr.Architecture)) + TargetCPU = ParsedAttr.Architecture; // Now populate the feature map, first with the TargetCPU which is either // the default or a new one from the target attribute string. 
Then we'll use @@ -4527,8 +4677,8 @@ llvm::SanitizerStatReport &CodeGenModule::getSanStats() { llvm::Value * CodeGenModule::createOpenCLIntToSamplerConversion(const Expr *E, CodeGenFunction &CGF) { - llvm::Constant *C = EmitConstantExpr(E, E->getType(), &CGF); - auto SamplerT = getOpenCLRuntime().getSamplerType(); + llvm::Constant *C = ConstantEmitter(CGF).emitAbstract(E, E->getType()); + auto SamplerT = getOpenCLRuntime().getSamplerType(E->getType().getTypePtr()); auto FTy = llvm::FunctionType::get(SamplerT, {C->getType()}, false); return CGF.Builder.CreateCall(CreateRuntimeFunction(FTy, "__translate_sampler_initializer"), diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h index b162e72d1992..22c4463b2c81 100644 --- a/lib/CodeGen/CodeGenModule.h +++ b/lib/CodeGen/CodeGenModule.h @@ -490,14 +490,16 @@ private: /// @} - llvm::DenseMap<const Decl *, bool> DeferredEmptyCoverageMappingDecls; + llvm::MapVector<const Decl *, bool> DeferredEmptyCoverageMappingDecls; std::unique_ptr<CoverageMappingModuleGen> CoverageMapping; /// Mapping from canonical types to their metadata identifiers. We need to /// maintain this mapping because identifiers may be formed from distinct /// MDNodes. - llvm::DenseMap<QualType, llvm::Metadata *> MetadataIdMap; + typedef llvm::DenseMap<QualType, llvm::Metadata *> MetadataTypeMap; + MetadataTypeMap MetadataIdMap; + MetadataTypeMap GeneralizedMetadataIdMap; public: CodeGenModule(ASTContext &C, const HeaderSearchOptions &headersearchopts, @@ -513,6 +515,9 @@ public: /// Finalize LLVM code generation. void Release(); + /// Return true if we should emit location information for expressions. + bool getExpressionLocationsEnabled() const; + /// Return a reference to the configured Objective-C runtime. CGObjCRuntime &getObjCRuntime() { if (!ObjCRuntime) createObjCRuntime(); @@ -649,25 +654,53 @@ public: CtorList &getGlobalCtors() { return GlobalCtors; } CtorList &getGlobalDtors() { return GlobalDtors; } - llvm::MDNode *getTBAAInfo(QualType QTy); - llvm::MDNode *getTBAAInfoForVTablePtr(); + /// getTBAATypeInfo - Get metadata used to describe accesses to objects of + /// the given type. + llvm::MDNode *getTBAATypeInfo(QualType QTy); + + /// getTBAAAccessInfo - Get TBAA information that describes an access to + /// an object of the given type. + TBAAAccessInfo getTBAAAccessInfo(QualType AccessType); + + /// getTBAAVTablePtrAccessInfo - Get the TBAA information that describes an + /// access to a virtual table pointer. + TBAAAccessInfo getTBAAVTablePtrAccessInfo(llvm::Type *VTablePtrType); + llvm::MDNode *getTBAAStructInfo(QualType QTy); - /// Return the path-aware tag for given base type, access node and offset. - llvm::MDNode *getTBAAStructTagInfo(QualType BaseTy, llvm::MDNode *AccessN, - uint64_t O); + + /// getTBAABaseTypeInfo - Get metadata that describes the given base access + /// type. Return null if the type is not suitable for use in TBAA access tags. + llvm::MDNode *getTBAABaseTypeInfo(QualType QTy); + + /// getTBAAAccessTagInfo - Get TBAA tag for a given memory access. + llvm::MDNode *getTBAAAccessTagInfo(TBAAAccessInfo Info); + + /// mergeTBAAInfoForCast - Get merged TBAA information for the purposes of + /// type casts. + TBAAAccessInfo mergeTBAAInfoForCast(TBAAAccessInfo SourceInfo, + TBAAAccessInfo TargetInfo); + + /// mergeTBAAInfoForConditionalOperator - Get merged TBAA information for the + /// purposes of conditional operator. 
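Returning to the getFunctionFeatureMap hunk above, a hypothetical invocation shows how the pieces combine (flag and feature spellings are illustrative):

    // Compiled with, say:  clang -c -mavx file.c      (command line: +avx)
    __attribute__((target("no-avx2,arch=haswell")))
    int hot_loop(int x);

    // The feature list handed to initFeatureMap puts the command-line
    // entries first, { "+avx", "-avx2" }, so per-function attribute features
    // override the command line; entries that fail isValidFeatureName() are
    // now dropped, and "arch=haswell" replaces the target CPU only when
    // isValidCPUName() accepts it.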
+ TBAAAccessInfo mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA, + TBAAAccessInfo InfoB); + + /// getTBAAInfoForSubobject - Get TBAA information for an access with a given + /// base lvalue. + TBAAAccessInfo getTBAAInfoForSubobject(LValue Base, QualType AccessType) { + if (Base.getTBAAInfo().isMayAlias()) + return TBAAAccessInfo::getMayAliasInfo(); + return getTBAAAccessInfo(AccessType); + } bool isTypeConstant(QualType QTy, bool ExcludeCtorDtor); bool isPaddedAtomicType(QualType type); bool isPaddedAtomicType(const AtomicType *type); - /// Decorate the instruction with a TBAA tag. For scalar TBAA, the tag - /// is the same as the type. For struct-path aware TBAA, the tag - /// is different from the type: base type, access type and offset. - /// When ConvertTypeToTag is true, we create a tag based on the scalar type. + /// DecorateInstructionWithTBAA - Decorate the instruction with a TBAA tag. void DecorateInstructionWithTBAA(llvm::Instruction *Inst, - llvm::MDNode *TBAAInfo, - bool ConvertTypeToTag = true); + TBAAAccessInfo TBAAInfo); /// Adds !invariant.barrier !tag to instruction void DecorateInstructionWithInvariantGroup(llvm::Instruction *I, @@ -677,7 +710,8 @@ public: llvm::ConstantInt *getSize(CharUnits numChars); /// Set the visibility for the given LLVM GlobalValue. - void setGlobalVisibility(llvm::GlobalValue *GV, const NamedDecl *D) const; + void setGlobalVisibility(llvm::GlobalValue *GV, const NamedDecl *D, + ForDefinition_t IsForDefinition) const; /// Set the TLS mode for the given LLVM GlobalValue for the thread-local /// variable declaration D. @@ -718,7 +752,7 @@ public: /// /// For languages without explicit address spaces, if D has default address /// space, target-specific global or constant address space may be returned. - unsigned GetGlobalVarAddressSpace(const VarDecl *D); + LangAS GetGlobalVarAddressSpace(const VarDecl *D); /// Return the llvm::Constant for the address of the given global variable. /// If Ty is non-null and if the global doesn't exist, then it will be created @@ -942,27 +976,6 @@ public: llvm::Constant *getMemberPointerConstant(const UnaryOperator *e); - /// Try to emit the initializer for the given declaration as a constant; - /// returns 0 if the expression cannot be emitted as a constant. - llvm::Constant *EmitConstantInit(const VarDecl &D, - CodeGenFunction *CGF = nullptr); - - /// Try to emit the given expression as a constant; returns 0 if the - /// expression cannot be emitted as a constant. - llvm::Constant *EmitConstantExpr(const Expr *E, QualType DestType, - CodeGenFunction *CGF = nullptr); - - /// Emit the given constant value as a constant, in the type's scalar - /// representation. - llvm::Constant *EmitConstantValue(const APValue &Value, QualType DestType, - CodeGenFunction *CGF = nullptr); - - /// Emit the given constant value as a constant, in the type's memory - /// representation. - llvm::Constant *EmitConstantValueForMemory(const APValue &Value, - QualType DestType, - CodeGenFunction *CGF = nullptr); - /// \brief Emit type info if type of an expression is a variably modified /// type. Also emit proper debug info for cast types. void EmitExplicitCastExprType(const ExplicitCastExpr *E, @@ -1124,7 +1137,8 @@ public: /// annotations are emitted during finalization of the LLVM code. 
void AddGlobalAnnotations(const ValueDecl *D, llvm::GlobalValue *GV); - bool isInSanitizerBlacklist(llvm::Function *Fn, SourceLocation Loc) const; + bool isInSanitizerBlacklist(SanitizerMask Kind, llvm::Function *Fn, + SourceLocation Loc) const; bool isInSanitizerBlacklist(llvm::GlobalVariable *GV, SourceLocation Loc, QualType Ty, @@ -1148,8 +1162,7 @@ public: /// are emitted lazily. void EmitGlobal(GlobalDecl D); - bool TryEmitDefinitionAsAlias(GlobalDecl Alias, GlobalDecl Target, - bool InEveryTU); + bool TryEmitDefinitionAsAlias(GlobalDecl Alias, GlobalDecl Target); bool TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D); /// Set attributes for a global definition. @@ -1199,6 +1212,11 @@ public: /// internal identifiers). llvm::Metadata *CreateMetadataIdentifierForType(QualType T); + /// Create a metadata identifier for the generalization of the given type. + /// This may either be an MDString (for external identifiers) or a distinct + /// unnamed MDNode (for internal identifiers). + llvm::Metadata *CreateMetadataIdentifierGeneralized(QualType T); + /// Create and attach type metadata to the given function. void CreateFunctionTypeMetadata(const FunctionDecl *FD, llvm::Function *F); @@ -1239,7 +1257,8 @@ private: /// Set function attributes for a function declaration. void SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, - bool IsIncompleteFunction, bool IsThunk); + bool IsIncompleteFunction, bool IsThunk, + ForDefinition_t IsForDefinition); void EmitGlobalDefinition(GlobalDecl D, llvm::GlobalValue *GV = nullptr); @@ -1355,6 +1374,7 @@ private: bool AttrOnCallSite, llvm::AttrBuilder &FuncAttrs); }; + } // end namespace CodeGen } // end namespace clang diff --git a/lib/CodeGen/CodeGenPGO.cpp b/lib/CodeGen/CodeGenPGO.cpp index c3d66c1dabc5..295893c64fbc 100644 --- a/lib/CodeGen/CodeGenPGO.cpp +++ b/lib/CodeGen/CodeGenPGO.cpp @@ -22,9 +22,10 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/MD5.h" -static llvm::cl::opt<bool> EnableValueProfiling( - "enable-value-profiling", llvm::cl::ZeroOrMore, - llvm::cl::desc("Enable value profiling"), llvm::cl::init(false)); +static llvm::cl::opt<bool> + EnableValueProfiling("enable-value-profiling", llvm::cl::ZeroOrMore, + llvm::cl::desc("Enable value profiling"), + llvm::cl::Hidden, llvm::cl::init(false)); using namespace clang; using namespace CodeGen; @@ -47,6 +48,15 @@ void CodeGenPGO::setFuncName(llvm::Function *Fn) { llvm::createPGOFuncNameMetadata(*Fn, FuncName); } +/// The version of the PGO hash algorithm. +enum PGOHashVersion : unsigned { + PGO_HASH_V1, + PGO_HASH_V2, + + // Keep this set to the latest hash version. + PGO_HASH_LATEST = PGO_HASH_V2 +}; + namespace { /// \brief Stable hasher for PGO region counters. /// @@ -61,6 +71,7 @@ namespace { class PGOHash { uint64_t Working; unsigned Count; + PGOHashVersion HashVersion; llvm::MD5 MD5; static const int NumBitsPerType = 6; @@ -93,24 +104,53 @@ public: BinaryOperatorLAnd, BinaryOperatorLOr, BinaryConditionalOperator, + // The preceding values are available with PGO_HASH_V1. + + EndOfScope, + IfThenBranch, + IfElseBranch, + GotoStmt, + IndirectGotoStmt, + BreakStmt, + ContinueStmt, + ReturnStmt, + ThrowExpr, + UnaryOperatorLNot, + BinaryOperatorLT, + BinaryOperatorGT, + BinaryOperatorLE, + BinaryOperatorGE, + BinaryOperatorEQ, + BinaryOperatorNE, + // The preceding values are available with PGO_HASH_V2. // Keep this last. It's for the static assert that follows. 
LastHashType }; static_assert(LastHashType <= TooBig, "Too many types in HashType"); - // TODO: When this format changes, take in a version number here, and use the - // old hash calculation for file formats that used the old hash. - PGOHash() : Working(0), Count(0) {} + PGOHash(PGOHashVersion HashVersion) + : Working(0), Count(0), HashVersion(HashVersion), MD5() {} void combine(HashType Type); uint64_t finalize(); + PGOHashVersion getHashVersion() const { return HashVersion; } }; const int PGOHash::NumBitsPerType; const unsigned PGOHash::NumTypesPerWord; const unsigned PGOHash::TooBig; +/// Get the PGO hash version used in the given indexed profile. +static PGOHashVersion getPGOHashVersion(llvm::IndexedInstrProfReader *PGOReader, + CodeGenModule &CGM) { + if (PGOReader->getVersion() <= 4) + return PGO_HASH_V1; + return PGO_HASH_V2; +} + /// A RecursiveASTVisitor that fills a map of statements to PGO counters. struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> { + using Base = RecursiveASTVisitor<MapRegionCounters>; + /// The next counter value to assign. unsigned NextCounter; /// The function hash. @@ -118,8 +158,9 @@ struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> { /// The map of statements to counters. llvm::DenseMap<const Stmt *, unsigned> &CounterMap; - MapRegionCounters(llvm::DenseMap<const Stmt *, unsigned> &CounterMap) - : NextCounter(0), CounterMap(CounterMap) {} + MapRegionCounters(PGOHashVersion HashVersion, + llvm::DenseMap<const Stmt *, unsigned> &CounterMap) + : NextCounter(0), Hash(HashVersion), CounterMap(CounterMap) {} // Blocks and lambdas are handled as separate functions, so we need not // traverse them in the parent context. @@ -145,16 +186,66 @@ struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> { return true; } - bool VisitStmt(const Stmt *S) { - auto Type = getHashType(S); - if (Type == PGOHash::None) - return true; + /// If \p S gets a fresh counter, update the counter mappings. Return the + /// V1 hash of \p S. + PGOHash::HashType updateCounterMappings(Stmt *S) { + auto Type = getHashType(PGO_HASH_V1, S); + if (Type != PGOHash::None) + CounterMap[S] = NextCounter++; + return Type; + } - CounterMap[S] = NextCounter++; - Hash.combine(Type); + /// Include \p S in the function hash. + bool VisitStmt(Stmt *S) { + auto Type = updateCounterMappings(S); + if (Hash.getHashVersion() != PGO_HASH_V1) + Type = getHashType(Hash.getHashVersion(), S); + if (Type != PGOHash::None) + Hash.combine(Type); return true; } - PGOHash::HashType getHashType(const Stmt *S) { + + bool TraverseIfStmt(IfStmt *If) { + // If we used the V1 hash, use the default traversal. + if (Hash.getHashVersion() == PGO_HASH_V1) + return Base::TraverseIfStmt(If); + + // Otherwise, keep track of which branch we're in while traversing. + VisitStmt(If); + for (Stmt *CS : If->children()) { + if (!CS) + continue; + if (CS == If->getThen()) + Hash.combine(PGOHash::IfThenBranch); + else if (CS == If->getElse()) + Hash.combine(PGOHash::IfElseBranch); + TraverseStmt(CS); + } + Hash.combine(PGOHash::EndOfScope); + return true; + } + +// If the statement type \p N is nestable, and its nesting impacts profile +// stability, define a custom traversal which tracks the end of the statement +// in the hash (provided we're not using the V1 hash). 
+#define DEFINE_NESTABLE_TRAVERSAL(N) \ + bool Traverse##N(N *S) { \ + Base::Traverse##N(S); \ + if (Hash.getHashVersion() != PGO_HASH_V1) \ + Hash.combine(PGOHash::EndOfScope); \ + return true; \ + } + + DEFINE_NESTABLE_TRAVERSAL(WhileStmt) + DEFINE_NESTABLE_TRAVERSAL(DoStmt) + DEFINE_NESTABLE_TRAVERSAL(ForStmt) + DEFINE_NESTABLE_TRAVERSAL(CXXForRangeStmt) + DEFINE_NESTABLE_TRAVERSAL(ObjCForCollectionStmt) + DEFINE_NESTABLE_TRAVERSAL(CXXTryStmt) + DEFINE_NESTABLE_TRAVERSAL(CXXCatchStmt) + + /// Get version \p HashVersion of the PGO hash for \p S. + PGOHash::HashType getHashType(PGOHashVersion HashVersion, const Stmt *S) { switch (S->getStmtClass()) { default: break; @@ -192,9 +283,53 @@ struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> { return PGOHash::BinaryOperatorLAnd; if (BO->getOpcode() == BO_LOr) return PGOHash::BinaryOperatorLOr; + if (HashVersion == PGO_HASH_V2) { + switch (BO->getOpcode()) { + default: + break; + case BO_LT: + return PGOHash::BinaryOperatorLT; + case BO_GT: + return PGOHash::BinaryOperatorGT; + case BO_LE: + return PGOHash::BinaryOperatorLE; + case BO_GE: + return PGOHash::BinaryOperatorGE; + case BO_EQ: + return PGOHash::BinaryOperatorEQ; + case BO_NE: + return PGOHash::BinaryOperatorNE; + } + } break; } } + + if (HashVersion == PGO_HASH_V2) { + switch (S->getStmtClass()) { + default: + break; + case Stmt::GotoStmtClass: + return PGOHash::GotoStmt; + case Stmt::IndirectGotoStmtClass: + return PGOHash::IndirectGotoStmt; + case Stmt::BreakStmtClass: + return PGOHash::BreakStmt; + case Stmt::ContinueStmtClass: + return PGOHash::ContinueStmt; + case Stmt::ReturnStmtClass: + return PGOHash::ReturnStmt; + case Stmt::CXXThrowExprClass: + return PGOHash::ThrowExpr; + case Stmt::UnaryOperatorClass: { + const UnaryOperator *UO = cast<UnaryOperator>(S); + if (UO->getOpcode() == UO_LNot) + return PGOHash::UnaryOperatorLNot; + break; + } + } + } + return PGOHash::None; } }; @@ -653,8 +788,14 @@ void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) { } void CodeGenPGO::mapRegionCounters(const Decl *D) { + // Use the latest hash version when inserting instrumentation, but use the + // version in the indexed profile if we're reading PGO data. 
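All of these traversals feed PGOHash, whose packing scheme can be sketched standalone. This is a simplification: 6-bit codes packed ten to a word, with FNV-1a standing in for the llvm::MD5 overflow path and for the exact finalize() rules:

    #include <cstdint>
    #include <cstdio>

    // Sketch of the PGOHash packing: 6-bit hash codes are packed ten to a
    // 64-bit word; full words are folded into a running digest.
    class MiniPGOHash {
      static const int NumBitsPerType = 6;
      static const unsigned NumTypesPerWord = 10;
      uint64_t Working = 0;
      unsigned Count = 0;
      uint64_t Digest = 14695981039346656037ULL; // FNV-1a offset basis

      void fold(uint64_t Word) {
        for (int I = 0; I < 8; ++I) {
          Digest ^= (Word >> (I * 8)) & 0xff;
          Digest *= 1099511628211ULL; // FNV-1a prime
        }
      }

    public:
      void combine(unsigned TypeCode) { // TypeCode must fit in 6 bits
        if (Count && Count % NumTypesPerWord == 0) {
          fold(Working);
          Working = 0;
        }
        ++Count;
        Working = Working << NumBitsPerType | TypeCode;
      }

      uint64_t finalize() {
        if (Working)
          fold(Working);
        return Digest;
      }
    };

    int main() {
      MiniPGOHash H;
      for (unsigned Code : {2u, 5u, 17u}) // e.g. IfStmt, ForStmt, EndOfScope
        H.combine(Code);
      std::printf("%llx\n", (unsigned long long)H.finalize());
    }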
+ PGOHashVersion HashVersion = PGO_HASH_LATEST; + if (auto *PGOReader = CGM.getPGOReader()) + HashVersion = getPGOHashVersion(PGOReader, CGM); + RegionCounterMap.reset(new llvm::DenseMap<const Stmt *, unsigned>); - MapRegionCounters Walker(*RegionCounterMap); + MapRegionCounters Walker(HashVersion, *RegionCounterMap); if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) Walker.TraverseDecl(const_cast<FunctionDecl *>(FD)); else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D)) diff --git a/lib/CodeGen/CodeGenTBAA.cpp b/lib/CodeGen/CodeGenTBAA.cpp index 8a75a552d9fa..f394ea288d46 100644 --- a/lib/CodeGen/CodeGenTBAA.cpp +++ b/lib/CodeGen/CodeGenTBAA.cpp @@ -25,16 +25,18 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Type.h" using namespace clang; using namespace CodeGen; -CodeGenTBAA::CodeGenTBAA(ASTContext &Ctx, llvm::LLVMContext& VMContext, +CodeGenTBAA::CodeGenTBAA(ASTContext &Ctx, llvm::Module &M, const CodeGenOptions &CGO, const LangOptions &Features, MangleContext &MContext) - : Context(Ctx), CodeGenOpts(CGO), Features(Features), MContext(MContext), - MDHelper(VMContext), Root(nullptr), Char(nullptr) { -} + : Context(Ctx), Module(M), CodeGenOpts(CGO), + Features(Features), MContext(MContext), MDHelper(M.getContext()), + Root(nullptr), Char(nullptr) +{} CodeGenTBAA::~CodeGenTBAA() { } @@ -54,10 +56,10 @@ llvm::MDNode *CodeGenTBAA::getRoot() { return Root; } -// For both scalar TBAA and struct-path aware TBAA, the scalar type has the -// same format: name, parent node, and offset. -llvm::MDNode *CodeGenTBAA::createTBAAScalarType(StringRef Name, - llvm::MDNode *Parent) { +llvm::MDNode *CodeGenTBAA::createScalarTypeNode(StringRef Name, + llvm::MDNode *Parent, + uint64_t Size) { + (void)Size; // TODO: Support generation of size-aware type nodes. return MDHelper.createTBAAScalarTypeNode(Name, Parent); } @@ -67,7 +69,7 @@ llvm::MDNode *CodeGenTBAA::getChar() { // these special powers only cover user-accessible memory, and don't // include things like vtables. if (!Char) - Char = createTBAAScalarType("omnipotent char", getRoot()); + Char = createScalarTypeNode("omnipotent char", getRoot(), /* Size= */ 1); return Char; } @@ -88,21 +90,27 @@ static bool TypeHasMayAlias(QualType QTy) { return false; } -llvm::MDNode * -CodeGenTBAA::getTBAAInfo(QualType QTy) { - // At -O0 or relaxed aliasing, TBAA is not emitted for regular types. - if (CodeGenOpts.OptimizationLevel == 0 || CodeGenOpts.RelaxedAliasing) - return nullptr; - - // If the type has the may_alias attribute (even on a typedef), it is - // effectively in the general char alias class. - if (TypeHasMayAlias(QTy)) - return getChar(); - - const Type *Ty = Context.getCanonicalType(QTy).getTypePtr(); +/// Check if the given type is a valid base type to be used in access tags. +static bool isValidBaseType(QualType QTy) { + if (QTy->isReferenceType()) + return false; + if (const RecordType *TTy = QTy->getAs<RecordType>()) { + const RecordDecl *RD = TTy->getDecl()->getDefinition(); + // Incomplete types are not valid base access types. + if (!RD) + return false; + if (RD->hasFlexibleArrayMember()) + return false; + // RD can be struct, union, class, interface or enum. + // For now, we only handle struct and class.
+ if (RD->isStruct() || RD->isClass()) + return true; + } + return false; +} - if (llvm::MDNode *N = MetadataCache[Ty]) - return N; +llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) { + uint64_t Size = Context.getTypeSizeInChars(Ty).getQuantity(); // Handle builtin types. if (const BuiltinType *BTy = dyn_cast<BuiltinType>(Ty)) { @@ -120,22 +128,21 @@ CodeGenTBAA::getTBAAInfo(QualType QTy) { // Unsigned types can alias their corresponding signed types. case BuiltinType::UShort: - return getTBAAInfo(Context.ShortTy); + return getTypeInfo(Context.ShortTy); case BuiltinType::UInt: - return getTBAAInfo(Context.IntTy); + return getTypeInfo(Context.IntTy); case BuiltinType::ULong: - return getTBAAInfo(Context.LongTy); + return getTypeInfo(Context.LongTy); case BuiltinType::ULongLong: - return getTBAAInfo(Context.LongLongTy); + return getTypeInfo(Context.LongLongTy); case BuiltinType::UInt128: - return getTBAAInfo(Context.Int128Ty); + return getTypeInfo(Context.Int128Ty); // Treat all other builtin types as distinct types. This includes // treating wchar_t, char16_t, and char32_t as distinct from their // "underlying types". default: - return MetadataCache[Ty] = - createTBAAScalarType(BTy->getName(Features), getChar()); + return createScalarTypeNode(BTy->getName(Features), getChar(), Size); } } @@ -143,14 +150,13 @@ CodeGenTBAA::getTBAAInfo(QualType QTy) { // an object through a glvalue of other than one of the following types the // behavior is undefined: [...] a char, unsigned char, or std::byte type." if (Ty->isStdByteType()) - return MetadataCache[Ty] = getChar(); + return getChar(); - // Handle pointers. + // Handle pointers and references. // TODO: Implement C++'s type "similarity" and consider dis-"similar" // pointers distinct. - if (Ty->isPointerType()) - return MetadataCache[Ty] = createTBAAScalarType("any pointer", - getChar()); + if (Ty->isPointerType() || Ty->isReferenceType()) + return createScalarTypeNode("any pointer", getChar(), Size); // Enum types are distinct types. In C++ they have "underlying types", // however they aren't related for TBAA. @@ -160,20 +166,53 @@ CodeGenTBAA::getTBAAInfo(QualType QTy) { // TODO: Is there a way to get a program-wide unique name for a // decl with local linkage or no linkage? if (!Features.CPlusPlus || !ETy->getDecl()->isExternallyVisible()) - return MetadataCache[Ty] = getChar(); + return getChar(); SmallString<256> OutName; llvm::raw_svector_ostream Out(OutName); MContext.mangleTypeName(QualType(ETy, 0), Out); - return MetadataCache[Ty] = createTBAAScalarType(OutName, getChar()); + return createScalarTypeNode(OutName, getChar(), Size); } // For now, handle any other kind of type conservatively. - return MetadataCache[Ty] = getChar(); + return getChar(); +} + +llvm::MDNode *CodeGenTBAA::getTypeInfo(QualType QTy) { + // At -O0 or relaxed aliasing, TBAA is not emitted for regular types. + if (CodeGenOpts.OptimizationLevel == 0 || CodeGenOpts.RelaxedAliasing) + return nullptr; + + // If the type has the may_alias attribute (even on a typedef), it is + // effectively in the general char alias class. + if (TypeHasMayAlias(QTy)) + return getChar(); + + // We need this function to not fall back to returning the "omnipotent char" + // type node for aggregate and union types. Otherwise, any dereference of an + // aggregate will result in the may-alias access descriptor, meaning all + // subsequent accesses to direct and indirect members of that aggregate will + // be considered may-alias too.
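Concretely, for a member access the base-type and access-tag machinery in this file produces metadata of roughly the following shape (node numbering schematic):

    struct S {
      int a;   // offset 0
      float b; // offset 4
    };

    float load_b(S *p) { return p->b; } // the load is tagged !tbaa !5

    // Schematic struct-path TBAA metadata:
    //   !1 = !{!"omnipotent char", !0, i64 0}   ; scalar node, parent = root
    //   !2 = !{!"int", !1, i64 0}
    //   !3 = !{!"float", !1, i64 0}
    //   !4 = !{!"_ZTS1S", !2, i64 0, !3, i64 4} ; base type listing its fields
    //   !5 = !{!4, !3, i64 4}                   ; access tag: base, access, offset
    // isValidBaseType() gates which types get a node like !4; unions and
    // incomplete records keep falling back to the may-alias char node.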
+ // TODO: Combine getTypeInfo() and getBaseTypeInfo() into a single function. + if (isValidBaseType(QTy)) + return getBaseTypeInfo(QTy); + + const Type *Ty = Context.getCanonicalType(QTy).getTypePtr(); + if (llvm::MDNode *N = MetadataCache[Ty]) + return N; + + // Note that the following helper call is allowed to add new nodes to the + // cache, which invalidates all its previously obtained iterators. So we + // first generate the node for the type and then add that node to the cache. + llvm::MDNode *TypeNode = getTypeInfoHelper(Ty); + return MetadataCache[Ty] = TypeNode; } -llvm::MDNode *CodeGenTBAA::getTBAAInfoForVTablePtr() { - return createTBAAScalarType("vtable pointer", getRoot()); +TBAAAccessInfo CodeGenTBAA::getVTablePtrAccessInfo(llvm::Type *VTablePtrType) { + llvm::DataLayout DL(&Module); + unsigned Size = DL.getPointerTypeSize(VTablePtrType); + return TBAAAccessInfo(createScalarTypeNode("vtable pointer", getRoot(), Size), + Size); } bool @@ -212,8 +251,8 @@ CodeGenTBAA::CollectFields(uint64_t BaseOffset, /* Otherwise, treat whatever it is as a field. */ uint64_t Offset = BaseOffset; uint64_t Size = Context.getTypeSizeInChars(QTy).getQuantity(); - llvm::MDNode *TBAAInfo = MayAlias ? getChar() : getTBAAInfo(QTy); - llvm::MDNode *TBAATag = getTBAAScalarTagInfo(TBAAInfo); + llvm::MDNode *TBAAType = MayAlias ? getChar() : getTypeInfo(QTy); + llvm::MDNode *TBAATag = getAccessTagInfo(TBAAAccessInfo(TBAAType, Size)); Fields.push_back(llvm::MDBuilder::TBAAStructField(Offset, Size, TBAATag)); return true; } @@ -233,46 +272,23 @@ CodeGenTBAA::getTBAAStructInfo(QualType QTy) { return StructMetadataCache[Ty] = nullptr; } -/// Check if the given type can be handled by path-aware TBAA. -static bool isTBAAPathStruct(QualType QTy) { - if (const RecordType *TTy = QTy->getAs<RecordType>()) { - const RecordDecl *RD = TTy->getDecl()->getDefinition(); - if (RD->hasFlexibleArrayMember()) - return false; - // RD can be struct, union, class, interface or enum. - // For now, we only handle struct and class. - if (RD->isStruct() || RD->isClass()) - return true; - } - return false; -} - -llvm::MDNode * -CodeGenTBAA::getTBAAStructTypeInfo(QualType QTy) { - const Type *Ty = Context.getCanonicalType(QTy).getTypePtr(); - assert(isTBAAPathStruct(QTy)); - - if (llvm::MDNode *N = StructTypeMetadataCache[Ty]) - return N; - - if (const RecordType *TTy = QTy->getAs<RecordType>()) { +llvm::MDNode *CodeGenTBAA::getBaseTypeInfoHelper(const Type *Ty) { + if (auto *TTy = dyn_cast<RecordType>(Ty)) { const RecordDecl *RD = TTy->getDecl()->getDefinition(); - const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); - SmallVector <std::pair<llvm::MDNode*, uint64_t>, 4> Fields; - unsigned idx = 0; - for (RecordDecl::field_iterator i = RD->field_begin(), - e = RD->field_end(); i != e; ++i, ++idx) { - QualType FieldQTy = i->getType(); - llvm::MDNode *FieldNode; - if (isTBAAPathStruct(FieldQTy)) - FieldNode = getTBAAStructTypeInfo(FieldQTy); - else - FieldNode = getTBAAInfo(FieldQTy); - if (!FieldNode) - return StructTypeMetadataCache[Ty] = nullptr; - Fields.push_back(std::make_pair( - FieldNode, Layout.getFieldOffset(idx) / Context.getCharWidth())); + SmallVector<llvm::MDBuilder::TBAAStructField, 4> Fields; + for (FieldDecl *Field : RD->fields()) { + QualType FieldQTy = Field->getType(); + llvm::MDNode *TypeNode = isValidBaseType(FieldQTy) ? 
+ getBaseTypeInfo(FieldQTy) : getTypeInfo(FieldQTy); + if (!TypeNode) + return BaseTypeMetadataCache[Ty] = nullptr; + + uint64_t BitOffset = Layout.getFieldOffset(Field->getFieldIndex()); + uint64_t Offset = Context.toCharUnitsFromBits(BitOffset).getQuantity(); + uint64_t Size = Context.getTypeSizeInChars(FieldQTy).getQuantity(); + Fields.push_back(llvm::MDBuilder::TBAAStructField(Offset, Size, + TypeNode)); } SmallString<256> OutName; @@ -283,47 +299,80 @@ CodeGenTBAA::getTBAAStructTypeInfo(QualType QTy) { } else { OutName = RD->getName(); } + + // TODO: Support size-aware type nodes and create one here for the + // given aggregate type. + // Create the struct type node with a vector of pairs (offset, type). - return StructTypeMetadataCache[Ty] = - MDHelper.createTBAAStructTypeNode(OutName, Fields); + SmallVector<std::pair<llvm::MDNode*, uint64_t>, 4> OffsetsAndTypes; + for (const auto &Field : Fields) + OffsetsAndTypes.push_back(std::make_pair(Field.Type, Field.Offset)); + return MDHelper.createTBAAStructTypeNode(OutName, OffsetsAndTypes); } - return StructMetadataCache[Ty] = nullptr; + return nullptr; } -/// Return a TBAA tag node for both scalar TBAA and struct-path aware TBAA. -llvm::MDNode * -CodeGenTBAA::getTBAAStructTagInfo(QualType BaseQTy, llvm::MDNode *AccessNode, - uint64_t Offset) { - if (!AccessNode) +llvm::MDNode *CodeGenTBAA::getBaseTypeInfo(QualType QTy) { + if (!isValidBaseType(QTy)) + return nullptr; + + const Type *Ty = Context.getCanonicalType(QTy).getTypePtr(); + if (llvm::MDNode *N = BaseTypeMetadataCache[Ty]) + return N; + + // Note that the following helper call is allowed to add new nodes to the + // cache, which invalidates all its previously obtained iterators. So we + // first generate the node for the type and then add that node to the cache. 
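The caching comment above, and its twin in getTypeInfo(), guards against a real llvm::DenseMap pitfall; a sketch with hypothetical names:

    #include "llvm/ADT/DenseMap.h"

    static llvm::DenseMap<int, int> Cache;

    // Stands in for getBaseTypeInfoHelper: computing one entry may recurse
    // and insert other entries into the same map.
    static int computeHelper(int K) {
      if (K > 0)
        Cache[K - 1] = K - 1; // an insertion can grow and rehash the map
      return K * 2;
    }

    static int getWrong(int K) {
      int &Slot = Cache[K];    // reference into the map's storage
      Slot = computeHelper(K); // WRONG: the insert above may reallocate,
      return Slot;             // leaving Slot dangling
    }

    static int getRight(int K) {
      int V = computeHelper(K); // compute first, possibly touching Cache...
      return Cache[K] = V;      // ...then do the final lookup-and-store
    }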
+ llvm::MDNode *TypeNode = getBaseTypeInfoHelper(Ty); + return BaseTypeMetadataCache[Ty] = TypeNode; +} + +llvm::MDNode *CodeGenTBAA::getAccessTagInfo(TBAAAccessInfo Info) { + assert(!Info.isIncomplete() && "Access to an object of an incomplete type!"); + + if (Info.isMayAlias()) + Info = TBAAAccessInfo(getChar(), Info.Size); + + if (!Info.AccessType) + return nullptr; if (!CodeGenOpts.StructPathTBAA) - return getTBAAScalarTagInfo(AccessNode); + Info = TBAAAccessInfo(Info.AccessType, Info.Size); - const Type *BTy = Context.getCanonicalType(BaseQTy).getTypePtr(); - TBAAPathTag PathTag = TBAAPathTag(BTy, AccessNode, Offset); - if (llvm::MDNode *N = StructTagMetadataCache[PathTag]) + llvm::MDNode *&N = AccessTagMetadataCache[Info]; + if (N) return N; - llvm::MDNode *BNode = nullptr; - if (isTBAAPathStruct(BaseQTy)) - BNode = getTBAAStructTypeInfo(BaseQTy); - if (!BNode) - return StructTagMetadataCache[PathTag] = - MDHelper.createTBAAStructTagNode(AccessNode, AccessNode, 0); + if (!Info.BaseType) { + Info.BaseType = Info.AccessType; + assert(!Info.Offset && "Nonzero offset for an access with no base type!"); + } + return N = MDHelper.createTBAAStructTagNode(Info.BaseType, Info.AccessType, + Info.Offset); +} - return StructTagMetadataCache[PathTag] = - MDHelper.createTBAAStructTagNode(BNode, AccessNode, Offset); +TBAAAccessInfo CodeGenTBAA::mergeTBAAInfoForCast(TBAAAccessInfo SourceInfo, + TBAAAccessInfo TargetInfo) { + if (SourceInfo.isMayAlias() || TargetInfo.isMayAlias()) + return TBAAAccessInfo::getMayAliasInfo(); + return TargetInfo; } -llvm::MDNode * -CodeGenTBAA::getTBAAScalarTagInfo(llvm::MDNode *AccessNode) { - if (!AccessNode) - return nullptr; - if (llvm::MDNode *N = ScalarTagMetadataCache[AccessNode]) - return N; +TBAAAccessInfo +CodeGenTBAA::mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA, + TBAAAccessInfo InfoB) { + if (InfoA == InfoB) + return InfoA; + + if (!InfoA || !InfoB) + return TBAAAccessInfo(); + + if (InfoA.isMayAlias() || InfoB.isMayAlias()) + return TBAAAccessInfo::getMayAliasInfo(); - return ScalarTagMetadataCache[AccessNode] = - MDHelper.createTBAAStructTagNode(AccessNode, AccessNode, 0); + // TODO: Implement the rest of the logic here. For example, two accesses + // with the same final access type result in an access to an object of that + // final access type regardless of their base types. + return TBAAAccessInfo::getMayAliasInfo(); } diff --git a/lib/CodeGen/CodeGenTBAA.h b/lib/CodeGen/CodeGenTBAA.h index ddb063d9e88a..a5b1f66bcd1a 100644 --- a/lib/CodeGen/CodeGenTBAA.h +++ b/lib/CodeGen/CodeGenTBAA.h @@ -30,20 +30,94 @@ namespace clang { class Type; namespace CodeGen { - class CGRecordLayout; +class CGRecordLayout; - struct TBAAPathTag { - TBAAPathTag(const Type *B, const llvm::MDNode *A, uint64_t O) - : BaseT(B), AccessN(A), Offset(O) {} - const Type *BaseT; - const llvm::MDNode *AccessN; - uint64_t Offset; - }; +// TBAAAccessKind - A kind of TBAA memory access descriptor. +enum class TBAAAccessKind : unsigned { + Ordinary, + MayAlias, + Incomplete, +}; + +// TBAAAccessInfo - Describes a memory access in terms of TBAA.
+struct TBAAAccessInfo { + TBAAAccessInfo(TBAAAccessKind Kind, llvm::MDNode *BaseType, + llvm::MDNode *AccessType, uint64_t Offset, uint64_t Size) + : Kind(Kind), BaseType(BaseType), AccessType(AccessType), + Offset(Offset), Size(Size) + {} + + TBAAAccessInfo(llvm::MDNode *BaseType, llvm::MDNode *AccessType, + uint64_t Offset, uint64_t Size) + : TBAAAccessInfo(TBAAAccessKind::Ordinary, BaseType, AccessType, + Offset, Size) + {} + + explicit TBAAAccessInfo(llvm::MDNode *AccessType, uint64_t Size) + : TBAAAccessInfo(/* BaseType= */ nullptr, AccessType, /* Offset= */ 0, Size) + {} + + TBAAAccessInfo() + : TBAAAccessInfo(/* AccessType= */ nullptr, /* Size= */ 0) + {} + + static TBAAAccessInfo getMayAliasInfo() { + return TBAAAccessInfo(TBAAAccessKind::MayAlias, + /* BaseType= */ nullptr, /* AccessType= */ nullptr, + /* Offset= */ 0, /* Size= */ 0); + } + + bool isMayAlias() const { return Kind == TBAAAccessKind::MayAlias; } + + static TBAAAccessInfo getIncompleteInfo() { + return TBAAAccessInfo(TBAAAccessKind::Incomplete, + /* BaseType= */ nullptr, /* AccessType= */ nullptr, + /* Offset= */ 0, /* Size= */ 0); + } + + bool isIncomplete() const { return Kind == TBAAAccessKind::Incomplete; } + + bool operator==(const TBAAAccessInfo &Other) const { + return Kind == Other.Kind && + BaseType == Other.BaseType && + AccessType == Other.AccessType && + Offset == Other.Offset && + Size == Other.Size; + } + + bool operator!=(const TBAAAccessInfo &Other) const { + return !(*this == Other); + } + + explicit operator bool() const { + return *this != TBAAAccessInfo(); + } + + /// Kind - The kind of the access descriptor. + TBAAAccessKind Kind; + + /// BaseType - The base/leading access type. May be null if this access + /// descriptor represents an access that is not considered to be an access + /// to an aggregate or union member. + llvm::MDNode *BaseType; + + /// AccessType - The final access type. May be null if there is no TBAA + /// information available about this access. + llvm::MDNode *AccessType; + + /// Offset - The byte offset of the final access within the base one. Must be + /// zero if the base access type is not specified. + uint64_t Offset; + + /// Size - The size of access, in bytes. + uint64_t Size; +}; /// CodeGenTBAA - This class organizes the cross-module state that is used /// while lowering AST types to LLVM types. class CodeGenTBAA { ASTContext &Context; + llvm::Module &Module; const CodeGenOptions &CodeGenOpts; const LangOptions &Features; MangleContext &MContext; @@ -54,12 +128,10 @@ class CodeGenTBAA { /// MetadataCache - This maps clang::Types to scalar llvm::MDNodes describing /// them. llvm::DenseMap<const Type *, llvm::MDNode *> MetadataCache; - /// This maps clang::Types to a struct node in the type DAG. - llvm::DenseMap<const Type *, llvm::MDNode *> StructTypeMetadataCache; - /// This maps TBAAPathTags to a tag node. - llvm::DenseMap<TBAAPathTag, llvm::MDNode *> StructTagMetadataCache; - /// This maps a scalar type to a scalar tag node. - llvm::DenseMap<const llvm::MDNode *, llvm::MDNode *> ScalarTagMetadataCache; + /// This maps clang::Types to a base access type in the type DAG. + llvm::DenseMap<const Type *, llvm::MDNode *> BaseTypeMetadataCache; + /// This maps TBAA access descriptors to tag nodes. + llvm::DenseMap<TBAAAccessInfo, llvm::MDNode *> AccessTagMetadataCache; /// StructMetadataCache - This maps clang::Types to llvm::MDNodes describing /// them for struct assignments. 
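A small sketch of how the descriptor above is meant to be populated; IntNode and StructSNode are hypothetical stand-ins for real type nodes:

    #include <cassert>

    // TBAAAccessInfo is the struct introduced in CodeGenTBAA.h above.
    void demo(llvm::MDNode *IntNode, llvm::MDNode *StructSNode) {
      // Plain scalar access: no base type; getAccessTagInfo() later fills
      // BaseType in with the access type itself.
      TBAAAccessInfo Scalar(IntNode, /*Size=*/4);

      // Access to an int member at byte offset 4 of a struct S.
      TBAAAccessInfo Member(StructSNode, IntNode, /*Offset=*/4, /*Size=*/4);

      assert(Scalar != Member);
      assert(TBAAAccessInfo::getMayAliasInfo().isMayAlias());
      assert(!TBAAAccessInfo() && "a default descriptor converts to false");
    }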
@@ -83,39 +155,52 @@ class CodeGenTBAA { SmallVectorImpl<llvm::MDBuilder::TBAAStructField> &Fields, bool MayAlias); - /// A wrapper function to create a scalar type. For struct-path aware TBAA, - /// the scalar type has the same format as the struct type: name, offset, - /// pointer to another node in the type DAG. - llvm::MDNode *createTBAAScalarType(StringRef Name, llvm::MDNode *Parent); + /// createScalarTypeNode - A wrapper function to create a metadata node + /// describing a scalar type. + llvm::MDNode *createScalarTypeNode(StringRef Name, llvm::MDNode *Parent, + uint64_t Size); + + /// getTypeInfoHelper - An internal helper function to generate metadata used + /// to describe accesses to objects of the given type. + llvm::MDNode *getTypeInfoHelper(const Type *Ty); + + /// getBaseTypeInfoHelper - An internal helper function to generate metadata + /// used to describe accesses to objects of the given base type. + llvm::MDNode *getBaseTypeInfoHelper(const Type *Ty); public: - CodeGenTBAA(ASTContext &Ctx, llvm::LLVMContext &VMContext, - const CodeGenOptions &CGO, - const LangOptions &Features, - MangleContext &MContext); + CodeGenTBAA(ASTContext &Ctx, llvm::Module &M, const CodeGenOptions &CGO, + const LangOptions &Features, MangleContext &MContext); ~CodeGenTBAA(); - /// getTBAAInfo - Get the TBAA MDNode to be used for a dereference - /// of the given type. - llvm::MDNode *getTBAAInfo(QualType QTy); + /// getTypeInfo - Get metadata used to describe accesses to objects of the + /// given type. + llvm::MDNode *getTypeInfo(QualType QTy); - /// getTBAAInfoForVTablePtr - Get the TBAA MDNode to be used for a - /// dereference of a vtable pointer. - llvm::MDNode *getTBAAInfoForVTablePtr(); + /// getVTablePtrAccessInfo - Get the TBAA information that describes an + /// access to a virtual table pointer. + TBAAAccessInfo getVTablePtrAccessInfo(llvm::Type *VTablePtrType); /// getTBAAStructInfo - Get the TBAAStruct MDNode to be used for a memcpy of /// the given type. llvm::MDNode *getTBAAStructInfo(QualType QTy); - /// Get the MDNode in the type DAG for given struct type QType. - llvm::MDNode *getTBAAStructTypeInfo(QualType QType); - /// Get the tag MDNode for a given base type, the actual scalar access MDNode - /// and offset into the base type. - llvm::MDNode *getTBAAStructTagInfo(QualType BaseQType, - llvm::MDNode *AccessNode, uint64_t Offset); + /// getBaseTypeInfo - Get metadata that describes the given base access type. + /// Return null if the type is not suitable for use in TBAA access tags. + llvm::MDNode *getBaseTypeInfo(QualType QTy); + + /// getAccessTagInfo - Get TBAA tag for a given memory access. + llvm::MDNode *getAccessTagInfo(TBAAAccessInfo Info); + + /// mergeTBAAInfoForCast - Get merged TBAA information for the purpose of + /// type casts. + TBAAAccessInfo mergeTBAAInfoForCast(TBAAAccessInfo SourceInfo, + TBAAAccessInfo TargetInfo); - /// Get the scalar tag MDNode for a given scalar type. - llvm::MDNode *getTBAAScalarTagInfo(llvm::MDNode *AccessNode); + /// mergeTBAAInfoForConditionalOperator - Get merged TBAA information for the + /// purpose of conditional operator. 
+ TBAAAccessInfo mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA, + TBAAAccessInfo InfoB); }; } // end namespace CodeGen @@ -123,32 +208,39 @@ public: namespace llvm { -template<> struct DenseMapInfo<clang::CodeGen::TBAAPathTag> { - static clang::CodeGen::TBAAPathTag getEmptyKey() { - return clang::CodeGen::TBAAPathTag( - DenseMapInfo<const clang::Type *>::getEmptyKey(), - DenseMapInfo<const MDNode *>::getEmptyKey(), +template<> struct DenseMapInfo<clang::CodeGen::TBAAAccessInfo> { + static clang::CodeGen::TBAAAccessInfo getEmptyKey() { + unsigned UnsignedKey = DenseMapInfo<unsigned>::getEmptyKey(); + return clang::CodeGen::TBAAAccessInfo( + static_cast<clang::CodeGen::TBAAAccessKind>(UnsignedKey), + DenseMapInfo<MDNode *>::getEmptyKey(), + DenseMapInfo<MDNode *>::getEmptyKey(), + DenseMapInfo<uint64_t>::getEmptyKey(), DenseMapInfo<uint64_t>::getEmptyKey()); } - static clang::CodeGen::TBAAPathTag getTombstoneKey() { - return clang::CodeGen::TBAAPathTag( - DenseMapInfo<const clang::Type *>::getTombstoneKey(), - DenseMapInfo<const MDNode *>::getTombstoneKey(), + static clang::CodeGen::TBAAAccessInfo getTombstoneKey() { + unsigned UnsignedKey = DenseMapInfo<unsigned>::getTombstoneKey(); + return clang::CodeGen::TBAAAccessInfo( + static_cast<clang::CodeGen::TBAAAccessKind>(UnsignedKey), + DenseMapInfo<MDNode *>::getTombstoneKey(), + DenseMapInfo<MDNode *>::getTombstoneKey(), + DenseMapInfo<uint64_t>::getTombstoneKey(), DenseMapInfo<uint64_t>::getTombstoneKey()); } - static unsigned getHashValue(const clang::CodeGen::TBAAPathTag &Val) { - return DenseMapInfo<const clang::Type *>::getHashValue(Val.BaseT) ^ - DenseMapInfo<const MDNode *>::getHashValue(Val.AccessN) ^ - DenseMapInfo<uint64_t>::getHashValue(Val.Offset); + static unsigned getHashValue(const clang::CodeGen::TBAAAccessInfo &Val) { + auto KindValue = static_cast<unsigned>(Val.Kind); + return DenseMapInfo<unsigned>::getHashValue(KindValue) ^ + DenseMapInfo<MDNode *>::getHashValue(Val.BaseType) ^ + DenseMapInfo<MDNode *>::getHashValue(Val.AccessType) ^ + DenseMapInfo<uint64_t>::getHashValue(Val.Offset) ^ + DenseMapInfo<uint64_t>::getHashValue(Val.Size); } - static bool isEqual(const clang::CodeGen::TBAAPathTag &LHS, - const clang::CodeGen::TBAAPathTag &RHS) { - return LHS.BaseT == RHS.BaseT && - LHS.AccessN == RHS.AccessN && - LHS.Offset == RHS.Offset; + static bool isEqual(const clang::CodeGen::TBAAAccessInfo &LHS, + const clang::CodeGen::TBAAAccessInfo &RHS) { + return LHS == RHS; } }; diff --git a/lib/CodeGen/CodeGenTypeCache.h b/lib/CodeGen/CodeGenTypeCache.h index 450eab48a3b4..2af7b30eafb4 100644 --- a/lib/CodeGen/CodeGenTypeCache.h +++ b/lib/CodeGen/CodeGenTypeCache.h @@ -15,6 +15,7 @@ #define LLVM_CLANG_LIB_CODEGEN_CODEGENTYPECACHE_H #include "clang/AST/CharUnits.h" +#include "clang/Basic/AddressSpaces.h" #include "llvm/IR/CallingConv.h" namespace llvm { @@ -94,7 +95,7 @@ struct CodeGenTypeCache { unsigned char SizeAlignInBytes; }; - unsigned ASTAllocaAddressSpace; + LangAS ASTAllocaAddressSpace; CharUnits getSizeSize() const { return CharUnits::fromQuantity(SizeSizeInBytes); @@ -114,7 +115,7 @@ struct CodeGenTypeCache { llvm::CallingConv::ID BuiltinCC; llvm::CallingConv::ID getBuiltinCC() const { return BuiltinCC; } - unsigned getASTAllocaAddressSpace() const { return ASTAllocaAddressSpace; } + LangAS getASTAllocaAddressSpace() const { return ASTAllocaAddressSpace; } }; } // end namespace CodeGen diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp index 9306c4fbaff8..529a13b7adc8 100644 --- 
a/lib/CodeGen/CodeGenTypes.cpp +++ b/lib/CodeGen/CodeGenTypes.cpp @@ -443,12 +443,18 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { static_cast<unsigned>(Context.getTypeSize(T))); break; - case BuiltinType::Half: - // Half FP can either be storage-only (lowered to i16) or native. + case BuiltinType::Float16: ResultType = getTypeForFormat(getLLVMContext(), Context.getFloatTypeSemantics(T), - Context.getLangOpts().NativeHalfType || - Context.getLangOpts().HalfArgsAndReturns); + /* UseNativeHalf = */ true); + break; + + case BuiltinType::Half: + // Half FP can either be storage-only (lowered to i16) or native. + ResultType = getTypeForFormat( + getLLVMContext(), Context.getFloatTypeSemantics(T), + Context.getLangOpts().NativeHalfType || + !Context.getTargetInfo().useFP16ConversionIntrinsics()); break; case BuiltinType::Float: case BuiltinType::Double: @@ -639,7 +645,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { break; } case Type::Pipe: { - ResultType = CGM.getOpenCLRuntime().getPipeType(); + ResultType = CGM.getOpenCLRuntime().getPipeType(cast<PipeType>(Ty)); break; } } diff --git a/lib/CodeGen/CodeGenTypes.h b/lib/CodeGen/CodeGenTypes.h index 9d0e3ded23e4..d082342bf592 100644 --- a/lib/CodeGen/CodeGenTypes.h +++ b/lib/CodeGen/CodeGenTypes.h @@ -164,8 +164,6 @@ class CodeGenTypes { llvm::SmallSet<const Type *, 8> RecordsWithOpaqueMemberPointers; - unsigned ClangCallConvToLLVMCallConv(CallingConv CC); - public: CodeGenTypes(CodeGenModule &cgm); ~CodeGenTypes(); @@ -180,6 +178,9 @@ public: llvm::LLVMContext &getLLVMContext() { return TheModule.getContext(); } const CodeGenOptions &getCodeGenOpts() const; + /// Convert clang calling convention to LLVM calling convention. + unsigned ClangCallConvToLLVMCallConv(CallingConv CC); + /// ConvertType - Convert type T into a llvm::Type. llvm::Type *ConvertType(QualType T); diff --git a/lib/CodeGen/ConstantEmitter.h b/lib/CodeGen/ConstantEmitter.h new file mode 100644 index 000000000000..90c9fcd8cf81 --- /dev/null +++ b/lib/CodeGen/ConstantEmitter.h @@ -0,0 +1,178 @@ +//===--- ConstantEmitter.h - IR constant emission ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// A helper class for emitting expressions and values as llvm::Constants +// and as initializers for global variables. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CODEGEN_CONSTANTEMITTER_H +#define LLVM_CLANG_LIB_CODEGEN_CONSTANTEMITTER_H + +#include "CodeGenFunction.h" +#include "CodeGenModule.h" + +namespace clang { +namespace CodeGen { + +class ConstantEmitter { +public: + CodeGenModule &CGM; + CodeGenFunction *CGF; + +private: + bool Abstract = false; + + /// Whether non-abstract components of the emitter have been initialized. + bool InitializedNonAbstract = false; + + /// Whether the emitter has been finalized. + bool Finalized = false; + + /// Whether the constant-emission failed. + bool Failed = false; + + /// The AST address space where this (non-abstract) initializer is going. + /// Used for generating appropriate placeholders.
+ LangAS DestAddressSpace; + + llvm::SmallVector<std::pair<llvm::Constant *, llvm::GlobalVariable*>, 4> + PlaceholderAddresses; + +public: + ConstantEmitter(CodeGenModule &CGM, CodeGenFunction *CGF = nullptr) + : CGM(CGM), CGF(CGF) {} + + /// Initialize this emission in the context of the given function. + /// Use this if the expression might contain contextual references like + /// block addresses or PredefinedExprs. + ConstantEmitter(CodeGenFunction &CGF) + : CGM(CGF.CGM), CGF(&CGF) {} + + ConstantEmitter(const ConstantEmitter &other) = delete; + ConstantEmitter &operator=(const ConstantEmitter &other) = delete; + + ~ConstantEmitter(); + + /// Is the current emission context abstract? + bool isAbstract() const { + return Abstract; + } + + /// Try to emit the initializer of the given declaration as a non-abstract + /// constant. If this succeeds, the emission must be finalized. + llvm::Constant *tryEmitForInitializer(const VarDecl &D); + llvm::Constant *tryEmitForInitializer(const Expr *E, LangAS destAddrSpace, + QualType destType); + llvm::Constant *emitForInitializer(const APValue &value, LangAS destAddrSpace, + QualType destType); + + void finalize(llvm::GlobalVariable *global); + + // All of the "abstract" emission methods below permit the emission to + // be immediately discarded without finalizing anything. Therefore, they + // must also promise not to do anything that will, in the future, require + // finalization: + // + // - using the CGF (if present) for anything other than establishing + // semantic context; for example, an expression with ignored + // side-effects must not be emitted as an abstract expression + // + // - doing anything that would not be safe to duplicate within an + // initializer or to propagate to another context; for example, + // side effects, or emitting an initialization that requires a + // reference to its current location. + + /// Try to emit the initializer of the given declaration as an abstract + /// constant. + llvm::Constant *tryEmitAbstractForInitializer(const VarDecl &D); + + /// Emit the result of the given expression as an abstract constant, + /// asserting that it succeeded. This is only safe to do when the + /// expression is known to be a constant expression with either a fairly + /// simple type or a known simple form. + llvm::Constant *emitAbstract(const Expr *E, QualType T); + llvm::Constant *emitAbstract(SourceLocation loc, const APValue &value, + QualType T); + + /// Try to emit the result of the given expression as an abstract constant. + llvm::Constant *tryEmitAbstract(const Expr *E, QualType T); + llvm::Constant *tryEmitAbstractForMemory(const Expr *E, QualType T); + + llvm::Constant *tryEmitAbstract(const APValue &value, QualType T); + llvm::Constant *tryEmitAbstractForMemory(const APValue &value, QualType T); + + llvm::Constant *emitNullForMemory(QualType T) { + return emitNullForMemory(CGM, T); + } + llvm::Constant *emitForMemory(llvm::Constant *C, QualType T) { + return emitForMemory(CGM, C, T); + } + + static llvm::Constant *emitNullForMemory(CodeGenModule &CGM, QualType T); + static llvm::Constant *emitForMemory(CodeGenModule &CGM, llvm::Constant *C, + QualType T); + + // These are private helper routines of the constant emitter that + // can't actually be private because things are split out into helper + // functions and classes.
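Pieced together from the declarations above and the call sites earlier in this diff, the intended protocol looks like this (CGM, D, E and GV are assumed to be in scope):

    // Non-abstract emission: must be finalized once the destination exists.
    ConstantEmitter emitter(CGM);
    if (llvm::Constant *Init = emitter.tryEmitForInitializer(D)) {
      GV->setInitializer(Init);
      emitter.finalize(GV); // resolves any self-referential placeholders
    }

    // Abstract emission: the result may be discarded freely and the emitter
    // needs no finalize() call.
    llvm::Constant *C = ConstantEmitter(CGM).emitAbstract(E, E->getType());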
+ + llvm::Constant *tryEmitPrivateForVarInit(const VarDecl &D); + + llvm::Constant *tryEmitPrivate(const Expr *E, QualType T); + llvm::Constant *tryEmitPrivateForMemory(const Expr *E, QualType T); + + llvm::Constant *tryEmitPrivate(const APValue &value, QualType T); + llvm::Constant *tryEmitPrivateForMemory(const APValue &value, QualType T); + + /// Get the address of the current location. This is a constant + /// that will resolve, after finalization, to the address of the + /// 'signal' value that is registered with the emitter later. + llvm::GlobalValue *getCurrentAddrPrivate(); + + /// Register a 'signal' value with the emitter to inform it where to + /// resolve a placeholder. The signal value must be unique in the + /// initializer; it might, for example, be the address of a global that + /// refers to the current-address value in its own initializer. + /// + /// Uses of the placeholder must be properly anchored before finalizing + /// the emitter, e.g. by being installed as the initializer of a global + /// variable. That is, it must be possible to replaceAllUsesWith + /// the placeholder with the proper address of the signal. + void registerCurrentAddrPrivate(llvm::Constant *signal, + llvm::GlobalValue *placeholder); + +private: + void initializeNonAbstract(LangAS destAS) { + assert(!InitializedNonAbstract); + InitializedNonAbstract = true; + DestAddressSpace = destAS; + } + llvm::Constant *markIfFailed(llvm::Constant *init) { + if (!init) + Failed = true; + return init; + } + + struct AbstractState { + bool OldValue; + size_t OldPlaceholdersSize; + }; + AbstractState pushAbstract() { + AbstractState saved = { Abstract, PlaceholderAddresses.size() }; + Abstract = true; + return saved; + } + llvm::Constant *validateAndPopAbstract(llvm::Constant *C, AbstractState save); +}; + +} +} + +#endif diff --git a/lib/CodeGen/CoverageMappingGen.cpp b/lib/CodeGen/CoverageMappingGen.cpp index a1023473bdd3..89a30dc7040c 100644 --- a/lib/CodeGen/CoverageMappingGen.cpp +++ b/lib/CodeGen/CoverageMappingGen.cpp @@ -29,7 +29,7 @@ using namespace clang; using namespace CodeGen; using namespace llvm::coverage; -void CoverageSourceInfo::SourceRangeSkipped(SourceRange Range) { +void CoverageSourceInfo::SourceRangeSkipped(SourceRange Range, SourceLocation) { SkippedRanges.push_back(Range); } @@ -45,10 +45,19 @@ class SourceMappingRegion { /// \brief The region's ending location. Optional<SourceLocation> LocEnd; + /// Whether this region should be emitted after its parent is emitted. + bool DeferRegion; + + /// Whether this region is a gap region. The count from a gap region is set + /// as the line execution count if there are no other regions on the line. + bool GapRegion; + public: SourceMappingRegion(Counter Count, Optional<SourceLocation> LocStart, - Optional<SourceLocation> LocEnd) - : Count(Count), LocStart(LocStart), LocEnd(LocEnd) {} + Optional<SourceLocation> LocEnd, bool DeferRegion = false, + bool GapRegion = false) + : Count(Count), LocStart(LocStart), LocEnd(LocEnd), + DeferRegion(DeferRegion), GapRegion(GapRegion) {} const Counter &getCounter() const { return Count; } @@ -71,6 +80,47 @@ public: assert(LocEnd && "Region has no end location"); return *LocEnd; } + + bool isDeferred() const { return DeferRegion; } + + void setDeferred(bool Deferred) { DeferRegion = Deferred; } + + bool isGap() const { return GapRegion; } + + void setGap(bool Gap) { GapRegion = Gap; } +}; + +/// Spelling locations for the start and end of a source region. 
+struct SpellingRegion { + /// The line where the region starts. + unsigned LineStart; + + /// The column where the region starts. + unsigned ColumnStart; + + /// The line where the region ends. + unsigned LineEnd; + + /// The column where the region ends. + unsigned ColumnEnd; + + SpellingRegion(SourceManager &SM, SourceLocation LocStart, + SourceLocation LocEnd) { + LineStart = SM.getSpellingLineNumber(LocStart); + ColumnStart = SM.getSpellingColumnNumber(LocStart); + LineEnd = SM.getSpellingLineNumber(LocEnd); + ColumnEnd = SM.getSpellingColumnNumber(LocEnd); + } + + SpellingRegion(SourceManager &SM, SourceMappingRegion &R) + : SpellingRegion(SM, R.getStartLoc(), R.getEndLoc()) {} + + /// Check if the start and end locations appear in source order, i.e. + /// top->bottom, left->right. + bool isInSourceOrder() const { + return (LineStart < LineEnd) || + (LineStart == LineEnd && ColumnStart <= ColumnEnd); + } }; /// \brief Provides the common functionality for the different @@ -241,12 +291,9 @@ public: auto CovFileID = getCoverageFileID(LocStart); if (!CovFileID) continue; - unsigned LineStart = SM.getSpellingLineNumber(LocStart); - unsigned ColumnStart = SM.getSpellingColumnNumber(LocStart); - unsigned LineEnd = SM.getSpellingLineNumber(LocEnd); - unsigned ColumnEnd = SM.getSpellingColumnNumber(LocEnd); + SpellingRegion SR{SM, LocStart, LocEnd}; auto Region = CounterMappingRegion::makeSkipped( - *CovFileID, LineStart, ColumnStart, LineEnd, ColumnEnd); + *CovFileID, SR.LineStart, SR.ColumnStart, SR.LineEnd, SR.ColumnEnd); // Make sure that we only collect the regions that are inside // the source code of this function. if (Region.LineStart >= FileLineRanges[*CovFileID].first && @@ -284,16 +331,19 @@ public: if (Filter.count(std::make_pair(LocStart, LocEnd))) continue; - // Find the spilling locations for the mapping region. - unsigned LineStart = SM.getSpellingLineNumber(LocStart); - unsigned ColumnStart = SM.getSpellingColumnNumber(LocStart); - unsigned LineEnd = SM.getSpellingLineNumber(LocEnd); - unsigned ColumnEnd = SM.getSpellingColumnNumber(LocEnd); - - assert(LineStart <= LineEnd && "region start and end out of order"); - MappingRegions.push_back(CounterMappingRegion::makeRegion( - Region.getCounter(), *CovFileID, LineStart, ColumnStart, LineEnd, - ColumnEnd)); + // Find the spelling locations for the mapping region.
+ SpellingRegion SR{SM, LocStart, LocEnd}; + assert(SR.isInSourceOrder() && "region start and end out of order"); + + if (Region.isGap()) { + MappingRegions.push_back(CounterMappingRegion::makeGapRegion( + Region.getCounter(), *CovFileID, SR.LineStart, SR.ColumnStart, + SR.LineEnd, SR.ColumnEnd)); + } else { + MappingRegions.push_back(CounterMappingRegion::makeRegion( + Region.getCounter(), *CovFileID, SR.LineStart, SR.ColumnStart, + SR.LineEnd, SR.ColumnEnd)); + } } } @@ -317,14 +367,11 @@ public: "region spans multiple files"); Filter.insert(std::make_pair(ParentLoc, LocEnd)); - unsigned LineStart = SM.getSpellingLineNumber(ParentLoc); - unsigned ColumnStart = SM.getSpellingColumnNumber(ParentLoc); - unsigned LineEnd = SM.getSpellingLineNumber(LocEnd); - unsigned ColumnEnd = SM.getSpellingColumnNumber(LocEnd); - + SpellingRegion SR{SM, ParentLoc, LocEnd}; + assert(SR.isInSourceOrder() && "region start and end out of order"); MappingRegions.push_back(CounterMappingRegion::makeExpansion( - *ParentFileID, *ExpandedFileID, LineStart, ColumnStart, LineEnd, - ColumnEnd)); + *ParentFileID, *ExpandedFileID, SR.LineStart, SR.ColumnStart, + SR.LineEnd, SR.ColumnEnd)); } return Filter; } @@ -389,6 +436,10 @@ struct CounterCoverageMappingBuilder /// \brief A stack of currently live regions. std::vector<SourceMappingRegion> RegionStack; + /// The currently deferred region: its end location and count can be set once + /// its parent has been popped from the region stack. + Optional<SourceMappingRegion> DeferredRegion; + CounterExpressionBuilder Builder; /// \brief A location in the most recently visited file or macro. @@ -397,6 +448,9 @@ struct CounterCoverageMappingBuilder /// expressions cross file or macro boundaries. SourceLocation MostRecentLocation; + /// Location of the last terminated region. + Optional<std::pair<SourceLocation, size_t>> LastTerminatedRegion; + /// \brief Return a counter for the subtraction of \c RHS from \c LHS Counter subtractCounters(Counter LHS, Counter RHS) { return Builder.subtract(LHS, RHS); @@ -424,19 +478,84 @@ struct CounterCoverageMappingBuilder /// used with popRegions to exit a "scope", ending the region that was pushed. size_t pushRegion(Counter Count, Optional<SourceLocation> StartLoc = None, Optional<SourceLocation> EndLoc = None) { - if (StartLoc) + if (StartLoc) { MostRecentLocation = *StartLoc; + completeDeferred(Count, MostRecentLocation); + } RegionStack.emplace_back(Count, StartLoc, EndLoc); return RegionStack.size() - 1; } + /// Complete any pending deferred region by setting its end location and + /// count, and then pushing it onto the region stack. + size_t completeDeferred(Counter Count, SourceLocation DeferredEndLoc) { + size_t Index = RegionStack.size(); + if (!DeferredRegion) + return Index; + + // Consume the pending region. + SourceMappingRegion DR = DeferredRegion.getValue(); + DeferredRegion = None; + + // If the region ends in an expansion, find the expansion site. + FileID StartFile = SM.getFileID(DR.getStartLoc()); + if (SM.getFileID(DeferredEndLoc) != StartFile) { + if (isNestedIn(DeferredEndLoc, StartFile)) { + do { + DeferredEndLoc = getIncludeOrExpansionLoc(DeferredEndLoc); + } while (StartFile != SM.getFileID(DeferredEndLoc)); + } else { + return Index; + } + } + + // The parent of this deferred region ends where the containing decl ends, + // so the region isn't useful. 
+ if (DR.getStartLoc() == DeferredEndLoc) + return Index; + + // If we're visiting statements in non-source order (e.g. switch cases or + // a loop condition) we can't construct a sensible deferred region. + if (!SpellingRegion(SM, DR.getStartLoc(), DeferredEndLoc).isInSourceOrder()) + return Index; + + DR.setGap(true); + DR.setCounter(Count); + DR.setEndLoc(DeferredEndLoc); + handleFileExit(DeferredEndLoc); + RegionStack.push_back(DR); + return Index; + } + + /// Complete a deferred region created after a terminated region at the + /// top-level. + void completeTopLevelDeferredRegion(Counter Count, + SourceLocation DeferredEndLoc) { + if (DeferredRegion || !LastTerminatedRegion) + return; + + if (LastTerminatedRegion->second != RegionStack.size()) + return; + + SourceLocation Start = LastTerminatedRegion->first; + if (SM.getFileID(Start) != SM.getMainFileID()) + return; + + SourceMappingRegion DR = RegionStack.back(); + DR.setStartLoc(Start); + DR.setDeferred(false); + DeferredRegion = DR; + completeDeferred(Count, DeferredEndLoc); + } + /// \brief Pop regions from the stack into the function's list of regions. /// /// Adds all regions from \c ParentIndex to the top of the stack to the /// function's \c SourceRegions. void popRegions(size_t ParentIndex) { assert(RegionStack.size() >= ParentIndex && "parent not in stack"); + bool ParentOfDeferredRegion = false; while (RegionStack.size() > ParentIndex) { SourceMappingRegion &Region = RegionStack.back(); if (Region.hasStartLoc()) { @@ -467,10 +586,34 @@ struct CounterCoverageMappingBuilder MostRecentLocation = getIncludeOrExpansionLoc(EndLoc); assert(SM.isWrittenInSameFile(Region.getStartLoc(), EndLoc)); + assert(SpellingRegion(SM, Region).isInSourceOrder()); SourceRegions.push_back(Region); + + if (ParentOfDeferredRegion) { + ParentOfDeferredRegion = false; + + // If there's an existing deferred region, keep the old one, because + // it means there are two consecutive returns (or a similar pattern). + if (!DeferredRegion.hasValue() && + // File IDs aren't gathered within macro expansions, so it isn't + // useful to try to create a deferred region inside of one. + !EndLoc.isMacroID()) + DeferredRegion = + SourceMappingRegion(Counter::getZero(), EndLoc, None); + } + } else if (Region.isDeferred()) { + assert(!ParentOfDeferredRegion && "Consecutive deferred regions"); + ParentOfDeferredRegion = true; } RegionStack.pop_back(); + + // If the zero region pushed after the last terminated region no longer + // exists, clear its cached information. + if (LastTerminatedRegion && + RegionStack.size() < LastTerminatedRegion->second) + LastTerminatedRegion = None; } + assert(!ParentOfDeferredRegion && "Deferred region with no parent"); } /// \brief Return the currently active region. @@ -481,15 +624,17 @@ struct CounterCoverageMappingBuilder /// \brief Propagate counts through the children of \c S. Counter propagateCounts(Counter TopCount, const Stmt *S) { - size_t Index = pushRegion(TopCount, getStart(S), getEnd(S)); + SourceLocation StartLoc = getStart(S); + SourceLocation EndLoc = getEnd(S); + size_t Index = pushRegion(TopCount, StartLoc, EndLoc); Visit(S); Counter ExitCount = getRegion().getCounter(); popRegions(Index); // The statement may be spanned by an expansion. Make sure we handle a file // exit out of this expansion before moving to the next statement.
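For intuition about the deferred and gap regions handled above (hypothetical source, not part of this patch):

  int f(int x) {
    if (x)
      return 1; // terminateRegion() pushes a deferred zero region here
    return 0;   // completeDeferred() closes the gap at this statement
  }

The completed gap region only determines the line execution count when no other region covers the line, per the GapRegion documentation above.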
- if (SM.isBeforeInTranslationUnit(getStart(S), S->getLocStart())) - MostRecentLocation = getEnd(S); + if (SM.isBeforeInTranslationUnit(StartLoc, S->getLocStart())) + MostRecentLocation = EndLoc; return ExitCount; } @@ -577,9 +722,11 @@ struct CounterCoverageMappingBuilder SourceLocation Loc = MostRecentLocation; while (isNestedIn(Loc, ParentFile)) { SourceLocation FileStart = getStartOfFileOrMacro(Loc); - if (StartLocs.insert(FileStart).second) + if (StartLocs.insert(FileStart).second) { SourceRegions.emplace_back(*ParentCounter, FileStart, getEndOfFileOrMacro(Loc)); + assert(SpellingRegion(SM, SourceRegions.back()).isInSourceOrder()); + } Loc = getIncludeOrExpansionLoc(Loc); } } @@ -595,15 +742,53 @@ struct CounterCoverageMappingBuilder handleFileExit(StartLoc); if (!Region.hasStartLoc()) Region.setStartLoc(StartLoc); + + completeDeferred(Region.getCounter(), StartLoc); } /// \brief Mark \c S as a terminator, starting a zero region. void terminateRegion(const Stmt *S) { extendRegion(S); SourceMappingRegion &Region = getRegion(); + SourceLocation EndLoc = getEnd(S); if (!Region.hasEndLoc()) - Region.setEndLoc(getEnd(S)); + Region.setEndLoc(EndLoc); pushRegion(Counter::getZero()); + auto &ZeroRegion = getRegion(); + ZeroRegion.setDeferred(true); + LastTerminatedRegion = {EndLoc, RegionStack.size()}; + } + + /// Find a valid gap range between \p AfterLoc and \p BeforeLoc. + Optional<SourceRange> findGapAreaBetween(SourceLocation AfterLoc, + SourceLocation BeforeLoc) { + // If the start and end locations of the gap are both within the same macro + // file, the range may not be in source order. + if (AfterLoc.isMacroID() || BeforeLoc.isMacroID()) + return None; + if (!SM.isWrittenInSameFile(AfterLoc, BeforeLoc)) + return None; + return {{AfterLoc, BeforeLoc}}; + } + + /// Find the source range after \p AfterStmt and before \p BeforeStmt. + Optional<SourceRange> findGapAreaBetween(const Stmt *AfterStmt, + const Stmt *BeforeStmt) { + return findGapAreaBetween(getPreciseTokenLocEnd(getEnd(AfterStmt)), + getStart(BeforeStmt)); + } + + /// Emit a gap region between \p StartLoc and \p EndLoc with the given count. + void fillGapAreaWithCount(SourceLocation StartLoc, SourceLocation EndLoc, + Counter Count) { + if (StartLoc == EndLoc) + return; + assert(SpellingRegion(SM, StartLoc, EndLoc).isInSourceOrder()); + handleFileExit(StartLoc); + size_t Index = pushRegion(Count, StartLoc, EndLoc); + getRegion().setGap(true); + handleFileExit(EndLoc); + popRegions(Index); } /// \brief Keep counts of breaks and continues inside loops. @@ -617,13 +802,15 @@ struct CounterCoverageMappingBuilder CoverageMappingModuleGen &CVM, llvm::DenseMap<const Stmt *, unsigned> &CounterMap, SourceManager &SM, const LangOptions &LangOpts) - : CoverageMappingBuilder(CVM, SM, LangOpts), CounterMap(CounterMap) {} + : CoverageMappingBuilder(CVM, SM, LangOpts), CounterMap(CounterMap), + DeferredRegion(None) {} /// \brief Write the mapping data to the output stream void write(llvm::raw_ostream &OS) { llvm::SmallVector<unsigned, 8> VirtualFileMapping; gatherFileIDs(VirtualFileMapping); SourceRegionFilter Filter = emitExpansionRegions(); + assert(!DeferredRegion && "Deferred region never completed"); emitSourceRegions(Filter); gatherSkippedRegions(); @@ -644,14 +831,42 @@ struct CounterCoverageMappingBuilder handleFileExit(getEnd(S)); } + /// Determine whether the final deferred region emitted in \p Body should be + /// discarded. 
+ static bool discardFinalDeferredRegionInDecl(Stmt *Body) { + if (auto *CS = dyn_cast<CompoundStmt>(Body)) { + Stmt *LastStmt = CS->body_back(); + if (auto *IfElse = dyn_cast<IfStmt>(LastStmt)) { + if (auto *Else = dyn_cast_or_null<CompoundStmt>(IfElse->getElse())) + LastStmt = Else->body_back(); + else + LastStmt = IfElse->getElse(); + } + return dyn_cast_or_null<ReturnStmt>(LastStmt); + } + return false; + } + void VisitDecl(const Decl *D) { + assert(!DeferredRegion && "Deferred region never completed"); + Stmt *Body = D->getBody(); // Do not propagate region counts into system headers. if (Body && SM.isInSystemHeader(SM.getSpellingLoc(getStart(Body)))) return; - propagateCounts(getRegionCounter(Body), Body); + Counter ExitCount = propagateCounts(getRegionCounter(Body), Body); + assert(RegionStack.empty() && "Regions entered but never exited"); + + if (DeferredRegion) { + // Complete (or discard) any deferred regions introduced by the last + // statement. + if (discardFinalDeferredRegionInDecl(Body)) + DeferredRegion = None; + else + popRegions(completeDeferred(ExitCount, getEnd(Body))); + } } void VisitReturnStmt(const ReturnStmt *S) { @@ -671,10 +886,12 @@ struct CounterCoverageMappingBuilder void VisitGotoStmt(const GotoStmt *S) { terminateRegion(S); } void VisitLabelStmt(const LabelStmt *S) { + Counter LabelCount = getRegionCounter(S); SourceLocation Start = getStart(S); + completeTopLevelDeferredRegion(LabelCount, Start); // We can't extendRegion here or we risk overlapping with our new region. handleFileExit(Start); - pushRegion(getRegionCounter(S), Start); + pushRegion(LabelCount, Start); Visit(S->getSubStmt()); } @@ -682,6 +899,8 @@ struct CounterCoverageMappingBuilder assert(!BreakContinueStack.empty() && "break not in a loop or switch!"); BreakContinueStack.back().BreakCount = addCounters( BreakContinueStack.back().BreakCount, getRegion().getCounter()); + // FIXME: a break in a switch should terminate regions for all preceding + // case statements, not just the most recent one. terminateRegion(S); } @@ -692,6 +911,16 @@ struct CounterCoverageMappingBuilder terminateRegion(S); } + void VisitCallExpr(const CallExpr *E) { + VisitStmt(E); + + // Terminate the region when we hit a noreturn function. + // (This is helpful when dealing with switch statements.) + QualType CalleeType = E->getCallee()->getType(); + if (getFunctionExtInfo(*CalleeType).getNoReturn()) + terminateRegion(E); + } + void VisitWhileStmt(const WhileStmt *S) { extendRegion(S); @@ -710,6 +939,11 @@ struct CounterCoverageMappingBuilder propagateCounts(CondCount, S->getCond()); adjustForOutOfOrderTraversal(getEnd(S)); + // The body count applies to the area immediately after the condition. + auto Gap = findGapAreaBetween(S->getCond(), S->getBody()); + if (Gap) + fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), BodyCount); + Counter OutCount = addCounters(BC.BreakCount, subtractCounters(CondCount, BodyCount)); if (OutCount != ParentCount) @@ -764,6 +998,12 @@ struct CounterCoverageMappingBuilder adjustForOutOfOrderTraversal(getEnd(S)); } + // The body count applies to the area immediately after the increment.
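For illustration, the gap filled below for a for statement is the text between the closing parenthesis and the start of the body (hypothetical source, not part of this patch):

  for (int i = 0; i != n; ++i)  // gap starts after the ')'
    body();                     // ...and ends here; filled with BodyCount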
+ auto Gap = findGapAreaBetween(getPreciseTokenLocEnd(S->getRParenLoc()), + getStart(S->getBody())); + if (Gap) + fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), BodyCount); + Counter OutCount = addCounters(BC.BreakCount, subtractCounters(CondCount, BodyCount)); if (OutCount != ParentCount) @@ -783,6 +1023,12 @@ struct CounterCoverageMappingBuilder Counter BackedgeCount = propagateCounts(BodyCount, S->getBody()); BreakContinue BC = BreakContinueStack.pop_back_val(); + // The body count applies to the area immediately after the range. + auto Gap = findGapAreaBetween(getPreciseTokenLocEnd(S->getRParenLoc()), + getStart(S->getBody())); + if (Gap) + fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), BodyCount); + Counter LoopCount = addCounters(ParentCount, BackedgeCount, BC.ContinueCount); Counter OutCount = @@ -803,6 +1049,12 @@ struct CounterCoverageMappingBuilder Counter BackedgeCount = propagateCounts(BodyCount, S->getBody()); BreakContinue BC = BreakContinueStack.pop_back_val(); + // The body count applies to the area immediately after the collection. + auto Gap = findGapAreaBetween(getPreciseTokenLocEnd(S->getRParenLoc()), + getStart(S->getBody())); + if (Gap) + fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), BodyCount); + Counter LoopCount = addCounters(ParentCount, BackedgeCount, BC.ContinueCount); Counter OutCount = @@ -823,15 +1075,20 @@ struct CounterCoverageMappingBuilder extendRegion(Body); if (const auto *CS = dyn_cast<CompoundStmt>(Body)) { if (!CS->body_empty()) { - // The body of the switch needs a zero region so that fallthrough counts - // behave correctly, but it would be misleading to include the braces of - // the compound statement in the zeroed area, so we need to handle this - // specially. + // Make a region for the body of the switch. If the body starts with + // a case, that case will reuse this region; otherwise, this covers + // the unreachable code at the beginning of the switch body. size_t Index = - pushRegion(Counter::getZero(), getStart(CS->body_front()), - getEnd(CS->body_back())); + pushRegion(Counter::getZero(), getStart(CS->body_front())); for (const auto *Child : CS->children()) Visit(Child); + + // Set the end for the body of the switch, if it isn't already set. + for (size_t i = RegionStack.size(); i != Index; --i) { + if (!RegionStack[i - 1].hasEndLoc()) + RegionStack[i - 1].setEndLoc(getEnd(CS->body_back())); + } + popRegions(Index); } } else @@ -889,12 +1146,21 @@ struct CounterCoverageMappingBuilder // counter for the body when looking at the coverage. propagateCounts(ParentCount, S->getCond()); + // The 'then' count applies to the area immediately after the condition. + auto Gap = findGapAreaBetween(S->getCond(), S->getThen()); + if (Gap) + fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), ThenCount); + extendRegion(S->getThen()); Counter OutCount = propagateCounts(ThenCount, S->getThen()); Counter ElseCount = subtractCounters(ParentCount, ThenCount); if (const Stmt *Else = S->getElse()) { - extendRegion(S->getElse()); + // The 'else' count applies to the area immediately after the 'then'. 
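For illustration, the two gaps filled for an if-else (hypothetical source, not part of this patch):

  if (cond)      // gap after the condition: filled with the 'then' count
    thenPart();
  else           // gap between 'then' and the 'else' body: the 'else' count
    elsePart();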
+ Gap = findGapAreaBetween(S->getThen(), Else); + if (Gap) + fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), ElseCount); + extendRegion(Else); OutCount = addCounters(OutCount, propagateCounts(ElseCount, Else)); } else OutCount = addCounters(OutCount, ElseCount); @@ -931,25 +1197,34 @@ struct CounterCoverageMappingBuilder Visit(E->getCond()); if (!isa<BinaryConditionalOperator>(E)) { + // The 'then' count applies to the area immediately after the condition. + auto Gap = + findGapAreaBetween(E->getQuestionLoc(), getStart(E->getTrueExpr())); + if (Gap) + fillGapAreaWithCount(Gap->getBegin(), Gap->getEnd(), TrueCount); + extendRegion(E->getTrueExpr()); propagateCounts(TrueCount, E->getTrueExpr()); } + extendRegion(E->getFalseExpr()); propagateCounts(subtractCounters(ParentCount, TrueCount), E->getFalseExpr()); } void VisitBinLAnd(const BinaryOperator *E) { - extendRegion(E); - Visit(E->getLHS()); + extendRegion(E->getLHS()); + propagateCounts(getRegion().getCounter(), E->getLHS()); + handleFileExit(getEnd(E->getLHS())); extendRegion(E->getRHS()); propagateCounts(getRegionCounter(E), E->getRHS()); } void VisitBinLOr(const BinaryOperator *E) { - extendRegion(E); - Visit(E->getLHS()); + extendRegion(E->getLHS()); + propagateCounts(getRegion().getCounter(), E->getLHS()); + handleFileExit(getEnd(E->getLHS())); extendRegion(E->getRHS()); propagateCounts(getRegionCounter(E), E->getRHS()); @@ -992,6 +1267,9 @@ static void dump(llvm::raw_ostream &OS, StringRef FunctionName, case CounterMappingRegion::SkippedRegion: OS << "Skipped,"; break; + case CounterMappingRegion::GapRegion: + OS << "Gap,"; + break; } OS << "File " << R.FileID << ", " << R.LineStart << ":" << R.ColumnStart diff --git a/lib/CodeGen/CoverageMappingGen.h b/lib/CodeGen/CoverageMappingGen.h index b6789c2a79f1..d07ed5ebcf2b 100644 --- a/lib/CodeGen/CoverageMappingGen.h +++ b/lib/CodeGen/CoverageMappingGen.h @@ -39,7 +39,7 @@ class CoverageSourceInfo : public PPCallbacks { public: ArrayRef<SourceRange> getSkippedRanges() const { return SkippedRanges; } - void SourceRangeSkipped(SourceRange Range) override; + void SourceRangeSkipped(SourceRange Range, SourceLocation EndifLoc) override; }; namespace CodeGen { diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp index bd4cb9a3667b..c375b82ea936 100644 --- a/lib/CodeGen/ItaniumCXXABI.cpp +++ b/lib/CodeGen/ItaniumCXXABI.cpp @@ -165,9 +165,17 @@ public: Address Ptr, QualType ElementType, const CXXDestructorDecl *Dtor) override; + /// Itanium says that an _Unwind_Exception has to be "double-word" + /// aligned (and thus the end of it is also so-aligned), meaning 16 + /// bytes. Of course, that was written for the actual Itanium, + /// which is a 64-bit platform. Classically, the ABI doesn't really + /// specify the alignment on other platforms, but in practice + /// libUnwind declares the struct with __attribute__((aligned)), so + /// we assume that alignment here. (It's generally 16 bytes, but + /// some targets overwrite it.) CharUnits getAlignmentOfExnObject() { - unsigned Align = CGM.getContext().getTargetInfo().getExnObjectAlignment(); - return CGM.getContext().toCharUnitsFromBits(Align); + auto align = CGM.getContext().getTargetDefaultAlignForAttributeAligned(); + return CGM.getContext().toCharUnitsFromBits(align); } void emitRethrow(CodeGenFunction &CGF, bool isNoReturn) override; @@ -292,6 +300,14 @@ public: // linkage together with vtables when needed. 
if (ForVTable && !Thunk->hasLocalLinkage()) Thunk->setLinkage(llvm::GlobalValue::AvailableExternallyLinkage); + + // Propagate dllexport storage, to enable the linker to generate import + // thunks as necessary (e.g. when a parent class has a key function and a + // child class doesn't, and the construction vtable for the parent in the + // child needs to reference the parent's thunks). + const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl()); + if (MD->hasAttr<DLLExportAttr>()) + Thunk->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); } llvm::Value *performThisAdjustment(CodeGenFunction &CGF, Address This, @@ -373,6 +389,10 @@ public: void emitCXXStructor(const CXXMethodDecl *MD, StructorType Type) override; + std::pair<llvm::Value *, const CXXRecordDecl *> + LoadVTablePtr(CodeGenFunction &CGF, Address This, + const CXXRecordDecl *RD) override; + private: bool hasAnyUnusedVirtualInlineFunction(const CXXRecordDecl *RD) const { const auto &VtableLayout = @@ -546,9 +566,9 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( llvm::Value *MemFnPtr, const MemberPointerType *MPT) { CGBuilderTy &Builder = CGF.Builder; - const FunctionProtoType *FPT = + const FunctionProtoType *FPT = MPT->getPointeeType()->getAs<FunctionProtoType>(); - const CXXRecordDecl *RD = + const CXXRecordDecl *RD = cast<CXXRecordDecl>(MPT->getClass()->getAs<RecordType>()->getDecl()); llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType( @@ -575,10 +595,10 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( Ptr = Builder.CreateInBoundsGEP(Ptr, Adj); This = Builder.CreateBitCast(Ptr, This->getType(), "this.adjusted"); ThisPtrForCall = This; - + // Load the function pointer. llvm::Value *FnAsInt = Builder.CreateExtractValue(MemFnPtr, 0, "memptr.ptr"); - + // If the LSB in the function pointer is 1, the function pointer points to // a virtual function. llvm::Value *IsVirtual; @@ -626,7 +646,7 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( CGF.EmitBlock(FnNonVirtual); llvm::Value *NonVirtualFn = Builder.CreateIntToPtr(FnAsInt, FTy->getPointerTo(), "memptr.nonvirtualfn"); - + // We're done. CGF.EmitBlock(FnEnd); llvm::PHINode *CalleePtr = Builder.CreatePHI(FTy->getPointerTo(), 2); @@ -791,7 +811,7 @@ llvm::Constant * ItaniumCXXABI::EmitNullMemberPointer(const MemberPointerType *MPT) { // Itanium C++ ABI 2.3: // A NULL pointer is represented as -1. - if (MPT->isMemberDataPointer()) + if (MPT->isMemberDataPointer()) return llvm::ConstantInt::get(CGM.PtrDiffTy, -1ULL, /*isSigned=*/true); llvm::Constant *Zero = llvm::ConstantInt::get(CGM.PtrDiffTy, 0); @@ -868,7 +888,7 @@ llvm::Constant *ItaniumCXXABI::BuildMemberPointer(const CXXMethodDecl *MD, (UseARMMethodPtrABI ? 2 : 1) * ThisAdjustment.getQuantity()); } - + return llvm::ConstantStruct::getAnon(MemPtr); } @@ -927,7 +947,7 @@ ItaniumCXXABI::EmitMemberPointerComparison(CodeGenFunction &CGF, // (L.ptr == 0 && ((L.adj|R.adj) & 1) == 0))) // The inequality tautologies have exactly the same structure, except // applying De Morgan's laws. - + llvm::Value *LPtr = Builder.CreateExtractValue(L, 0, "lhs.memptr.ptr"); llvm::Value *RPtr = Builder.CreateExtractValue(R, 0, "rhs.memptr.ptr"); @@ -980,7 +1000,7 @@ ItaniumCXXABI::EmitMemberPointerIsNotNull(CodeGenFunction &CGF, llvm::Constant::getAllOnesValue(MemPtr->getType()); return Builder.CreateICmpNE(MemPtr, NegativeOne, "memptr.tobool"); } - + // In Itanium, a member function pointer is not null if 'ptr' is not null. 
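As a recap of the Itanium member-pointer representation tested here, following the comments above (hypothetical types, not part of this patch):

  struct S { int n; void f(); };
  int S::*pd = nullptr;      // null data member pointer: stored as -1
  void (S::*pf)() = nullptr; // null function member pointer: ptr == 0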
llvm::Value *Ptr = Builder.CreateExtractValue(MemPtr, 0, "memptr.ptr"); @@ -1138,9 +1158,9 @@ static llvm::Constant *getItaniumDynamicCastFn(CodeGenFunction &CGF) { // const abi::__class_type_info *src, // const abi::__class_type_info *dst, // std::ptrdiff_t src2dst_offset); - + llvm::Type *Int8PtrTy = CGF.Int8PtrTy; - llvm::Type *PtrDiffTy = + llvm::Type *PtrDiffTy = CGF.ConvertType(CGF.getContext().getPointerDiffType()); llvm::Type *Args[4] = { Int8PtrTy, Int8PtrTy, Int8PtrTy, PtrDiffTy }; @@ -1427,8 +1447,9 @@ void ItaniumCXXABI::EmitInstanceFunctionProlog(CodeGenFunction &CGF) { if (CGF.CurFuncDecl && CGF.CurFuncDecl->hasAttr<NakedAttr>()) return; - /// Initialize the 'this' slot. - EmitThisParam(CGF); + /// Initialize the 'this' slot. In the Itanium C++ ABI, no prologue + /// adjustments are required, because they are all handled by thunks. + setCXXABIThisValue(CGF, loadIncomingCXXThis(CGF)); /// Initialize the 'vtt' slot if needed. if (getStructorImplicitParamDecl(CGF)) { @@ -1510,7 +1531,7 @@ void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT, VTable->setComdat(CGM.getModule().getOrInsertComdat(VTable->getName())); // Set the right visibility. - CGM.setGlobalVisibility(VTable, RD); + CGM.setGlobalVisibility(VTable, RD, ForDefinition); // Use pointer alignment for the vtable. Otherwise we would align them based // on the size of the initializer which doesn't make sense as only single @@ -1620,6 +1641,7 @@ llvm::GlobalVariable *ItaniumCXXABI::getAddrOfVTable(const CXXRecordDecl *RD, VTable = CGM.CreateOrReplaceCXXRuntimeVariable( Name, VTableType, llvm::GlobalValue::ExternalLinkage); VTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + CGM.setGlobalVisibility(VTable, RD, NotForDefinition); if (RD->hasAttr<DLLImportAttr>()) VTable->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); @@ -2111,30 +2133,31 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, (UseARMGuardVarABI && !useInt8GuardVariable) ? Builder.CreateAnd(LI, llvm::ConstantInt::get(CGM.Int8Ty, 1)) : LI; - llvm::Value *isInitialized = Builder.CreateIsNull(V, "guard.uninitialized"); + llvm::Value *NeedsInit = Builder.CreateIsNull(V, "guard.uninitialized"); llvm::BasicBlock *InitCheckBlock = CGF.createBasicBlock("init.check"); llvm::BasicBlock *EndBlock = CGF.createBasicBlock("init.end"); // Check if the first byte of the guard variable is zero. - Builder.CreateCondBr(isInitialized, InitCheckBlock, EndBlock); + CGF.EmitCXXGuardedInitBranch(NeedsInit, InitCheckBlock, EndBlock, + CodeGenFunction::GuardKind::VariableGuard, &D); CGF.EmitBlock(InitCheckBlock); // Variables used when coping with thread-safe statics and exceptions. - if (threadsafe) { + if (threadsafe) { // Call __cxa_guard_acquire. llvm::Value *V = CGF.EmitNounwindRuntimeCall(getGuardAcquireFn(CGM, guardPtrTy), guard); - + llvm::BasicBlock *InitBlock = CGF.createBasicBlock("init"); - + Builder.CreateCondBr(Builder.CreateIsNotNull(V, "tobool"), InitBlock, EndBlock); - + // Call __cxa_guard_abort along the exceptional edge. CGF.EHStack.pushCleanup<CallGuardAbort>(EHCleanup, guard); - + CGF.EmitBlock(InitBlock); } @@ -2447,11 +2470,11 @@ LValue ItaniumCXXABI::EmitThreadLocalVarDeclLValue(CodeGenFunction &CGF, /// if it's a base constructor or destructor with virtual bases. bool ItaniumCXXABI::NeedsVTTParameter(GlobalDecl GD) { const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl()); - + // We don't have any virtual bases, just return early.
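For illustration, the virtual-base case that makes NeedsVTTParameter return true (hypothetical hierarchy, not part of this patch):

  struct A { virtual ~A(); };
  struct B : virtual A { B(); }; // B's base-object constructor and base
                                 // destructor take an implicit VTT parameter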
if (!MD->getParent()->getNumVBases()) return false; - + // Check if we have a base constructor. if (isa<CXXConstructorDecl>(MD) && GD.getCtorType() == Ctor_Base) return true; @@ -2459,7 +2482,7 @@ bool ItaniumCXXABI::NeedsVTTParameter(GlobalDecl GD) { // Check if we have a base destructor. if (isa<CXXDestructorDecl>(MD) && GD.getDtorType() == Dtor_Base) return true; - + return false; } @@ -2648,6 +2671,7 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) { case BuiltinType::Float: case BuiltinType::Double: case BuiltinType::LongDouble: + case BuiltinType::Float16: case BuiltinType::Float128: case BuiltinType::Char16: case BuiltinType::Char32: @@ -2981,15 +3005,13 @@ static llvm::GlobalVariable::LinkageTypes getTypeInfoLinkage(CodeGenModule &CGM, if (RD->hasAttr<DLLImportAttr>() && ShouldUseExternalRTTIDescriptor(CGM, Ty)) return llvm::GlobalValue::ExternalLinkage; - if (RD->isDynamicClass()) { - llvm::GlobalValue::LinkageTypes LT = CGM.getVTableLinkage(RD); - // MinGW won't export the RTTI information when there is a key function. - // Make sure we emit our own copy instead of attempting to dllimport it. - if (RD->hasAttr<DLLImportAttr>() && - llvm::GlobalValue::isAvailableExternallyLinkage(LT)) - LT = llvm::GlobalValue::LinkOnceODRLinkage; - return LT; - } + // MinGW always uses LinkOnceODRLinkage for type info. + if (RD->isDynamicClass() && + !CGM.getContext() + .getTargetInfo() + .getTriple() + .isWindowsGNUEnvironment()) + return CGM.getVTableLinkage(RD); } return llvm::GlobalValue::LinkOnceODRLinkage; @@ -3648,6 +3670,18 @@ void ItaniumCXXABI::emitCXXStructor(const CXXMethodDecl *MD, !CGM.TryEmitBaseDestructorAsAlias(DD)) return; + // FIXME: The deleting destructor is equivalent to the selected operator + // delete if: + // * either the delete is a destroying operator delete or the destructor + // would be trivial if it weren't virtual, + // * the conversion from the 'this' parameter to the first parameter of the + // destructor is equivalent to a bitcast, + // * the destructor does not have an implicit "this" return, and + // * the operator delete has the same calling convention and IR function type + // as the destructor. + // In such cases we should try to emit the deleting dtor as an alias to the + // selected 'operator delete'. 
+ llvm::Function *Fn = CGM.codegenCXXStructor(MD, Type); if (CGType == StructorCodegen::COMDAT) { @@ -4011,3 +4045,9 @@ ItaniumCXXABI::emitTerminateForUnexpectedException(CodeGenFunction &CGF, } return CGF.EmitNounwindRuntimeCall(CGF.CGM.getTerminateFn()); } + +std::pair<llvm::Value *, const CXXRecordDecl *> +ItaniumCXXABI::LoadVTablePtr(CodeGenFunction &CGF, Address This, + const CXXRecordDecl *RD) { + return {CGF.GetVTablePtr(This, CGM.Int8PtrTy, RD), RD}; +} diff --git a/lib/CodeGen/MicrosoftCXXABI.cpp b/lib/CodeGen/MicrosoftCXXABI.cpp index 1bd2937e4747..ffb3681c2585 100644 --- a/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/lib/CodeGen/MicrosoftCXXABI.cpp @@ -244,9 +244,6 @@ public: void addImplicitStructorParams(CodeGenFunction &CGF, QualType &ResTy, FunctionArgList &Params) override; - llvm::Value *adjustThisParameterInVirtualFunctionPrologue( - CodeGenFunction &CGF, GlobalDecl GD, llvm::Value *This) override; - void EmitInstanceFunctionProlog(CodeGenFunction &CGF) override; AddedStructorArgs @@ -581,7 +578,7 @@ private: return GetVBaseOffsetFromVBPtr(CGF, Base, VBPOffset, VBTOffset, VBPtr); } - std::pair<Address, llvm::Value *> + std::tuple<Address, llvm::Value *, const CXXRecordDecl *> performBaseAdjustment(CodeGenFunction &CGF, Address Value, QualType SrcRecordTy); @@ -748,6 +745,10 @@ public: llvm::GlobalVariable *getThrowInfo(QualType T) override; + std::pair<llvm::Value *, const CXXRecordDecl *> + LoadVTablePtr(CodeGenFunction &CGF, Address This, + const CXXRecordDecl *RD) override; + private: typedef std::pair<const CXXRecordDecl *, CharUnits> VFTableIdTy; typedef llvm::DenseMap<VFTableIdTy, llvm::GlobalVariable *> VTablesMapTy; @@ -929,7 +930,7 @@ void MicrosoftCXXABI::emitBeginCatch(CodeGenFunction &CGF, /// We need to perform a generic polymorphic operation (like a typeid /// or a cast), which requires an object with a vfptr. Adjust the /// address to point to an object with a vfptr. -std::pair<Address, llvm::Value *> +std::tuple<Address, llvm::Value *, const CXXRecordDecl *> MicrosoftCXXABI::performBaseAdjustment(CodeGenFunction &CGF, Address Value, QualType SrcRecordTy) { Value = CGF.Builder.CreateBitCast(Value, CGF.Int8PtrTy); @@ -940,7 +941,8 @@ MicrosoftCXXABI::performBaseAdjustment(CodeGenFunction &CGF, Address Value, // covers non-virtual base subobjects: a class with its own virtual // functions would be a candidate to be a primary base. if (Context.getASTRecordLayout(SrcDecl).hasExtendableVFPtr()) - return std::make_pair(Value, llvm::ConstantInt::get(CGF.Int32Ty, 0)); + return std::make_tuple(Value, llvm::ConstantInt::get(CGF.Int32Ty, 0), + SrcDecl); // Okay, one of the vbases must have a vfptr, or else this isn't // actually a polymorphic class. 
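For illustration of the adjustment described above, a hierarchy where the only vfptr lives in a virtual base, so a polymorphic operation must first adjust 'this' to that subobject (hypothetical types, not part of this patch):

  struct A { virtual void f(); };
  struct B : virtual A {}; // B has no vfptr of its own; typeid on a B*
                           // first adjusts to the A virtual base subobject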
@@ -959,7 +961,7 @@ MicrosoftCXXABI::performBaseAdjustment(CodeGenFunction &CGF, Address Value, llvm::Value *Ptr = CGF.Builder.CreateInBoundsGEP(Value.getPointer(), Offset); CharUnits VBaseAlign = CGF.CGM.getVBaseAlignment(Value.getAlignment(), SrcDecl, PolymorphicBase); - return std::make_pair(Address(Ptr, VBaseAlign), Offset); + return std::make_tuple(Address(Ptr, VBaseAlign), Offset, PolymorphicBase); } bool MicrosoftCXXABI::shouldTypeidBeNullChecked(bool IsDeref, @@ -990,7 +992,7 @@ llvm::Value *MicrosoftCXXABI::EmitTypeid(CodeGenFunction &CGF, QualType SrcRecordTy, Address ThisPtr, llvm::Type *StdTypeInfoPtrTy) { - std::tie(ThisPtr, std::ignore) = + std::tie(ThisPtr, std::ignore, std::ignore) = performBaseAdjustment(CGF, ThisPtr, SrcRecordTy); auto Typeid = emitRTtypeidCall(CGF, ThisPtr.getPointer()).getInstruction(); return CGF.Builder.CreateBitCast(Typeid, StdTypeInfoPtrTy); @@ -1014,7 +1016,8 @@ llvm::Value *MicrosoftCXXABI::EmitDynamicCastCall( CGF.CGM.GetAddrOfRTTIDescriptor(DestRecordTy.getUnqualifiedType()); llvm::Value *Offset; - std::tie(This, Offset) = performBaseAdjustment(CGF, This, SrcRecordTy); + std::tie(This, Offset, std::ignore) = + performBaseAdjustment(CGF, This, SrcRecordTy); llvm::Value *ThisPtr = This.getPointer(); Offset = CGF.Builder.CreateTrunc(Offset, CGF.Int32Ty); @@ -1040,7 +1043,8 @@ llvm::Value * MicrosoftCXXABI::EmitDynamicCastToVoid(CodeGenFunction &CGF, Address Value, QualType SrcRecordTy, QualType DestTy) { - std::tie(Value, std::ignore) = performBaseAdjustment(CGF, Value, SrcRecordTy); + std::tie(Value, std::ignore, std::ignore) = + performBaseAdjustment(CGF, Value, SrcRecordTy); // PVOID __RTCastToVoid( // PVOID inptr) @@ -1433,50 +1437,54 @@ void MicrosoftCXXABI::addImplicitStructorParams(CodeGenFunction &CGF, } } -llvm::Value *MicrosoftCXXABI::adjustThisParameterInVirtualFunctionPrologue( - CodeGenFunction &CGF, GlobalDecl GD, llvm::Value *This) { - // In this ABI, every virtual function takes a pointer to one of the - // subobjects that first defines it as the 'this' parameter, rather than a - // pointer to the final overrider subobject. Thus, we need to adjust it back - // to the final overrider subobject before use. - // See comments in the MicrosoftVFTableContext implementation for the details. - CharUnits Adjustment = getVirtualFunctionPrologueThisAdjustment(GD); - if (Adjustment.isZero()) - return This; - - unsigned AS = cast<llvm::PointerType>(This->getType())->getAddressSpace(); - llvm::Type *charPtrTy = CGF.Int8Ty->getPointerTo(AS), - *thisTy = This->getType(); - - This = CGF.Builder.CreateBitCast(This, charPtrTy); - assert(Adjustment.isPositive()); - This = CGF.Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, This, - -Adjustment.getQuantity()); - return CGF.Builder.CreateBitCast(This, thisTy); -} - void MicrosoftCXXABI::EmitInstanceFunctionProlog(CodeGenFunction &CGF) { // Naked functions have no prolog. if (CGF.CurFuncDecl && CGF.CurFuncDecl->hasAttr<NakedAttr>()) return; - EmitThisParam(CGF); + // Overridden virtual methods of non-primary bases need to adjust the incoming + // 'this' pointer in the prologue. In this hierarchy, C::b will subtract + // sizeof(void*) to adjust from B* to C*: + // struct A { virtual void a(); }; + // struct B { virtual void b(); }; + // struct C : A, B { virtual void b(); }; + // + // Leave the value stored in the 'this' alloca unadjusted, so that the + // debugger sees the unadjusted value. Microsoft debuggers require this, and + // will apply the ThisAdjustment in the method type information. 
+ // FIXME: Do something better for DWARF debuggers, which won't expect this, + // without making our codegen depend on debug info settings. + llvm::Value *This = loadIncomingCXXThis(CGF); + const CXXMethodDecl *MD = cast<CXXMethodDecl>(CGF.CurGD.getDecl()); + if (!CGF.CurFuncIsThunk && MD->isVirtual()) { + CharUnits Adjustment = getVirtualFunctionPrologueThisAdjustment(CGF.CurGD); + if (!Adjustment.isZero()) { + unsigned AS = cast<llvm::PointerType>(This->getType())->getAddressSpace(); + llvm::Type *charPtrTy = CGF.Int8Ty->getPointerTo(AS), + *thisTy = This->getType(); + This = CGF.Builder.CreateBitCast(This, charPtrTy); + assert(Adjustment.isPositive()); + This = CGF.Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, This, + -Adjustment.getQuantity()); + This = CGF.Builder.CreateBitCast(This, thisTy, "this.adjusted"); + } + } + setCXXABIThisValue(CGF, This); - /// If this is a function that the ABI specifies returns 'this', initialize - /// the return slot to 'this' at the start of the function. - /// - /// Unlike the setting of return types, this is done within the ABI - /// implementation instead of by clients of CGCXXABI because: - /// 1) getThisValue is currently protected - /// 2) in theory, an ABI could implement 'this' returns some other way; - /// HasThisReturn only specifies a contract, not the implementation + // If this is a function that the ABI specifies returns 'this', initialize + // the return slot to 'this' at the start of the function. + // + // Unlike the setting of return types, this is done within the ABI + // implementation instead of by clients of CGCXXABI because: + // 1) getThisValue is currently protected + // 2) in theory, an ABI could implement 'this' returns some other way; + // HasThisReturn only specifies a contract, not the implementation if (HasThisReturn(CGF.CurGD)) CGF.Builder.CreateStore(getThisValue(CGF), CGF.ReturnValue); else if (hasMostDerivedReturn(CGF.CurGD)) CGF.Builder.CreateStore(CGF.EmitCastToVoidPtr(getThisValue(CGF)), CGF.ReturnValue); - const CXXMethodDecl *MD = cast<CXXMethodDecl>(CGF.CurGD.getDecl()); if (isa<CXXConstructorDecl>(MD) && MD->getParent()->getNumVBases()) { assert(getStructorImplicitParamDecl(CGF) && "no implicit parameter for a constructor with virtual bases?"); @@ -1961,7 +1969,7 @@ llvm::Function *MicrosoftCXXABI::EmitVirtualMemPtrThunk( // Start defining the function. CGF.StartFunction(GlobalDecl(), FnInfo.getReturnType(), ThunkFn, FnInfo, FunctionArgs, MD->getLocation(), SourceLocation()); - EmitThisParam(CGF); + setCXXABIThisValue(CGF, loadIncomingCXXThis(CGF)); // Load the vfptr and then callee from the vftable. The callee should have // adjusted 'this' so that the vfptr is at offset zero. @@ -2461,11 +2469,12 @@ void MicrosoftCXXABI::EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D, // Test our bit from the guard variable. 
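For illustration of the bitmask scheme used below, assuming one guard word shared by the static locals of a function, as the GuardNum indexing suggests (hypothetical source, not part of this patch):

  void f() {
    static int a = g(); // tests and sets bit 0 (GuardNum == 0)
    static int b = g(); // tests and sets bit 1 (GuardNum == 1)
  }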
llvm::ConstantInt *Bit = llvm::ConstantInt::get(GuardTy, 1ULL << GuardNum); llvm::LoadInst *LI = Builder.CreateLoad(GuardAddr); - llvm::Value *IsInitialized = - Builder.CreateICmpNE(Builder.CreateAnd(LI, Bit), Zero); + llvm::Value *NeedsInit = + Builder.CreateICmpEQ(Builder.CreateAnd(LI, Bit), Zero); llvm::BasicBlock *InitBlock = CGF.createBasicBlock("init"); llvm::BasicBlock *EndBlock = CGF.createBasicBlock("init.end"); - Builder.CreateCondBr(IsInitialized, EndBlock, InitBlock); + CGF.EmitCXXGuardedInitBranch(NeedsInit, InitBlock, EndBlock, + CodeGenFunction::GuardKind::VariableGuard, &D); // Set our bit in the guard variable and emit the initializer and add a global // destructor if appropriate. @@ -2500,7 +2509,8 @@ void MicrosoftCXXABI::EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D, Builder.CreateICmpSGT(FirstGuardLoad, InitThreadEpoch); llvm::BasicBlock *AttemptInitBlock = CGF.createBasicBlock("init.attempt"); llvm::BasicBlock *EndBlock = CGF.createBasicBlock("init.end"); - Builder.CreateCondBr(IsUninitialized, AttemptInitBlock, EndBlock); + CGF.EmitCXXGuardedInitBranch(IsUninitialized, AttemptInitBlock, EndBlock, + CodeGenFunction::GuardKind::VariableGuard, &D); // This BasicBlock attempts to determine whether or not this thread is // responsible for doing the initialization. @@ -3803,7 +3813,7 @@ static void emitCXXDestructor(CodeGenModule &CGM, const CXXDestructorDecl *dtor, if (!dtor->getParent()->getNumVBases() && (dtorType == StructorType::Complete || dtorType == StructorType::Base)) { bool ProducedAlias = !CGM.TryEmitDefinitionAsAlias( - GlobalDecl(dtor, Dtor_Complete), GlobalDecl(dtor, Dtor_Base), true); + GlobalDecl(dtor, Dtor_Complete), GlobalDecl(dtor, Dtor_Base)); if (ProducedAlias) { if (dtorType == StructorType::Complete) return; @@ -3898,7 +3908,7 @@ MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD, FunctionArgs, CD->getLocation(), SourceLocation()); // Create a scope with an artificial location for the body of this function. 
auto AL = ApplyDebugLocation::CreateArtificial(CGF); - EmitThisParam(CGF); + setCXXABIThisValue(CGF, loadIncomingCXXThis(CGF)); llvm::Value *This = getThisValue(CGF); llvm::Value *SrcVal = @@ -4241,3 +4251,11 @@ void MicrosoftCXXABI::emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) { }; CGF.EmitNoreturnRuntimeCallOrInvoke(getThrowFn(), Args); } + +std::pair<llvm::Value *, const CXXRecordDecl *> +MicrosoftCXXABI::LoadVTablePtr(CodeGenFunction &CGF, Address This, + const CXXRecordDecl *RD) { + std::tie(This, std::ignore, RD) = + performBaseAdjustment(CGF, This, QualType(RD->getTypeForDecl(), 0)); + return {CGF.GetVTablePtr(This, CGM.Int8PtrTy, RD), RD}; +} diff --git a/lib/CodeGen/ModuleBuilder.cpp b/lib/CodeGen/ModuleBuilder.cpp index fc642850d60a..8aa9bfb421b4 100644 --- a/lib/CodeGen/ModuleBuilder.cpp +++ b/lib/CodeGen/ModuleBuilder.cpp @@ -119,6 +119,14 @@ namespace { return Builder->GetAddrOfGlobal(global, ForDefinition_t(isForDefinition)); } + llvm::Module *StartModule(llvm::StringRef ModuleName, + llvm::LLVMContext &C) { + assert(!M && "Replacing existing Module?"); + M.reset(new llvm::Module(ModuleName, C)); + Initialize(*Ctx); + return M.get(); + } + void Initialize(ASTContext &Context) override { Ctx = &Context; @@ -317,6 +325,11 @@ llvm::Constant *CodeGenerator::GetAddrOfGlobal(GlobalDecl global, ->GetAddrOfGlobal(global, isForDefinition); } +llvm::Module *CodeGenerator::StartModule(llvm::StringRef ModuleName, + llvm::LLVMContext &C) { + return static_cast<CodeGeneratorImpl*>(this)->StartModule(ModuleName, C); +} + CodeGenerator *clang::CreateLLVMCodeGen( DiagnosticsEngine &Diags, llvm::StringRef ModuleName, const HeaderSearchOptions &HeaderSearchOpts, diff --git a/lib/CodeGen/SanitizerMetadata.cpp b/lib/CodeGen/SanitizerMetadata.cpp index 9848e3e452f4..f891cfbe4bb2 100644 --- a/lib/CodeGen/SanitizerMetadata.cpp +++ b/lib/CodeGen/SanitizerMetadata.cpp @@ -26,7 +26,8 @@ void SanitizerMetadata::reportGlobalToASan(llvm::GlobalVariable *GV, QualType Ty, bool IsDynInit, bool IsBlacklisted) { if (!CGM.getLangOpts().Sanitize.hasOneOf(SanitizerKind::Address | - SanitizerKind::KernelAddress)) + SanitizerKind::KernelAddress | + SanitizerKind::HWAddress)) return; IsDynInit &= !CGM.isInSanitizerBlacklist(GV, Loc, Ty, "init"); IsBlacklisted |= CGM.isInSanitizerBlacklist(GV, Loc, Ty); @@ -58,7 +59,8 @@ void SanitizerMetadata::reportGlobalToASan(llvm::GlobalVariable *GV, void SanitizerMetadata::reportGlobalToASan(llvm::GlobalVariable *GV, const VarDecl &D, bool IsDynInit) { if (!CGM.getLangOpts().Sanitize.hasOneOf(SanitizerKind::Address | - SanitizerKind::KernelAddress)) + SanitizerKind::KernelAddress | + SanitizerKind::HWAddress)) return; std::string QualName; llvm::raw_string_ostream OS(QualName); @@ -76,7 +78,8 @@ void SanitizerMetadata::disableSanitizerForGlobal(llvm::GlobalVariable *GV) { // For now, just make sure the global is not modified by the ASan // instrumentation. 
if (CGM.getLangOpts().Sanitize.hasOneOf(SanitizerKind::Address | - SanitizerKind::KernelAddress) + SanitizerKind::KernelAddress | + SanitizerKind::HWAddress)) reportGlobalToASan(GV, SourceLocation(), "", QualType(), false, true); } diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index ece3a407eae3..4b8006428f8f 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -14,6 +14,7 @@ #include "TargetInfo.h" #include "ABIInfo.h" +#include "CGBlocks.h" #include "CGCXXABI.h" #include "CGValue.h" #include "CodeGenFunction.h" @@ -22,7 +23,9 @@ #include "clang/CodeGen/SwiftCallingConv.h" #include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Type.h" #include "llvm/Support/raw_ostream.h" @@ -420,18 +423,17 @@ llvm::Constant *TargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule & return llvm::ConstantPointerNull::get(T); } -unsigned TargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, - const VarDecl *D) const { +LangAS TargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, + const VarDecl *D) const { assert(!CGM.getLangOpts().OpenCL && !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) && "Address space agnostic languages only"); - return D ? D->getType().getAddressSpace() - : static_cast<unsigned>(LangAS::Default); + return D ? D->getType().getAddressSpace() : LangAS::Default; } llvm::Value *TargetCodeGenInfo::performAddrSpaceCast( - CodeGen::CodeGenFunction &CGF, llvm::Value *Src, unsigned SrcAddr, - unsigned DestAddr, llvm::Type *DestTy, bool isNonNull) const { + CodeGen::CodeGenFunction &CGF, llvm::Value *Src, LangAS SrcAddr, + LangAS DestAddr, llvm::Type *DestTy, bool isNonNull) const { // Since target may map different address spaces in AST to the same address // space, an address space conversion may end up as a bitcast. if (auto *C = dyn_cast<llvm::Constant>(Src)) @@ -441,13 +443,18 @@ llvm::Value *TargetCodeGenInfo::performAddrSpaceCast( llvm::Constant * TargetCodeGenInfo::performAddrSpaceCast(CodeGenModule &CGM, llvm::Constant *Src, - unsigned SrcAddr, unsigned DestAddr, + LangAS SrcAddr, LangAS DestAddr, llvm::Type *DestTy) const { // Since target may map different address spaces in AST to the same address // space, an address space conversion may end up as a bitcast. return llvm::ConstantExpr::getPointerCast(Src, DestTy); } +llvm::SyncScope::ID +TargetCodeGenInfo::getLLVMSyncScopeID(SyncScope S, llvm::LLVMContext &C) const { + return C.getOrInsertSyncScopeID(""); /* default sync scope */ +} + static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays); /// isEmptyField - Return true iff the field is "empty", that is it @@ -869,7 +876,10 @@ bool IsX86_MMXType(llvm::Type *IRType) { static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF, StringRef Constraint, llvm::Type* Ty) { - if ((Constraint == "y" || Constraint == "&y") && Ty->isVectorTy()) { + bool IsMMXCons = llvm::StringSwitch<bool>(Constraint) + .Cases("y", "&y", "^Ym", true) + .Default(false); + if (IsMMXCons && Ty->isVectorTy()) { if (cast<llvm::VectorType>(Ty)->getBitWidth() != 64) { // Invalid MMX constraint return nullptr; } @@ -886,8 +896,14 @@ static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF, /// X86_VectorCall calling convention. Shared between x86_32 and x86_64.
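As a recap of the rule implemented below: under vectorcall, scalar floating-point types other than half count as vector types for register assignment, and with this change x87 80-bit long double no longer qualifies (hypothetical declarations, not part of this patch):

  double d;       // qualifies for vectorcall register assignment
  long double ld; // excluded when the target's long double is x87 extended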
static bool isX86VectorTypeForVectorCall(ASTContext &Context, QualType Ty) { if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { - if (BT->isFloatingPoint() && BT->getKind() != BuiltinType::Half) + if (BT->isFloatingPoint() && BT->getKind() != BuiltinType::Half) { + if (BT->getKind() == BuiltinType::LongDouble) { + if (&Context.getTargetInfo().getLongDoubleFormat() == + &llvm::APFloat::x87DoubleExtended()) + return false; + } return true; + } } else if (const VectorType *VT = Ty->getAs<VectorType>()) { // vectorcall can pass XMM, YMM, and ZMM vectors. We don't pass SSE1 MMX // registers specially. @@ -1041,7 +1057,8 @@ public: const llvm::Triple &Triple, const CodeGenOptions &Opts); void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override; + CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const override; int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { // Darwin uses different dwarf register numbers for EH. @@ -1070,14 +1087,14 @@ public: getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override { unsigned Sig = (0xeb << 0) | // jmp rel8 (0x06 << 8) | // .+0x08 - ('F' << 16) | - ('T' << 24); + ('v' << 16) | + ('2' << 24); return llvm::ConstantInt::get(CGM.Int32Ty, Sig); } StringRef getARCRetainAutoreleasedReturnValueMarker() const override { return "movl\t%ebp, %ebp" - "\t\t## marker for objc_retainAutoreleaseReturnValue"; + "\t\t// marker for objc_retainAutoreleaseReturnValue"; } }; @@ -1900,7 +1917,6 @@ bool X86_32TargetCodeGenInfo::isStructReturnInRegABI( case llvm::Triple::DragonFly: case llvm::Triple::FreeBSD: case llvm::Triple::OpenBSD: - case llvm::Triple::Bitrig: case llvm::Triple::Win32: return true; default: @@ -1908,9 +1924,11 @@ bool X86_32TargetCodeGenInfo::isStructReturnInRegABI( } } -void X86_32TargetCodeGenInfo::setTargetAttributes(const Decl *D, - llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const { +void X86_32TargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const { + if (!IsForDefinition) + return; if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) { // Get the LLVM function. @@ -2260,23 +2278,28 @@ public: llvm::Constant * getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override { - unsigned Sig; - if (getABIInfo().has64BitPointers()) - Sig = (0xeb << 0) | // jmp rel8 - (0x0a << 8) | // .+0x0c - ('F' << 16) | - ('T' << 24); - else - Sig = (0xeb << 0) | // jmp rel8 - (0x06 << 8) | // .+0x08 - ('F' << 16) | - ('T' << 24); + unsigned Sig = (0xeb << 0) | // jmp rel8 + (0x06 << 8) | // .+0x08 + ('v' << 16) | + ('2' << 24); return llvm::ConstantInt::get(CGM.Int32Ty, Sig); } void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override { + CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const override { + if (!IsForDefinition) + return; if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { + if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) { + // Get the LLVM function. + auto *Fn = cast<llvm::Function>(GV); + + // Now add the 'alignstack' attribute with a value of 16. 
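For illustration, the attribute handled in this hunk is written as follows in source and becomes an alignstack(16) LLVM function attribute (hypothetical declaration, not part of this patch):

  __attribute__((force_align_arg_pointer)) void callback(void);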
+ llvm::AttrBuilder B; + B.addStackAlignmentAttr(16); + Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); + } if (FD->hasAttr<AnyX86InterruptAttr>()) { llvm::Function *Fn = cast<llvm::Function>(GV); Fn->setCallingConv(llvm::CallingConv::X86_INTR); @@ -2323,7 +2346,8 @@ public: Win32StructABI, NumRegisterParameters, false) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override; + CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const override; void getDependentLibraryOption(llvm::StringRef Lib, llvm::SmallString<24> &Opt) const override { @@ -2351,11 +2375,12 @@ static void addStackProbeSizeTargetAttribute(const Decl *D, } } -void WinX86_32TargetCodeGenInfo::setTargetAttributes(const Decl *D, - llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const { - X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); - +void WinX86_32TargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const { + X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM, IsForDefinition); + if (!IsForDefinition) + return; addStackProbeSizeTargetAttribute(D, GV, CGM); } @@ -2366,7 +2391,8 @@ public: : TargetCodeGenInfo(new WinX86_64ABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override; + CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const override; int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override { return 7; @@ -2395,12 +2421,22 @@ public: } }; -void WinX86_64TargetCodeGenInfo::setTargetAttributes(const Decl *D, - llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const { - TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); - +void WinX86_64TargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const { + TargetCodeGenInfo::setTargetAttributes(D, GV, CGM, IsForDefinition); + if (!IsForDefinition) + return; if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { + if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) { + // Get the LLVM function. + auto *Fn = cast<llvm::Function>(GV); + + // Now add the 'alignstack' attribute with a value of 16. + llvm::AttrBuilder B; + B.addStackAlignmentAttr(16); + Fn->addAttributes(llvm::AttributeList::FunctionIndex, B); + } if (FD->hasAttr<AnyX86InterruptAttr>()) { llvm::Function *Fn = cast<llvm::Function>(GV); Fn->setCallingConv(llvm::CallingConv::X86_INTR); @@ -3514,18 +3550,27 @@ void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { unsigned FreeSSERegs = IsRegCall ? 
16 : 8; unsigned NeededInt, NeededSSE; - if (IsRegCall && FI.getReturnType()->getTypePtr()->isRecordType() && - !FI.getReturnType()->getTypePtr()->isUnionType()) { - FI.getReturnInfo() = - classifyRegCallStructType(FI.getReturnType(), NeededInt, NeededSSE); - if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) { - FreeIntRegs -= NeededInt; - FreeSSERegs -= NeededSSE; - } else { - FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType()); - } - } else if (!getCXXABI().classifyReturnType(FI)) - FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + if (!getCXXABI().classifyReturnType(FI)) { + if (IsRegCall && FI.getReturnType()->getTypePtr()->isRecordType() && + !FI.getReturnType()->getTypePtr()->isUnionType()) { + FI.getReturnInfo() = + classifyRegCallStructType(FI.getReturnType(), NeededInt, NeededSSE); + if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) { + FreeIntRegs -= NeededInt; + FreeSSERegs -= NeededSSE; + } else { + FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType()); + } + } else if (IsRegCall && FI.getReturnType()->getAs<ComplexType>()) { + // Complex Long Double Type is passed in Memory when Regcall + // calling convention is used. + const ComplexType *CT = FI.getReturnType()->getAs<ComplexType>(); + if (getContext().getCanonicalType(CT->getElementType()) == + getContext().LongDoubleTy) + FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType()); + } else + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + } // If the return value is indirect, then the hidden argument is consuming one // integer register. @@ -3991,7 +4036,10 @@ Address WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, namespace { /// PPC32_SVR4_ABIInfo - The 32-bit PowerPC ELF (SVR4) ABI information. class PPC32_SVR4_ABIInfo : public DefaultABIInfo { -bool IsSoftFloatABI; + bool IsSoftFloatABI; + + CharUnits getParamTypeAlignment(QualType Ty) const; + public: PPC32_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, bool SoftFloatABI) : DefaultABIInfo(CGT), IsSoftFloatABI(SoftFloatABI) {} @@ -4013,13 +4061,46 @@ public: bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override; }; +} +CharUnits PPC32_SVR4_ABIInfo::getParamTypeAlignment(QualType Ty) const { + // Complex types are passed just like their elements + if (const ComplexType *CTy = Ty->getAs<ComplexType>()) + Ty = CTy->getElementType(); + + if (Ty->isVectorType()) + return CharUnits::fromQuantity(getContext().getTypeSize(Ty) == 128 ? 16 + : 4); + + // For single-element float/vector structs, we consider the whole type + // to have the same alignment requirements as its single element. + const Type *AlignTy = nullptr; + if (const Type *EltType = isSingleElementStruct(Ty, getContext())) { + const BuiltinType *BT = EltType->getAs<BuiltinType>(); + if ((EltType->isVectorType() && getContext().getTypeSize(EltType) == 128) || + (BT && BT->isFloatingPoint())) + AlignTy = EltType; + } + + if (AlignTy) + return CharUnits::fromQuantity(AlignTy->isVectorType() ? 16 : 4); + return CharUnits::fromQuantity(4); } // TODO: this implementation is now likely redundant with // DefaultABIInfo::EmitVAArg. 
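// Editor's note: worked examples for getParamTypeAlignment() above
// (illustrative only; sizes assume a typical 32-bit PowerPC target):
//   vector float (4 x float, 128 bits) -> 16-byte alignment
//   vector short (4 x short, 64 bits)  -> 4-byte alignment
//   struct { double d; }               -> 4 (single FP-element struct rule)
//   _Complex double                    -> 4 (aligned like its element type)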
Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList, QualType Ty) const { + if (getTarget().getTriple().isOSDarwin()) { + auto TI = getContext().getTypeInfoInChars(Ty); + TI.second = getParamTypeAlignment(Ty); + + CharUnits SlotSize = CharUnits::fromQuantity(4); + return emitVoidPtrVAArg(CGF, VAList, Ty, + classifyArgumentType(Ty).isIndirect(), TI, SlotSize, + /*AllowHigherAlign=*/true); + } + const unsigned OverflowLimit = 8; if (const ComplexType *CTy = Ty->getAs<ComplexType>()) { // TODO: Implement this. For now ignore. @@ -4860,7 +4941,7 @@ public: : TargetCodeGenInfo(new AArch64ABIInfo(CGT, Kind)) {} StringRef getARCRetainAutoreleasedReturnValueMarker() const override { - return "mov\tfp, fp\t\t# marker for objc_retainAutoreleaseReturnValue"; + return "mov\tfp, fp\t\t// marker for objc_retainAutoreleaseReturnValue"; } int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { @@ -4869,6 +4950,22 @@ public: bool doesReturnSlotInterfereWithArgs() const override { return false; } }; + +class WindowsAArch64TargetCodeGenInfo : public AArch64TargetCodeGenInfo { +public: + WindowsAArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIInfo::ABIKind K) + : AArch64TargetCodeGenInfo(CGT, K) {} + + void getDependentLibraryOption(llvm::StringRef Lib, + llvm::SmallString<24> &Opt) const override { + Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib); + } + + void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value, + llvm::SmallString<32> &Opt) const override { + Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; + } +}; } ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const { @@ -5450,7 +5547,7 @@ public: } StringRef getARCRetainAutoreleasedReturnValueMarker() const override { - return "mov\tr7, r7\t\t@ marker for objc_retainAutoreleaseReturnValue"; + return "mov\tr7, r7\t\t// marker for objc_retainAutoreleaseReturnValue"; } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, @@ -5468,7 +5565,10 @@ public: } void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override { + CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const override { + if (!IsForDefinition) + return; const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; @@ -5510,7 +5610,8 @@ public: : ARMTargetCodeGenInfo(CGT, K) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override; + CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const override; void getDependentLibraryOption(llvm::StringRef Lib, llvm::SmallString<24> &Opt) const override { @@ -5524,8 +5625,11 @@ public: }; void WindowsARMTargetCodeGenInfo::setTargetAttributes( - const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { - ARMTargetCodeGenInfo::setTargetAttributes(D, GV, CGM); + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const { + ARMTargetCodeGenInfo::setTargetAttributes(D, GV, CGM, IsForDefinition); + if (!IsForDefinition) + return; addStackProbeSizeTargetAttribute(D, GV, CGM); } } @@ -6051,7 +6155,9 @@ public: : TargetCodeGenInfo(new NVPTXABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const override; + CodeGen::CodeGenModule &M, + ForDefinition_t IsForDefinition) const override; + private: // Adds a NamedMDNode with F, Name, and Operand as operands, and adds the // resulting MDNode to the 
nvvm.annotations MDNode. @@ -6105,9 +6211,11 @@ Address NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, llvm_unreachable("NVPTX does not support varargs"); } -void NVPTXTargetCodeGenInfo:: -setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const{ +void NVPTXTargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M, + ForDefinition_t IsForDefinition) const { + if (!IsForDefinition) + return; const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; @@ -6211,7 +6319,7 @@ public: return occupiesMoreThan(CGT, scalars, /*total*/ 4); } bool isSwiftErrorInRegister() const override { - return true; + return false; } }; @@ -6543,14 +6651,17 @@ public: MSP430TargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const override; + CodeGen::CodeGenModule &M, + ForDefinition_t IsForDefinition) const override; }; } -void MSP430TargetCodeGenInfo::setTargetAttributes(const Decl *D, - llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const { +void MSP430TargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M, + ForDefinition_t IsForDefinition) const { + if (!IsForDefinition) + return; if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { if (const MSP430InterruptAttr *attr = FD->getAttr<MSP430InterruptAttr>()) { // Handle 'interrupt' attribute: @@ -6609,10 +6720,21 @@ public: } void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override { + CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const override { const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; llvm::Function *Fn = cast<llvm::Function>(GV); + + if (FD->hasAttr<MipsLongCallAttr>()) + Fn->addFnAttr("long-call"); + else if (FD->hasAttr<MipsShortCallAttr>()) + Fn->addFnAttr("short-call"); + + // Other attributes do not have a meaning for declarations. 
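// Editor's note (hedged illustration, not from the patch): "long-call" and
// "short-call" change how calls *to* the function are lowered, so they are
// attached even when only a declaration is emitted; the attributes handled
// past the IsForDefinition check below describe the body itself. For
//   __attribute__((long_call)) int f(void);
// the expected IR is roughly:
//   declare i32 @f() #0        ; attributes #0 = { "long-call" }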
+ if (!IsForDefinition) + return; + if (FD->hasAttr<Mips16Attr>()) { Fn->addFnAttr("mips16"); } @@ -6974,7 +7096,10 @@ public: : TargetCodeGenInfo(new DefaultABIInfo(CGT)) { } void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &CGM) const override { + CodeGen::CodeGenModule &CGM, + ForDefinition_t IsForDefinition) const override { + if (!IsForDefinition) + return; const auto *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; auto *Fn = cast<llvm::Function>(GV); @@ -7002,11 +7127,15 @@ public: : DefaultTargetCodeGenInfo(CGT) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const override; + CodeGen::CodeGenModule &M, + ForDefinition_t IsForDefinition) const override; }; void TCETargetCodeGenInfo::setTargetAttributes( - const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M, + ForDefinition_t IsForDefinition) const { + if (!IsForDefinition) + return; const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; @@ -7302,38 +7431,138 @@ public: namespace { class AMDGPUABIInfo final : public DefaultABIInfo { +private: + static const unsigned MaxNumRegsForArgsRet = 16; + + unsigned numRegsForType(QualType Ty) const; + + bool isHomogeneousAggregateBaseType(QualType Ty) const override; + bool isHomogeneousAggregateSmallEnough(const Type *Base, + uint64_t Members) const override; + public: - explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} + explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) : + DefaultABIInfo(CGT) {} -private: - ABIArgInfo classifyArgumentType(QualType Ty) const; + ABIArgInfo classifyReturnType(QualType RetTy) const; + ABIArgInfo classifyKernelArgumentType(QualType Ty) const; + ABIArgInfo classifyArgumentType(QualType Ty, unsigned &NumRegsLeft) const; void computeInfo(CGFunctionInfo &FI) const override; }; +bool AMDGPUABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { + return true; +} + +bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough( + const Type *Base, uint64_t Members) const { + uint32_t NumRegs = (getContext().getTypeSize(Base) + 31) / 32; + + // Homogeneous Aggregates may occupy at most 16 registers. + return Members * NumRegs <= MaxNumRegsForArgsRet; +} + +/// Estimate number of registers the type will use when passed in registers. +unsigned AMDGPUABIInfo::numRegsForType(QualType Ty) const { + unsigned NumRegs = 0; + + if (const VectorType *VT = Ty->getAs<VectorType>()) { + // Compute from the number of elements. The reported size is based on the + // in-memory size, which includes the padding 4th element for 3-vectors. + QualType EltTy = VT->getElementType(); + unsigned EltSize = getContext().getTypeSize(EltTy); + + // 16-bit element vectors should be passed as packed. 
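// Editor's note (hedged worked examples for numRegsForType() as defined
// here; VGPR units are 32 bits wide and sizes round up):
//   <4 x half>  -> (4 + 1) / 2 = 2 registers (16-bit elements pack in pairs)
//   <3 x float> -> 3 * 1 = 3 registers (the in-memory padding element of a
//                  3-vector is not counted, per the comment above)
//   struct { double d; int i; } -> 2 + 1 = 3 registers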
+ if (EltSize == 16) + return (VT->getNumElements() + 1) / 2; + + unsigned EltNumRegs = (EltSize + 31) / 32; + return EltNumRegs * VT->getNumElements(); + } + + if (const RecordType *RT = Ty->getAs<RecordType>()) { + const RecordDecl *RD = RT->getDecl(); + assert(!RD->hasFlexibleArrayMember()); + + for (const FieldDecl *Field : RD->fields()) { + QualType FieldTy = Field->getType(); + NumRegs += numRegsForType(FieldTy); + } + + return NumRegs; + } + + return (getContext().getTypeSize(Ty) + 31) / 32; +} + void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const { + llvm::CallingConv::ID CC = FI.getCallingConvention(); + if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); - unsigned CC = FI.getCallingConvention(); - for (auto &Arg : FI.arguments()) - if (CC == llvm::CallingConv::AMDGPU_KERNEL) - Arg.info = classifyArgumentType(Arg.type); - else - Arg.info = DefaultABIInfo::classifyArgumentType(Arg.type); + unsigned NumRegsLeft = MaxNumRegsForArgsRet; + for (auto &Arg : FI.arguments()) { + if (CC == llvm::CallingConv::AMDGPU_KERNEL) { + Arg.info = classifyKernelArgumentType(Arg.type); + } else { + Arg.info = classifyArgumentType(Arg.type, NumRegsLeft); + } + } } -/// \brief Classify argument of given type \p Ty. -ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty) const { - llvm::StructType *StrTy = dyn_cast<llvm::StructType>(CGT.ConvertType(Ty)); - if (!StrTy) { - return DefaultABIInfo::classifyArgumentType(Ty); +ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const { + if (isAggregateTypeForABI(RetTy)) { + // Records with non-trivial destructors/copy-constructors should not be + // returned by value. + if (!getRecordArgABI(RetTy, getCXXABI())) { + // Ignore empty structs/unions. + if (isEmptyRecord(getContext(), RetTy, true)) + return ABIArgInfo::getIgnore(); + + // Lower single-element structs to just return a regular value. + if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext())) + return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); + + if (const RecordType *RT = RetTy->getAs<RecordType>()) { + const RecordDecl *RD = RT->getDecl(); + if (RD->hasFlexibleArrayMember()) + return DefaultABIInfo::classifyReturnType(RetTy); + } + + // Pack aggregates <= 4 bytes into single VGPR or pair. + uint64_t Size = getContext().getTypeSize(RetTy); + if (Size <= 16) + return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); + + if (Size <= 32) + return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); + + if (Size <= 64) { + llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext()); + return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2)); + } + + if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet) + return ABIArgInfo::getDirect(); + } } + // Otherwise just do the default thing. + return DefaultABIInfo::classifyReturnType(RetTy); +} + +/// For kernels all parameters are really passed in a special buffer. It doesn't +/// make sense to pass anything byval, so everything must be direct. +ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const { + Ty = useFirstFieldIfTransparentUnion(Ty); + + // TODO: Can we omit empty structs? + // Coerce single element structs to its element. 
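// Editor's note (hedged worked examples for the size-based return packing
// above; hypothetical C types, sizes from a typical 64-bit datalayout):
//   struct { char a, b; }       -> 16 bits -> returned directly as i16
//   struct { short a; char b; } -> 32 bits -> returned directly as i32
//   struct { int a; float b; }  -> 64 bits -> returned as [2 x i32]
//   struct { int a[6]; }        -> 6 registers (<= 16) -> plain direct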
- if (StrTy->getNumElements() == 1) { - return ABIArgInfo::getDirect(); - } + if (const Type *SeltTy = isSingleElementStruct(Ty, getContext())) + return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); // If we set CanBeFlattened to true, CodeGen will expand the struct to its // individual elements, which confuses the Clover OpenCL backend; therefore we @@ -7341,30 +7570,102 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty) const { return ABIArgInfo::getDirect(nullptr, 0, nullptr, false); } +ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, + unsigned &NumRegsLeft) const { + assert(NumRegsLeft <= MaxNumRegsForArgsRet && "register estimate underflow"); + + Ty = useFirstFieldIfTransparentUnion(Ty); + + if (isAggregateTypeForABI(Ty)) { + // Records with non-trivial destructors/copy-constructors should not be + // passed by value. + if (auto RAA = getRecordArgABI(Ty, getCXXABI())) + return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); + + // Ignore empty structs/unions. + if (isEmptyRecord(getContext(), Ty, true)) + return ABIArgInfo::getIgnore(); + + // Lower single-element structs to just pass a regular value. TODO: We + // could do reasonable-size multiple-element structs too, using getExpand(), + // though watch out for things like bitfields. + if (const Type *SeltTy = isSingleElementStruct(Ty, getContext())) + return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); + + if (const RecordType *RT = Ty->getAs<RecordType>()) { + const RecordDecl *RD = RT->getDecl(); + if (RD->hasFlexibleArrayMember()) + return DefaultABIInfo::classifyArgumentType(Ty); + } + + // Pack aggregates <= 8 bytes into single VGPR or pair. + uint64_t Size = getContext().getTypeSize(Ty); + if (Size <= 64) { + unsigned NumRegs = (Size + 31) / 32; + NumRegsLeft -= std::min(NumRegsLeft, NumRegs); + + if (Size <= 16) + return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); + + if (Size <= 32) + return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); + + // XXX: Should this be i64 instead, and should the limit increase? + llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext()); + return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2)); + } + + if (NumRegsLeft > 0) { + unsigned NumRegs = numRegsForType(Ty); + if (NumRegsLeft >= NumRegs) { + NumRegsLeft -= NumRegs; + return ABIArgInfo::getDirect(); + } + } + } + + // Otherwise just do the default thing. 
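// Editor's note (hedged trace of the 16-register argument budget consumed
// by classifyArgumentType() above; types are illustrative):
//   NumRegsLeft = 16
//   arg0 struct { int x[4]; } -> 4 regs, passed direct, NumRegsLeft = 12
//   arg1 <8 x float>          -> 8 regs, counted via the fallback path,
//                                NumRegsLeft = 4
//   arg2 struct { int x[6]; } -> needs 6 > 4 remaining, so it takes the
//                                default (indirect) lowering instead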
+ ABIArgInfo ArgInfo = DefaultABIInfo::classifyArgumentType(Ty); + if (!ArgInfo.isIndirect()) { + unsigned NumRegs = numRegsForType(Ty); + NumRegsLeft -= std::min(NumRegs, NumRegsLeft); + } + + return ArgInfo; +} + class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo { public: AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(new AMDGPUABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const override; + CodeGen::CodeGenModule &M, + ForDefinition_t IsForDefinition) const override; unsigned getOpenCLKernelCallingConv() const override; llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM, llvm::PointerType *T, QualType QT) const override; - unsigned getASTAllocaAddressSpace() const override { - return LangAS::FirstTargetAddressSpace + - getABIInfo().getDataLayout().getAllocaAddrSpace(); - } - unsigned getGlobalVarAddressSpace(CodeGenModule &CGM, - const VarDecl *D) const override; + LangAS getASTAllocaAddressSpace() const override { + return getLangASFromTargetAS( + getABIInfo().getDataLayout().getAllocaAddrSpace()); + } + LangAS getGlobalVarAddressSpace(CodeGenModule &CGM, + const VarDecl *D) const override; + llvm::SyncScope::ID getLLVMSyncScopeID(SyncScope S, + llvm::LLVMContext &C) const override; + llvm::Function * + createEnqueuedBlockKernel(CodeGenFunction &CGF, + llvm::Function *BlockInvokeFunc, + llvm::Value *BlockLiteral) const override; }; } void AMDGPUTargetCodeGenInfo::setTargetAttributes( - const Decl *D, - llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const { + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M, + ForDefinition_t IsForDefinition) const { + if (!IsForDefinition) + return; const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); if (!FD) return; @@ -7441,21 +7742,19 @@ llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer( llvm::ConstantPointerNull::get(NPT), PT); } -unsigned +LangAS AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, const VarDecl *D) const { assert(!CGM.getLangOpts().OpenCL && !(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) && "Address space agnostic languages only"); - unsigned DefaultGlobalAS = - LangAS::FirstTargetAddressSpace + - CGM.getContext().getTargetAddressSpace(LangAS::opencl_global); + LangAS DefaultGlobalAS = getLangASFromTargetAS( + CGM.getContext().getTargetAddressSpace(LangAS::opencl_global)); if (!D) return DefaultGlobalAS; - unsigned AddrSpace = D->getType().getAddressSpace(); - assert(AddrSpace == LangAS::Default || - AddrSpace >= LangAS::FirstTargetAddressSpace); + LangAS AddrSpace = D->getType().getAddressSpace(); + assert(AddrSpace == LangAS::Default || isTargetAddressSpace(AddrSpace)); if (AddrSpace != LangAS::Default) return AddrSpace; @@ -7466,6 +7765,26 @@ AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM, return DefaultGlobalAS; } +llvm::SyncScope::ID +AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(SyncScope S, + llvm::LLVMContext &C) const { + StringRef Name; + switch (S) { + case SyncScope::OpenCLWorkGroup: + Name = "workgroup"; + break; + case SyncScope::OpenCLDevice: + Name = "agent"; + break; + case SyncScope::OpenCLAllSVMDevices: + Name = ""; + break; + case SyncScope::OpenCLSubGroup: + Name = "subgroup"; + } + return C.getOrInsertSyncScopeID(Name); +} + //===----------------------------------------------------------------------===// // SPARC v8 ABI Implementation. // Based on the SPARC Compliance Definition version 2.4.1. 
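Editor's note: a hedged usage sketch of the syncscope mapping above (TCGI and
Builder are assumed names; not part of the patch). A caller lowering an
OpenCL work_group fence would obtain the target's named scope and feed it to
the IRBuilder:

  // Resolves to "workgroup" on AMDGPU; the default mapping is used elsewhere.
  llvm::SyncScope::ID SSID = TCGI.getLLVMSyncScopeID(
      SyncScope::OpenCLWorkGroup, Builder.getContext());
  Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);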
@@ -8506,7 +8825,8 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { if (getTarget().getABI() == "darwinpcs") Kind = AArch64ABIInfo::DarwinPCS; else if (Triple.isOSWindows()) - Kind = AArch64ABIInfo::Win64; + return SetCGInfo( + new WindowsAArch64TargetCodeGenInfo(Types, AArch64ABIInfo::Win64)); return SetCGInfo(new AArch64TargetCodeGenInfo(Types, Kind)); } @@ -8636,3 +8956,108 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { return SetCGInfo(new SPIRTargetCodeGenInfo(Types)); } } + +/// Create an OpenCL kernel for an enqueued block. +/// +/// The kernel has the same function type as the block invoke function. Its +/// name is the name of the block invoke function postfixed with "_kernel". +/// It simply calls the block invoke function then returns. +llvm::Function * +TargetCodeGenInfo::createEnqueuedBlockKernel(CodeGenFunction &CGF, + llvm::Function *Invoke, + llvm::Value *BlockLiteral) const { + auto *InvokeFT = Invoke->getFunctionType(); + llvm::SmallVector<llvm::Type *, 2> ArgTys; + for (auto &P : InvokeFT->params()) + ArgTys.push_back(P); + auto &C = CGF.getLLVMContext(); + std::string Name = Invoke->getName().str() + "_kernel"; + auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C), ArgTys, false); + auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name, + &CGF.CGM.getModule()); + auto IP = CGF.Builder.saveIP(); + auto *BB = llvm::BasicBlock::Create(C, "entry", F); + auto &Builder = CGF.Builder; + Builder.SetInsertPoint(BB); + llvm::SmallVector<llvm::Value *, 2> Args; + for (auto &A : F->args()) + Args.push_back(&A); + Builder.CreateCall(Invoke, Args); + Builder.CreateRetVoid(); + Builder.restoreIP(IP); + return F; +} + +/// Create an OpenCL kernel for an enqueued block. +/// +/// The type of the first argument (the block literal) is the struct type +/// of the block literal instead of a pointer type. The first argument +/// (block literal) is passed directly by value to the kernel. The kernel +/// allocates the same type of struct on stack and stores the block literal +/// to it and passes its pointer to the block invoke function. The kernel +/// has "enqueued-block" function attribute and kernel argument metadata. 
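// Editor's note (hedged illustration of the generic wrapper defined above):
// for a block invoke function
//   define internal void @__f_block_invoke(i8* %literal, i32 %x)
// the base implementation is expected to emit
//   define internal void @__f_block_invoke_kernel(i8* %0, i32 %1) {
//   entry:
//     call void @__f_block_invoke(i8* %0, i32 %1)
//     ret void
//   }
// whereas the AMDGPU override below passes the block literal by value and
// attaches OpenCL kernel-argument metadata.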
+llvm::Function *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel( + CodeGenFunction &CGF, llvm::Function *Invoke, + llvm::Value *BlockLiteral) const { + auto &Builder = CGF.Builder; + auto &C = CGF.getLLVMContext(); + + auto *BlockTy = BlockLiteral->getType()->getPointerElementType(); + auto *InvokeFT = Invoke->getFunctionType(); + llvm::SmallVector<llvm::Type *, 2> ArgTys; + llvm::SmallVector<llvm::Metadata *, 8> AddressQuals; + llvm::SmallVector<llvm::Metadata *, 8> AccessQuals; + llvm::SmallVector<llvm::Metadata *, 8> ArgTypeNames; + llvm::SmallVector<llvm::Metadata *, 8> ArgBaseTypeNames; + llvm::SmallVector<llvm::Metadata *, 8> ArgTypeQuals; + llvm::SmallVector<llvm::Metadata *, 8> ArgNames; + + ArgTys.push_back(BlockTy); + ArgTypeNames.push_back(llvm::MDString::get(C, "__block_literal")); + AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(0))); + ArgBaseTypeNames.push_back(llvm::MDString::get(C, "__block_literal")); + ArgTypeQuals.push_back(llvm::MDString::get(C, "")); + AccessQuals.push_back(llvm::MDString::get(C, "none")); + ArgNames.push_back(llvm::MDString::get(C, "block_literal")); + for (unsigned I = 1, E = InvokeFT->getNumParams(); I < E; ++I) { + ArgTys.push_back(InvokeFT->getParamType(I)); + ArgTypeNames.push_back(llvm::MDString::get(C, "void*")); + AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(3))); + AccessQuals.push_back(llvm::MDString::get(C, "none")); + ArgBaseTypeNames.push_back(llvm::MDString::get(C, "void*")); + ArgTypeQuals.push_back(llvm::MDString::get(C, "")); + ArgNames.push_back( + llvm::MDString::get(C, (Twine("local_arg") + Twine(I)).str())); + } + std::string Name = Invoke->getName().str() + "_kernel"; + auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C), ArgTys, false); + auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name, + &CGF.CGM.getModule()); + F->addFnAttr("enqueued-block"); + auto IP = CGF.Builder.saveIP(); + auto *BB = llvm::BasicBlock::Create(C, "entry", F); + Builder.SetInsertPoint(BB); + unsigned BlockAlign = CGF.CGM.getDataLayout().getPrefTypeAlignment(BlockTy); + auto *BlockPtr = Builder.CreateAlloca(BlockTy, nullptr); + BlockPtr->setAlignment(BlockAlign); + Builder.CreateAlignedStore(F->arg_begin(), BlockPtr, BlockAlign); + auto *Cast = Builder.CreatePointerCast(BlockPtr, InvokeFT->getParamType(0)); + llvm::SmallVector<llvm::Value *, 2> Args; + Args.push_back(Cast); + for (auto I = F->arg_begin() + 1, E = F->arg_end(); I != E; ++I) + Args.push_back(I); + Builder.CreateCall(Invoke, Args); + Builder.CreateRetVoid(); + Builder.restoreIP(IP); + + F->setMetadata("kernel_arg_addr_space", llvm::MDNode::get(C, AddressQuals)); + F->setMetadata("kernel_arg_access_qual", llvm::MDNode::get(C, AccessQuals)); + F->setMetadata("kernel_arg_type", llvm::MDNode::get(C, ArgTypeNames)); + F->setMetadata("kernel_arg_base_type", + llvm::MDNode::get(C, ArgBaseTypeNames)); + F->setMetadata("kernel_arg_type_qual", llvm::MDNode::get(C, ArgTypeQuals)); + if (CGF.CGM.getCodeGenOpts().EmitOpenCLArgMetadata) + F->setMetadata("kernel_arg_name", llvm::MDNode::get(C, ArgNames)); + + return F; +} diff --git a/lib/CodeGen/TargetInfo.h b/lib/CodeGen/TargetInfo.h index 952ef96c4aef..d745e420c4a5 100644 --- a/lib/CodeGen/TargetInfo.h +++ b/lib/CodeGen/TargetInfo.h @@ -15,9 +15,11 @@ #ifndef LLVM_CLANG_LIB_CODEGEN_TARGETINFO_H #define LLVM_CLANG_LIB_CODEGEN_TARGETINFO_H +#include "CodeGenModule.h" #include "CGValue.h" #include "clang/AST/Type.h" #include "clang/Basic/LLVM.h" +#include 
"clang/Basic/SyncScope.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" @@ -34,8 +36,8 @@ class Decl; namespace CodeGen { class ABIInfo; class CallArgList; -class CodeGenModule; class CodeGenFunction; +class CGBlockInfo; class CGFunctionInfo; /// TargetCodeGenInfo - This class organizes various target-specific @@ -55,7 +57,8 @@ public: /// setTargetAttributes - Provides a convenient hook to handle extra /// target-specific attributes for the given global. virtual void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const {} + CodeGen::CodeGenModule &M, + ForDefinition_t IsForDefinition) const {} /// emitTargetMD - Provides a convenient hook to handle extra /// target-specific metadata for the given global. @@ -233,11 +236,11 @@ public: /// other than OpenCL and CUDA. /// If \p D is nullptr, returns the default target favored address space /// for global variable. - virtual unsigned getGlobalVarAddressSpace(CodeGenModule &CGM, - const VarDecl *D) const; + virtual LangAS getGlobalVarAddressSpace(CodeGenModule &CGM, + const VarDecl *D) const; /// Get the AST address space for alloca. - virtual unsigned getASTAllocaAddressSpace() const { return LangAS::Default; } + virtual LangAS getASTAllocaAddressSpace() const { return LangAS::Default; } /// Perform address space cast of an expression of pointer type. /// \param V is the LLVM value to be casted to another address space. @@ -246,9 +249,8 @@ public: /// \param DestTy is the destination LLVM pointer type. /// \param IsNonNull is the flag indicating \p V is known to be non null. virtual llvm::Value *performAddrSpaceCast(CodeGen::CodeGenFunction &CGF, - llvm::Value *V, unsigned SrcAddr, - unsigned DestAddr, - llvm::Type *DestTy, + llvm::Value *V, LangAS SrcAddr, + LangAS DestAddr, llvm::Type *DestTy, bool IsNonNull = false) const; /// Perform address space cast of a constant expression of pointer type. @@ -256,9 +258,45 @@ public: /// \param SrcAddr is the language address space of \p V. /// \param DestAddr is the targeted language address space. /// \param DestTy is the destination LLVM pointer type. - virtual llvm::Constant * - performAddrSpaceCast(CodeGenModule &CGM, llvm::Constant *V, unsigned SrcAddr, - unsigned DestAddr, llvm::Type *DestTy) const; + virtual llvm::Constant *performAddrSpaceCast(CodeGenModule &CGM, + llvm::Constant *V, + LangAS SrcAddr, LangAS DestAddr, + llvm::Type *DestTy) const; + + /// Get the syncscope used in LLVM IR. + virtual llvm::SyncScope::ID getLLVMSyncScopeID(SyncScope S, + llvm::LLVMContext &C) const; + + /// Inteface class for filling custom fields of a block literal for OpenCL. + class TargetOpenCLBlockHelper { + public: + typedef std::pair<llvm::Value *, StringRef> ValueTy; + TargetOpenCLBlockHelper() {} + virtual ~TargetOpenCLBlockHelper() {} + /// Get the custom field types for OpenCL blocks. + virtual llvm::SmallVector<llvm::Type *, 1> getCustomFieldTypes() = 0; + /// Get the custom field values for OpenCL blocks. + virtual llvm::SmallVector<ValueTy, 1> + getCustomFieldValues(CodeGenFunction &CGF, const CGBlockInfo &Info) = 0; + virtual bool areAllCustomFieldValuesConstant(const CGBlockInfo &Info) = 0; + /// Get the custom field values for OpenCL blocks if all values are LLVM + /// constants. 
+ virtual llvm::SmallVector<llvm::Constant *, 1> + getCustomFieldValues(CodeGenModule &CGM, const CGBlockInfo &Info) = 0; + }; + virtual TargetOpenCLBlockHelper *getTargetOpenCLBlockHelper() const { + return nullptr; + } + + /// Create an OpenCL kernel for an enqueued block. The kernel function is + /// a wrapper for the block invoke function with target-specific calling + /// convention and ABI as an OpenCL kernel. The wrapper function accepts + /// block context and block arguments in a target-specific way and calls + /// the original block invoke function. + virtual llvm::Function * + createEnqueuedBlockKernel(CodeGenFunction &CGF, + llvm::Function *BlockInvokeFunc, + llvm::Value *BlockLiteral) const; }; } // namespace CodeGen
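Editor's note: a hedged sketch (hypothetical target, assumed names; the
constructor wiring to an ABIInfo is omitted) of how a backend opts in to the
hook declared above. The base implementation in TargetInfo.cpp already builds
a plain call-through kernel, so an override is only needed when the target
wants a different wrapper ABI, as AMDGPU does:

  class FooTargetCodeGenInfo : public CodeGen::TargetCodeGenInfo {
  public:
    // Build a wrapper kernel with Foo-specific argument passing.
    llvm::Function *
    createEnqueuedBlockKernel(CodeGen::CodeGenFunction &CGF,
                              llvm::Function *BlockInvokeFunc,
                              llvm::Value *BlockLiteral) const override;
  };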