diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-01-14 15:38:35 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-01-14 15:38:35 +0000 |
commit | d2e0a8dd949ab874c6d66f97106bd5c270e2fa7d (patch) | |
tree | e8a99a0386e8f6bece630700da5915c8a312c2d9 /lib/CodeGen | |
parent | fdc82ccb3f2b23a89e7002fe8238e1422b00f96a (diff) |
Diffstat (limited to 'lib/CodeGen')
-rw-r--r-- | lib/CodeGen/CGBuiltin.cpp | 10 | ||||
-rw-r--r-- | lib/CodeGen/CGDecl.cpp | 4 | ||||
-rw-r--r-- | lib/CodeGen/CGDeclCXX.cpp | 3 | ||||
-rw-r--r-- | lib/CodeGen/CGOpenMPRuntime.cpp | 16 | ||||
-rw-r--r-- | lib/CodeGen/CGOpenMPRuntime.h | 42 | ||||
-rw-r--r-- | lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 223 | ||||
-rw-r--r-- | lib/CodeGen/CGOpenMPRuntimeNVPTX.h | 40 | ||||
-rw-r--r-- | lib/CodeGen/CGStmt.cpp | 4 | ||||
-rw-r--r-- | lib/CodeGen/CGStmtOpenMP.cpp | 10 | ||||
-rw-r--r-- | lib/CodeGen/CodeGenFunction.h | 2 | ||||
-rw-r--r-- | lib/CodeGen/CodeGenModule.cpp | 8 | ||||
-rw-r--r-- | lib/CodeGen/ItaniumCXXABI.cpp | 42 |
12 files changed, 355 insertions, 49 deletions
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 4d34b3e9222f..2ede1d46b3d5 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -4318,9 +4318,9 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, } if (BuiltinID == ARM::BI__builtin_arm_rbit) { - return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_rbit), - EmitScalarExpr(E->getArg(0)), - "rbit"); + llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); } if (BuiltinID == ARM::BI__clear_cache) { @@ -5226,14 +5226,14 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, "rbit of unusual size!"); llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); return Builder.CreateCall( - CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit"); + CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); } if (BuiltinID == AArch64::BI__builtin_arm_rbit64) { assert((getContext().getTypeSize(E->getType()) == 64) && "rbit of unusual size!"); llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); return Builder.CreateCall( - CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit"); + CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); } if (BuiltinID == AArch64::BI__clear_cache) { diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp index d76136380160..0a88b2310beb 100644 --- a/lib/CodeGen/CGDecl.cpp +++ b/lib/CodeGen/CGDecl.cpp @@ -311,7 +311,7 @@ CodeGenFunction::AddInitializerToStaticVarDecl(const VarDecl &D, if (!Init) { if (!getLangOpts().CPlusPlus) CGM.ErrorUnsupported(D.getInit(), "constant l-value expression"); - else if (Builder.GetInsertBlock()) { + else if (HaveInsertPoint()) { // Since we have a static initializer, this global variable can't // be constant. GV->setConstant(false); @@ -352,7 +352,7 @@ CodeGenFunction::AddInitializerToStaticVarDecl(const VarDecl &D, GV->setConstant(CGM.isTypeConstant(D.getType(), true)); GV->setInitializer(Init); - if (hasNontrivialDestruction(D.getType())) { + if (hasNontrivialDestruction(D.getType()) && HaveInsertPoint()) { // We have a constant initializer, but a nontrivial destructor. We still // need to perform a guarded "initialization" in order to register the // destructor. diff --git a/lib/CodeGen/CGDeclCXX.cpp b/lib/CodeGen/CGDeclCXX.cpp index 8d9d0b21bfe1..f56e18216931 100644 --- a/lib/CodeGen/CGDeclCXX.cpp +++ b/lib/CodeGen/CGDeclCXX.cpp @@ -353,9 +353,6 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, if (D->getTLSKind()) { // FIXME: Should we support init_priority for thread_local? - // FIXME: Ideally, initialization of instantiated thread_local static data - // members of class templates should not trigger initialization of other - // entities in the TU. // FIXME: We only need to register one __cxa_thread_atexit function for the // entire TU. CXXThreadLocalInits.push_back(Fn); diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index 27af344fae87..db9de2ab6ad5 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -99,10 +99,11 @@ class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { public: CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, const RegionCodeGenTy &CodeGen, - OpenMPDirectiveKind Kind, bool HasCancel) + OpenMPDirectiveKind Kind, bool HasCancel, + StringRef HelperName) : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, HasCancel), - ThreadIDVar(ThreadIDVar) { + ThreadIDVar(ThreadIDVar), HelperName(HelperName) { assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); } @@ -111,7 +112,7 @@ public: const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } /// \brief Get the name of the capture helper. - StringRef getHelperName() const override { return ".omp_outlined."; } + StringRef getHelperName() const override { return HelperName; } static bool classof(const CGCapturedStmtInfo *Info) { return CGOpenMPRegionInfo::classof(Info) && @@ -123,6 +124,7 @@ private: /// \brief A variable or parameter storing global thread id for OpenMP /// constructs. const VarDecl *ThreadIDVar; + StringRef HelperName; }; /// \brief API for captured statement code generation in OpenMP constructs. @@ -855,7 +857,7 @@ llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction( else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) HasCancel = OPFD->hasCancel(); CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, - HasCancel); + HasCancel, getOutlinedHelperName()); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); return CGF.GenerateOpenMPCapturedStmtFunction(*CS); } @@ -1892,9 +1894,9 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( /// } else { /// ElseGen(); /// } -static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, - const RegionCodeGenTy &ThenGen, - const RegionCodeGenTy &ElseGen) { +void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, + const RegionCodeGenTy &ThenGen, + const RegionCodeGenTy &ElseGen) { CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); // If the condition constant folds and can be elided, try to avoid emitting diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h index 9a784dff0ae8..61ddc702ed24 100644 --- a/lib/CodeGen/CGOpenMPRuntime.h +++ b/lib/CodeGen/CGOpenMPRuntime.h @@ -130,6 +130,35 @@ protected: bool IsOffloadEntry, const RegionCodeGenTy &CodeGen); + /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen + /// function. Here is the logic: + /// if (Cond) { + /// ThenGen(); + /// } else { + /// ElseGen(); + /// } + void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, + const RegionCodeGenTy &ThenGen, + const RegionCodeGenTy &ElseGen); + + /// \brief Emits object of ident_t type with info for source location. + /// \param Flags Flags for OpenMP location. + /// + llvm::Value *emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, + unsigned Flags = 0); + + /// \brief Returns pointer to ident_t type. + llvm::Type *getIdentTyPointerTy(); + + /// \brief Gets thread id value for the current thread. + /// + llvm::Value *getThreadID(CodeGenFunction &CGF, SourceLocation Loc); + + /// \brief Get the function name of an outlined region. + // The name can be customized depending on the target. + // + virtual StringRef getOutlinedHelperName() const { return ".omp_outlined."; } + private: /// \brief Default const ident_t object used for initialization of all other /// ident_t objects. @@ -388,15 +417,6 @@ private: /// \brief Build type kmp_routine_entry_t (if not built yet). void emitKmpRoutineEntryT(QualType KmpInt32Ty); - /// \brief Emits object of ident_t type with info for source location. - /// \param Flags Flags for OpenMP location. - /// - llvm::Value *emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, - unsigned Flags = 0); - - /// \brief Returns pointer to ident_t type. - llvm::Type *getIdentTyPointerTy(); - /// \brief Returns pointer to kmpc_micro type. llvm::Type *getKmpc_MicroPointerTy(); @@ -432,10 +452,6 @@ private: /// stored. virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc); - /// \brief Gets thread id value for the current thread. - /// - llvm::Value *getThreadID(CodeGenFunction &CGF, SourceLocation Loc); - /// \brief Gets (if variable with the given name already exist) or creates /// internal global variable with the specified Name. The created variable has /// linkage CommonLinkage by default and is initialized by null value. diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index bc1458b1c203..6a6d832e33cd 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -26,8 +26,57 @@ enum OpenMPRTLFunctionNVPTX { OMPRTL_NVPTX__kmpc_kernel_init, /// \brief Call to void __kmpc_kernel_deinit(); OMPRTL_NVPTX__kmpc_kernel_deinit, + /// \brief Call to void __kmpc_kernel_prepare_parallel(void + /// *outlined_function); + OMPRTL_NVPTX__kmpc_kernel_prepare_parallel, + /// \brief Call to bool __kmpc_kernel_parallel(void **outlined_function); + OMPRTL_NVPTX__kmpc_kernel_parallel, + /// \brief Call to void __kmpc_kernel_end_parallel(); + OMPRTL_NVPTX__kmpc_kernel_end_parallel, + /// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 + /// global_tid); + OMPRTL_NVPTX__kmpc_serialized_parallel, + /// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 + /// global_tid); + OMPRTL_NVPTX__kmpc_end_serialized_parallel, }; -} // namespace + +/// Pre(post)-action for different OpenMP constructs specialized for NVPTX. +class NVPTXActionTy final : public PrePostActionTy { + llvm::Value *EnterCallee; + ArrayRef<llvm::Value *> EnterArgs; + llvm::Value *ExitCallee; + ArrayRef<llvm::Value *> ExitArgs; + bool Conditional; + llvm::BasicBlock *ContBlock = nullptr; + +public: + NVPTXActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs, + llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs, + bool Conditional = false) + : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), + ExitArgs(ExitArgs), Conditional(Conditional) {} + void Enter(CodeGenFunction &CGF) override { + llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); + if (Conditional) { + llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); + auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); + ContBlock = CGF.createBasicBlock("omp_if.end"); + // Generate the branch (If-stmt) + CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); + CGF.EmitBlock(ThenBlock); + } + } + void Done(CodeGenFunction &CGF) { + // Emit the rest of blocks/branches + CGF.EmitBranch(ContBlock); + CGF.EmitBlock(ContBlock, true); + } + void Exit(CodeGenFunction &CGF) override { + CGF.EmitRuntimeCall(ExitCallee, ExitArgs); + } +}; +} // anonymous namespace /// Get the GPU warp size. static llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF) { @@ -118,6 +167,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericKernel(const OMPExecutableDirective &D, const RegionCodeGenTy &CodeGen) { EntryFunctionState EST; WorkerFunctionState WST(CGM); + Work.clear(); // Emit target region as a standalone region. class NVPTXPrePostActionTy : public PrePostActionTy { @@ -246,7 +296,10 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF, CGF.InitTempAlloca(ExecStatus, Bld.getInt8(/*C=*/0)); CGF.InitTempAlloca(WorkFn, llvm::Constant::getNullValue(CGF.Int8PtrTy)); - // TODO: Call into runtime to get parallel work. + llvm::Value *Args[] = {WorkFn.getPointer()}; + llvm::Value *Ret = CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args); + Bld.CreateStore(Bld.CreateZExt(Ret, CGF.Int8Ty), ExecStatus); // On termination condition (workid == 0), exit loop. llvm::Value *ShouldTerminate = @@ -261,10 +314,42 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF, // Signal start of parallel region. CGF.EmitBlock(ExecuteBB); - // TODO: Add parallel work. + + // Process work items: outlined parallel functions. + for (auto *W : Work) { + // Try to match this outlined function. + auto *ID = Bld.CreatePointerBitCastOrAddrSpaceCast(W, CGM.Int8PtrTy); + + llvm::Value *WorkFnMatch = + Bld.CreateICmpEQ(Bld.CreateLoad(WorkFn), ID, "work_match"); + + llvm::BasicBlock *ExecuteFNBB = CGF.createBasicBlock(".execute.fn"); + llvm::BasicBlock *CheckNextBB = CGF.createBasicBlock(".check.next"); + Bld.CreateCondBr(WorkFnMatch, ExecuteFNBB, CheckNextBB); + + // Execute this outlined function. + CGF.EmitBlock(ExecuteFNBB); + + // Insert call to work function. + // FIXME: Pass arguments to outlined function from master thread. + auto *Fn = cast<llvm::Function>(W); + Address ZeroAddr = + CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, /*Name=*/".zero.addr"); + CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C=*/0)); + llvm::Value *FnArgs[] = {ZeroAddr.getPointer(), ZeroAddr.getPointer()}; + CGF.EmitCallOrInvoke(Fn, FnArgs); + + // Go to end of parallel region. + CGF.EmitBranch(TerminateBB); + + CGF.EmitBlock(CheckNextBB); + } // Signal end of parallel region. CGF.EmitBlock(TerminateBB); + CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_end_parallel), + llvm::None); CGF.EmitBranch(BarrierBB); // All active and inactive workers wait at a barrier after parallel region. @@ -296,10 +381,53 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { case OMPRTL_NVPTX__kmpc_kernel_deinit: { // Build void __kmpc_kernel_deinit(); llvm::FunctionType *FnTy = - llvm::FunctionType::get(CGM.VoidTy, {}, /*isVarArg*/ false); + llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_deinit"); break; } + case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: { + /// Build void __kmpc_kernel_prepare_parallel( + /// void *outlined_function); + llvm::Type *TypeParams[] = {CGM.Int8PtrTy}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel"); + break; + } + case OMPRTL_NVPTX__kmpc_kernel_parallel: { + /// Build bool __kmpc_kernel_parallel(void **outlined_function); + llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy}; + llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy); + llvm::FunctionType *FnTy = + llvm::FunctionType::get(RetTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_parallel"); + break; + } + case OMPRTL_NVPTX__kmpc_kernel_end_parallel: { + /// Build void __kmpc_kernel_end_parallel(); + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_end_parallel"); + break; + } + case OMPRTL_NVPTX__kmpc_serialized_parallel: { + // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 + // global_tid); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); + break; + } + case OMPRTL_NVPTX__kmpc_end_serialized_parallel: { + // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 + // global_tid); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); + break; + } } return RTLFn; } @@ -362,9 +490,12 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOrTeamsOutlinedFunction( OutlinedFun = cast<llvm::Function>(OutlinedFunVal); OutlinedFun->removeFnAttr(llvm::Attribute::NoInline); OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline); - } else - llvm_unreachable("parallel directive is not yet supported for nvptx " - "backend."); + } else { + llvm::Value *OutlinedFunVal = + CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction( + D, ThreadIDVar, InnermostKind, CodeGen); + OutlinedFun = cast<llvm::Function>(OutlinedFunVal); + } return OutlinedFun; } @@ -387,3 +518,81 @@ void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF, OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); } + +void CGOpenMPRuntimeNVPTX::emitParallelCall( + CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, + ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) { + if (!CGF.HaveInsertPoint()) + return; + + emitGenericParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond); +} + +void CGOpenMPRuntimeNVPTX::emitGenericParallelCall( + CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, + ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) { + llvm::Function *Fn = cast<llvm::Function>(OutlinedFn); + + auto &&L0ParallelGen = [this, Fn, &CapturedVars](CodeGenFunction &CGF, + PrePostActionTy &) { + CGBuilderTy &Bld = CGF.Builder; + + // Prepare for parallel region. Indicate the outlined function. + llvm::Value *Args[] = {Bld.CreateBitOrPointerCast(Fn, CGM.Int8PtrTy)}; + CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel), + Args); + + // Activate workers. This barrier is used by the master to signal + // work for the workers. + syncCTAThreads(CGF); + + // OpenMP [2.5, Parallel Construct, p.49] + // There is an implied barrier at the end of a parallel region. After the + // end of a parallel region, only the master thread of the team resumes + // execution of the enclosing task region. + // + // The master waits at this barrier until all workers are done. + syncCTAThreads(CGF); + + // Remember for post-processing in worker loop. + Work.push_back(Fn); + }; + + auto *RTLoc = emitUpdateLocation(CGF, Loc); + auto *ThreadID = getThreadID(CGF, Loc); + llvm::Value *Args[] = {RTLoc, ThreadID}; + + auto &&SeqGen = [this, Fn, &CapturedVars, &Args](CodeGenFunction &CGF, + PrePostActionTy &) { + auto &&CodeGen = [this, Fn, &CapturedVars, &Args](CodeGenFunction &CGF, + PrePostActionTy &Action) { + Action.Enter(CGF); + + llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; + OutlinedFnArgs.push_back( + llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo())); + OutlinedFnArgs.push_back( + llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo())); + OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); + CGF.EmitCallOrInvoke(Fn, OutlinedFnArgs); + }; + + RegionCodeGenTy RCG(CodeGen); + NVPTXActionTy Action( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel), + Args, + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel), + Args); + RCG.setAction(Action); + RCG(CGF); + }; + + if (IfCond) + emitOMPIfClause(CGF, IfCond, L0ParallelGen, SeqGen); + else { + CodeGenFunction::RunCleanupsScope Scope(CGF); + RegionCodeGenTy ThenRCG(L0ParallelGen); + ThenRCG(CGF); + } +} diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index 63a02965a5bd..4010b46a4cbd 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -25,6 +25,9 @@ namespace CodeGen { class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime { private: + // Parallel outlined function work for workers to execute. + llvm::SmallVector<llvm::Function *, 16> Work; + struct EntryFunctionState { llvm::BasicBlock *ExitBB = nullptr; }; @@ -100,6 +103,29 @@ private: bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) override; + /// \brief Emits code for parallel or serial call of the \a OutlinedFn with + /// variables captured in a record which address is stored in \a + /// CapturedStruct. + /// This call is for the Generic Execution Mode. + /// \param OutlinedFn Outlined function to be run in parallel threads. Type of + /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*). + /// \param CapturedVars A pointer to the record with the references to + /// variables used in \a OutlinedFn function. + /// \param IfCond Condition in the associated 'if' clause, if it was + /// specified, nullptr otherwise. + void emitGenericParallelCall(CodeGenFunction &CGF, SourceLocation Loc, + llvm::Value *OutlinedFn, + ArrayRef<llvm::Value *> CapturedVars, + const Expr *IfCond); + +protected: + /// \brief Get the function name of an outlined region. + // The name can be customized depending on the target. + // + StringRef getOutlinedHelperName() const override { + return "__omp_outlined__"; + } + public: explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM); @@ -137,6 +163,20 @@ public: void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars) override; + + /// \brief Emits code for parallel or serial call of the \a OutlinedFn with + /// variables captured in a record which address is stored in \a + /// CapturedStruct. + /// \param OutlinedFn Outlined function to be run in parallel threads. Type of + /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*). + /// \param CapturedVars A pointer to the record with the references to + /// variables used in \a OutlinedFn function. + /// \param IfCond Condition in the associated 'if' clause, if it was + /// specified, nullptr otherwise. + void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, + llvm::Value *OutlinedFn, + ArrayRef<llvm::Value *> CapturedVars, + const Expr *IfCond) override; }; } // CodeGen namespace. diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp index 8d391f95d9f7..8370607db50f 100644 --- a/lib/CodeGen/CGStmt.cpp +++ b/lib/CodeGen/CGStmt.cpp @@ -330,6 +330,10 @@ void CodeGenFunction::EmitStmt(const Stmt *S) { EmitOMPTargetTeamsDistributeParallelForSimdDirective( cast<OMPTargetTeamsDistributeParallelForSimdDirective>(*S)); break; + case Stmt::OMPTargetTeamsDistributeSimdDirectiveClass: + EmitOMPTargetTeamsDistributeSimdDirective( + cast<OMPTargetTeamsDistributeSimdDirective>(*S)); + break; } } diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index 386c4f0fe69c..39e1cdfdbe2a 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -2042,6 +2042,16 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective( }); } +void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( + const OMPTargetTeamsDistributeSimdDirective &S) { + CGM.getOpenMPRuntime().emitInlinedDirective( + *this, OMPD_target_teams_distribute_simd, + [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitStmt( + cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + }); +} + /// \brief Emit a helper variable and return corresponding lvalue. static LValue EmitOMPHelperVar(CodeGenFunction &CGF, const DeclRefExpr *Helper) { diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index 05522cd40024..586134023240 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -2701,6 +2701,8 @@ public: const OMPTargetTeamsDistributeParallelForDirective &S); void EmitOMPTargetTeamsDistributeParallelForSimdDirective( const OMPTargetTeamsDistributeParallelForSimdDirective &S); + void EmitOMPTargetTeamsDistributeSimdDirective( + const OMPTargetTeamsDistributeSimdDirective &S); /// Emit outlined function for the target directive. static std::pair<llvm::Function * /*OutlinedFn*/, diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp index ab29d2dbb566..36005430ae4c 100644 --- a/lib/CodeGen/CodeGenModule.cpp +++ b/lib/CodeGen/CodeGenModule.cpp @@ -1243,9 +1243,15 @@ void CodeGenModule::EmitModuleLinkOptions() { SmallVector<clang::Module *, 16> Stack; // Seed the stack with imported modules. - for (Module *M : ImportedModules) + for (Module *M : ImportedModules) { + // Do not add any link flags when an implementation TU of a module imports + // a header of that same module. + if (M->getTopLevelModuleName() == getLangOpts().CurrentModule && + !getLangOpts().isCompilingModule()) + continue; if (Visited.insert(M).second) Stack.push_back(M); + } // Find all of the modules to import, making a little effort to prune // non-leaf modules. diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp index b5d90ea59a49..f7a8dd66c527 100644 --- a/lib/CodeGen/ItaniumCXXABI.cpp +++ b/lib/CodeGen/ItaniumCXXABI.cpp @@ -2272,7 +2272,21 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( ArrayRef<llvm::Function *> CXXThreadLocalInits, ArrayRef<const VarDecl *> CXXThreadLocalInitVars) { llvm::Function *InitFunc = nullptr; - if (!CXXThreadLocalInits.empty()) { + + // Separate initializers into those with ordered (or partially-ordered) + // initialization and those with unordered initialization. + llvm::SmallVector<llvm::Function *, 8> OrderedInits; + llvm::SmallDenseMap<const VarDecl *, llvm::Function *> UnorderedInits; + for (unsigned I = 0; I != CXXThreadLocalInits.size(); ++I) { + if (isTemplateInstantiation( + CXXThreadLocalInitVars[I]->getTemplateSpecializationKind())) + UnorderedInits[CXXThreadLocalInitVars[I]->getCanonicalDecl()] = + CXXThreadLocalInits[I]; + else + OrderedInits.push_back(CXXThreadLocalInits[I]); + } + + if (!OrderedInits.empty()) { // Generate a guarded initialization function. llvm::FunctionType *FTy = llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false); @@ -2289,24 +2303,28 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( CharUnits GuardAlign = CharUnits::One(); Guard->setAlignment(GuardAlign.getQuantity()); - CodeGenFunction(CGM) - .GenerateCXXGlobalInitFunc(InitFunc, CXXThreadLocalInits, - Address(Guard, GuardAlign)); + CodeGenFunction(CGM).GenerateCXXGlobalInitFunc(InitFunc, OrderedInits, + Address(Guard, GuardAlign)); // On Darwin platforms, use CXX_FAST_TLS calling convention. if (CGM.getTarget().getTriple().isOSDarwin()) { InitFunc->setCallingConv(llvm::CallingConv::CXX_FAST_TLS); InitFunc->addFnAttr(llvm::Attribute::NoUnwind); } } + + // Emit thread wrappers. for (const VarDecl *VD : CXXThreadLocals) { llvm::GlobalVariable *Var = cast<llvm::GlobalVariable>(CGM.GetGlobalValue(CGM.getMangledName(VD))); + llvm::Function *Wrapper = getOrCreateThreadLocalWrapper(VD, Var); // Some targets require that all access to thread local variables go through // the thread wrapper. This means that we cannot attempt to create a thread // wrapper or a thread helper. - if (isThreadWrapperReplaceable(VD, CGM) && !VD->hasDefinition()) + if (isThreadWrapperReplaceable(VD, CGM) && !VD->hasDefinition()) { + Wrapper->setLinkage(llvm::Function::ExternalLinkage); continue; + } // Mangle the name for the thread_local initialization function. SmallString<256> InitFnName; @@ -2322,18 +2340,21 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( bool InitIsInitFunc = false; if (VD->hasDefinition()) { InitIsInitFunc = true; - if (InitFunc) + llvm::Function *InitFuncToUse = InitFunc; + if (isTemplateInstantiation(VD->getTemplateSpecializationKind())) + InitFuncToUse = UnorderedInits.lookup(VD->getCanonicalDecl()); + if (InitFuncToUse) Init = llvm::GlobalAlias::create(Var->getLinkage(), InitFnName.str(), - InitFunc); + InitFuncToUse); } else { // Emit a weak global function referring to the initialization function. // This function will not exist if the TU defining the thread_local // variable in question does not need any dynamic initialization for // its thread_local variables. llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, false); - Init = llvm::Function::Create( - FnTy, llvm::GlobalVariable::ExternalWeakLinkage, InitFnName.str(), - &CGM.getModule()); + Init = llvm::Function::Create(FnTy, + llvm::GlobalVariable::ExternalWeakLinkage, + InitFnName.str(), &CGM.getModule()); const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); CGM.SetLLVMFunctionAttributes(nullptr, FI, cast<llvm::Function>(Init)); } @@ -2341,7 +2362,6 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( if (Init) Init->setVisibility(Var->getVisibility()); - llvm::Function *Wrapper = getOrCreateThreadLocalWrapper(VD, Var); llvm::LLVMContext &Context = CGM.getModule().getContext(); llvm::BasicBlock *Entry = llvm::BasicBlock::Create(Context, "", Wrapper); CGBuilderTy Builder(CGM, Entry); |