| author | Dimitry Andric <dim@FreeBSD.org> | 2023-02-11 12:38:04 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2023-02-11 12:38:11 +0000 |
| commit | e3b557809604d036af6e00c60f012c2025b59a5e (patch) | |
| tree | 8a11ba2269a3b669601e2fd41145b174008f4da8 /clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | |
| parent | 08e8dd7b9db7bb4a9de26d44c1cbfd24e869c014 (diff) | |
Diffstat (limited to 'clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp')
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 431
1 file changed, 37 insertions, 394 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 6dea846f486f..e8c5f04db49f 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -15,6 +15,7 @@
 #include "CodeGenFunction.h"
 #include "clang/AST/Attr.h"
 #include "clang/AST/DeclOpenMP.h"
+#include "clang/AST/OpenMPClause.h"
 #include "clang/AST/StmtOpenMP.h"
 #include "clang/AST/StmtVisitor.h"
 #include "clang/Basic/Cuda.h"
@@ -73,30 +74,15 @@ private:
   CGOpenMPRuntimeGPU::ExecutionMode SavedExecMode =
       CGOpenMPRuntimeGPU::EM_Unknown;
   CGOpenMPRuntimeGPU::ExecutionMode &ExecMode;
-  bool SavedRuntimeMode = false;
-  bool *RuntimeMode = nullptr;
 
 public:
-  /// Constructor for Non-SPMD mode.
-  ExecutionRuntimeModesRAII(CGOpenMPRuntimeGPU::ExecutionMode &ExecMode)
-      : ExecMode(ExecMode) {
-    SavedExecMode = ExecMode;
-    ExecMode = CGOpenMPRuntimeGPU::EM_NonSPMD;
-  }
-  /// Constructor for SPMD mode.
   ExecutionRuntimeModesRAII(CGOpenMPRuntimeGPU::ExecutionMode &ExecMode,
-                            bool &RuntimeMode, bool FullRuntimeMode)
-      : ExecMode(ExecMode), RuntimeMode(&RuntimeMode) {
+                            CGOpenMPRuntimeGPU::ExecutionMode EntryMode)
+      : ExecMode(ExecMode) {
     SavedExecMode = ExecMode;
-    SavedRuntimeMode = RuntimeMode;
-    ExecMode = CGOpenMPRuntimeGPU::EM_SPMD;
-    RuntimeMode = FullRuntimeMode;
-  }
-  ~ExecutionRuntimeModesRAII() {
-    ExecMode = SavedExecMode;
-    if (RuntimeMode)
-      *RuntimeMode = SavedRuntimeMode;
+    ExecMode = EntryMode;
   }
+  ~ExecutionRuntimeModesRAII() { ExecMode = SavedExecMode; }
 };
 
 /// GPU Configuration: This information can be derived from cuda registers,
@@ -109,9 +95,6 @@ enum MachineConfiguration : unsigned {
 
   /// Global memory alignment for performance.
   GlobalMemoryAlignment = 128,
-
-  /// Maximal size of the shared memory buffer.
-  SharedMemorySize = 128,
 };
 
 static const ValueDecl *getPrivateItem(const Expr *RefExpr) {
@@ -444,9 +427,8 @@ public:
       markAsEscaped(VD);
       if (isa<OMPCapturedExprDecl>(VD))
         VisitValueDecl(VD);
-      else if (const auto *VarD = dyn_cast<VarDecl>(VD))
-        if (VarD->isInitCapture())
-          VisitValueDecl(VD);
+      else if (VD->isInitCapture())
+        VisitValueDecl(VD);
   }
   void VisitUnaryOperator(const UnaryOperator *E) {
     if (!E)
@@ -746,274 +728,13 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx,
       "Unknown programming model for OpenMP directive on NVPTX target.");
 }
 
-/// Check if the directive is loops based and has schedule clause at all or has
-/// static scheduling.
-static bool hasStaticScheduling(const OMPExecutableDirective &D) {
-  assert(isOpenMPWorksharingDirective(D.getDirectiveKind()) &&
-         isOpenMPLoopDirective(D.getDirectiveKind()) &&
-         "Expected loop-based directive.");
-  return !D.hasClausesOfKind<OMPOrderedClause>() &&
-         (!D.hasClausesOfKind<OMPScheduleClause>() ||
-          llvm::any_of(D.getClausesOfKind<OMPScheduleClause>(),
-                       [](const OMPScheduleClause *C) {
-                         return C->getScheduleKind() == OMPC_SCHEDULE_static;
-                       }));
-}
-
-/// Check for inner (nested) lightweight runtime construct, if any
-static bool hasNestedLightweightDirective(ASTContext &Ctx,
-                                          const OMPExecutableDirective &D) {
-  assert(supportsSPMDExecutionMode(Ctx, D) && "Expected SPMD mode directive.");
-  const auto *CS = D.getInnermostCapturedStmt();
-  const auto *Body =
-      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
-  const Stmt *ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
-
-  if (const auto *NestedDir =
-          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
-    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
-    switch (D.getDirectiveKind()) {
-    case OMPD_target:
-      if (isOpenMPParallelDirective(DKind) &&
-          isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) &&
-          hasStaticScheduling(*NestedDir))
-        return true;
-      if (DKind == OMPD_teams_distribute_simd || DKind == OMPD_simd)
-        return true;
-      if (DKind == OMPD_parallel) {
-        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
-            /*IgnoreCaptured=*/true);
-        if (!Body)
-          return false;
-        ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
-        if (const auto *NND =
-                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
-          DKind = NND->getDirectiveKind();
-          if (isOpenMPWorksharingDirective(DKind) &&
-              isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
-            return true;
-        }
-      } else if (DKind == OMPD_teams) {
-        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
-            /*IgnoreCaptured=*/true);
-        if (!Body)
-          return false;
-        ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
-        if (const auto *NND =
-                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
-          DKind = NND->getDirectiveKind();
-          if (isOpenMPParallelDirective(DKind) &&
-              isOpenMPWorksharingDirective(DKind) &&
-              isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
-            return true;
-          if (DKind == OMPD_parallel) {
-            Body = NND->getInnermostCapturedStmt()->IgnoreContainers(
-                /*IgnoreCaptured=*/true);
-            if (!Body)
-              return false;
-            ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
-            if (const auto *NND =
-                    dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
-              DKind = NND->getDirectiveKind();
-              if (isOpenMPWorksharingDirective(DKind) &&
-                  isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
-                return true;
-            }
-          }
-        }
-      }
-      return false;
-    case OMPD_target_teams:
-      if (isOpenMPParallelDirective(DKind) &&
-          isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) &&
-          hasStaticScheduling(*NestedDir))
-        return true;
-      if (DKind == OMPD_distribute_simd || DKind == OMPD_simd)
-        return true;
-      if (DKind == OMPD_parallel) {
-        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
-            /*IgnoreCaptured=*/true);
-        if (!Body)
-          return false;
-        ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
-        if (const auto *NND =
-                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
-          DKind = NND->getDirectiveKind();
-          if (isOpenMPWorksharingDirective(DKind) &&
-              isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
-            return true;
-        }
-      }
-      return false;
-    case OMPD_target_parallel:
-      if (DKind == OMPD_simd)
-        return true;
-      return isOpenMPWorksharingDirective(DKind) &&
-             isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NestedDir);
-    case OMPD_target_teams_distribute:
-    case OMPD_target_simd:
-    case OMPD_target_parallel_for:
-    case OMPD_target_parallel_for_simd:
-    case OMPD_target_teams_distribute_simd:
-    case OMPD_target_teams_distribute_parallel_for:
-    case OMPD_target_teams_distribute_parallel_for_simd:
-    case OMPD_parallel:
-    case OMPD_for:
-    case OMPD_parallel_for:
-    case OMPD_parallel_master:
-    case OMPD_parallel_sections:
-    case OMPD_for_simd:
-    case OMPD_parallel_for_simd:
-    case OMPD_cancel:
-    case OMPD_cancellation_point:
-    case OMPD_ordered:
-    case OMPD_threadprivate:
-    case OMPD_allocate:
-    case OMPD_task:
-    case OMPD_simd:
-    case OMPD_sections:
-    case OMPD_section:
-    case OMPD_single:
-    case OMPD_master:
-    case OMPD_critical:
-    case OMPD_taskyield:
-    case OMPD_barrier:
-    case OMPD_taskwait:
-    case OMPD_taskgroup:
-    case OMPD_atomic:
-    case OMPD_flush:
-    case OMPD_depobj:
-    case OMPD_scan:
-    case OMPD_teams:
-    case OMPD_target_data:
-    case OMPD_target_exit_data:
-    case OMPD_target_enter_data:
-    case OMPD_distribute:
-    case OMPD_distribute_simd:
-    case OMPD_distribute_parallel_for:
-    case OMPD_distribute_parallel_for_simd:
-    case OMPD_teams_distribute:
-    case OMPD_teams_distribute_simd:
-    case OMPD_teams_distribute_parallel_for:
-    case OMPD_teams_distribute_parallel_for_simd:
-    case OMPD_target_update:
-    case OMPD_declare_simd:
-    case OMPD_declare_variant:
-    case OMPD_begin_declare_variant:
-    case OMPD_end_declare_variant:
-    case OMPD_declare_target:
-    case OMPD_end_declare_target:
-    case OMPD_declare_reduction:
-    case OMPD_declare_mapper:
-    case OMPD_taskloop:
-    case OMPD_taskloop_simd:
-    case OMPD_master_taskloop:
-    case OMPD_master_taskloop_simd:
-    case OMPD_parallel_master_taskloop:
-    case OMPD_parallel_master_taskloop_simd:
-    case OMPD_requires:
-    case OMPD_unknown:
-    default:
-      llvm_unreachable("Unexpected directive.");
-    }
-  }
-
-  return false;
-}
-
-/// Checks if the construct supports lightweight runtime. It must be SPMD
-/// construct + inner loop-based construct with static scheduling.
-static bool supportsLightweightRuntime(ASTContext &Ctx,
-                                       const OMPExecutableDirective &D) {
-  if (!supportsSPMDExecutionMode(Ctx, D))
-    return false;
-  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
-  switch (DirectiveKind) {
-  case OMPD_target:
-  case OMPD_target_teams:
-  case OMPD_target_parallel:
-    return hasNestedLightweightDirective(Ctx, D);
-  case OMPD_target_parallel_for:
-  case OMPD_target_parallel_for_simd:
-  case OMPD_target_teams_distribute_parallel_for:
-  case OMPD_target_teams_distribute_parallel_for_simd:
-    // (Last|First)-privates must be shared in parallel region.
-    return hasStaticScheduling(D);
-  case OMPD_target_simd:
-  case OMPD_target_teams_distribute_simd:
-    return true;
-  case OMPD_target_teams_distribute:
-    return false;
-  case OMPD_parallel:
-  case OMPD_for:
-  case OMPD_parallel_for:
-  case OMPD_parallel_master:
-  case OMPD_parallel_sections:
-  case OMPD_for_simd:
-  case OMPD_parallel_for_simd:
-  case OMPD_cancel:
-  case OMPD_cancellation_point:
-  case OMPD_ordered:
-  case OMPD_threadprivate:
-  case OMPD_allocate:
-  case OMPD_task:
-  case OMPD_simd:
-  case OMPD_sections:
-  case OMPD_section:
-  case OMPD_single:
-  case OMPD_master:
-  case OMPD_critical:
-  case OMPD_taskyield:
-  case OMPD_barrier:
-  case OMPD_taskwait:
-  case OMPD_taskgroup:
-  case OMPD_atomic:
-  case OMPD_flush:
-  case OMPD_depobj:
-  case OMPD_scan:
-  case OMPD_teams:
-  case OMPD_target_data:
-  case OMPD_target_exit_data:
-  case OMPD_target_enter_data:
-  case OMPD_distribute:
-  case OMPD_distribute_simd:
-  case OMPD_distribute_parallel_for:
-  case OMPD_distribute_parallel_for_simd:
-  case OMPD_teams_distribute:
-  case OMPD_teams_distribute_simd:
-  case OMPD_teams_distribute_parallel_for:
-  case OMPD_teams_distribute_parallel_for_simd:
-  case OMPD_target_update:
-  case OMPD_declare_simd:
-  case OMPD_declare_variant:
-  case OMPD_begin_declare_variant:
-  case OMPD_end_declare_variant:
-  case OMPD_declare_target:
-  case OMPD_end_declare_target:
-  case OMPD_declare_reduction:
-  case OMPD_declare_mapper:
-  case OMPD_taskloop:
-  case OMPD_taskloop_simd:
-  case OMPD_master_taskloop:
-  case OMPD_master_taskloop_simd:
-  case OMPD_parallel_master_taskloop:
-  case OMPD_parallel_master_taskloop_simd:
-  case OMPD_requires:
-  case OMPD_unknown:
-  default:
-    break;
-  }
-  llvm_unreachable(
-      "Unknown programming model for OpenMP directive on NVPTX target.");
-}
-
 void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D,
                                            StringRef ParentName,
                                            llvm::Function *&OutlinedFn,
                                            llvm::Constant *&OutlinedFnID,
                                            bool IsOffloadEntry,
                                            const RegionCodeGenTy &CodeGen) {
-  ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode);
+  ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode, EM_NonSPMD);
   EntryFunctionState EST;
   WrapperFunctionsMap.clear();
 
@@ -1048,8 +769,7 @@ void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D,
 void CGOpenMPRuntimeGPU::emitKernelInit(CodeGenFunction &CGF,
                                         EntryFunctionState &EST, bool IsSPMD) {
   CGBuilderTy &Bld = CGF.Builder;
-  Bld.restoreIP(OMPBuilder.createTargetInit(Bld, IsSPMD, requiresFullRuntime()));
-  IsInTargetMasterThreadRegion = IsSPMD;
+  Bld.restoreIP(OMPBuilder.createTargetInit(Bld, IsSPMD));
   if (!IsSPMD)
     emitGenericVarsProlog(CGF, EST.Loc);
 }
 
@@ -1061,7 +781,7 @@ void CGOpenMPRuntimeGPU::emitKernelDeinit(CodeGenFunction &CGF,
     emitGenericVarsEpilog(CGF);
 
   CGBuilderTy &Bld = CGF.Builder;
-  OMPBuilder.createTargetDeinit(Bld, IsSPMD, requiresFullRuntime());
+  OMPBuilder.createTargetDeinit(Bld, IsSPMD);
 }
 
 void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D,
@@ -1070,10 +790,7 @@ void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D,
                                         llvm::Constant *&OutlinedFnID,
                                         bool IsOffloadEntry,
                                         const RegionCodeGenTy &CodeGen) {
-  ExecutionRuntimeModesRAII ModeRAII(
-      CurrentExecutionMode, RequiresFullRuntime,
-      CGM.getLangOpts().OpenMPCUDAForceFullRuntime ||
-          !supportsLightweightRuntime(CGM.getContext(), D));
+  ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode, EM_SPMD);
   EntryFunctionState EST;
 
   // Emit target region as a standalone region.
@@ -1116,36 +833,10 @@ static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name,
       llvm::ConstantInt::get(CGM.Int8Ty, Mode ? OMP_TGT_EXEC_MODE_SPMD
                                               : OMP_TGT_EXEC_MODE_GENERIC),
       Twine(Name, "_exec_mode"));
+  GVMode->setVisibility(llvm::GlobalVariable::ProtectedVisibility);
   CGM.addCompilerUsedGlobal(GVMode);
 }
 
-void CGOpenMPRuntimeGPU::createOffloadEntry(llvm::Constant *ID,
-                                            llvm::Constant *Addr,
-                                            uint64_t Size, int32_t,
-                                            llvm::GlobalValue::LinkageTypes) {
-  // TODO: Add support for global variables on the device after declare target
-  // support.
-  llvm::Function *Fn = dyn_cast<llvm::Function>(Addr);
-  if (!Fn)
-    return;
-
-  llvm::Module &M = CGM.getModule();
-  llvm::LLVMContext &Ctx = CGM.getLLVMContext();
-
-  // Get "nvvm.annotations" metadata node.
-  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
-
-  llvm::Metadata *MDVals[] = {
-      llvm::ConstantAsMetadata::get(Fn), llvm::MDString::get(Ctx, "kernel"),
-      llvm::ConstantAsMetadata::get(
-          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
-  // Append metadata to nvvm.annotations.
-  MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
-
-  // Add a function attribute for the kernel.
-  Fn->addFnAttr(llvm::Attribute::get(Ctx, "kernel"));
-}
-
 void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction(
     const OMPExecutableDirective &D, StringRef ParentName,
     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
@@ -1166,39 +857,14 @@ void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction(
   setPropertyExecutionMode(CGM, OutlinedFn->getName(), Mode);
 }
 
-namespace {
-LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
-/// Enum for accesseing the reserved_2 field of the ident_t struct.
-enum ModeFlagsTy : unsigned {
-  /// Bit set to 1 when in SPMD mode.
-  KMP_IDENT_SPMD_MODE = 0x01,
-  /// Bit set to 1 when a simplified runtime is used.
-  KMP_IDENT_SIMPLE_RT_MODE = 0x02,
-  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/KMP_IDENT_SIMPLE_RT_MODE)
-};
-
-/// Special mode Undefined. Is the combination of Non-SPMD mode + SimpleRuntime.
-static const ModeFlagsTy UndefinedMode =
-    (~KMP_IDENT_SPMD_MODE) & KMP_IDENT_SIMPLE_RT_MODE;
-} // anonymous namespace
-
-unsigned CGOpenMPRuntimeGPU::getDefaultLocationReserved2Flags() const {
-  switch (getExecutionMode()) {
-  case EM_SPMD:
-    if (requiresFullRuntime())
-      return KMP_IDENT_SPMD_MODE & (~KMP_IDENT_SIMPLE_RT_MODE);
-    return KMP_IDENT_SPMD_MODE | KMP_IDENT_SIMPLE_RT_MODE;
-  case EM_NonSPMD:
-    assert(requiresFullRuntime() && "Expected full runtime.");
-    return (~KMP_IDENT_SPMD_MODE) & (~KMP_IDENT_SIMPLE_RT_MODE);
-  case EM_Unknown:
-    return UndefinedMode;
-  }
-  llvm_unreachable("Unknown flags are requested.");
-}
-
 CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)
-    : CGOpenMPRuntime(CGM, "_", "$") {
+    : CGOpenMPRuntime(CGM) {
+  llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsDevice, true,
+                                     hasRequiresUnifiedSharedMemory(),
+                                     CGM.getLangOpts().OpenMPOffloadMandatory);
+  OMPBuilder.setConfig(Config);
+  OffloadEntriesInfoManager.setConfig(Config);
+
   if (!CGM.getLangOpts().OpenMPIsDevice)
     llvm_unreachable("OpenMP can only handle device code.");
 
@@ -1214,6 +880,8 @@ CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)
                              "__omp_rtl_assume_threads_oversubscription");
   OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPNoThreadState,
                               "__omp_rtl_assume_no_thread_state");
+  OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPNoNestedParallelism,
+                              "__omp_rtl_assume_no_nested_parallelism");
 }
 
 void CGOpenMPRuntimeGPU::emitProcBindClause(CodeGenFunction &CGF,
@@ -1241,33 +909,13 @@ llvm::Function *CGOpenMPRuntimeGPU::emitParallelOutlinedFunction(
     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
   // Emit target region as a standalone region.
-  class NVPTXPrePostActionTy : public PrePostActionTy {
-    bool &IsInParallelRegion;
-    bool PrevIsInParallelRegion;
-
-  public:
-    NVPTXPrePostActionTy(bool &IsInParallelRegion)
-        : IsInParallelRegion(IsInParallelRegion) {}
-    void Enter(CodeGenFunction &CGF) override {
-      PrevIsInParallelRegion = IsInParallelRegion;
-      IsInParallelRegion = true;
-    }
-    void Exit(CodeGenFunction &CGF) override {
-      IsInParallelRegion = PrevIsInParallelRegion;
-    }
-  } Action(IsInParallelRegion);
-  CodeGen.setAction(Action);
   bool PrevIsInTTDRegion = IsInTTDRegion;
   IsInTTDRegion = false;
-  bool PrevIsInTargetMasterThreadRegion = IsInTargetMasterThreadRegion;
-  IsInTargetMasterThreadRegion = false;
   auto *OutlinedFun =
       cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction(
          D, ThreadIDVar, InnermostKind, CodeGen));
-  IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion;
   IsInTTDRegion = PrevIsInTTDRegion;
-  if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD &&
-      !IsInParallelRegion) {
+  if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD) {
     llvm::Function *WrapperFun =
         createParallelDataSharingWrapper(OutlinedFun, D);
     WrapperFunctionsMap[OutlinedFun] = WrapperFun;
@@ -1330,7 +978,7 @@ llvm::Function *CGOpenMPRuntimeGPU::emitTeamsOutlinedFunction(
     getDistributeLastprivateVars(CGM.getContext(), D, LastPrivatesReductions);
     if (!LastPrivatesReductions.empty()) {
       GlobalizedRD = ::buildRecordForGlobalizedVars(
-          CGM.getContext(), llvm::None, LastPrivatesReductions,
+          CGM.getContext(), std::nullopt, LastPrivatesReductions,
          MappedDeclsFields, WarpSize);
     }
   } else if (!LastPrivatesReductions.empty()) {
@@ -3307,7 +2955,7 @@ void CGOpenMPRuntimeGPU::emitReduction(
     ++Cnt;
   }
   const RecordDecl *TeamReductionRec = ::buildRecordForGlobalizedVars(
-      CGM.getContext(), PrivatesReductions, llvm::None, VarFieldMap,
+      CGM.getContext(), PrivatesReductions, std::nullopt, VarFieldMap,
       C.getLangOpts().OpenMPCUDAReductionBufNum);
   TeamsReductions.push_back(TeamReductionRec);
   if (!KernelTeamsReductionPtr) {
@@ -3379,7 +3027,7 @@ void CGOpenMPRuntimeGPU::emitReduction(
   llvm::Value *EndArgs[] = {ThreadId};
   RegionCodeGenTy RCG(CodeGen);
   NVPTXActionTy Action(
-      nullptr, llvm::None,
+      nullptr, std::nullopt,
       OMPBuilder.getOrCreateRuntimeFunction(
           CGM.getModule(), OMPRTL___kmpc_nvptx_end_reduce_nowait),
       EndArgs);
@@ -3435,7 +3083,7 @@ CGOpenMPRuntimeGPU::getParameterAddress(CodeGenFunction &CGF,
   const Type *NonQualTy = QC.strip(NativeParamType);
   QualType NativePointeeTy = cast<ReferenceType>(NonQualTy)->getPointeeType();
   unsigned NativePointeeAddrSpace =
-      CGF.getContext().getTargetAddressSpace(NativePointeeTy);
+      CGF.getTypes().getTargetAddressSpace(NativePointeeTy);
   QualType TargetTy = TargetParam->getType();
   llvm::Value *TargetAddr = CGF.EmitLoadOfScalar(
       LocalAddr, /*Volatile=*/false, TargetTy, SourceLocation());
@@ -3659,16 +3307,6 @@ void CGOpenMPRuntimeGPU::emitFunctionProlog(CodeGenFunction &CGF,
     assert(VD->isCanonicalDecl() && "Expected canonical declaration");
     Data.insert(std::make_pair(VD, MappedVarData()));
   }
-  if (!IsInTTDRegion && !NeedToDelayGlobalization && !IsInParallelRegion) {
-    CheckVarsEscapingDeclContext VarChecker(CGF, llvm::None);
-    VarChecker.Visit(Body);
-    I->getSecond().SecondaryLocalVarData.emplace();
-    DeclToAddrMapTy &Data = *I->getSecond().SecondaryLocalVarData;
-    for (const ValueDecl *VD : VarChecker.getEscapedDecls()) {
-      assert(VD->isCanonicalDecl() && "Expected canonical declaration");
-      Data.insert(std::make_pair(VD, MappedVarData()));
-    }
-  }
   if (!NeedToDelayGlobalization) {
     emitGenericVarsProlog(CGF, D->getBeginLoc(), /*WithSPMDCheck=*/true);
     struct GlobalizationScope final : EHScopeStack::Cleanup {
@@ -3810,7 +3448,7 @@ void CGOpenMPRuntimeGPU::adjustTargetSpecificDataForLambdas(
     else
       VDLVal = CGF.MakeAddrLValue(
          VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
-    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
+    llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
     FieldDecl *ThisCapture = nullptr;
     RD->getCaptureFields(Captures, ThisCapture);
     if (ThisCapture && CGF.CapturedStmtInfo->isCXXThisExprCaptured()) {
@@ -3822,13 +3460,15 @@ void CGOpenMPRuntimeGPU::adjustTargetSpecificDataForLambdas(
     for (const LambdaCapture &LC : RD->captures()) {
       if (LC.getCaptureKind() != LCK_ByRef)
         continue;
-      const VarDecl *VD = LC.getCapturedVar();
-      if (!CS->capturesVariable(VD))
+      const ValueDecl *VD = LC.getCapturedVar();
+      // FIXME: For now VD is always a VarDecl because OpenMP does not support
+      // capturing structured bindings in lambdas yet.
+      if (!CS->capturesVariable(cast<VarDecl>(VD)))
         continue;
       auto It = Captures.find(VD);
       assert(It != Captures.end() && "Found lambda capture without field.");
       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
-      Address VDAddr = CGF.GetAddrOfLocalVar(VD);
+      Address VDAddr = CGF.GetAddrOfLocalVar(cast<VarDecl>(VD));
       if (VD->getType().getCanonicalType()->isReferenceType())
         VDAddr = CGF.EmitLoadOfReferenceLValue(VDAddr,
                                                VD->getType().getCanonicalType())
@@ -3913,6 +3553,9 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(
       case CudaArch::SM_75:
       case CudaArch::SM_80:
       case CudaArch::SM_86:
+      case CudaArch::SM_87:
+      case CudaArch::SM_89:
+      case CudaArch::SM_90:
       case CudaArch::GFX600:
       case CudaArch::GFX601:
       case CudaArch::GFX602:
@@ -4006,10 +3649,10 @@ llvm::Value *CGOpenMPRuntimeGPU::getGPUNumThreads(CodeGenFunction &CGF) {
   llvm::Function *F = M->getFunction(LocSize);
   if (!F) {
     F = llvm::Function::Create(
-        llvm::FunctionType::get(CGF.Int32Ty, llvm::None, false),
+        llvm::FunctionType::get(CGF.Int32Ty, std::nullopt, false),
         llvm::GlobalVariable::ExternalLinkage, LocSize, &CGF.CGM.getModule());
   }
-  return Bld.CreateCall(F, llvm::None, "nvptx_num_threads");
+  return Bld.CreateCall(F, std::nullopt, "nvptx_num_threads");
 }
 
 llvm::Value *CGOpenMPRuntimeGPU::getGPUThreadID(CodeGenFunction &CGF) {
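For readers following the patch: the bulk of the deletions removes the old "lightweight vs. full runtime" machinery (`hasStaticScheduling`, `hasNestedLightweightDirective`, `supportsLightweightRuntime`, the `KMP_IDENT_*` reserved_2 flags, and the `RuntimeMode` bookkeeping in `ExecutionRuntimeModesRAII`), leaving a guard that only saves and restores the execution mode. Below is a minimal stand-alone sketch of the resulting RAII pattern; the types are simplified stand-ins for illustration, not the actual clang classes:

```cpp
// Sketch of the save/install/restore pattern the patch settles on:
// record the surrounding execution mode, install the entry mode for the
// kernel being emitted, and restore the old mode on scope exit.
#include <cassert>

enum class ExecutionMode { Unknown, SPMD, NonSPMD };

class ExecutionModeRAII {
  ExecutionMode Saved;    // mode in effect before this kernel
  ExecutionMode &Current; // codegen-wide mode being guarded

public:
  ExecutionModeRAII(ExecutionMode &Mode, ExecutionMode Entry)
      : Saved(Mode), Current(Mode) {
    Current = Entry; // EM_SPMD in emitSPMDKernel, EM_NonSPMD in emitNonSPMDKernel
  }
  ~ExecutionModeRAII() { Current = Saved; } // restore the previous mode
};

ExecutionMode CurrentExecutionMode = ExecutionMode::Unknown;

void emitKernel(bool IsSPMD) {
  ExecutionModeRAII ModeRAII(CurrentExecutionMode,
                             IsSPMD ? ExecutionMode::SPMD
                                    : ExecutionMode::NonSPMD);
  // ... emit the target region; nested codegen consults CurrentExecutionMode ...
  assert(CurrentExecutionMode != ExecutionMode::Unknown);
} // ModeRAII's destructor restores the outer mode here

int main() {
  emitKernel(/*IsSPMD=*/true);
  assert(CurrentExecutionMode == ExecutionMode::Unknown);
  return 0;
}
```

With a single constructor taking the entry mode explicitly, the previous pair of mode-specific constructors collapses into one, and `emitSPMDKernel`/`emitNonSPMDKernel` now differ only in the argument they pass (`EM_SPMD` vs. `EM_NonSPMD`).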