| author | Dimitry Andric <dim@FreeBSD.org> | 2023-02-11 12:38:04 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2023-02-11 12:38:11 +0000 |
| commit | e3b557809604d036af6e00c60f012c2025b59a5e (patch) | |
| tree | 8a11ba2269a3b669601e2fd41145b174008f4da8 /clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | |
| parent | 08e8dd7b9db7bb4a9de26d44c1cbfd24e869c014 (diff) | |
Diffstat (limited to 'clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp')
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 431
1 file changed, 37 insertions, 394 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 6dea846f486f..e8c5f04db49f 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -15,6 +15,7 @@
 #include "CodeGenFunction.h"
 #include "clang/AST/Attr.h"
 #include "clang/AST/DeclOpenMP.h"
+#include "clang/AST/OpenMPClause.h"
 #include "clang/AST/StmtOpenMP.h"
 #include "clang/AST/StmtVisitor.h"
 #include "clang/Basic/Cuda.h"
@@ -73,30 +74,15 @@ private:
   CGOpenMPRuntimeGPU::ExecutionMode SavedExecMode =
       CGOpenMPRuntimeGPU::EM_Unknown;
   CGOpenMPRuntimeGPU::ExecutionMode &ExecMode;
-  bool SavedRuntimeMode = false;
-  bool *RuntimeMode = nullptr;
 
 public:
-  /// Constructor for Non-SPMD mode.
-  ExecutionRuntimeModesRAII(CGOpenMPRuntimeGPU::ExecutionMode &ExecMode)
-      : ExecMode(ExecMode) {
-    SavedExecMode = ExecMode;
-    ExecMode = CGOpenMPRuntimeGPU::EM_NonSPMD;
-  }
-  /// Constructor for SPMD mode.
   ExecutionRuntimeModesRAII(CGOpenMPRuntimeGPU::ExecutionMode &ExecMode,
-                            bool &RuntimeMode, bool FullRuntimeMode)
-      : ExecMode(ExecMode), RuntimeMode(&RuntimeMode) {
+                            CGOpenMPRuntimeGPU::ExecutionMode EntryMode)
+      : ExecMode(ExecMode) {
     SavedExecMode = ExecMode;
-    SavedRuntimeMode = RuntimeMode;
-    ExecMode = CGOpenMPRuntimeGPU::EM_SPMD;
-    RuntimeMode = FullRuntimeMode;
-  }
-  ~ExecutionRuntimeModesRAII() {
-    ExecMode = SavedExecMode;
-    if (RuntimeMode)
-      *RuntimeMode = SavedRuntimeMode;
+    ExecMode = EntryMode;
   }
+  ~ExecutionRuntimeModesRAII() { ExecMode = SavedExecMode; }
 };
 
 /// GPU Configuration: This information can be derived from cuda registers,
@@ -109,9 +95,6 @@ enum MachineConfiguration : unsigned {
 
   /// Global memory alignment for performance.
   GlobalMemoryAlignment = 128,
-
-  /// Maximal size of the shared memory buffer.
-  SharedMemorySize = 128,
 };
 
 static const ValueDecl *getPrivateItem(const Expr *RefExpr) {
@@ -444,9 +427,8 @@ public:
       markAsEscaped(VD);
       if (isa<OMPCapturedExprDecl>(VD))
         VisitValueDecl(VD);
-      else if (const auto *VarD = dyn_cast<VarDecl>(VD))
-        if (VarD->isInitCapture())
-          VisitValueDecl(VD);
+      else if (VD->isInitCapture())
+        VisitValueDecl(VD);
   }
   void VisitUnaryOperator(const UnaryOperator *E) {
     if (!E)
@@ -746,274 +728,13 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx,
       "Unknown programming model for OpenMP directive on NVPTX target.");
 }
 
-/// Check if the directive is loops based and has schedule clause at all or has
-/// static scheduling.
-static bool hasStaticScheduling(const OMPExecutableDirective &D) {
-  assert(isOpenMPWorksharingDirective(D.getDirectiveKind()) &&
-         isOpenMPLoopDirective(D.getDirectiveKind()) &&
-         "Expected loop-based directive.");
-  return !D.hasClausesOfKind<OMPOrderedClause>() &&
-         (!D.hasClausesOfKind<OMPScheduleClause>() ||
-          llvm::any_of(D.getClausesOfKind<OMPScheduleClause>(),
-                       [](const OMPScheduleClause *C) {
-                         return C->getScheduleKind() == OMPC_SCHEDULE_static;
-                       }));
-}
-
-/// Check for inner (nested) lightweight runtime construct, if any
-static bool hasNestedLightweightDirective(ASTContext &Ctx,
-                                          const OMPExecutableDirective &D) {
-  assert(supportsSPMDExecutionMode(Ctx, D) && "Expected SPMD mode directive.");
-  const auto *CS = D.getInnermostCapturedStmt();
-  const auto *Body =
-      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
-  const Stmt *ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
-
-  if (const auto *NestedDir =
-          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
-    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
-    switch (D.getDirectiveKind()) {
-    case OMPD_target:
-      if (isOpenMPParallelDirective(DKind) &&
-          isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) &&
-          hasStaticScheduling(*NestedDir))
-        return true;
-      if (DKind == OMPD_teams_distribute_simd || DKind == OMPD_simd)
-        return true;
-      if (DKind == OMPD_parallel) {
-        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
-            /*IgnoreCaptured=*/true);
-        if (!Body)
-          return false;
-        ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
-        if (const auto *NND =
-                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
-          DKind = NND->getDirectiveKind();
-          if (isOpenMPWorksharingDirective(DKind) &&
-              isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
-            return true;
-        }
-      } else if (DKind == OMPD_teams) {
-        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
-            /*IgnoreCaptured=*/true);
-        if (!Body)
-          return false;
-        ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
-        if (const auto *NND =
-                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
-          DKind = NND->getDirectiveKind();
-          if (isOpenMPParallelDirective(DKind) &&
-              isOpenMPWorksharingDirective(DKind) &&
-              isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
-            return true;
-          if (DKind == OMPD_parallel) {
-            Body = NND->getInnermostCapturedStmt()->IgnoreContainers(
-                /*IgnoreCaptured=*/true);
-            if (!Body)
-              return false;
-            ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
-            if (const auto *NND =
-                    dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
-              DKind = NND->getDirectiveKind();
-              if (isOpenMPWorksharingDirective(DKind) &&
-                  isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
-                return true;
-            }
-          }
-        }
-      }
-      return false;
-    case OMPD_target_teams:
-      if (isOpenMPParallelDirective(DKind) &&
-          isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) &&
-          hasStaticScheduling(*NestedDir))
-        return true;
-      if (DKind == OMPD_distribute_simd || DKind == OMPD_simd)
-        return true;
-      if (DKind == OMPD_parallel) {
-        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
-            /*IgnoreCaptured=*/true);
-        if (!Body)
-          return false;
-        ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
-        if (const auto *NND =
-                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
-          DKind = NND->getDirectiveKind();
-          if (isOpenMPWorksharingDirective(DKind) &&
-              isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
-            return true;
-        }
-      }
-      return false;
-    case OMPD_target_parallel:
-      if (DKind == OMPD_simd)
-        return true;
-      return isOpenMPWorksharingDirective(DKind) &&
-             isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NestedDir);
-    case OMPD_target_teams_distribute:
-    case OMPD_target_simd:
-    case OMPD_target_parallel_for:
-    case OMPD_target_parallel_for_simd:
-    case OMPD_target_teams_distribute_simd:
-    case OMPD_target_teams_distribute_parallel_for:
-    case OMPD_target_teams_distribute_parallel_for_simd:
-    case OMPD_parallel:
-    case OMPD_for:
-    case OMPD_parallel_for:
-    case OMPD_parallel_master:
-    case OMPD_parallel_sections:
-    case OMPD_for_simd:
-    case OMPD_parallel_for_simd:
-    case OMPD_cancel:
-    case OMPD_cancellation_point:
-    case OMPD_ordered:
-    case OMPD_threadprivate:
-    case OMPD_allocate:
-    case OMPD_task:
-    case OMPD_simd:
-    case OMPD_sections:
-    case OMPD_section:
-    case OMPD_single:
-    case OMPD_master:
-    case OMPD_critical:
-    case OMPD_taskyield:
-    case OMPD_barrier:
-    case OMPD_taskwait:
-    case OMPD_taskgroup:
-    case OMPD_atomic:
-    case OMPD_flush:
-    case OMPD_depobj:
-    case OMPD_scan:
-    case OMPD_teams:
-    case OMPD_target_data:
-    case OMPD_target_exit_data:
-    case OMPD_target_enter_data:
-    case OMPD_distribute:
-    case OMPD_distribute_simd:
-    case OMPD_distribute_parallel_for:
-    case OMPD_distribute_parallel_for_simd:
-    case OMPD_teams_distribute:
-    case OMPD_teams_distribute_simd:
-    case OMPD_teams_distribute_parallel_for:
-    case OMPD_teams_distribute_parallel_for_simd:
-    case OMPD_target_update:
-    case OMPD_declare_simd:
-    case OMPD_declare_variant:
-    case OMPD_begin_declare_variant:
-    case OMPD_end_declare_variant:
-    case OMPD_declare_target:
-    case OMPD_end_declare_target:
-    case OMPD_declare_reduction:
-    case OMPD_declare_mapper:
-    case OMPD_taskloop:
-    case OMPD_taskloop_simd:
-    case OMPD_master_taskloop:
-    case OMPD_master_taskloop_simd:
-    case OMPD_parallel_master_taskloop:
-    case OMPD_parallel_master_taskloop_simd:
-    case OMPD_requires:
-    case OMPD_unknown:
-    default:
-      llvm_unreachable("Unexpected directive.");
-    }
-  }
-
-  return false;
-}
-
-/// Checks if the construct supports lightweight runtime. It must be SPMD
-/// construct + inner loop-based construct with static scheduling.
-static bool supportsLightweightRuntime(ASTContext &Ctx,
-                                       const OMPExecutableDirective &D) {
-  if (!supportsSPMDExecutionMode(Ctx, D))
-    return false;
-  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
-  switch (DirectiveKind) {
-  case OMPD_target:
-  case OMPD_target_teams:
-  case OMPD_target_parallel:
-    return hasNestedLightweightDirective(Ctx, D);
-  case OMPD_target_parallel_for:
-  case OMPD_target_parallel_for_simd:
-  case OMPD_target_teams_distribute_parallel_for:
-  case OMPD_target_teams_distribute_parallel_for_simd:
-    // (Last|First)-privates must be shared in parallel region.
-    return hasStaticScheduling(D);
-  case OMPD_target_simd:
-  case OMPD_target_teams_distribute_simd:
-    return true;
-  case OMPD_target_teams_distribute:
-    return false;
-  case OMPD_parallel:
-  case OMPD_for:
-  case OMPD_parallel_for:
-  case OMPD_parallel_master:
-  case OMPD_parallel_sections:
-  case OMPD_for_simd:
-  case OMPD_parallel_for_simd:
-  case OMPD_cancel:
-  case OMPD_cancellation_point:
-  case OMPD_ordered:
-  case OMPD_threadprivate:
-  case OMPD_allocate:
-  case OMPD_task:
-  case OMPD_simd:
-  case OMPD_sections:
-  case OMPD_section:
-  case OMPD_single:
-  case OMPD_master:
-  case OMPD_critical:
-  case OMPD_taskyield:
-  case OMPD_barrier:
-  case OMPD_taskwait:
-  case OMPD_taskgroup:
-  case OMPD_atomic:
-  case OMPD_flush:
-  case OMPD_depobj:
-  case OMPD_scan:
-  case OMPD_teams:
-  case OMPD_target_data:
-  case OMPD_target_exit_data:
-  case OMPD_target_enter_data:
-  case OMPD_distribute:
-  case OMPD_distribute_simd:
-  case OMPD_distribute_parallel_for:
-  case OMPD_distribute_parallel_for_simd:
-  case OMPD_teams_distribute:
-  case OMPD_teams_distribute_simd:
-  case OMPD_teams_distribute_parallel_for:
-  case OMPD_teams_distribute_parallel_for_simd:
-  case OMPD_target_update:
-  case OMPD_declare_simd:
-  case OMPD_declare_variant:
-  case OMPD_begin_declare_variant:
-  case OMPD_end_declare_variant:
-  case OMPD_declare_target:
-  case OMPD_end_declare_target:
-  case OMPD_declare_reduction:
-  case OMPD_declare_mapper:
-  case OMPD_taskloop:
-  case OMPD_taskloop_simd:
-  case OMPD_master_taskloop:
-  case OMPD_master_taskloop_simd:
-  case OMPD_parallel_master_taskloop:
-  case OMPD_parallel_master_taskloop_simd:
-  case OMPD_requires:
-  case OMPD_unknown:
-  default:
-    break;
-  }
-  llvm_unreachable(
-      "Unknown programming model for OpenMP directive on NVPTX target.");
-}
-
 void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D,
                                            StringRef ParentName,
                                            llvm::Function *&OutlinedFn,
                                            llvm::Constant *&OutlinedFnID,
                                            bool IsOffloadEntry,
                                            const RegionCodeGenTy &CodeGen) {
-  ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode);
+  ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode, EM_NonSPMD);
   EntryFunctionState EST;
   WrapperFunctionsMap.clear();
 
@@ -1048,8 +769,7 @@ void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D,
 void CGOpenMPRuntimeGPU::emitKernelInit(CodeGenFunction &CGF,
                                         EntryFunctionState &EST, bool IsSPMD) {
   CGBuilderTy &Bld = CGF.Builder;
-  Bld.restoreIP(OMPBuilder.createTargetInit(Bld, IsSPMD, requiresFullRuntime()));
-  IsInTargetMasterThreadRegion = IsSPMD;
+  Bld.restoreIP(OMPBuilder.createTargetInit(Bld, IsSPMD));
   if (!IsSPMD)
     emitGenericVarsProlog(CGF, EST.Loc);
 }
 
@@ -1061,7 +781,7 @@ void CGOpenMPRuntimeGPU::emitKernelDeinit(CodeGenFunction &CGF,
     emitGenericVarsEpilog(CGF);
 
   CGBuilderTy &Bld = CGF.Builder;
-  OMPBuilder.createTargetDeinit(Bld, IsSPMD, requiresFullRuntime());
+  OMPBuilder.createTargetDeinit(Bld, IsSPMD);
 }
 
 void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D,
@@ -1070,10 +790,7 @@ void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D,
                                         llvm::Constant *&OutlinedFnID,
                                         bool IsOffloadEntry,
                                         const RegionCodeGenTy &CodeGen) {
-  ExecutionRuntimeModesRAII ModeRAII(
-      CurrentExecutionMode, RequiresFullRuntime,
-      CGM.getLangOpts().OpenMPCUDAForceFullRuntime ||
-          !supportsLightweightRuntime(CGM.getContext(), D));
+  ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode, EM_SPMD);
   EntryFunctionState EST;
 
   // Emit target region as a standalone region.
@@ -1116,36 +833,10 @@ static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name,
       llvm::ConstantInt::get(CGM.Int8Ty, Mode ? OMP_TGT_EXEC_MODE_SPMD
                                               : OMP_TGT_EXEC_MODE_GENERIC),
       Twine(Name, "_exec_mode"));
+  GVMode->setVisibility(llvm::GlobalVariable::ProtectedVisibility);
   CGM.addCompilerUsedGlobal(GVMode);
 }
 
-void CGOpenMPRuntimeGPU::createOffloadEntry(llvm::Constant *ID,
-                                            llvm::Constant *Addr,
-                                            uint64_t Size, int32_t,
-                                            llvm::GlobalValue::LinkageTypes) {
-  // TODO: Add support for global variables on the device after declare target
-  // support.
-  llvm::Function *Fn = dyn_cast<llvm::Function>(Addr);
-  if (!Fn)
-    return;
-
-  llvm::Module &M = CGM.getModule();
-  llvm::LLVMContext &Ctx = CGM.getLLVMContext();
-
-  // Get "nvvm.annotations" metadata node.
-  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
-
-  llvm::Metadata *MDVals[] = {
-      llvm::ConstantAsMetadata::get(Fn), llvm::MDString::get(Ctx, "kernel"),
-      llvm::ConstantAsMetadata::get(
-          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
-  // Append metadata to nvvm.annotations.
-  MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
-
-  // Add a function attribute for the kernel.
-  Fn->addFnAttr(llvm::Attribute::get(Ctx, "kernel"));
-}
-
 void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction(
     const OMPExecutableDirective &D, StringRef ParentName,
     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
@@ -1166,39 +857,14 @@ void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction(
   setPropertyExecutionMode(CGM, OutlinedFn->getName(), Mode);
 }
 
-namespace {
-LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
-/// Enum for accesseing the reserved_2 field of the ident_t struct.
-enum ModeFlagsTy : unsigned {
-  /// Bit set to 1 when in SPMD mode.
-  KMP_IDENT_SPMD_MODE = 0x01,
-  /// Bit set to 1 when a simplified runtime is used.
-  KMP_IDENT_SIMPLE_RT_MODE = 0x02,
-  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/KMP_IDENT_SIMPLE_RT_MODE)
-};
-
-/// Special mode Undefined. Is the combination of Non-SPMD mode + SimpleRuntime.
-static const ModeFlagsTy UndefinedMode =
-    (~KMP_IDENT_SPMD_MODE) & KMP_IDENT_SIMPLE_RT_MODE;
-} // anonymous namespace
-
-unsigned CGOpenMPRuntimeGPU::getDefaultLocationReserved2Flags() const {
-  switch (getExecutionMode()) {
-  case EM_SPMD:
-    if (requiresFullRuntime())
-      return KMP_IDENT_SPMD_MODE & (~KMP_IDENT_SIMPLE_RT_MODE);
-    return KMP_IDENT_SPMD_MODE | KMP_IDENT_SIMPLE_RT_MODE;
-  case EM_NonSPMD:
-    assert(requiresFullRuntime() && "Expected full runtime.");
-    return (~KMP_IDENT_SPMD_MODE) & (~KMP_IDENT_SIMPLE_RT_MODE);
-  case EM_Unknown:
-    return UndefinedMode;
-  }
-  llvm_unreachable("Unknown flags are requested.");
-}
-
 CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)
-    : CGOpenMPRuntime(CGM, "_", "$") {
+    : CGOpenMPRuntime(CGM) {
+  llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsDevice, true,
+                                     hasRequiresUnifiedSharedMemory(),
+                                     CGM.getLangOpts().OpenMPOffloadMandatory);
+  OMPBuilder.setConfig(Config);
+  OffloadEntriesInfoManager.setConfig(Config);
+
   if (!CGM.getLangOpts().OpenMPIsDevice)
     llvm_unreachable("OpenMP can only handle device code.");
 
@@ -1214,6 +880,8 @@ CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)
                              "__omp_rtl_assume_threads_oversubscription");
   OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPNoThreadState,
                               "__omp_rtl_assume_no_thread_state");
+  OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPNoNestedParallelism,
+                              "__omp_rtl_assume_no_nested_parallelism");
 }
 
 void CGOpenMPRuntimeGPU::emitProcBindClause(CodeGenFunction &CGF,
@@ -1241,33 +909,13 @@ llvm::Function *CGOpenMPRuntimeGPU::emitParallelOutlinedFunction(
     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
   // Emit target region as a standalone region.
-  class NVPTXPrePostActionTy : public PrePostActionTy {
-    bool &IsInParallelRegion;
-    bool PrevIsInParallelRegion;
-
-  public:
-    NVPTXPrePostActionTy(bool &IsInParallelRegion)
-        : IsInParallelRegion(IsInParallelRegion) {}
-    void Enter(CodeGenFunction &CGF) override {
-      PrevIsInParallelRegion = IsInParallelRegion;
-      IsInParallelRegion = true;
-    }
-    void Exit(CodeGenFunction &CGF) override {
-      IsInParallelRegion = PrevIsInParallelRegion;
-    }
-  } Action(IsInParallelRegion);
-  CodeGen.setAction(Action);
   bool PrevIsInTTDRegion = IsInTTDRegion;
   IsInTTDRegion = false;
-  bool PrevIsInTargetMasterThreadRegion = IsInTargetMasterThreadRegion;
-  IsInTargetMasterThreadRegion = false;
   auto *OutlinedFun =
       cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction(
          D, ThreadIDVar, InnermostKind, CodeGen));
-  IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion;
   IsInTTDRegion = PrevIsInTTDRegion;
-  if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD &&
-      !IsInParallelRegion) {
+  if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD) {
     llvm::Function *WrapperFun =
         createParallelDataSharingWrapper(OutlinedFun, D);
     WrapperFunctionsMap[OutlinedFun] = WrapperFun;
@@ -1330,7 +978,7 @@ llvm::Function *CGOpenMPRuntimeGPU::emitTeamsOutlinedFunction(
     getDistributeLastprivateVars(CGM.getContext(), D, LastPrivatesReductions);
     if (!LastPrivatesReductions.empty()) {
       GlobalizedRD = ::buildRecordForGlobalizedVars(
-          CGM.getContext(), llvm::None, LastPrivatesReductions,
+          CGM.getContext(), std::nullopt, LastPrivatesReductions,
          MappedDeclsFields, WarpSize);
     }
   } else if (!LastPrivatesReductions.empty()) {
@@ -3307,7 +2955,7 @@ void CGOpenMPRuntimeGPU::emitReduction(
     ++Cnt;
   }
   const RecordDecl *TeamReductionRec = ::buildRecordForGlobalizedVars(
-      CGM.getContext(), PrivatesReductions, llvm::None, VarFieldMap,
+      CGM.getContext(), PrivatesReductions, std::nullopt, VarFieldMap,
       C.getLangOpts().OpenMPCUDAReductionBufNum);
   TeamsReductions.push_back(TeamReductionRec);
   if (!KernelTeamsReductionPtr) {
@@ -3379,7 +3027,7 @@ void CGOpenMPRuntimeGPU::emitReduction(
   llvm::Value *EndArgs[] = {ThreadId};
   RegionCodeGenTy RCG(CodeGen);
   NVPTXActionTy Action(
-      nullptr, llvm::None,
+      nullptr, std::nullopt,
       OMPBuilder.getOrCreateRuntimeFunction(
           CGM.getModule(), OMPRTL___kmpc_nvptx_end_reduce_nowait),
       EndArgs);
@@ -3435,7 +3083,7 @@ CGOpenMPRuntimeGPU::getParameterAddress(CodeGenFunction &CGF,
   const Type *NonQualTy = QC.strip(NativeParamType);
   QualType NativePointeeTy = cast<ReferenceType>(NonQualTy)->getPointeeType();
   unsigned NativePointeeAddrSpace =
-      CGF.getContext().getTargetAddressSpace(NativePointeeTy);
+      CGF.getTypes().getTargetAddressSpace(NativePointeeTy);
   QualType TargetTy = TargetParam->getType();
   llvm::Value *TargetAddr = CGF.EmitLoadOfScalar(
       LocalAddr, /*Volatile=*/false, TargetTy, SourceLocation());
@@ -3659,16 +3307,6 @@ void CGOpenMPRuntimeGPU::emitFunctionProlog(CodeGenFunction &CGF,
     assert(VD->isCanonicalDecl() && "Expected canonical declaration");
     Data.insert(std::make_pair(VD, MappedVarData()));
   }
-  if (!IsInTTDRegion && !NeedToDelayGlobalization && !IsInParallelRegion) {
-    CheckVarsEscapingDeclContext VarChecker(CGF, llvm::None);
-    VarChecker.Visit(Body);
-    I->getSecond().SecondaryLocalVarData.emplace();
-    DeclToAddrMapTy &Data = *I->getSecond().SecondaryLocalVarData;
-    for (const ValueDecl *VD : VarChecker.getEscapedDecls()) {
-      assert(VD->isCanonicalDecl() && "Expected canonical declaration");
-      Data.insert(std::make_pair(VD, MappedVarData()));
-    }
-  }
   if (!NeedToDelayGlobalization) {
     emitGenericVarsProlog(CGF, D->getBeginLoc(), /*WithSPMDCheck=*/true);
     struct GlobalizationScope final : EHScopeStack::Cleanup {
@@ -3810,7 +3448,7 @@ void CGOpenMPRuntimeGPU::adjustTargetSpecificDataForLambdas(
     else
       VDLVal = CGF.MakeAddrLValue(
          VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
-    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
+    llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
     FieldDecl *ThisCapture = nullptr;
     RD->getCaptureFields(Captures, ThisCapture);
     if (ThisCapture && CGF.CapturedStmtInfo->isCXXThisExprCaptured()) {
@@ -3822,13 +3460,15 @@ void CGOpenMPRuntimeGPU::adjustTargetSpecificDataForLambdas(
     for (const LambdaCapture &LC : RD->captures()) {
       if (LC.getCaptureKind() != LCK_ByRef)
         continue;
-      const VarDecl *VD = LC.getCapturedVar();
-      if (!CS->capturesVariable(VD))
+      const ValueDecl *VD = LC.getCapturedVar();
+      // FIXME: For now VD is always a VarDecl because OpenMP does not support
+      // capturing structured bindings in lambdas yet.
+      if (!CS->capturesVariable(cast<VarDecl>(VD)))
         continue;
       auto It = Captures.find(VD);
       assert(It != Captures.end() && "Found lambda capture without field.");
       LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
-      Address VDAddr = CGF.GetAddrOfLocalVar(VD);
+      Address VDAddr = CGF.GetAddrOfLocalVar(cast<VarDecl>(VD));
       if (VD->getType().getCanonicalType()->isReferenceType())
         VDAddr = CGF.EmitLoadOfReferenceLValue(VDAddr,
                                                VD->getType().getCanonicalType())
@@ -3913,6 +3553,9 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(
       case CudaArch::SM_75:
       case CudaArch::SM_80:
       case CudaArch::SM_86:
+      case CudaArch::SM_87:
+      case CudaArch::SM_89:
+      case CudaArch::SM_90:
       case CudaArch::GFX600:
       case CudaArch::GFX601:
       case CudaArch::GFX602:
@@ -4006,10 +3649,10 @@ llvm::Value *CGOpenMPRuntimeGPU::getGPUNumThreads(CodeGenFunction &CGF) {
   llvm::Function *F = M->getFunction(LocSize);
   if (!F) {
     F = llvm::Function::Create(
-        llvm::FunctionType::get(CGF.Int32Ty, llvm::None, false),
+        llvm::FunctionType::get(CGF.Int32Ty, std::nullopt, false),
         llvm::GlobalVariable::ExternalLinkage, LocSize, &CGF.CGM.getModule());
   }
-  return Bld.CreateCall(F, llvm::None, "nvptx_num_threads");
+  return Bld.CreateCall(F, std::nullopt, "nvptx_num_threads");
 }
 
 llvm::Value *CGOpenMPRuntimeGPU::getGPUThreadID(CodeGenFunction &CGF) {
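For readers following the patch: the bulk of the deletions removes the old "lightweight vs. full runtime" machinery (`hasStaticScheduling`, `hasNestedLightweightDirective`, `supportsLightweightRuntime`, the `KMP_IDENT_*` reserved_2 flags, and the `RuntimeMode` bookkeeping in `ExecutionRuntimeModesRAII`), leaving a guard that only saves and restores the execution mode. Below is a minimal stand-alone sketch of the resulting RAII pattern; the types are simplified stand-ins for illustration, not the actual clang classes:

```cpp
// Sketch of the save/install/restore pattern the patch settles on:
// record the surrounding execution mode, install the entry mode for the
// kernel being emitted, and restore the old mode on scope exit.
#include <cassert>

enum class ExecutionMode { Unknown, SPMD, NonSPMD };

class ExecutionModeRAII {
  ExecutionMode Saved;    // mode in effect before this kernel
  ExecutionMode &Current; // codegen-wide mode being guarded

public:
  ExecutionModeRAII(ExecutionMode &Mode, ExecutionMode Entry)
      : Saved(Mode), Current(Mode) {
    Current = Entry; // EM_SPMD in emitSPMDKernel, EM_NonSPMD in emitNonSPMDKernel
  }
  ~ExecutionModeRAII() { Current = Saved; } // restore the previous mode
};

ExecutionMode CurrentExecutionMode = ExecutionMode::Unknown;

void emitKernel(bool IsSPMD) {
  ExecutionModeRAII ModeRAII(CurrentExecutionMode,
                             IsSPMD ? ExecutionMode::SPMD
                                    : ExecutionMode::NonSPMD);
  // ... emit the target region; nested codegen consults CurrentExecutionMode ...
  assert(CurrentExecutionMode != ExecutionMode::Unknown);
} // ModeRAII's destructor restores the outer mode here

int main() {
  emitKernel(/*IsSPMD=*/true);
  assert(CurrentExecutionMode == ExecutionMode::Unknown);
  return 0;
}
```

With a single constructor taking the entry mode explicitly, the previous pair of mode-specific constructors collapses into one, and `emitSPMDKernel`/`emitNonSPMDKernel` now differ only in the argument they pass (`EM_SPMD` vs. `EM_NonSPMD`).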