author     Dimitry Andric <dim@FreeBSD.org>  2023-02-11 12:38:04 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2023-02-11 12:38:11 +0000
commit     e3b557809604d036af6e00c60f012c2025b59a5e
tree       8a11ba2269a3b669601e2fd41145b174008f4da8
parent     08e8dd7b9db7bb4a9de26d44c1cbfd24e869c014
Diffstat (limited to 'clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp')
-rw-r--r--  clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 431
1 file changed, 37 insertions(+), 394 deletions(-)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 6dea846f486f..e8c5f04db49f 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -15,6 +15,7 @@
#include "CodeGenFunction.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
+#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/Cuda.h"
@@ -73,30 +74,15 @@ private:
CGOpenMPRuntimeGPU::ExecutionMode SavedExecMode =
CGOpenMPRuntimeGPU::EM_Unknown;
CGOpenMPRuntimeGPU::ExecutionMode &ExecMode;
- bool SavedRuntimeMode = false;
- bool *RuntimeMode = nullptr;
public:
- /// Constructor for Non-SPMD mode.
- ExecutionRuntimeModesRAII(CGOpenMPRuntimeGPU::ExecutionMode &ExecMode)
- : ExecMode(ExecMode) {
- SavedExecMode = ExecMode;
- ExecMode = CGOpenMPRuntimeGPU::EM_NonSPMD;
- }
- /// Constructor for SPMD mode.
ExecutionRuntimeModesRAII(CGOpenMPRuntimeGPU::ExecutionMode &ExecMode,
- bool &RuntimeMode, bool FullRuntimeMode)
- : ExecMode(ExecMode), RuntimeMode(&RuntimeMode) {
+ CGOpenMPRuntimeGPU::ExecutionMode EntryMode)
+ : ExecMode(ExecMode) {
SavedExecMode = ExecMode;
- SavedRuntimeMode = RuntimeMode;
- ExecMode = CGOpenMPRuntimeGPU::EM_SPMD;
- RuntimeMode = FullRuntimeMode;
- }
- ~ExecutionRuntimeModesRAII() {
- ExecMode = SavedExecMode;
- if (RuntimeMode)
- *RuntimeMode = SavedRuntimeMode;
+ ExecMode = EntryMode;
}
+ ~ExecutionRuntimeModesRAII() { ExecMode = SavedExecMode; }
};
/// GPU Configuration: This information can be derived from cuda registers,
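Net effect of the hunk above: ExecutionRuntimeModesRAII loses its two mode-specific constructors and the saved/restored full-runtime flag, leaving a plain save/set/restore guard over the execution mode. A minimal standalone sketch of that pattern (hypothetical names, not the Clang class itself):

    #include <cassert>

    enum class ExecMode { Unknown, SPMD, NonSPMD };

    // Save/set/restore guard: set a caller-visible mode for the lifetime of
    // a scope and restore the previous value on scope exit.
    class ExecModeRAII {
      ExecMode &Mode;
      ExecMode Saved;

    public:
      ExecModeRAII(ExecMode &Mode, ExecMode Entry) : Mode(Mode), Saved(Mode) {
        Mode = Entry;
      }
      ~ExecModeRAII() { Mode = Saved; }
    };

    int main() {
      ExecMode Current = ExecMode::Unknown;
      {
        ExecModeRAII Guard(Current, ExecMode::SPMD);
        assert(Current == ExecMode::SPMD); // mode active inside the region
      }
      assert(Current == ExecMode::Unknown); // restored on every exit path
    }

Because the restore lives in the destructor, the mode is reset correctly even on early returns from the emission functions.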
@@ -109,9 +95,6 @@ enum MachineConfiguration : unsigned {
/// Global memory alignment for performance.
GlobalMemoryAlignment = 128,
-
- /// Maximal size of the shared memory buffer.
- SharedMemorySize = 128,
};
static const ValueDecl *getPrivateItem(const Expr *RefExpr) {
@@ -444,9 +427,8 @@ public:
markAsEscaped(VD);
if (isa<OMPCapturedExprDecl>(VD))
VisitValueDecl(VD);
- else if (const auto *VarD = dyn_cast<VarDecl>(VD))
- if (VarD->isInitCapture())
- VisitValueDecl(VD);
+ else if (VD->isInitCapture())
+ VisitValueDecl(VD);
}
void VisitUnaryOperator(const UnaryOperator *E) {
if (!E)
@@ -746,274 +728,13 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx,
"Unknown programming model for OpenMP directive on NVPTX target.");
}
-/// Check if the directive is loops based and has schedule clause at all or has
-/// static scheduling.
-static bool hasStaticScheduling(const OMPExecutableDirective &D) {
- assert(isOpenMPWorksharingDirective(D.getDirectiveKind()) &&
- isOpenMPLoopDirective(D.getDirectiveKind()) &&
- "Expected loop-based directive.");
- return !D.hasClausesOfKind<OMPOrderedClause>() &&
- (!D.hasClausesOfKind<OMPScheduleClause>() ||
- llvm::any_of(D.getClausesOfKind<OMPScheduleClause>(),
- [](const OMPScheduleClause *C) {
- return C->getScheduleKind() == OMPC_SCHEDULE_static;
- }));
-}
-
-/// Check for inner (nested) lightweight runtime construct, if any
-static bool hasNestedLightweightDirective(ASTContext &Ctx,
- const OMPExecutableDirective &D) {
- assert(supportsSPMDExecutionMode(Ctx, D) && "Expected SPMD mode directive.");
- const auto *CS = D.getInnermostCapturedStmt();
- const auto *Body =
- CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
- const Stmt *ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
-
- if (const auto *NestedDir =
- dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
- OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
- switch (D.getDirectiveKind()) {
- case OMPD_target:
- if (isOpenMPParallelDirective(DKind) &&
- isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) &&
- hasStaticScheduling(*NestedDir))
- return true;
- if (DKind == OMPD_teams_distribute_simd || DKind == OMPD_simd)
- return true;
- if (DKind == OMPD_parallel) {
- Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
- /*IgnoreCaptured=*/true);
- if (!Body)
- return false;
- ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
- if (const auto *NND =
- dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
- DKind = NND->getDirectiveKind();
- if (isOpenMPWorksharingDirective(DKind) &&
- isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
- return true;
- }
- } else if (DKind == OMPD_teams) {
- Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
- /*IgnoreCaptured=*/true);
- if (!Body)
- return false;
- ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
- if (const auto *NND =
- dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
- DKind = NND->getDirectiveKind();
- if (isOpenMPParallelDirective(DKind) &&
- isOpenMPWorksharingDirective(DKind) &&
- isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
- return true;
- if (DKind == OMPD_parallel) {
- Body = NND->getInnermostCapturedStmt()->IgnoreContainers(
- /*IgnoreCaptured=*/true);
- if (!Body)
- return false;
- ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
- if (const auto *NND =
- dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
- DKind = NND->getDirectiveKind();
- if (isOpenMPWorksharingDirective(DKind) &&
- isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
- return true;
- }
- }
- }
- }
- return false;
- case OMPD_target_teams:
- if (isOpenMPParallelDirective(DKind) &&
- isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) &&
- hasStaticScheduling(*NestedDir))
- return true;
- if (DKind == OMPD_distribute_simd || DKind == OMPD_simd)
- return true;
- if (DKind == OMPD_parallel) {
- Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
- /*IgnoreCaptured=*/true);
- if (!Body)
- return false;
- ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
- if (const auto *NND =
- dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
- DKind = NND->getDirectiveKind();
- if (isOpenMPWorksharingDirective(DKind) &&
- isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
- return true;
- }
- }
- return false;
- case OMPD_target_parallel:
- if (DKind == OMPD_simd)
- return true;
- return isOpenMPWorksharingDirective(DKind) &&
- isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NestedDir);
- case OMPD_target_teams_distribute:
- case OMPD_target_simd:
- case OMPD_target_parallel_for:
- case OMPD_target_parallel_for_simd:
- case OMPD_target_teams_distribute_simd:
- case OMPD_target_teams_distribute_parallel_for:
- case OMPD_target_teams_distribute_parallel_for_simd:
- case OMPD_parallel:
- case OMPD_for:
- case OMPD_parallel_for:
- case OMPD_parallel_master:
- case OMPD_parallel_sections:
- case OMPD_for_simd:
- case OMPD_parallel_for_simd:
- case OMPD_cancel:
- case OMPD_cancellation_point:
- case OMPD_ordered:
- case OMPD_threadprivate:
- case OMPD_allocate:
- case OMPD_task:
- case OMPD_simd:
- case OMPD_sections:
- case OMPD_section:
- case OMPD_single:
- case OMPD_master:
- case OMPD_critical:
- case OMPD_taskyield:
- case OMPD_barrier:
- case OMPD_taskwait:
- case OMPD_taskgroup:
- case OMPD_atomic:
- case OMPD_flush:
- case OMPD_depobj:
- case OMPD_scan:
- case OMPD_teams:
- case OMPD_target_data:
- case OMPD_target_exit_data:
- case OMPD_target_enter_data:
- case OMPD_distribute:
- case OMPD_distribute_simd:
- case OMPD_distribute_parallel_for:
- case OMPD_distribute_parallel_for_simd:
- case OMPD_teams_distribute:
- case OMPD_teams_distribute_simd:
- case OMPD_teams_distribute_parallel_for:
- case OMPD_teams_distribute_parallel_for_simd:
- case OMPD_target_update:
- case OMPD_declare_simd:
- case OMPD_declare_variant:
- case OMPD_begin_declare_variant:
- case OMPD_end_declare_variant:
- case OMPD_declare_target:
- case OMPD_end_declare_target:
- case OMPD_declare_reduction:
- case OMPD_declare_mapper:
- case OMPD_taskloop:
- case OMPD_taskloop_simd:
- case OMPD_master_taskloop:
- case OMPD_master_taskloop_simd:
- case OMPD_parallel_master_taskloop:
- case OMPD_parallel_master_taskloop_simd:
- case OMPD_requires:
- case OMPD_unknown:
- default:
- llvm_unreachable("Unexpected directive.");
- }
- }
-
- return false;
-}
-
-/// Checks if the construct supports lightweight runtime. It must be SPMD
-/// construct + inner loop-based construct with static scheduling.
-static bool supportsLightweightRuntime(ASTContext &Ctx,
- const OMPExecutableDirective &D) {
- if (!supportsSPMDExecutionMode(Ctx, D))
- return false;
- OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
- switch (DirectiveKind) {
- case OMPD_target:
- case OMPD_target_teams:
- case OMPD_target_parallel:
- return hasNestedLightweightDirective(Ctx, D);
- case OMPD_target_parallel_for:
- case OMPD_target_parallel_for_simd:
- case OMPD_target_teams_distribute_parallel_for:
- case OMPD_target_teams_distribute_parallel_for_simd:
- // (Last|First)-privates must be shared in parallel region.
- return hasStaticScheduling(D);
- case OMPD_target_simd:
- case OMPD_target_teams_distribute_simd:
- return true;
- case OMPD_target_teams_distribute:
- return false;
- case OMPD_parallel:
- case OMPD_for:
- case OMPD_parallel_for:
- case OMPD_parallel_master:
- case OMPD_parallel_sections:
- case OMPD_for_simd:
- case OMPD_parallel_for_simd:
- case OMPD_cancel:
- case OMPD_cancellation_point:
- case OMPD_ordered:
- case OMPD_threadprivate:
- case OMPD_allocate:
- case OMPD_task:
- case OMPD_simd:
- case OMPD_sections:
- case OMPD_section:
- case OMPD_single:
- case OMPD_master:
- case OMPD_critical:
- case OMPD_taskyield:
- case OMPD_barrier:
- case OMPD_taskwait:
- case OMPD_taskgroup:
- case OMPD_atomic:
- case OMPD_flush:
- case OMPD_depobj:
- case OMPD_scan:
- case OMPD_teams:
- case OMPD_target_data:
- case OMPD_target_exit_data:
- case OMPD_target_enter_data:
- case OMPD_distribute:
- case OMPD_distribute_simd:
- case OMPD_distribute_parallel_for:
- case OMPD_distribute_parallel_for_simd:
- case OMPD_teams_distribute:
- case OMPD_teams_distribute_simd:
- case OMPD_teams_distribute_parallel_for:
- case OMPD_teams_distribute_parallel_for_simd:
- case OMPD_target_update:
- case OMPD_declare_simd:
- case OMPD_declare_variant:
- case OMPD_begin_declare_variant:
- case OMPD_end_declare_variant:
- case OMPD_declare_target:
- case OMPD_end_declare_target:
- case OMPD_declare_reduction:
- case OMPD_declare_mapper:
- case OMPD_taskloop:
- case OMPD_taskloop_simd:
- case OMPD_master_taskloop:
- case OMPD_master_taskloop_simd:
- case OMPD_parallel_master_taskloop:
- case OMPD_parallel_master_taskloop_simd:
- case OMPD_requires:
- case OMPD_unknown:
- default:
- break;
- }
- llvm_unreachable(
- "Unknown programming model for OpenMP directive on NVPTX target.");
-}
-
void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D,
StringRef ParentName,
llvm::Function *&OutlinedFn,
llvm::Constant *&OutlinedFnID,
bool IsOffloadEntry,
const RegionCodeGenTy &CodeGen) {
- ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode);
+ ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode, EM_NonSPMD);
EntryFunctionState EST;
WrapperFunctionsMap.clear();
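For orientation, the deleted hasStaticScheduling()/hasNestedLightweightDirective()/supportsLightweightRuntime() trio decided when an SPMD kernel could use the simplified ("lightweight") runtime: the loop-based construct had to carry no ordered clause and either no schedule clause or an explicit schedule(static). With this commit that distinction is gone and only the SPMD vs. generic split remains. A hedged illustration of the kind of construct the removed heuristic accepted (example code, not from the commit):

    // Combined SPMD construct with static scheduling: no ordered clause,
    // explicit schedule(static). The removed supportsLightweightRuntime()
    // would have classified this target region as lightweight-eligible.
    void saxpy(int n, float a, const float *x, float *y) {
    #pragma omp target teams distribute parallel for schedule(static) \
        map(to: x[0:n]) map(tofrom: y[0:n])
      for (int i = 0; i < n; ++i)
        y[i] = a * x[i] + y[i];
    }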
@@ -1048,8 +769,7 @@ void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D,
void CGOpenMPRuntimeGPU::emitKernelInit(CodeGenFunction &CGF,
EntryFunctionState &EST, bool IsSPMD) {
CGBuilderTy &Bld = CGF.Builder;
- Bld.restoreIP(OMPBuilder.createTargetInit(Bld, IsSPMD, requiresFullRuntime()));
- IsInTargetMasterThreadRegion = IsSPMD;
+ Bld.restoreIP(OMPBuilder.createTargetInit(Bld, IsSPMD));
if (!IsSPMD)
emitGenericVarsProlog(CGF, EST.Loc);
}
@@ -1061,7 +781,7 @@ void CGOpenMPRuntimeGPU::emitKernelDeinit(CodeGenFunction &CGF,
emitGenericVarsEpilog(CGF);
CGBuilderTy &Bld = CGF.Builder;
- OMPBuilder.createTargetDeinit(Bld, IsSPMD, requiresFullRuntime());
+ OMPBuilder.createTargetDeinit(Bld, IsSPMD);
}
void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D,
@@ -1070,10 +790,7 @@ void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D,
llvm::Constant *&OutlinedFnID,
bool IsOffloadEntry,
const RegionCodeGenTy &CodeGen) {
- ExecutionRuntimeModesRAII ModeRAII(
- CurrentExecutionMode, RequiresFullRuntime,
- CGM.getLangOpts().OpenMPCUDAForceFullRuntime ||
- !supportsLightweightRuntime(CGM.getContext(), D));
+ ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode, EM_SPMD);
EntryFunctionState EST;
// Emit target region as a standalone region.
@@ -1116,36 +833,10 @@ static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name,
llvm::ConstantInt::get(CGM.Int8Ty, Mode ? OMP_TGT_EXEC_MODE_SPMD
: OMP_TGT_EXEC_MODE_GENERIC),
Twine(Name, "_exec_mode"));
+ GVMode->setVisibility(llvm::GlobalVariable::ProtectedVisibility);
CGM.addCompilerUsedGlobal(GVMode);
}
-void CGOpenMPRuntimeGPU::createOffloadEntry(llvm::Constant *ID,
- llvm::Constant *Addr,
- uint64_t Size, int32_t,
- llvm::GlobalValue::LinkageTypes) {
- // TODO: Add support for global variables on the device after declare target
- // support.
- llvm::Function *Fn = dyn_cast<llvm::Function>(Addr);
- if (!Fn)
- return;
-
- llvm::Module &M = CGM.getModule();
- llvm::LLVMContext &Ctx = CGM.getLLVMContext();
-
- // Get "nvvm.annotations" metadata node.
- llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
-
- llvm::Metadata *MDVals[] = {
- llvm::ConstantAsMetadata::get(Fn), llvm::MDString::get(Ctx, "kernel"),
- llvm::ConstantAsMetadata::get(
- llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
- // Append metadata to nvvm.annotations.
- MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
-
- // Add a function attribute for the kernel.
- Fn->addFnAttr(llvm::Attribute::get(Ctx, "kernel"));
-}
-
void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction(
const OMPExecutableDirective &D, StringRef ParentName,
llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
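Two changes land in the hunk above: the per-kernel _exec_mode marker global now gets protected visibility, and the NVPTX-specific createOffloadEntry override that attached the "kernel" nvvm.annotations metadata is deleted. A minimal sketch of the marker emission (hypothetical standalone helper; the linkage and OMP_TGT_EXEC_MODE_* encodings are simplified placeholders):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/Twine.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/IR/Module.h"

    // Emit the i8 marker global "<kernel>_exec_mode" that the device runtime
    // reads to choose between SPMD and generic execution for a kernel.
    static void emitExecModeGlobal(llvm::Module &M, llvm::StringRef Kernel,
                                   bool IsSPMD) {
      llvm::Type *I8 = llvm::Type::getInt8Ty(M.getContext());
      auto *GV = new llvm::GlobalVariable(
          M, I8, /*isConstant=*/true, llvm::GlobalValue::WeakAnyLinkage,
          // Placeholder encoding; the real code stores OMP_TGT_EXEC_MODE_SPMD
          // or OMP_TGT_EXEC_MODE_GENERIC.
          llvm::ConstantInt::get(I8, IsSPMD ? 0 : 1), Kernel + "_exec_mode");
      // The line this commit adds: keep the symbol visible to the runtime but
      // non-preemptible across shared-object boundaries.
      GV->setVisibility(llvm::GlobalValue::ProtectedVisibility);
    }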
@@ -1166,39 +857,14 @@ void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction(
setPropertyExecutionMode(CGM, OutlinedFn->getName(), Mode);
}
-namespace {
-LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
-/// Enum for accesseing the reserved_2 field of the ident_t struct.
-enum ModeFlagsTy : unsigned {
- /// Bit set to 1 when in SPMD mode.
- KMP_IDENT_SPMD_MODE = 0x01,
- /// Bit set to 1 when a simplified runtime is used.
- KMP_IDENT_SIMPLE_RT_MODE = 0x02,
- LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/KMP_IDENT_SIMPLE_RT_MODE)
-};
-
-/// Special mode Undefined. Is the combination of Non-SPMD mode + SimpleRuntime.
-static const ModeFlagsTy UndefinedMode =
- (~KMP_IDENT_SPMD_MODE) & KMP_IDENT_SIMPLE_RT_MODE;
-} // anonymous namespace
-
-unsigned CGOpenMPRuntimeGPU::getDefaultLocationReserved2Flags() const {
- switch (getExecutionMode()) {
- case EM_SPMD:
- if (requiresFullRuntime())
- return KMP_IDENT_SPMD_MODE & (~KMP_IDENT_SIMPLE_RT_MODE);
- return KMP_IDENT_SPMD_MODE | KMP_IDENT_SIMPLE_RT_MODE;
- case EM_NonSPMD:
- assert(requiresFullRuntime() && "Expected full runtime.");
- return (~KMP_IDENT_SPMD_MODE) & (~KMP_IDENT_SIMPLE_RT_MODE);
- case EM_Unknown:
- return UndefinedMode;
- }
- llvm_unreachable("Unknown flags are requested.");
-}
-
CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)
- : CGOpenMPRuntime(CGM, "_", "$") {
+ : CGOpenMPRuntime(CGM) {
+ llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsDevice, true,
+ hasRequiresUnifiedSharedMemory(),
+ CGM.getLangOpts().OpenMPOffloadMandatory);
+ OMPBuilder.setConfig(Config);
+ OffloadEntriesInfoManager.setConfig(Config);
+
if (!CGM.getLangOpts().OpenMPIsDevice)
llvm_unreachable("OpenMP can only handle device code.");
@@ -1214,6 +880,8 @@ CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)
"__omp_rtl_assume_threads_oversubscription");
OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPNoThreadState,
"__omp_rtl_assume_no_thread_state");
+ OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPNoNestedParallelism,
+ "__omp_rtl_assume_no_nested_parallelism");
}
void CGOpenMPRuntimeGPU::emitProcBindClause(CodeGenFunction &CGF,
@@ -1241,33 +909,13 @@ llvm::Function *CGOpenMPRuntimeGPU::emitParallelOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
// Emit target region as a standalone region.
- class NVPTXPrePostActionTy : public PrePostActionTy {
- bool &IsInParallelRegion;
- bool PrevIsInParallelRegion;
-
- public:
- NVPTXPrePostActionTy(bool &IsInParallelRegion)
- : IsInParallelRegion(IsInParallelRegion) {}
- void Enter(CodeGenFunction &CGF) override {
- PrevIsInParallelRegion = IsInParallelRegion;
- IsInParallelRegion = true;
- }
- void Exit(CodeGenFunction &CGF) override {
- IsInParallelRegion = PrevIsInParallelRegion;
- }
- } Action(IsInParallelRegion);
- CodeGen.setAction(Action);
bool PrevIsInTTDRegion = IsInTTDRegion;
IsInTTDRegion = false;
- bool PrevIsInTargetMasterThreadRegion = IsInTargetMasterThreadRegion;
- IsInTargetMasterThreadRegion = false;
auto *OutlinedFun =
cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction(
D, ThreadIDVar, InnermostKind, CodeGen));
- IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion;
IsInTTDRegion = PrevIsInTTDRegion;
- if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD &&
- !IsInParallelRegion) {
+ if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD) {
llvm::Function *WrapperFun =
createParallelDataSharingWrapper(OutlinedFun, D);
WrapperFunctionsMap[OutlinedFun] = WrapperFun;
@@ -1330,7 +978,7 @@ llvm::Function *CGOpenMPRuntimeGPU::emitTeamsOutlinedFunction(
getDistributeLastprivateVars(CGM.getContext(), D, LastPrivatesReductions);
if (!LastPrivatesReductions.empty()) {
GlobalizedRD = ::buildRecordForGlobalizedVars(
- CGM.getContext(), llvm::None, LastPrivatesReductions,
+ CGM.getContext(), std::nullopt, LastPrivatesReductions,
MappedDeclsFields, WarpSize);
}
} else if (!LastPrivatesReductions.empty()) {
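The llvm::None to std::nullopt substitutions in this and the following hunks track LLVM's migration from llvm::Optional to the standard std::optional; the rewrite is mechanical:

    #include <optional>

    // Before the migration: llvm::Optional<unsigned> W = llvm::None;
    // After: the standard vocabulary type replaces the LLVM one.
    std::optional<unsigned> defaultWidth() { return std::nullopt; }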
@@ -3307,7 +2955,7 @@ void CGOpenMPRuntimeGPU::emitReduction(
++Cnt;
}
const RecordDecl *TeamReductionRec = ::buildRecordForGlobalizedVars(
- CGM.getContext(), PrivatesReductions, llvm::None, VarFieldMap,
+ CGM.getContext(), PrivatesReductions, std::nullopt, VarFieldMap,
C.getLangOpts().OpenMPCUDAReductionBufNum);
TeamsReductions.push_back(TeamReductionRec);
if (!KernelTeamsReductionPtr) {
@@ -3379,7 +3027,7 @@ void CGOpenMPRuntimeGPU::emitReduction(
llvm::Value *EndArgs[] = {ThreadId};
RegionCodeGenTy RCG(CodeGen);
NVPTXActionTy Action(
- nullptr, llvm::None,
+ nullptr, std::nullopt,
OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_nvptx_end_reduce_nowait),
EndArgs);
@@ -3435,7 +3083,7 @@ CGOpenMPRuntimeGPU::getParameterAddress(CodeGenFunction &CGF,
const Type *NonQualTy = QC.strip(NativeParamType);
QualType NativePointeeTy = cast<ReferenceType>(NonQualTy)->getPointeeType();
unsigned NativePointeeAddrSpace =
- CGF.getContext().getTargetAddressSpace(NativePointeeTy);
+ CGF.getTypes().getTargetAddressSpace(NativePointeeTy);
QualType TargetTy = TargetParam->getType();
llvm::Value *TargetAddr = CGF.EmitLoadOfScalar(
LocalAddr, /*Volatile=*/false, TargetTy, SourceLocation());
@@ -3659,16 +3307,6 @@ void CGOpenMPRuntimeGPU::emitFunctionProlog(CodeGenFunction &CGF,
assert(VD->isCanonicalDecl() && "Expected canonical declaration");
Data.insert(std::make_pair(VD, MappedVarData()));
}
- if (!IsInTTDRegion && !NeedToDelayGlobalization && !IsInParallelRegion) {
- CheckVarsEscapingDeclContext VarChecker(CGF, llvm::None);
- VarChecker.Visit(Body);
- I->getSecond().SecondaryLocalVarData.emplace();
- DeclToAddrMapTy &Data = *I->getSecond().SecondaryLocalVarData;
- for (const ValueDecl *VD : VarChecker.getEscapedDecls()) {
- assert(VD->isCanonicalDecl() && "Expected canonical declaration");
- Data.insert(std::make_pair(VD, MappedVarData()));
- }
- }
if (!NeedToDelayGlobalization) {
emitGenericVarsProlog(CGF, D->getBeginLoc(), /*WithSPMDCheck=*/true);
struct GlobalizationScope final : EHScopeStack::Cleanup {
@@ -3810,7 +3448,7 @@ void CGOpenMPRuntimeGPU::adjustTargetSpecificDataForLambdas(
else
VDLVal = CGF.MakeAddrLValue(
VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
- llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
+ llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
FieldDecl *ThisCapture = nullptr;
RD->getCaptureFields(Captures, ThisCapture);
if (ThisCapture && CGF.CapturedStmtInfo->isCXXThisExprCaptured()) {
@@ -3822,13 +3460,15 @@ void CGOpenMPRuntimeGPU::adjustTargetSpecificDataForLambdas(
for (const LambdaCapture &LC : RD->captures()) {
if (LC.getCaptureKind() != LCK_ByRef)
continue;
- const VarDecl *VD = LC.getCapturedVar();
- if (!CS->capturesVariable(VD))
+ const ValueDecl *VD = LC.getCapturedVar();
+ // FIXME: For now VD is always a VarDecl because OpenMP does not support
+ // capturing structured bindings in lambdas yet.
+ if (!CS->capturesVariable(cast<VarDecl>(VD)))
continue;
auto It = Captures.find(VD);
assert(It != Captures.end() && "Found lambda capture without field.");
LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
- Address VDAddr = CGF.GetAddrOfLocalVar(VD);
+ Address VDAddr = CGF.GetAddrOfLocalVar(cast<VarDecl>(VD));
if (VD->getType().getCanonicalType()->isReferenceType())
VDAddr = CGF.EmitLoadOfReferenceLValue(VDAddr,
VD->getType().getCanonicalType())
@@ -3913,6 +3553,9 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(
case CudaArch::SM_75:
case CudaArch::SM_80:
case CudaArch::SM_86:
+ case CudaArch::SM_87:
+ case CudaArch::SM_89:
+ case CudaArch::SM_90:
case CudaArch::GFX600:
case CudaArch::GFX601:
case CudaArch::GFX602:
@@ -4006,10 +3649,10 @@ llvm::Value *CGOpenMPRuntimeGPU::getGPUNumThreads(CodeGenFunction &CGF) {
llvm::Function *F = M->getFunction(LocSize);
if (!F) {
F = llvm::Function::Create(
- llvm::FunctionType::get(CGF.Int32Ty, llvm::None, false),
+ llvm::FunctionType::get(CGF.Int32Ty, std::nullopt, false),
llvm::GlobalVariable::ExternalLinkage, LocSize, &CGF.CGM.getModule());
}
- return Bld.CreateCall(F, llvm::None, "nvptx_num_threads");
+ return Bld.CreateCall(F, std::nullopt, "nvptx_num_threads");
}
llvm::Value *CGOpenMPRuntimeGPU::getGPUThreadID(CodeGenFunction &CGF) {