summary refs log tree commit diff
path: root/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp')
-rw-r--r-- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp 94
1 files changed, 60 insertions, 34 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index d00d84b79cfe..cbd443134e7a 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -38,11 +38,9 @@ enum OpenMPRTLFunctionNVPTX {
/// Call to void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime);
OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2,
/// Call to void __kmpc_kernel_prepare_parallel(void
- /// *outlined_function, int16_t
- /// IsOMPRuntimeInitialized);
+ /// *outlined_function);
OMPRTL_NVPTX__kmpc_kernel_prepare_parallel,
- /// Call to bool __kmpc_kernel_parallel(void **outlined_function,
- /// int16_t IsOMPRuntimeInitialized);
+ /// Call to bool __kmpc_kernel_parallel(void **outlined_function);
OMPRTL_NVPTX__kmpc_kernel_parallel,
/// Call to void __kmpc_kernel_end_parallel();
OMPRTL_NVPTX__kmpc_kernel_end_parallel,
@@ -85,6 +83,9 @@ enum OpenMPRTLFunctionNVPTX {
/// Call to void* __kmpc_data_sharing_coalesced_push_stack(size_t size,
/// int16_t UseSharedMemory);
OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack,
+ /// Call to void* __kmpc_data_sharing_push_stack(size_t size, int16_t
+ /// UseSharedMemory);
+ OMPRTL_NVPTX__kmpc_data_sharing_push_stack,
/// Call to void __kmpc_data_sharing_pop_stack(void *a);
OMPRTL_NVPTX__kmpc_data_sharing_pop_stack,
/// Call to void __kmpc_begin_sharing_variables(void ***args,
@@ -341,8 +342,7 @@ class CheckVarsEscapingDeclContext final
if (!Attr)
return;
if (((Attr->getCaptureKind() != OMPC_map) &&
- !isOpenMPPrivate(
- static_cast<OpenMPClauseKind>(Attr->getCaptureKind()))) ||
+ !isOpenMPPrivate(Attr->getCaptureKind())) ||
((Attr->getCaptureKind() == OMPC_map) &&
!FD->getType()->isAnyPointerType()))
return;
@@ -786,6 +786,8 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
case OMPD_taskgroup:
case OMPD_atomic:
case OMPD_flush:
+ case OMPD_depobj:
+ case OMPD_scan:
case OMPD_teams:
case OMPD_target_data:
case OMPD_target_exit_data:
@@ -801,6 +803,8 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
case OMPD_target_update:
case OMPD_declare_simd:
case OMPD_declare_variant:
+ case OMPD_begin_declare_variant:
+ case OMPD_end_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
@@ -813,6 +817,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
case OMPD_parallel_master_taskloop_simd:
case OMPD_requires:
case OMPD_unknown:
+ default:
llvm_unreachable("Unexpected directive.");
}
}
@@ -862,6 +867,8 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx,
case OMPD_taskgroup:
case OMPD_atomic:
case OMPD_flush:
+ case OMPD_depobj:
+ case OMPD_scan:
case OMPD_teams:
case OMPD_target_data:
case OMPD_target_exit_data:
@@ -877,6 +884,8 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx,
case OMPD_target_update:
case OMPD_declare_simd:
case OMPD_declare_variant:
+ case OMPD_begin_declare_variant:
+ case OMPD_end_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
@@ -889,6 +898,7 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx,
case OMPD_parallel_master_taskloop_simd:
case OMPD_requires:
case OMPD_unknown:
+ default:
break;
}
llvm_unreachable(
@@ -1031,6 +1041,8 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx,
case OMPD_taskgroup:
case OMPD_atomic:
case OMPD_flush:
+ case OMPD_depobj:
+ case OMPD_scan:
case OMPD_teams:
case OMPD_target_data:
case OMPD_target_exit_data:
@@ -1046,6 +1058,8 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx,
case OMPD_target_update:
case OMPD_declare_simd:
case OMPD_declare_variant:
+ case OMPD_begin_declare_variant:
+ case OMPD_end_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
@@ -1058,6 +1072,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx,
case OMPD_parallel_master_taskloop_simd:
case OMPD_requires:
case OMPD_unknown:
+ default:
llvm_unreachable("Unexpected directive.");
}
}
@@ -1113,6 +1128,8 @@ static bool supportsLightweightRuntime(ASTContext &Ctx,
case OMPD_taskgroup:
case OMPD_atomic:
case OMPD_flush:
+ case OMPD_depobj:
+ case OMPD_scan:
case OMPD_teams:
case OMPD_target_data:
case OMPD_target_exit_data:
@@ -1128,6 +1145,8 @@ static bool supportsLightweightRuntime(ASTContext &Ctx,
case OMPD_target_update:
case OMPD_declare_simd:
case OMPD_declare_variant:
+ case OMPD_begin_declare_variant:
+ case OMPD_end_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
@@ -1140,6 +1159,7 @@ static bool supportsLightweightRuntime(ASTContext &Ctx,
case OMPD_parallel_master_taskloop_simd:
case OMPD_requires:
case OMPD_unknown:
+ default:
break;
}
llvm_unreachable(
@@ -1444,8 +1464,7 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF,
CGF.InitTempAlloca(WorkFn, llvm::Constant::getNullValue(CGF.Int8PtrTy));
// TODO: Optimize runtime initialization and pass in correct value.
- llvm::Value *Args[] = {WorkFn.getPointer(),
- /*RequiresOMPRuntime=*/Bld.getInt16(1)};
+ llvm::Value *Args[] = {WorkFn.getPointer()};
llvm::Value *Ret = CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args);
Bld.CreateStore(Bld.CreateZExt(Ret, CGF.Int8Ty), ExecStatus);
@@ -1573,17 +1592,16 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
}
case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: {
/// Build void __kmpc_kernel_prepare_parallel(
- /// void *outlined_function, int16_t IsOMPRuntimeInitialized);
- llvm::Type *TypeParams[] = {CGM.Int8PtrTy, CGM.Int16Ty};
+ /// void *outlined_function);
+ llvm::Type *TypeParams[] = {CGM.Int8PtrTy};
auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel");
break;
}
case OMPRTL_NVPTX__kmpc_kernel_parallel: {
- /// Build bool __kmpc_kernel_parallel(void **outlined_function,
- /// int16_t IsOMPRuntimeInitialized);
- llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy, CGM.Int16Ty};
+ /// Build bool __kmpc_kernel_parallel(void **outlined_function);
+ llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy};
llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy);
auto *FnTy =
llvm::FunctionType::get(RetTy, TypeParams, /*isVarArg*/ false);
@@ -1738,6 +1756,16 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
FnTy, /*Name=*/"__kmpc_data_sharing_coalesced_push_stack");
break;
}
+ case OMPRTL_NVPTX__kmpc_data_sharing_push_stack: {
+ // Build void *__kmpc_data_sharing_push_stack(size_t size, int16_t
+ // UseSharedMemory);
+ llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(
+ FnTy, /*Name=*/"__kmpc_data_sharing_push_stack");
+ break;
+ }
case OMPRTL_NVPTX__kmpc_data_sharing_pop_stack: {
// Build void __kmpc_data_sharing_pop_stack(void *a);
llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
@@ -1915,19 +1943,6 @@ unsigned CGOpenMPRuntimeNVPTX::getDefaultLocationReserved2Flags() const {
llvm_unreachable("Unknown flags are requested.");
}
-bool CGOpenMPRuntimeNVPTX::tryEmitDeclareVariant(const GlobalDecl &NewGD,
- const GlobalDecl &OldGD,
- llvm::GlobalValue *OrigAddr,
- bool IsForDefinition) {
- // Emit the function in OldGD with the body from NewGD, if NewGD is defined.
- auto *NewFD = cast<FunctionDecl>(NewGD.getDecl());
- if (NewFD->isDefined()) {
- CGM.emitOpenMPDeviceFunctionRedefinition(OldGD, NewGD, OrigAddr);
- return true;
- }
- return false;
-}
-
CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM)
: CGOpenMPRuntime(CGM, "_", "$") {
if (!CGM.getLangOpts().OpenMPIsDevice)
@@ -2208,7 +2223,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
GlobalRecCastAddr = Phi;
I->getSecond().GlobalRecordAddr = Phi;
I->getSecond().IsInSPMDModeFlag = IsSPMD;
- } else if (IsInTTDRegion) {
+ } else if (!CGM.getLangOpts().OpenMPCUDATargetParallel && IsInTTDRegion) {
assert(GlobalizedRecords.back().Records.size() < 2 &&
"Expected less than 2 globalized records: one for target and one "
"for teams.");
@@ -2281,12 +2296,16 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
} else {
// TODO: allow the usage of shared memory to be controlled by
// the user, for now, default to global.
+ bool UseSharedMemory =
+ IsInTTDRegion && GlobalRecordSize <= SharedMemorySize;
llvm::Value *GlobalRecordSizeArg[] = {
llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
- CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
+ CGF.Builder.getInt16(UseSharedMemory ? 1 : 0)};
llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(
- OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
+ IsInTTDRegion
+ ? OMPRTL_NVPTX__kmpc_data_sharing_push_stack
+ : OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
GlobalRecordSizeArg);
GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
GlobalRecValue, GlobalRecPtrTy);
@@ -2433,7 +2452,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF,
OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
CGF.EmitCastToVoidPtr(I->getSecond().GlobalRecordAddr));
CGF.EmitBlock(ExitBB);
- } else if (IsInTTDRegion) {
+ } else if (!CGM.getLangOpts().OpenMPCUDATargetParallel && IsInTTDRegion) {
assert(GlobalizedRecords.back().RegionCounter > 0 &&
"region counter must be > 0.");
--GlobalizedRecords.back().RegionCounter;
@@ -2546,7 +2565,7 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall(
llvm::Value *ID = Bld.CreateBitOrPointerCast(WFn, CGM.Int8PtrTy);
// Prepare for parallel region. Indicate the outlined function.
- llvm::Value *Args[] = {ID, /*RequiresOMPRuntime=*/Bld.getInt16(1)};
+ llvm::Value *Args[] = {ID};
CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel),
Args);
@@ -4754,6 +4773,7 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
switch (A->getAllocatorType()) {
// Use the default allocator here as by default local vars are
// threadlocal.
+ case OMPAllocateDeclAttr::OMPNullMemAlloc:
case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
case OMPAllocateDeclAttr::OMPThreadMemAlloc:
case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
@@ -4920,6 +4940,7 @@ bool CGOpenMPRuntimeNVPTX::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
return false;
const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
switch(A->getAllocatorType()) {
+ case OMPAllocateDeclAttr::OMPNullMemAlloc:
case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
// Not supported, fallback to the default mem space.
case OMPAllocateDeclAttr::OMPThreadMemAlloc:
@@ -4962,7 +4983,7 @@ static CudaArch getCudaArch(CodeGenModule &CGM) {
/// Check to see if target architecture supports unified addressing which is
/// a restriction for OpenMP requires clause "unified_shared_memory".
-void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing(
+void CGOpenMPRuntimeNVPTX::processRequiresDirective(
const OMPRequiresDecl *D) {
for (const OMPClause *Clause : D->clauselists()) {
if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
@@ -4990,6 +5011,7 @@ void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing(
case CudaArch::SM_70:
case CudaArch::SM_72:
case CudaArch::SM_75:
+ case CudaArch::SM_80:
case CudaArch::GFX600:
case CudaArch::GFX601:
case CudaArch::GFX700:
@@ -5010,6 +5032,7 @@ void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing(
case CudaArch::GFX1010:
case CudaArch::GFX1011:
case CudaArch::GFX1012:
+ case CudaArch::GFX1030:
case CudaArch::UNKNOWN:
break;
case CudaArch::LAST:
@@ -5017,7 +5040,7 @@ void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing(
}
}
}
- CGOpenMPRuntime::checkArchForUnifiedAddressing(D);
+ CGOpenMPRuntime::processRequiresDirective(D);
}
/// Get number of SMs and number of blocks per SM.
@@ -5047,6 +5070,7 @@ static std::pair<unsigned, unsigned> getSMsBlocksPerSM(CodeGenModule &CGM) {
case CudaArch::SM_70:
case CudaArch::SM_72:
case CudaArch::SM_75:
+ case CudaArch::SM_80:
return {84, 32};
case CudaArch::GFX600:
case CudaArch::GFX601:
@@ -5068,6 +5092,7 @@ static std::pair<unsigned, unsigned> getSMsBlocksPerSM(CodeGenModule &CGM) {
case CudaArch::GFX1010:
case CudaArch::GFX1011:
case CudaArch::GFX1012:
+ case CudaArch::GFX1030:
case CudaArch::UNKNOWN:
break;
case CudaArch::LAST:
@@ -5077,7 +5102,8 @@ static std::pair<unsigned, unsigned> getSMsBlocksPerSM(CodeGenModule &CGM) {
}
void CGOpenMPRuntimeNVPTX::clear() {
- if (!GlobalizedRecords.empty()) {
+ if (!GlobalizedRecords.empty() &&
+ !CGM.getLangOpts().OpenMPCUDATargetParallel) {
ASTContext &C = CGM.getContext();
llvm::SmallVector<const GlobalPtrSizeRecsTy *, 4> GlobalRecs;
llvm::SmallVector<const GlobalPtrSizeRecsTy *, 4> SharedRecs;