path: root/clang/lib/CodeGen/CGOpenMPRuntime.cpp
author     Dimitry Andric <dim@FreeBSD.org>   2020-01-17 20:45:01 +0000
committer  Dimitry Andric <dim@FreeBSD.org>   2020-01-17 20:45:01 +0000
commit     706b4fc47bbc608932d3b491ae19a3b9cde9497b (patch)
tree       4adf86a776049cbf7f69a1929c4babcbbef925eb /clang/lib/CodeGen/CGOpenMPRuntime.cpp
parent     7cc9cf2bf09f069cb2dd947ead05d0b54301fb71 (diff)
Diffstat (limited to 'clang/lib/CodeGen/CGOpenMPRuntime.cpp')
 -rw-r--r--   clang/lib/CodeGen/CGOpenMPRuntime.cpp   1018
 1 file changed, 739 insertions, 279 deletions
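
Most of the churn in this diff is a mechanical migration: clang CodeGen call sites switch from the parameterless LValue accessors to overloads that take the active CodeGenFunction. A minimal sketch of the pattern, with LV, CGF, Addr and Ptr as placeholder names, and the rationale stated as an assumption rather than taken from the commit:

    // Before this commit: the accessors took no arguments.
    Address      Addr = LV.getAddress();
    llvm::Value *Ptr  = LV.getPointer();
    // After this commit: the CodeGenFunction is threaded through explicitly,
    // presumably so the accessors can consult or emit per-function state.
    Address      Addr2 = LV.getAddress(CGF);
    llvm::Value *Ptr2  = LV.getPointer(CGF);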
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 2a13a2a58156..97b17799a03e 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -10,17 +10,22 @@
//
//===----------------------------------------------------------------------===//
+#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
-#include "CGOpenMPRuntime.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
-#include "clang/CodeGen/ConstantInitBuilder.h"
+#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
+#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
+#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
+#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SetOperations.h"
#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
@@ -30,6 +35,7 @@
using namespace clang;
using namespace CodeGen;
+using namespace llvm::omp;
namespace {
/// Base class for handling code generation inside OpenMP regions.
@@ -356,7 +362,7 @@ public:
VD->getType().getNonReferenceType(), VK_LValue,
C.getLocation());
PrivScope.addPrivate(
- VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
+ VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
}
(void)PrivScope.Privatize();
}
@@ -727,10 +733,6 @@ enum OpenMPRTLFunction {
OMPRTL__tgt_target_teams_nowait,
// Call to void __tgt_register_requires(int64_t flags);
OMPRTL__tgt_register_requires,
- // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
- OMPRTL__tgt_register_lib,
- // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
- OMPRTL__tgt_unregister_lib,
// Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
// void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
OMPRTL__tgt_target_data_begin,
@@ -841,7 +843,7 @@ static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
break;
case TEK_Aggregate:
- InitRVal = RValue::getAggregate(LV.getAddress());
+ InitRVal = RValue::getAggregate(LV.getAddress(CGF));
break;
}
OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
@@ -965,7 +967,7 @@ void ReductionCodeGen::emitAggregateInitialization(
EmitDeclareReductionInit,
EmitDeclareReductionInit ? ClausesData[N].ReductionOp
: PrivateVD->getInit(),
- DRD, SharedLVal.getAddress());
+ DRD, SharedLVal.getAddress(CGF));
}
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
@@ -1006,13 +1008,13 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
}
llvm::Value *Size;
llvm::Value *SizeInChars;
- auto *ElemType =
- cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
- ->getElementType();
+ auto *ElemType = cast<llvm::PointerType>(
+ SharedAddresses[N].first.getPointer(CGF)->getType())
+ ->getElementType();
auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
if (AsArraySection) {
- Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
- SharedAddresses[N].first.getPointer());
+ Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF),
+ SharedAddresses[N].first.getPointer(CGF));
Size = CGF.Builder.CreateNUWAdd(
Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
@@ -1062,7 +1064,7 @@ void ReductionCodeGen::emitInitialization(
PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
QualType SharedType = SharedAddresses[N].first.getType();
SharedLVal = CGF.MakeAddrLValue(
- CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
+ CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
CGF.ConvertTypeForMem(SharedType)),
SharedType, SharedAddresses[N].first.getBaseInfo(),
CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
@@ -1070,7 +1072,7 @@ void ReductionCodeGen::emitInitialization(
emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
} else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
- PrivateAddr, SharedLVal.getAddress(),
+ PrivateAddr, SharedLVal.getAddress(CGF),
SharedLVal.getType());
} else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
!CGF.isTrivialInitializer(PrivateVD->getInit())) {
@@ -1107,15 +1109,15 @@ static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
!CGF.getContext().hasSameType(BaseTy, ElTy)) {
if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
- BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
+ BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
} else {
- LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
+ LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
}
BaseTy = BaseTy->getPointeeType();
}
return CGF.MakeAddrLValue(
- CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
+ CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
CGF.ConvertTypeForMem(ElTy)),
BaseLV.getType(), BaseLV.getBaseInfo(),
CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
@@ -1179,15 +1181,15 @@ Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
OriginalBaseLValue);
llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
- BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
+ BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
llvm::Value *PrivatePointer =
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
PrivateAddr.getPointer(),
- SharedAddresses[N].first.getAddress().getType());
+ SharedAddresses[N].first.getAddress(CGF).getType());
llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
return castToBase(CGF, OrigVD->getType(),
SharedAddresses[N].first.getType(),
- OriginalBaseLValue.getAddress().getType(),
+ OriginalBaseLValue.getAddress(CGF).getType(),
OriginalBaseLValue.getAlignment(), Ptr);
}
BaseDecls.emplace_back(
@@ -1276,7 +1278,7 @@ bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD,
llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName);
if (Addr && !Addr->isDeclaration()) {
const auto *D = cast<FunctionDecl>(OldGD.getDecl());
- const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(OldGD);
+ const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(NewGD);
llvm::Type *DeclTy = CGM.getTypes().GetFunctionType(FI);
// Create a reference to the named value. This ensures that it is emitted
@@ -1380,12 +1382,12 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
- .getAddress();
+ .getAddress(CGF);
});
Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
- .getAddress();
+ .getAddress(CGF);
});
(void)Scope.Privatize();
if (!IsCombiner && Out->hasInit() &&
@@ -1436,6 +1438,52 @@ CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
return UDRMap.lookup(D);
}
+namespace {
+// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
+// Builder if one is present.
+struct PushAndPopStackRAII {
+ PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
+ bool HasCancel)
+ : OMPBuilder(OMPBuilder) {
+ if (!OMPBuilder)
+ return;
+
+ // The following callback is the crucial part of clang's cleanup process.
+ //
+ // NOTE:
+ // Once the OpenMPIRBuilder is used to create parallel regions (and
+ // similar), the cancellation destination (Dest below) is determined via
+ // IP. That means if we have variables to finalize we split the block at IP,
+ // use the new block (=BB) as destination to build a JumpDest (via
+ // getJumpDestInCurrentScope(BB)) which then is fed to
+ // EmitBranchThroughCleanup. Furthermore, there will no longer be a need
+ // to push & pop a FinalizationInfo object.
+ // The FiniCB will still be needed but at the point where the
+ // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
+ auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
+ assert(IP.getBlock()->end() == IP.getPoint() &&
+ "Clang CG should cause non-terminated block!");
+ CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
+ CGF.Builder.restoreIP(IP);
+ CodeGenFunction::JumpDest Dest =
+ CGF.getOMPCancelDestination(OMPD_parallel);
+ CGF.EmitBranchThroughCleanup(Dest);
+ };
+
+ // TODO: Remove this once we emit parallel regions through the
+ // OpenMPIRBuilder as it can do this setup internally.
+ llvm::OpenMPIRBuilder::FinalizationInfo FI(
+ {FiniCB, OMPD_parallel, HasCancel});
+ OMPBuilder->pushFinalizationCB(std::move(FI));
+ }
+ ~PushAndPopStackRAII() {
+ if (OMPBuilder)
+ OMPBuilder->popFinalizationCB();
+ }
+ llvm::OpenMPIRBuilder *OMPBuilder;
+};
+} // namespace
+
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
@@ -1460,6 +1508,11 @@ static llvm::Function *emitParallelOrTeamsOutlinedFunction(
else if (const auto *OPFD =
dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
HasCancel = OPFD->hasCancel();
+
+ // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
+ // parallel region to make cancellation barriers work properly.
+ llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder();
+ PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel);
CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
HasCancel, OutlinedHelperName);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
@@ -1495,7 +1548,7 @@ llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
UpLoc, ThreadID,
CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
TaskTVar->getType()->castAs<PointerType>())
- .getPointer()};
+ .getPointer(CGF)};
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
};
CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
@@ -1706,9 +1759,10 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
!CGF.getLangOpts().CXXExceptions ||
CGF.Builder.GetInsertBlock() == TopBlock ||
- !isa<llvm::Instruction>(LVal.getPointer()) ||
- cast<llvm::Instruction>(LVal.getPointer())->getParent() == TopBlock ||
- cast<llvm::Instruction>(LVal.getPointer())->getParent() ==
+ !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
+ cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
+ TopBlock ||
+ cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
CGF.Builder.GetInsertBlock()) {
ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
// If value loaded in entry block, cache it and use it everywhere in
@@ -2422,26 +2476,6 @@ llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
break;
}
- case OMPRTL__tgt_register_lib: {
- // Build void __tgt_register_lib(__tgt_bin_desc *desc);
- QualType ParamTy =
- CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
- llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
- auto *FnTy =
- llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
- break;
- }
- case OMPRTL__tgt_unregister_lib: {
- // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
- QualType ParamTy =
- CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
- llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
- auto *FnTy =
- llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
- break;
- }
case OMPRTL__tgt_target_data_begin: {
// Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
// void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
@@ -2988,10 +3022,15 @@ Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
QualType VarType,
StringRef Name) {
std::string Suffix = getName({"artificial", ""});
- std::string CacheSuffix = getName({"cache", ""});
llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
llvm::Value *GAddr =
getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
+ if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
+ CGM.getTarget().isTLSSupported()) {
+ cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
+ return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
+ }
+ std::string CacheSuffix = getName({"cache", ""});
llvm::Value *Args[] = {
emitUpdateLocation(CGF, SourceLocation()),
getThreadID(CGF, SourceLocation()),
@@ -3005,12 +3044,12 @@ Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
CGF.EmitRuntimeCall(
createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
VarLVType->getPointerTo(/*AddrSpace=*/0)),
- CGM.getPointerAlign());
+ CGM.getContext().getTypeAlignInChars(VarType));
}
-void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
- const RegionCodeGenTy &ThenGen,
- const RegionCodeGenTy &ElseGen) {
+void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
+ const RegionCodeGenTy &ThenGen,
+ const RegionCodeGenTy &ElseGen) {
CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
// If the condition constant folds and can be elided, try to avoid emitting
@@ -3100,7 +3139,7 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
EndArgs);
};
if (IfCond) {
- emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
+ emitIfClause(CGF, IfCond, ThenGen, ElseGen);
} else {
RegionCodeGenTy ThenRCG(ThenGen);
ThenRCG(CGF);
@@ -3118,7 +3157,7 @@ Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
if (auto *OMPRegionInfo =
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
if (OMPRegionInfo->getThreadIDVariable())
- return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
+ return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
llvm::Value *ThreadID = getThreadID(CGF, Loc);
QualType Int32Ty =
@@ -3394,7 +3433,8 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
CGF.Builder.CreateStore(
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
+ CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
+ CGF.VoidPtrTy),
Elem);
}
// Build function that copies private values from single region to all other
@@ -3476,6 +3516,16 @@ void CGOpenMPRuntime::getDefaultScheduleAndChunk(
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
OpenMPDirectiveKind Kind, bool EmitChecks,
bool ForceSimpleCall) {
+ // Check if we should use the OMPBuilder
+ auto *OMPRegionInfo =
+ dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
+ llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
+ if (OMPBuilder) {
+ CGF.Builder.restoreIP(OMPBuilder->CreateBarrier(
+ CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
+ return;
+ }
+
if (!CGF.HaveInsertPoint())
return;
// Build call __kmpc_cancel_barrier(loc, thread_id);
@@ -3485,8 +3535,7 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
// thread_id);
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
getThreadID(CGF, Loc)};
- if (auto *OMPRegionInfo =
- dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
+ if (OMPRegionInfo) {
if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
llvm::Value *Result = CGF.EmitRuntimeCall(
createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
@@ -3616,7 +3665,9 @@ static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
Schedule == OMP_sch_static_balanced_chunked ||
- Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static))
+ Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
+ Schedule == OMP_dist_sch_static_chunked ||
+ Schedule == OMP_dist_sch_static))
Modifier = OMP_sch_modifier_nonmonotonic;
}
return Schedule | Modifier;
@@ -3807,37 +3858,15 @@ void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
}
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
- OpenMPProcBindClauseKind ProcBind,
+ ProcBindKind ProcBind,
SourceLocation Loc) {
if (!CGF.HaveInsertPoint())
return;
- // Constants for proc bind value accepted by the runtime.
- enum ProcBindTy {
- ProcBindFalse = 0,
- ProcBindTrue,
- ProcBindMaster,
- ProcBindClose,
- ProcBindSpread,
- ProcBindIntel,
- ProcBindDefault
- } RuntimeProcBind;
- switch (ProcBind) {
- case OMPC_PROC_BIND_master:
- RuntimeProcBind = ProcBindMaster;
- break;
- case OMPC_PROC_BIND_close:
- RuntimeProcBind = ProcBindClose;
- break;
- case OMPC_PROC_BIND_spread:
- RuntimeProcBind = ProcBindSpread;
- break;
- case OMPC_PROC_BIND_unknown:
- llvm_unreachable("Unsupported proc_bind value.");
- }
+ assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
// Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
llvm::Value *Args[] = {
emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
- llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
+ llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
}
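
For context on the hunk above (illustrative only, not taken from the commit; do_work() is a hypothetical function): emitProcBindClause() is what lowers a proc_bind clause such as

    // A parallel region with an explicit thread-affinity policy; clang emits a
    // __kmpc_push_proc_bind(&loc, global_tid, proc_bind) call before the fork.
    #pragma omp parallel proc_bind(spread)
    {
      do_work();
    }

The change above drops the local ProcBindTy mapping and passes the llvm::omp::ProcBindKind value straight to the runtime, which assumes those enumerators carry the same numeric values the removed enum used (master=2, close=3, spread=4).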
@@ -4327,57 +4356,6 @@ QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
return TgtOffloadEntryQTy;
}
-QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
- // These are the types we need to build:
- // struct __tgt_device_image{
- // void *ImageStart; // Pointer to the target code start.
- // void *ImageEnd; // Pointer to the target code end.
- // // We also add the host entries to the device image, as it may be useful
- // // for the target runtime to have access to that information.
- // __tgt_offload_entry *EntriesBegin; // Begin of the table with all
- // // the entries.
- // __tgt_offload_entry *EntriesEnd; // End of the table with all the
- // // entries (non inclusive).
- // };
- if (TgtDeviceImageQTy.isNull()) {
- ASTContext &C = CGM.getContext();
- RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
- RD->startDefinition();
- addFieldToRecordDecl(C, RD, C.VoidPtrTy);
- addFieldToRecordDecl(C, RD, C.VoidPtrTy);
- addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
- addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
- RD->completeDefinition();
- TgtDeviceImageQTy = C.getRecordType(RD);
- }
- return TgtDeviceImageQTy;
-}
-
-QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
- // struct __tgt_bin_desc{
- // int32_t NumDevices; // Number of devices supported.
- // __tgt_device_image *DeviceImages; // Arrays of device images
- // // (one per device).
- // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the
- // // entries.
- // __tgt_offload_entry *EntriesEnd; // End of the table with all the
- // // entries (non inclusive).
- // };
- if (TgtBinaryDescriptorQTy.isNull()) {
- ASTContext &C = CGM.getContext();
- RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
- RD->startDefinition();
- addFieldToRecordDecl(
- C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
- addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
- addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
- addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
- RD->completeDefinition();
- TgtBinaryDescriptorQTy = C.getRecordType(RD);
- }
- return TgtBinaryDescriptorQTy;
-}
-
namespace {
struct PrivateHelpersTy {
PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
@@ -4537,7 +4515,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
- llvm::Value *PartidParam = PartIdLVal.getPointer();
+ llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
@@ -4550,7 +4528,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- PrivatesLVal.getPointer(), CGF.VoidPtrTy);
+ PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
} else {
PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
}
@@ -4559,7 +4537,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
TaskPrivatesMap,
CGF.Builder
.CreatePointerBitCastOrAddrSpaceCast(
- TDBase.getAddress(), CGF.VoidPtrTy)
+ TDBase.getAddress(CGF), CGF.VoidPtrTy)
.getPointer()};
SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
std::end(CommonArgs));
@@ -4637,7 +4615,7 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
if (QualType::DestructionKind DtorKind =
Field->getType().isDestructedType()) {
LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
- CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
+ CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
}
}
CGF.FinishFunction();
@@ -4735,8 +4713,8 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
LValue RefLVal =
CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
- RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
- CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
+ RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
+ CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
++Counter;
}
CGF.FinishFunction();
@@ -4801,7 +4779,8 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
} else {
SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
SharedRefLValue = CGF.MakeAddrLValue(
- Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
+ Address(SharedRefLValue.getPointer(CGF),
+ C.getDeclAlign(OriginalVD)),
SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
SharedRefLValue.getTBAAInfo());
}
@@ -4814,7 +4793,8 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
// Initialize firstprivate array using element-by-element
// initialization.
CGF.EmitOMPAggregateAssign(
- PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
+ PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
+ Type,
[&CGF, Elem, Init, &CapturesInfo](Address DestElement,
Address SrcElement) {
// Clean up any temporaries needed by the initialization.
@@ -4832,8 +4812,8 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
}
} else {
CodeGenFunction::OMPPrivateScope InitScope(CGF);
- InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
- return SharedRefLValue.getAddress();
+ InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
+ return SharedRefLValue.getAddress(CGF);
});
(void)InitScope.Privatize();
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
@@ -5233,10 +5213,10 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
LValue UpAddrLVal =
CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
- llvm::Value *UpAddr =
- CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
+ llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
+ UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
llvm::Value *LowIntPtr =
- CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
+ CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGM.SizeTy);
llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
} else {
@@ -5249,7 +5229,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
LValue BaseAddrLVal = CGF.EmitLValueForField(
Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
CGF.EmitStoreOfScalar(
- CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
+ CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGF.IntPtrTy),
BaseAddrLVal);
// deps[i].len = sizeof(<Dependences[i].second>);
LValue LenLVal = CGF.EmitLValueForField(
@@ -5366,7 +5346,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
};
if (IfCond) {
- emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
+ emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
} else {
RegionCodeGenTy ThenRCG(ThenCodeGen);
ThenRCG(CGF);
@@ -5403,21 +5383,24 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
*std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
const auto *LBVar =
cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
- CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
+ CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
+ LBLVal.getQuals(),
/*IsInitializer=*/true);
LValue UBLVal = CGF.EmitLValueForField(
Result.TDBase,
*std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
const auto *UBVar =
cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
- CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
+ CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
+ UBLVal.getQuals(),
/*IsInitializer=*/true);
LValue StLVal = CGF.EmitLValueForField(
Result.TDBase,
*std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
const auto *StVar =
cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
- CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
+ CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
+ StLVal.getQuals(),
/*IsInitializer=*/true);
// Store reductions address.
LValue RedLVal = CGF.EmitLValueForField(
@@ -5426,7 +5409,7 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
if (Data.Reductions) {
CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
} else {
- CGF.EmitNullInitialization(RedLVal.getAddress(),
+ CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
CGF.getContext().VoidPtrTy);
}
enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
@@ -5435,11 +5418,11 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
ThreadID,
Result.NewTask,
IfVal,
- LBLVal.getPointer(),
- UBLVal.getPointer(),
+ LBLVal.getPointer(CGF),
+ UBLVal.getPointer(CGF),
CGF.EmitLoadOfScalar(StLVal, Loc),
llvm::ConstantInt::getSigned(
- CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
+ CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
llvm::ConstantInt::getSigned(
CGF.IntTy, Data.Schedule.getPointer()
? Data.Schedule.getInt() ? NumTasks : Grainsize
@@ -5751,7 +5734,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
CGF.Builder.CreateStore(
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
+ CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
Elem);
if ((*IPriv)->getType()->isVariablyModifiedType()) {
// Store array size.
@@ -6179,7 +6162,7 @@ static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
// Emit the finalizer body:
// <destroy>(<type>* %0)
RCG.emitCleanups(CGF, N, PrivateAddr);
- CGF.FinishFunction();
+ CGF.FinishFunction(Loc);
return Fn;
}
@@ -6231,7 +6214,7 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
RCG.emitSharedLValue(CGF, Cnt);
llvm::Value *CastedShared =
- CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
+ CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
RCG.emitAggregateType(CGF, Cnt);
llvm::Value *SizeValInChars;
@@ -6274,7 +6257,8 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
FlagsLVal);
} else
- CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
+ CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
+ FlagsLVal.getType());
}
// Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
// *data);
@@ -6310,7 +6294,7 @@ void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
CGF.Builder.CreateStore(
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
+ RCG.getSharedLValue(N).getPointer(CGF), CGM.VoidPtrTy),
SharedAddr, /*IsVolatile=*/false);
}
}
@@ -6321,12 +6305,12 @@ Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
LValue SharedLVal) {
// Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
// *d);
- llvm::Value *Args[] = {
- CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
- /*isSigned=*/true),
- ReductionsPtr,
- CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
- CGM.VoidPtrTy)};
+ llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
+ CGM.IntTy,
+ /*isSigned=*/true),
+ ReductionsPtr,
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
return Address(
CGF.EmitRuntimeCall(
createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
@@ -6449,8 +6433,8 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
CGF.EmitBlock(ContBB, /*IsFinished=*/true);
};
if (IfCond) {
- emitOMPIfClause(CGF, IfCond, ThenGen,
- [](CodeGenFunction &, PrePostActionTy &) {});
+ emitIfClause(CGF, IfCond, ThenGen,
+ [](CodeGenFunction &, PrePostActionTy &) {});
} else {
RegionCodeGenTy ThenRCG(ThenGen);
ThenRCG(CGF);
@@ -6663,6 +6647,7 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
case OMPD_parallel:
case OMPD_for:
case OMPD_parallel_for:
+ case OMPD_parallel_master:
case OMPD_parallel_sections:
case OMPD_for_simd:
case OMPD_parallel_for_simd:
@@ -6708,6 +6693,7 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
case OMPD_master_taskloop:
case OMPD_master_taskloop_simd:
case OMPD_parallel_master_taskloop:
+ case OMPD_parallel_master_taskloop_simd:
case OMPD_requires:
case OMPD_unknown:
break;
@@ -6972,6 +6958,7 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
case OMPD_parallel:
case OMPD_for:
case OMPD_parallel_for:
+ case OMPD_parallel_master:
case OMPD_parallel_sections:
case OMPD_for_simd:
case OMPD_parallel_for_simd:
@@ -7017,6 +7004,7 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
case OMPD_master_taskloop:
case OMPD_master_taskloop_simd:
case OMPD_parallel_master_taskloop:
+ case OMPD_parallel_master_taskloop_simd:
case OMPD_requires:
case OMPD_unknown:
break;
@@ -7509,11 +7497,11 @@ private:
} else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
(OASE &&
isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
- BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
+ BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
} else {
// The base is the reference to the variable.
// BP = &Var.
- BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
+ BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
if (const auto *VD =
dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
@@ -7607,8 +7595,8 @@ private:
isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
"Unexpected expression");
- Address LB =
- CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();
+ Address LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
+ .getAddress(CGF);
// If this component is a pointer inside the base struct then we don't
// need to create any entry for it - it will be combined with the object
@@ -7655,7 +7643,7 @@ private:
if (MC.getAssociatedDeclaration()) {
ComponentLB =
CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
- .getAddress();
+ .getAddress(CGF);
Size = CGF.Builder.CreatePtrDiff(
CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
CGF.EmitCastToVoidPtr(LB.getPointer()));
@@ -7938,17 +7926,17 @@ public:
"Expect a executable directive");
const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
- for (const auto &L : C->component_lists()) {
+ for (const auto L : C->component_lists()) {
InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
/*ReturnDevicePointer=*/false, C->isImplicit());
}
for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
- for (const auto &L : C->component_lists()) {
+ for (const auto L : C->component_lists()) {
InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
/*ReturnDevicePointer=*/false, C->isImplicit());
}
for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
- for (const auto &L : C->component_lists()) {
+ for (const auto L : C->component_lists()) {
InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
/*ReturnDevicePointer=*/false, C->isImplicit());
}
@@ -7964,7 +7952,7 @@ public:
for (const auto *C :
CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
- for (const auto &L : C->component_lists()) {
+ for (const auto L : C->component_lists()) {
assert(!L.second.empty() && "Not expecting empty list of components!");
const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
VD = cast<ValueDecl>(VD->getCanonicalDecl());
@@ -8059,7 +8047,7 @@ public:
auto CI = DeferredInfo.find(M.first);
if (CI != DeferredInfo.end()) {
for (const DeferredDevicePtrEntryTy &L : CI->second) {
- llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer();
+ llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
CurBasePointers.emplace_back(BasePtr, L.VD);
@@ -8117,7 +8105,7 @@ public:
for (const auto *C : CurMapperDir->clauselists()) {
const auto *MC = cast<OMPMapClause>(C);
- for (const auto &L : MC->component_lists()) {
+ for (const auto L : MC->component_lists()) {
InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
/*ReturnDevicePointer=*/false, MC->isImplicit());
}
@@ -8181,9 +8169,10 @@ public:
LValue ThisLVal =
CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
- LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer());
- BasePointers.push_back(ThisLVal.getPointer());
- Pointers.push_back(ThisLValVal.getPointer());
+ LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
+ VDLVal.getPointer(CGF));
+ BasePointers.push_back(ThisLVal.getPointer(CGF));
+ Pointers.push_back(ThisLValVal.getPointer(CGF));
Sizes.push_back(
CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
CGF.Int64Ty, /*isSigned=*/true));
@@ -8201,17 +8190,19 @@ public:
LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
if (LC.getCaptureKind() == LCK_ByRef) {
LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
- LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
- BasePointers.push_back(VarLVal.getPointer());
- Pointers.push_back(VarLValVal.getPointer());
+ LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
+ VDLVal.getPointer(CGF));
+ BasePointers.push_back(VarLVal.getPointer(CGF));
+ Pointers.push_back(VarLValVal.getPointer(CGF));
Sizes.push_back(CGF.Builder.CreateIntCast(
CGF.getTypeSize(
VD->getType().getCanonicalType().getNonReferenceType()),
CGF.Int64Ty, /*isSigned=*/true));
} else {
RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
- LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
- BasePointers.push_back(VarLVal.getPointer());
+ LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
+ VDLVal.getPointer(CGF));
+ BasePointers.push_back(VarLVal.getPointer(CGF));
Pointers.push_back(VarRVal.getScalarVal());
Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
}
@@ -8286,7 +8277,7 @@ public:
"Expect a executable directive");
const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
- for (const auto &L : C->decl_component_lists(VD)) {
+ for (const auto L : C->decl_component_lists(VD)) {
assert(L.first == VD &&
"We got information for the wrong declaration??");
assert(!L.second.empty() &&
@@ -8439,7 +8430,7 @@ public:
// Map other list items in the map clause which are not captured variables
// but "declare target link" global variables.
for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
- for (const auto &L : C->component_lists()) {
+ for (const auto L : C->component_lists()) {
if (!L.first)
continue;
const auto *VD = dyn_cast<VarDecl>(L.first);
@@ -8517,7 +8508,7 @@ public:
CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
// Copy the value of the original variable to the new global copy.
CGF.Builder.CreateMemCpy(
- CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(),
+ CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
CurSizes.back(), /*IsVolatile=*/false);
// Use new global variable as the base pointers.
@@ -8746,6 +8737,7 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
case OMPD_parallel:
case OMPD_for:
case OMPD_parallel_for:
+ case OMPD_parallel_master:
case OMPD_parallel_sections:
case OMPD_for_simd:
case OMPD_parallel_for_simd:
@@ -8791,6 +8783,7 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
case OMPD_master_taskloop:
case OMPD_master_taskloop_simd:
case OMPD_parallel_master_taskloop:
+ case OMPD_parallel_master_taskloop_simd:
case OMPD_requires:
case OMPD_unknown:
llvm_unreachable("Unexpected directive.");
@@ -8926,7 +8919,7 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
return MapperCGF
.EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
- .getAddress();
+ .getAddress(MapperCGF);
});
(void)Scope.Privatize();
@@ -9423,7 +9416,7 @@ void CGOpenMPRuntime::emitTargetCall(
// specify target triples.
if (OutlinedFnID) {
if (IfCond) {
- emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
+ emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
} else {
RegionCodeGenTy ThenRCG(TargetThenGen);
ThenRCG(CGF);
@@ -9506,6 +9499,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
case OMPD_parallel:
case OMPD_for:
case OMPD_parallel_for:
+ case OMPD_parallel_master:
case OMPD_parallel_sections:
case OMPD_for_simd:
case OMPD_parallel_for_simd:
@@ -9551,6 +9545,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
case OMPD_master_taskloop:
case OMPD_master_taskloop_simd:
case OMPD_parallel_master_taskloop:
+ case OMPD_parallel_master_taskloop_simd:
case OMPD_requires:
case OMPD_unknown:
llvm_unreachable("Unknown target directive for OpenMP device codegen.");
@@ -9591,9 +9586,9 @@ bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
}
const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
- StringRef Name = CGM.getMangledName(GD);
// Try to detect target regions in the function.
if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
+ StringRef Name = CGM.getMangledName(GD);
scanForTargetRegionsFunctions(FD->getBody(), Name);
Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
OMPDeclareTargetDeclAttr::getDeviceType(FD);
@@ -9604,7 +9599,7 @@ bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
// Do not to emit function if it is not marked as declare target.
return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
- AlreadyEmittedTargetFunctions.count(Name) == 0;
+ AlreadyEmittedTargetDecls.count(VD) == 0;
}
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
@@ -9835,20 +9830,20 @@ bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
return true;
- StringRef Name = CGM.getMangledName(GD);
const auto *D = cast<FunctionDecl>(GD.getDecl());
// Do not to emit function if it is marked as declare target as it was already
// emitted.
if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
- if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) {
- if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name)))
+ if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
+ if (auto *F = dyn_cast_or_null<llvm::Function>(
+ CGM.GetGlobalValue(CGM.getMangledName(GD))))
return !F->isDeclaration();
return false;
}
return true;
}
- return !AlreadyEmittedTargetFunctions.insert(Name).second;
+ return !AlreadyEmittedTargetDecls.insert(D).second;
}
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
@@ -10050,7 +10045,7 @@ void CGOpenMPRuntime::emitTargetDataCalls(
auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
if (IfCond) {
- emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
+ emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
} else {
RegionCodeGenTy RCG(BeginThenGen);
RCG(CGF);
@@ -10064,7 +10059,7 @@ void CGOpenMPRuntime::emitTargetDataCalls(
}
if (IfCond) {
- emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
+ emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
} else {
RegionCodeGenTy RCG(EndThenGen);
RCG(CGF);
@@ -10127,6 +10122,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
case OMPD_parallel:
case OMPD_for:
case OMPD_parallel_for:
+ case OMPD_parallel_master:
case OMPD_parallel_sections:
case OMPD_for_simd:
case OMPD_parallel_for_simd:
@@ -10169,6 +10165,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
case OMPD_master_taskloop:
case OMPD_master_taskloop_simd:
case OMPD_parallel_master_taskloop:
+ case OMPD_parallel_master_taskloop_simd:
case OMPD_target:
case OMPD_target_simd:
case OMPD_target_teams_distribute:
@@ -10220,8 +10217,8 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
};
if (IfCond) {
- emitOMPIfClause(CGF, IfCond, TargetThenGen,
- [](CodeGenFunction &CGF, PrePostActionTy &) {});
+ emitIfClause(CGF, IfCond, TargetThenGen,
+ [](CodeGenFunction &CGF, PrePostActionTy &) {});
} else {
RegionCodeGenTy ThenRCG(TargetThenGen);
ThenRCG(CGF);
@@ -10759,8 +10756,7 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
ExprLoc = VLENExpr->getExprLoc();
}
OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
- if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
- CGM.getTriple().getArch() == llvm::Triple::x86_64) {
+ if (CGM.getTriple().isX86()) {
emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
} else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
unsigned VLEN = VLENVal.getExtValue();
@@ -11018,12 +11014,18 @@ Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
return Address(Addr, Align);
}
+namespace {
+using OMPContextSelectorData =
+ OpenMPCtxSelectorData<ArrayRef<StringRef>, llvm::APSInt>;
+using CompleteOMPContextSelectorData = SmallVector<OMPContextSelectorData, 4>;
+} // anonymous namespace
+
/// Checks current context and returns true if it matches the context selector.
-template <OMPDeclareVariantAttr::CtxSelectorSetType CtxSet,
- OMPDeclareVariantAttr::CtxSelectorType Ctx>
-static bool checkContext(const OMPDeclareVariantAttr *A) {
- assert(CtxSet != OMPDeclareVariantAttr::CtxSetUnknown &&
- Ctx != OMPDeclareVariantAttr::CtxUnknown &&
+template <OpenMPContextSelectorSetKind CtxSet, OpenMPContextSelectorKind Ctx,
+ typename... Arguments>
+static bool checkContext(const OMPContextSelectorData &Data,
+ Arguments... Params) {
+ assert(Data.CtxSet != OMP_CTX_SET_unknown && Data.Ctx != OMP_CTX_unknown &&
"Unknown context selector or context selector set.");
return false;
}
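
The checkContext specializations added in the next hunk implement matching for the implementation={vendor(...)} and device={kind(...)} context selectors of OpenMP declare variant. As a hedged user-level illustration of what this machinery selects between (example code, not part of the commit; foo and foo_gpu are hypothetical):

    // When compiled for an NVPTX or AMDGCN target (e.g. the device side of an
    // offload build), calls to foo() resolve to foo_gpu(); elsewhere the base
    // foo() is used.
    int foo_gpu(void);
    #pragma omp declare variant(foo_gpu) match(device = {kind(gpu)})
    int foo(void);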
@@ -11031,89 +11033,233 @@ static bool checkContext(const OMPDeclareVariantAttr *A) {
/// Checks for implementation={vendor(<vendor>)} context selector.
/// \returns true iff <vendor>="llvm", false otherwise.
template <>
-bool checkContext<OMPDeclareVariantAttr::CtxSetImplementation,
- OMPDeclareVariantAttr::CtxVendor>(
- const OMPDeclareVariantAttr *A) {
- return llvm::all_of(A->implVendors(),
+bool checkContext<OMP_CTX_SET_implementation, OMP_CTX_vendor>(
+ const OMPContextSelectorData &Data) {
+ return llvm::all_of(Data.Names,
[](StringRef S) { return !S.compare_lower("llvm"); });
}
-static bool greaterCtxScore(ASTContext &Ctx, const Expr *LHS, const Expr *RHS) {
- // If both scores are unknown, choose the very first one.
- if (!LHS && !RHS)
- return true;
- // If only one is known, return this one.
- if (LHS && !RHS)
- return true;
- if (!LHS && RHS)
- return false;
- llvm::APSInt LHSVal = LHS->EvaluateKnownConstInt(Ctx);
- llvm::APSInt RHSVal = RHS->EvaluateKnownConstInt(Ctx);
- return llvm::APSInt::compareValues(LHSVal, RHSVal) >= 0;
+/// Checks for device={kind(<kind>)} context selector.
+/// \returns true if <kind>="host" and compilation is for host.
+/// true if <kind>="nohost" and compilation is for device.
+/// true if <kind>="cpu" and compilation is for Arm, X86 or PPC CPU.
+/// true if <kind>="gpu" and compilation is for NVPTX or AMDGCN.
+/// false otherwise.
+template <>
+bool checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>(
+ const OMPContextSelectorData &Data, CodeGenModule &CGM) {
+ for (StringRef Name : Data.Names) {
+ if (!Name.compare_lower("host")) {
+ if (CGM.getLangOpts().OpenMPIsDevice)
+ return false;
+ continue;
+ }
+ if (!Name.compare_lower("nohost")) {
+ if (!CGM.getLangOpts().OpenMPIsDevice)
+ return false;
+ continue;
+ }
+ switch (CGM.getTriple().getArch()) {
+ case llvm::Triple::arm:
+ case llvm::Triple::armeb:
+ case llvm::Triple::aarch64:
+ case llvm::Triple::aarch64_be:
+ case llvm::Triple::aarch64_32:
+ case llvm::Triple::ppc:
+ case llvm::Triple::ppc64:
+ case llvm::Triple::ppc64le:
+ case llvm::Triple::x86:
+ case llvm::Triple::x86_64:
+ if (Name.compare_lower("cpu"))
+ return false;
+ break;
+ case llvm::Triple::amdgcn:
+ case llvm::Triple::nvptx:
+ case llvm::Triple::nvptx64:
+ if (Name.compare_lower("gpu"))
+ return false;
+ break;
+ case llvm::Triple::UnknownArch:
+ case llvm::Triple::arc:
+ case llvm::Triple::avr:
+ case llvm::Triple::bpfel:
+ case llvm::Triple::bpfeb:
+ case llvm::Triple::hexagon:
+ case llvm::Triple::mips:
+ case llvm::Triple::mipsel:
+ case llvm::Triple::mips64:
+ case llvm::Triple::mips64el:
+ case llvm::Triple::msp430:
+ case llvm::Triple::r600:
+ case llvm::Triple::riscv32:
+ case llvm::Triple::riscv64:
+ case llvm::Triple::sparc:
+ case llvm::Triple::sparcv9:
+ case llvm::Triple::sparcel:
+ case llvm::Triple::systemz:
+ case llvm::Triple::tce:
+ case llvm::Triple::tcele:
+ case llvm::Triple::thumb:
+ case llvm::Triple::thumbeb:
+ case llvm::Triple::xcore:
+ case llvm::Triple::le32:
+ case llvm::Triple::le64:
+ case llvm::Triple::amdil:
+ case llvm::Triple::amdil64:
+ case llvm::Triple::hsail:
+ case llvm::Triple::hsail64:
+ case llvm::Triple::spir:
+ case llvm::Triple::spir64:
+ case llvm::Triple::kalimba:
+ case llvm::Triple::shave:
+ case llvm::Triple::lanai:
+ case llvm::Triple::wasm32:
+ case llvm::Triple::wasm64:
+ case llvm::Triple::renderscript32:
+ case llvm::Triple::renderscript64:
+ case llvm::Triple::ve:
+ return false;
+ }
+ }
+ return true;
}
-namespace {
-/// Comparator for the priority queue for context selector.
-class OMPDeclareVariantAttrComparer
- : public std::greater<const OMPDeclareVariantAttr *> {
-private:
- ASTContext &Ctx;
+static bool matchesContext(CodeGenModule &CGM,
+ const CompleteOMPContextSelectorData &ContextData) {
+ for (const OMPContextSelectorData &Data : ContextData) {
+ switch (Data.Ctx) {
+ case OMP_CTX_vendor:
+ assert(Data.CtxSet == OMP_CTX_SET_implementation &&
+ "Expected implementation context selector set.");
+ if (!checkContext<OMP_CTX_SET_implementation, OMP_CTX_vendor>(Data))
+ return false;
+ break;
+ case OMP_CTX_kind:
+ assert(Data.CtxSet == OMP_CTX_SET_device &&
+ "Expected device context selector set.");
+ if (!checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>(Data,
+ CGM))
+ return false;
+ break;
+ case OMP_CTX_unknown:
+ llvm_unreachable("Unknown context selector kind.");
+ }
+ }
+ return true;
+}
-public:
- OMPDeclareVariantAttrComparer(ASTContext &Ctx) : Ctx(Ctx) {}
- bool operator()(const OMPDeclareVariantAttr *LHS,
- const OMPDeclareVariantAttr *RHS) const {
- const Expr *LHSExpr = nullptr;
- const Expr *RHSExpr = nullptr;
- if (LHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified)
- LHSExpr = LHS->getScore();
- if (RHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified)
- RHSExpr = RHS->getScore();
- return greaterCtxScore(Ctx, LHSExpr, RHSExpr);
+static CompleteOMPContextSelectorData
+translateAttrToContextSelectorData(ASTContext &C,
+ const OMPDeclareVariantAttr *A) {
+ CompleteOMPContextSelectorData Data;
+ for (unsigned I = 0, E = A->scores_size(); I < E; ++I) {
+ Data.emplace_back();
+ auto CtxSet = static_cast<OpenMPContextSelectorSetKind>(
+ *std::next(A->ctxSelectorSets_begin(), I));
+ auto Ctx = static_cast<OpenMPContextSelectorKind>(
+ *std::next(A->ctxSelectors_begin(), I));
+ Data.back().CtxSet = CtxSet;
+ Data.back().Ctx = Ctx;
+ const Expr *Score = *std::next(A->scores_begin(), I);
+ Data.back().Score = Score->EvaluateKnownConstInt(C);
+ switch (Ctx) {
+ case OMP_CTX_vendor:
+ assert(CtxSet == OMP_CTX_SET_implementation &&
+ "Expected implementation context selector set.");
+ Data.back().Names =
+ llvm::makeArrayRef(A->implVendors_begin(), A->implVendors_end());
+ break;
+ case OMP_CTX_kind:
+ assert(CtxSet == OMP_CTX_SET_device &&
+ "Expected device context selector set.");
+ Data.back().Names =
+ llvm::makeArrayRef(A->deviceKinds_begin(), A->deviceKinds_end());
+ break;
+ case OMP_CTX_unknown:
+ llvm_unreachable("Unknown context selector kind.");
+ }
}
-};
-} // anonymous namespace
+ return Data;
+}
+
+static bool isStrictSubset(const CompleteOMPContextSelectorData &LHS,
+ const CompleteOMPContextSelectorData &RHS) {
+ llvm::SmallDenseMap<std::pair<int, int>, llvm::StringSet<>, 4> RHSData;
+ for (const OMPContextSelectorData &D : RHS) {
+ auto &Pair = RHSData.FindAndConstruct(std::make_pair(D.CtxSet, D.Ctx));
+ Pair.getSecond().insert(D.Names.begin(), D.Names.end());
+ }
+ bool AllSetsAreEqual = true;
+ for (const OMPContextSelectorData &D : LHS) {
+ auto It = RHSData.find(std::make_pair(D.CtxSet, D.Ctx));
+ if (It == RHSData.end())
+ return false;
+ if (D.Names.size() > It->getSecond().size())
+ return false;
+ if (llvm::set_union(It->getSecond(), D.Names))
+ return false;
+ AllSetsAreEqual =
+ AllSetsAreEqual && (D.Names.size() == It->getSecond().size());
+ }
+
+ return LHS.size() != RHS.size() || !AllSetsAreEqual;
+}
+
+static bool greaterCtxScore(const CompleteOMPContextSelectorData &LHS,
+ const CompleteOMPContextSelectorData &RHS) {
+ // Score is calculated as sum of all scores + 1.
+ llvm::APSInt LHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false);
+ bool RHSIsSubsetOfLHS = isStrictSubset(RHS, LHS);
+ if (RHSIsSubsetOfLHS) {
+ LHSScore = llvm::APSInt::get(0);
+ } else {
+ for (const OMPContextSelectorData &Data : LHS) {
+ if (Data.Score.getBitWidth() > LHSScore.getBitWidth()) {
+ LHSScore = LHSScore.extend(Data.Score.getBitWidth()) + Data.Score;
+ } else if (Data.Score.getBitWidth() < LHSScore.getBitWidth()) {
+ LHSScore += Data.Score.extend(LHSScore.getBitWidth());
+ } else {
+ LHSScore += Data.Score;
+ }
+ }
+ }
+ llvm::APSInt RHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false);
+ if (!RHSIsSubsetOfLHS && isStrictSubset(LHS, RHS)) {
+ RHSScore = llvm::APSInt::get(0);
+ } else {
+ for (const OMPContextSelectorData &Data : RHS) {
+ if (Data.Score.getBitWidth() > RHSScore.getBitWidth()) {
+ RHSScore = RHSScore.extend(Data.Score.getBitWidth()) + Data.Score;
+ } else if (Data.Score.getBitWidth() < RHSScore.getBitWidth()) {
+ RHSScore += Data.Score.extend(RHSScore.getBitWidth());
+ } else {
+ RHSScore += Data.Score;
+ }
+ }
+ }
+ return llvm::APSInt::compareValues(LHSScore, RHSScore) >= 0;
+}
/// Finds the variant function that matches current context with its context
/// selector.
-static const FunctionDecl *getDeclareVariantFunction(ASTContext &Ctx,
+static const FunctionDecl *getDeclareVariantFunction(CodeGenModule &CGM,
const FunctionDecl *FD) {
if (!FD->hasAttrs() || !FD->hasAttr<OMPDeclareVariantAttr>())
return FD;
// Iterate through all DeclareVariant attributes and check context selectors.
- auto &&Comparer = [&Ctx](const OMPDeclareVariantAttr *LHS,
- const OMPDeclareVariantAttr *RHS) {
- const Expr *LHSExpr = nullptr;
- const Expr *RHSExpr = nullptr;
- if (LHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified)
- LHSExpr = LHS->getScore();
- if (RHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified)
- RHSExpr = RHS->getScore();
- return greaterCtxScore(Ctx, LHSExpr, RHSExpr);
- };
const OMPDeclareVariantAttr *TopMostAttr = nullptr;
+ CompleteOMPContextSelectorData TopMostData;
for (const auto *A : FD->specific_attrs<OMPDeclareVariantAttr>()) {
- const OMPDeclareVariantAttr *SelectedAttr = nullptr;
- switch (A->getCtxSelectorSet()) {
- case OMPDeclareVariantAttr::CtxSetImplementation:
- switch (A->getCtxSelector()) {
- case OMPDeclareVariantAttr::CtxVendor:
- if (checkContext<OMPDeclareVariantAttr::CtxSetImplementation,
- OMPDeclareVariantAttr::CtxVendor>(A))
- SelectedAttr = A;
- break;
- case OMPDeclareVariantAttr::CtxUnknown:
- llvm_unreachable(
- "Unknown context selector in implementation selector set.");
- }
- break;
- case OMPDeclareVariantAttr::CtxSetUnknown:
- llvm_unreachable("Unknown context selector set.");
- }
+ CompleteOMPContextSelectorData Data =
+ translateAttrToContextSelectorData(CGM.getContext(), A);
+ if (!matchesContext(CGM, Data))
+ continue;
// If the attribute matches the context, find the attribute with the highest
// score.
- if (SelectedAttr && (!TopMostAttr || !Comparer(TopMostAttr, SelectedAttr)))
- TopMostAttr = SelectedAttr;
+ if (!TopMostAttr || !greaterCtxScore(TopMostData, Data)) {
+ TopMostAttr = A;
+ TopMostData.swap(Data);
+ }
}
if (!TopMostAttr)
return FD;
@@ -11129,7 +11275,7 @@ bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) {
llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName);
if (Orig && !Orig->isDeclaration())
return false;
- const FunctionDecl *NewFD = getDeclareVariantFunction(CGM.getContext(), D);
+ const FunctionDecl *NewFD = getDeclareVariantFunction(CGM, D);
// Emit original function if it does not have declare variant attribute or the
// context does not match.
if (NewFD == D)
@@ -11143,6 +11289,320 @@ bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) {
return true;
}
+CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
+ CodeGenModule &CGM, const OMPLoopDirective &S)
+ : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
+ assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
+ if (!NeedToPush)
+ return;
+ NontemporalDeclsSet &DS =
+ CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
+ for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
+ for (const Stmt *Ref : C->private_refs()) {
+ const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
+ const ValueDecl *VD;
+ if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
+ VD = DRE->getDecl();
+ } else {
+ const auto *ME = cast<MemberExpr>(SimpleRefExpr);
+ assert((ME->isImplicitCXXThis() ||
+ isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
+ "Expected member of current class.");
+ VD = ME->getMemberDecl();
+ }
+ DS.insert(VD);
+ }
+ }
+}
+
+CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
+ if (!NeedToPush)
+ return;
+ CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
+}
+
+bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
+ assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
+
+ return llvm::any_of(
+ CGM.getOpenMPRuntime().NontemporalDeclsStack,
+ [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
+}
+
+CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
+ CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
+ : CGM(CGF.CGM),
+ NeedToPush(llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
+ [](const OMPLastprivateClause *C) {
+ return C->getKind() ==
+ OMPC_LASTPRIVATE_conditional;
+ })) {
+ assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
+ if (!NeedToPush)
+ return;
+ LastprivateConditionalData &Data =
+ CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
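+ // Map each conditional lastprivate variable to a unique global name
+ // ("pl_cond" prefix) that will later hold its last assigned value.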
+ for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
+ if (C->getKind() != OMPC_LASTPRIVATE_conditional)
+ continue;
+
+ for (const Expr *Ref : C->varlists()) {
+ Data.DeclToUniqeName.try_emplace(
+ cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
+ generateUniqueName(CGM, "pl_cond", Ref));
+ }
+ }
+ Data.IVLVal = IVLVal;
+ // In simd-only mode or for simd directives there is no need to generate a
+ // threadprivate reference for the loop iteration counter; the original one
+ // can be used, since outlining cannot happen in simd regions.
+ if (CGF.getLangOpts().OpenMPSimd ||
+ isOpenMPSimdDirective(S.getDirectiveKind())) {
+ Data.UseOriginalIV = true;
+ return;
+ }
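+ // Otherwise build a unique name for the global loop counter from the
+ // file's unique ID and the directive's source position.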
+ llvm::SmallString<16> Buffer;
+ llvm::raw_svector_ostream OS(Buffer);
+ PresumedLoc PLoc =
+ CGM.getContext().getSourceManager().getPresumedLoc(S.getBeginLoc());
+ assert(PLoc.isValid() && "Source location is expected to be always valid.");
+
+ llvm::sys::fs::UniqueID ID;
+ if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
+ CGM.getDiags().Report(diag::err_cannot_open_file)
+ << PLoc.getFilename() << EC.message();
+ OS << "$pl_cond_" << ID.getDevice() << "_" << ID.getFile() << "_"
+ << PLoc.getLine() << "_" << PLoc.getColumn() << "$iv";
+ Data.IVName = OS.str();
+}
+
+CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
+ if (!NeedToPush)
+ return;
+ CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
+}
+
+void CGOpenMPRuntime::initLastprivateConditionalCounter(
+ CodeGenFunction &CGF, const OMPExecutableDirective &S) {
+ if (CGM.getLangOpts().OpenMPSimd ||
+ !llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
+ [](const OMPLastprivateClause *C) {
+ return C->getKind() == OMPC_LASTPRIVATE_conditional;
+ }))
+ return;
+ const CGOpenMPRuntime::LastprivateConditionalData &Data =
+ LastprivateConditionalStack.back();
+ if (Data.UseOriginalIV)
+ return;
+ // Global loop counter. Required to handle inner parallel-for regions.
+ // global_iv = iv;
+ Address GlobIVAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
+ CGF, Data.IVLVal.getType(), Data.IVName);
+ LValue GlobIVLVal = CGF.MakeAddrLValue(GlobIVAddr, Data.IVLVal.getType());
+ llvm::Value *IVVal = CGF.EmitLoadOfScalar(Data.IVLVal, S.getBeginLoc());
+ CGF.EmitStoreOfScalar(IVVal, GlobIVLVal);
+}
+
+namespace {
+/// Checks whether a lastprivate conditional variable is referenced in LHS.
+class LastprivateConditionalRefChecker final
+ : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
+ CodeGenFunction &CGF;
+ ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
+ const Expr *FoundE = nullptr;
+ const Decl *FoundD = nullptr;
+ StringRef UniqueDeclName;
+ LValue IVLVal;
+ StringRef IVName;
+ SourceLocation Loc;
+ bool UseOriginalIV = false;
+
+public:
+ bool VisitDeclRefExpr(const DeclRefExpr *E) {
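+ // Walk the regions from innermost to outermost and remember the first one
+ // that registered this declaration as a conditional lastprivate.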
+ for (const CGOpenMPRuntime::LastprivateConditionalData &D :
+ llvm::reverse(LPM)) {
+ auto It = D.DeclToUniqeName.find(E->getDecl());
+ if (It == D.DeclToUniqeName.end())
+ continue;
+ FoundE = E;
+ FoundD = E->getDecl()->getCanonicalDecl();
+ UniqueDeclName = It->getSecond();
+ IVLVal = D.IVLVal;
+ IVName = D.IVName;
+ UseOriginalIV = D.UseOriginalIV;
+ break;
+ }
+ return FoundE == E;
+ }
+ bool VisitMemberExpr(const MemberExpr *E) {
+ if (!CGF.IsWrappedCXXThis(E->getBase()))
+ return false;
+ for (const CGOpenMPRuntime::LastprivateConditionalData &D :
+ llvm::reverse(LPM)) {
+ auto It = D.DeclToUniqeName.find(E->getMemberDecl());
+ if (It == D.DeclToUniqeName.end())
+ continue;
+ FoundE = E;
+ FoundD = E->getMemberDecl()->getCanonicalDecl();
+ UniqueDeclName = It->getSecond();
+ IVLVal = D.IVLVal;
+ IVName = D.IVName;
+ UseOriginalIV = D.UseOriginalIV;
+ break;
+ }
+ return FoundE == E;
+ }
+ bool VisitStmt(const Stmt *S) {
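+ // Only glvalue children can name a conditional lastprivate variable here;
+ // skip other expressions and recurse into the remaining children.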
+ for (const Stmt *Child : S->children()) {
+ if (!Child)
+ continue;
+ if (const auto *E = dyn_cast<Expr>(Child))
+ if (!E->isGLValue())
+ continue;
+ if (Visit(Child))
+ return true;
+ }
+ return false;
+ }
+ explicit LastprivateConditionalRefChecker(
+ CodeGenFunction &CGF,
+ ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
+ : CGF(CGF), LPM(LPM) {}
+ std::tuple<const Expr *, const Decl *, StringRef, LValue, StringRef, bool>
+ getFoundData() const {
+ return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, IVName,
+ UseOriginalIV);
+ }
+};
+} // namespace
+
+void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
+ const Expr *LHS) {
+ if (CGF.getLangOpts().OpenMP < 50)
+ return;
+ LastprivateConditionalRefChecker Checker(CGF, LastprivateConditionalStack);
+ if (!Checker.Visit(LHS))
+ return;
+ const Expr *FoundE;
+ const Decl *FoundD;
+ StringRef UniqueDeclName;
+ LValue IVLVal;
+ StringRef IVName;
+ bool UseOriginalIV;
+ std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, IVName, UseOriginalIV) =
+ Checker.getFoundData();
+
+ // Last updated loop counter for the lastprivate conditional var.
+ // int<xx> last_iv = 0;
+ llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
+ llvm::Constant *LastIV =
+ getOrCreateInternalVariable(LLIVTy, UniqueDeclName + "$iv");
+ cast<llvm::GlobalVariable>(LastIV)->setAlignment(
+ IVLVal.getAlignment().getAsAlign());
+ LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
+
+ // Private address of the lastprivate conditional in the current context.
+ // priv_a
+ LValue LVal = CGF.EmitLValue(FoundE);
+ // Last value of the lastprivate conditional.
+ // decltype(priv_a) last_a;
+ llvm::Constant *Last = getOrCreateInternalVariable(
+ LVal.getAddress(CGF).getElementType(), UniqueDeclName);
+ cast<llvm::GlobalVariable>(Last)->setAlignment(
+ LVal.getAlignment().getAsAlign());
+ LValue LastLVal =
+ CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());
+
+ // Global loop counter. Required to handle inner parallel-for regions.
+ // global_iv
+ if (!UseOriginalIV) {
+ Address IVAddr =
+ getAddrOfArtificialThreadPrivate(CGF, IVLVal.getType(), IVName);
+ IVLVal = CGF.MakeAddrLValue(IVAddr, IVLVal.getType());
+ }
+ llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, FoundE->getExprLoc());
+
+ // #pragma omp critical(a)
+ // if (last_iv <= iv) {
+ // last_iv = iv;
+ // last_a = priv_a;
+ // }
+ auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
+ FoundE](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
+ llvm::Value *LastIVVal =
+ CGF.EmitLoadOfScalar(LastIVLVal, FoundE->getExprLoc());
+ // If the last recorded update is from an earlier (or the same) iteration
+ // (last_iv <= global_iv), store the new value into the global variables.
+ llvm::Value *CmpRes;
+ if (IVLVal.getType()->isSignedIntegerType()) {
+ CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
+ } else {
+ assert(IVLVal.getType()->isUnsignedIntegerType() &&
+ "Loop iteration variable must be integer.");
+ CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
+ }
+ llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
+ llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
+ CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
+ // {
+ CGF.EmitBlock(ThenBB);
+
+ // last_iv = global_iv;
+ CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
+
+ // last_a = priv_a;
+ switch (CGF.getEvaluationKind(LVal.getType())) {
+ case TEK_Scalar: {
+ llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, FoundE->getExprLoc());
+ CGF.EmitStoreOfScalar(PrivVal, LastLVal);
+ break;
+ }
+ case TEK_Complex: {
+ CodeGenFunction::ComplexPairTy PrivVal =
+ CGF.EmitLoadOfComplex(LVal, FoundE->getExprLoc());
+ CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
+ break;
+ }
+ case TEK_Aggregate:
+ llvm_unreachable(
+ "Aggregates are not supported in lastprivate conditional.");
+ }
+ // }
+ CGF.EmitBranch(ExitBB);
+ // There is no need to emit a line number for the unconditional branch.
+ (void)ApplyDebugLocation::CreateEmpty(CGF);
+ CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
+ };
+
+ if (CGM.getLangOpts().OpenMPSimd) {
+ // No parallel region can be emitted in simd-only mode, so there is no need
+ // to guard the update with a critical region.
+ RegionCodeGenTy ThenRCG(CodeGen);
+ ThenRCG(CGF);
+ } else {
+ emitCriticalRegion(CGF, UniqueDeclName, CodeGen, FoundE->getExprLoc());
+ }
+}
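+
+// A sketch of the source pattern handled above (names are illustrative):
+//   #pragma omp for lastprivate(conditional: a)
+//   for (int i = 0; i < n; ++i)
+//     if (c[i])
+//       a = b[i]; // each assignment to 'a' is checked by
+//                 // checkAndEmitLastprivateConditional().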
+
+void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
+ CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
+ SourceLocation Loc) {
+ if (CGF.getLangOpts().OpenMP < 50)
+ return;
+ auto It = LastprivateConditionalStack.back().DeclToUniqeName.find(VD);
+ assert(It != LastprivateConditionalStack.back().DeclToUniqeName.end() &&
+ "Unknown lastprivate conditional variable.");
+ StringRef UniqueName = It->getSecond();
+ llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
+ // The variable was not updated in the region; there is nothing to copy back.
+ if (!GV)
+ return;
+ LValue LPLVal = CGF.MakeAddrLValue(
+ GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
+ llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
+ CGF.EmitStoreOfScalar(Res, PrivLVal);
+}
+
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
@@ -11265,7 +11725,7 @@ void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
}
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
- OpenMPProcBindClauseKind ProcBind,
+ ProcBindKind ProcBind,
SourceLocation Loc) {
llvm_unreachable("Not supported in SIMD-only mode");
}