Diffstat (limited to 'contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp')
-rw-r--r--  contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp | 2076
1 file changed, 1555 insertions(+), 521 deletions(-)
diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 27e7175da841..97b17799a03e 100644
--- a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -10,17 +10,22 @@
//
//===----------------------------------------------------------------------===//
+#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
-#include "CGOpenMPRuntime.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
-#include "clang/CodeGen/ConstantInitBuilder.h"
+#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
+#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
+#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
+#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SetOperations.h"
#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
@@ -30,6 +35,7 @@
using namespace clang;
using namespace CodeGen;
+using namespace llvm::omp;
namespace {
/// Base class for handling code generation inside OpenMP regions.
@@ -356,7 +362,7 @@ public:
VD->getType().getNonReferenceType(), VK_LValue,
C.getLocation());
PrivScope.addPrivate(
- VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
+ VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
}
(void)PrivScope.Privatize();
}
@@ -727,10 +733,6 @@ enum OpenMPRTLFunction {
OMPRTL__tgt_target_teams_nowait,
// Call to void __tgt_register_requires(int64_t flags);
OMPRTL__tgt_register_requires,
- // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
- OMPRTL__tgt_register_lib,
- // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
- OMPRTL__tgt_unregister_lib,
// Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
// void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
OMPRTL__tgt_target_data_begin,
@@ -752,6 +754,11 @@ enum OpenMPRTLFunction {
// arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
// *arg_types);
OMPRTL__tgt_target_data_update_nowait,
+ // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
+ OMPRTL__tgt_mapper_num_components,
+ // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
+ // *base, void *begin, int64_t size, int64_t type);
+ OMPRTL__tgt_push_mapper_component,
};
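The two entries added above are the runtime hooks for OpenMP user-defined mappers (declare mapper). Read directly from the signature comments, the corresponding runtime-side prototypes would look roughly like this sketch (illustration only, not part of the patch; libomptarget's actual declarations may carry additional attributes):

  #include <cstdint>
  // Sketch of the libomptarget entry points implied by the comments above.
  extern "C" {
  // Returns how many components have already been pushed for this mapper.
  int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
  // Appends one (base, begin, size, type) tuple to the mapper's component list.
  void __tgt_push_mapper_component(void *rt_mapper_handle, void *base,
                                   void *begin, int64_t size, int64_t type);
  }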
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
@@ -836,7 +843,7 @@ static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
break;
case TEK_Aggregate:
- InitRVal = RValue::getAggregate(LV.getAddress());
+ InitRVal = RValue::getAggregate(LV.getAddress(CGF));
break;
}
OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
@@ -960,7 +967,7 @@ void ReductionCodeGen::emitAggregateInitialization(
EmitDeclareReductionInit,
EmitDeclareReductionInit ? ClausesData[N].ReductionOp
: PrivateVD->getInit(),
- DRD, SharedLVal.getAddress());
+ DRD, SharedLVal.getAddress(CGF));
}
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
@@ -1001,13 +1008,13 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
}
llvm::Value *Size;
llvm::Value *SizeInChars;
- auto *ElemType =
- cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
- ->getElementType();
+ auto *ElemType = cast<llvm::PointerType>(
+ SharedAddresses[N].first.getPointer(CGF)->getType())
+ ->getElementType();
auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
if (AsArraySection) {
- Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
- SharedAddresses[N].first.getPointer());
+ Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF),
+ SharedAddresses[N].first.getPointer(CGF));
Size = CGF.Builder.CreateNUWAdd(
Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
@@ -1057,7 +1064,7 @@ void ReductionCodeGen::emitInitialization(
PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
QualType SharedType = SharedAddresses[N].first.getType();
SharedLVal = CGF.MakeAddrLValue(
- CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
+ CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
CGF.ConvertTypeForMem(SharedType)),
SharedType, SharedAddresses[N].first.getBaseInfo(),
CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
@@ -1065,7 +1072,7 @@ void ReductionCodeGen::emitInitialization(
emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
} else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
- PrivateAddr, SharedLVal.getAddress(),
+ PrivateAddr, SharedLVal.getAddress(CGF),
SharedLVal.getType());
} else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
!CGF.isTrivialInitializer(PrivateVD->getInit())) {
@@ -1102,15 +1109,15 @@ static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
!CGF.getContext().hasSameType(BaseTy, ElTy)) {
if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
- BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
+ BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
} else {
- LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
+ LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
}
BaseTy = BaseTy->getPointeeType();
}
return CGF.MakeAddrLValue(
- CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
+ CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
CGF.ConvertTypeForMem(ElTy)),
BaseLV.getType(), BaseLV.getBaseInfo(),
CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
@@ -1174,15 +1181,15 @@ Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
OriginalBaseLValue);
llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
- BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
+ BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
llvm::Value *PrivatePointer =
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
PrivateAddr.getPointer(),
- SharedAddresses[N].first.getAddress().getType());
+ SharedAddresses[N].first.getAddress(CGF).getType());
llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
return castToBase(CGF, OrigVD->getType(),
SharedAddresses[N].first.getType(),
- OriginalBaseLValue.getAddress().getType(),
+ OriginalBaseLValue.getAddress(CGF).getType(),
OriginalBaseLValue.getAlignment(), Ptr);
}
BaseDecls.emplace_back(
@@ -1259,6 +1266,52 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
loadOffloadInfoMetadata();
}
+bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD,
+ const GlobalDecl &OldGD,
+ llvm::GlobalValue *OrigAddr,
+ bool IsForDefinition) {
+ // Emit at least a definition for the aliasee if the address of the
+ // original function is requested.
+ if (IsForDefinition || OrigAddr)
+ (void)CGM.GetAddrOfGlobal(NewGD);
+ StringRef NewMangledName = CGM.getMangledName(NewGD);
+ llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName);
+ if (Addr && !Addr->isDeclaration()) {
+ const auto *D = cast<FunctionDecl>(OldGD.getDecl());
+ const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(NewGD);
+ llvm::Type *DeclTy = CGM.getTypes().GetFunctionType(FI);
+
+ // Create a reference to the named value. This ensures that it is emitted
+ // if a deferred decl.
+ llvm::GlobalValue::LinkageTypes LT = CGM.getFunctionLinkage(OldGD);
+
+ // Create the new alias itself, but don't set a name yet.
+ auto *GA =
+ llvm::GlobalAlias::create(DeclTy, 0, LT, "", Addr, &CGM.getModule());
+
+ if (OrigAddr) {
+ assert(OrigAddr->isDeclaration() && "Expected declaration");
+
+ GA->takeName(OrigAddr);
+ OrigAddr->replaceAllUsesWith(
+ llvm::ConstantExpr::getBitCast(GA, OrigAddr->getType()));
+ OrigAddr->eraseFromParent();
+ } else {
+ GA->setName(CGM.getMangledName(OldGD));
+ }
+
+ // Set attributes which are particular to an alias; this is a
+ // specialization of the attributes which may be set on a global function.
+ if (D->hasAttr<WeakAttr>() || D->hasAttr<WeakRefAttr>() ||
+ D->isWeakImported())
+ GA->setLinkage(llvm::Function::WeakAnyLinkage);
+
+ CGM.SetCommonAttributes(OldGD, GA);
+ return true;
+ }
+ return false;
+}
+
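tryEmitDeclareVariant, added above, lowers `#pragma omp declare variant` by turning the base function's symbol into an IR alias of the selected variant, taking over the name (and uses) of any pre-existing declaration. A minimal standalone sketch of that alias trick, under the assumption that the variant's definition is already in the module (names here are illustrative, not the patch's):

  #include <cassert>
  #include "llvm/ADT/StringRef.h"
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/GlobalAlias.h"
  #include "llvm/IR/Module.h"

  // Make calls to BaseName resolve to the body of Variant via a GlobalAlias.
  static void aliasBaseToVariant(llvm::Module &M, llvm::Function *Variant,
                                 llvm::StringRef BaseName) {
    using namespace llvm;
    auto *GA = GlobalAlias::create(Variant->getValueType(), /*AddressSpace=*/0,
                                   Variant->getLinkage(), "", Variant, &M);
    if (GlobalValue *Old = M.getNamedValue(BaseName)) {
      // A declaration of the base already exists: take over its name and uses.
      assert(Old->isDeclaration() && "expected the base to be a declaration");
      GA->takeName(Old);
      Old->replaceAllUsesWith(ConstantExpr::getBitCast(GA, Old->getType()));
      Old->eraseFromParent();
    } else {
      GA->setName(BaseName);
    }
  }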
void CGOpenMPRuntime::clear() {
InternalVars.clear();
// Clean non-target variable declarations possibly used only in debug info.
@@ -1272,6 +1325,14 @@ void CGOpenMPRuntime::clear() {
continue;
GV->eraseFromParent();
}
+ // Emit aliases for the deferred aliasees.
+ for (const auto &Pair : DeferredVariantFunction) {
+ StringRef MangledName = CGM.getMangledName(Pair.second.second);
+ llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName);
+ // If not able to emit alias, just emit original declaration.
+ (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr,
+ /*IsForDefinition=*/false);
+ }
}
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
@@ -1321,12 +1382,12 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
- .getAddress();
+ .getAddress(CGF);
});
Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
- .getAddress();
+ .getAddress(CGF);
});
(void)Scope.Privatize();
if (!IsCombiner && Out->hasInit() &&
@@ -1377,6 +1438,52 @@ CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
return UDRMap.lookup(D);
}
+namespace {
+// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
+// Builder if one is present.
+struct PushAndPopStackRAII {
+ PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
+ bool HasCancel)
+ : OMPBuilder(OMPBuilder) {
+ if (!OMPBuilder)
+ return;
+
+ // The following callback is the crucial part of clang's cleanup process.
+ //
+ // NOTE:
+ // Once the OpenMPIRBuilder is used to create parallel regions (and
+ // similar), the cancellation destination (Dest below) is determined via
+ // IP. That means if we have variables to finalize we split the block at IP,
+ // use the new block (=BB) as destination to build a JumpDest (via
+ // getJumpDestInCurrentScope(BB)) which then is fed to
+ // EmitBranchThroughCleanup. Furthermore, there will not be the need
+ // to push & pop a FinalizationInfo object.
+ // The FiniCB will still be needed but at the point where the
+ // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
+ auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
+ assert(IP.getBlock()->end() == IP.getPoint() &&
+ "Clang CG should cause non-terminated block!");
+ CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
+ CGF.Builder.restoreIP(IP);
+ CodeGenFunction::JumpDest Dest =
+ CGF.getOMPCancelDestination(OMPD_parallel);
+ CGF.EmitBranchThroughCleanup(Dest);
+ };
+
+ // TODO: Remove this once we emit parallel regions through the
+ // OpenMPIRBuilder as it can do this setup internally.
+ llvm::OpenMPIRBuilder::FinalizationInfo FI(
+ {FiniCB, OMPD_parallel, HasCancel});
+ OMPBuilder->pushFinalizationCB(std::move(FI));
+ }
+ ~PushAndPopStackRAII() {
+ if (OMPBuilder)
+ OMPBuilder->popFinalizationCB();
+ }
+ llvm::OpenMPIRBuilder *OMPBuilder;
+};
+} // namespace
+
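PushAndPopStackRAII, defined above, is the standard scope-bound push/pop idiom applied to the OpenMPIRBuilder's finalization-callback stack, so the callback is popped even on early returns. Reduced to its core, the pattern looks like this (generic sketch, illustrative names):

  #include <utility>
  #include <vector>

  // Push a value on construction, pop it when the enclosing scope ends.
  template <typename T> struct ScopedPush {
    std::vector<T> &Stack;
    ScopedPush(std::vector<T> &S, T Value) : Stack(S) {
      Stack.push_back(std::move(Value));
    }
    ~ScopedPush() { Stack.pop_back(); }
  };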
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
@@ -1401,6 +1508,11 @@ static llvm::Function *emitParallelOrTeamsOutlinedFunction(
else if (const auto *OPFD =
dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
HasCancel = OPFD->hasCancel();
+
+ // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
+ // parallel region to make cancellation barriers work properly.
+ llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder();
+ PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel);
CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
HasCancel, OutlinedHelperName);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
@@ -1436,7 +1548,7 @@ llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
UpLoc, ThreadID,
CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
TaskTVar->getType()->castAs<PointerType>())
- .getPointer()};
+ .getPointer(CGF)};
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
};
CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
@@ -1638,18 +1750,24 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
return ThreadID;
}
// If exceptions are enabled, do not use parameter to avoid possible crash.
- if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
- !CGF.getLangOpts().CXXExceptions ||
- CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
- if (auto *OMPRegionInfo =
- dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
- if (OMPRegionInfo->getThreadIDVariable()) {
- // Check if this an outlined function with thread id passed as argument.
- LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
+ if (auto *OMPRegionInfo =
+ dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
+ if (OMPRegionInfo->getThreadIDVariable()) {
+ // Check if this is an outlined function with thread id passed as argument.
+ LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
+ llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
+ if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
+ !CGF.getLangOpts().CXXExceptions ||
+ CGF.Builder.GetInsertBlock() == TopBlock ||
+ !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
+ cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
+ TopBlock ||
+ cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
+ CGF.Builder.GetInsertBlock()) {
ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
// If value loaded in entry block, cache it and use it everywhere in
// function.
- if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
+ if (CGF.Builder.GetInsertBlock() == TopBlock) {
auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
Elem.second.ThreadID = ThreadID;
}
@@ -1686,6 +1804,12 @@ void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
UDRMap.erase(D);
FunctionUDRMap.erase(CGF.CurFn);
}
+ auto I = FunctionUDMMap.find(CGF.CurFn);
+ if (I != FunctionUDMMap.end()) {
+ for(auto *D : I->second)
+ UDMMap.erase(D);
+ FunctionUDMMap.erase(I);
+ }
}
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
@@ -2352,26 +2476,6 @@ llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
break;
}
- case OMPRTL__tgt_register_lib: {
- // Build void __tgt_register_lib(__tgt_bin_desc *desc);
- QualType ParamTy =
- CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
- llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
- auto *FnTy =
- llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
- break;
- }
- case OMPRTL__tgt_unregister_lib: {
- // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
- QualType ParamTy =
- CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
- llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
- auto *FnTy =
- llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
- break;
- }
case OMPRTL__tgt_target_data_begin: {
// Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
// void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
@@ -2459,6 +2563,24 @@ llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
break;
}
+ case OMPRTL__tgt_mapper_num_components: {
+ // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
+ llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components");
+ break;
+ }
+ case OMPRTL__tgt_push_mapper_component: {
+ // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void
+ // *base, void *begin, int64_t size, int64_t type);
+ llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy,
+ CGM.Int64Ty, CGM.Int64Ty};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component");
+ break;
+ }
}
assert(RTLFn && "Unable to find OpenMP runtime function");
return RTLFn;
@@ -2552,6 +2674,32 @@ CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
return CGM.CreateRuntimeFunction(FnTy, Name);
}
+/// Obtain information that uniquely identifies a target entry. This
+/// consists of the file and device IDs as well as line number associated with
+/// the relevant entry source location.
+static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
+ unsigned &DeviceID, unsigned &FileID,
+ unsigned &LineNum) {
+ SourceManager &SM = C.getSourceManager();
+
+ // The loc should be always valid and have a file ID (the user cannot use
+ // #pragma directives in macros)
+
+ assert(Loc.isValid() && "Source location is expected to be always valid.");
+
+ PresumedLoc PLoc = SM.getPresumedLoc(Loc);
+ assert(PLoc.isValid() && "Source location is expected to be always valid.");
+
+ llvm::sys::fs::UniqueID ID;
+ if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
+ SM.getDiagnostics().Report(diag::err_cannot_open_file)
+ << PLoc.getFilename() << EC.message();
+
+ DeviceID = ID.getDevice();
+ FileID = ID.getFile();
+ LineNum = PLoc.getLine();
+}
+
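getTargetEntryUniqueInfo is moved up here (its old location is deleted further down) so that getAddrOfDeclareTargetVar can fold the FileID into reference-pointer names for internal-linkage variables. The (DeviceID, FileID, Line) triple is what makes offload entry names unique per source location; a sketch of how such a triple is conventionally combined into a target-region entry name (the exact "__omp_offloading_..." shape is an assumption of this sketch, shown only for illustration):

  #include <string>
  #include "llvm/ADT/StringRef.h"
  #include "llvm/Support/Format.h"
  #include "llvm/Support/raw_ostream.h"

  // Fold the unique (DeviceID, FileID, Line) triple into an entry name.
  static std::string makeTargetEntryName(llvm::StringRef ParentName,
                                         unsigned DeviceID, unsigned FileID,
                                         unsigned Line) {
    std::string Name;
    llvm::raw_string_ostream OS(Name);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
    return OS.str();
  }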
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
if (CGM.getLangOpts().OpenMPSimd)
return Address::invalid();
@@ -2563,19 +2711,27 @@ Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
SmallString<64> PtrName;
{
llvm::raw_svector_ostream OS(PtrName);
- OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_ref_ptr";
+ OS << CGM.getMangledName(GlobalDecl(VD));
+ if (!VD->isExternallyVisible()) {
+ unsigned DeviceID, FileID, Line;
+ getTargetEntryUniqueInfo(CGM.getContext(),
+ VD->getCanonicalDecl()->getBeginLoc(),
+ DeviceID, FileID, Line);
+ OS << llvm::format("_%x", FileID);
+ }
+ OS << "_decl_tgt_ref_ptr";
}
llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
if (!Ptr) {
QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
PtrName);
- if (!CGM.getLangOpts().OpenMPIsDevice) {
- auto *GV = cast<llvm::GlobalVariable>(Ptr);
- GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
+
+ auto *GV = cast<llvm::GlobalVariable>(Ptr);
+ GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
+
+ if (!CGM.getLangOpts().OpenMPIsDevice)
GV->setInitializer(CGM.GetAddrOfGlobal(VD));
- }
- CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr));
registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
}
return Address(Ptr, CGM.getContext().getDeclAlign(VD));
@@ -2749,35 +2905,12 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
return nullptr;
}
-/// Obtain information that uniquely identifies a target entry. This
-/// consists of the file and device IDs as well as line number associated with
-/// the relevant entry source location.
-static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
- unsigned &DeviceID, unsigned &FileID,
- unsigned &LineNum) {
- SourceManager &SM = C.getSourceManager();
-
- // The loc should be always valid and have a file ID (the user cannot use
- // #pragma directives in macros)
-
- assert(Loc.isValid() && "Source location is expected to be always valid.");
-
- PresumedLoc PLoc = SM.getPresumedLoc(Loc);
- assert(PLoc.isValid() && "Source location is expected to be always valid.");
-
- llvm::sys::fs::UniqueID ID;
- if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
- SM.getDiagnostics().Report(diag::err_cannot_open_file)
- << PLoc.getFilename() << EC.message();
-
- DeviceID = ID.getDevice();
- FileID = ID.getFile();
- LineNum = PLoc.getLine();
-}
-
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
llvm::GlobalVariable *Addr,
bool PerformInit) {
+ if (CGM.getLangOpts().OMPTargetTriples.empty() &&
+ !CGM.getLangOpts().OpenMPIsDevice)
+ return false;
Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
@@ -2889,10 +3022,15 @@ Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
QualType VarType,
StringRef Name) {
std::string Suffix = getName({"artificial", ""});
- std::string CacheSuffix = getName({"cache", ""});
llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
llvm::Value *GAddr =
getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
+ if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
+ CGM.getTarget().isTLSSupported()) {
+ cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
+ return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
+ }
+ std::string CacheSuffix = getName({"cache", ""});
llvm::Value *Args[] = {
emitUpdateLocation(CGF, SourceLocation()),
getThreadID(CGF, SourceLocation()),
@@ -2906,12 +3044,12 @@ Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
CGF.EmitRuntimeCall(
createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
VarLVType->getPointerTo(/*AddrSpace=*/0)),
- CGM.getPointerAlign());
+ CGM.getContext().getTypeAlignInChars(VarType));
}
-void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
- const RegionCodeGenTy &ThenGen,
- const RegionCodeGenTy &ElseGen) {
+void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
+ const RegionCodeGenTy &ThenGen,
+ const RegionCodeGenTy &ElseGen) {
CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
// If the condition constant folds and can be elided, try to avoid emitting
@@ -2981,14 +3119,16 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
CGF.EmitRuntimeCall(
RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
- // OutlinedFn(&GTid, &zero, CapturedStruct);
- Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
- /*Name*/ ".zero.addr");
- CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
+ // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
+ Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
+ Address ZeroAddrBound =
+ CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
+ /*Name=*/".bound.zero.addr");
+ CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
// ThreadId for serialized parallels is 0.
- OutlinedFnArgs.push_back(ZeroAddr.getPointer());
- OutlinedFnArgs.push_back(ZeroAddr.getPointer());
+ OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
+ OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
@@ -2999,7 +3139,7 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
EndArgs);
};
if (IfCond) {
- emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
+ emitIfClause(CGF, IfCond, ThenGen, ElseGen);
} else {
RegionCodeGenTy ThenRCG(ThenGen);
ThenRCG(CGF);
@@ -3017,7 +3157,7 @@ Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
if (auto *OMPRegionInfo =
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
if (OMPRegionInfo->getThreadIDVariable())
- return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
+ return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
llvm::Value *ThreadID = getThreadID(CGF, Loc);
QualType Int32Ty =
@@ -3283,9 +3423,9 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
// <copy_func>, did_it);
if (DidIt.isValid()) {
llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
- QualType CopyprivateArrayTy =
- C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
- /*IndexTypeQuals=*/0);
+ QualType CopyprivateArrayTy = C.getConstantArrayType(
+ C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
+ /*IndexTypeQuals=*/0);
// Create a list of all private variables for copyprivate.
Address CopyprivateList =
CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
@@ -3293,7 +3433,8 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
CGF.Builder.CreateStore(
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
+ CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
+ CGF.VoidPtrTy),
Elem);
}
// Build function that copies private values from single region to all other
@@ -3375,6 +3516,16 @@ void CGOpenMPRuntime::getDefaultScheduleAndChunk(
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
OpenMPDirectiveKind Kind, bool EmitChecks,
bool ForceSimpleCall) {
+ // Check if we should use the OMPBuilder
+ auto *OMPRegionInfo =
+ dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
+ llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
+ if (OMPBuilder) {
+ CGF.Builder.restoreIP(OMPBuilder->CreateBarrier(
+ CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
+ return;
+ }
+
if (!CGF.HaveInsertPoint())
return;
// Build call __kmpc_cancel_barrier(loc, thread_id);
@@ -3384,8 +3535,7 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
// thread_id);
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
getThreadID(CGF, Loc)};
- if (auto *OMPRegionInfo =
- dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
+ if (OMPRegionInfo) {
if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
llvm::Value *Result = CGF.EmitRuntimeCall(
createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
@@ -3472,7 +3622,7 @@ bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
return Schedule != OMP_sch_static;
}
-static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
+static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
OpenMPScheduleClauseModifier M1,
OpenMPScheduleClauseModifier M2) {
int Modifier = 0;
@@ -3506,6 +3656,20 @@ static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
case OMPC_SCHEDULE_MODIFIER_unknown:
break;
}
+ // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
+ // If the static schedule kind is specified or if the ordered clause is
+ // specified, and if the nonmonotonic modifier is not specified, the effect is
+ // as if the monotonic modifier is specified. Otherwise, unless the monotonic
+ // modifier is specified, the effect is as if the nonmonotonic modifier is
+ // specified.
+ if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
+ if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
+ Schedule == OMP_sch_static_balanced_chunked ||
+ Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
+ Schedule == OMP_dist_sch_static_chunked ||
+ Schedule == OMP_dist_sch_static))
+ Modifier = OMP_sch_modifier_nonmonotonic;
+ }
return Schedule | Modifier;
}
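A concrete effect of the new default: with -fopenmp-version=50, a plain schedule(dynamic, n) worksharing loop now reaches __kmpc_dispatch_init with the nonmonotonic bit ORed into the schedule word, roughly kmp_sch_dynamic_chunked | kmp_sch_modifier_nonmonotonic = 35 | (1 << 30) = 0x40000023 (numeric values as defined in the runtime's sched_type enum, quoted here from memory), while static and ordered schedules keep Modifier == 0 and therefore retain monotonic behavior.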
@@ -3530,13 +3694,14 @@ void CGOpenMPRuntime::emitForDispatchInit(
llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
: CGF.Builder.getIntN(IVSize, 1);
llvm::Value *Args[] = {
- emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
+ emitUpdateLocation(CGF, Loc),
+ getThreadID(CGF, Loc),
CGF.Builder.getInt32(addMonoNonMonoModifier(
- Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
- DispatchValues.LB, // Lower
- DispatchValues.UB, // Upper
- CGF.Builder.getIntN(IVSize, 1), // Stride
- Chunk // Chunk
+ CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
+ DispatchValues.LB, // Lower
+ DispatchValues.UB, // Upper
+ CGF.Builder.getIntN(IVSize, 1), // Stride
+ Chunk // Chunk
};
CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
@@ -3578,7 +3743,7 @@ static void emitForStaticInitCall(
llvm::Value *Args[] = {
UpdateLocation,
ThreadId,
- CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
+ CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
M2)), // Schedule type
Values.IL.getPointer(), // &isLastIter
Values.LB.getPointer(), // &LB
@@ -3693,37 +3858,15 @@ void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
}
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
- OpenMPProcBindClauseKind ProcBind,
+ ProcBindKind ProcBind,
SourceLocation Loc) {
if (!CGF.HaveInsertPoint())
return;
- // Constants for proc bind value accepted by the runtime.
- enum ProcBindTy {
- ProcBindFalse = 0,
- ProcBindTrue,
- ProcBindMaster,
- ProcBindClose,
- ProcBindSpread,
- ProcBindIntel,
- ProcBindDefault
- } RuntimeProcBind;
- switch (ProcBind) {
- case OMPC_PROC_BIND_master:
- RuntimeProcBind = ProcBindMaster;
- break;
- case OMPC_PROC_BIND_close:
- RuntimeProcBind = ProcBindClose;
- break;
- case OMPC_PROC_BIND_spread:
- RuntimeProcBind = ProcBindSpread;
- break;
- case OMPC_PROC_BIND_unknown:
- llvm_unreachable("Unsupported proc_bind value.");
- }
+ assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
// Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
llvm::Value *Args[] = {
emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
- llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
+ llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
}
@@ -3899,157 +4042,6 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
Action(E.getKey(), E.getValue());
}
-llvm::Function *
-CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
- // If we don't have entries or if we are emitting code for the device, we
- // don't need to do anything.
- if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
- return nullptr;
-
- llvm::Module &M = CGM.getModule();
- ASTContext &C = CGM.getContext();
-
- // Get list of devices we care about
- const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;
-
- // We should be creating an offloading descriptor only if there are devices
- // specified.
- assert(!Devices.empty() && "No OpenMP offloading devices??");
-
- // Create the external variables that will point to the begin and end of the
- // host entries section. These will be defined by the linker.
- llvm::Type *OffloadEntryTy =
- CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
- std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"});
- auto *HostEntriesBegin = new llvm::GlobalVariable(
- M, OffloadEntryTy, /*isConstant=*/true,
- llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
- EntriesBeginName);
- std::string EntriesEndName = getName({"omp_offloading", "entries_end"});
- auto *HostEntriesEnd =
- new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true,
- llvm::GlobalValue::ExternalLinkage,
- /*Initializer=*/nullptr, EntriesEndName);
-
- // Create all device images
- auto *DeviceImageTy = cast<llvm::StructType>(
- CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
- ConstantInitBuilder DeviceImagesBuilder(CGM);
- ConstantArrayBuilder DeviceImagesEntries =
- DeviceImagesBuilder.beginArray(DeviceImageTy);
-
- for (const llvm::Triple &Device : Devices) {
- StringRef T = Device.getTriple();
- std::string BeginName = getName({"omp_offloading", "img_start", ""});
- auto *ImgBegin = new llvm::GlobalVariable(
- M, CGM.Int8Ty, /*isConstant=*/true,
- llvm::GlobalValue::ExternalWeakLinkage,
- /*Initializer=*/nullptr, Twine(BeginName).concat(T));
- std::string EndName = getName({"omp_offloading", "img_end", ""});
- auto *ImgEnd = new llvm::GlobalVariable(
- M, CGM.Int8Ty, /*isConstant=*/true,
- llvm::GlobalValue::ExternalWeakLinkage,
- /*Initializer=*/nullptr, Twine(EndName).concat(T));
-
- llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin,
- HostEntriesEnd};
- createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data,
- DeviceImagesEntries);
- }
-
- // Create device images global array.
- std::string ImagesName = getName({"omp_offloading", "device_images"});
- llvm::GlobalVariable *DeviceImages =
- DeviceImagesEntries.finishAndCreateGlobal(ImagesName,
- CGM.getPointerAlign(),
- /*isConstant=*/true);
- DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
-
- // This is a Zero array to be used in the creation of the constant expressions
- llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
- llvm::Constant::getNullValue(CGM.Int32Ty)};
-
- // Create the target region descriptor.
- llvm::Constant *Data[] = {
- llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
- llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
- DeviceImages, Index),
- HostEntriesBegin, HostEntriesEnd};
- std::string Descriptor = getName({"omp_offloading", "descriptor"});
- llvm::GlobalVariable *Desc = createGlobalStruct(
- CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor);
-
- // Emit code to register or unregister the descriptor at execution
- // startup or closing, respectively.
-
- llvm::Function *UnRegFn;
- {
- FunctionArgList Args;
- ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
- Args.push_back(&DummyPtr);
-
- CodeGenFunction CGF(CGM);
- // Disable debug info for global (de-)initializer because they are not part
- // of some particular construct.
- CGF.disableDebugInfo();
- const auto &FI =
- CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
- llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
- std::string UnregName = getName({"omp_offloading", "descriptor_unreg"});
- UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI);
- CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args);
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
- Desc);
- CGF.FinishFunction();
- }
- llvm::Function *RegFn;
- {
- CodeGenFunction CGF(CGM);
- // Disable debug info for global (de-)initializer because they are not part
- // of some particular construct.
- CGF.disableDebugInfo();
- const auto &FI = CGM.getTypes().arrangeNullaryFunction();
- llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
-
- // Encode offload target triples into the registration function name. It
- // will serve as a comdat key for the registration/unregistration code for
- // this particular combination of offloading targets.
- SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U);
- RegFnNameParts[0] = "omp_offloading";
- RegFnNameParts[1] = "descriptor_reg";
- llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2),
- [](const llvm::Triple &T) -> const std::string& {
- return T.getTriple();
- });
- llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end());
- std::string Descriptor = getName(RegFnNameParts);
- RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI);
- CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc);
- // Create a variable to drive the registration and unregistration of the
- // descriptor, so we can reuse the logic that emits Ctors and Dtors.
- ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(),
- SourceLocation(), nullptr, C.CharTy,
- ImplicitParamDecl::Other);
- CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
- CGF.FinishFunction();
- }
- if (CGM.supportsCOMDAT()) {
- // It is sufficient to call registration function only once, so create a
- // COMDAT group for registration/unregistration functions and associated
- // data. That would reduce startup time and code size. Registration
- // function serves as a COMDAT group key.
- llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName());
- RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
- RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
- RegFn->setComdat(ComdatKey);
- UnRegFn->setComdat(ComdatKey);
- DeviceImages->setComdat(ComdatKey);
- Desc->setComdat(ComdatKey);
- }
- return RegFn;
-}
-
void CGOpenMPRuntime::createOffloadEntry(
llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
llvm::GlobalValue::LinkageTypes Linkage) {
@@ -4077,8 +4069,7 @@ void CGOpenMPRuntime::createOffloadEntry(
Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
// The entry has to be created in the section the linker expects it to be.
- std::string Section = getName({"omp_offloading", "entries"});
- Entry->setSection(Section);
+ Entry->setSection("omp_offloading_entries");
}
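Together with the removal of __tgt_register_lib/__tgt_unregister_lib and of createOffloadingBinaryDescriptorRegistration earlier in this diff, this rename shifts entry discovery from a compiler-emitted descriptor to the named section itself: every offload entry now lands in omp_offloading_entries, whose bounds a runtime or linker wrapper can recover through the usual __start_/__stop_ symbols. A consumer-side sketch of that convention (an assumption of this note, not shown in the patch):

  // GNU-style linkers synthesize these symbols for any section whose name is a
  // valid C identifier; the range is then walked as an array of
  // __tgt_offload_entry records during image registration.
  extern "C" char __start_omp_offloading_entries[];
  extern "C" char __stop_omp_offloading_entries[];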
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
@@ -4091,13 +4082,16 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
// Right now we only generate metadata for function that contain target
// regions.
- // If we do not have entries, we don't need to do anything.
- if (OffloadEntriesInfoManager.empty())
+ // If we are in simd mode or there are no entries, we don't need to do
+ // anything.
+ if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
return;
llvm::Module &M = CGM.getModule();
llvm::LLVMContext &C = M.getContext();
- SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
+ SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
+ SourceLocation, StringRef>,
+ 16>
OrderedEntries(OffloadEntriesInfoManager.size());
llvm::SmallVector<StringRef, 16> ParentFunctions(
OffloadEntriesInfoManager.size());
@@ -4115,7 +4109,8 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
// Create function that emits metadata for each target region entry;
auto &&TargetRegionMetadataEmitter =
- [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString](
+ [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
+ &GetMDString](
unsigned DeviceID, unsigned FileID, StringRef ParentName,
unsigned Line,
const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
@@ -4133,8 +4128,19 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
GetMDInt(FileID), GetMDString(ParentName),
GetMDInt(Line), GetMDInt(E.getOrder())};
+ SourceLocation Loc;
+ for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
+ E = CGM.getContext().getSourceManager().fileinfo_end();
+ I != E; ++I) {
+ if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
+ I->getFirst()->getUniqueID().getFile() == FileID) {
+ Loc = CGM.getContext().getSourceManager().translateFileLineCol(
+ I->getFirst(), Line, 1);
+ break;
+ }
+ }
// Save this entry in the right position of the ordered entries array.
- OrderedEntries[E.getOrder()] = &E;
+ OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
ParentFunctions[E.getOrder()] = ParentName;
// Add metadata to the named metadata node.
@@ -4162,7 +4168,8 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
// Save this entry in the right position of the ordered entries array.
- OrderedEntries[E.getOrder()] = &E;
+ OrderedEntries[E.getOrder()] =
+ std::make_tuple(&E, SourceLocation(), MangledName);
// Add metadata to the named metadata node.
MD->addOperand(llvm::MDNode::get(C, Ops));
@@ -4171,11 +4178,11 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
DeviceGlobalVarMetadataEmitter);
- for (const auto *E : OrderedEntries) {
- assert(E && "All ordered entries must exist!");
+ for (const auto &E : OrderedEntries) {
+ assert(std::get<0>(E) && "All ordered entries must exist!");
if (const auto *CE =
dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
- E)) {
+ std::get<0>(E))) {
if (!CE->getID() || !CE->getAddress()) {
// Do not blame the entry if the parent funtion is not emitted.
StringRef FnName = ParentFunctions[CE->getOrder()];
@@ -4183,16 +4190,16 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
continue;
unsigned DiagID = CGM.getDiags().getCustomDiagID(
DiagnosticsEngine::Error,
- "Offloading entry for target region is incorrect: either the "
+ "Offloading entry for target region in %0 is incorrect: either the "
"address or the ID is invalid.");
- CGM.getDiags().Report(DiagID);
+ CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
continue;
}
createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
- } else if (const auto *CE =
- dyn_cast<OffloadEntriesInfoManagerTy::
- OffloadEntryInfoDeviceGlobalVar>(E)) {
+ } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
+ OffloadEntryInfoDeviceGlobalVar>(
+ std::get<0>(E))) {
OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
CE->getFlags());
@@ -4203,10 +4210,10 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
continue;
if (!CE->getAddress()) {
unsigned DiagID = CGM.getDiags().getCustomDiagID(
- DiagnosticsEngine::Error,
- "Offloading entry for declare target variable is incorrect: the "
- "address is invalid.");
- CGM.getDiags().Report(DiagID);
+ DiagnosticsEngine::Error, "Offloading entry for declare target "
+ "variable %0 is incorrect: the "
+ "address is invalid.");
+ CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
continue;
}
// The vaiable has no definition - no need to add the entry.
@@ -4349,57 +4356,6 @@ QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
return TgtOffloadEntryQTy;
}
-QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
- // These are the types we need to build:
- // struct __tgt_device_image{
- // void *ImageStart; // Pointer to the target code start.
- // void *ImageEnd; // Pointer to the target code end.
- // // We also add the host entries to the device image, as it may be useful
- // // for the target runtime to have access to that information.
- // __tgt_offload_entry *EntriesBegin; // Begin of the table with all
- // // the entries.
- // __tgt_offload_entry *EntriesEnd; // End of the table with all the
- // // entries (non inclusive).
- // };
- if (TgtDeviceImageQTy.isNull()) {
- ASTContext &C = CGM.getContext();
- RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
- RD->startDefinition();
- addFieldToRecordDecl(C, RD, C.VoidPtrTy);
- addFieldToRecordDecl(C, RD, C.VoidPtrTy);
- addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
- addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
- RD->completeDefinition();
- TgtDeviceImageQTy = C.getRecordType(RD);
- }
- return TgtDeviceImageQTy;
-}
-
-QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
- // struct __tgt_bin_desc{
- // int32_t NumDevices; // Number of devices supported.
- // __tgt_device_image *DeviceImages; // Arrays of device images
- // // (one per device).
- // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the
- // // entries.
- // __tgt_offload_entry *EntriesEnd; // End of the table with all the
- // // entries (non inclusive).
- // };
- if (TgtBinaryDescriptorQTy.isNull()) {
- ASTContext &C = CGM.getContext();
- RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
- RD->startDefinition();
- addFieldToRecordDecl(
- C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
- addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
- addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
- addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
- RD->completeDefinition();
- TgtBinaryDescriptorQTy = C.getRecordType(RD);
- }
- return TgtBinaryDescriptorQTy;
-}
-
namespace {
struct PrivateHelpersTy {
PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
@@ -4559,7 +4515,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
- llvm::Value *PartidParam = PartIdLVal.getPointer();
+ llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
@@ -4572,7 +4528,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- PrivatesLVal.getPointer(), CGF.VoidPtrTy);
+ PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
} else {
PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
}
@@ -4581,7 +4537,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
TaskPrivatesMap,
CGF.Builder
.CreatePointerBitCastOrAddrSpaceCast(
- TDBase.getAddress(), CGF.VoidPtrTy)
+ TDBase.getAddress(CGF), CGF.VoidPtrTy)
.getPointer()};
SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
std::end(CommonArgs));
@@ -4659,7 +4615,7 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
if (QualType::DestructionKind DtorKind =
Field->getType().isDestructedType()) {
LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
- CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
+ CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
}
}
CGF.FinishFunction();
@@ -4757,8 +4713,8 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
LValue RefLVal =
CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
- RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
- CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
+ RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
+ CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
++Counter;
}
CGF.FinishFunction();
@@ -4823,7 +4779,8 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
} else {
SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
SharedRefLValue = CGF.MakeAddrLValue(
- Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
+ Address(SharedRefLValue.getPointer(CGF),
+ C.getDeclAlign(OriginalVD)),
SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
SharedRefLValue.getTBAAInfo());
}
@@ -4836,7 +4793,8 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
// Initialize firstprivate array using element-by-element
// initialization.
CGF.EmitOMPAggregateAssign(
- PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
+ PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
+ Type,
[&CGF, Elem, Init, &CapturesInfo](Address DestElement,
Address SrcElement) {
// Clean up any temporaries needed by the initialization.
@@ -4854,8 +4812,8 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
}
} else {
CodeGenFunction::OMPPrivateScope InitScope(CGF);
- InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
- return SharedRefLValue.getAddress();
+ InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
+ return SharedRefLValue.getAddress(CGF);
});
(void)InitScope.Privatize();
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
@@ -5242,7 +5200,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
// Define type kmp_depend_info[<Dependences.size()>];
QualType KmpDependInfoArrayTy = C.getConstantArrayType(
KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
- ArrayType::Normal, /*IndexTypeQuals=*/0);
+ nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
// kmp_depend_info[<Dependences.size()>] deps;
DependenciesArray =
CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
@@ -5255,10 +5213,10 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
LValue UpAddrLVal =
CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
- llvm::Value *UpAddr =
- CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
+ llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
+ UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
llvm::Value *LowIntPtr =
- CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
+ CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGM.SizeTy);
llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
} else {
@@ -5271,7 +5229,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
LValue BaseAddrLVal = CGF.EmitLValueForField(
Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
CGF.EmitStoreOfScalar(
- CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
+ CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGF.IntPtrTy),
BaseAddrLVal);
// deps[i].len = sizeof(<Dependences[i].second>);
LValue LenLVal = CGF.EmitLValueForField(
@@ -5388,7 +5346,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
};
if (IfCond) {
- emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
+ emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
} else {
RegionCodeGenTy ThenRCG(ThenCodeGen);
ThenRCG(CGF);
@@ -5425,21 +5383,24 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
*std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
const auto *LBVar =
cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
- CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
+ CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
+ LBLVal.getQuals(),
/*IsInitializer=*/true);
LValue UBLVal = CGF.EmitLValueForField(
Result.TDBase,
*std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
const auto *UBVar =
cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
- CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
+ CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
+ UBLVal.getQuals(),
/*IsInitializer=*/true);
LValue StLVal = CGF.EmitLValueForField(
Result.TDBase,
*std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
const auto *StVar =
cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
- CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
+ CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
+ StLVal.getQuals(),
/*IsInitializer=*/true);
// Store reductions address.
LValue RedLVal = CGF.EmitLValueForField(
@@ -5448,7 +5409,7 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
if (Data.Reductions) {
CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
} else {
- CGF.EmitNullInitialization(RedLVal.getAddress(),
+ CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
CGF.getContext().VoidPtrTy);
}
enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
@@ -5457,11 +5418,11 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
ThreadID,
Result.NewTask,
IfVal,
- LBLVal.getPointer(),
- UBLVal.getPointer(),
+ LBLVal.getPointer(CGF),
+ UBLVal.getPointer(CGF),
CGF.EmitLoadOfScalar(StLVal, Loc),
llvm::ConstantInt::getSigned(
- CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
+ CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
llvm::ConstantInt::getSigned(
CGF.IntTy, Data.Schedule.getPointer()
? Data.Schedule.getInt() ? NumTasks : Grainsize
@@ -5763,7 +5724,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
}
llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
QualType ReductionArrayTy =
- C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
+ C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
/*IndexTypeQuals=*/0);
Address ReductionList =
CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
@@ -5773,7 +5734,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
CGF.Builder.CreateStore(
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
+ CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
Elem);
if ((*IPriv)->getType()->isVariablyModifiedType()) {
// Store array size.
@@ -6201,7 +6162,7 @@ static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
// Emit the finalizer body:
// <destroy>(<type>* %0)
RCG.emitCleanups(CGF, N, PrivateAddr);
- CGF.FinishFunction();
+ CGF.FinishFunction(Loc);
return Fn;
}
@@ -6235,7 +6196,7 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
unsigned Size = Data.ReductionVars.size();
llvm::APInt ArraySize(/*numBits=*/64, Size);
QualType ArrayRDType = C.getConstantArrayType(
- RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
+ RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
// kmp_task_red_input_t .rd_input.[Size];
Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
@@ -6253,7 +6214,7 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
RCG.emitSharedLValue(CGF, Cnt);
llvm::Value *CastedShared =
- CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
+ CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
RCG.emitAggregateType(CGF, Cnt);
llvm::Value *SizeValInChars;
@@ -6296,7 +6257,8 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
FlagsLVal);
} else
- CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
+ CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
+ FlagsLVal.getType());
}
// Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
// *data);
@@ -6332,7 +6294,7 @@ void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
CGF.Builder.CreateStore(
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
+ RCG.getSharedLValue(N).getPointer(CGF), CGM.VoidPtrTy),
SharedAddr, /*IsVolatile=*/false);
}
}
@@ -6343,12 +6305,12 @@ Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
LValue SharedLVal) {
// Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
// *d);
- llvm::Value *Args[] = {
- CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
- /*isSigned=*/true),
- ReductionsPtr,
- CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
- CGM.VoidPtrTy)};
+ llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
+ CGM.IntTy,
+ /*isSigned=*/true),
+ ReductionsPtr,
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
return Address(
CGF.EmitRuntimeCall(
createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
@@ -6471,8 +6433,8 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
CGF.EmitBlock(ContBB, /*IsFinished=*/true);
};
if (IfCond) {
- emitOMPIfClause(CGF, IfCond, ThenGen,
- [](CodeGenFunction &, PrePostActionTy &) {});
+ emitIfClause(CGF, IfCond, ThenGen,
+ [](CodeGenFunction &, PrePostActionTy &) {});
} else {
RegionCodeGenTy ThenRCG(ThenGen);
ThenRCG(CGF);
@@ -6685,6 +6647,7 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
case OMPD_parallel:
case OMPD_for:
case OMPD_parallel_for:
+ case OMPD_parallel_master:
case OMPD_parallel_sections:
case OMPD_for_simd:
case OMPD_parallel_for_simd:
@@ -6720,12 +6683,17 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
case OMPD_teams_distribute_parallel_for_simd:
case OMPD_target_update:
case OMPD_declare_simd:
+ case OMPD_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
case OMPD_declare_mapper:
case OMPD_taskloop:
case OMPD_taskloop_simd:
+ case OMPD_master_taskloop:
+ case OMPD_master_taskloop_simd:
+ case OMPD_parallel_master_taskloop:
+ case OMPD_parallel_master_taskloop_simd:
case OMPD_requires:
case OMPD_unknown:
break;
@@ -6990,6 +6958,7 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
case OMPD_parallel:
case OMPD_for:
case OMPD_parallel_for:
+ case OMPD_parallel_master:
case OMPD_parallel_sections:
case OMPD_for_simd:
case OMPD_parallel_for_simd:
@@ -7025,12 +6994,17 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
case OMPD_teams_distribute_parallel_for_simd:
case OMPD_target_update:
case OMPD_declare_simd:
+ case OMPD_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
case OMPD_declare_mapper:
case OMPD_taskloop:
case OMPD_taskloop_simd:
+ case OMPD_master_taskloop:
+ case OMPD_master_taskloop_simd:
+ case OMPD_parallel_master_taskloop:
+ case OMPD_parallel_master_taskloop_simd:
case OMPD_requires:
case OMPD_unknown:
break;
@@ -7079,12 +7053,24 @@ public:
OMP_MAP_LITERAL = 0x100,
/// Implicit map
OMP_MAP_IMPLICIT = 0x200,
+ /// Close is a hint to the runtime to allocate memory close to
+ /// the target device.
+ OMP_MAP_CLOSE = 0x400,
/// The 16 MSBs of the flags indicate whether the entry is member of some
/// struct/class.
OMP_MAP_MEMBER_OF = 0xffff000000000000,
LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
};
+ /// Get the offset of the OMP_MAP_MEMBER_OF field.
+ static unsigned getFlagMemberOffset() {
+ unsigned Offset = 0;
+ for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
+ Remain = Remain >> 1)
+ Offset++;
+ return Offset;
+ }
+
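For the 0xffff000000000000 mask above, this loop counts 48 trailing zero bits, so MEMBER_OF positions occupy bits 48-63. A minimal standalone sketch of the same computation, using a local copy of the mask rather than the enum itself:

#include <cassert>
#include <cstdint>

int main() {
  // Mirrors OMP_MAP_MEMBER_OF: the 16 most significant bits of the flag.
  const uint64_t MemberOfMask = 0xffff000000000000ULL;
  unsigned Offset = 0;
  for (uint64_t Remain = MemberOfMask; !(Remain & 1); Remain >>= 1)
    ++Offset;
  assert(Offset == 48 && "MEMBER_OF starts at bit 48");
  return 0;
}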
/// Class that associates information with a base pointer to be passed to the
/// runtime library.
class BasePointerInfo {
@@ -7148,8 +7134,11 @@ private:
: IE(IE), VD(VD) {}
};
- /// Directive from where the map clauses were extracted.
- const OMPExecutableDirective &CurDir;
+  /// The target directive from which the mappable clauses were extracted. It
+  /// is either an executable directive or a user-defined mapper directive.
+ llvm::PointerUnion<const OMPExecutableDirective *,
+ const OMPDeclareMapperDecl *>
+ CurDir;
/// Function the directive is being generated for.
CodeGenFunction &CGF;
@@ -7181,9 +7170,11 @@ private:
OAE->getBase()->IgnoreParenImpCasts())
.getCanonicalType();
- // If there is no length associated with the expression, that means we
- // are using the whole length of the base.
- if (!OAE->getLength() && OAE->getColonLoc().isValid())
+    // If there is no length associated with the expression and the lower
+    // bound is not specified either, we are using the whole length of the
+    // base.
+ if (!OAE->getLength() && OAE->getColonLoc().isValid() &&
+ !OAE->getLowerBound())
return CGF.getTypeSize(BaseTy);
llvm::Value *ElemSize;
@@ -7197,13 +7188,30 @@ private:
// If we don't have a length at this point, that is because we have an
// array section with a single element.
- if (!OAE->getLength())
+ if (!OAE->getLength() && OAE->getColonLoc().isInvalid())
return ElemSize;
- llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
- LengthVal =
- CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
- return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
+ if (const Expr *LenExpr = OAE->getLength()) {
+ llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
+ LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
+ CGF.getContext().getSizeType(),
+ LenExpr->getExprLoc());
+ return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
+ }
+ assert(!OAE->getLength() && OAE->getColonLoc().isValid() &&
+ OAE->getLowerBound() && "expected array_section[lb:].");
+      // Size = sizeof(base type) - lb * sizeof(element type), clamped at 0.
+ llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
+ llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
+ LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
+ CGF.getContext().getSizeType(),
+ OAE->getLowerBound()->getExprLoc());
+ LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
+ llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
+ llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
+ LengthVal = CGF.Builder.CreateSelect(
+ Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
+ return LengthVal;
}
return CGF.getTypeSize(ExprTy);
}
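A worked example of the new a[lb:] branch above, assuming a hypothetical `double a[8]` mapped as `a[2:]`: the emitted size is sizeof(a) - 2 * sizeof(double) = 64 - 16 = 48 bytes, and the select clamps the result to zero if the lower bound points past the end. A small host-side sketch of the same arithmetic:

#include <cassert>
#include <cstdint>

// Size of array_section[lb:], computed the way the emitted IR does:
// max(sizeof(base) - lb * sizeof(element), 0).
static uint64_t sectionSizeFromLB(uint64_t BaseSize, uint64_t LB,
                                  uint64_t ElemSize) {
  uint64_t LBBytes = LB * ElemSize;
  return BaseSize > LBBytes ? BaseSize - LBBytes : 0;
}

int main() {
  assert(sectionSizeFromLB(/*BaseSize=*/64, /*LB=*/2, /*ElemSize=*/8) == 48);
  assert(sectionSizeFromLB(/*BaseSize=*/64, /*LB=*/9, /*ElemSize=*/8) == 0);
  return 0;
}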
@@ -7247,6 +7255,9 @@ private:
if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
!= MapModifiers.end())
Bits |= OMP_MAP_ALWAYS;
+ if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
+ != MapModifiers.end())
+ Bits |= OMP_MAP_CLOSE;
return Bits;
}
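For illustration, a map clause such as the one in the hypothetical snippet below (buf and n are placeholders) carries the close map-type modifier, so this helper ORs OMP_MAP_CLOSE (0x400) into the flags in addition to the to/from bits:

void touch(double *buf, int n) {
  // close: hint to allocate the device copy close to the target device.
#pragma omp target data map(close, tofrom : buf[0 : n])
  {
#pragma omp target
    for (int i = 0; i < n; ++i)
      buf[i] += 1.0;
  }
}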
@@ -7486,11 +7497,11 @@ private:
} else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
(OASE &&
isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
- BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
+ BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
} else {
// The base is the reference to the variable.
// BP = &Var.
- BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
+ BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
if (const auto *VD =
dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
@@ -7584,8 +7595,8 @@ private:
isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
"Unexpected expression");
- Address LB =
- CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();
+ Address LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
+ .getAddress(CGF);
// If this component is a pointer inside the base struct then we don't
// need to create any entry for it - it will be combined with the object
@@ -7632,7 +7643,7 @@ private:
if (MC.getAssociatedDeclaration()) {
ComponentLB =
CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
- .getAddress();
+ .getAddress(CGF);
Size = CGF.Builder.CreatePtrDiff(
CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
CGF.EmitCastToVoidPtr(LB.getPointer()));
@@ -7675,10 +7686,10 @@ private:
if (!IsExpressionFirstInfo) {
// If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
- // then we reset the TO/FROM/ALWAYS/DELETE flags.
+ // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
if (IsPointer)
Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
- OMP_MAP_DELETE);
+ OMP_MAP_DELETE | OMP_MAP_CLOSE);
if (ShouldBeMemberOf) {
// Set placeholder value MEMBER_OF=FFFF to indicate that the flag
@@ -7752,9 +7763,9 @@ private:
}
static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
- // Member of is given by the 16 MSB of the flag, so rotate by 48 bits.
+ // Rotate by getFlagMemberOffset() bits.
return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
- << 48);
+ << getFlagMemberOffset());
}
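For example, the first member of a struct (Position 0) encodes as (0 + 1) << 48 = 0x0001000000000000, the third as 0x0003000000000000, and the all-ones MEMBER_OF=FFFF value mentioned earlier is the placeholder used while the real position is not yet known. A minimal sketch of the encode/decode round trip, assuming the 48-bit offset:

#include <cassert>
#include <cstdint>

int main() {
  const unsigned Offset = 48; // getFlagMemberOffset() for 0xffff000000000000
  auto Encode = [&](uint64_t Position) { return (Position + 1) << Offset; };
  auto Decode = [&](uint64_t Flag) { return (Flag >> Offset) - 1; };
  assert(Encode(0) == 0x0001000000000000ULL);
  assert(Decode(Encode(2)) == 2);
  return 0;
}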
static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
@@ -7834,7 +7845,7 @@ private:
public:
MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
- : CurDir(Dir), CGF(CGF) {
+ : CurDir(&Dir), CGF(CGF) {
// Extract firstprivate clause information.
for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
for (const auto *D : C->varlists())
@@ -7846,6 +7857,10 @@ public:
DevPointersMap[L.first].push_back(L.second);
}
+ /// Constructor for the declare mapper directive.
+ MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
+ : CurDir(&Dir), CGF(CGF) {}
+
/// Generate code for the combined entry if we have a partially mapped struct
/// and take care of the mapping flags of the arguments corresponding to
/// individual struct members.
@@ -7907,19 +7922,21 @@ public:
IsImplicit);
};
- // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
- for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
- for (const auto &L : C->component_lists()) {
+ assert(CurDir.is<const OMPExecutableDirective *>() &&
+ "Expect a executable directive");
+ const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
+ for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
+ for (const auto L : C->component_lists()) {
InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
/*ReturnDevicePointer=*/false, C->isImplicit());
}
- for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
- for (const auto &L : C->component_lists()) {
+ for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
+ for (const auto L : C->component_lists()) {
InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
/*ReturnDevicePointer=*/false, C->isImplicit());
}
- for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
- for (const auto &L : C->component_lists()) {
+ for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
+ for (const auto L : C->component_lists()) {
InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
/*ReturnDevicePointer=*/false, C->isImplicit());
}
@@ -7933,10 +7950,9 @@ public:
llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
DeferredInfo;
- // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
for (const auto *C :
- this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) {
- for (const auto &L : C->component_lists()) {
+ CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
+ for (const auto L : C->component_lists()) {
assert(!L.second.empty() && "Not expecting empty list of components!");
const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
VD = cast<ValueDecl>(VD->getCanonicalDecl());
@@ -7964,7 +7980,6 @@ public:
// We didn't find any match in our map information - generate a zero
// size array section - if the pointer is a struct member we defer this
// action until the whole struct has been processed.
- // FIXME: MSVC 2013 seems to require this-> to find member CGF.
if (isa<MemberExpr>(IE)) {
// Insert the pointer into Info to be processed by
// generateInfoForComponentList. Because it is a member pointer
@@ -7977,11 +7992,11 @@ public:
/*ReturnDevicePointer=*/false, C->isImplicit());
DeferredInfo[nullptr].emplace_back(IE, VD);
} else {
- llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
- this->CGF.EmitLValue(IE), IE->getExprLoc());
+ llvm::Value *Ptr =
+ CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
BasePointers.emplace_back(Ptr, VD);
Pointers.push_back(Ptr);
- Sizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
+ Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
}
}
@@ -8005,11 +8020,10 @@ public:
// Remember the current base pointer index.
unsigned CurrentBasePointersIdx = CurBasePointers.size();
- // FIXME: MSVC 2013 seems to require this-> to find the member method.
- this->generateInfoForComponentList(
- L.MapType, L.MapModifiers, L.Components, CurBasePointers,
- CurPointers, CurSizes, CurTypes, PartialStruct,
- IsFirstComponentList, L.IsImplicit);
+ generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
+ CurBasePointers, CurPointers, CurSizes,
+ CurTypes, PartialStruct,
+ IsFirstComponentList, L.IsImplicit);
// If this entry relates with a device pointer, set the relevant
// declaration and add the 'return pointer' flag.
@@ -8033,7 +8047,7 @@ public:
auto CI = DeferredInfo.find(M.first);
if (CI != DeferredInfo.end()) {
for (const DeferredDevicePtrEntryTy &L : CI->second) {
- llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer();
+ llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
CurBasePointers.emplace_back(BasePtr, L.VD);
@@ -8061,6 +8075,78 @@ public:
}
}
+ /// Generate all the base pointers, section pointers, sizes and map types for
+  /// the extracted map clauses of a user-defined mapper.
+ void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
+ MapValuesArrayTy &Pointers,
+ MapValuesArrayTy &Sizes,
+ MapFlagsArrayTy &Types) const {
+ assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
+ "Expect a declare mapper directive");
+ const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
+    // We have to process the component lists that relate to the same
+ // declaration in a single chunk so that we can generate the map flags
+ // correctly. Therefore, we organize all lists in a map.
+ llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
+
+ // Helper function to fill the information map for the different supported
+ // clauses.
+ auto &&InfoGen = [&Info](
+ const ValueDecl *D,
+ OMPClauseMappableExprCommon::MappableExprComponentListRef L,
+ OpenMPMapClauseKind MapType,
+ ArrayRef<OpenMPMapModifierKind> MapModifiers,
+ bool ReturnDevicePointer, bool IsImplicit) {
+ const ValueDecl *VD =
+ D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
+ Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
+ IsImplicit);
+ };
+
+ for (const auto *C : CurMapperDir->clauselists()) {
+ const auto *MC = cast<OMPMapClause>(C);
+ for (const auto L : MC->component_lists()) {
+ InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
+ /*ReturnDevicePointer=*/false, MC->isImplicit());
+ }
+ }
+
+ for (const auto &M : Info) {
+ // We need to know when we generate information for the first component
+      // associated with a declaration, because the mapping flags depend on it.
+ bool IsFirstComponentList = true;
+
+ // Temporary versions of arrays
+ MapBaseValuesArrayTy CurBasePointers;
+ MapValuesArrayTy CurPointers;
+ MapValuesArrayTy CurSizes;
+ MapFlagsArrayTy CurTypes;
+ StructRangeInfoTy PartialStruct;
+
+ for (const MapInfo &L : M.second) {
+ assert(!L.Components.empty() &&
+ "Not expecting declaration with no component lists.");
+ generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
+ CurBasePointers, CurPointers, CurSizes,
+ CurTypes, PartialStruct,
+ IsFirstComponentList, L.IsImplicit);
+ IsFirstComponentList = false;
+ }
+
+ // If there is an entry in PartialStruct it means we have a struct with
+ // individual members mapped. Emit an extra combined entry.
+ if (PartialStruct.Base.isValid())
+ emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
+ PartialStruct);
+
+      // We need to append the results of this declaration to what we already
+      // have.
+ BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
+ Pointers.append(CurPointers.begin(), CurPointers.end());
+ Sizes.append(CurSizes.begin(), CurSizes.end());
+ Types.append(CurTypes.begin(), CurTypes.end());
+ }
+ }
+
/// Emit capture info for lambdas for variables captured by reference.
void generateInfoForLambdaCaptures(
const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
@@ -8083,9 +8169,10 @@ public:
LValue ThisLVal =
CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
- LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer());
- BasePointers.push_back(ThisLVal.getPointer());
- Pointers.push_back(ThisLValVal.getPointer());
+ LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
+ VDLVal.getPointer(CGF));
+ BasePointers.push_back(ThisLVal.getPointer(CGF));
+ Pointers.push_back(ThisLValVal.getPointer(CGF));
Sizes.push_back(
CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
CGF.Int64Ty, /*isSigned=*/true));
@@ -8103,17 +8190,19 @@ public:
LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
if (LC.getCaptureKind() == LCK_ByRef) {
LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
- LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
- BasePointers.push_back(VarLVal.getPointer());
- Pointers.push_back(VarLValVal.getPointer());
+ LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
+ VDLVal.getPointer(CGF));
+ BasePointers.push_back(VarLVal.getPointer(CGF));
+ Pointers.push_back(VarLValVal.getPointer(CGF));
Sizes.push_back(CGF.Builder.CreateIntCast(
CGF.getTypeSize(
VD->getType().getCanonicalType().getNonReferenceType()),
CGF.Int64Ty, /*isSigned=*/true));
} else {
RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
- LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
- BasePointers.push_back(VarLVal.getPointer());
+ LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
+ VDLVal.getPointer(CGF));
+ BasePointers.push_back(VarLVal.getPointer(CGF));
Pointers.push_back(VarRVal.getScalarVal());
Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
}
@@ -8184,9 +8273,11 @@ public:
std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
SmallVector<MapData, 4> DeclComponentLists;
- // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
- for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
- for (const auto &L : C->decl_component_lists(VD)) {
+ assert(CurDir.is<const OMPExecutableDirective *>() &&
+ "Expect a executable directive");
+ const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
+ for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
+ for (const auto L : C->decl_component_lists(VD)) {
assert(L.first == VD &&
"We got information for the wrong declaration??");
assert(!L.second.empty() &&
@@ -8333,10 +8424,13 @@ public:
MapValuesArrayTy &Pointers,
MapValuesArrayTy &Sizes,
MapFlagsArrayTy &Types) const {
+ assert(CurDir.is<const OMPExecutableDirective *>() &&
+ "Expect a executable directive");
+ const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
// Map other list items in the map clause which are not captured variables
// but "declare target link" global variables.
- for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
- for (const auto &L : C->component_lists()) {
+ for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
+ for (const auto L : C->component_lists()) {
if (!L.first)
continue;
const auto *VD = dyn_cast<VarDecl>(L.first);
@@ -8414,7 +8508,7 @@ public:
CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
// Copy the value of the original variable to the new global copy.
CGF.Builder.CreateMemCpy(
- CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(),
+ CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
CurSizes.back(), /*IsVolatile=*/false);
// Use new global variable as the base pointers.
@@ -8472,9 +8566,9 @@ emitOffloadingArrays(CodeGenFunction &CGF,
}
llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
- QualType PointerArrayType =
- Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
- /*IndexTypeQuals=*/0);
+ QualType PointerArrayType = Ctx.getConstantArrayType(
+ Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
+ /*IndexTypeQuals=*/0);
Info.BasePointersArray =
CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
@@ -8487,9 +8581,9 @@ emitOffloadingArrays(CodeGenFunction &CGF,
QualType Int64Ty =
Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
if (hasRuntimeEvaluationCaptureSize) {
- QualType SizeArrayType =
- Ctx.getConstantArrayType(Int64Ty, PointerNumAP, ArrayType::Normal,
- /*IndexTypeQuals=*/0);
+ QualType SizeArrayType = Ctx.getConstantArrayType(
+ Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
+ /*IndexTypeQuals=*/0);
Info.SizesArray =
CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
} else {
@@ -8562,6 +8656,7 @@ emitOffloadingArrays(CodeGenFunction &CGF,
}
}
}
+
/// Emit the arguments to be passed to the runtime library based on the
/// arrays of pointers, sizes and map types.
static void emitOffloadingArraysArgument(
@@ -8642,6 +8737,7 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
case OMPD_parallel:
case OMPD_for:
case OMPD_parallel_for:
+ case OMPD_parallel_master:
case OMPD_parallel_sections:
case OMPD_for_simd:
case OMPD_parallel_for_simd:
@@ -8677,12 +8773,17 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
case OMPD_teams_distribute_parallel_for_simd:
case OMPD_target_update:
case OMPD_declare_simd:
+ case OMPD_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
case OMPD_declare_mapper:
case OMPD_taskloop:
case OMPD_taskloop_simd:
+ case OMPD_master_taskloop:
+ case OMPD_master_taskloop_simd:
+ case OMPD_parallel_master_taskloop:
+ case OMPD_parallel_master_taskloop_simd:
case OMPD_requires:
case OMPD_unknown:
llvm_unreachable("Unexpected directive.");
@@ -8692,10 +8793,343 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
return nullptr;
}
+/// Emit the user-defined mapper function. The code generation follows the
+/// pattern in the example below.
+/// \code
+/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
+/// void *base, void *begin,
+/// int64_t size, int64_t type) {
+/// // Allocate space for an array section first.
+/// if (size > 1 && !maptype.IsDelete)
+/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
+/// size*sizeof(Ty), clearToFrom(type));
+/// // Map members.
+/// for (unsigned i = 0; i < size; i++) {
+/// // For each component specified by this mapper:
+/// for (auto c : all_components) {
+/// if (c.hasMapper())
+/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
+/// c.arg_type);
+/// else
+/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
+/// c.arg_begin, c.arg_size, c.arg_type);
+/// }
+/// }
+/// // Delete the array section.
+/// if (size > 1 && maptype.IsDelete)
+/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
+/// size*sizeof(Ty), clearToFrom(type));
+/// }
+/// \endcode
+void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
+ CodeGenFunction *CGF) {
+ if (UDMMap.count(D) > 0)
+ return;
+ ASTContext &C = CGM.getContext();
+ QualType Ty = D->getType();
+ QualType PtrTy = C.getPointerType(Ty).withRestrict();
+ QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
+ auto *MapperVarDecl =
+ cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
+ SourceLocation Loc = D->getLocation();
+ CharUnits ElementSize = C.getTypeSizeInChars(Ty);
+
+ // Prepare mapper function arguments and attributes.
+ ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
+ ImplicitParamDecl::Other);
+ ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
+ ImplicitParamDecl::Other);
+ ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
+ ImplicitParamDecl::Other);
+ FunctionArgList Args;
+ Args.push_back(&HandleArg);
+ Args.push_back(&BaseArg);
+ Args.push_back(&BeginArg);
+ Args.push_back(&SizeArg);
+ Args.push_back(&TypeArg);
+ const CGFunctionInfo &FnInfo =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
+ SmallString<64> TyStr;
+ llvm::raw_svector_ostream Out(TyStr);
+ CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
+ std::string Name = getName({"omp_mapper", TyStr, D->getName()});
+ auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
+ Name, &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
+ Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
+ // Start the mapper function code generation.
+ CodeGenFunction MapperCGF(CGM);
+ MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
+  // Compute the start and end addresses of the array elements.
+ llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
+ MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
+ C.getPointerType(Int64Ty), Loc);
+ llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
+ MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
+ CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
+ llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
+ llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
+ MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
+ C.getPointerType(Int64Ty), Loc);
+  // Prepare common arguments for array initialization and deletion.
+ llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
+ MapperCGF.GetAddrOfLocalVar(&HandleArg),
+ /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
+ llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
+ MapperCGF.GetAddrOfLocalVar(&BaseArg),
+ /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
+ llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
+ MapperCGF.GetAddrOfLocalVar(&BeginArg),
+ /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
+
+  // Emit array initialization if this is an array section and \p MapType
+  // indicates that memory allocation is required.
+ llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
+ emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
+ ElementSize, HeadBB, /*IsInit=*/true);
+
+  // Emit a for loop to iterate through SizeArg elements and map each of them.
+
+ // Emit the loop header block.
+ MapperCGF.EmitBlock(HeadBB);
+ llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
+ llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
+ // Evaluate whether the initial condition is satisfied.
+ llvm::Value *IsEmpty =
+ MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
+ MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
+ llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
+
+ // Emit the loop body block.
+ MapperCGF.EmitBlock(BodyBB);
+ llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
+ PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
+ PtrPHI->addIncoming(PtrBegin, EntryBB);
+ Address PtrCurrent =
+ Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
+ .getAlignment()
+ .alignmentOfArrayElement(ElementSize));
+  // Privatize the mapper's declared variable to be the current array element.
+ CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
+ Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
+ return MapperCGF
+ .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
+ .getAddress(MapperCGF);
+ });
+ (void)Scope.Privatize();
+
+ // Get map clause information. Fill up the arrays with all mapped variables.
+ MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
+ MappableExprsHandler::MapValuesArrayTy Pointers;
+ MappableExprsHandler::MapValuesArrayTy Sizes;
+ MappableExprsHandler::MapFlagsArrayTy MapTypes;
+ MappableExprsHandler MEHandler(*D, MapperCGF);
+ MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);
+
+ // Call the runtime API __tgt_mapper_num_components to get the number of
+ // pre-existing components.
+ llvm::Value *OffloadingArgs[] = {Handle};
+ llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs);
+ llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
+ PreviousSize,
+ MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
+
+ // Fill up the runtime mapper handle for all components.
+ for (unsigned I = 0; I < BasePointers.size(); ++I) {
+ llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
+ *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
+ llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
+ Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
+ llvm::Value *CurSizeArg = Sizes[I];
+
+ // Extract the MEMBER_OF field from the map type.
+ llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
+ MapperCGF.EmitBlock(MemberBB);
+ llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
+ llvm::Value *Member = MapperCGF.Builder.CreateAnd(
+ OriMapType,
+ MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
+ llvm::BasicBlock *MemberCombineBB =
+ MapperCGF.createBasicBlock("omp.member.combine");
+ llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
+ llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
+ MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
+ // Add the number of pre-existing components to the MEMBER_OF field if it
+ // is valid.
+ MapperCGF.EmitBlock(MemberCombineBB);
+ llvm::Value *CombinedMember =
+ MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
+ // Do nothing if it is not a member of previous components.
+ MapperCGF.EmitBlock(TypeBB);
+ llvm::PHINode *MemberMapType =
+ MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
+ MemberMapType->addIncoming(OriMapType, MemberBB);
+ MemberMapType->addIncoming(CombinedMember, MemberCombineBB);
+
+ // Combine the map type inherited from user-defined mapper with that
+ // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
+ // bits of the \a MapType, which is the input argument of the mapper
+ // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
+ // bits of MemberMapType.
+ // [OpenMP 5.0], 1.2.6. map-type decay.
+ // | alloc | to | from | tofrom | release | delete
+ // ----------------------------------------------------------
+ // alloc | alloc | alloc | alloc | alloc | release | delete
+ // to | alloc | to | alloc | to | release | delete
+ // from | alloc | alloc | from | from | release | delete
+ // tofrom | alloc | to | from | tofrom | release | delete
+ llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
+ MapType,
+ MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
+ MappableExprsHandler::OMP_MAP_FROM));
+ llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
+ llvm::BasicBlock *AllocElseBB =
+ MapperCGF.createBasicBlock("omp.type.alloc.else");
+ llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
+ llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
+ llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
+ llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
+ llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
+ MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
+ // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
+ MapperCGF.EmitBlock(AllocBB);
+ llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
+ MemberMapType,
+ MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
+ MappableExprsHandler::OMP_MAP_FROM)));
+ MapperCGF.Builder.CreateBr(EndBB);
+ MapperCGF.EmitBlock(AllocElseBB);
+ llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
+ LeftToFrom,
+ MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
+ MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
+ // In case of to, clear OMP_MAP_FROM.
+ MapperCGF.EmitBlock(ToBB);
+ llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
+ MemberMapType,
+ MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
+ MapperCGF.Builder.CreateBr(EndBB);
+ MapperCGF.EmitBlock(ToElseBB);
+ llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
+ LeftToFrom,
+ MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
+ MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
+ // In case of from, clear OMP_MAP_TO.
+ MapperCGF.EmitBlock(FromBB);
+ llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
+ MemberMapType,
+ MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
+ // In case of tofrom, do nothing.
+ MapperCGF.EmitBlock(EndBB);
+ llvm::PHINode *CurMapType =
+ MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
+ CurMapType->addIncoming(AllocMapType, AllocBB);
+ CurMapType->addIncoming(ToMapType, ToBB);
+ CurMapType->addIncoming(FromMapType, FromBB);
+ CurMapType->addIncoming(MemberMapType, ToElseBB);
+
+ // TODO: call the corresponding mapper function if a user-defined mapper is
+ // associated with this map clause.
+ // Call the runtime API __tgt_push_mapper_component to fill up the runtime
+ // data structure.
+ llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
+ CurSizeArg, CurMapType};
+ MapperCGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__tgt_push_mapper_component),
+ OffloadingArgs);
+ }
+
+ // Update the pointer to point to the next element that needs to be mapped,
+ // and check whether we have mapped all elements.
+ llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
+ PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
+ PtrPHI->addIncoming(PtrNext, BodyBB);
+ llvm::Value *IsDone =
+ MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
+ llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
+ MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
+
+ MapperCGF.EmitBlock(ExitBB);
+ // Emit array deletion if this is an array section and \p MapType indicates
+ // that deletion is required.
+ emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
+ ElementSize, DoneBB, /*IsInit=*/false);
+
+ // Emit the function exit block.
+ MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
+ MapperCGF.FinishFunction();
+ UDMMap.try_emplace(D, Fn);
+ if (CGF) {
+ auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
+ Decls.second.push_back(D);
+ }
+}
+
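The to/from block of the decay table above is equivalent to a bitwise AND of the TO and FROM bits of the two map types, with the rest of the member's flags preserved; the release/delete columns are handled separately via the DELETE bit. A standalone sketch of that rule, with MapTo/MapFrom as placeholder bit values rather than the enum from this file:

#include <cassert>
#include <cstdint>

// Placeholder bits standing in for OMP_MAP_TO / OMP_MAP_FROM.
constexpr uint64_t MapTo = 0x01, MapFrom = 0x02;

// Combine the map type declared inside the mapper (Member) with the map type
// at the call site (Outer), mirroring the branchy IR emitted above.
static uint64_t decayMapType(uint64_t Member, uint64_t Outer) {
  uint64_t OuterToFrom = Outer & (MapTo | MapFrom);
  if (OuterToFrom == 0)                  // alloc: clear both TO and FROM.
    return Member & ~(MapTo | MapFrom);
  if (OuterToFrom == MapTo)              // to: clear FROM.
    return Member & ~MapFrom;
  if (OuterToFrom == MapFrom)            // from: clear TO.
    return Member & ~MapTo;
  return Member;                         // tofrom: keep the member's type.
}

int main() {
  // Row "tofrom", column "to" of the table yields "to".
  assert(decayMapType(MapTo | MapFrom, MapTo) == MapTo);
  // Row "to", column "from" yields "alloc".
  assert(decayMapType(MapTo, MapFrom) == 0);
  return 0;
}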
+/// Emit the array initialization or deletion portion for user-defined mapper
+/// code generation. First, it evaluates whether an array section is mapped and
+/// whether the \a MapType instructs to delete this section. If \a IsInit is
+/// true, and \a MapType indicates to not delete this array, array
+/// initialization code is generated. If \a IsInit is false, and \a MapType
+/// indicates to delete this array, array deletion code is generated.
+void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
+ CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
+ llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
+ CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
+ StringRef Prefix = IsInit ? ".init" : ".del";
+
+ // Evaluate if this is an array section.
+ llvm::BasicBlock *IsDeleteBB =
+ MapperCGF.createBasicBlock("omp.array" + Prefix + ".evaldelete");
+ llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.array" + Prefix);
+ llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
+ Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
+ MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
+
+ // Evaluate if we are going to delete this section.
+ MapperCGF.EmitBlock(IsDeleteBB);
+ llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
+ MapType,
+ MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
+ llvm::Value *DeleteCond;
+ if (IsInit) {
+ DeleteCond = MapperCGF.Builder.CreateIsNull(
+ DeleteBit, "omp.array" + Prefix + ".delete");
+ } else {
+ DeleteCond = MapperCGF.Builder.CreateIsNotNull(
+ DeleteBit, "omp.array" + Prefix + ".delete");
+ }
+ MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
+
+ MapperCGF.EmitBlock(BodyBB);
+  // Get the array size by multiplying the element size by the number of
+  // elements (i.e., \p Size).
+ llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
+ Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
+  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that the call
+  // performs memory allocation/deletion only.
+ llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
+ MapType,
+ MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
+ MappableExprsHandler::OMP_MAP_FROM)));
+ // Call the runtime API __tgt_push_mapper_component to fill up the runtime
+ // data structure.
+ llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
+ MapperCGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs);
+}
+
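For reference, the kind of OpenMP 5.0 construct these two functions lower might look like the hypothetical example below (the struct Vec, its members, and scale are illustrative only); the default mapper declared for Vec causes the pointed-to payload to be mapped whenever a Vec object is:

struct Vec {
  int len;
  double *data;
};

// Default mapper for Vec: map the descriptor and the payload it points to.
#pragma omp declare mapper(Vec v) map(to : v.len) \
    map(tofrom : v.data[0 : v.len])

void scale(Vec v, double s) {
#pragma omp target map(tofrom : v)
  for (int i = 0; i < v.len; ++i)
    v.data[i] *= s;
}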
void CGOpenMPRuntime::emitTargetNumIterationsCall(
- CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
- const llvm::function_ref<llvm::Value *(
- CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) {
+ CodeGenFunction &CGF, const OMPExecutableDirective &D,
+ llvm::Value *DeviceID,
+ llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
+ const OMPLoopDirective &D)>
+ SizeEmitter) {
OpenMPDirectiveKind Kind = D.getDirectiveKind();
const OMPExecutableDirective *TD = &D;
// Get nested teams distribute kind directive, if any.
@@ -8704,30 +9138,24 @@ void CGOpenMPRuntime::emitTargetNumIterationsCall(
if (!TD)
return;
const auto *LD = cast<OMPLoopDirective>(TD);
- auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF,
+ auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
PrePostActionTy &) {
- llvm::Value *NumIterations = SizeEmitter(CGF, *LD);
-
- // Emit device ID if any.
- llvm::Value *DeviceID;
- if (Device)
- DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
- CGF.Int64Ty, /*isSigned=*/true);
- else
- DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
-
- llvm::Value *Args[] = {DeviceID, NumIterations};
- CGF.EmitRuntimeCall(
- createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
+ if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
+ llvm::Value *Args[] = {DeviceID, NumIterations};
+ CGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
+ }
};
emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}
-void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
- const OMPExecutableDirective &D,
- llvm::Function *OutlinedFn,
- llvm::Value *OutlinedFnID,
- const Expr *IfCond, const Expr *Device) {
+void CGOpenMPRuntime::emitTargetCall(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D,
+ llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
+ const Expr *Device,
+ llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
+ const OMPLoopDirective &D)>
+ SizeEmitter) {
if (!CGF.HaveInsertPoint())
return;
@@ -8746,8 +9174,8 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
llvm::Value *MapTypesArray = nullptr;
// Fill up the pointer arrays and transfer execution to the device.
auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
- &MapTypesArray, &CS, RequiresOuterTask,
- &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
+ &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
+ SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
// On top of the arrays that were filled up, the target offloading call
// takes as arguments the device id as well as the host pointer. The host
// pointer is used by the runtime library to identify the current target
@@ -8779,6 +9207,9 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
+ // Emit tripcount for the target loop-based directive.
+ emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
+
bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
// The target region is an outlined function launched by the runtime
// via calls __tgt_target() or __tgt_target_teams().
@@ -8985,7 +9416,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
// specify target triples.
if (OutlinedFnID) {
if (IfCond) {
- emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
+ emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
} else {
RegionCodeGenTy ThenRCG(TargetThenGen);
ThenRCG(CGF);
@@ -9068,6 +9499,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
case OMPD_parallel:
case OMPD_for:
case OMPD_parallel_for:
+ case OMPD_parallel_master:
case OMPD_parallel_sections:
case OMPD_for_simd:
case OMPD_parallel_for_simd:
@@ -9103,12 +9535,17 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
case OMPD_teams_distribute_parallel_for_simd:
case OMPD_target_update:
case OMPD_declare_simd:
+ case OMPD_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
case OMPD_declare_mapper:
case OMPD_taskloop:
case OMPD_taskloop_simd:
+ case OMPD_master_taskloop:
+ case OMPD_master_taskloop_simd:
+ case OMPD_parallel_master_taskloop:
+ case OMPD_parallel_master_taskloop_simd:
case OMPD_requires:
case OMPD_unknown:
llvm_unreachable("Unknown target directive for OpenMP device codegen.");
@@ -9137,18 +9574,32 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
// If emitting code for the host, we do not process FD here. Instead we do
// the normal code generation.
- if (!CGM.getLangOpts().OpenMPIsDevice)
+ if (!CGM.getLangOpts().OpenMPIsDevice) {
+ if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
+ Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
+ OMPDeclareTargetDeclAttr::getDeviceType(FD);
+ // Do not emit device_type(nohost) functions for the host.
+ if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
+ return true;
+ }
return false;
+ }
const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
- StringRef Name = CGM.getMangledName(GD);
// Try to detect target regions in the function.
- if (const auto *FD = dyn_cast<FunctionDecl>(VD))
+ if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
+ StringRef Name = CGM.getMangledName(GD);
scanForTargetRegionsFunctions(FD->getBody(), Name);
+ Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
+ OMPDeclareTargetDeclAttr::getDeviceType(FD);
+    // Do not emit device_type(host) functions for the device.
+ if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
+ return true;
+ }
  // Do not emit the function if it is not marked as declare target.
return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
- AlreadyEmittedTargetFunctions.count(Name) == 0;
+ AlreadyEmittedTargetDecls.count(VD) == 0;
}
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
@@ -9221,6 +9672,9 @@ CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
llvm::Constant *Addr) {
+ if (CGM.getLangOpts().OMPTargetTriples.empty() &&
+ !CGM.getLangOpts().OpenMPIsDevice)
+ return;
llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
if (!Res) {
@@ -9376,20 +9830,20 @@ bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
return true;
- StringRef Name = CGM.getMangledName(GD);
const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target, as it was
  // already emitted.
if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
- if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) {
- if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name)))
+ if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
+ if (auto *F = dyn_cast_or_null<llvm::Function>(
+ CGM.GetGlobalValue(CGM.getMangledName(GD))))
return !F->isDeclaration();
return false;
}
return true;
}
- return !AlreadyEmittedTargetFunctions.insert(Name).second;
+ return !AlreadyEmittedTargetDecls.insert(D).second;
}
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
@@ -9433,17 +9887,6 @@ llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
return RequiresRegFn;
}
-llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
- // If we have offloading in the current module, we need to emit the entries
- // now and register the offloading descriptor.
- createOffloadEntriesAndInfoMetadata();
-
- // Create and register the offloading binary descriptors. This is the main
- // entity that captures all the information about offloading in the current
- // compilation unit.
- return createOffloadingBinaryDescriptorRegistration();
-}
-
void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
const OMPExecutableDirective &D,
SourceLocation Loc,
@@ -9602,7 +10045,7 @@ void CGOpenMPRuntime::emitTargetDataCalls(
auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
if (IfCond) {
- emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
+ emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
} else {
RegionCodeGenTy RCG(BeginThenGen);
RCG(CGF);
@@ -9616,7 +10059,7 @@ void CGOpenMPRuntime::emitTargetDataCalls(
}
if (IfCond) {
- emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
+ emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
} else {
RegionCodeGenTy RCG(EndThenGen);
RCG(CGF);
@@ -9679,6 +10122,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
case OMPD_parallel:
case OMPD_for:
case OMPD_parallel_for:
+ case OMPD_parallel_master:
case OMPD_parallel_sections:
case OMPD_for_simd:
case OMPD_parallel_for_simd:
@@ -9711,12 +10155,17 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
case OMPD_teams_distribute_parallel_for:
case OMPD_teams_distribute_parallel_for_simd:
case OMPD_declare_simd:
+ case OMPD_declare_variant:
case OMPD_declare_target:
case OMPD_end_declare_target:
case OMPD_declare_reduction:
case OMPD_declare_mapper:
case OMPD_taskloop:
case OMPD_taskloop_simd:
+ case OMPD_master_taskloop:
+ case OMPD_master_taskloop_simd:
+ case OMPD_parallel_master_taskloop:
+ case OMPD_parallel_master_taskloop_simd:
case OMPD_target:
case OMPD_target_simd:
case OMPD_target_teams_distribute:
@@ -9768,8 +10217,8 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
};
if (IfCond) {
- emitOMPIfClause(CGF, IfCond, TargetThenGen,
- [](CodeGenFunction &CGF, PrePostActionTy &) {});
+ emitIfClause(CGF, IfCond, TargetThenGen,
+ [](CodeGenFunction &CGF, PrePostActionTy &) {});
} else {
RegionCodeGenTy ThenRCG(TargetThenGen);
ThenRCG(CGF);
@@ -10307,8 +10756,7 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
ExprLoc = VLENExpr->getExprLoc();
}
OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
- if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
- CGM.getTriple().getArch() == llvm::Triple::x86_64) {
+ if (CGM.getTriple().isX86()) {
emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
} else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
unsigned VLEN = VLENVal.getExtValue();
@@ -10377,7 +10825,7 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
}
llvm::APInt Size(/*numBits=*/32, NumIterations.size());
QualType ArrayTy =
- C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0);
+ C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
CGF.EmitNullInitialization(DimsAddr, ArrayTy);
@@ -10428,7 +10876,7 @@ void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
QualType ArrayTy = CGM.getContext().getConstantArrayType(
- Int64Ty, Size, ArrayType::Normal, 0);
+ Int64Ty, Size, nullptr, ArrayType::Normal, 0);
Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
const Expr *CounterVal = C->getLoopData(I);
@@ -10566,6 +11014,595 @@ Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
return Address(Addr, Align);
}
+namespace {
+using OMPContextSelectorData =
+ OpenMPCtxSelectorData<ArrayRef<StringRef>, llvm::APSInt>;
+using CompleteOMPContextSelectorData = SmallVector<OMPContextSelectorData, 4>;
+} // anonymous namespace
+
+/// Checks the current context and returns true if it matches the context
+/// selector.
+template <OpenMPContextSelectorSetKind CtxSet, OpenMPContextSelectorKind Ctx,
+ typename... Arguments>
+static bool checkContext(const OMPContextSelectorData &Data,
+ Arguments... Params) {
+ assert(Data.CtxSet != OMP_CTX_SET_unknown && Data.Ctx != OMP_CTX_unknown &&
+ "Unknown context selector or context selector set.");
+ return false;
+}
+
+/// Checks for implementation={vendor(<vendor>)} context selector.
+/// \returns true iff <vendor>="llvm", false otherwise.
+template <>
+bool checkContext<OMP_CTX_SET_implementation, OMP_CTX_vendor>(
+ const OMPContextSelectorData &Data) {
+ return llvm::all_of(Data.Names,
+ [](StringRef S) { return !S.compare_lower("llvm"); });
+}
+
+/// Checks for device={kind(<kind>)} context selector.
+/// \returns true if <kind>="host" and compilation is for host.
+/// true if <kind>="nohost" and compilation is for device.
+/// true if <kind>="cpu" and compilation is for Arm, X86 or PPC CPU.
+/// true if <kind>="gpu" and compilation is for NVPTX or AMDGCN.
+/// false otherwise.
+template <>
+bool checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>(
+ const OMPContextSelectorData &Data, CodeGenModule &CGM) {
+ for (StringRef Name : Data.Names) {
+ if (!Name.compare_lower("host")) {
+ if (CGM.getLangOpts().OpenMPIsDevice)
+ return false;
+ continue;
+ }
+ if (!Name.compare_lower("nohost")) {
+ if (!CGM.getLangOpts().OpenMPIsDevice)
+ return false;
+ continue;
+ }
+ switch (CGM.getTriple().getArch()) {
+ case llvm::Triple::arm:
+ case llvm::Triple::armeb:
+ case llvm::Triple::aarch64:
+ case llvm::Triple::aarch64_be:
+ case llvm::Triple::aarch64_32:
+ case llvm::Triple::ppc:
+ case llvm::Triple::ppc64:
+ case llvm::Triple::ppc64le:
+ case llvm::Triple::x86:
+ case llvm::Triple::x86_64:
+ if (Name.compare_lower("cpu"))
+ return false;
+ break;
+ case llvm::Triple::amdgcn:
+ case llvm::Triple::nvptx:
+ case llvm::Triple::nvptx64:
+ if (Name.compare_lower("gpu"))
+ return false;
+ break;
+ case llvm::Triple::UnknownArch:
+ case llvm::Triple::arc:
+ case llvm::Triple::avr:
+ case llvm::Triple::bpfel:
+ case llvm::Triple::bpfeb:
+ case llvm::Triple::hexagon:
+ case llvm::Triple::mips:
+ case llvm::Triple::mipsel:
+ case llvm::Triple::mips64:
+ case llvm::Triple::mips64el:
+ case llvm::Triple::msp430:
+ case llvm::Triple::r600:
+ case llvm::Triple::riscv32:
+ case llvm::Triple::riscv64:
+ case llvm::Triple::sparc:
+ case llvm::Triple::sparcv9:
+ case llvm::Triple::sparcel:
+ case llvm::Triple::systemz:
+ case llvm::Triple::tce:
+ case llvm::Triple::tcele:
+ case llvm::Triple::thumb:
+ case llvm::Triple::thumbeb:
+ case llvm::Triple::xcore:
+ case llvm::Triple::le32:
+ case llvm::Triple::le64:
+ case llvm::Triple::amdil:
+ case llvm::Triple::amdil64:
+ case llvm::Triple::hsail:
+ case llvm::Triple::hsail64:
+ case llvm::Triple::spir:
+ case llvm::Triple::spir64:
+ case llvm::Triple::kalimba:
+ case llvm::Triple::shave:
+ case llvm::Triple::lanai:
+ case llvm::Triple::wasm32:
+ case llvm::Triple::wasm64:
+ case llvm::Triple::renderscript32:
+ case llvm::Triple::renderscript64:
+ case llvm::Triple::ve:
+ return false;
+ }
+ }
+ return true;
+}
+
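As an illustration (dot, dot_host, and dot_gpu are hypothetical names), when this translation unit is compiled for an x86_64 host, the check above accepts device={kind(cpu)} and rejects kind(gpu), so the machinery below would resolve dot to dot_host:

float dot_host(const float *x, const float *y, int n);
float dot_gpu(const float *x, const float *y, int n);

#pragma omp declare variant(dot_gpu) match(device = {kind(gpu)})
#pragma omp declare variant(dot_host) match(device = {kind(cpu)})
float dot(const float *x, const float *y, int n);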
+static bool matchesContext(CodeGenModule &CGM,
+ const CompleteOMPContextSelectorData &ContextData) {
+ for (const OMPContextSelectorData &Data : ContextData) {
+ switch (Data.Ctx) {
+ case OMP_CTX_vendor:
+ assert(Data.CtxSet == OMP_CTX_SET_implementation &&
+ "Expected implementation context selector set.");
+ if (!checkContext<OMP_CTX_SET_implementation, OMP_CTX_vendor>(Data))
+ return false;
+ break;
+ case OMP_CTX_kind:
+ assert(Data.CtxSet == OMP_CTX_SET_device &&
+ "Expected device context selector set.");
+ if (!checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>(Data,
+ CGM))
+ return false;
+ break;
+ case OMP_CTX_unknown:
+ llvm_unreachable("Unknown context selector kind.");
+ }
+ }
+ return true;
+}
+
+static CompleteOMPContextSelectorData
+translateAttrToContextSelectorData(ASTContext &C,
+ const OMPDeclareVariantAttr *A) {
+ CompleteOMPContextSelectorData Data;
+ for (unsigned I = 0, E = A->scores_size(); I < E; ++I) {
+ Data.emplace_back();
+ auto CtxSet = static_cast<OpenMPContextSelectorSetKind>(
+ *std::next(A->ctxSelectorSets_begin(), I));
+ auto Ctx = static_cast<OpenMPContextSelectorKind>(
+ *std::next(A->ctxSelectors_begin(), I));
+ Data.back().CtxSet = CtxSet;
+ Data.back().Ctx = Ctx;
+ const Expr *Score = *std::next(A->scores_begin(), I);
+ Data.back().Score = Score->EvaluateKnownConstInt(C);
+ switch (Ctx) {
+ case OMP_CTX_vendor:
+ assert(CtxSet == OMP_CTX_SET_implementation &&
+ "Expected implementation context selector set.");
+ Data.back().Names =
+ llvm::makeArrayRef(A->implVendors_begin(), A->implVendors_end());
+ break;
+ case OMP_CTX_kind:
+ assert(CtxSet == OMP_CTX_SET_device &&
+ "Expected device context selector set.");
+ Data.back().Names =
+ llvm::makeArrayRef(A->deviceKinds_begin(), A->deviceKinds_end());
+ break;
+ case OMP_CTX_unknown:
+ llvm_unreachable("Unknown context selector kind.");
+ }
+ }
+ return Data;
+}
+
+static bool isStrictSubset(const CompleteOMPContextSelectorData &LHS,
+ const CompleteOMPContextSelectorData &RHS) {
+ llvm::SmallDenseMap<std::pair<int, int>, llvm::StringSet<>, 4> RHSData;
+ for (const OMPContextSelectorData &D : RHS) {
+ auto &Pair = RHSData.FindAndConstruct(std::make_pair(D.CtxSet, D.Ctx));
+ Pair.getSecond().insert(D.Names.begin(), D.Names.end());
+ }
+ bool AllSetsAreEqual = true;
+ for (const OMPContextSelectorData &D : LHS) {
+ auto It = RHSData.find(std::make_pair(D.CtxSet, D.Ctx));
+ if (It == RHSData.end())
+ return false;
+ if (D.Names.size() > It->getSecond().size())
+ return false;
+ if (llvm::set_union(It->getSecond(), D.Names))
+ return false;
+ AllSetsAreEqual =
+ AllSetsAreEqual && (D.Names.size() == It->getSecond().size());
+ }
+
+ return LHS.size() != RHS.size() || !AllSetsAreEqual;
+}
+
+static bool greaterCtxScore(const CompleteOMPContextSelectorData &LHS,
+ const CompleteOMPContextSelectorData &RHS) {
+ // Score is calculated as sum of all scores + 1.
+ llvm::APSInt LHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false);
+ bool RHSIsSubsetOfLHS = isStrictSubset(RHS, LHS);
+ if (RHSIsSubsetOfLHS) {
+ LHSScore = llvm::APSInt::get(0);
+ } else {
+ for (const OMPContextSelectorData &Data : LHS) {
+ if (Data.Score.getBitWidth() > LHSScore.getBitWidth()) {
+ LHSScore = LHSScore.extend(Data.Score.getBitWidth()) + Data.Score;
+ } else if (Data.Score.getBitWidth() < LHSScore.getBitWidth()) {
+ LHSScore += Data.Score.extend(LHSScore.getBitWidth());
+ } else {
+ LHSScore += Data.Score;
+ }
+ }
+ }
+ llvm::APSInt RHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false);
+ if (!RHSIsSubsetOfLHS && isStrictSubset(LHS, RHS)) {
+ RHSScore = llvm::APSInt::get(0);
+ } else {
+ for (const OMPContextSelectorData &Data : RHS) {
+ if (Data.Score.getBitWidth() > RHSScore.getBitWidth()) {
+ RHSScore = RHSScore.extend(Data.Score.getBitWidth()) + Data.Score;
+ } else if (Data.Score.getBitWidth() < RHSScore.getBitWidth()) {
+ RHSScore += Data.Score.extend(RHSScore.getBitWidth());
+ } else {
+ RHSScore += Data.Score;
+ }
+ }
+ }
+ return llvm::APSInt::compareValues(LHSScore, RHSScore) >= 0;
+}
+
+/// Finds the variant function that matches current context with its context
+/// selector.
+static const FunctionDecl *getDeclareVariantFunction(CodeGenModule &CGM,
+ const FunctionDecl *FD) {
+ if (!FD->hasAttrs() || !FD->hasAttr<OMPDeclareVariantAttr>())
+ return FD;
+ // Iterate through all DeclareVariant attributes and check context selectors.
+ const OMPDeclareVariantAttr *TopMostAttr = nullptr;
+ CompleteOMPContextSelectorData TopMostData;
+ for (const auto *A : FD->specific_attrs<OMPDeclareVariantAttr>()) {
+ CompleteOMPContextSelectorData Data =
+ translateAttrToContextSelectorData(CGM.getContext(), A);
+ if (!matchesContext(CGM, Data))
+ continue;
+ // If the attribute matches the context, find the attribute with the highest
+ // score.
+ if (!TopMostAttr || !greaterCtxScore(TopMostData, Data)) {
+ TopMostAttr = A;
+ TopMostData.swap(Data);
+ }
+ }
+ if (!TopMostAttr)
+ return FD;
+ return cast<FunctionDecl>(
+ cast<DeclRefExpr>(TopMostAttr->getVariantFuncRef()->IgnoreParenImpCasts())
+ ->getDecl());
+}
+
+bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) {
+ const auto *D = cast<FunctionDecl>(GD.getDecl());
+ // If the original function is defined already, use its definition.
+ StringRef MangledName = CGM.getMangledName(GD);
+ llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName);
+ if (Orig && !Orig->isDeclaration())
+ return false;
+ const FunctionDecl *NewFD = getDeclareVariantFunction(CGM, D);
+ // Emit the original function if it has no declare variant attribute or if no
+ // attached context selector matches the current context.
+ if (NewFD == D)
+ return false;
+ GlobalDecl NewGD = GD.getWithDecl(NewFD);
+ if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) {
+ DeferredVariantFunction.erase(D);
+ return true;
+ }
+ DeferredVariantFunction.insert(std::make_pair(D, std::make_pair(NewGD, GD)));
+ return true;
+}
+
+CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
+ CodeGenModule &CGM, const OMPLoopDirective &S)
+ : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
+ assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
+ if (!NeedToPush)
+ return;
+ NontemporalDeclsSet &DS =
+ CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
+ for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
+ for (const Stmt *Ref : C->private_refs()) {
+ const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
+ const ValueDecl *VD;
+ if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
+ VD = DRE->getDecl();
+ } else {
+ const auto *ME = cast<MemberExpr>(SimpleRefExpr);
+ assert((ME->isImplicitCXXThis() ||
+ isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
+ "Expected member of current class.");
+ VD = ME->getMemberDecl();
+ }
+ DS.insert(VD);
+ }
+ }
+}
+
+CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
+ if (!NeedToPush)
+ return;
+ CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
+}
+
+bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
+ assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
+
+ return llvm::any_of(
+ CGM.getOpenMPRuntime().NontemporalDeclsStack,
+ [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
+}
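+// For reference (hypothetical source), the declarations collected above come
+// from clauses such as:
+//   #pragma omp simd nontemporal(a, b)
+// Later codegen queries isNontemporalDecl() so the corresponding loads and
+// stores can be marked as non-temporal.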
+
+CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
+ CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
+ : CGM(CGF.CGM),
+ NeedToPush(llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
+ [](const OMPLastprivateClause *C) {
+ return C->getKind() ==
+ OMPC_LASTPRIVATE_conditional;
+ })) {
+ assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
+ if (!NeedToPush)
+ return;
+ LastprivateConditionalData &Data =
+ CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
+ for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
+ if (C->getKind() != OMPC_LASTPRIVATE_conditional)
+ continue;
+
+ for (const Expr *Ref : C->varlists()) {
+ Data.DeclToUniqeName.try_emplace(
+ cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
+ generateUniqueName(CGM, "pl_cond", Ref));
+ }
+ }
+ Data.IVLVal = IVLVal;
+ // In simd-only mode or for simd directives there is no need to generate
+ // threadprivate references for the loop iteration counter: outlining cannot
+ // happen in simd regions, so the original counter can be used directly.
+ if (CGF.getLangOpts().OpenMPSimd ||
+ isOpenMPSimdDirective(S.getDirectiveKind())) {
+ Data.UseOriginalIV = true;
+ return;
+ }
+ llvm::SmallString<16> Buffer;
+ llvm::raw_svector_ostream OS(Buffer);
+ PresumedLoc PLoc =
+ CGM.getContext().getSourceManager().getPresumedLoc(S.getBeginLoc());
+ assert(PLoc.isValid() && "Source location is expected to be always valid.");
+
+ llvm::sys::fs::UniqueID ID;
+ if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
+ CGM.getDiags().Report(diag::err_cannot_open_file)
+ << PLoc.getFilename() << EC.message();
+ OS << "$pl_cond_" << ID.getDevice() << "_" << ID.getFile() << "_"
+ << PLoc.getLine() << "_" << PLoc.getColumn() << "$iv";
+ Data.IVName = OS.str();
+}
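+// Naming sketch (illustrative values only): a directive at line 12, column 9
+// of a file with unique ID device 42 / file 7 yields the artificial counter
+// name "$pl_cond_42_7_12_9$iv", following the format emitted above.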
+
+CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
+ if (!NeedToPush)
+ return;
+ CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
+}
+
+void CGOpenMPRuntime::initLastprivateConditionalCounter(
+ CodeGenFunction &CGF, const OMPExecutableDirective &S) {
+ if (CGM.getLangOpts().OpenMPSimd ||
+ !llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
+ [](const OMPLastprivateClause *C) {
+ return C->getKind() == OMPC_LASTPRIVATE_conditional;
+ }))
+ return;
+ const CGOpenMPRuntime::LastprivateConditionalData &Data =
+ LastprivateConditionalStack.back();
+ if (Data.UseOriginalIV)
+ return;
+ // Global loop counter. Required to handle inner parallel-for regions.
+ // global_iv = iv;
+ Address GlobIVAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
+ CGF, Data.IVLVal.getType(), Data.IVName);
+ LValue GlobIVLVal = CGF.MakeAddrLValue(GlobIVAddr, Data.IVLVal.getType());
+ llvm::Value *IVVal = CGF.EmitLoadOfScalar(Data.IVLVal, S.getBeginLoc());
+ CGF.EmitStoreOfScalar(IVVal, GlobIVLVal);
+}
+
+namespace {
+/// Checks whether a lastprivate conditional variable is referenced in LHS.
+class LastprivateConditionalRefChecker final
+ : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
+ CodeGenFunction &CGF;
+ ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
+ const Expr *FoundE = nullptr;
+ const Decl *FoundD = nullptr;
+ StringRef UniqueDeclName;
+ LValue IVLVal;
+ StringRef IVName;
+ SourceLocation Loc;
+ bool UseOriginalIV = false;
+
+public:
+ bool VisitDeclRefExpr(const DeclRefExpr *E) {
+ for (const CGOpenMPRuntime::LastprivateConditionalData &D :
+ llvm::reverse(LPM)) {
+ auto It = D.DeclToUniqeName.find(E->getDecl());
+ if (It == D.DeclToUniqeName.end())
+ continue;
+ FoundE = E;
+ FoundD = E->getDecl()->getCanonicalDecl();
+ UniqueDeclName = It->getSecond();
+ IVLVal = D.IVLVal;
+ IVName = D.IVName;
+ UseOriginalIV = D.UseOriginalIV;
+ break;
+ }
+ return FoundE == E;
+ }
+ bool VisitMemberExpr(const MemberExpr *E) {
+ if (!CGF.IsWrappedCXXThis(E->getBase()))
+ return false;
+ for (const CGOpenMPRuntime::LastprivateConditionalData &D :
+ llvm::reverse(LPM)) {
+ auto It = D.DeclToUniqeName.find(E->getMemberDecl());
+ if (It == D.DeclToUniqeName.end())
+ continue;
+ FoundE = E;
+ FoundD = E->getMemberDecl()->getCanonicalDecl();
+ UniqueDeclName = It->getSecond();
+ IVLVal = D.IVLVal;
+ IVName = D.IVName;
+ UseOriginalIV = D.UseOriginalIV;
+ break;
+ }
+ return FoundE == E;
+ }
+ bool VisitStmt(const Stmt *S) {
+ for (const Stmt *Child : S->children()) {
+ if (!Child)
+ continue;
+ if (const auto *E = dyn_cast<Expr>(Child))
+ if (!E->isGLValue())
+ continue;
+ if (Visit(Child))
+ return true;
+ }
+ return false;
+ }
+ explicit LastprivateConditionalRefChecker(
+ CodeGenFunction &CGF,
+ ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
+ : CGF(CGF), LPM(LPM) {}
+ std::tuple<const Expr *, const Decl *, StringRef, LValue, StringRef, bool>
+ getFoundData() const {
+ return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, IVName,
+ UseOriginalIV);
+ }
+};
+} // namespace
+
+void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
+ const Expr *LHS) {
+ if (CGF.getLangOpts().OpenMP < 50)
+ return;
+ LastprivateConditionalRefChecker Checker(CGF, LastprivateConditionalStack);
+ if (!Checker.Visit(LHS))
+ return;
+ const Expr *FoundE;
+ const Decl *FoundD;
+ StringRef UniqueDeclName;
+ LValue IVLVal;
+ StringRef IVName;
+ bool UseOriginalIV;
+ std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, IVName, UseOriginalIV) =
+ Checker.getFoundData();
+
+ // Last updated loop counter for the lastprivate conditional var.
+ // int<xx> last_iv = 0;
+ llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
+ llvm::Constant *LastIV =
+ getOrCreateInternalVariable(LLIVTy, UniqueDeclName + "$iv");
+ cast<llvm::GlobalVariable>(LastIV)->setAlignment(
+ IVLVal.getAlignment().getAsAlign());
+ LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
+
+ // Private address of the lastprivate conditional in the current context.
+ // priv_a
+ LValue LVal = CGF.EmitLValue(FoundE);
+ // Last value of the lastprivate conditional.
+ // decltype(priv_a) last_a;
+ llvm::Constant *Last = getOrCreateInternalVariable(
+ LVal.getAddress(CGF).getElementType(), UniqueDeclName);
+ cast<llvm::GlobalVariable>(Last)->setAlignment(
+ LVal.getAlignment().getAsAlign());
+ LValue LastLVal =
+ CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());
+
+ // Global loop counter. Required to handle inner parallel-for regions.
+ // global_iv
+ if (!UseOriginalIV) {
+ Address IVAddr =
+ getAddrOfArtificialThreadPrivate(CGF, IVLVal.getType(), IVName);
+ IVLVal = CGF.MakeAddrLValue(IVAddr, IVLVal.getType());
+ }
+ llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, FoundE->getExprLoc());
+
+ // #pragma omp critical(a)
+ // if (last_iv <= iv) {
+ // last_iv = iv;
+ // last_a = priv_a;
+ // }
+ auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
+ FoundE](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
+ llvm::Value *LastIVVal =
+ CGF.EmitLoadOfScalar(LastIVLVal, FoundE->getExprLoc());
+ // If this update is at least as late as the last recorded one
+ // (last_iv <= global_iv), store the new values into the global copies.
+ llvm::Value *CmpRes;
+ if (IVLVal.getType()->isSignedIntegerType()) {
+ CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
+ } else {
+ assert(IVLVal.getType()->isUnsignedIntegerType() &&
+ "Loop iteration variable must be integer.");
+ CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
+ }
+ llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
+ llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
+ CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
+ // {
+ CGF.EmitBlock(ThenBB);
+
+ // last_iv = global_iv;
+ CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
+
+ // last_a = priv_a;
+ switch (CGF.getEvaluationKind(LVal.getType())) {
+ case TEK_Scalar: {
+ llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, FoundE->getExprLoc());
+ CGF.EmitStoreOfScalar(PrivVal, LastLVal);
+ break;
+ }
+ case TEK_Complex: {
+ CodeGenFunction::ComplexPairTy PrivVal =
+ CGF.EmitLoadOfComplex(LVal, FoundE->getExprLoc());
+ CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
+ break;
+ }
+ case TEK_Aggregate:
+ llvm_unreachable(
+ "Aggregates are not supported in lastprivate conditional.");
+ }
+ // }
+ CGF.EmitBranch(ExitBB);
+ // There is no need to emit line number for unconditional branch.
+ (void)ApplyDebugLocation::CreateEmpty(CGF);
+ CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
+ };
+
+ if (CGM.getLangOpts().OpenMPSimd) {
+ // Do not emit as a critical region as no parallel region could be emitted.
+ RegionCodeGenTy ThenRCG(CodeGen);
+ ThenRCG(CGF);
+ } else {
+ emitCriticalRegion(CGF, UniqueDeclName, CodeGen, FoundE->getExprLoc());
+ }
+}
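+// Source-level sketch of the pattern handled above (hypothetical code):
+//   #pragma omp parallel for lastprivate(conditional: a)
+//   for (int i = 0; i < n; ++i)
+//     if (p[i])
+//       a = i; // every store to 'a' goes through the guarded update above
+// so that the value seen after the loop is the one assigned in the
+// sequentially last iteration that actually performed a store.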
+
+void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
+ CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
+ SourceLocation Loc) {
+ if (CGF.getLangOpts().OpenMP < 50)
+ return;
+ auto It = LastprivateConditionalStack.back().DeclToUniqeName.find(VD);
+ assert(It != LastprivateConditionalStack.back().DeclToUniqeName.end() &&
+ "Unknown lastprivate conditional variable.");
+ StringRef UniqueName = It->getSecond();
+ llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
+ // The variable was not updated in the region, so there is nothing to copy.
+ if (!GV)
+ return;
+ LValue LPLVal = CGF.MakeAddrLValue(
+ GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
+ llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
+ CGF.EmitStoreOfScalar(Res, PrivLVal);
+}
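+// Final step sketch: the conditionally recorded value (the "last_a" global in
+// the pseudo-code above) is copied back into the private copy here, so the
+// regular lastprivate copy-out then propagates the conditionally last value.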
+
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
@@ -10688,7 +11725,7 @@ void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
}
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
- OpenMPProcBindClauseKind ProcBind,
+ ProcBindKind ProcBind,
SourceLocation Loc) {
llvm_unreachable("Not supported in SIMD-only mode");
}
@@ -10786,12 +11823,13 @@ void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
llvm_unreachable("Not supported in SIMD-only mode");
}
-void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
- const OMPExecutableDirective &D,
- llvm::Function *OutlinedFn,
- llvm::Value *OutlinedFnID,
- const Expr *IfCond,
- const Expr *Device) {
+void CGOpenMPSIMDRuntime::emitTargetCall(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D,
+ llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
+ const Expr *Device,
+ llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
+ const OMPLoopDirective &D)>
+ SizeEmitter) {
llvm_unreachable("Not supported in SIMD-only mode");
}
@@ -10807,10 +11845,6 @@ bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
return false;
}
-llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
- return nullptr;
-}
-
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
const OMPExecutableDirective &D,
SourceLocation Loc,