Diffstat (limited to 'contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp')
 contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp | 2076
 1 file changed, 1555 insertions(+), 521 deletions(-)
diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 27e7175da841..97b17799a03e 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -10,17 +10,22 @@ // //===----------------------------------------------------------------------===// +#include "CGOpenMPRuntime.h" #include "CGCXXABI.h" #include "CGCleanup.h" -#include "CGOpenMPRuntime.h" #include "CGRecordLayout.h" #include "CodeGenFunction.h" -#include "clang/CodeGen/ConstantInitBuilder.h" +#include "clang/AST/Attr.h" #include "clang/AST/Decl.h" +#include "clang/AST/OpenMPClause.h" #include "clang/AST/StmtOpenMP.h" +#include "clang/AST/StmtVisitor.h" #include "clang/Basic/BitmaskEnum.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SetOperations.h" #include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Value.h" @@ -30,6 +35,7 @@ using namespace clang; using namespace CodeGen; +using namespace llvm::omp; namespace { /// Base class for handling code generation inside OpenMP regions. @@ -356,7 +362,7 @@ public: VD->getType().getNonReferenceType(), VK_LValue, C.getLocation()); PrivScope.addPrivate( - VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); }); + VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); }); } (void)PrivScope.Privatize(); } @@ -727,10 +733,6 @@ enum OpenMPRTLFunction { OMPRTL__tgt_target_teams_nowait, // Call to void __tgt_register_requires(int64_t flags); OMPRTL__tgt_register_requires, - // Call to void __tgt_register_lib(__tgt_bin_desc *desc); - OMPRTL__tgt_register_lib, - // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); - OMPRTL__tgt_unregister_lib, // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); OMPRTL__tgt_target_data_begin, @@ -752,6 +754,11 @@ enum OpenMPRTLFunction { // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t // *arg_types); OMPRTL__tgt_target_data_update_nowait, + // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle); + OMPRTL__tgt_mapper_num_components, + // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void + // *base, void *begin, int64_t size, int64_t type); + OMPRTL__tgt_push_mapper_component, }; /// A basic class for pre|post-action for advanced codegen sequence for OpenMP @@ -836,7 +843,7 @@ static void emitInitWithReductionInitializer(CodeGenFunction &CGF, RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); break; case TEK_Aggregate: - InitRVal = RValue::getAggregate(LV.getAddress()); + InitRVal = RValue::getAggregate(LV.getAddress(CGF)); break; } OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); @@ -960,7 +967,7 @@ void ReductionCodeGen::emitAggregateInitialization( EmitDeclareReductionInit, EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp : PrivateVD->getInit(), - DRD, SharedLVal.getAddress()); + DRD, SharedLVal.getAddress(CGF)); } ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, @@ -1001,13 +1008,13 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { } llvm::Value *Size; llvm::Value *SizeInChars; - auto *ElemType = - cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType()) - ->getElementType(); + auto *ElemType = cast<llvm::PointerType>( + SharedAddresses[N].first.getPointer(CGF)->getType()) + ->getElementType(); auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); if (AsArraySection) { - Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(), - SharedAddresses[N].first.getPointer()); + Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF), + SharedAddresses[N].first.getPointer(CGF)); Size = CGF.Builder.CreateNUWAdd( Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); @@ -1057,7 +1064,7 @@ void ReductionCodeGen::emitInitialization( PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); QualType SharedType = SharedAddresses[N].first.getType(); SharedLVal = CGF.MakeAddrLValue( - CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(), + CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF), CGF.ConvertTypeForMem(SharedType)), SharedType, SharedAddresses[N].first.getBaseInfo(), CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); @@ -1065,7 +1072,7 @@ void ReductionCodeGen::emitInitialization( emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, - PrivateAddr, SharedLVal.getAddress(), + PrivateAddr, SharedLVal.getAddress(CGF), SharedLVal.getType()); } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && !CGF.isTrivialInitializer(PrivateVD->getInit())) { @@ -1102,15 +1109,15 @@ static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && !CGF.getContext().hasSameType(BaseTy, ElTy)) { if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { - BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); + BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy); } else { - LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy); + LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy); BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); } BaseTy = BaseTy->getPointeeType(); } return CGF.MakeAddrLValue( - CGF.Builder.CreateElementBitCast(BaseLV.getAddress(), + CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF), CGF.ConvertTypeForMem(ElTy)), BaseLV.getType(), BaseLV.getBaseInfo(), CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); @@ -1174,15 +1181,15 @@ Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), OriginalBaseLValue); llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( - BaseLValue.getPointer(), SharedAddresses[N].first.getPointer()); + BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF)); llvm::Value *PrivatePointer = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( PrivateAddr.getPointer(), - SharedAddresses[N].first.getAddress().getType()); + SharedAddresses[N].first.getAddress(CGF).getType()); llvm::Value 
*Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); return castToBase(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), - OriginalBaseLValue.getAddress().getType(), + OriginalBaseLValue.getAddress(CGF).getType(), OriginalBaseLValue.getAlignment(), Ptr); } BaseDecls.emplace_back( @@ -1259,6 +1266,52 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, loadOffloadInfoMetadata(); } +bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD, + const GlobalDecl &OldGD, + llvm::GlobalValue *OrigAddr, + bool IsForDefinition) { + // Emit at least a definition for the aliasee if the the address of the + // original function is requested. + if (IsForDefinition || OrigAddr) + (void)CGM.GetAddrOfGlobal(NewGD); + StringRef NewMangledName = CGM.getMangledName(NewGD); + llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName); + if (Addr && !Addr->isDeclaration()) { + const auto *D = cast<FunctionDecl>(OldGD.getDecl()); + const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(NewGD); + llvm::Type *DeclTy = CGM.getTypes().GetFunctionType(FI); + + // Create a reference to the named value. This ensures that it is emitted + // if a deferred decl. + llvm::GlobalValue::LinkageTypes LT = CGM.getFunctionLinkage(OldGD); + + // Create the new alias itself, but don't set a name yet. + auto *GA = + llvm::GlobalAlias::create(DeclTy, 0, LT, "", Addr, &CGM.getModule()); + + if (OrigAddr) { + assert(OrigAddr->isDeclaration() && "Expected declaration"); + + GA->takeName(OrigAddr); + OrigAddr->replaceAllUsesWith( + llvm::ConstantExpr::getBitCast(GA, OrigAddr->getType())); + OrigAddr->eraseFromParent(); + } else { + GA->setName(CGM.getMangledName(OldGD)); + } + + // Set attributes which are particular to an alias; this is a + // specialization of the attributes which may be set on a global function. + if (D->hasAttr<WeakAttr>() || D->hasAttr<WeakRefAttr>() || + D->isWeakImported()) + GA->setLinkage(llvm::Function::WeakAnyLinkage); + + CGM.SetCommonAttributes(OldGD, GA); + return true; + } + return false; +} + void CGOpenMPRuntime::clear() { InternalVars.clear(); // Clean non-target variable declarations possibly used only in debug info. @@ -1272,6 +1325,14 @@ void CGOpenMPRuntime::clear() { continue; GV->eraseFromParent(); } + // Emit aliases for the deferred aliasees. + for (const auto &Pair : DeferredVariantFunction) { + StringRef MangledName = CGM.getMangledName(Pair.second.second); + llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName); + // If not able to emit alias, just emit original declaration. 
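For context: the tryEmitDeclareVariant / DeferredVariantFunction machinery added here backs #pragma omp declare variant; once the selected function has a definition, the other declaration's mangled name is emitted as a GlobalAlias to it. A minimal, hypothetical source-level sketch of the construct being served (the function names are made up for illustration, and this is not code from the patch):

  // Sketch only: calls to base() may be resolved to fast_base() when the
  // context selector matches (here, when called inside a parallel region).
  int fast_base(int x) { return 2 * x; }

  #pragma omp declare variant(fast_base) match(construct = {parallel})
  int base(int x) { return x + x; }

  int use(int n) {
    int r = 0;
  #pragma omp parallel reduction(+ : r)
    r += base(n);
    return r;
  }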
+ (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr, + /*IsForDefinition=*/false); + } } std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { @@ -1321,12 +1382,12 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) - .getAddress(); + .getAddress(CGF); }); Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) - .getAddress(); + .getAddress(CGF); }); (void)Scope.Privatize(); if (!IsCombiner && Out->hasInit() && @@ -1377,6 +1438,52 @@ CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { return UDRMap.lookup(D); } +namespace { +// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR +// Builder if one is present. +struct PushAndPopStackRAII { + PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, + bool HasCancel) + : OMPBuilder(OMPBuilder) { + if (!OMPBuilder) + return; + + // The following callback is the crucial part of clangs cleanup process. + // + // NOTE: + // Once the OpenMPIRBuilder is used to create parallel regions (and + // similar), the cancellation destination (Dest below) is determined via + // IP. That means if we have variables to finalize we split the block at IP, + // use the new block (=BB) as destination to build a JumpDest (via + // getJumpDestInCurrentScope(BB)) which then is fed to + // EmitBranchThroughCleanup. Furthermore, there will not be the need + // to push & pop an FinalizationInfo object. + // The FiniCB will still be needed but at the point where the + // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. + auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { + assert(IP.getBlock()->end() == IP.getPoint() && + "Clang CG should cause non-terminated block!"); + CGBuilderTy::InsertPointGuard IPG(CGF.Builder); + CGF.Builder.restoreIP(IP); + CodeGenFunction::JumpDest Dest = + CGF.getOMPCancelDestination(OMPD_parallel); + CGF.EmitBranchThroughCleanup(Dest); + }; + + // TODO: Remove this once we emit parallel regions through the + // OpenMPIRBuilder as it can do this setup internally. + llvm::OpenMPIRBuilder::FinalizationInfo FI( + {FiniCB, OMPD_parallel, HasCancel}); + OMPBuilder->pushFinalizationCB(std::move(FI)); + } + ~PushAndPopStackRAII() { + if (OMPBuilder) + OMPBuilder->popFinalizationCB(); + } + llvm::OpenMPIRBuilder *OMPBuilder; +}; +} // namespace + static llvm::Function *emitParallelOrTeamsOutlinedFunction( CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, @@ -1401,6 +1508,11 @@ static llvm::Function *emitParallelOrTeamsOutlinedFunction( else if (const auto *OPFD = dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) HasCancel = OPFD->hasCancel(); + + // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new + // parallel region to make cancellation barriers work properly. 
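The finalization callback registered in this RAII object is what lets a cancellation barrier emitted through the OpenMPIRBuilder branch to the enclosing region's cleanup (via getOMPCancelDestination and EmitBranchThroughCleanup). Roughly the kind of user code that exercises this path, as an illustrative sketch rather than code from the patch:

  void worker(int *data, int n) {
  #pragma omp parallel
    {
      if (data[0] < 0) {
        // request cancellation of the enclosing parallel region
  #pragma omp cancel parallel
      }
      // threads check for a pending cancellation here; implicit barriers
      // in the region behave the same way, hence the HasCancel plumbing
  #pragma omp cancellation point parallel
      for (int i = 1; i < n; ++i)
        data[i] += data[0];
    }
  }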
+ llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder(); + PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel); CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, HasCancel, OutlinedHelperName); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); @@ -1436,7 +1548,7 @@ llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( UpLoc, ThreadID, CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), TaskTVar->getType()->castAs<PointerType>()) - .getPointer()}; + .getPointer(CGF)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); }; CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, @@ -1638,18 +1750,24 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, return ThreadID; } // If exceptions are enabled, do not use parameter to avoid possible crash. - if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || - !CGF.getLangOpts().CXXExceptions || - CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { - if (auto *OMPRegionInfo = - dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { - if (OMPRegionInfo->getThreadIDVariable()) { - // Check if this an outlined function with thread id passed as argument. - LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); + if (auto *OMPRegionInfo = + dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { + if (OMPRegionInfo->getThreadIDVariable()) { + // Check if this an outlined function with thread id passed as argument. + LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); + llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); + if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || + !CGF.getLangOpts().CXXExceptions || + CGF.Builder.GetInsertBlock() == TopBlock || + !isa<llvm::Instruction>(LVal.getPointer(CGF)) || + cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == + TopBlock || + cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == + CGF.Builder.GetInsertBlock()) { ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); // If value loaded in entry block, cache it and use it everywhere in // function. 
- if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { + if (CGF.Builder.GetInsertBlock() == TopBlock) { auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); Elem.second.ThreadID = ThreadID; } @@ -1686,6 +1804,12 @@ void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { UDRMap.erase(D); FunctionUDRMap.erase(CGF.CurFn); } + auto I = FunctionUDMMap.find(CGF.CurFn); + if (I != FunctionUDMMap.end()) { + for(auto *D : I->second) + UDMMap.erase(D); + FunctionUDMMap.erase(I); + } } llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { @@ -2352,26 +2476,6 @@ llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires"); break; } - case OMPRTL__tgt_register_lib: { - // Build void __tgt_register_lib(__tgt_bin_desc *desc); - QualType ParamTy = - CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); - llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); - break; - } - case OMPRTL__tgt_unregister_lib: { - // Build void __tgt_unregister_lib(__tgt_bin_desc *desc); - QualType ParamTy = - CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); - llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); - break; - } case OMPRTL__tgt_target_data_begin: { // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); @@ -2459,6 +2563,24 @@ llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); break; } + case OMPRTL__tgt_mapper_num_components: { + // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle); + llvm::Type *TypeParams[] = {CGM.VoidPtrTy}; + auto *FnTy = + llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components"); + break; + } + case OMPRTL__tgt_push_mapper_component: { + // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void + // *base, void *begin, int64_t size, int64_t type); + llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy, + CGM.Int64Ty, CGM.Int64Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component"); + break; + } } assert(RTLFn && "Unable to find OpenMP runtime function"); return RTLFn; @@ -2552,6 +2674,32 @@ CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { return CGM.CreateRuntimeFunction(FnTy, Name); } +/// Obtain information that uniquely identifies a target entry. This +/// consists of the file and device IDs as well as line number associated with +/// the relevant entry source location. 
+static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, + unsigned &DeviceID, unsigned &FileID, + unsigned &LineNum) { + SourceManager &SM = C.getSourceManager(); + + // The loc should be always valid and have a file ID (the user cannot use + // #pragma directives in macros) + + assert(Loc.isValid() && "Source location is expected to be always valid."); + + PresumedLoc PLoc = SM.getPresumedLoc(Loc); + assert(PLoc.isValid() && "Source location is expected to be always valid."); + + llvm::sys::fs::UniqueID ID; + if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) + SM.getDiagnostics().Report(diag::err_cannot_open_file) + << PLoc.getFilename() << EC.message(); + + DeviceID = ID.getDevice(); + FileID = ID.getFile(); + LineNum = PLoc.getLine(); +} + Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { if (CGM.getLangOpts().OpenMPSimd) return Address::invalid(); @@ -2563,19 +2711,27 @@ Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { SmallString<64> PtrName; { llvm::raw_svector_ostream OS(PtrName); - OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_ref_ptr"; + OS << CGM.getMangledName(GlobalDecl(VD)); + if (!VD->isExternallyVisible()) { + unsigned DeviceID, FileID, Line; + getTargetEntryUniqueInfo(CGM.getContext(), + VD->getCanonicalDecl()->getBeginLoc(), + DeviceID, FileID, Line); + OS << llvm::format("_%x", FileID); + } + OS << "_decl_tgt_ref_ptr"; } llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); if (!Ptr) { QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), PtrName); - if (!CGM.getLangOpts().OpenMPIsDevice) { - auto *GV = cast<llvm::GlobalVariable>(Ptr); - GV->setLinkage(llvm::GlobalValue::ExternalLinkage); + + auto *GV = cast<llvm::GlobalVariable>(Ptr); + GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); + + if (!CGM.getLangOpts().OpenMPIsDevice) GV->setInitializer(CGM.GetAddrOfGlobal(VD)); - } - CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr)); registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); } return Address(Ptr, CGM.getContext().getDeclAlign(VD)); @@ -2749,35 +2905,12 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( return nullptr; } -/// Obtain information that uniquely identifies a target entry. This -/// consists of the file and device IDs as well as line number associated with -/// the relevant entry source location. 
-static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, - unsigned &DeviceID, unsigned &FileID, - unsigned &LineNum) { - SourceManager &SM = C.getSourceManager(); - - // The loc should be always valid and have a file ID (the user cannot use - // #pragma directives in macros) - - assert(Loc.isValid() && "Source location is expected to be always valid."); - - PresumedLoc PLoc = SM.getPresumedLoc(Loc); - assert(PLoc.isValid() && "Source location is expected to be always valid."); - - llvm::sys::fs::UniqueID ID; - if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) - SM.getDiagnostics().Report(diag::err_cannot_open_file) - << PLoc.getFilename() << EC.message(); - - DeviceID = ID.getDevice(); - FileID = ID.getFile(); - LineNum = PLoc.getLine(); -} - bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, llvm::GlobalVariable *Addr, bool PerformInit) { + if (CGM.getLangOpts().OMPTargetTriples.empty() && + !CGM.getLangOpts().OpenMPIsDevice) + return false; Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || @@ -2889,10 +3022,15 @@ Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name) { std::string Suffix = getName({"artificial", ""}); - std::string CacheSuffix = getName({"cache", ""}); llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); llvm::Value *GAddr = getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); + if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && + CGM.getTarget().isTLSSupported()) { + cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true); + return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType)); + } + std::string CacheSuffix = getName({"cache", ""}); llvm::Value *Args[] = { emitUpdateLocation(CGF, SourceLocation()), getThreadID(CGF, SourceLocation()), @@ -2906,12 +3044,12 @@ Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, CGF.EmitRuntimeCall( createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), VarLVType->getPointerTo(/*AddrSpace=*/0)), - CGM.getPointerAlign()); + CGM.getContext().getTypeAlignInChars(VarType)); } -void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, - const RegionCodeGenTy &ThenGen, - const RegionCodeGenTy &ElseGen) { +void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, + const RegionCodeGenTy &ThenGen, + const RegionCodeGenTy &ElseGen) { CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); // If the condition constant folds and can be elided, try to avoid emitting @@ -2981,14 +3119,16 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitRuntimeCall( RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); - // OutlinedFn(>id, &zero, CapturedStruct); - Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, - /*Name*/ ".zero.addr"); - CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); + // OutlinedFn(>id, &zero_bound, CapturedStruct); + Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); + Address ZeroAddrBound = + CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, + /*Name=*/".bound.zero.addr"); + CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0)); llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; // ThreadId for serialized parallels is 0. 
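For reference, the serialized path in this hunk (and the emitIfClause helper it now goes through) corresponds to a parallel construct whose if clause evaluates to false at run time: the region is run by the encountering thread via __kmpc_serialized_parallel, with the real thread id address and a zero bound thread id. A small sketch of such source (the threshold is arbitrary):

  void scale(double *v, int n, double f) {
    // when n <= 10000 the if clause is false and the "parallel" region
    // is executed serially through the __kmpc_serialized_parallel path
  #pragma omp parallel for if(n > 10000)
    for (int i = 0; i < n; ++i)
      v[i] *= f;
  }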
- OutlinedFnArgs.push_back(ZeroAddr.getPointer()); - OutlinedFnArgs.push_back(ZeroAddr.getPointer()); + OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); + OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); @@ -2999,7 +3139,7 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, EndArgs); }; if (IfCond) { - emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); + emitIfClause(CGF, IfCond, ThenGen, ElseGen); } else { RegionCodeGenTy ThenRCG(ThenGen); ThenRCG(CGF); @@ -3017,7 +3157,7 @@ Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, if (auto *OMPRegionInfo = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) if (OMPRegionInfo->getThreadIDVariable()) - return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); + return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); llvm::Value *ThreadID = getThreadID(CGF, Loc); QualType Int32Ty = @@ -3283,9 +3423,9 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, // <copy_func>, did_it); if (DidIt.isValid()) { llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); - QualType CopyprivateArrayTy = - C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, - /*IndexTypeQuals=*/0); + QualType CopyprivateArrayTy = C.getConstantArrayType( + C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, + /*IndexTypeQuals=*/0); // Create a list of all private variables for copyprivate. Address CopyprivateList = CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); @@ -3293,7 +3433,8 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); CGF.Builder.CreateStore( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy), + CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), + CGF.VoidPtrTy), Elem); } // Build function that copies private values from single region to all other @@ -3375,6 +3516,16 @@ void CGOpenMPRuntime::getDefaultScheduleAndChunk( void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks, bool ForceSimpleCall) { + // Check if we should use the OMPBuilder + auto *OMPRegionInfo = + dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); + llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder(); + if (OMPBuilder) { + CGF.Builder.restoreIP(OMPBuilder->CreateBarrier( + CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); + return; + } + if (!CGF.HaveInsertPoint()) return; // Build call __kmpc_cancel_barrier(loc, thread_id); @@ -3384,8 +3535,7 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, // thread_id); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), getThreadID(CGF, Loc)}; - if (auto *OMPRegionInfo = - dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { + if (OMPRegionInfo) { if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { llvm::Value *Result = CGF.EmitRuntimeCall( createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); @@ -3472,7 +3622,7 @@ bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { return Schedule != OMP_sch_static; } -static int addMonoNonMonoModifier(OpenMPSchedType Schedule, +static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, 
OpenMPScheduleClauseModifier M2) { int Modifier = 0; @@ -3506,6 +3656,20 @@ static int addMonoNonMonoModifier(OpenMPSchedType Schedule, case OMPC_SCHEDULE_MODIFIER_unknown: break; } + // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription. + // If the static schedule kind is specified or if the ordered clause is + // specified, and if the nonmonotonic modifier is not specified, the effect is + // as if the monotonic modifier is specified. Otherwise, unless the monotonic + // modifier is specified, the effect is as if the nonmonotonic modifier is + // specified. + if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { + if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || + Schedule == OMP_sch_static_balanced_chunked || + Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || + Schedule == OMP_dist_sch_static_chunked || + Schedule == OMP_dist_sch_static)) + Modifier = OMP_sch_modifier_nonmonotonic; + } return Schedule | Modifier; } @@ -3530,13 +3694,14 @@ void CGOpenMPRuntime::emitForDispatchInit( llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk : CGF.Builder.getIntN(IVSize, 1); llvm::Value *Args[] = { - emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), + emitUpdateLocation(CGF, Loc), + getThreadID(CGF, Loc), CGF.Builder.getInt32(addMonoNonMonoModifier( - Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type - DispatchValues.LB, // Lower - DispatchValues.UB, // Upper - CGF.Builder.getIntN(IVSize, 1), // Stride - Chunk // Chunk + CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type + DispatchValues.LB, // Lower + DispatchValues.UB, // Upper + CGF.Builder.getIntN(IVSize, 1), // Stride + Chunk // Chunk }; CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); } @@ -3578,7 +3743,7 @@ static void emitForStaticInitCall( llvm::Value *Args[] = { UpdateLocation, ThreadId, - CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1, + CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, M2)), // Schedule type Values.IL.getPointer(), // &isLastIter Values.LB.getPointer(), // &LB @@ -3693,37 +3858,15 @@ void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, } void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, - OpenMPProcBindClauseKind ProcBind, + ProcBindKind ProcBind, SourceLocation Loc) { if (!CGF.HaveInsertPoint()) return; - // Constants for proc bind value accepted by the runtime. 
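Both changes in this area are driven by clause values in the user's source: the schedule kind and modifier encoded for the dispatch and static-init runtime calls (with the OpenMP 5.0 nonmonotonic default described in the new comment above), and the proc_bind value passed to __kmpc_push_proc_bind. A hypothetical example of the constructs involved, assuming an OpenMP 5.0 compiler:

  void work(double *a, int n) {
    // proc_bind(close) is lowered to a __kmpc_push_proc_bind call; under
    // OpenMP 5.0, schedule(dynamic) written without a modifier picks up
    // the nonmonotonic modifier by default, as the new comment explains
  #pragma omp parallel proc_bind(close)
    {
  #pragma omp for schedule(dynamic, 4)
      for (int i = 0; i < n; ++i)
        a[i] = a[i] * a[i];
    }
  }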
- enum ProcBindTy { - ProcBindFalse = 0, - ProcBindTrue, - ProcBindMaster, - ProcBindClose, - ProcBindSpread, - ProcBindIntel, - ProcBindDefault - } RuntimeProcBind; - switch (ProcBind) { - case OMPC_PROC_BIND_master: - RuntimeProcBind = ProcBindMaster; - break; - case OMPC_PROC_BIND_close: - RuntimeProcBind = ProcBindClose; - break; - case OMPC_PROC_BIND_spread: - RuntimeProcBind = ProcBindSpread; - break; - case OMPC_PROC_BIND_unknown: - llvm_unreachable("Unsupported proc_bind value."); - } + assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), - llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)}; + llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); } @@ -3899,157 +4042,6 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: Action(E.getKey(), E.getValue()); } -llvm::Function * -CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { - // If we don't have entries or if we are emitting code for the device, we - // don't need to do anything. - if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty()) - return nullptr; - - llvm::Module &M = CGM.getModule(); - ASTContext &C = CGM.getContext(); - - // Get list of devices we care about - const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples; - - // We should be creating an offloading descriptor only if there are devices - // specified. - assert(!Devices.empty() && "No OpenMP offloading devices??"); - - // Create the external variables that will point to the begin and end of the - // host entries section. These will be defined by the linker. - llvm::Type *OffloadEntryTy = - CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()); - std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"}); - auto *HostEntriesBegin = new llvm::GlobalVariable( - M, OffloadEntryTy, /*isConstant=*/true, - llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, - EntriesBeginName); - std::string EntriesEndName = getName({"omp_offloading", "entries_end"}); - auto *HostEntriesEnd = - new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true, - llvm::GlobalValue::ExternalLinkage, - /*Initializer=*/nullptr, EntriesEndName); - - // Create all device images - auto *DeviceImageTy = cast<llvm::StructType>( - CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy())); - ConstantInitBuilder DeviceImagesBuilder(CGM); - ConstantArrayBuilder DeviceImagesEntries = - DeviceImagesBuilder.beginArray(DeviceImageTy); - - for (const llvm::Triple &Device : Devices) { - StringRef T = Device.getTriple(); - std::string BeginName = getName({"omp_offloading", "img_start", ""}); - auto *ImgBegin = new llvm::GlobalVariable( - M, CGM.Int8Ty, /*isConstant=*/true, - llvm::GlobalValue::ExternalWeakLinkage, - /*Initializer=*/nullptr, Twine(BeginName).concat(T)); - std::string EndName = getName({"omp_offloading", "img_end", ""}); - auto *ImgEnd = new llvm::GlobalVariable( - M, CGM.Int8Ty, /*isConstant=*/true, - llvm::GlobalValue::ExternalWeakLinkage, - /*Initializer=*/nullptr, Twine(EndName).concat(T)); - - llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin, - HostEntriesEnd}; - createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data, - DeviceImagesEntries); - } - - // Create device images global array. 
- std::string ImagesName = getName({"omp_offloading", "device_images"}); - llvm::GlobalVariable *DeviceImages = - DeviceImagesEntries.finishAndCreateGlobal(ImagesName, - CGM.getPointerAlign(), - /*isConstant=*/true); - DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - - // This is a Zero array to be used in the creation of the constant expressions - llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty), - llvm::Constant::getNullValue(CGM.Int32Ty)}; - - // Create the target region descriptor. - llvm::Constant *Data[] = { - llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()), - llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(), - DeviceImages, Index), - HostEntriesBegin, HostEntriesEnd}; - std::string Descriptor = getName({"omp_offloading", "descriptor"}); - llvm::GlobalVariable *Desc = createGlobalStruct( - CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor); - - // Emit code to register or unregister the descriptor at execution - // startup or closing, respectively. - - llvm::Function *UnRegFn; - { - FunctionArgList Args; - ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other); - Args.push_back(&DummyPtr); - - CodeGenFunction CGF(CGM); - // Disable debug info for global (de-)initializer because they are not part - // of some particular construct. - CGF.disableDebugInfo(); - const auto &FI = - CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); - llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); - std::string UnregName = getName({"omp_offloading", "descriptor_unreg"}); - UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI); - CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args); - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib), - Desc); - CGF.FinishFunction(); - } - llvm::Function *RegFn; - { - CodeGenFunction CGF(CGM); - // Disable debug info for global (de-)initializer because they are not part - // of some particular construct. - CGF.disableDebugInfo(); - const auto &FI = CGM.getTypes().arrangeNullaryFunction(); - llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); - - // Encode offload target triples into the registration function name. It - // will serve as a comdat key for the registration/unregistration code for - // this particular combination of offloading targets. - SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U); - RegFnNameParts[0] = "omp_offloading"; - RegFnNameParts[1] = "descriptor_reg"; - llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2), - [](const llvm::Triple &T) -> const std::string& { - return T.getTriple(); - }); - llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end()); - std::string Descriptor = getName(RegFnNameParts); - RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI); - CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList()); - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc); - // Create a variable to drive the registration and unregistration of the - // descriptor, so we can reuse the logic that emits Ctors and Dtors. 
- ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), - SourceLocation(), nullptr, C.CharTy, - ImplicitParamDecl::Other); - CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); - CGF.FinishFunction(); - } - if (CGM.supportsCOMDAT()) { - // It is sufficient to call registration function only once, so create a - // COMDAT group for registration/unregistration functions and associated - // data. That would reduce startup time and code size. Registration - // function serves as a COMDAT group key. - llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName()); - RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); - RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility); - RegFn->setComdat(ComdatKey); - UnRegFn->setComdat(ComdatKey); - DeviceImages->setComdat(ComdatKey); - Desc->setComdat(ComdatKey); - } - return RegFn; -} - void CGOpenMPRuntime::createOffloadEntry( llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, llvm::GlobalValue::LinkageTypes Linkage) { @@ -4077,8 +4069,7 @@ void CGOpenMPRuntime::createOffloadEntry( Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); // The entry has to be created in the section the linker expects it to be. - std::string Section = getName({"omp_offloading", "entries"}); - Entry->setSection(Section); + Entry->setSection("omp_offloading_entries"); } void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { @@ -4091,13 +4082,16 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { // Right now we only generate metadata for function that contain target // regions. - // If we do not have entries, we don't need to do anything. - if (OffloadEntriesInfoManager.empty()) + // If we are in simd mode or there are no entries, we don't need to do + // anything. + if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) return; llvm::Module &M = CGM.getModule(); llvm::LLVMContext &C = M.getContext(); - SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16> + SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, + SourceLocation, StringRef>, + 16> OrderedEntries(OffloadEntriesInfoManager.size()); llvm::SmallVector<StringRef, 16> ParentFunctions( OffloadEntriesInfoManager.size()); @@ -4115,7 +4109,8 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { // Create function that emits metadata for each target region entry; auto &&TargetRegionMetadataEmitter = - [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString]( + [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, + &GetMDString]( unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line, const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { @@ -4133,8 +4128,19 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { GetMDInt(FileID), GetMDString(ParentName), GetMDInt(Line), GetMDInt(E.getOrder())}; + SourceLocation Loc; + for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), + E = CGM.getContext().getSourceManager().fileinfo_end(); + I != E; ++I) { + if (I->getFirst()->getUniqueID().getDevice() == DeviceID && + I->getFirst()->getUniqueID().getFile() == FileID) { + Loc = CGM.getContext().getSourceManager().translateFileLineCol( + I->getFirst(), Line, 1); + break; + } + } // Save this entry in the right position of the ordered entries array. 
- OrderedEntries[E.getOrder()] = &E; + OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); ParentFunctions[E.getOrder()] = ParentName; // Add metadata to the named metadata node. @@ -4162,7 +4168,8 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; // Save this entry in the right position of the ordered entries array. - OrderedEntries[E.getOrder()] = &E; + OrderedEntries[E.getOrder()] = + std::make_tuple(&E, SourceLocation(), MangledName); // Add metadata to the named metadata node. MD->addOperand(llvm::MDNode::get(C, Ops)); @@ -4171,11 +4178,11 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( DeviceGlobalVarMetadataEmitter); - for (const auto *E : OrderedEntries) { - assert(E && "All ordered entries must exist!"); + for (const auto &E : OrderedEntries) { + assert(std::get<0>(E) && "All ordered entries must exist!"); if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( - E)) { + std::get<0>(E))) { if (!CE->getID() || !CE->getAddress()) { // Do not blame the entry if the parent funtion is not emitted. StringRef FnName = ParentFunctions[CE->getOrder()]; @@ -4183,16 +4190,16 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { continue; unsigned DiagID = CGM.getDiags().getCustomDiagID( DiagnosticsEngine::Error, - "Offloading entry for target region is incorrect: either the " + "Offloading entry for target region in %0 is incorrect: either the " "address or the ID is invalid."); - CGM.getDiags().Report(DiagID); + CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; continue; } createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); - } else if (const auto *CE = - dyn_cast<OffloadEntriesInfoManagerTy:: - OffloadEntryInfoDeviceGlobalVar>(E)) { + } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: + OffloadEntryInfoDeviceGlobalVar>( + std::get<0>(E))) { OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( CE->getFlags()); @@ -4203,10 +4210,10 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { continue; if (!CE->getAddress()) { unsigned DiagID = CGM.getDiags().getCustomDiagID( - DiagnosticsEngine::Error, - "Offloading entry for declare target variable is incorrect: the " - "address is invalid."); - CGM.getDiags().Report(DiagID); + DiagnosticsEngine::Error, "Offloading entry for declare target " + "variable %0 is incorrect: the " + "address is invalid."); + CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); continue; } // The vaiable has no definition - no need to add the entry. @@ -4349,57 +4356,6 @@ QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { return TgtOffloadEntryQTy; } -QualType CGOpenMPRuntime::getTgtDeviceImageQTy() { - // These are the types we need to build: - // struct __tgt_device_image{ - // void *ImageStart; // Pointer to the target code start. - // void *ImageEnd; // Pointer to the target code end. - // // We also add the host entries to the device image, as it may be useful - // // for the target runtime to have access to that information. - // __tgt_offload_entry *EntriesBegin; // Begin of the table with all - // // the entries. - // __tgt_offload_entry *EntriesEnd; // End of the table with all the - // // entries (non inclusive). 
- // }; - if (TgtDeviceImageQTy.isNull()) { - ASTContext &C = CGM.getContext(); - RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image"); - RD->startDefinition(); - addFieldToRecordDecl(C, RD, C.VoidPtrTy); - addFieldToRecordDecl(C, RD, C.VoidPtrTy); - addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); - addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); - RD->completeDefinition(); - TgtDeviceImageQTy = C.getRecordType(RD); - } - return TgtDeviceImageQTy; -} - -QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() { - // struct __tgt_bin_desc{ - // int32_t NumDevices; // Number of devices supported. - // __tgt_device_image *DeviceImages; // Arrays of device images - // // (one per device). - // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the - // // entries. - // __tgt_offload_entry *EntriesEnd; // End of the table with all the - // // entries (non inclusive). - // }; - if (TgtBinaryDescriptorQTy.isNull()) { - ASTContext &C = CGM.getContext(); - RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc"); - RD->startDefinition(); - addFieldToRecordDecl( - C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); - addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy())); - addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); - addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); - RD->completeDefinition(); - TgtBinaryDescriptorQTy = C.getRecordType(RD); - } - return TgtBinaryDescriptorQTy; -} - namespace { struct PrivateHelpersTy { PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, @@ -4559,7 +4515,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); - llvm::Value *PartidParam = PartIdLVal.getPointer(); + llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); @@ -4572,7 +4528,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - PrivatesLVal.getPointer(), CGF.VoidPtrTy); + PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); } else { PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } @@ -4581,7 +4537,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, TaskPrivatesMap, CGF.Builder .CreatePointerBitCastOrAddrSpaceCast( - TDBase.getAddress(), CGF.VoidPtrTy) + TDBase.getAddress(CGF), CGF.VoidPtrTy) .getPointer()}; SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), std::end(CommonArgs)); @@ -4659,7 +4615,7 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, if (QualType::DestructionKind DtorKind = Field->getType().isDestructedType()) { LValue FieldLValue = CGF.EmitLValueForField(Base, Field); - CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); + CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); } } CGF.FinishFunction(); @@ -4757,8 +4713,8 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, LValue RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); LValue 
RefLoadLVal = CGF.EmitLoadOfPointerLValue( - RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>()); - CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); + RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); + CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); ++Counter; } CGF.FinishFunction(); @@ -4823,7 +4779,8 @@ static void emitPrivatesInit(CodeGenFunction &CGF, } else { SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); SharedRefLValue = CGF.MakeAddrLValue( - Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), + Address(SharedRefLValue.getPointer(CGF), + C.getDeclAlign(OriginalVD)), SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), SharedRefLValue.getTBAAInfo()); } @@ -4836,7 +4793,8 @@ static void emitPrivatesInit(CodeGenFunction &CGF, // Initialize firstprivate array using element-by-element // initialization. CGF.EmitOMPAggregateAssign( - PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type, + PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), + Type, [&CGF, Elem, Init, &CapturesInfo](Address DestElement, Address SrcElement) { // Clean up any temporaries needed by the initialization. @@ -4854,8 +4812,8 @@ static void emitPrivatesInit(CodeGenFunction &CGF, } } else { CodeGenFunction::OMPPrivateScope InitScope(CGF); - InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address { - return SharedRefLValue.getAddress(); + InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { + return SharedRefLValue.getAddress(CGF); }); (void)InitScope.Privatize(); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); @@ -5242,7 +5200,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, // Define type kmp_depend_info[<Dependences.size()>]; QualType KmpDependInfoArrayTy = C.getConstantArrayType( KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), - ArrayType::Normal, /*IndexTypeQuals=*/0); + nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); // kmp_depend_info[<Dependences.size()>] deps; DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); @@ -5255,10 +5213,10 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { LValue UpAddrLVal = CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); - llvm::Value *UpAddr = - CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1); + llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32( + UpAddrLVal.getPointer(CGF), /*Idx0=*/1); llvm::Value *LowIntPtr = - CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy); + CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGM.SizeTy); llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); } else { @@ -5271,7 +5229,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, LValue BaseAddrLVal = CGF.EmitLValueForField( Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); CGF.EmitStoreOfScalar( - CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy), + CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGF.IntPtrTy), BaseAddrLVal); // deps[i].len = sizeof(<Dependences[i].second>); LValue LenLVal = CGF.EmitLValueForField( @@ -5388,7 +5346,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, }; if (IfCond) { - emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); + emitIfClause(CGF, IfCond, 
ThenCodeGen, ElseCodeGen); } else { RegionCodeGenTy ThenRCG(ThenCodeGen); ThenRCG(CGF); @@ -5425,21 +5383,24 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); const auto *LBVar = cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); - CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(), + CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), + LBLVal.getQuals(), /*IsInitializer=*/true); LValue UBLVal = CGF.EmitLValueForField( Result.TDBase, *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); const auto *UBVar = cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); - CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(), + CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), + UBLVal.getQuals(), /*IsInitializer=*/true); LValue StLVal = CGF.EmitLValueForField( Result.TDBase, *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); const auto *StVar = cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); - CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), + CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), + StLVal.getQuals(), /*IsInitializer=*/true); // Store reductions address. LValue RedLVal = CGF.EmitLValueForField( @@ -5448,7 +5409,7 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, if (Data.Reductions) { CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); } else { - CGF.EmitNullInitialization(RedLVal.getAddress(), + CGF.EmitNullInitialization(RedLVal.getAddress(CGF), CGF.getContext().VoidPtrTy); } enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; @@ -5457,11 +5418,11 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, ThreadID, Result.NewTask, IfVal, - LBLVal.getPointer(), - UBLVal.getPointer(), + LBLVal.getPointer(CGF), + UBLVal.getPointer(CGF), CGF.EmitLoadOfScalar(StLVal, Loc), llvm::ConstantInt::getSigned( - CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler + CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler llvm::ConstantInt::getSigned( CGF.IntTy, Data.Schedule.getPointer() ? Data.Schedule.getInt() ? NumTasks : Grainsize @@ -5763,7 +5724,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, } llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); QualType ReductionArrayTy = - C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, + C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); Address ReductionList = CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); @@ -5773,7 +5734,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); CGF.Builder.CreateStore( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), + CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), Elem); if ((*IPriv)->getType()->isVariablyModifiedType()) { // Store array size. 
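The surrounding hunks touch two lowerings: the generic reduction path, which packs pointers to the reduction variables into the void* .omp.reduction.red_list array, and the taskloop path, which fills in the lower bound, upper bound, stride and the grainsize/num_tasks selector for the runtime call. A sketch of source that reaches both, assuming an OpenMP 5.0 compiler (the grainsize value is arbitrary):

  double sum_squares(const double *a, int n) {
    double s = 0.0;
  #pragma omp parallel for reduction(+ : s)   // generic reduction lowering
    for (int i = 0; i < n; ++i)
      s += a[i] * a[i];

    double t = 0.0;
  #pragma omp parallel
  #pragma omp single
  #pragma omp taskloop grainsize(64) reduction(+ : t)   // taskloop lowering
    for (int i = 0; i < n; ++i)
      t += a[i];
    return s + t;
  }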
@@ -6201,7 +6162,7 @@ static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, // Emit the finalizer body: // <destroy>(<type>* %0) RCG.emitCleanups(CGF, N, PrivateAddr); - CGF.FinishFunction(); + CGF.FinishFunction(Loc); return Fn; } @@ -6235,7 +6196,7 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( unsigned Size = Data.ReductionVars.size(); llvm::APInt ArraySize(/*numBits=*/64, Size); QualType ArrayRDType = C.getConstantArrayType( - RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0); + RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); // kmp_task_red_input_t .rd_input.[Size]; Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies, @@ -6253,7 +6214,7 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); RCG.emitSharedLValue(CGF, Cnt); llvm::Value *CastedShared = - CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer()); + CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); CGF.EmitStoreOfScalar(CastedShared, SharedLVal); RCG.emitAggregateType(CGF, Cnt); llvm::Value *SizeValInChars; @@ -6296,7 +6257,8 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true), FlagsLVal); } else - CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType()); + CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF), + FlagsLVal.getType()); } // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void // *data); @@ -6332,7 +6294,7 @@ void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); CGF.Builder.CreateStore( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy), + RCG.getSharedLValue(N).getPointer(CGF), CGM.VoidPtrTy), SharedAddr, /*IsVolatile=*/false); } } @@ -6343,12 +6305,12 @@ Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, LValue SharedLVal) { // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void // *d); - llvm::Value *Args[] = { - CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, - /*isSigned=*/true), - ReductionsPtr, - CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(), - CGM.VoidPtrTy)}; + llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), + CGM.IntTy, + /*isSigned=*/true), + ReductionsPtr, + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + SharedLVal.getPointer(CGF), CGM.VoidPtrTy)}; return Address( CGF.EmitRuntimeCall( createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args), @@ -6471,8 +6433,8 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitBlock(ContBB, /*IsFinished=*/true); }; if (IfCond) { - emitOMPIfClause(CGF, IfCond, ThenGen, - [](CodeGenFunction &, PrePostActionTy &) {}); + emitIfClause(CGF, IfCond, ThenGen, + [](CodeGenFunction &, PrePostActionTy &) {}); } else { RegionCodeGenTy ThenRCG(ThenGen); ThenRCG(CGF); @@ -6685,6 +6647,7 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF, case OMPD_parallel: case OMPD_for: case OMPD_parallel_for: + case OMPD_parallel_master: case OMPD_parallel_sections: case OMPD_for_simd: case OMPD_parallel_for_simd: @@ -6720,12 +6683,17 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF, case OMPD_teams_distribute_parallel_for_simd: case OMPD_target_update: case OMPD_declare_simd: + case 
OMPD_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: + case OMPD_parallel_master_taskloop_simd: case OMPD_requires: case OMPD_unknown: break; @@ -6990,6 +6958,7 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF, case OMPD_parallel: case OMPD_for: case OMPD_parallel_for: + case OMPD_parallel_master: case OMPD_parallel_sections: case OMPD_for_simd: case OMPD_parallel_for_simd: @@ -7025,12 +6994,17 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF, case OMPD_teams_distribute_parallel_for_simd: case OMPD_target_update: case OMPD_declare_simd: + case OMPD_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: + case OMPD_parallel_master_taskloop_simd: case OMPD_requires: case OMPD_unknown: break; @@ -7079,12 +7053,24 @@ public: OMP_MAP_LITERAL = 0x100, /// Implicit map OMP_MAP_IMPLICIT = 0x200, + /// Close is a hint to the runtime to allocate memory close to + /// the target device. + OMP_MAP_CLOSE = 0x400, /// The 16 MSBs of the flags indicate whether the entry is member of some /// struct/class. OMP_MAP_MEMBER_OF = 0xffff000000000000, LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), }; + /// Get the offset of the OMP_MAP_MEMBER_OF field. + static unsigned getFlagMemberOffset() { + unsigned Offset = 0; + for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); + Remain = Remain >> 1) + Offset++; + return Offset; + } + /// Class that associates information with a base pointer to be passed to the /// runtime library. class BasePointerInfo { @@ -7148,8 +7134,11 @@ private: : IE(IE), VD(VD) {} }; - /// Directive from where the map clauses were extracted. - const OMPExecutableDirective &CurDir; + /// The target directive from where the mappable clauses were extracted. It + /// is either a executable directive or a user-defined mapper directive. + llvm::PointerUnion<const OMPExecutableDirective *, + const OMPDeclareMapperDecl *> + CurDir; /// Function the directive is being generated for. CodeGenFunction &CGF; @@ -7181,9 +7170,11 @@ private: OAE->getBase()->IgnoreParenImpCasts()) .getCanonicalType(); - // If there is no length associated with the expression, that means we - // are using the whole length of the base. - if (!OAE->getLength() && OAE->getColonLoc().isValid()) + // If there is no length associated with the expression and lower bound is + // not specified too, that means we are using the whole length of the + // base. + if (!OAE->getLength() && OAE->getColonLoc().isValid() && + !OAE->getLowerBound()) return CGF.getTypeSize(BaseTy); llvm::Value *ElemSize; @@ -7197,13 +7188,30 @@ private: // If we don't have a length at this point, that is because we have an // array section with a single element. 
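The new getFlagMemberOffset helper above just counts the trailing zero bits of OMP_MAP_MEMBER_OF so a MEMBER_OF position can be shifted into the 16 most significant bits of the map-type flags. A standalone sketch (plain C++, assuming the 0xffff000000000000 mask shown in the enum; not the clang sources):

#include <cassert>
#include <cstdint>

// Mirrors getFlagMemberOffset(): count trailing zeros of the MEMBER_OF mask.
static unsigned flagMemberOffset(uint64_t MemberOfMask) {
  unsigned Offset = 0;
  for (uint64_t Remain = MemberOfMask; !(Remain & 1); Remain >>= 1)
    ++Offset;
  return Offset;
}

int main() {
  // 0xffff000000000000 keeps the 16 MSBs, so the offset is 48.
  assert(flagMemberOffset(0xffff000000000000ULL) == 48);
  return 0;
}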
- if (!OAE->getLength()) + if (!OAE->getLength() && OAE->getColonLoc().isInvalid()) return ElemSize; - llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength()); - LengthVal = - CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false); - return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); + if (const Expr *LenExpr = OAE->getLength()) { + llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); + LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), + CGF.getContext().getSizeType(), + LenExpr->getExprLoc()); + return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); + } + assert(!OAE->getLength() && OAE->getColonLoc().isValid() && + OAE->getLowerBound() && "expected array_section[lb:]."); + // Size = sizetype - lb * elemtype; + llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); + llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); + LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), + CGF.getContext().getSizeType(), + OAE->getLowerBound()->getExprLoc()); + LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); + llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); + llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); + LengthVal = CGF.Builder.CreateSelect( + Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); + return LengthVal; } return CGF.getTypeSize(ExprTy); } @@ -7247,6 +7255,9 @@ private: if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) != MapModifiers.end()) Bits |= OMP_MAP_ALWAYS; + if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) + != MapModifiers.end()) + Bits |= OMP_MAP_CLOSE; return Bits; } @@ -7486,11 +7497,11 @@ private: } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || (OASE && isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { - BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); + BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); } else { // The base is the reference to the variable. // BP = &Var. - BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); + BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); if (const auto *VD = dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = @@ -7584,8 +7595,8 @@ private: isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) && "Unexpected expression"); - Address LB = - CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress(); + Address LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) + .getAddress(CGF); // If this component is a pointer inside the base struct then we don't // need to create any entry for it - it will be combined with the object @@ -7632,7 +7643,7 @@ private: if (MC.getAssociatedDeclaration()) { ComponentLB = CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) - .getAddress(); + .getAddress(CGF); Size = CGF.Builder.CreatePtrDiff( CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), CGF.EmitCastToVoidPtr(LB.getPointer())); @@ -7675,10 +7686,10 @@ private: if (!IsExpressionFirstInfo) { // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, - // then we reset the TO/FROM/ALWAYS/DELETE flags. + // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 
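A hedged example (not from the patch) of the new array_section[lb:] case handled above: with no length but an explicit lower bound, the emitted size is sizeof(base) minus lb times the element size, and the select clamps the result to zero if the lower bound were past the end.

// Hypothetical user code: the mapped size for a[2:] is (8 - 2) * sizeof(double).
void bump(void) {
  double a[8] = {0};
#pragma omp target map(tofrom : a[2:])
  a[2] += 1.0;
}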
if (IsPointer) Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | - OMP_MAP_DELETE); + OMP_MAP_DELETE | OMP_MAP_CLOSE); if (ShouldBeMemberOf) { // Set placeholder value MEMBER_OF=FFFF to indicate that the flag @@ -7752,9 +7763,9 @@ private: } static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { - // Member of is given by the 16 MSB of the flag, so rotate by 48 bits. + // Rotate by getFlagMemberOffset() bits. return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) - << 48); + << getFlagMemberOffset()); } static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, @@ -7834,7 +7845,7 @@ private: public: MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) - : CurDir(Dir), CGF(CGF) { + : CurDir(&Dir), CGF(CGF) { // Extract firstprivate clause information. for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) for (const auto *D : C->varlists()) @@ -7846,6 +7857,10 @@ public: DevPointersMap[L.first].push_back(L.second); } + /// Constructor for the declare mapper directive. + MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) + : CurDir(&Dir), CGF(CGF) {} + /// Generate code for the combined entry if we have a partially mapped struct /// and take care of the mapping flags of the arguments corresponding to /// individual struct members. @@ -7907,19 +7922,21 @@ public: IsImplicit); }; - // FIXME: MSVC 2013 seems to require this-> to find member CurDir. - for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) - for (const auto &L : C->component_lists()) { + assert(CurDir.is<const OMPExecutableDirective *>() && + "Expect a executable directive"); + const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); + for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) + for (const auto L : C->component_lists()) { InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(), /*ReturnDevicePointer=*/false, C->isImplicit()); } - for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>()) - for (const auto &L : C->component_lists()) { + for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>()) + for (const auto L : C->component_lists()) { InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit()); } - for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>()) - for (const auto &L : C->component_lists()) { + for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>()) + for (const auto L : C->component_lists()) { InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit()); } @@ -7933,10 +7950,9 @@ public: llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>> DeferredInfo; - // FIXME: MSVC 2013 seems to require this-> to find member CurDir. for (const auto *C : - this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) { - for (const auto &L : C->component_lists()) { + CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) { + for (const auto L : C->component_lists()) { assert(!L.second.empty() && "Not expecting empty list of components!"); const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); VD = cast<ValueDecl>(VD->getCanonicalDecl()); @@ -7964,7 +7980,6 @@ public: // We didn't find any match in our map information - generate a zero // size array section - if the pointer is a struct member we defer this // action until the whole struct has been processed. 
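The close map-type modifier introduced by this patch is folded into the OMP_MAP_CLOSE (0x400) bit and, like TO/FROM/ALWAYS/DELETE, is cleared again for the pointee side of a PTR_AND_OBJ pair. A hedged usage sketch (hypothetical, not from the patch):

// Hypothetical user code: close is a hint to the runtime to allocate the
// device copy in memory close to the target device.
void touch(int *p, int n) {
#pragma omp target data map(close, tofrom : p[0:n])
  {
#pragma omp target
    p[0] = n;
  }
}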
- // FIXME: MSVC 2013 seems to require this-> to find member CGF. if (isa<MemberExpr>(IE)) { // Insert the pointer into Info to be processed by // generateInfoForComponentList. Because it is a member pointer @@ -7977,11 +7992,11 @@ public: /*ReturnDevicePointer=*/false, C->isImplicit()); DeferredInfo[nullptr].emplace_back(IE, VD); } else { - llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( - this->CGF.EmitLValue(IE), IE->getExprLoc()); + llvm::Value *Ptr = + CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); BasePointers.emplace_back(Ptr, VD); Pointers.push_back(Ptr); - Sizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty)); + Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); } } @@ -8005,11 +8020,10 @@ public: // Remember the current base pointer index. unsigned CurrentBasePointersIdx = CurBasePointers.size(); - // FIXME: MSVC 2013 seems to require this-> to find the member method. - this->generateInfoForComponentList( - L.MapType, L.MapModifiers, L.Components, CurBasePointers, - CurPointers, CurSizes, CurTypes, PartialStruct, - IsFirstComponentList, L.IsImplicit); + generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, + CurBasePointers, CurPointers, CurSizes, + CurTypes, PartialStruct, + IsFirstComponentList, L.IsImplicit); // If this entry relates with a device pointer, set the relevant // declaration and add the 'return pointer' flag. @@ -8033,7 +8047,7 @@ public: auto CI = DeferredInfo.find(M.first); if (CI != DeferredInfo.end()) { for (const DeferredDevicePtrEntryTy &L : CI->second) { - llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(); + llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( this->CGF.EmitLValue(L.IE), L.IE->getExprLoc()); CurBasePointers.emplace_back(BasePtr, L.VD); @@ -8061,6 +8075,78 @@ public: } } + /// Generate all the base pointers, section pointers, sizes and map types for + /// the extracted map clauses of user-defined mapper. + void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers, + MapValuesArrayTy &Pointers, + MapValuesArrayTy &Sizes, + MapFlagsArrayTy &Types) const { + assert(CurDir.is<const OMPDeclareMapperDecl *>() && + "Expect a declare mapper directive"); + const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); + // We have to process the component lists that relate with the same + // declaration in a single chunk so that we can generate the map flags + // correctly. Therefore, we organize all lists in a map. + llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; + + // Helper function to fill the information map for the different supported + // clauses. + auto &&InfoGen = [&Info]( + const ValueDecl *D, + OMPClauseMappableExprCommon::MappableExprComponentListRef L, + OpenMPMapClauseKind MapType, + ArrayRef<OpenMPMapModifierKind> MapModifiers, + bool ReturnDevicePointer, bool IsImplicit) { + const ValueDecl *VD = + D ? 
cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; + Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, + IsImplicit); + }; + + for (const auto *C : CurMapperDir->clauselists()) { + const auto *MC = cast<OMPMapClause>(C); + for (const auto L : MC->component_lists()) { + InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(), + /*ReturnDevicePointer=*/false, MC->isImplicit()); + } + } + + for (const auto &M : Info) { + // We need to know when we generate information for the first component + // associated with a capture, because the mapping flags depend on it. + bool IsFirstComponentList = true; + + // Temporary versions of arrays + MapBaseValuesArrayTy CurBasePointers; + MapValuesArrayTy CurPointers; + MapValuesArrayTy CurSizes; + MapFlagsArrayTy CurTypes; + StructRangeInfoTy PartialStruct; + + for (const MapInfo &L : M.second) { + assert(!L.Components.empty() && + "Not expecting declaration with no component lists."); + generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, + CurBasePointers, CurPointers, CurSizes, + CurTypes, PartialStruct, + IsFirstComponentList, L.IsImplicit); + IsFirstComponentList = false; + } + + // If there is an entry in PartialStruct it means we have a struct with + // individual members mapped. Emit an extra combined entry. + if (PartialStruct.Base.isValid()) + emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, + PartialStruct); + + // We need to append the results of this capture to what we already have. + BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); + Pointers.append(CurPointers.begin(), CurPointers.end()); + Sizes.append(CurSizes.begin(), CurSizes.end()); + Types.append(CurTypes.begin(), CurTypes.end()); + } + } + /// Emit capture info for lambdas for variables captured by reference. 
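A hedged source-level example (not from the patch) of a mapper declaration whose map clauses generateAllInfoForMapper above walks: each map clause contributes the component lists that are then lowered through generateInfoForComponentList and combined per declaration.

// Hypothetical user code for the declare mapper path.
struct vec {
  int len;
  double *data;
};
#pragma omp declare mapper(vec v) map(v, v.data[0 : v.len])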
void generateInfoForLambdaCaptures( const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers, @@ -8083,9 +8169,10 @@ public: LValue ThisLVal = CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); - LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer()); - BasePointers.push_back(ThisLVal.getPointer()); - Pointers.push_back(ThisLValVal.getPointer()); + LambdaPointers.try_emplace(ThisLVal.getPointer(CGF), + VDLVal.getPointer(CGF)); + BasePointers.push_back(ThisLVal.getPointer(CGF)); + Pointers.push_back(ThisLValVal.getPointer(CGF)); Sizes.push_back( CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty, /*isSigned=*/true)); @@ -8103,17 +8190,19 @@ public: LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); if (LC.getCaptureKind() == LCK_ByRef) { LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); - LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer()); - BasePointers.push_back(VarLVal.getPointer()); - Pointers.push_back(VarLValVal.getPointer()); + LambdaPointers.try_emplace(VarLVal.getPointer(CGF), + VDLVal.getPointer(CGF)); + BasePointers.push_back(VarLVal.getPointer(CGF)); + Pointers.push_back(VarLValVal.getPointer(CGF)); Sizes.push_back(CGF.Builder.CreateIntCast( CGF.getTypeSize( VD->getType().getCanonicalType().getNonReferenceType()), CGF.Int64Ty, /*isSigned=*/true)); } else { RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); - LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer()); - BasePointers.push_back(VarLVal.getPointer()); + LambdaPointers.try_emplace(VarLVal.getPointer(CGF), + VDLVal.getPointer(CGF)); + BasePointers.push_back(VarLVal.getPointer(CGF)); Pointers.push_back(VarRVal.getScalarVal()); Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); } @@ -8184,9 +8273,11 @@ public: std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef, OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>; SmallVector<MapData, 4> DeclComponentLists; - // FIXME: MSVC 2013 seems to require this-> to find member CurDir. - for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) { - for (const auto &L : C->decl_component_lists(VD)) { + assert(CurDir.is<const OMPExecutableDirective *>() && + "Expect a executable directive"); + const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); + for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { + for (const auto L : C->decl_component_lists(VD)) { assert(L.first == VD && "We got information for the wrong declaration??"); assert(!L.second.empty() && @@ -8333,10 +8424,13 @@ public: MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types) const { + assert(CurDir.is<const OMPExecutableDirective *>() && + "Expect a executable directive"); + const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); // Map other list items in the map clause which are not captured variables // but "declare target link" global variables. 
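A hedged example (hypothetical, not from the patch) of the lambda-capture case generateInfoForLambdaCaptures above deals with: the by-reference capture of x inside the lambda is recorded as an extra base/section pointer pair so the device copy of the lambda points at the device copy of x.

// Hypothetical user code.
void run(int &x) {
  auto body = [&x]() { x += 1; };
#pragma omp target map(tofrom : x)
  body();
}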
- for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) { - for (const auto &L : C->component_lists()) { + for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { + for (const auto L : C->component_lists()) { if (!L.first) continue; const auto *VD = dyn_cast<VarDecl>(L.first); @@ -8414,7 +8508,7 @@ public: CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD); // Copy the value of the original variable to the new global copy. CGF.Builder.CreateMemCpy( - CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(), + CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF), Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), CurSizes.back(), /*IsVolatile=*/false); // Use new global variable as the base pointers. @@ -8472,9 +8566,9 @@ emitOffloadingArrays(CodeGenFunction &CGF, } llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); - QualType PointerArrayType = - Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal, - /*IndexTypeQuals=*/0); + QualType PointerArrayType = Ctx.getConstantArrayType( + Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, + /*IndexTypeQuals=*/0); Info.BasePointersArray = CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); @@ -8487,9 +8581,9 @@ emitOffloadingArrays(CodeGenFunction &CGF, QualType Int64Ty = Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); if (hasRuntimeEvaluationCaptureSize) { - QualType SizeArrayType = - Ctx.getConstantArrayType(Int64Ty, PointerNumAP, ArrayType::Normal, - /*IndexTypeQuals=*/0); + QualType SizeArrayType = Ctx.getConstantArrayType( + Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, + /*IndexTypeQuals=*/0); Info.SizesArray = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); } else { @@ -8562,6 +8656,7 @@ emitOffloadingArrays(CodeGenFunction &CGF, } } } + /// Emit the arguments to be passed to the runtime library based on the /// arrays of pointers, sizes and map types. static void emitOffloadingArraysArgument( @@ -8642,6 +8737,7 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { case OMPD_parallel: case OMPD_for: case OMPD_parallel_for: + case OMPD_parallel_master: case OMPD_parallel_sections: case OMPD_for_simd: case OMPD_parallel_for_simd: @@ -8677,12 +8773,17 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { case OMPD_teams_distribute_parallel_for_simd: case OMPD_target_update: case OMPD_declare_simd: + case OMPD_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: + case OMPD_parallel_master_taskloop_simd: case OMPD_requires: case OMPD_unknown: llvm_unreachable("Unexpected directive."); @@ -8692,10 +8793,343 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { return nullptr; } +/// Emit the user-defined mapper function. The code generation follows the +/// pattern in the example below. +/// \code +/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, +/// void *base, void *begin, +/// int64_t size, int64_t type) { +/// // Allocate space for an array section first. +/// if (size > 1 && !maptype.IsDelete) +/// __tgt_push_mapper_component(rt_mapper_handle, base, begin, +/// size*sizeof(Ty), clearToFrom(type)); +/// // Map members. 
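A hedged example (hypothetical, not from the patch) of a declare target link global handled by the loop above: gbl is not a captured variable, but its map clause still has to produce an entry so the device-side link pointer can be associated with the mapped storage.

// Hypothetical user code.
int gbl[1024];
#pragma omp declare target link(gbl)

void init(void) {
#pragma omp target map(tofrom : gbl)
  gbl[0] = 1;
}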
+/// for (unsigned i = 0; i < size; i++) { +/// // For each component specified by this mapper: +/// for (auto c : all_components) { +/// if (c.hasMapper()) +/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, +/// c.arg_type); +/// else +/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, +/// c.arg_begin, c.arg_size, c.arg_type); +/// } +/// } +/// // Delete the array section. +/// if (size > 1 && maptype.IsDelete) +/// __tgt_push_mapper_component(rt_mapper_handle, base, begin, +/// size*sizeof(Ty), clearToFrom(type)); +/// } +/// \endcode +void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, + CodeGenFunction *CGF) { + if (UDMMap.count(D) > 0) + return; + ASTContext &C = CGM.getContext(); + QualType Ty = D->getType(); + QualType PtrTy = C.getPointerType(Ty).withRestrict(); + QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); + auto *MapperVarDecl = + cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); + SourceLocation Loc = D->getLocation(); + CharUnits ElementSize = C.getTypeSizeInChars(Ty); + + // Prepare mapper function arguments and attributes. + ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, + ImplicitParamDecl::Other); + ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, + ImplicitParamDecl::Other); + ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, + ImplicitParamDecl::Other); + FunctionArgList Args; + Args.push_back(&HandleArg); + Args.push_back(&BaseArg); + Args.push_back(&BeginArg); + Args.push_back(&SizeArg); + Args.push_back(&TypeArg); + const CGFunctionInfo &FnInfo = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + SmallString<64> TyStr; + llvm::raw_svector_ostream Out(TyStr); + CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out); + std::string Name = getName({"omp_mapper", TyStr, D->getName()}); + auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, + Name, &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); + Fn->removeFnAttr(llvm::Attribute::OptimizeNone); + // Start the mapper function code generation. + CodeGenFunction MapperCGF(CGM); + MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); + // Compute the starting and end addreses of array elements. + llvm::Value *Size = MapperCGF.EmitLoadOfScalar( + MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false, + C.getPointerType(Int64Ty), Loc); + llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast( + MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(), + CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy))); + llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size); + llvm::Value *MapType = MapperCGF.EmitLoadOfScalar( + MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false, + C.getPointerType(Int64Ty), Loc); + // Prepare common arguments for array initiation and deletion. 
+ llvm::Value *Handle = MapperCGF.EmitLoadOfScalar( + MapperCGF.GetAddrOfLocalVar(&HandleArg), + /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); + llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar( + MapperCGF.GetAddrOfLocalVar(&BaseArg), + /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); + llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar( + MapperCGF.GetAddrOfLocalVar(&BeginArg), + /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); + + // Emit array initiation if this is an array section and \p MapType indicates + // that memory allocation is required. + llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head"); + emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, + ElementSize, HeadBB, /*IsInit=*/true); + + // Emit a for loop to iterate through SizeArg of elements and map all of them. + + // Emit the loop header block. + MapperCGF.EmitBlock(HeadBB); + llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body"); + llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done"); + // Evaluate whether the initial condition is satisfied. + llvm::Value *IsEmpty = + MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); + MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); + llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); + + // Emit the loop body block. + MapperCGF.EmitBlock(BodyBB); + llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( + PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); + PtrPHI->addIncoming(PtrBegin, EntryBB); + Address PtrCurrent = + Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg) + .getAlignment() + .alignmentOfArrayElement(ElementSize)); + // Privatize the declared variable of mapper to be the current array element. + CodeGenFunction::OMPPrivateScope Scope(MapperCGF); + Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() { + return MapperCGF + .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>()) + .getAddress(MapperCGF); + }); + (void)Scope.Privatize(); + + // Get map clause information. Fill up the arrays with all mapped variables. + MappableExprsHandler::MapBaseValuesArrayTy BasePointers; + MappableExprsHandler::MapValuesArrayTy Pointers; + MappableExprsHandler::MapValuesArrayTy Sizes; + MappableExprsHandler::MapFlagsArrayTy MapTypes; + MappableExprsHandler MEHandler(*D, MapperCGF); + MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes); + + // Call the runtime API __tgt_mapper_num_components to get the number of + // pre-existing components. + llvm::Value *OffloadingArgs[] = {Handle}; + llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs); + llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( + PreviousSize, + MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); + + // Fill up the runtime mapper handle for all components. + for (unsigned I = 0; I < BasePointers.size(); ++I) { + llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( + *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); + llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( + Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); + llvm::Value *CurSizeArg = Sizes[I]; + + // Extract the MEMBER_OF field from the map type. 
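A standalone arithmetic sketch (plain C++, not the clang sources) of the MEMBER_OF adjustment above: the count returned by __tgt_mapper_num_components is shifted by getFlagMemberOffset() and added, which renumbers a member entry past the components the runtime already holds.

#include <cassert>
#include <cstdint>

int main() {
  const unsigned Offset = 48;                    // getFlagMemberOffset()
  uint64_t OriMapType = (0ULL + 1) << Offset;    // getMemberOfFlag(0): member 0
  uint64_t ShiftedPreviousSize = 3ULL << Offset; // 3 pre-existing components
  uint64_t Combined = OriMapType + ShiftedPreviousSize;
  // The MEMBER_OF field now names position 3 (stored with a +1 bias).
  assert((Combined >> Offset) == 4);
  return 0;
}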
+ llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member"); + MapperCGF.EmitBlock(MemberBB); + llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]); + llvm::Value *Member = MapperCGF.Builder.CreateAnd( + OriMapType, + MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF)); + llvm::BasicBlock *MemberCombineBB = + MapperCGF.createBasicBlock("omp.member.combine"); + llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type"); + llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member); + MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB); + // Add the number of pre-existing components to the MEMBER_OF field if it + // is valid. + MapperCGF.EmitBlock(MemberCombineBB); + llvm::Value *CombinedMember = + MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); + // Do nothing if it is not a member of previous components. + MapperCGF.EmitBlock(TypeBB); + llvm::PHINode *MemberMapType = + MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype"); + MemberMapType->addIncoming(OriMapType, MemberBB); + MemberMapType->addIncoming(CombinedMember, MemberCombineBB); + + // Combine the map type inherited from user-defined mapper with that + // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM + // bits of the \a MapType, which is the input argument of the mapper + // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM + // bits of MemberMapType. + // [OpenMP 5.0], 1.2.6. map-type decay. + // | alloc | to | from | tofrom | release | delete + // ---------------------------------------------------------- + // alloc | alloc | alloc | alloc | alloc | release | delete + // to | alloc | to | alloc | to | release | delete + // from | alloc | alloc | from | from | release | delete + // tofrom | alloc | to | from | tofrom | release | delete + llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( + MapType, + MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO | + MappableExprsHandler::OMP_MAP_FROM)); + llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); + llvm::BasicBlock *AllocElseBB = + MapperCGF.createBasicBlock("omp.type.alloc.else"); + llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to"); + llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else"); + llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from"); + llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end"); + llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom); + MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB); + // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM. + MapperCGF.EmitBlock(AllocBB); + llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( + MemberMapType, + MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | + MappableExprsHandler::OMP_MAP_FROM))); + MapperCGF.Builder.CreateBr(EndBB); + MapperCGF.EmitBlock(AllocElseBB); + llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( + LeftToFrom, + MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO)); + MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); + // In case of to, clear OMP_MAP_FROM. 
+ MapperCGF.EmitBlock(ToBB); + llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( + MemberMapType, + MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM)); + MapperCGF.Builder.CreateBr(EndBB); + MapperCGF.EmitBlock(ToElseBB); + llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( + LeftToFrom, + MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM)); + MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); + // In case of from, clear OMP_MAP_TO. + MapperCGF.EmitBlock(FromBB); + llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( + MemberMapType, + MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); + // In case of tofrom, do nothing. + MapperCGF.EmitBlock(EndBB); + llvm::PHINode *CurMapType = + MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype"); + CurMapType->addIncoming(AllocMapType, AllocBB); + CurMapType->addIncoming(ToMapType, ToBB); + CurMapType->addIncoming(FromMapType, FromBB); + CurMapType->addIncoming(MemberMapType, ToElseBB); + + // TODO: call the corresponding mapper function if a user-defined mapper is + // associated with this map clause. + // Call the runtime API __tgt_push_mapper_component to fill up the runtime + // data structure. + llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, + CurSizeArg, CurMapType}; + MapperCGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__tgt_push_mapper_component), + OffloadingArgs); + } + + // Update the pointer to point to the next element that needs to be mapped, + // and check whether we have mapped all elements. + llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32( + PtrPHI, /*Idx0=*/1, "omp.arraymap.next"); + PtrPHI->addIncoming(PtrNext, BodyBB); + llvm::Value *IsDone = + MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone"); + llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit"); + MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB); + + MapperCGF.EmitBlock(ExitBB); + // Emit array deletion if this is an array section and \p MapType indicates + // that deletion is required. + emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, + ElementSize, DoneBB, /*IsInit=*/false); + + // Emit the function exit block. + MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true); + MapperCGF.FinishFunction(); + UDMMap.try_emplace(D, Fn); + if (CGF) { + auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn); + Decls.second.push_back(D); + } +} + +/// Emit the array initialization or deletion portion for user-defined mapper +/// code generation. First, it evaluates whether an array section is mapped and +/// whether the \a MapType instructs to delete this section. If \a IsInit is +/// true, and \a MapType indicates to not delete this array, array +/// initialization code is generated. If \a IsInit is false, and \a MapType +/// indicates to not this array, array deletion code is generated. +void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( + CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, + llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, + CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) { + StringRef Prefix = IsInit ? ".init" : ".del"; + + // Evaluate if this is an array section. 
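A standalone sketch of one row of the decay table above (plain C++; the concrete bit values OMP_MAP_TO = 0x1 and OMP_MAP_FROM = 0x2 are an assumption, they are defined earlier in the file and not shown in this hunk): an enclosing "to" map combined with a member the mapper declares "tofrom" decays to "to".

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t TO = 0x1, FROM = 0x2; // assumed values, see note above
  uint64_t MapType = TO;               // map type passed into the mapper
  uint64_t MemberMapType = TO | FROM;  // member declared tofrom by the mapper
  uint64_t LeftToFrom = MapType & (TO | FROM);
  uint64_t Result;
  if (LeftToFrom == 0)                 // alloc: drop both transfer bits
    Result = MemberMapType & ~(TO | FROM);
  else if (LeftToFrom == TO)           // to: drop FROM
    Result = MemberMapType & ~FROM;
  else if (LeftToFrom == FROM)         // from: drop TO
    Result = MemberMapType & ~TO;
  else                                 // tofrom: keep the member type as is
    Result = MemberMapType;
  assert(Result == TO);
  return 0;
}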
+ llvm::BasicBlock *IsDeleteBB = + MapperCGF.createBasicBlock("omp.array" + Prefix + ".evaldelete"); + llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.array" + Prefix); + llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE( + Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); + MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB); + + // Evaluate if we are going to delete this section. + MapperCGF.EmitBlock(IsDeleteBB); + llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( + MapType, + MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); + llvm::Value *DeleteCond; + if (IsInit) { + DeleteCond = MapperCGF.Builder.CreateIsNull( + DeleteBit, "omp.array" + Prefix + ".delete"); + } else { + DeleteCond = MapperCGF.Builder.CreateIsNotNull( + DeleteBit, "omp.array" + Prefix + ".delete"); + } + MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB); + + MapperCGF.EmitBlock(BodyBB); + // Get the array size by multiplying element size and element number (i.e., \p + // Size). + llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( + Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); + // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves + // memory allocation/deletion purpose only. + llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( + MapType, + MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | + MappableExprsHandler::OMP_MAP_FROM))); + // Call the runtime API __tgt_push_mapper_component to fill up the runtime + // data structure. + llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg}; + MapperCGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs); +} + void CGOpenMPRuntime::emitTargetNumIterationsCall( - CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device, - const llvm::function_ref<llvm::Value *( - CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) { + CodeGenFunction &CGF, const OMPExecutableDirective &D, + llvm::Value *DeviceID, + llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, + const OMPLoopDirective &D)> + SizeEmitter) { OpenMPDirectiveKind Kind = D.getDirectiveKind(); const OMPExecutableDirective *TD = &D; // Get nested teams distribute kind directive, if any. @@ -8704,30 +9138,24 @@ void CGOpenMPRuntime::emitTargetNumIterationsCall( if (!TD) return; const auto *LD = cast<OMPLoopDirective>(TD); - auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF, + auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF, PrePostActionTy &) { - llvm::Value *NumIterations = SizeEmitter(CGF, *LD); - - // Emit device ID if any. 
- llvm::Value *DeviceID; - if (Device) - DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), - CGF.Int64Ty, /*isSigned=*/true); - else - DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); - - llvm::Value *Args[] = {DeviceID, NumIterations}; - CGF.EmitRuntimeCall( - createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args); + if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { + llvm::Value *Args[] = {DeviceID, NumIterations}; + CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args); + } }; emitInlinedDirective(CGF, OMPD_unknown, CodeGen); } -void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, - const OMPExecutableDirective &D, - llvm::Function *OutlinedFn, - llvm::Value *OutlinedFnID, - const Expr *IfCond, const Expr *Device) { +void CGOpenMPRuntime::emitTargetCall( + CodeGenFunction &CGF, const OMPExecutableDirective &D, + llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, + const Expr *Device, + llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, + const OMPLoopDirective &D)> + SizeEmitter) { if (!CGF.HaveInsertPoint()) return; @@ -8746,8 +9174,8 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, llvm::Value *MapTypesArray = nullptr; // Fill up the pointer arrays and transfer execution to the device. auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, - &MapTypesArray, &CS, RequiresOuterTask, - &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) { + &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars, + SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { // On top of the arrays that were filled up, the target offloading call // takes as arguments the device id as well as the host pointer. The host // pointer is used by the runtime library to identify the current target @@ -8779,6 +9207,9 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); + // Emit tripcount for the target loop-based directive. + emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter); + bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); // The target region is an outlined function launched by the runtime // via calls __tgt_target() or __tgt_target_teams(). @@ -8985,7 +9416,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, // specify target triples. 
if (OutlinedFnID) { if (IfCond) { - emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); + emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); } else { RegionCodeGenTy ThenRCG(TargetThenGen); ThenRCG(CGF); @@ -9068,6 +9499,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, case OMPD_parallel: case OMPD_for: case OMPD_parallel_for: + case OMPD_parallel_master: case OMPD_parallel_sections: case OMPD_for_simd: case OMPD_parallel_for_simd: @@ -9103,12 +9535,17 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, case OMPD_teams_distribute_parallel_for_simd: case OMPD_target_update: case OMPD_declare_simd: + case OMPD_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: + case OMPD_parallel_master_taskloop_simd: case OMPD_requires: case OMPD_unknown: llvm_unreachable("Unknown target directive for OpenMP device codegen."); @@ -9137,18 +9574,32 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { // If emitting code for the host, we do not process FD here. Instead we do // the normal code generation. - if (!CGM.getLangOpts().OpenMPIsDevice) + if (!CGM.getLangOpts().OpenMPIsDevice) { + if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) { + Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = + OMPDeclareTargetDeclAttr::getDeviceType(FD); + // Do not emit device_type(nohost) functions for the host. + if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) + return true; + } return false; + } const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); - StringRef Name = CGM.getMangledName(GD); // Try to detect target regions in the function. - if (const auto *FD = dyn_cast<FunctionDecl>(VD)) + if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { + StringRef Name = CGM.getMangledName(GD); scanForTargetRegionsFunctions(FD->getBody(), Name); + Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = + OMPDeclareTargetDeclAttr::getDeviceType(FD); + // Do not emit device_type(nohost) functions for the host. + if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host) + return true; + } // Do not to emit function if it is not marked as declare target. return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && - AlreadyEmittedTargetFunctions.count(Name) == 0; + AlreadyEmittedTargetDecls.count(VD) == 0; } bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { @@ -9221,6 +9672,9 @@ CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr) { + if (CGM.getLangOpts().OMPTargetTriples.empty() && + !CGM.getLangOpts().OpenMPIsDevice) + return; llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); if (!Res) { @@ -9376,20 +9830,20 @@ bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) return true; - StringRef Name = CGM.getMangledName(GD); const auto *D = cast<FunctionDecl>(GD.getDecl()); // Do not to emit function if it is marked as declare target as it was already // emitted. 
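A hedged example (hypothetical, not from the patch) of the new device_type filtering in emitTargetFunctions above: a nohost function is skipped by the host pass and only emitted when compiling for the device.

// Hypothetical user code.
void device_only_helper(void);
#pragma omp declare target to(device_only_helper) device_type(nohost)
void device_only_helper(void) {}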
if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { - if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) { - if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name))) + if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { + if (auto *F = dyn_cast_or_null<llvm::Function>( + CGM.GetGlobalValue(CGM.getMangledName(GD)))) return !F->isDeclaration(); return false; } return true; } - return !AlreadyEmittedTargetFunctions.insert(Name).second; + return !AlreadyEmittedTargetDecls.insert(D).second; } llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { @@ -9433,17 +9887,6 @@ llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { return RequiresRegFn; } -llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { - // If we have offloading in the current module, we need to emit the entries - // now and register the offloading descriptor. - createOffloadEntriesAndInfoMetadata(); - - // Create and register the offloading binary descriptors. This is the main - // entity that captures all the information about offloading in the current - // compilation unit. - return createOffloadingBinaryDescriptorRegistration(); -} - void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, @@ -9602,7 +10045,7 @@ void CGOpenMPRuntime::emitTargetDataCalls( auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; if (IfCond) { - emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); + emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); } else { RegionCodeGenTy RCG(BeginThenGen); RCG(CGF); @@ -9616,7 +10059,7 @@ void CGOpenMPRuntime::emitTargetDataCalls( } if (IfCond) { - emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen); + emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); } else { RegionCodeGenTy RCG(EndThenGen); RCG(CGF); @@ -9679,6 +10122,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( case OMPD_parallel: case OMPD_for: case OMPD_parallel_for: + case OMPD_parallel_master: case OMPD_parallel_sections: case OMPD_for_simd: case OMPD_parallel_for_simd: @@ -9711,12 +10155,17 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( case OMPD_teams_distribute_parallel_for: case OMPD_teams_distribute_parallel_for_simd: case OMPD_declare_simd: + case OMPD_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: + case OMPD_parallel_master_taskloop_simd: case OMPD_target: case OMPD_target_simd: case OMPD_target_teams_distribute: @@ -9768,8 +10217,8 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( }; if (IfCond) { - emitOMPIfClause(CGF, IfCond, TargetThenGen, - [](CodeGenFunction &CGF, PrePostActionTy &) {}); + emitIfClause(CGF, IfCond, TargetThenGen, + [](CodeGenFunction &CGF, PrePostActionTy &) {}); } else { RegionCodeGenTy ThenRCG(TargetThenGen); ThenRCG(CGF); @@ -10307,8 +10756,7 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, ExprLoc = VLENExpr->getExprLoc(); } OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); - if (CGM.getTriple().getArch() == llvm::Triple::x86 || - CGM.getTriple().getArch() == llvm::Triple::x86_64) { + if (CGM.getTriple().isX86()) { emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { unsigned VLEN = 
VLENVal.getExtValue(); @@ -10377,7 +10825,7 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, } llvm::APInt Size(/*numBits=*/32, NumIterations.size()); QualType ArrayTy = - C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0); + C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); CGF.EmitNullInitialization(DimsAddr, ArrayTy); @@ -10428,7 +10876,7 @@ void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); QualType ArrayTy = CGM.getContext().getConstantArrayType( - Int64Ty, Size, ArrayType::Normal, 0); + Int64Ty, Size, nullptr, ArrayType::Normal, 0); Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { const Expr *CounterVal = C->getLoopData(I); @@ -10566,6 +11014,595 @@ Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, return Address(Addr, Align); } +namespace { +using OMPContextSelectorData = + OpenMPCtxSelectorData<ArrayRef<StringRef>, llvm::APSInt>; +using CompleteOMPContextSelectorData = SmallVector<OMPContextSelectorData, 4>; +} // anonymous namespace + +/// Checks current context and returns true if it matches the context selector. +template <OpenMPContextSelectorSetKind CtxSet, OpenMPContextSelectorKind Ctx, + typename... Arguments> +static bool checkContext(const OMPContextSelectorData &Data, + Arguments... Params) { + assert(Data.CtxSet != OMP_CTX_SET_unknown && Data.Ctx != OMP_CTX_unknown && + "Unknown context selector or context selector set."); + return false; +} + +/// Checks for implementation={vendor(<vendor>)} context selector. +/// \returns true iff <vendor>="llvm", false otherwise. +template <> +bool checkContext<OMP_CTX_SET_implementation, OMP_CTX_vendor>( + const OMPContextSelectorData &Data) { + return llvm::all_of(Data.Names, + [](StringRef S) { return !S.compare_lower("llvm"); }); +} + +/// Checks for device={kind(<kind>)} context selector. +/// \returns true if <kind>="host" and compilation is for host. +/// true if <kind>="nohost" and compilation is for device. +/// true if <kind>="cpu" and compilation is for Arm, X86 or PPC CPU. +/// true if <kind>="gpu" and compilation is for NVPTX or AMDGCN. +/// false otherwise. 
+template <> +bool checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>( + const OMPContextSelectorData &Data, CodeGenModule &CGM) { + for (StringRef Name : Data.Names) { + if (!Name.compare_lower("host")) { + if (CGM.getLangOpts().OpenMPIsDevice) + return false; + continue; + } + if (!Name.compare_lower("nohost")) { + if (!CGM.getLangOpts().OpenMPIsDevice) + return false; + continue; + } + switch (CGM.getTriple().getArch()) { + case llvm::Triple::arm: + case llvm::Triple::armeb: + case llvm::Triple::aarch64: + case llvm::Triple::aarch64_be: + case llvm::Triple::aarch64_32: + case llvm::Triple::ppc: + case llvm::Triple::ppc64: + case llvm::Triple::ppc64le: + case llvm::Triple::x86: + case llvm::Triple::x86_64: + if (Name.compare_lower("cpu")) + return false; + break; + case llvm::Triple::amdgcn: + case llvm::Triple::nvptx: + case llvm::Triple::nvptx64: + if (Name.compare_lower("gpu")) + return false; + break; + case llvm::Triple::UnknownArch: + case llvm::Triple::arc: + case llvm::Triple::avr: + case llvm::Triple::bpfel: + case llvm::Triple::bpfeb: + case llvm::Triple::hexagon: + case llvm::Triple::mips: + case llvm::Triple::mipsel: + case llvm::Triple::mips64: + case llvm::Triple::mips64el: + case llvm::Triple::msp430: + case llvm::Triple::r600: + case llvm::Triple::riscv32: + case llvm::Triple::riscv64: + case llvm::Triple::sparc: + case llvm::Triple::sparcv9: + case llvm::Triple::sparcel: + case llvm::Triple::systemz: + case llvm::Triple::tce: + case llvm::Triple::tcele: + case llvm::Triple::thumb: + case llvm::Triple::thumbeb: + case llvm::Triple::xcore: + case llvm::Triple::le32: + case llvm::Triple::le64: + case llvm::Triple::amdil: + case llvm::Triple::amdil64: + case llvm::Triple::hsail: + case llvm::Triple::hsail64: + case llvm::Triple::spir: + case llvm::Triple::spir64: + case llvm::Triple::kalimba: + case llvm::Triple::shave: + case llvm::Triple::lanai: + case llvm::Triple::wasm32: + case llvm::Triple::wasm64: + case llvm::Triple::renderscript32: + case llvm::Triple::renderscript64: + case llvm::Triple::ve: + return false; + } + } + return true; +} + +static bool matchesContext(CodeGenModule &CGM, + const CompleteOMPContextSelectorData &ContextData) { + for (const OMPContextSelectorData &Data : ContextData) { + switch (Data.Ctx) { + case OMP_CTX_vendor: + assert(Data.CtxSet == OMP_CTX_SET_implementation && + "Expected implementation context selector set."); + if (!checkContext<OMP_CTX_SET_implementation, OMP_CTX_vendor>(Data)) + return false; + break; + case OMP_CTX_kind: + assert(Data.CtxSet == OMP_CTX_SET_device && + "Expected device context selector set."); + if (!checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>(Data, + CGM)) + return false; + break; + case OMP_CTX_unknown: + llvm_unreachable("Unknown context selector kind."); + } + } + return true; +} + +static CompleteOMPContextSelectorData +translateAttrToContextSelectorData(ASTContext &C, + const OMPDeclareVariantAttr *A) { + CompleteOMPContextSelectorData Data; + for (unsigned I = 0, E = A->scores_size(); I < E; ++I) { + Data.emplace_back(); + auto CtxSet = static_cast<OpenMPContextSelectorSetKind>( + *std::next(A->ctxSelectorSets_begin(), I)); + auto Ctx = static_cast<OpenMPContextSelectorKind>( + *std::next(A->ctxSelectors_begin(), I)); + Data.back().CtxSet = CtxSet; + Data.back().Ctx = Ctx; + const Expr *Score = *std::next(A->scores_begin(), I); + Data.back().Score = Score->EvaluateKnownConstInt(C); + switch (Ctx) { + case OMP_CTX_vendor: + assert(CtxSet == OMP_CTX_SET_implementation && + 
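A hedged source-level example (hypothetical, not from the patch) of declarations the selector checks above would evaluate: implementation={vendor(llvm)} always matches this compiler, while device={kind(gpu)} matches only NVPTX or AMDGCN device compilations.

// Hypothetical user code.
void llvm_saxpy(int n, float a, float *x, float *y);
void gpu_saxpy(int n, float a, float *x, float *y);

#pragma omp declare variant(llvm_saxpy) match(implementation = {vendor(llvm)})
#pragma omp declare variant(gpu_saxpy) match(device = {kind(gpu)})
void base_saxpy(int n, float a, float *x, float *y);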
"Expected implementation context selector set."); + Data.back().Names = + llvm::makeArrayRef(A->implVendors_begin(), A->implVendors_end()); + break; + case OMP_CTX_kind: + assert(CtxSet == OMP_CTX_SET_device && + "Expected device context selector set."); + Data.back().Names = + llvm::makeArrayRef(A->deviceKinds_begin(), A->deviceKinds_end()); + break; + case OMP_CTX_unknown: + llvm_unreachable("Unknown context selector kind."); + } + } + return Data; +} + +static bool isStrictSubset(const CompleteOMPContextSelectorData &LHS, + const CompleteOMPContextSelectorData &RHS) { + llvm::SmallDenseMap<std::pair<int, int>, llvm::StringSet<>, 4> RHSData; + for (const OMPContextSelectorData &D : RHS) { + auto &Pair = RHSData.FindAndConstruct(std::make_pair(D.CtxSet, D.Ctx)); + Pair.getSecond().insert(D.Names.begin(), D.Names.end()); + } + bool AllSetsAreEqual = true; + for (const OMPContextSelectorData &D : LHS) { + auto It = RHSData.find(std::make_pair(D.CtxSet, D.Ctx)); + if (It == RHSData.end()) + return false; + if (D.Names.size() > It->getSecond().size()) + return false; + if (llvm::set_union(It->getSecond(), D.Names)) + return false; + AllSetsAreEqual = + AllSetsAreEqual && (D.Names.size() == It->getSecond().size()); + } + + return LHS.size() != RHS.size() || !AllSetsAreEqual; +} + +static bool greaterCtxScore(const CompleteOMPContextSelectorData &LHS, + const CompleteOMPContextSelectorData &RHS) { + // Score is calculated as sum of all scores + 1. + llvm::APSInt LHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false); + bool RHSIsSubsetOfLHS = isStrictSubset(RHS, LHS); + if (RHSIsSubsetOfLHS) { + LHSScore = llvm::APSInt::get(0); + } else { + for (const OMPContextSelectorData &Data : LHS) { + if (Data.Score.getBitWidth() > LHSScore.getBitWidth()) { + LHSScore = LHSScore.extend(Data.Score.getBitWidth()) + Data.Score; + } else if (Data.Score.getBitWidth() < LHSScore.getBitWidth()) { + LHSScore += Data.Score.extend(LHSScore.getBitWidth()); + } else { + LHSScore += Data.Score; + } + } + } + llvm::APSInt RHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false); + if (!RHSIsSubsetOfLHS && isStrictSubset(LHS, RHS)) { + RHSScore = llvm::APSInt::get(0); + } else { + for (const OMPContextSelectorData &Data : RHS) { + if (Data.Score.getBitWidth() > RHSScore.getBitWidth()) { + RHSScore = RHSScore.extend(Data.Score.getBitWidth()) + Data.Score; + } else if (Data.Score.getBitWidth() < RHSScore.getBitWidth()) { + RHSScore += Data.Score.extend(RHSScore.getBitWidth()); + } else { + RHSScore += Data.Score; + } + } + } + return llvm::APSInt::compareValues(LHSScore, RHSScore) >= 0; +} + +/// Finds the variant function that matches current context with its context +/// selector. +static const FunctionDecl *getDeclareVariantFunction(CodeGenModule &CGM, + const FunctionDecl *FD) { + if (!FD->hasAttrs() || !FD->hasAttr<OMPDeclareVariantAttr>()) + return FD; + // Iterate through all DeclareVariant attributes and check context selectors. + const OMPDeclareVariantAttr *TopMostAttr = nullptr; + CompleteOMPContextSelectorData TopMostData; + for (const auto *A : FD->specific_attrs<OMPDeclareVariantAttr>()) { + CompleteOMPContextSelectorData Data = + translateAttrToContextSelectorData(CGM.getContext(), A); + if (!matchesContext(CGM, Data)) + continue; + // If the attribute matches the context, find the attribute with the highest + // score. 
+ if (!TopMostAttr || !greaterCtxScore(TopMostData, Data)) { + TopMostAttr = A; + TopMostData.swap(Data); + } + } + if (!TopMostAttr) + return FD; + return cast<FunctionDecl>( + cast<DeclRefExpr>(TopMostAttr->getVariantFuncRef()->IgnoreParenImpCasts()) + ->getDecl()); +} + +bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) { + const auto *D = cast<FunctionDecl>(GD.getDecl()); + // If the original function is defined already, use its definition. + StringRef MangledName = CGM.getMangledName(GD); + llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName); + if (Orig && !Orig->isDeclaration()) + return false; + const FunctionDecl *NewFD = getDeclareVariantFunction(CGM, D); + // Emit original function if it does not have declare variant attribute or the + // context does not match. + if (NewFD == D) + return false; + GlobalDecl NewGD = GD.getWithDecl(NewFD); + if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) { + DeferredVariantFunction.erase(D); + return true; + } + DeferredVariantFunction.insert(std::make_pair(D, std::make_pair(NewGD, GD))); + return true; +} + +CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( + CodeGenModule &CGM, const OMPLoopDirective &S) + : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { + assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); + if (!NeedToPush) + return; + NontemporalDeclsSet &DS = + CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); + for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { + for (const Stmt *Ref : C->private_refs()) { + const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); + const ValueDecl *VD; + if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { + VD = DRE->getDecl(); + } else { + const auto *ME = cast<MemberExpr>(SimpleRefExpr); + assert((ME->isImplicitCXXThis() || + isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && + "Expected member of current class."); + VD = ME->getMemberDecl(); + } + DS.insert(VD); + } + } +} + +CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { + if (!NeedToPush) + return; + CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); +} + +bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { + assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); + + return llvm::any_of( + CGM.getOpenMPRuntime().NontemporalDeclsStack, + [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); +} + +CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( + CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) + : CGM(CGF.CGM), + NeedToPush(llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), + [](const OMPLastprivateClause *C) { + return C->getKind() == + OMPC_LASTPRIVATE_conditional; + })) { + assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); + if (!NeedToPush) + return; + LastprivateConditionalData &Data = + CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); + for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { + if (C->getKind() != OMPC_LASTPRIVATE_conditional) + continue; + + for (const Expr *Ref : C->varlists()) { + Data.DeclToUniqeName.try_emplace( + cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), + generateUniqueName(CGM, "pl_cond", Ref)); + } + } + Data.IVLVal = IVLVal; + // In simd only mode or for simd directives no need to generate threadprivate + // references for the loop iteration counter, we can use the original one + // since outlining cannot happen in 
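A hedged example (hypothetical, not from the patch) of the nontemporal clause recorded by NontemporalDeclsRAII above: the listed variables are remembered for the duration of the loop so their later loads and stores can be marked nontemporal.

// Hypothetical user code.
void scale(float *a, const float *b, int n) {
#pragma omp simd nontemporal(a, b)
  for (int i = 0; i < n; ++i)
    a[i] = 2.0f * b[i];
}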
simd regions. + if (CGF.getLangOpts().OpenMPSimd || + isOpenMPSimdDirective(S.getDirectiveKind())) { + Data.UseOriginalIV = true; + return; + } + llvm::SmallString<16> Buffer; + llvm::raw_svector_ostream OS(Buffer); + PresumedLoc PLoc = + CGM.getContext().getSourceManager().getPresumedLoc(S.getBeginLoc()); + assert(PLoc.isValid() && "Source location is expected to be always valid."); + + llvm::sys::fs::UniqueID ID; + if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) + CGM.getDiags().Report(diag::err_cannot_open_file) + << PLoc.getFilename() << EC.message(); + OS << "$pl_cond_" << ID.getDevice() << "_" << ID.getFile() << "_" + << PLoc.getLine() << "_" << PLoc.getColumn() << "$iv"; + Data.IVName = OS.str(); +} + +CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { + if (!NeedToPush) + return; + CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); +} + +void CGOpenMPRuntime::initLastprivateConditionalCounter( + CodeGenFunction &CGF, const OMPExecutableDirective &S) { + if (CGM.getLangOpts().OpenMPSimd || + !llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), + [](const OMPLastprivateClause *C) { + return C->getKind() == OMPC_LASTPRIVATE_conditional; + })) + return; + const CGOpenMPRuntime::LastprivateConditionalData &Data = + LastprivateConditionalStack.back(); + if (Data.UseOriginalIV) + return; + // Global loop counter. Required to handle inner parallel-for regions. + // global_iv = iv; + Address GlobIVAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( + CGF, Data.IVLVal.getType(), Data.IVName); + LValue GlobIVLVal = CGF.MakeAddrLValue(GlobIVAddr, Data.IVLVal.getType()); + llvm::Value *IVVal = CGF.EmitLoadOfScalar(Data.IVLVal, S.getBeginLoc()); + CGF.EmitStoreOfScalar(IVVal, GlobIVLVal); +} + +namespace { +/// Checks if the lastprivate conditional variable is referenced in LHS. 
+class LastprivateConditionalRefChecker final
+    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
+  CodeGenFunction &CGF;
+  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
+  const Expr *FoundE = nullptr;
+  const Decl *FoundD = nullptr;
+  StringRef UniqueDeclName;
+  LValue IVLVal;
+  StringRef IVName;
+  SourceLocation Loc;
+  bool UseOriginalIV = false;
+
+public:
+  bool VisitDeclRefExpr(const DeclRefExpr *E) {
+    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
+         llvm::reverse(LPM)) {
+      auto It = D.DeclToUniqeName.find(E->getDecl());
+      if (It == D.DeclToUniqeName.end())
+        continue;
+      FoundE = E;
+      FoundD = E->getDecl()->getCanonicalDecl();
+      UniqueDeclName = It->getSecond();
+      IVLVal = D.IVLVal;
+      IVName = D.IVName;
+      UseOriginalIV = D.UseOriginalIV;
+      break;
+    }
+    return FoundE == E;
+  }
+  bool VisitMemberExpr(const MemberExpr *E) {
+    if (!CGF.IsWrappedCXXThis(E->getBase()))
+      return false;
+    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
+         llvm::reverse(LPM)) {
+      auto It = D.DeclToUniqeName.find(E->getMemberDecl());
+      if (It == D.DeclToUniqeName.end())
+        continue;
+      FoundE = E;
+      FoundD = E->getMemberDecl()->getCanonicalDecl();
+      UniqueDeclName = It->getSecond();
+      IVLVal = D.IVLVal;
+      IVName = D.IVName;
+      UseOriginalIV = D.UseOriginalIV;
+      break;
+    }
+    return FoundE == E;
+  }
+  bool VisitStmt(const Stmt *S) {
+    for (const Stmt *Child : S->children()) {
+      if (!Child)
+        continue;
+      if (const auto *E = dyn_cast<Expr>(Child))
+        if (!E->isGLValue())
+          continue;
+      if (Visit(Child))
+        return true;
+    }
+    return false;
+  }
+  explicit LastprivateConditionalRefChecker(
+      CodeGenFunction &CGF,
+      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
+      : CGF(CGF), LPM(LPM) {}
+  std::tuple<const Expr *, const Decl *, StringRef, LValue, StringRef, bool>
+  getFoundData() const {
+    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, IVName,
+                           UseOriginalIV);
+  }
+};
+} // namespace
+
+void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
+                                                         const Expr *LHS) {
+  if (CGF.getLangOpts().OpenMP < 50)
+    return;
+  LastprivateConditionalRefChecker Checker(CGF, LastprivateConditionalStack);
+  if (!Checker.Visit(LHS))
+    return;
+  const Expr *FoundE;
+  const Decl *FoundD;
+  StringRef UniqueDeclName;
+  LValue IVLVal;
+  StringRef IVName;
+  bool UseOriginalIV;
+  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, IVName, UseOriginalIV) =
+      Checker.getFoundData();
+
+  // Last updated loop counter for the lastprivate conditional var.
+  // int<xx> last_iv = 0;
+  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
+  llvm::Constant *LastIV =
+      getOrCreateInternalVariable(LLIVTy, UniqueDeclName + "$iv");
+  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
+      IVLVal.getAlignment().getAsAlign());
+  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
+
+  // Private address of the lastprivate conditional in the current context.
+  // priv_a
+  LValue LVal = CGF.EmitLValue(FoundE);
+  // Last value of the lastprivate conditional.
+  // decltype(priv_a) last_a;
+  llvm::Constant *Last = getOrCreateInternalVariable(
+      LVal.getAddress(CGF).getElementType(), UniqueDeclName);
+  cast<llvm::GlobalVariable>(Last)->setAlignment(
+      LVal.getAlignment().getAsAlign());
+  LValue LastLVal =
+      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());
+
+  // Global loop counter. Required to handle inner parallel-for regions.
+  // global_iv
+  if (!UseOriginalIV) {
+    Address IVAddr =
+        getAddrOfArtificialThreadPrivate(CGF, IVLVal.getType(), IVName);
+    IVLVal = CGF.MakeAddrLValue(IVAddr, IVLVal.getType());
+  }
+  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, FoundE->getExprLoc());
+
+  // #pragma omp critical(a)
+  // if (last_iv <= iv) {
+  //   last_iv = iv;
+  //   last_a = priv_a;
+  // }
+  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
+                    FoundE](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    Action.Enter(CGF);
+    llvm::Value *LastIVVal =
+        CGF.EmitLoadOfScalar(LastIVLVal, FoundE->getExprLoc());
+    // (last_iv <= global_iv) ? Check if the variable is updated and store new
+    // value in global var.
+    llvm::Value *CmpRes;
+    if (IVLVal.getType()->isSignedIntegerType()) {
+      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
+    } else {
+      assert(IVLVal.getType()->isUnsignedIntegerType() &&
+             "Loop iteration variable must be integer.");
+      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
+    }
+    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
+    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
+    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
+    // {
+    CGF.EmitBlock(ThenBB);
+
+    // last_iv = global_iv;
+    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
+
+    // last_a = priv_a;
+    switch (CGF.getEvaluationKind(LVal.getType())) {
+    case TEK_Scalar: {
+      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, FoundE->getExprLoc());
+      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
+      break;
+    }
+    case TEK_Complex: {
+      CodeGenFunction::ComplexPairTy PrivVal =
+          CGF.EmitLoadOfComplex(LVal, FoundE->getExprLoc());
+      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
+      break;
+    }
+    case TEK_Aggregate:
+      llvm_unreachable(
+          "Aggregates are not supported in lastprivate conditional.");
+    }
+    // }
+    CGF.EmitBranch(ExitBB);
+    // There is no need to emit line number for unconditional branch.
+    (void)ApplyDebugLocation::CreateEmpty(CGF);
+    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
+  };
+
+  if (CGM.getLangOpts().OpenMPSimd) {
+    // Do not emit as a critical region as no parallel region could be emitted.
+    RegionCodeGenTy ThenRCG(CodeGen);
+    ThenRCG(CGF);
+  } else {
+    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, FoundE->getExprLoc());
+  }
+}
+
+void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
+    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
+    SourceLocation Loc) {
+  if (CGF.getLangOpts().OpenMP < 50)
+    return;
+  auto It = LastprivateConditionalStack.back().DeclToUniqeName.find(VD);
+  assert(It != LastprivateConditionalStack.back().DeclToUniqeName.end() &&
+         "Unknown lastprivate conditional variable.");
+  StringRef UniqueName = It->getSecond();
+  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
+  // The variable was not updated in the region - exit.
+  if (!GV)
+    return;
+  LValue LPLVal = CGF.MakeAddrLValue(
+      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
+  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
+  CGF.EmitStoreOfScalar(Res, PrivLVal);
+}
+
 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
@@ -10688,7 +11725,7 @@ void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
 }
 
 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
-                                             OpenMPProcBindClauseKind ProcBind,
+                                             ProcBindKind ProcBind,
                                              SourceLocation Loc) {
   llvm_unreachable("Not supported in SIMD-only mode");
 }
@@ -10786,12 +11823,13 @@ void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
   llvm_unreachable("Not supported in SIMD-only mode");
 }
 
-void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
-                                         const OMPExecutableDirective &D,
-                                         llvm::Function *OutlinedFn,
-                                         llvm::Value *OutlinedFnID,
-                                         const Expr *IfCond,
-                                         const Expr *Device) {
+void CGOpenMPSIMDRuntime::emitTargetCall(
+    CodeGenFunction &CGF, const OMPExecutableDirective &D,
+    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
+    const Expr *Device,
+    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
+                                     const OMPLoopDirective &D)>
+        SizeEmitter) {
   llvm_unreachable("Not supported in SIMD-only mode");
 }
 
@@ -10807,10 +11845,6 @@ bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
   return false;
 }
 
-llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
-  return nullptr;
-}
-
 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &D,
                                         SourceLocation Loc,
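
Editor's note: for readers unfamiliar with the OpenMP 5.0 feature the declare-variant hunks above implement, here is a minimal sketch (not part of the patch) of the source construct that getDeclareVariantFunction() and emitDeclareVariant() resolve. Among the declare variant attributes whose context selectors match the compilation context, the one with the highest total selector score is chosen, and calls to the base function are redirected to that variant. All identifiers below are illustrative, not taken from the patch.

    // Two candidate variants for base_impl(); if both selectors match, the
    // explicitly scored implementation={vendor(llvm)} variant wins.
    int base_impl();
    int base_for_llvm();
    int base_for_cpu();

    #pragma omp declare variant(base_for_llvm) \
        match(implementation = {vendor(score(100) : llvm)})
    #pragma omp declare variant(base_for_cpu) match(device = {kind(cpu)})
    int base_impl();

    int call_site() {
      return base_impl(); // may be emitted as a call to base_for_llvm()
    }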
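
Likewise, a hedged sketch of the user-level pattern that the lastprivate conditional machinery above (checkAndEmitLastprivateConditional, initLastprivateConditionalCounter, emitLastprivateConditionalFinalUpdate) lowers: every guarded assignment is wrapped in the generated "last_iv <= iv" comparison inside a critical section, and after the loop the original variable receives the value stored by the logically last updating iteration. Identifiers are illustrative only.

    int last_positive_index(const int *V, int N) {
      int LastPos = -1;
    #pragma omp parallel for lastprivate(conditional : LastPos)
      for (int I = 0; I < N; ++I)
        if (V[I] > 0)
          LastPos = I; // only the assignment from the highest iteration survives
      return LastPos;
    }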
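
Finally, a sketch of the directive that NontemporalDeclsRAII and isNontemporalDecl track: list items of a nontemporal clause on a simd construct are recorded so that later loads and stores of those variables can be flagged as nontemporal. This example is assumed for illustration and does not appear in the patch.

    float A[1024], B[1024];

    void scale() {
    #pragma omp simd nontemporal(A, B)
      for (int I = 0; I < 1024; ++I)
        A[I] = 2.0f * B[I]; // accesses to A and B carry the nontemporal hint
    }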
