| | | |
|---|---|---|
| author | Dimitry Andric <dim@FreeBSD.org> | 2020-01-17 20:45:01 +0000 |
| committer | Dimitry Andric <dim@FreeBSD.org> | 2020-01-17 20:45:01 +0000 |
| commit | 706b4fc47bbc608932d3b491ae19a3b9cde9497b (patch) | |
| tree | 4adf86a776049cbf7f69a1929c4babcbbef925eb /clang/lib/CodeGen/CGOpenMPRuntime.cpp | |
| parent | 7cc9cf2bf09f069cb2dd947ead05d0b54301fb71 (diff) | |
Diffstat (limited to 'clang/lib/CodeGen/CGOpenMPRuntime.cpp')
| -rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.cpp | 1018 |
1 file changed, 739 insertions(+), 279 deletions(-)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 2a13a2a58156..97b17799a03e 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -10,17 +10,22 @@ // //===----------------------------------------------------------------------===// +#include "CGOpenMPRuntime.h" #include "CGCXXABI.h" #include "CGCleanup.h" -#include "CGOpenMPRuntime.h" #include "CGRecordLayout.h" #include "CodeGenFunction.h" -#include "clang/CodeGen/ConstantInitBuilder.h" +#include "clang/AST/Attr.h" #include "clang/AST/Decl.h" +#include "clang/AST/OpenMPClause.h" #include "clang/AST/StmtOpenMP.h" +#include "clang/AST/StmtVisitor.h" #include "clang/Basic/BitmaskEnum.h" +#include "clang/CodeGen/ConstantInitBuilder.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SetOperations.h" #include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Value.h" @@ -30,6 +35,7 @@ using namespace clang; using namespace CodeGen; +using namespace llvm::omp; namespace { /// Base class for handling code generation inside OpenMP regions. @@ -356,7 +362,7 @@ public: VD->getType().getNonReferenceType(), VK_LValue, C.getLocation()); PrivScope.addPrivate( - VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); }); + VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); }); } (void)PrivScope.Privatize(); } @@ -727,10 +733,6 @@ enum OpenMPRTLFunction { OMPRTL__tgt_target_teams_nowait, // Call to void __tgt_register_requires(int64_t flags); OMPRTL__tgt_register_requires, - // Call to void __tgt_register_lib(__tgt_bin_desc *desc); - OMPRTL__tgt_register_lib, - // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); - OMPRTL__tgt_unregister_lib, // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); OMPRTL__tgt_target_data_begin, @@ -841,7 +843,7 @@ static void emitInitWithReductionInitializer(CodeGenFunction &CGF, RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); break; case TEK_Aggregate: - InitRVal = RValue::getAggregate(LV.getAddress()); + InitRVal = RValue::getAggregate(LV.getAddress(CGF)); break; } OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); @@ -965,7 +967,7 @@ void ReductionCodeGen::emitAggregateInitialization( EmitDeclareReductionInit, EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp : PrivateVD->getInit(), - DRD, SharedLVal.getAddress()); + DRD, SharedLVal.getAddress(CGF)); } ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, @@ -1006,13 +1008,13 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { } llvm::Value *Size; llvm::Value *SizeInChars; - auto *ElemType = - cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType()) - ->getElementType(); + auto *ElemType = cast<llvm::PointerType>( + SharedAddresses[N].first.getPointer(CGF)->getType()) + ->getElementType(); auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); if (AsArraySection) { - Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(), - SharedAddresses[N].first.getPointer()); + Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF), + SharedAddresses[N].first.getPointer(CGF)); Size = CGF.Builder.CreateNUWAdd( Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); @@ -1062,7 +1064,7 @@ void ReductionCodeGen::emitInitialization( PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); QualType SharedType = SharedAddresses[N].first.getType(); SharedLVal = CGF.MakeAddrLValue( - CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(), + CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF), CGF.ConvertTypeForMem(SharedType)), SharedType, SharedAddresses[N].first.getBaseInfo(), CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); @@ -1070,7 +1072,7 @@ void ReductionCodeGen::emitInitialization( emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, - PrivateAddr, SharedLVal.getAddress(), + PrivateAddr, SharedLVal.getAddress(CGF), SharedLVal.getType()); } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && !CGF.isTrivialInitializer(PrivateVD->getInit())) { @@ -1107,15 +1109,15 @@ static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && !CGF.getContext().hasSameType(BaseTy, ElTy)) { if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { - BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); + BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy); } else { - LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy); + LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy); BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); } BaseTy = BaseTy->getPointeeType(); } return CGF.MakeAddrLValue( - CGF.Builder.CreateElementBitCast(BaseLV.getAddress(), + CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF), CGF.ConvertTypeForMem(ElTy)), BaseLV.getType(), BaseLV.getBaseInfo(), CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); @@ -1179,15 +1181,15 @@ Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), OriginalBaseLValue); llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( - BaseLValue.getPointer(), SharedAddresses[N].first.getPointer()); + BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF)); llvm::Value *PrivatePointer = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( PrivateAddr.getPointer(), - SharedAddresses[N].first.getAddress().getType()); + SharedAddresses[N].first.getAddress(CGF).getType()); llvm::Value 
*Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); return castToBase(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), - OriginalBaseLValue.getAddress().getType(), + OriginalBaseLValue.getAddress(CGF).getType(), OriginalBaseLValue.getAlignment(), Ptr); } BaseDecls.emplace_back( @@ -1276,7 +1278,7 @@ bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD, llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName); if (Addr && !Addr->isDeclaration()) { const auto *D = cast<FunctionDecl>(OldGD.getDecl()); - const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(OldGD); + const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(NewGD); llvm::Type *DeclTy = CGM.getTypes().GetFunctionType(FI); // Create a reference to the named value. This ensures that it is emitted @@ -1380,12 +1382,12 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) - .getAddress(); + .getAddress(CGF); }); Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) - .getAddress(); + .getAddress(CGF); }); (void)Scope.Privatize(); if (!IsCombiner && Out->hasInit() && @@ -1436,6 +1438,52 @@ CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { return UDRMap.lookup(D); } +namespace { +// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR +// Builder if one is present. +struct PushAndPopStackRAII { + PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, + bool HasCancel) + : OMPBuilder(OMPBuilder) { + if (!OMPBuilder) + return; + + // The following callback is the crucial part of clangs cleanup process. + // + // NOTE: + // Once the OpenMPIRBuilder is used to create parallel regions (and + // similar), the cancellation destination (Dest below) is determined via + // IP. That means if we have variables to finalize we split the block at IP, + // use the new block (=BB) as destination to build a JumpDest (via + // getJumpDestInCurrentScope(BB)) which then is fed to + // EmitBranchThroughCleanup. Furthermore, there will not be the need + // to push & pop an FinalizationInfo object. + // The FiniCB will still be needed but at the point where the + // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. + auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { + assert(IP.getBlock()->end() == IP.getPoint() && + "Clang CG should cause non-terminated block!"); + CGBuilderTy::InsertPointGuard IPG(CGF.Builder); + CGF.Builder.restoreIP(IP); + CodeGenFunction::JumpDest Dest = + CGF.getOMPCancelDestination(OMPD_parallel); + CGF.EmitBranchThroughCleanup(Dest); + }; + + // TODO: Remove this once we emit parallel regions through the + // OpenMPIRBuilder as it can do this setup internally. 
+ llvm::OpenMPIRBuilder::FinalizationInfo FI( + {FiniCB, OMPD_parallel, HasCancel}); + OMPBuilder->pushFinalizationCB(std::move(FI)); + } + ~PushAndPopStackRAII() { + if (OMPBuilder) + OMPBuilder->popFinalizationCB(); + } + llvm::OpenMPIRBuilder *OMPBuilder; +}; +} // namespace + static llvm::Function *emitParallelOrTeamsOutlinedFunction( CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, @@ -1460,6 +1508,11 @@ static llvm::Function *emitParallelOrTeamsOutlinedFunction( else if (const auto *OPFD = dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) HasCancel = OPFD->hasCancel(); + + // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new + // parallel region to make cancellation barriers work properly. + llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder(); + PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel); CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, HasCancel, OutlinedHelperName); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); @@ -1495,7 +1548,7 @@ llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( UpLoc, ThreadID, CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), TaskTVar->getType()->castAs<PointerType>()) - .getPointer()}; + .getPointer(CGF)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); }; CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, @@ -1706,9 +1759,10 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || !CGF.getLangOpts().CXXExceptions || CGF.Builder.GetInsertBlock() == TopBlock || - !isa<llvm::Instruction>(LVal.getPointer()) || - cast<llvm::Instruction>(LVal.getPointer())->getParent() == TopBlock || - cast<llvm::Instruction>(LVal.getPointer())->getParent() == + !isa<llvm::Instruction>(LVal.getPointer(CGF)) || + cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == + TopBlock || + cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == CGF.Builder.GetInsertBlock()) { ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); // If value loaded in entry block, cache it and use it everywhere in @@ -2422,26 +2476,6 @@ llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires"); break; } - case OMPRTL__tgt_register_lib: { - // Build void __tgt_register_lib(__tgt_bin_desc *desc); - QualType ParamTy = - CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); - llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); - break; - } - case OMPRTL__tgt_unregister_lib: { - // Build void __tgt_unregister_lib(__tgt_bin_desc *desc); - QualType ParamTy = - CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); - llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); - break; - } case OMPRTL__tgt_target_data_begin: { // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); @@ -2988,10 +3022,15 @@ Address 
CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name) { std::string Suffix = getName({"artificial", ""}); - std::string CacheSuffix = getName({"cache", ""}); llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); llvm::Value *GAddr = getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); + if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && + CGM.getTarget().isTLSSupported()) { + cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true); + return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType)); + } + std::string CacheSuffix = getName({"cache", ""}); llvm::Value *Args[] = { emitUpdateLocation(CGF, SourceLocation()), getThreadID(CGF, SourceLocation()), @@ -3005,12 +3044,12 @@ Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, CGF.EmitRuntimeCall( createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), VarLVType->getPointerTo(/*AddrSpace=*/0)), - CGM.getPointerAlign()); + CGM.getContext().getTypeAlignInChars(VarType)); } -void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, - const RegionCodeGenTy &ThenGen, - const RegionCodeGenTy &ElseGen) { +void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, + const RegionCodeGenTy &ThenGen, + const RegionCodeGenTy &ElseGen) { CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); // If the condition constant folds and can be elided, try to avoid emitting @@ -3100,7 +3139,7 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, EndArgs); }; if (IfCond) { - emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); + emitIfClause(CGF, IfCond, ThenGen, ElseGen); } else { RegionCodeGenTy ThenRCG(ThenGen); ThenRCG(CGF); @@ -3118,7 +3157,7 @@ Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, if (auto *OMPRegionInfo = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) if (OMPRegionInfo->getThreadIDVariable()) - return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); + return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); llvm::Value *ThreadID = getThreadID(CGF, Loc); QualType Int32Ty = @@ -3394,7 +3433,8 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); CGF.Builder.CreateStore( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy), + CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), + CGF.VoidPtrTy), Elem); } // Build function that copies private values from single region to all other @@ -3476,6 +3516,16 @@ void CGOpenMPRuntime::getDefaultScheduleAndChunk( void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks, bool ForceSimpleCall) { + // Check if we should use the OMPBuilder + auto *OMPRegionInfo = + dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); + llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder(); + if (OMPBuilder) { + CGF.Builder.restoreIP(OMPBuilder->CreateBarrier( + CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); + return; + } + if (!CGF.HaveInsertPoint()) return; // Build call __kmpc_cancel_barrier(loc, thread_id); @@ -3485,8 +3535,7 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, // thread_id); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), getThreadID(CGF, Loc)}; - if (auto *OMPRegionInfo = - 
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { + if (OMPRegionInfo) { if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { llvm::Value *Result = CGF.EmitRuntimeCall( createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); @@ -3616,7 +3665,9 @@ static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || Schedule == OMP_sch_static_balanced_chunked || - Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static)) + Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || + Schedule == OMP_dist_sch_static_chunked || + Schedule == OMP_dist_sch_static)) Modifier = OMP_sch_modifier_nonmonotonic; } return Schedule | Modifier; @@ -3807,37 +3858,15 @@ void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, } void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, - OpenMPProcBindClauseKind ProcBind, + ProcBindKind ProcBind, SourceLocation Loc) { if (!CGF.HaveInsertPoint()) return; - // Constants for proc bind value accepted by the runtime. - enum ProcBindTy { - ProcBindFalse = 0, - ProcBindTrue, - ProcBindMaster, - ProcBindClose, - ProcBindSpread, - ProcBindIntel, - ProcBindDefault - } RuntimeProcBind; - switch (ProcBind) { - case OMPC_PROC_BIND_master: - RuntimeProcBind = ProcBindMaster; - break; - case OMPC_PROC_BIND_close: - RuntimeProcBind = ProcBindClose; - break; - case OMPC_PROC_BIND_spread: - RuntimeProcBind = ProcBindSpread; - break; - case OMPC_PROC_BIND_unknown: - llvm_unreachable("Unsupported proc_bind value."); - } + assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), - llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)}; + llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); } @@ -4327,57 +4356,6 @@ QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { return TgtOffloadEntryQTy; } -QualType CGOpenMPRuntime::getTgtDeviceImageQTy() { - // These are the types we need to build: - // struct __tgt_device_image{ - // void *ImageStart; // Pointer to the target code start. - // void *ImageEnd; // Pointer to the target code end. - // // We also add the host entries to the device image, as it may be useful - // // for the target runtime to have access to that information. - // __tgt_offload_entry *EntriesBegin; // Begin of the table with all - // // the entries. - // __tgt_offload_entry *EntriesEnd; // End of the table with all the - // // entries (non inclusive). - // }; - if (TgtDeviceImageQTy.isNull()) { - ASTContext &C = CGM.getContext(); - RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image"); - RD->startDefinition(); - addFieldToRecordDecl(C, RD, C.VoidPtrTy); - addFieldToRecordDecl(C, RD, C.VoidPtrTy); - addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); - addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); - RD->completeDefinition(); - TgtDeviceImageQTy = C.getRecordType(RD); - } - return TgtDeviceImageQTy; -} - -QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() { - // struct __tgt_bin_desc{ - // int32_t NumDevices; // Number of devices supported. - // __tgt_device_image *DeviceImages; // Arrays of device images - // // (one per device). 
- // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the - // // entries. - // __tgt_offload_entry *EntriesEnd; // End of the table with all the - // // entries (non inclusive). - // }; - if (TgtBinaryDescriptorQTy.isNull()) { - ASTContext &C = CGM.getContext(); - RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc"); - RD->startDefinition(); - addFieldToRecordDecl( - C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); - addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy())); - addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); - addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); - RD->completeDefinition(); - TgtBinaryDescriptorQTy = C.getRecordType(RD); - } - return TgtBinaryDescriptorQTy; -} - namespace { struct PrivateHelpersTy { PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, @@ -4537,7 +4515,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); - llvm::Value *PartidParam = PartIdLVal.getPointer(); + llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); @@ -4550,7 +4528,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - PrivatesLVal.getPointer(), CGF.VoidPtrTy); + PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); } else { PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } @@ -4559,7 +4537,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, TaskPrivatesMap, CGF.Builder .CreatePointerBitCastOrAddrSpaceCast( - TDBase.getAddress(), CGF.VoidPtrTy) + TDBase.getAddress(CGF), CGF.VoidPtrTy) .getPointer()}; SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), std::end(CommonArgs)); @@ -4637,7 +4615,7 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, if (QualType::DestructionKind DtorKind = Field->getType().isDestructedType()) { LValue FieldLValue = CGF.EmitLValueForField(Base, Field); - CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); + CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); } } CGF.FinishFunction(); @@ -4735,8 +4713,8 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, LValue RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( - RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>()); - CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); + RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); + CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); ++Counter; } CGF.FinishFunction(); @@ -4801,7 +4779,8 @@ static void emitPrivatesInit(CodeGenFunction &CGF, } else { SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); SharedRefLValue = CGF.MakeAddrLValue( - Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), + Address(SharedRefLValue.getPointer(CGF), + C.getDeclAlign(OriginalVD)), SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 
SharedRefLValue.getTBAAInfo()); } @@ -4814,7 +4793,8 @@ static void emitPrivatesInit(CodeGenFunction &CGF, // Initialize firstprivate array using element-by-element // initialization. CGF.EmitOMPAggregateAssign( - PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type, + PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), + Type, [&CGF, Elem, Init, &CapturesInfo](Address DestElement, Address SrcElement) { // Clean up any temporaries needed by the initialization. @@ -4832,8 +4812,8 @@ static void emitPrivatesInit(CodeGenFunction &CGF, } } else { CodeGenFunction::OMPPrivateScope InitScope(CGF); - InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address { - return SharedRefLValue.getAddress(); + InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { + return SharedRefLValue.getAddress(CGF); }); (void)InitScope.Privatize(); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); @@ -5233,10 +5213,10 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { LValue UpAddrLVal = CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); - llvm::Value *UpAddr = - CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1); + llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32( + UpAddrLVal.getPointer(CGF), /*Idx0=*/1); llvm::Value *LowIntPtr = - CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy); + CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGM.SizeTy); llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); } else { @@ -5249,7 +5229,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, LValue BaseAddrLVal = CGF.EmitLValueForField( Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); CGF.EmitStoreOfScalar( - CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy), + CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGF.IntPtrTy), BaseAddrLVal); // deps[i].len = sizeof(<Dependences[i].second>); LValue LenLVal = CGF.EmitLValueForField( @@ -5366,7 +5346,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, }; if (IfCond) { - emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); + emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); } else { RegionCodeGenTy ThenRCG(ThenCodeGen); ThenRCG(CGF); @@ -5403,21 +5383,24 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); const auto *LBVar = cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); - CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(), + CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), + LBLVal.getQuals(), /*IsInitializer=*/true); LValue UBLVal = CGF.EmitLValueForField( Result.TDBase, *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); const auto *UBVar = cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); - CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(), + CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), + UBLVal.getQuals(), /*IsInitializer=*/true); LValue StLVal = CGF.EmitLValueForField( Result.TDBase, *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); const auto *StVar = cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); - CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), 
StLVal.getQuals(), + CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), + StLVal.getQuals(), /*IsInitializer=*/true); // Store reductions address. LValue RedLVal = CGF.EmitLValueForField( @@ -5426,7 +5409,7 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, if (Data.Reductions) { CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); } else { - CGF.EmitNullInitialization(RedLVal.getAddress(), + CGF.EmitNullInitialization(RedLVal.getAddress(CGF), CGF.getContext().VoidPtrTy); } enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; @@ -5435,11 +5418,11 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, ThreadID, Result.NewTask, IfVal, - LBLVal.getPointer(), - UBLVal.getPointer(), + LBLVal.getPointer(CGF), + UBLVal.getPointer(CGF), CGF.EmitLoadOfScalar(StLVal, Loc), llvm::ConstantInt::getSigned( - CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler + CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler llvm::ConstantInt::getSigned( CGF.IntTy, Data.Schedule.getPointer() ? Data.Schedule.getInt() ? NumTasks : Grainsize @@ -5751,7 +5734,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); CGF.Builder.CreateStore( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), + CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), Elem); if ((*IPriv)->getType()->isVariablyModifiedType()) { // Store array size. @@ -6179,7 +6162,7 @@ static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, // Emit the finalizer body: // <destroy>(<type>* %0) RCG.emitCleanups(CGF, N, PrivateAddr); - CGF.FinishFunction(); + CGF.FinishFunction(Loc); return Fn; } @@ -6231,7 +6214,7 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); RCG.emitSharedLValue(CGF, Cnt); llvm::Value *CastedShared = - CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer()); + CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); CGF.EmitStoreOfScalar(CastedShared, SharedLVal); RCG.emitAggregateType(CGF, Cnt); llvm::Value *SizeValInChars; @@ -6274,7 +6257,8 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true), FlagsLVal); } else - CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType()); + CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF), + FlagsLVal.getType()); } // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void // *data); @@ -6310,7 +6294,7 @@ void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); CGF.Builder.CreateStore( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy), + RCG.getSharedLValue(N).getPointer(CGF), CGM.VoidPtrTy), SharedAddr, /*IsVolatile=*/false); } } @@ -6321,12 +6305,12 @@ Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, LValue SharedLVal) { // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void // *d); - llvm::Value *Args[] = { - CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, - /*isSigned=*/true), - ReductionsPtr, - CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(), - CGM.VoidPtrTy)}; + llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), + CGM.IntTy, + 
/*isSigned=*/true), + ReductionsPtr, + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + SharedLVal.getPointer(CGF), CGM.VoidPtrTy)}; return Address( CGF.EmitRuntimeCall( createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args), @@ -6449,8 +6433,8 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitBlock(ContBB, /*IsFinished=*/true); }; if (IfCond) { - emitOMPIfClause(CGF, IfCond, ThenGen, - [](CodeGenFunction &, PrePostActionTy &) {}); + emitIfClause(CGF, IfCond, ThenGen, + [](CodeGenFunction &, PrePostActionTy &) {}); } else { RegionCodeGenTy ThenRCG(ThenGen); ThenRCG(CGF); @@ -6663,6 +6647,7 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF, case OMPD_parallel: case OMPD_for: case OMPD_parallel_for: + case OMPD_parallel_master: case OMPD_parallel_sections: case OMPD_for_simd: case OMPD_parallel_for_simd: @@ -6708,6 +6693,7 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF, case OMPD_master_taskloop: case OMPD_master_taskloop_simd: case OMPD_parallel_master_taskloop: + case OMPD_parallel_master_taskloop_simd: case OMPD_requires: case OMPD_unknown: break; @@ -6972,6 +6958,7 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF, case OMPD_parallel: case OMPD_for: case OMPD_parallel_for: + case OMPD_parallel_master: case OMPD_parallel_sections: case OMPD_for_simd: case OMPD_parallel_for_simd: @@ -7017,6 +7004,7 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF, case OMPD_master_taskloop: case OMPD_master_taskloop_simd: case OMPD_parallel_master_taskloop: + case OMPD_parallel_master_taskloop_simd: case OMPD_requires: case OMPD_unknown: break; @@ -7509,11 +7497,11 @@ private: } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || (OASE && isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { - BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); + BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); } else { // The base is the reference to the variable. // BP = &Var. 
- BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); + BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); if (const auto *VD = dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = @@ -7607,8 +7595,8 @@ private: isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) && "Unexpected expression"); - Address LB = - CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress(); + Address LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) + .getAddress(CGF); // If this component is a pointer inside the base struct then we don't // need to create any entry for it - it will be combined with the object @@ -7655,7 +7643,7 @@ private: if (MC.getAssociatedDeclaration()) { ComponentLB = CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) - .getAddress(); + .getAddress(CGF); Size = CGF.Builder.CreatePtrDiff( CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), CGF.EmitCastToVoidPtr(LB.getPointer())); @@ -7938,17 +7926,17 @@ public: "Expect a executable directive"); const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) - for (const auto &L : C->component_lists()) { + for (const auto L : C->component_lists()) { InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(), /*ReturnDevicePointer=*/false, C->isImplicit()); } for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>()) - for (const auto &L : C->component_lists()) { + for (const auto L : C->component_lists()) { InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit()); } for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>()) - for (const auto &L : C->component_lists()) { + for (const auto L : C->component_lists()) { InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit()); } @@ -7964,7 +7952,7 @@ public: for (const auto *C : CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) { - for (const auto &L : C->component_lists()) { + for (const auto L : C->component_lists()) { assert(!L.second.empty() && "Not expecting empty list of components!"); const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); VD = cast<ValueDecl>(VD->getCanonicalDecl()); @@ -8059,7 +8047,7 @@ public: auto CI = DeferredInfo.find(M.first); if (CI != DeferredInfo.end()) { for (const DeferredDevicePtrEntryTy &L : CI->second) { - llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(); + llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( this->CGF.EmitLValue(L.IE), L.IE->getExprLoc()); CurBasePointers.emplace_back(BasePtr, L.VD); @@ -8117,7 +8105,7 @@ public: for (const auto *C : CurMapperDir->clauselists()) { const auto *MC = cast<OMPMapClause>(C); - for (const auto &L : MC->component_lists()) { + for (const auto L : MC->component_lists()) { InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(), /*ReturnDevicePointer=*/false, MC->isImplicit()); } @@ -8181,9 +8169,10 @@ public: LValue ThisLVal = CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); - LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer()); - BasePointers.push_back(ThisLVal.getPointer()); - Pointers.push_back(ThisLValVal.getPointer()); + LambdaPointers.try_emplace(ThisLVal.getPointer(CGF), + VDLVal.getPointer(CGF)); + 
BasePointers.push_back(ThisLVal.getPointer(CGF)); + Pointers.push_back(ThisLValVal.getPointer(CGF)); Sizes.push_back( CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty, /*isSigned=*/true)); @@ -8201,17 +8190,19 @@ public: LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); if (LC.getCaptureKind() == LCK_ByRef) { LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); - LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer()); - BasePointers.push_back(VarLVal.getPointer()); - Pointers.push_back(VarLValVal.getPointer()); + LambdaPointers.try_emplace(VarLVal.getPointer(CGF), + VDLVal.getPointer(CGF)); + BasePointers.push_back(VarLVal.getPointer(CGF)); + Pointers.push_back(VarLValVal.getPointer(CGF)); Sizes.push_back(CGF.Builder.CreateIntCast( CGF.getTypeSize( VD->getType().getCanonicalType().getNonReferenceType()), CGF.Int64Ty, /*isSigned=*/true)); } else { RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); - LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer()); - BasePointers.push_back(VarLVal.getPointer()); + LambdaPointers.try_emplace(VarLVal.getPointer(CGF), + VDLVal.getPointer(CGF)); + BasePointers.push_back(VarLVal.getPointer(CGF)); Pointers.push_back(VarRVal.getScalarVal()); Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); } @@ -8286,7 +8277,7 @@ public: "Expect a executable directive"); const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { - for (const auto &L : C->decl_component_lists(VD)) { + for (const auto L : C->decl_component_lists(VD)) { assert(L.first == VD && "We got information for the wrong declaration??"); assert(!L.second.empty() && @@ -8439,7 +8430,7 @@ public: // Map other list items in the map clause which are not captured variables // but "declare target link" global variables. for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { - for (const auto &L : C->component_lists()) { + for (const auto L : C->component_lists()) { if (!L.first) continue; const auto *VD = dyn_cast<VarDecl>(L.first); @@ -8517,7 +8508,7 @@ public: CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD); // Copy the value of the original variable to the new global copy. CGF.Builder.CreateMemCpy( - CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(), + CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF), Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), CurSizes.back(), /*IsVolatile=*/false); // Use new global variable as the base pointers. 
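One of the smaller functional changes earlier in this diff extends `addMonoNonMonoModifier`: for OpenMP 5.0, a loop schedule with no explicit modifier now also skips the nonmonotonic modifier when it is `OMP_dist_sch_static` or `OMP_dist_sch_static_chunked`, not only the plain static kinds. The sketch below is a standalone illustration of that defaulting rule only; the enum is a reduced subset with placeholder values, not the real `OpenMPSchedType` encoding used by the runtime.

```cpp
// Illustrative sketch of the schedule-modifier defaulting rule touched by
// this patch: non-static schedules get the OpenMP 5.0 nonmonotonic modifier
// by default, static and (newly) distribute-static schedules do not.
#include <cstdio>

enum OpenMPSchedType { // reduced, illustrative subset; values are placeholders
  OMP_sch_static_chunked,
  OMP_sch_static,
  OMP_sch_static_balanced_chunked,
  OMP_ord_static_chunked,
  OMP_ord_static,
  OMP_dist_sch_static_chunked, // newly added to the exclusion list
  OMP_dist_sch_static,         // newly added to the exclusion list
  OMP_sch_dynamic_chunked,
  OMP_sch_guided_chunked,
};

// True if the schedule keeps the plain (monotonic) encoding and must not be
// OR-ed with the nonmonotonic modifier bit.
static bool keepsDefaultModifier(OpenMPSchedType Schedule) {
  switch (Schedule) {
  case OMP_sch_static_chunked:
  case OMP_sch_static:
  case OMP_sch_static_balanced_chunked:
  case OMP_ord_static_chunked:
  case OMP_ord_static:
  case OMP_dist_sch_static_chunked:
  case OMP_dist_sch_static:
    return true;
  default:
    return false;
  }
}

int main() {
  std::printf("dynamic         -> nonmonotonic added: %s\n",
              keepsDefaultModifier(OMP_sch_dynamic_chunked) ? "no" : "yes");
  std::printf("dist_sch_static -> nonmonotonic added: %s\n",
              keepsDefaultModifier(OMP_dist_sch_static) ? "no" : "yes");
  return 0;
}
```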
@@ -8746,6 +8737,7 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { case OMPD_parallel: case OMPD_for: case OMPD_parallel_for: + case OMPD_parallel_master: case OMPD_parallel_sections: case OMPD_for_simd: case OMPD_parallel_for_simd: @@ -8791,6 +8783,7 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { case OMPD_master_taskloop: case OMPD_master_taskloop_simd: case OMPD_parallel_master_taskloop: + case OMPD_parallel_master_taskloop_simd: case OMPD_requires: case OMPD_unknown: llvm_unreachable("Unexpected directive."); @@ -8926,7 +8919,7 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() { return MapperCGF .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>()) - .getAddress(); + .getAddress(MapperCGF); }); (void)Scope.Privatize(); @@ -9423,7 +9416,7 @@ void CGOpenMPRuntime::emitTargetCall( // specify target triples. if (OutlinedFnID) { if (IfCond) { - emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); + emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); } else { RegionCodeGenTy ThenRCG(TargetThenGen); ThenRCG(CGF); @@ -9506,6 +9499,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, case OMPD_parallel: case OMPD_for: case OMPD_parallel_for: + case OMPD_parallel_master: case OMPD_parallel_sections: case OMPD_for_simd: case OMPD_parallel_for_simd: @@ -9551,6 +9545,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, case OMPD_master_taskloop: case OMPD_master_taskloop_simd: case OMPD_parallel_master_taskloop: + case OMPD_parallel_master_taskloop_simd: case OMPD_requires: case OMPD_unknown: llvm_unreachable("Unknown target directive for OpenMP device codegen."); @@ -9591,9 +9586,9 @@ bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { } const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); - StringRef Name = CGM.getMangledName(GD); // Try to detect target regions in the function. if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { + StringRef Name = CGM.getMangledName(GD); scanForTargetRegionsFunctions(FD->getBody(), Name); Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = OMPDeclareTargetDeclAttr::getDeviceType(FD); @@ -9604,7 +9599,7 @@ bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { // Do not to emit function if it is not marked as declare target. return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && - AlreadyEmittedTargetFunctions.count(Name) == 0; + AlreadyEmittedTargetDecls.count(VD) == 0; } bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { @@ -9835,20 +9830,20 @@ bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) return true; - StringRef Name = CGM.getMangledName(GD); const auto *D = cast<FunctionDecl>(GD.getDecl()); // Do not to emit function if it is marked as declare target as it was already // emitted. 
if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { - if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) { - if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name))) + if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { + if (auto *F = dyn_cast_or_null<llvm::Function>( + CGM.GetGlobalValue(CGM.getMangledName(GD)))) return !F->isDeclaration(); return false; } return true; } - return !AlreadyEmittedTargetFunctions.insert(Name).second; + return !AlreadyEmittedTargetDecls.insert(D).second; } llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { @@ -10050,7 +10045,7 @@ void CGOpenMPRuntime::emitTargetDataCalls( auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; if (IfCond) { - emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); + emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); } else { RegionCodeGenTy RCG(BeginThenGen); RCG(CGF); @@ -10064,7 +10059,7 @@ void CGOpenMPRuntime::emitTargetDataCalls( } if (IfCond) { - emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen); + emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); } else { RegionCodeGenTy RCG(EndThenGen); RCG(CGF); @@ -10127,6 +10122,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( case OMPD_parallel: case OMPD_for: case OMPD_parallel_for: + case OMPD_parallel_master: case OMPD_parallel_sections: case OMPD_for_simd: case OMPD_parallel_for_simd: @@ -10169,6 +10165,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( case OMPD_master_taskloop: case OMPD_master_taskloop_simd: case OMPD_parallel_master_taskloop: + case OMPD_parallel_master_taskloop_simd: case OMPD_target: case OMPD_target_simd: case OMPD_target_teams_distribute: @@ -10220,8 +10217,8 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( }; if (IfCond) { - emitOMPIfClause(CGF, IfCond, TargetThenGen, - [](CodeGenFunction &CGF, PrePostActionTy &) {}); + emitIfClause(CGF, IfCond, TargetThenGen, + [](CodeGenFunction &CGF, PrePostActionTy &) {}); } else { RegionCodeGenTy ThenRCG(TargetThenGen); ThenRCG(CGF); @@ -10759,8 +10756,7 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, ExprLoc = VLENExpr->getExprLoc(); } OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); - if (CGM.getTriple().getArch() == llvm::Triple::x86 || - CGM.getTriple().getArch() == llvm::Triple::x86_64) { + if (CGM.getTriple().isX86()) { emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { unsigned VLEN = VLENVal.getExtValue(); @@ -11018,12 +11014,18 @@ Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, return Address(Addr, Align); } +namespace { +using OMPContextSelectorData = + OpenMPCtxSelectorData<ArrayRef<StringRef>, llvm::APSInt>; +using CompleteOMPContextSelectorData = SmallVector<OMPContextSelectorData, 4>; +} // anonymous namespace + /// Checks current context and returns true if it matches the context selector. -template <OMPDeclareVariantAttr::CtxSelectorSetType CtxSet, - OMPDeclareVariantAttr::CtxSelectorType Ctx> -static bool checkContext(const OMPDeclareVariantAttr *A) { - assert(CtxSet != OMPDeclareVariantAttr::CtxSetUnknown && - Ctx != OMPDeclareVariantAttr::CtxUnknown && +template <OpenMPContextSelectorSetKind CtxSet, OpenMPContextSelectorKind Ctx, + typename... Arguments> +static bool checkContext(const OMPContextSelectorData &Data, + Arguments... 
Params) { + assert(Data.CtxSet != OMP_CTX_SET_unknown && Data.Ctx != OMP_CTX_unknown && "Unknown context selector or context selector set."); return false; } @@ -11031,89 +11033,233 @@ static bool checkContext(const OMPDeclareVariantAttr *A) { /// Checks for implementation={vendor(<vendor>)} context selector. /// \returns true iff <vendor>="llvm", false otherwise. template <> -bool checkContext<OMPDeclareVariantAttr::CtxSetImplementation, - OMPDeclareVariantAttr::CtxVendor>( - const OMPDeclareVariantAttr *A) { - return llvm::all_of(A->implVendors(), +bool checkContext<OMP_CTX_SET_implementation, OMP_CTX_vendor>( + const OMPContextSelectorData &Data) { + return llvm::all_of(Data.Names, [](StringRef S) { return !S.compare_lower("llvm"); }); } -static bool greaterCtxScore(ASTContext &Ctx, const Expr *LHS, const Expr *RHS) { - // If both scores are unknown, choose the very first one. - if (!LHS && !RHS) - return true; - // If only one is known, return this one. - if (LHS && !RHS) - return true; - if (!LHS && RHS) - return false; - llvm::APSInt LHSVal = LHS->EvaluateKnownConstInt(Ctx); - llvm::APSInt RHSVal = RHS->EvaluateKnownConstInt(Ctx); - return llvm::APSInt::compareValues(LHSVal, RHSVal) >= 0; +/// Checks for device={kind(<kind>)} context selector. +/// \returns true if <kind>="host" and compilation is for host. +/// true if <kind>="nohost" and compilation is for device. +/// true if <kind>="cpu" and compilation is for Arm, X86 or PPC CPU. +/// true if <kind>="gpu" and compilation is for NVPTX or AMDGCN. +/// false otherwise. +template <> +bool checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>( + const OMPContextSelectorData &Data, CodeGenModule &CGM) { + for (StringRef Name : Data.Names) { + if (!Name.compare_lower("host")) { + if (CGM.getLangOpts().OpenMPIsDevice) + return false; + continue; + } + if (!Name.compare_lower("nohost")) { + if (!CGM.getLangOpts().OpenMPIsDevice) + return false; + continue; + } + switch (CGM.getTriple().getArch()) { + case llvm::Triple::arm: + case llvm::Triple::armeb: + case llvm::Triple::aarch64: + case llvm::Triple::aarch64_be: + case llvm::Triple::aarch64_32: + case llvm::Triple::ppc: + case llvm::Triple::ppc64: + case llvm::Triple::ppc64le: + case llvm::Triple::x86: + case llvm::Triple::x86_64: + if (Name.compare_lower("cpu")) + return false; + break; + case llvm::Triple::amdgcn: + case llvm::Triple::nvptx: + case llvm::Triple::nvptx64: + if (Name.compare_lower("gpu")) + return false; + break; + case llvm::Triple::UnknownArch: + case llvm::Triple::arc: + case llvm::Triple::avr: + case llvm::Triple::bpfel: + case llvm::Triple::bpfeb: + case llvm::Triple::hexagon: + case llvm::Triple::mips: + case llvm::Triple::mipsel: + case llvm::Triple::mips64: + case llvm::Triple::mips64el: + case llvm::Triple::msp430: + case llvm::Triple::r600: + case llvm::Triple::riscv32: + case llvm::Triple::riscv64: + case llvm::Triple::sparc: + case llvm::Triple::sparcv9: + case llvm::Triple::sparcel: + case llvm::Triple::systemz: + case llvm::Triple::tce: + case llvm::Triple::tcele: + case llvm::Triple::thumb: + case llvm::Triple::thumbeb: + case llvm::Triple::xcore: + case llvm::Triple::le32: + case llvm::Triple::le64: + case llvm::Triple::amdil: + case llvm::Triple::amdil64: + case llvm::Triple::hsail: + case llvm::Triple::hsail64: + case llvm::Triple::spir: + case llvm::Triple::spir64: + case llvm::Triple::kalimba: + case llvm::Triple::shave: + case llvm::Triple::lanai: + case llvm::Triple::wasm32: + case llvm::Triple::wasm64: + case 
llvm::Triple::renderscript32: + case llvm::Triple::renderscript64: + case llvm::Triple::ve: + return false; + } + } + return true; } -namespace { -/// Comparator for the priority queue for context selector. -class OMPDeclareVariantAttrComparer - : public std::greater<const OMPDeclareVariantAttr *> { -private: - ASTContext &Ctx; +static bool matchesContext(CodeGenModule &CGM, + const CompleteOMPContextSelectorData &ContextData) { + for (const OMPContextSelectorData &Data : ContextData) { + switch (Data.Ctx) { + case OMP_CTX_vendor: + assert(Data.CtxSet == OMP_CTX_SET_implementation && + "Expected implementation context selector set."); + if (!checkContext<OMP_CTX_SET_implementation, OMP_CTX_vendor>(Data)) + return false; + break; + case OMP_CTX_kind: + assert(Data.CtxSet == OMP_CTX_SET_device && + "Expected device context selector set."); + if (!checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>(Data, + CGM)) + return false; + break; + case OMP_CTX_unknown: + llvm_unreachable("Unknown context selector kind."); + } + } + return true; +} -public: - OMPDeclareVariantAttrComparer(ASTContext &Ctx) : Ctx(Ctx) {} - bool operator()(const OMPDeclareVariantAttr *LHS, - const OMPDeclareVariantAttr *RHS) const { - const Expr *LHSExpr = nullptr; - const Expr *RHSExpr = nullptr; - if (LHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified) - LHSExpr = LHS->getScore(); - if (RHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified) - RHSExpr = RHS->getScore(); - return greaterCtxScore(Ctx, LHSExpr, RHSExpr); +static CompleteOMPContextSelectorData +translateAttrToContextSelectorData(ASTContext &C, + const OMPDeclareVariantAttr *A) { + CompleteOMPContextSelectorData Data; + for (unsigned I = 0, E = A->scores_size(); I < E; ++I) { + Data.emplace_back(); + auto CtxSet = static_cast<OpenMPContextSelectorSetKind>( + *std::next(A->ctxSelectorSets_begin(), I)); + auto Ctx = static_cast<OpenMPContextSelectorKind>( + *std::next(A->ctxSelectors_begin(), I)); + Data.back().CtxSet = CtxSet; + Data.back().Ctx = Ctx; + const Expr *Score = *std::next(A->scores_begin(), I); + Data.back().Score = Score->EvaluateKnownConstInt(C); + switch (Ctx) { + case OMP_CTX_vendor: + assert(CtxSet == OMP_CTX_SET_implementation && + "Expected implementation context selector set."); + Data.back().Names = + llvm::makeArrayRef(A->implVendors_begin(), A->implVendors_end()); + break; + case OMP_CTX_kind: + assert(CtxSet == OMP_CTX_SET_device && + "Expected device context selector set."); + Data.back().Names = + llvm::makeArrayRef(A->deviceKinds_begin(), A->deviceKinds_end()); + break; + case OMP_CTX_unknown: + llvm_unreachable("Unknown context selector kind."); + } } -}; -} // anonymous namespace + return Data; +} + +static bool isStrictSubset(const CompleteOMPContextSelectorData &LHS, + const CompleteOMPContextSelectorData &RHS) { + llvm::SmallDenseMap<std::pair<int, int>, llvm::StringSet<>, 4> RHSData; + for (const OMPContextSelectorData &D : RHS) { + auto &Pair = RHSData.FindAndConstruct(std::make_pair(D.CtxSet, D.Ctx)); + Pair.getSecond().insert(D.Names.begin(), D.Names.end()); + } + bool AllSetsAreEqual = true; + for (const OMPContextSelectorData &D : LHS) { + auto It = RHSData.find(std::make_pair(D.CtxSet, D.Ctx)); + if (It == RHSData.end()) + return false; + if (D.Names.size() > It->getSecond().size()) + return false; + if (llvm::set_union(It->getSecond(), D.Names)) + return false; + AllSetsAreEqual = + AllSetsAreEqual && (D.Names.size() == It->getSecond().size()); + } + + return LHS.size() != RHS.size() || 
!AllSetsAreEqual; +} + +static bool greaterCtxScore(const CompleteOMPContextSelectorData &LHS, + const CompleteOMPContextSelectorData &RHS) { + // Score is calculated as sum of all scores + 1. + llvm::APSInt LHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false); + bool RHSIsSubsetOfLHS = isStrictSubset(RHS, LHS); + if (RHSIsSubsetOfLHS) { + LHSScore = llvm::APSInt::get(0); + } else { + for (const OMPContextSelectorData &Data : LHS) { + if (Data.Score.getBitWidth() > LHSScore.getBitWidth()) { + LHSScore = LHSScore.extend(Data.Score.getBitWidth()) + Data.Score; + } else if (Data.Score.getBitWidth() < LHSScore.getBitWidth()) { + LHSScore += Data.Score.extend(LHSScore.getBitWidth()); + } else { + LHSScore += Data.Score; + } + } + } + llvm::APSInt RHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false); + if (!RHSIsSubsetOfLHS && isStrictSubset(LHS, RHS)) { + RHSScore = llvm::APSInt::get(0); + } else { + for (const OMPContextSelectorData &Data : RHS) { + if (Data.Score.getBitWidth() > RHSScore.getBitWidth()) { + RHSScore = RHSScore.extend(Data.Score.getBitWidth()) + Data.Score; + } else if (Data.Score.getBitWidth() < RHSScore.getBitWidth()) { + RHSScore += Data.Score.extend(RHSScore.getBitWidth()); + } else { + RHSScore += Data.Score; + } + } + } + return llvm::APSInt::compareValues(LHSScore, RHSScore) >= 0; +} /// Finds the variant function that matches current context with its context /// selector. -static const FunctionDecl *getDeclareVariantFunction(ASTContext &Ctx, +static const FunctionDecl *getDeclareVariantFunction(CodeGenModule &CGM, const FunctionDecl *FD) { if (!FD->hasAttrs() || !FD->hasAttr<OMPDeclareVariantAttr>()) return FD; // Iterate through all DeclareVariant attributes and check context selectors. - auto &&Comparer = [&Ctx](const OMPDeclareVariantAttr *LHS, - const OMPDeclareVariantAttr *RHS) { - const Expr *LHSExpr = nullptr; - const Expr *RHSExpr = nullptr; - if (LHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified) - LHSExpr = LHS->getScore(); - if (RHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified) - RHSExpr = RHS->getScore(); - return greaterCtxScore(Ctx, LHSExpr, RHSExpr); - }; const OMPDeclareVariantAttr *TopMostAttr = nullptr; + CompleteOMPContextSelectorData TopMostData; for (const auto *A : FD->specific_attrs<OMPDeclareVariantAttr>()) { - const OMPDeclareVariantAttr *SelectedAttr = nullptr; - switch (A->getCtxSelectorSet()) { - case OMPDeclareVariantAttr::CtxSetImplementation: - switch (A->getCtxSelector()) { - case OMPDeclareVariantAttr::CtxVendor: - if (checkContext<OMPDeclareVariantAttr::CtxSetImplementation, - OMPDeclareVariantAttr::CtxVendor>(A)) - SelectedAttr = A; - break; - case OMPDeclareVariantAttr::CtxUnknown: - llvm_unreachable( - "Unknown context selector in implementation selector set."); - } - break; - case OMPDeclareVariantAttr::CtxSetUnknown: - llvm_unreachable("Unknown context selector set."); - } + CompleteOMPContextSelectorData Data = + translateAttrToContextSelectorData(CGM.getContext(), A); + if (!matchesContext(CGM, Data)) + continue; // If the attribute matches the context, find the attribute with the highest // score. 
- if (SelectedAttr && (!TopMostAttr || !Comparer(TopMostAttr, SelectedAttr))) - TopMostAttr = SelectedAttr; + if (!TopMostAttr || !greaterCtxScore(TopMostData, Data)) { + TopMostAttr = A; + TopMostData.swap(Data); + } } if (!TopMostAttr) return FD; @@ -11129,7 +11275,7 @@ bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) { llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName); if (Orig && !Orig->isDeclaration()) return false; - const FunctionDecl *NewFD = getDeclareVariantFunction(CGM.getContext(), D); + const FunctionDecl *NewFD = getDeclareVariantFunction(CGM, D); // Emit original function if it does not have declare variant attribute or the // context does not match. if (NewFD == D) @@ -11143,6 +11289,320 @@ bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) { return true; } +CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( + CodeGenModule &CGM, const OMPLoopDirective &S) + : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { + assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); + if (!NeedToPush) + return; + NontemporalDeclsSet &DS = + CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); + for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { + for (const Stmt *Ref : C->private_refs()) { + const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); + const ValueDecl *VD; + if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { + VD = DRE->getDecl(); + } else { + const auto *ME = cast<MemberExpr>(SimpleRefExpr); + assert((ME->isImplicitCXXThis() || + isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && + "Expected member of current class."); + VD = ME->getMemberDecl(); + } + DS.insert(VD); + } + } +} + +CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { + if (!NeedToPush) + return; + CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); +} + +bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { + assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); + + return llvm::any_of( + CGM.getOpenMPRuntime().NontemporalDeclsStack, + [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); +} + +CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( + CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) + : CGM(CGF.CGM), + NeedToPush(llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), + [](const OMPLastprivateClause *C) { + return C->getKind() == + OMPC_LASTPRIVATE_conditional; + })) { + assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); + if (!NeedToPush) + return; + LastprivateConditionalData &Data = + CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); + for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { + if (C->getKind() != OMPC_LASTPRIVATE_conditional) + continue; + + for (const Expr *Ref : C->varlists()) { + Data.DeclToUniqeName.try_emplace( + cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), + generateUniqueName(CGM, "pl_cond", Ref)); + } + } + Data.IVLVal = IVLVal; + // In simd only mode or for simd directives no need to generate threadprivate + // references for the loop iteration counter, we can use the original one + // since outlining cannot happen in simd regions. 
+  if (CGF.getLangOpts().OpenMPSimd ||
+      isOpenMPSimdDirective(S.getDirectiveKind())) {
+    Data.UseOriginalIV = true;
+    return;
+  }
+  llvm::SmallString<16> Buffer;
+  llvm::raw_svector_ostream OS(Buffer);
+  PresumedLoc PLoc =
+      CGM.getContext().getSourceManager().getPresumedLoc(S.getBeginLoc());
+  assert(PLoc.isValid() && "Source location is expected to be always valid.");
+
+  llvm::sys::fs::UniqueID ID;
+  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
+    CGM.getDiags().Report(diag::err_cannot_open_file)
+        << PLoc.getFilename() << EC.message();
+  OS << "$pl_cond_" << ID.getDevice() << "_" << ID.getFile() << "_"
+     << PLoc.getLine() << "_" << PLoc.getColumn() << "$iv";
+  Data.IVName = OS.str();
+}
+
+CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
+  if (!NeedToPush)
+    return;
+  CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
+}
+
+void CGOpenMPRuntime::initLastprivateConditionalCounter(
+    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
+  if (CGM.getLangOpts().OpenMPSimd ||
+      !llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
+                    [](const OMPLastprivateClause *C) {
+                      return C->getKind() == OMPC_LASTPRIVATE_conditional;
+                    }))
+    return;
+  const CGOpenMPRuntime::LastprivateConditionalData &Data =
+      LastprivateConditionalStack.back();
+  if (Data.UseOriginalIV)
+    return;
+  // Global loop counter. Required to handle inner parallel-for regions.
+  // global_iv = iv;
+  Address GlobIVAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
+      CGF, Data.IVLVal.getType(), Data.IVName);
+  LValue GlobIVLVal = CGF.MakeAddrLValue(GlobIVAddr, Data.IVLVal.getType());
+  llvm::Value *IVVal = CGF.EmitLoadOfScalar(Data.IVLVal, S.getBeginLoc());
+  CGF.EmitStoreOfScalar(IVVal, GlobIVLVal);
+}
+
+namespace {
+/// Checks if the lastprivate conditional variable is referenced in LHS.
+class LastprivateConditionalRefChecker final
+    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
+  CodeGenFunction &CGF;
+  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
+  const Expr *FoundE = nullptr;
+  const Decl *FoundD = nullptr;
+  StringRef UniqueDeclName;
+  LValue IVLVal;
+  StringRef IVName;
+  SourceLocation Loc;
+  bool UseOriginalIV = false;
+
+public:
+  bool VisitDeclRefExpr(const DeclRefExpr *E) {
+    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
+         llvm::reverse(LPM)) {
+      auto It = D.DeclToUniqeName.find(E->getDecl());
+      if (It == D.DeclToUniqeName.end())
+        continue;
+      FoundE = E;
+      FoundD = E->getDecl()->getCanonicalDecl();
+      UniqueDeclName = It->getSecond();
+      IVLVal = D.IVLVal;
+      IVName = D.IVName;
+      UseOriginalIV = D.UseOriginalIV;
+      break;
+    }
+    return FoundE == E;
+  }
+  bool VisitMemberExpr(const MemberExpr *E) {
+    if (!CGF.IsWrappedCXXThis(E->getBase()))
+      return false;
+    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
+         llvm::reverse(LPM)) {
+      auto It = D.DeclToUniqeName.find(E->getMemberDecl());
+      if (It == D.DeclToUniqeName.end())
+        continue;
+      FoundE = E;
+      FoundD = E->getMemberDecl()->getCanonicalDecl();
+      UniqueDeclName = It->getSecond();
+      IVLVal = D.IVLVal;
+      IVName = D.IVName;
+      UseOriginalIV = D.UseOriginalIV;
+      break;
+    }
+    return FoundE == E;
+  }
+  bool VisitStmt(const Stmt *S) {
+    for (const Stmt *Child : S->children()) {
+      if (!Child)
+        continue;
+      if (const auto *E = dyn_cast<Expr>(Child))
+        if (!E->isGLValue())
+          continue;
+      if (Visit(Child))
+        return true;
+    }
+    return false;
+  }
+  explicit LastprivateConditionalRefChecker(
+      CodeGenFunction &CGF,
+      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
+      : CGF(CGF), LPM(LPM) {}
+  std::tuple<const Expr *, const Decl *, StringRef, LValue, StringRef, bool>
+  getFoundData() const {
+    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, IVName,
+                           UseOriginalIV);
+  }
+};
+} // namespace
+
+void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
+                                                         const Expr *LHS) {
+  if (CGF.getLangOpts().OpenMP < 50)
+    return;
+  LastprivateConditionalRefChecker Checker(CGF, LastprivateConditionalStack);
+  if (!Checker.Visit(LHS))
+    return;
+  const Expr *FoundE;
+  const Decl *FoundD;
+  StringRef UniqueDeclName;
+  LValue IVLVal;
+  StringRef IVName;
+  bool UseOriginalIV;
+  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, IVName, UseOriginalIV) =
+      Checker.getFoundData();
+
+  // Last updated loop counter for the lastprivate conditional var.
+  // int<xx> last_iv = 0;
+  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
+  llvm::Constant *LastIV =
+      getOrCreateInternalVariable(LLIVTy, UniqueDeclName + "$iv");
+  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
+      IVLVal.getAlignment().getAsAlign());
+  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
+
+  // Private address of the lastprivate conditional in the current context.
+  // priv_a
+  LValue LVal = CGF.EmitLValue(FoundE);
+  // Last value of the lastprivate conditional.
+  // decltype(priv_a) last_a;
+  llvm::Constant *Last = getOrCreateInternalVariable(
+      LVal.getAddress(CGF).getElementType(), UniqueDeclName);
+  cast<llvm::GlobalVariable>(Last)->setAlignment(
+      LVal.getAlignment().getAsAlign());
+  LValue LastLVal =
+      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());
+
+  // Global loop counter. Required to handle inner parallel-for regions.
+  // global_iv
+  if (!UseOriginalIV) {
+    Address IVAddr =
+        getAddrOfArtificialThreadPrivate(CGF, IVLVal.getType(), IVName);
+    IVLVal = CGF.MakeAddrLValue(IVAddr, IVLVal.getType());
+  }
+  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, FoundE->getExprLoc());
+
+  // #pragma omp critical(a)
+  // if (last_iv <= iv) {
+  //   last_iv = iv;
+  //   last_a = priv_a;
+  // }
+  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
+                    FoundE](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    Action.Enter(CGF);
+    llvm::Value *LastIVVal =
+        CGF.EmitLoadOfScalar(LastIVLVal, FoundE->getExprLoc());
+    // (last_iv <= global_iv) ? Check if the variable is updated and store new
+    // value in global var.
+    llvm::Value *CmpRes;
+    if (IVLVal.getType()->isSignedIntegerType()) {
+      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
+    } else {
+      assert(IVLVal.getType()->isUnsignedIntegerType() &&
+             "Loop iteration variable must be integer.");
+      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
+    }
+    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
+    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
+    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
+    // {
+    CGF.EmitBlock(ThenBB);
+
+    // last_iv = global_iv;
+    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
+
+    // last_a = priv_a;
+    switch (CGF.getEvaluationKind(LVal.getType())) {
+    case TEK_Scalar: {
+      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, FoundE->getExprLoc());
+      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
+      break;
+    }
+    case TEK_Complex: {
+      CodeGenFunction::ComplexPairTy PrivVal =
+          CGF.EmitLoadOfComplex(LVal, FoundE->getExprLoc());
+      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
+      break;
+    }
+    case TEK_Aggregate:
+      llvm_unreachable(
+          "Aggregates are not supported in lastprivate conditional.");
+    }
+    // }
+    CGF.EmitBranch(ExitBB);
+    // There is no need to emit line number for unconditional branch.
+    (void)ApplyDebugLocation::CreateEmpty(CGF);
+    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
+  };
+
+  if (CGM.getLangOpts().OpenMPSimd) {
+    // Do not emit as a critical region as no parallel region could be emitted.
+    RegionCodeGenTy ThenRCG(CodeGen);
+    ThenRCG(CGF);
+  } else {
+    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, FoundE->getExprLoc());
+  }
+}
+
+void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
+    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
+    SourceLocation Loc) {
+  if (CGF.getLangOpts().OpenMP < 50)
+    return;
+  auto It = LastprivateConditionalStack.back().DeclToUniqeName.find(VD);
+  assert(It != LastprivateConditionalStack.back().DeclToUniqeName.end() &&
+         "Unknown lastprivate conditional variable.");
+  StringRef UniqueName = It->getSecond();
+  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
+  // The variable was not updated in the region - exit.
+  if (!GV)
+    return;
+  LValue LPLVal = CGF.MakeAddrLValue(
+      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
+  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
+  CGF.EmitStoreOfScalar(Res, PrivLVal);
+}
+
 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
@@ -11265,7 +11725,7 @@ void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
 }
 
 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
-                                             OpenMPProcBindClauseKind ProcBind,
+                                             ProcBindKind ProcBind,
                                              SourceLocation Loc) {
   llvm_unreachable("Not supported in SIMD-only mode");
 }
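
Note on the declare-variant hunks above: getDeclareVariantFunction now translates each OMPDeclareVariantAttr into a CompleteOMPContextSelectorData, skips the ones whose context selector does not match, and keeps the attribute with the greatest accumulated score per greaterCtxScore (a selector that is a strict superset of another matching selector always outranks it). The fragment below is not part of the commit; it is a minimal, hypothetical source-level sketch of the construct this selection acts on, with invented function names and score value:

int base();
int fast();     // hypothetical variant bodies
int generic();

#pragma omp declare variant(fast) match(implementation = {vendor(score(2): llvm)})
#pragma omp declare variant(generic) match(implementation = {vendor(llvm)})
int base();

int caller() {
  // With clang (vendor "llvm") both selectors match; the scoring logic in
  // this patch would be expected to select 'fast', whose selector carries
  // the explicitly higher score.
  return base();
}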
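Note on the lastprivate(conditional:) hunks: checkAndEmitLastprivateConditional instruments stores to a conditional lastprivate variable so that, inside a named critical region, the value and the loop counter of the latest updating iteration are kept in the internal last_a/last_iv globals, and emitLastprivateConditionalFinalUpdate copies last_a back after the loop. A minimal, hypothetical OpenMP 5.0 source example of the clause these helpers lower (identifiers invented; compile with -fopenmp -fopenmp-version=50):

// 'x' is assigned only on some iterations; lastprivate(conditional:) keeps
// the value from the sequentially last iteration that actually executed the
// store, which is what the emitted
//   if (last_iv <= iv) { last_iv = iv; last_a = priv_a; }
// pattern tracks.
int last_hit(const int *data, int n, int threshold) {
  int x = -1;
  #pragma omp parallel for lastprivate(conditional: x)
  for (int i = 0; i < n; ++i)
    if (data[i] > threshold)
      x = i;
  return x; // index of the last iteration that stored to x, or -1
}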
