diff options
Diffstat (limited to 'lib/CodeGen/CGOpenMPRuntime.cpp')
-rw-r--r-- | lib/CodeGen/CGOpenMPRuntime.cpp | 1079 |
1 files changed, 873 insertions, 206 deletions
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index fa850155df4f9..20eb0b29f427d 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -353,7 +353,7 @@ public: if (VD->isLocalVarDeclOrParm()) continue; - DeclRefExpr DRE(const_cast<VarDecl *>(VD), + DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), /*RefersToEnclosingVariableOrCapture=*/false, VD->getType().getNonReferenceType(), VK_LValue, C.getLocation()); @@ -673,6 +673,9 @@ enum OpenMPRTLFunction { // // Offloading related calls // + // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 + // size); + OMPRTL__kmpc_push_target_tripcount, // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t // *arg_types); @@ -897,25 +900,6 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, CGF.EmitBlock(DoneBB, /*IsFinished=*/true); } -static llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> -isDeclareTargetDeclaration(const ValueDecl *VD) { - for (const Decl *D : VD->redecls()) { - if (!D->hasAttrs()) - continue; - if (const auto *Attr = D->getAttr<OMPDeclareTargetDeclAttr>()) - return Attr->getMapType(); - } - if (const auto *V = dyn_cast<VarDecl>(VD)) { - if (const VarDecl *TD = V->getTemplateInstantiationPattern()) - return isDeclareTargetDeclaration(TD); - } else if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { - if (const auto *TD = FD->getTemplateInstantiationPattern()) - return isDeclareTargetDeclaration(TD); - } - - return llvm::None; -} - LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { return CGF.EmitOMPSharedLValue(E); } @@ -1242,6 +1226,17 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, void CGOpenMPRuntime::clear() { InternalVars.clear(); + // Clean non-target variable declarations possibly used only in debug info. + for (const auto &Data : EmittedNonTargetVariables) { + if (!Data.getValue().pointsToAliveValue()) + continue; + auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); + if (!GV) + continue; + if (!GV->isDeclaration() || GV->getNumUses() > 0) + continue; + GV->eraseFromParent(); + } } std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { @@ -1314,27 +1309,19 @@ void CGOpenMPRuntime::emitUserDefinedReduction( CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { if (UDRMap.count(D) > 0) return; - ASTContext &C = CGM.getContext(); - if (!In || !Out) { - In = &C.Idents.get("omp_in"); - Out = &C.Idents.get("omp_out"); - } llvm::Function *Combiner = emitCombinerOrInitializer( - CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()), - cast<VarDecl>(D->lookup(Out).front()), + CGM, D->getType(), D->getCombiner(), + cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), + cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), /*IsCombiner=*/true); llvm::Function *Initializer = nullptr; if (const Expr *Init = D->getInitializer()) { - if (!Priv || !Orig) { - Priv = &C.Idents.get("omp_priv"); - Orig = &C.Idents.get("omp_orig"); - } Initializer = emitCombinerOrInitializer( CGM, D->getType(), D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init : nullptr, - cast<VarDecl>(D->lookup(Orig).front()), - cast<VarDecl>(D->lookup(Priv).front()), + cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), + cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), /*IsCombiner=*/false); } UDRMap.try_emplace(D, Combiner, Initializer); @@ -1406,8 +1393,8 @@ llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( bool Tied, unsigned &NumberOfParts) { auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, PrePostActionTy &) { - llvm::Value *ThreadID = getThreadID(CGF, D.getLocStart()); - llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getLocStart()); + llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); + llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); llvm::Value *TaskArgs[] = { UpLoc, ThreadID, CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), @@ -1456,17 +1443,17 @@ static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, template <class... As> static llvm::GlobalVariable * -createConstantGlobalStruct(CodeGenModule &CGM, QualType Ty, - ArrayRef<llvm::Constant *> Data, const Twine &Name, - As &&... Args) { +createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, + ArrayRef<llvm::Constant *> Data, const Twine &Name, + As &&... Args) { const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); ConstantInitBuilder CIBuilder(CGM); ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); buildStructValue(Fields, CGM, RD, RL, Data); return Fields.finishAndCreateGlobal( - Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), - /*isConstant=*/true, std::forward<As>(Args)...); + Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, + std::forward<As>(Args)...); } template <typename T> @@ -1483,7 +1470,9 @@ createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); - llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); + unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); + FlagsTy FlagsKey(Flags, Reserved2Flags); + llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey); if (!Entry) { if (!DefaultOpenMPPSource) { // Initialize default location for psource field of ident_t structure of @@ -1496,21 +1485,47 @@ Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); } - llvm::Constant *Data[] = {llvm::ConstantInt::getNullValue(CGM.Int32Ty), - llvm::ConstantInt::get(CGM.Int32Ty, Flags), - llvm::ConstantInt::getNullValue(CGM.Int32Ty), - llvm::ConstantInt::getNullValue(CGM.Int32Ty), - DefaultOpenMPPSource}; - llvm::GlobalValue *DefaultOpenMPLocation = createConstantGlobalStruct( - CGM, IdentQTy, Data, "", llvm::GlobalValue::PrivateLinkage); + llvm::Constant *Data[] = { + llvm::ConstantInt::getNullValue(CGM.Int32Ty), + llvm::ConstantInt::get(CGM.Int32Ty, Flags), + llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags), + llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource}; + llvm::GlobalValue *DefaultOpenMPLocation = + createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "", + llvm::GlobalValue::PrivateLinkage); DefaultOpenMPLocation->setUnnamedAddr( llvm::GlobalValue::UnnamedAddr::Global); - OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation; + OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation; } return Address(Entry, Align); } +void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, + bool AtCurrentPoint) { + auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); + assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); + + llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); + if (AtCurrentPoint) { + Elem.second.ServiceInsertPt = new llvm::BitCastInst( + Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); + } else { + Elem.second.ServiceInsertPt = + new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); + Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); + } +} + +void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { + auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); + if (Elem.second.ServiceInsertPt) { + llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; + Elem.second.ServiceInsertPt = nullptr; + Ptr->eraseFromParent(); + } +} + llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags) { @@ -1537,8 +1552,10 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, Elem.second.DebugLoc = AI.getPointer(); LocValue = AI; + if (!Elem.second.ServiceInsertPt) + setLocThreadIdInsertPt(CGF); CGBuilderTy::InsertPointGuard IPG(CGF.Builder); - CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); + CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), CGF.getTypeSize(IdentQTy)); } @@ -1608,21 +1625,25 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, // kmpc_global_thread_num(ident_t *loc). // Generate thread id value and cache this value for use across the // function. + auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); + if (!Elem.second.ServiceInsertPt) + setLocThreadIdInsertPt(CGF); CGBuilderTy::InsertPointGuard IPG(CGF.Builder); - CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); + CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); llvm::CallInst *Call = CGF.Builder.CreateCall( createRuntimeFunction(OMPRTL__kmpc_global_thread_num), emitUpdateLocation(CGF, Loc)); Call->setCallingConv(CGF.getRuntimeCC()); - auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); Elem.second.ThreadID = Call; return Call; } void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { assert(CGF.CurFn && "No function in current CodeGenFunction."); - if (OpenMPLocThreadIDMap.count(CGF.CurFn)) + if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { + clearLocThreadIdInsertPt(CGF); OpenMPLocThreadIDMap.erase(CGF.CurFn); + } if (FunctionUDRMap.count(CGF.CurFn) > 0) { for(auto *D : FunctionUDRMap[CGF.CurFn]) UDRMap.erase(D); @@ -2145,6 +2166,15 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); break; } + case OMPRTL__kmpc_push_target_tripcount: { + // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 + // size); + llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount"); + break; + } case OMPRTL__tgt_target: { // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t @@ -2417,7 +2447,7 @@ Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) { if (CGM.getLangOpts().OpenMPSimd) return Address::invalid(); llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = - isDeclareTargetDeclaration(VD); + OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) { SmallString<64> PtrName; { @@ -2496,8 +2526,7 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( return nullptr; VD = VD->getDefinition(CGM.getContext()); - if (VD && ThreadPrivateWithDefinition.count(VD) == 0) { - ThreadPrivateWithDefinition.insert(VD); + if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { QualType ASTTy = VD->getType(); llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; @@ -2639,16 +2668,16 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, llvm::GlobalVariable *Addr, bool PerformInit) { Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = - isDeclareTargetDeclaration(VD); + OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) - return false; + return CGM.getLangOpts().OpenMPIsDevice; VD = VD->getDefinition(CGM.getContext()); - if (VD && !DeclareTargetWithDefinition.insert(VD).second) + if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) return CGM.getLangOpts().OpenMPIsDevice; QualType ASTTy = VD->getType(); - SourceLocation Loc = VD->getCanonicalDecl()->getLocStart(); + SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); // Produce the unique prefix to identify the new target regions. We use // the source location of the variable declaration which we know to not // conflict with any target region. @@ -3197,13 +3226,7 @@ void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); } -void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, - OpenMPDirectiveKind Kind, bool EmitChecks, - bool ForceSimpleCall) { - if (!CGF.HaveInsertPoint()) - return; - // Build call __kmpc_cancel_barrier(loc, thread_id); - // Build call __kmpc_barrier(loc, thread_id); +unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { unsigned Flags; if (Kind == OMPD_for) Flags = OMP_IDENT_BARRIER_IMPL_FOR; @@ -3215,6 +3238,17 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, Flags = OMP_IDENT_BARRIER_EXPL; else Flags = OMP_IDENT_BARRIER_IMPL; + return Flags; +} + +void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, + OpenMPDirectiveKind Kind, bool EmitChecks, + bool ForceSimpleCall) { + if (!CGF.HaveInsertPoint()) + return; + // Build call __kmpc_cancel_barrier(loc, thread_id); + // Build call __kmpc_barrier(loc, thread_id); + unsigned Flags = getDefaultFlagsForBarriers(Kind); // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, // thread_id); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), @@ -3287,6 +3321,18 @@ bool CGOpenMPRuntime::isStaticNonchunked( return Schedule == OMP_dist_sch_static; } +bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, + bool Chunked) const { + OpenMPSchedType Schedule = + getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); + return Schedule == OMP_sch_static_chunked; +} + +bool CGOpenMPRuntime::isStaticChunked( + OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { + OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); + return Schedule == OMP_dist_sch_static_chunked; +} bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { OpenMPSchedType Schedule = @@ -3784,8 +3830,8 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { DeviceImages, Index), HostEntriesBegin, HostEntriesEnd}; std::string Descriptor = getName({"omp_offloading", "descriptor"}); - llvm::GlobalVariable *Desc = createConstantGlobalStruct( - CGM, getTgtBinaryDescriptorQTy(), Data, Descriptor); + llvm::GlobalVariable *Desc = createGlobalStruct( + CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor); // Emit code to register or unregister the descriptor at execution // startup or closing, respectively. @@ -3818,7 +3864,19 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { CGF.disableDebugInfo(); const auto &FI = CGM.getTypes().arrangeNullaryFunction(); llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); - std::string Descriptor = getName({"omp_offloading", "descriptor_reg"}); + + // Encode offload target triples into the registration function name. It + // will serve as a comdat key for the registration/unregistration code for + // this particular combination of offloading targets. + SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U); + RegFnNameParts[0] = "omp_offloading"; + RegFnNameParts[1] = "descriptor_reg"; + llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2), + [](const llvm::Triple &T) -> const std::string& { + return T.getTriple(); + }); + llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end()); + std::string Descriptor = getName(RegFnNameParts); RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI); CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList()); CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc); @@ -3868,9 +3926,9 @@ void CGOpenMPRuntime::createOffloadEntry( llvm::ConstantInt::get(CGM.Int32Ty, Flags), llvm::ConstantInt::get(CGM.Int32Ty, 0)}; std::string EntryName = getName({"omp_offloading", "entry", ""}); - llvm::GlobalVariable *Entry = createConstantGlobalStruct( - CGM, getTgtOffloadEntryQTy(), Data, Twine(EntryName).concat(Name), - llvm::GlobalValue::WeakAnyLinkage); + llvm::GlobalVariable *Entry = createGlobalStruct( + CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, + Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); // The entry has to be created in the section the linker expects it to be. std::string Section = getName({"omp_offloading", "entries"}); @@ -3895,6 +3953,8 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { llvm::LLVMContext &C = M.getContext(); SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16> OrderedEntries(OffloadEntriesInfoManager.size()); + llvm::SmallVector<StringRef, 16> ParentFunctions( + OffloadEntriesInfoManager.size()); // Auxiliary methods to create metadata values and strings. auto &&GetMDInt = [this](unsigned V) { @@ -3909,7 +3969,7 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { // Create function that emits metadata for each target region entry; auto &&TargetRegionMetadataEmitter = - [&C, MD, &OrderedEntries, &GetMDInt, &GetMDString]( + [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString]( unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line, const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { @@ -3929,6 +3989,7 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { // Save this entry in the right position of the ordered entries array. OrderedEntries[E.getOrder()] = &E; + ParentFunctions[E.getOrder()] = ParentName; // Add metadata to the named metadata node. MD->addOperand(llvm::MDNode::get(C, Ops)); @@ -3970,6 +4031,10 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( E)) { if (!CE->getID() || !CE->getAddress()) { + // Do not blame the entry if the parent funtion is not emitted. + StringRef FnName = ParentFunctions[CE->getOrder()]; + if (!CGM.GetGlobalValue(FnName)) + continue; unsigned DiagID = CGM.getDiags().getCustomDiagID( DiagnosticsEngine::Error, "Offloading entry for target region is incorrect: either the " @@ -3995,6 +4060,9 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { CGM.getDiags().Report(DiagID); continue; } + // The vaiable has no definition - no need to add the entry. + if (CE->getVarSize().isZero()) + continue; break; } case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: @@ -5226,8 +5294,8 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, LBLVal.getPointer(), UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, Loc), - llvm::ConstantInt::getNullValue( - CGF.IntTy), // Always 0 because taskgroup emitted by the compiler + llvm::ConstantInt::getSigned( + CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler llvm::ConstantInt::getSigned( CGF.IntTy, Data.Schedule.getPointer() ? Data.Schedule.getInt() ? NumTasks : Grainsize @@ -5776,7 +5844,7 @@ static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, std::string Name = CGM.getOpenMPRuntime().getName( {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); Out << Prefix << Name << "_" - << D->getCanonicalDecl()->getLocStart().getRawEncoding(); + << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); return Out.str(); } @@ -6274,7 +6342,7 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( unsigned DeviceID; unsigned FileID; unsigned Line; - getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID, + getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, Line); SmallString<64> EntryFnName; { @@ -6589,17 +6657,17 @@ private: struct MapInfo { OMPClauseMappableExprCommon::MappableExprComponentListRef Components; OpenMPMapClauseKind MapType = OMPC_MAP_unknown; - OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown; + ArrayRef<OpenMPMapModifierKind> MapModifiers; bool ReturnDevicePointer = false; bool IsImplicit = false; MapInfo() = default; MapInfo( OMPClauseMappableExprCommon::MappableExprComponentListRef Components, - OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, + OpenMPMapClauseKind MapType, + ArrayRef<OpenMPMapModifierKind> MapModifiers, bool ReturnDevicePointer, bool IsImplicit) - : Components(Components), MapType(MapType), - MapTypeModifier(MapTypeModifier), + : Components(Components), MapType(MapType), MapModifiers(MapModifiers), ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {} }; @@ -6676,10 +6744,9 @@ private: /// a flag marking the map as a pointer if requested. Add a flag marking the /// map as the first one of a series of maps that relate to the same map /// expression. - OpenMPOffloadMappingFlags getMapTypeBits(OpenMPMapClauseKind MapType, - OpenMPMapClauseKind MapTypeModifier, - bool IsImplicit, bool AddPtrFlag, - bool AddIsTargetParamFlag) const { + OpenMPOffloadMappingFlags getMapTypeBits( + OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, + bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const { OpenMPOffloadMappingFlags Bits = IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; switch (MapType) { @@ -6702,7 +6769,6 @@ private: case OMPC_MAP_delete: Bits |= OMP_MAP_DELETE; break; - case OMPC_MAP_always: case OMPC_MAP_unknown: llvm_unreachable("Unexpected map type!"); } @@ -6710,7 +6776,8 @@ private: Bits |= OMP_MAP_PTR_AND_OBJ; if (AddIsTargetParamFlag) Bits |= OMP_MAP_TARGET_PARAM; - if (MapTypeModifier == OMPC_MAP_always) + if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) + != MapModifiers.end()) Bits |= OMP_MAP_ALWAYS; return Bits; } @@ -6746,10 +6813,11 @@ private: } // Check if the length evaluates to 1. - llvm::APSInt ConstLength; - if (!Length->EvaluateAsInt(ConstLength, CGF.getContext())) + Expr::EvalResult Result; + if (!Length->EvaluateAsInt(Result, CGF.getContext())) return true; // Can have more that size 1. + llvm::APSInt ConstLength = Result.Val.getInt(); return ConstLength.getSExtValue() != 1; } @@ -6758,12 +6826,15 @@ private: /// \a IsFirstComponent should be set to true if the provided set of /// components is the first associated with a capture. void generateInfoForComponentList( - OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, + OpenMPMapClauseKind MapType, + ArrayRef<OpenMPMapModifierKind> MapModifiers, OMPClauseMappableExprCommon::MappableExprComponentListRef Components, MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, - bool IsImplicit) const { + bool IsImplicit, + ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> + OverlappedElements = llvm::None) const { // The following summarizes what has to be generated for each map and the // types below. The generated information is expressed in this order: // base pointer, section pointer, size, flags @@ -6933,19 +7004,26 @@ private: // components. bool IsExpressionFirstInfo = true; Address BP = Address::invalid(); + const Expr *AssocExpr = I->getAssociatedExpression(); + const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); + const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); - if (isa<MemberExpr>(I->getAssociatedExpression())) { + if (isa<MemberExpr>(AssocExpr)) { // The base is the 'this' pointer. The content of the pointer is going // to be the base of the field being mapped. BP = CGF.LoadCXXThisAddress(); + } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || + (OASE && + isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { + BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); } else { // The base is the reference to the variable. // BP = &Var. - BP = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress(); + BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); if (const auto *VD = dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = - isDeclareTargetDeclaration(VD)) + OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) if (*Res == OMPDeclareTargetDeclAttr::MT_Link) { IsLink = true; BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD); @@ -7034,7 +7112,6 @@ private: Address LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress(); - llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); // If this component is a pointer inside the base struct then we don't // need to create any entry for it - it will be combined with the object @@ -7043,6 +7120,70 @@ private: IsPointer && EncounteredME && (dyn_cast<MemberExpr>(I->getAssociatedExpression()) == EncounteredME); + if (!OverlappedElements.empty()) { + // Handle base element with the info for overlapped elements. + assert(!PartialStruct.Base.isValid() && "The base element is set."); + assert(Next == CE && + "Expected last element for the overlapped elements."); + assert(!IsPointer && + "Unexpected base element with the pointer type."); + // Mark the whole struct as the struct that requires allocation on the + // device. + PartialStruct.LowestElem = {0, LB}; + CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( + I->getAssociatedExpression()->getType()); + Address HB = CGF.Builder.CreateConstGEP( + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB, + CGF.VoidPtrTy), + TypeSize.getQuantity() - 1, CharUnits::One()); + PartialStruct.HighestElem = { + std::numeric_limits<decltype( + PartialStruct.HighestElem.first)>::max(), + HB}; + PartialStruct.Base = BP; + // Emit data for non-overlapped data. + OpenMPOffloadMappingFlags Flags = + OMP_MAP_MEMBER_OF | + getMapTypeBits(MapType, MapModifiers, IsImplicit, + /*AddPtrFlag=*/false, + /*AddIsTargetParamFlag=*/false); + LB = BP; + llvm::Value *Size = nullptr; + // Do bitcopy of all non-overlapped structure elements. + for (OMPClauseMappableExprCommon::MappableExprComponentListRef + Component : OverlappedElements) { + Address ComponentLB = Address::invalid(); + for (const OMPClauseMappableExprCommon::MappableComponent &MC : + Component) { + if (MC.getAssociatedDeclaration()) { + ComponentLB = + CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) + .getAddress(); + Size = CGF.Builder.CreatePtrDiff( + CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), + CGF.EmitCastToVoidPtr(LB.getPointer())); + break; + } + } + BasePointers.push_back(BP.getPointer()); + Pointers.push_back(LB.getPointer()); + Sizes.push_back(Size); + Types.push_back(Flags); + LB = CGF.Builder.CreateConstGEP(ComponentLB, 1, + CGF.getPointerSize()); + } + BasePointers.push_back(BP.getPointer()); + Pointers.push_back(LB.getPointer()); + Size = CGF.Builder.CreatePtrDiff( + CGF.EmitCastToVoidPtr( + CGF.Builder.CreateConstGEP(HB, 1, CharUnits::One()) + .getPointer()), + CGF.EmitCastToVoidPtr(LB.getPointer())); + Sizes.push_back(Size); + Types.push_back(Flags); + break; + } + llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); if (!IsMemberPointer) { BasePointers.push_back(BP.getPointer()); Pointers.push_back(LB.getPointer()); @@ -7053,7 +7194,7 @@ private: // this map is the first one that relates with the current capture // (there is a set of entries for each capture). OpenMPOffloadMappingFlags Flags = getMapTypeBits( - MapType, MapTypeModifier, IsImplicit, + MapType, MapModifiers, IsImplicit, !IsExpressionFirstInfo || IsLink, IsCaptureFirstInfo && !IsLink); if (!IsExpressionFirstInfo) { @@ -7147,6 +7288,66 @@ private: Flags |= MemberOfFlag; } + void getPlainLayout(const CXXRecordDecl *RD, + llvm::SmallVectorImpl<const FieldDecl *> &Layout, + bool AsBase) const { + const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); + + llvm::StructType *St = + AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); + + unsigned NumElements = St->getNumElements(); + llvm::SmallVector< + llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> + RecordLayout(NumElements); + + // Fill bases. + for (const auto &I : RD->bases()) { + if (I.isVirtual()) + continue; + const auto *Base = I.getType()->getAsCXXRecordDecl(); + // Ignore empty bases. + if (Base->isEmpty() || CGF.getContext() + .getASTRecordLayout(Base) + .getNonVirtualSize() + .isZero()) + continue; + + unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); + RecordLayout[FieldIndex] = Base; + } + // Fill in virtual bases. + for (const auto &I : RD->vbases()) { + const auto *Base = I.getType()->getAsCXXRecordDecl(); + // Ignore empty bases. + if (Base->isEmpty()) + continue; + unsigned FieldIndex = RL.getVirtualBaseIndex(Base); + if (RecordLayout[FieldIndex]) + continue; + RecordLayout[FieldIndex] = Base; + } + // Fill in all the fields. + assert(!RD->isUnion() && "Unexpected union."); + for (const auto *Field : RD->fields()) { + // Fill in non-bitfields. (Bitfields always use a zero pattern, which we + // will fill in later.) + if (!Field->isBitField()) { + unsigned FieldIndex = RL.getLLVMFieldNo(Field); + RecordLayout[FieldIndex] = Field; + } + } + for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> + &Data : RecordLayout) { + if (Data.isNull()) + continue; + if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) + getPlainLayout(Base, Layout, /*AsBase=*/true); + else + Layout.push_back(Data.get<const FieldDecl *>()); + } + } + public: MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) : CurDir(Dir), CGF(CGF) { @@ -7213,28 +7414,29 @@ public: auto &&InfoGen = [&Info]( const ValueDecl *D, OMPClauseMappableExprCommon::MappableExprComponentListRef L, - OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier, + OpenMPMapClauseKind MapType, + ArrayRef<OpenMPMapModifierKind> MapModifiers, bool ReturnDevicePointer, bool IsImplicit) { const ValueDecl *VD = D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; - Info[VD].emplace_back(L, MapType, MapModifier, ReturnDevicePointer, + Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, IsImplicit); }; // FIXME: MSVC 2013 seems to require this-> to find member CurDir. for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) for (const auto &L : C->component_lists()) { - InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(), + InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(), /*ReturnDevicePointer=*/false, C->isImplicit()); } for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>()) for (const auto &L : C->component_lists()) { - InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown, + InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit()); } for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>()) for (const auto &L : C->component_lists()) { - InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown, + InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit()); } @@ -7287,7 +7489,7 @@ public: // Nonetheless, generateInfoForComponentList must be called to take // the pointer into account for the calculation of the range of the // partial struct. - InfoGen(nullptr, L.second, OMPC_MAP_unknown, OMPC_MAP_unknown, + InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit()); DeferredInfo[nullptr].emplace_back(IE, VD); } else { @@ -7321,7 +7523,7 @@ public: unsigned CurrentBasePointersIdx = CurBasePointers.size(); // FIXME: MSVC 2013 seems to require this-> to find the member method. this->generateInfoForComponentList( - L.MapType, L.MapTypeModifier, L.Components, CurBasePointers, + L.MapType, L.MapModifiers, L.Components, CurBasePointers, CurPointers, CurSizes, CurTypes, PartialStruct, IsFirstComponentList, L.IsImplicit); @@ -7375,6 +7577,82 @@ public: } } + /// Emit capture info for lambdas for variables captured by reference. + void generateInfoForLambdaCaptures( + const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers, + MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, + MapFlagsArrayTy &Types, + llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { + const auto *RD = VD->getType() + .getCanonicalType() + .getNonReferenceType() + ->getAsCXXRecordDecl(); + if (!RD || !RD->isLambda()) + return; + Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD)); + LValue VDLVal = CGF.MakeAddrLValue( + VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); + llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; + FieldDecl *ThisCapture = nullptr; + RD->getCaptureFields(Captures, ThisCapture); + if (ThisCapture) { + LValue ThisLVal = + CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); + LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); + LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer()); + BasePointers.push_back(ThisLVal.getPointer()); + Pointers.push_back(ThisLValVal.getPointer()); + Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); + Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | + OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); + } + for (const LambdaCapture &LC : RD->captures()) { + if (LC.getCaptureKind() != LCK_ByRef) + continue; + const VarDecl *VD = LC.getCapturedVar(); + auto It = Captures.find(VD); + assert(It != Captures.end() && "Found lambda capture without field."); + LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); + LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); + LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer()); + BasePointers.push_back(VarLVal.getPointer()); + Pointers.push_back(VarLValVal.getPointer()); + Sizes.push_back(CGF.getTypeSize( + VD->getType().getCanonicalType().getNonReferenceType())); + Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | + OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); + } + } + + /// Set correct indices for lambdas captures. + void adjustMemberOfForLambdaCaptures( + const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, + MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, + MapFlagsArrayTy &Types) const { + for (unsigned I = 0, E = Types.size(); I < E; ++I) { + // Set correct member_of idx for all implicit lambda captures. + if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | + OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT)) + continue; + llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]); + assert(BasePtr && "Unable to find base lambda address."); + int TgtIdx = -1; + for (unsigned J = I; J > 0; --J) { + unsigned Idx = J - 1; + if (Pointers[Idx] != BasePtr) + continue; + TgtIdx = Idx; + break; + } + assert(TgtIdx != -1 && "Unable to find parent lambda."); + // All other current entries will be MEMBER_OF the combined entry + // (except for PTR_AND_OBJ entries which do not have a placeholder value + // 0xFFFF in the MEMBER_OF field). + OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx); + setCorrectMemberOfFlag(Types[I], MemberOfFlag); + } + } + /// Generate the base pointers, section pointers, sizes and map types /// associated to a given capture. void generateInfoForCapture(const CapturedStmt::Capture *Cap, @@ -7387,9 +7665,6 @@ public: "Not expecting to generate map info for a variable array type!"); // We need to know when we generating information for the first component - // associated with a capture, because the mapping flags depend on it. - bool IsFirstComponentList = true; - const ValueDecl *VD = Cap->capturesThis() ? nullptr : Cap->getCapturedVar()->getCanonicalDecl(); @@ -7405,19 +7680,151 @@ public: return; } + using MapData = + std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef, + OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>; + SmallVector<MapData, 4> DeclComponentLists; // FIXME: MSVC 2013 seems to require this-> to find member CurDir. - for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) + for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) { for (const auto &L : C->decl_component_lists(VD)) { assert(L.first == VD && "We got information for the wrong declaration??"); assert(!L.second.empty() && "Not expecting declaration with no component lists."); - generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(), - L.second, BasePointers, Pointers, Sizes, - Types, PartialStruct, IsFirstComponentList, - C->isImplicit()); - IsFirstComponentList = false; + DeclComponentLists.emplace_back(L.second, C->getMapType(), + C->getMapTypeModifiers(), + C->isImplicit()); + } + } + + // Find overlapping elements (including the offset from the base element). + llvm::SmallDenseMap< + const MapData *, + llvm::SmallVector< + OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>, + 4> + OverlappedData; + size_t Count = 0; + for (const MapData &L : DeclComponentLists) { + OMPClauseMappableExprCommon::MappableExprComponentListRef Components; + OpenMPMapClauseKind MapType; + ArrayRef<OpenMPMapModifierKind> MapModifiers; + bool IsImplicit; + std::tie(Components, MapType, MapModifiers, IsImplicit) = L; + ++Count; + for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) { + OMPClauseMappableExprCommon::MappableExprComponentListRef Components1; + std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1; + auto CI = Components.rbegin(); + auto CE = Components.rend(); + auto SI = Components1.rbegin(); + auto SE = Components1.rend(); + for (; CI != CE && SI != SE; ++CI, ++SI) { + if (CI->getAssociatedExpression()->getStmtClass() != + SI->getAssociatedExpression()->getStmtClass()) + break; + // Are we dealing with different variables/fields? + if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration()) + break; + } + // Found overlapping if, at least for one component, reached the head of + // the components list. + if (CI == CE || SI == SE) { + assert((CI != CE || SI != SE) && + "Unexpected full match of the mapping components."); + const MapData &BaseData = CI == CE ? L : L1; + OMPClauseMappableExprCommon::MappableExprComponentListRef SubData = + SI == SE ? Components : Components1; + auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData); + OverlappedElements.getSecond().push_back(SubData); + } } + } + // Sort the overlapped elements for each item. + llvm::SmallVector<const FieldDecl *, 4> Layout; + if (!OverlappedData.empty()) { + if (const auto *CRD = + VD->getType().getCanonicalType()->getAsCXXRecordDecl()) + getPlainLayout(CRD, Layout, /*AsBase=*/false); + else { + const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl(); + Layout.append(RD->field_begin(), RD->field_end()); + } + } + for (auto &Pair : OverlappedData) { + llvm::sort( + Pair.getSecond(), + [&Layout]( + OMPClauseMappableExprCommon::MappableExprComponentListRef First, + OMPClauseMappableExprCommon::MappableExprComponentListRef + Second) { + auto CI = First.rbegin(); + auto CE = First.rend(); + auto SI = Second.rbegin(); + auto SE = Second.rend(); + for (; CI != CE && SI != SE; ++CI, ++SI) { + if (CI->getAssociatedExpression()->getStmtClass() != + SI->getAssociatedExpression()->getStmtClass()) + break; + // Are we dealing with different variables/fields? + if (CI->getAssociatedDeclaration() != + SI->getAssociatedDeclaration()) + break; + } + + // Lists contain the same elements. + if (CI == CE && SI == SE) + return false; + + // List with less elements is less than list with more elements. + if (CI == CE || SI == SE) + return CI == CE; + + const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration()); + const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration()); + if (FD1->getParent() == FD2->getParent()) + return FD1->getFieldIndex() < FD2->getFieldIndex(); + const auto It = + llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) { + return FD == FD1 || FD == FD2; + }); + return *It == FD1; + }); + } + + // Associated with a capture, because the mapping flags depend on it. + // Go through all of the elements with the overlapped elements. + for (const auto &Pair : OverlappedData) { + const MapData &L = *Pair.getFirst(); + OMPClauseMappableExprCommon::MappableExprComponentListRef Components; + OpenMPMapClauseKind MapType; + ArrayRef<OpenMPMapModifierKind> MapModifiers; + bool IsImplicit; + std::tie(Components, MapType, MapModifiers, IsImplicit) = L; + ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> + OverlappedComponents = Pair.getSecond(); + bool IsFirstComponentList = true; + generateInfoForComponentList(MapType, MapModifiers, Components, + BasePointers, Pointers, Sizes, Types, + PartialStruct, IsFirstComponentList, + IsImplicit, OverlappedComponents); + } + // Go through other elements without overlapped elements. + bool IsFirstComponentList = OverlappedData.empty(); + for (const MapData &L : DeclComponentLists) { + OMPClauseMappableExprCommon::MappableExprComponentListRef Components; + OpenMPMapClauseKind MapType; + ArrayRef<OpenMPMapModifierKind> MapModifiers; + bool IsImplicit; + std::tie(Components, MapType, MapModifiers, IsImplicit) = L; + auto It = OverlappedData.find(&L); + if (It == OverlappedData.end()) + generateInfoForComponentList(MapType, MapModifiers, Components, + BasePointers, Pointers, Sizes, Types, + PartialStruct, IsFirstComponentList, + IsImplicit); + IsFirstComponentList = false; + } } /// Generate the base pointers, section pointers, sizes and map types @@ -7436,12 +7843,12 @@ public: if (!VD) continue; llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = - isDeclareTargetDeclaration(VD); + OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); if (!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link) continue; StructRangeInfoTy PartialStruct; generateInfoForComponentList( - C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers, + C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers, Pointers, Sizes, Types, PartialStruct, /*IsFirstComponentList=*/true, C->isImplicit()); assert(!PartialStruct.Base.isValid() && @@ -7658,6 +8065,183 @@ static void emitOffloadingArraysArgument( } } +/// Checks if the expression is constant or does not have non-trivial function +/// calls. +static bool isTrivial(ASTContext &Ctx, const Expr * E) { + // We can skip constant expressions. + // We can skip expressions with trivial calls or simple expressions. + return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || + !E->hasNonTrivialCall(Ctx)) && + !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); +} + +/// Checks if the \p Body is the \a CompoundStmt and returns its child statement +/// iff there is only one that is not evaluatable at the compile time. +static const Stmt *getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body) { + if (const auto *C = dyn_cast<CompoundStmt>(Body)) { + const Stmt *Child = nullptr; + for (const Stmt *S : C->body()) { + if (const auto *E = dyn_cast<Expr>(S)) { + if (isTrivial(Ctx, E)) + continue; + } + // Some of the statements can be ignored. + if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || + isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) + continue; + // Analyze declarations. + if (const auto *DS = dyn_cast<DeclStmt>(S)) { + if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { + if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || + isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || + isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || + isa<UsingDirectiveDecl>(D) || + isa<OMPDeclareReductionDecl>(D) || + isa<OMPThreadPrivateDecl>(D)) + return true; + const auto *VD = dyn_cast<VarDecl>(D); + if (!VD) + return false; + return VD->isConstexpr() || + ((VD->getType().isTrivialType(Ctx) || + VD->getType()->isReferenceType()) && + (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); + })) + continue; + } + // Found multiple children - cannot get the one child only. + if (Child) + return Body; + Child = S; + } + if (Child) + return Child; + } + return Body; +} + +/// Check for inner distribute directive. +static const OMPExecutableDirective * +getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { + const auto *CS = D.getInnermostCapturedStmt(); + const auto *Body = + CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); + const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body); + + if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) { + OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); + switch (D.getDirectiveKind()) { + case OMPD_target: + if (isOpenMPDistributeDirective(DKind)) + return NestedDir; + if (DKind == OMPD_teams) { + Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( + /*IgnoreCaptured=*/true); + if (!Body) + return nullptr; + ChildStmt = getSingleCompoundChild(Ctx, Body); + if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { + DKind = NND->getDirectiveKind(); + if (isOpenMPDistributeDirective(DKind)) + return NND; + } + } + return nullptr; + case OMPD_target_teams: + if (isOpenMPDistributeDirective(DKind)) + return NestedDir; + return nullptr; + case OMPD_target_parallel: + case OMPD_target_simd: + case OMPD_target_parallel_for: + case OMPD_target_parallel_for_simd: + return nullptr; + case OMPD_target_teams_distribute: + case OMPD_target_teams_distribute_simd: + case OMPD_target_teams_distribute_parallel_for: + case OMPD_target_teams_distribute_parallel_for_simd: + case OMPD_parallel: + case OMPD_for: + case OMPD_parallel_for: + case OMPD_parallel_sections: + case OMPD_for_simd: + case OMPD_parallel_for_simd: + case OMPD_cancel: + case OMPD_cancellation_point: + case OMPD_ordered: + case OMPD_threadprivate: + case OMPD_task: + case OMPD_simd: + case OMPD_sections: + case OMPD_section: + case OMPD_single: + case OMPD_master: + case OMPD_critical: + case OMPD_taskyield: + case OMPD_barrier: + case OMPD_taskwait: + case OMPD_taskgroup: + case OMPD_atomic: + case OMPD_flush: + case OMPD_teams: + case OMPD_target_data: + case OMPD_target_exit_data: + case OMPD_target_enter_data: + case OMPD_distribute: + case OMPD_distribute_simd: + case OMPD_distribute_parallel_for: + case OMPD_distribute_parallel_for_simd: + case OMPD_teams_distribute: + case OMPD_teams_distribute_simd: + case OMPD_teams_distribute_parallel_for: + case OMPD_teams_distribute_parallel_for_simd: + case OMPD_target_update: + case OMPD_declare_simd: + case OMPD_declare_target: + case OMPD_end_declare_target: + case OMPD_declare_reduction: + case OMPD_taskloop: + case OMPD_taskloop_simd: + case OMPD_requires: + case OMPD_unknown: + llvm_unreachable("Unexpected directive."); + } + } + + return nullptr; +} + +void CGOpenMPRuntime::emitTargetNumIterationsCall( + CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device, + const llvm::function_ref<llvm::Value *( + CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) { + OpenMPDirectiveKind Kind = D.getDirectiveKind(); + const OMPExecutableDirective *TD = &D; + // Get nested teams distribute kind directive, if any. + if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) + TD = getNestedDistributeDirective(CGM.getContext(), D); + if (!TD) + return; + const auto *LD = cast<OMPLoopDirective>(TD); + auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF, + PrePostActionTy &) { + llvm::Value *NumIterations = SizeEmitter(CGF, *LD); + + // Emit device ID if any. + llvm::Value *DeviceID; + if (Device) + DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), + CGF.Int64Ty, /*isSigned=*/true); + else + DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); + + llvm::Value *Args[] = {DeviceID, NumIterations}; + CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args); + }; + emitInlinedDirective(CGF, OMPD_unknown, CodeGen); +} + void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Value *OutlinedFn, @@ -7790,7 +8374,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, CapturedVars.clear(); CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); } - emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, CapturedVars); + emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); CGF.EmitBranch(OffloadContBlock); CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); @@ -7804,7 +8388,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, CapturedVars.clear(); CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); } - emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, CapturedVars); + emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); }; auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, @@ -7818,6 +8402,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, // Get mappable expression information. MappableExprsHandler MEHandler(D, CGF); + llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; auto RI = CS.getCapturedRecordDecl()->field_begin(); auto CV = CapturedVars.begin(); @@ -7847,6 +8432,12 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, if (CurBasePointers.empty()) MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, CurPointers, CurSizes, CurMapTypes); + // Generate correct mapping for variables captured by reference in + // lambdas. + if (CI->capturesVariable()) + MEHandler.generateInfoForLambdaCaptures( + CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes, + CurMapTypes, LambdaPointers); } // We expect to have at least an element of information for this capture. assert(!CurBasePointers.empty() && @@ -7868,6 +8459,9 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, Sizes.append(CurSizes.begin(), CurSizes.end()); MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); } + // Adjust MEMBER_OF flags for the lambdas captures. + MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers, + Pointers, MapTypes); // Map other list items in the map clause which are not captured variables // but "declare target link" global variables. MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes, @@ -7935,7 +8529,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, unsigned DeviceID; unsigned FileID; unsigned Line; - getTargetEntryUniqueInfo(CGM.getContext(), E.getLocStart(), DeviceID, + getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID, FileID, Line); // Is this a target region that should not be emitted as an entry point? If @@ -8030,6 +8624,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, case OMPD_declare_reduction: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_requires: case OMPD_unknown: llvm_unreachable("Unknown target directive for OpenMP device codegen."); } @@ -8055,19 +8650,20 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, } bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { - const auto *FD = cast<FunctionDecl>(GD.getDecl()); - // If emitting code for the host, we do not process FD here. Instead we do // the normal code generation. if (!CGM.getLangOpts().OpenMPIsDevice) return false; + const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); + StringRef Name = CGM.getMangledName(GD); // Try to detect target regions in the function. - scanForTargetRegionsFunctions(FD->getBody(), CGM.getMangledName(GD)); + if (const auto *FD = dyn_cast<FunctionDecl>(VD)) + scanForTargetRegionsFunctions(FD->getBody(), Name); // Do not to emit function if it is not marked as declare target. - return !isDeclareTargetDeclaration(FD) && - AlreadyEmittedTargetFunctions.count(FD->getCanonicalDecl()) == 0; + return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && + AlreadyEmittedTargetFunctions.count(Name) == 0; } bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { @@ -8093,64 +8689,105 @@ bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { // Do not to emit variable if it is not marked as declare target. llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = - isDeclareTargetDeclaration(cast<VarDecl>(GD.getDecl())); - return !Res || *Res == OMPDeclareTargetDeclAttr::MT_Link; + OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( + cast<VarDecl>(GD.getDecl())); + if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) { + DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); + return true; + } + return false; } void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr) { - if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = - isDeclareTargetDeclaration(VD)) { - OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; - StringRef VarName; - CharUnits VarSize; - llvm::GlobalValue::LinkageTypes Linkage; - switch (*Res) { - case OMPDeclareTargetDeclAttr::MT_To: - Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; - VarName = CGM.getMangledName(VD); + llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = + OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); + if (!Res) { + if (CGM.getLangOpts().OpenMPIsDevice) { + // Register non-target variables being emitted in device code (debug info + // may cause this). + StringRef VarName = CGM.getMangledName(VD); + EmittedNonTargetVariables.try_emplace(VarName, Addr); + } + return; + } + // Register declare target variables. + OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; + StringRef VarName; + CharUnits VarSize; + llvm::GlobalValue::LinkageTypes Linkage; + switch (*Res) { + case OMPDeclareTargetDeclAttr::MT_To: + Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; + VarName = CGM.getMangledName(VD); + if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); - Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); - // Temp solution to prevent optimizations of the internal variables. - if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { - std::string RefName = getName({VarName, "ref"}); - if (!CGM.GetGlobalValue(RefName)) { - llvm::Constant *AddrRef = - getOrCreateInternalVariable(Addr->getType(), RefName); - auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); - GVAddrRef->setConstant(/*Val=*/true); - GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); - GVAddrRef->setInitializer(Addr); - CGM.addCompilerUsedGlobal(GVAddrRef); - } - } - break; - case OMPDeclareTargetDeclAttr::MT_Link: - Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; - if (CGM.getLangOpts().OpenMPIsDevice) { - VarName = Addr->getName(); - Addr = nullptr; - } else { - VarName = getAddrOfDeclareTargetLink(VD).getName(); - Addr = - cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer()); + assert(!VarSize.isZero() && "Expected non-zero size of the variable"); + } else { + VarSize = CharUnits::Zero(); + } + Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); + // Temp solution to prevent optimizations of the internal variables. + if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { + std::string RefName = getName({VarName, "ref"}); + if (!CGM.GetGlobalValue(RefName)) { + llvm::Constant *AddrRef = + getOrCreateInternalVariable(Addr->getType(), RefName); + auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); + GVAddrRef->setConstant(/*Val=*/true); + GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); + GVAddrRef->setInitializer(Addr); + CGM.addCompilerUsedGlobal(GVAddrRef); } - VarSize = CGM.getPointerSize(); - Linkage = llvm::GlobalValue::WeakAnyLinkage; - break; } - OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( - VarName, Addr, VarSize, Flags, Linkage); + break; + case OMPDeclareTargetDeclAttr::MT_Link: + Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; + if (CGM.getLangOpts().OpenMPIsDevice) { + VarName = Addr->getName(); + Addr = nullptr; + } else { + VarName = getAddrOfDeclareTargetLink(VD).getName(); + Addr = cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer()); + } + VarSize = CGM.getPointerSize(); + Linkage = llvm::GlobalValue::WeakAnyLinkage; + break; } + OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( + VarName, Addr, VarSize, Flags, Linkage); } bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { - if (isa<FunctionDecl>(GD.getDecl())) + if (isa<FunctionDecl>(GD.getDecl()) || + isa<OMPDeclareReductionDecl>(GD.getDecl())) return emitTargetFunctions(GD); return emitTargetGlobalVariable(GD); } +void CGOpenMPRuntime::emitDeferredTargetDecls() const { + for (const VarDecl *VD : DeferredGlobalVariables) { + llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = + OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); + if (!Res) + continue; + if (*Res == OMPDeclareTargetDeclAttr::MT_To) { + CGM.EmitGlobal(VD); + } else { + assert(*Res == OMPDeclareTargetDeclAttr::MT_Link && + "Expected to or link clauses."); + (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD); + } + } +} + +void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( + CodeGenFunction &CGF, const OMPExecutableDirective &D) const { + assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && + " Expected target-based directive."); +} + CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( CodeGenModule &CGM) : CGM(CGM) { @@ -8169,21 +8806,20 @@ bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) return true; + StringRef Name = CGM.getMangledName(GD); const auto *D = cast<FunctionDecl>(GD.getDecl()); - const FunctionDecl *FD = D->getCanonicalDecl(); // Do not to emit function if it is marked as declare target as it was already // emitted. - if (isDeclareTargetDeclaration(D)) { - if (D->hasBody() && AlreadyEmittedTargetFunctions.count(FD) == 0) { - if (auto *F = dyn_cast_or_null<llvm::Function>( - CGM.GetGlobalValue(CGM.getMangledName(GD)))) + if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { + if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) { + if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name))) return !F->isDeclaration(); return false; } return true; } - return !AlreadyEmittedTargetFunctions.insert(FD).second; + return !AlreadyEmittedTargetFunctions.insert(Name).second; } llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { @@ -8478,6 +9114,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( case OMPD_target_parallel: case OMPD_target_parallel_for: case OMPD_target_parallel_for_simd: + case OMPD_requires: case OMPD_unknown: llvm_unreachable("Unexpected standalone target data directive."); break; @@ -8730,8 +9367,8 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, ParamAttrTy &ParamAttr = ParamAttrs[Pos]; ParamAttr.Kind = Linear; if (*SI) { - if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C, - Expr::SE_AllowSideEffects)) { + Expr::EvalResult Result; + if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { if (const auto *DRE = cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { @@ -8740,6 +9377,8 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, ParamPositions[StridePVD->getCanonicalDecl()]); } } + } else { + ParamAttr.StrideOrArg = Result.Val.getInt(); } } ++SI; @@ -8782,7 +9421,8 @@ public: } // namespace void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, - const OMPLoopDirective &D) { + const OMPLoopDirective &D, + ArrayRef<Expr *> NumIterations) { if (!CGF.HaveInsertPoint()) return; @@ -8805,37 +9445,50 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, } else { RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); } + llvm::APInt Size(/*numBits=*/32, NumIterations.size()); + QualType ArrayTy = + C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0); - Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims"); - CGF.EmitNullInitialization(DimsAddr, KmpDimTy); + Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); + CGF.EmitNullInitialization(DimsAddr, ArrayTy); enum { LowerFD = 0, UpperFD, StrideFD }; // Fill dims with data. - LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy); - // dims.upper = num_iterations; - LValue UpperLVal = - CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), UpperFD)); - llvm::Value *NumIterVal = CGF.EmitScalarConversion( - CGF.EmitScalarExpr(D.getNumIterations()), D.getNumIterations()->getType(), - Int64Ty, D.getNumIterations()->getExprLoc()); - CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); - // dims.stride = 1; - LValue StrideLVal = - CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), StrideFD)); - CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), - StrideLVal); + for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { + LValue DimsLVal = + CGF.MakeAddrLValue(CGF.Builder.CreateConstArrayGEP( + DimsAddr, I, C.getTypeSizeInChars(KmpDimTy)), + KmpDimTy); + // dims.upper = num_iterations; + LValue UpperLVal = CGF.EmitLValueForField( + DimsLVal, *std::next(RD->field_begin(), UpperFD)); + llvm::Value *NumIterVal = + CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]), + D.getNumIterations()->getType(), Int64Ty, + D.getNumIterations()->getExprLoc()); + CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); + // dims.stride = 1; + LValue StrideLVal = CGF.EmitLValueForField( + DimsLVal, *std::next(RD->field_begin(), StrideFD)); + CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), + StrideLVal); + } // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, // kmp_int32 num_dims, struct kmp_dim * dims); - llvm::Value *Args[] = {emitUpdateLocation(CGF, D.getLocStart()), - getThreadID(CGF, D.getLocStart()), - llvm::ConstantInt::getSigned(CGM.Int32Ty, 1), - CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - DimsAddr.getPointer(), CGM.VoidPtrTy)}; + llvm::Value *Args[] = { + emitUpdateLocation(CGF, D.getBeginLoc()), + getThreadID(CGF, D.getBeginLoc()), + llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.Builder + .CreateConstArrayGEP(DimsAddr, 0, C.getTypeSizeInChars(KmpDimTy)) + .getPointer(), + CGM.VoidPtrTy)}; llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init); CGF.EmitRuntimeCall(RTLFn, Args); llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { - emitUpdateLocation(CGF, D.getLocEnd()), getThreadID(CGF, D.getLocEnd())}; + emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini); CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, llvm::makeArrayRef(FiniArgs)); @@ -8845,16 +9498,29 @@ void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C) { QualType Int64Ty = CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); - const Expr *CounterVal = C->getCounterValue(); - assert(CounterVal); - llvm::Value *CntVal = CGF.EmitScalarConversion(CGF.EmitScalarExpr(CounterVal), - CounterVal->getType(), Int64Ty, - CounterVal->getExprLoc()); - Address CntAddr = CGF.CreateMemTemp(Int64Ty, ".cnt.addr"); - CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty); - llvm::Value *Args[] = {emitUpdateLocation(CGF, C->getLocStart()), - getThreadID(CGF, C->getLocStart()), - CntAddr.getPointer()}; + llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); + QualType ArrayTy = CGM.getContext().getConstantArrayType( + Int64Ty, Size, ArrayType::Normal, 0); + Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); + for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { + const Expr *CounterVal = C->getLoopData(I); + assert(CounterVal); + llvm::Value *CntVal = CGF.EmitScalarConversion( + CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, + CounterVal->getExprLoc()); + CGF.EmitStoreOfScalar( + CntVal, + CGF.Builder.CreateConstArrayGEP( + CntAddr, I, CGM.getContext().getTypeSizeInChars(Int64Ty)), + /*Volatile=*/false, Int64Ty); + } + llvm::Value *Args[] = { + emitUpdateLocation(CGF, C->getBeginLoc()), + getThreadID(CGF, C->getBeginLoc()), + CGF.Builder + .CreateConstArrayGEP(CntAddr, 0, + CGM.getContext().getTypeSizeInChars(Int64Ty)) + .getPointer()}; llvm::Value *RTLFn; if (C->getDependencyKind() == OMPC_DEPEND_source) { RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); @@ -9169,7 +9835,8 @@ void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( } void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, - const OMPLoopDirective &D) { + const OMPLoopDirective &D, + ArrayRef<Expr *> NumIterations) { llvm_unreachable("Not supported in SIMD-only mode"); } |