diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2018-07-28 11:06:01 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2018-07-28 11:06:01 +0000 |
| commit | 486754660bb926339aefcf012a3f848592babb8b (patch) | |
| tree | ecdbc446c9876f4f120f701c243373cd3cb43db3 /lib/CodeGen/CGOpenMPRuntime.cpp | |
| parent | 55e6d896ad333f07bb3b1ba487df214fc268a4ab (diff) | |
Notes
Diffstat (limited to 'lib/CodeGen/CGOpenMPRuntime.cpp')
| -rw-r--r-- | lib/CodeGen/CGOpenMPRuntime.cpp | 4158 |
1 files changed, 2660 insertions, 1498 deletions
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index fa38ee80bf41..3730b9af12fa 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -14,12 +14,13 @@ #include "CGCXXABI.h" #include "CGCleanup.h" #include "CGOpenMPRuntime.h" +#include "CGRecordLayout.h" #include "CodeGenFunction.h" #include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/AST/Decl.h" #include "clang/AST/StmtOpenMP.h" +#include "clang/Basic/BitmaskEnum.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/BitmaskEnum.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DerivedTypes.h" @@ -33,20 +34,20 @@ using namespace clang; using namespace CodeGen; namespace { -/// \brief Base class for handling code generation inside OpenMP regions. +/// Base class for handling code generation inside OpenMP regions. class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { public: - /// \brief Kinds of OpenMP regions used in codegen. + /// Kinds of OpenMP regions used in codegen. enum CGOpenMPRegionKind { - /// \brief Region with outlined function for standalone 'parallel' + /// Region with outlined function for standalone 'parallel' /// directive. ParallelOutlinedRegion, - /// \brief Region with outlined function for standalone 'task' directive. + /// Region with outlined function for standalone 'task' directive. TaskOutlinedRegion, - /// \brief Region for constructs that do not require function outlining, + /// Region for constructs that do not require function outlining, /// like 'for', 'sections', 'atomic' etc. directives. InlinedRegion, - /// \brief Region with outlined function for standalone 'target' directive. + /// Region with outlined function for standalone 'target' directive. 
TargetRegion, }; @@ -63,14 +64,14 @@ public: : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} - /// \brief Get a variable or parameter for storing global thread id + /// Get a variable or parameter for storing global thread id /// inside OpenMP construct. virtual const VarDecl *getThreadIDVariable() const = 0; - /// \brief Emit the captured statement body. + /// Emit the captured statement body. void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; - /// \brief Get an LValue for the current ThreadID variable. + /// Get an LValue for the current ThreadID variable. /// \return LValue for thread id variable. This LValue always has type int32*. virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); @@ -95,7 +96,7 @@ protected: bool HasCancel; }; -/// \brief API for captured statement code generation in OpenMP constructs. +/// API for captured statement code generation in OpenMP constructs. class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { public: CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, @@ -108,11 +109,11 @@ public: assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); } - /// \brief Get a variable or parameter for storing global thread id + /// Get a variable or parameter for storing global thread id /// inside OpenMP construct. const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } - /// \brief Get the name of the capture helper. + /// Get the name of the capture helper. StringRef getHelperName() const override { return HelperName; } static bool classof(const CGCapturedStmtInfo *Info) { @@ -122,13 +123,13 @@ public: } private: - /// \brief A variable or parameter storing global thread id for OpenMP + /// A variable or parameter storing global thread id for OpenMP /// constructs. 
const VarDecl *ThreadIDVar; StringRef HelperName; }; -/// \brief API for captured statement code generation in OpenMP constructs. +/// API for captured statement code generation in OpenMP constructs. class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { public: class UntiedTaskActionTy final : public PrePostActionTy { @@ -144,11 +145,12 @@ public: void Enter(CodeGenFunction &CGF) override { if (Untied) { // Emit task switching point. - auto PartIdLVal = CGF.EmitLoadOfPointerLValue( + LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( CGF.GetAddrOfLocalVar(PartIDVar), PartIDVar->getType()->castAs<PointerType>()); - auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation()); - auto *DoneBB = CGF.createBasicBlock(".untied.done."); + llvm::Value *Res = + CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); + llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); CGF.EmitBlock(DoneBB); CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); @@ -160,7 +162,7 @@ public: } void emitUntiedSwitch(CodeGenFunction &CGF) const { if (Untied) { - auto PartIdLVal = CGF.EmitLoadOfPointerLValue( + LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( CGF.GetAddrOfLocalVar(PartIDVar), PartIDVar->getType()->castAs<PointerType>()); CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), @@ -188,14 +190,14 @@ public: assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); } - /// \brief Get a variable or parameter for storing global thread id + /// Get a variable or parameter for storing global thread id /// inside OpenMP construct. const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } - /// \brief Get an LValue for the current ThreadID variable. + /// Get an LValue for the current ThreadID variable. LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; - /// \brief Get the name of the capture helper. + /// Get the name of the capture helper. 
StringRef getHelperName() const override { return ".omp_outlined."; } void emitUntiedSwitch(CodeGenFunction &CGF) override { @@ -209,14 +211,14 @@ public: } private: - /// \brief A variable or parameter storing global thread id for OpenMP + /// A variable or parameter storing global thread id for OpenMP /// constructs. const VarDecl *ThreadIDVar; /// Action for emitting code for untied tasks. const UntiedTaskActionTy &Action; }; -/// \brief API for inlined captured statement code generation in OpenMP +/// API for inlined captured statement code generation in OpenMP /// constructs. class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { public: @@ -227,7 +229,7 @@ public: OldCSI(OldCSI), OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} - // \brief Retrieve the value of the context parameter. + // Retrieve the value of the context parameter. llvm::Value *getContextValue() const override { if (OuterRegionInfo) return OuterRegionInfo->getContextValue(); @@ -242,7 +244,7 @@ public: llvm_unreachable("No context value for inlined OpenMP region"); } - /// \brief Lookup the captured field decl for a variable. + /// Lookup the captured field decl for a variable. const FieldDecl *lookup(const VarDecl *VD) const override { if (OuterRegionInfo) return OuterRegionInfo->lookup(VD); @@ -257,7 +259,7 @@ public: return nullptr; } - /// \brief Get a variable or parameter for storing global thread id + /// Get a variable or parameter for storing global thread id /// inside OpenMP construct. const VarDecl *getThreadIDVariable() const override { if (OuterRegionInfo) @@ -265,14 +267,14 @@ public: return nullptr; } - /// \brief Get an LValue for the current ThreadID variable. + /// Get an LValue for the current ThreadID variable. 
LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { if (OuterRegionInfo) return OuterRegionInfo->getThreadIDVariableLValue(CGF); llvm_unreachable("No LValue for inlined OpenMP construct"); } - /// \brief Get the name of the capture helper. + /// Get the name of the capture helper. StringRef getHelperName() const override { if (auto *OuterRegionInfo = getOldCSI()) return OuterRegionInfo->getHelperName(); @@ -294,12 +296,12 @@ public: ~CGOpenMPInlinedRegionInfo() override = default; private: - /// \brief CodeGen info about outer OpenMP region. + /// CodeGen info about outer OpenMP region. CodeGenFunction::CGCapturedStmtInfo *OldCSI; CGOpenMPRegionInfo *OuterRegionInfo; }; -/// \brief API for captured statement code generation in OpenMP target +/// API for captured statement code generation in OpenMP target /// constructs. For this captures, implicit parameters are used instead of the /// captured fields. The name of the target region has to be unique in a given /// application so it is provided by the client, because only the client has @@ -312,11 +314,11 @@ public: /*HasCancel=*/false), HelperName(HelperName) {} - /// \brief This is unused for target regions because each starts executing + /// This is unused for target regions because each starts executing /// with a single thread. const VarDecl *getThreadIDVariable() const override { return nullptr; } - /// \brief Get the name of the capture helper. + /// Get the name of the capture helper. StringRef getHelperName() const override { return HelperName; } static bool classof(const CGCapturedStmtInfo *Info) { @@ -331,7 +333,7 @@ private: static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { llvm_unreachable("No codegen for expressions"); } -/// \brief API for generation of expressions captured in a innermost OpenMP +/// API for generation of expressions captured in a innermost OpenMP /// region. 
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { public: @@ -343,7 +345,7 @@ public: // Make sure the globals captured in the provided statement are local by // using the privatization logic. We assume the same variable is not // captured more than once. - for (auto &C : CS.captures()) { + for (const auto &C : CS.captures()) { if (!C.capturesVariable() && !C.capturesVariableByCopy()) continue; @@ -354,33 +356,32 @@ public: DeclRefExpr DRE(const_cast<VarDecl *>(VD), /*RefersToEnclosingVariableOrCapture=*/false, VD->getType().getNonReferenceType(), VK_LValue, - SourceLocation()); - PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address { - return CGF.EmitLValue(&DRE).getAddress(); - }); + C.getLocation()); + PrivScope.addPrivate( + VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); }); } (void)PrivScope.Privatize(); } - /// \brief Lookup the captured field decl for a variable. + /// Lookup the captured field decl for a variable. const FieldDecl *lookup(const VarDecl *VD) const override { - if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) + if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) return FD; return nullptr; } - /// \brief Emit the captured statement body. + /// Emit the captured statement body. void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { llvm_unreachable("No body for expressions"); } - /// \brief Get a variable or parameter for storing global thread id + /// Get a variable or parameter for storing global thread id /// inside OpenMP construct. const VarDecl *getThreadIDVariable() const override { llvm_unreachable("No thread id for expressions"); } - /// \brief Get the name of the capture helper. + /// Get the name of the capture helper. StringRef getHelperName() const override { llvm_unreachable("No helper name for expressions"); } @@ -392,14 +393,15 @@ private: CodeGenFunction::OMPPrivateScope PrivScope; }; -/// \brief RAII for emitting code of OpenMP constructs. 
+/// RAII for emitting code of OpenMP constructs. class InlinedOpenMPRegionRAII { CodeGenFunction &CGF; llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; FieldDecl *LambdaThisCaptureField = nullptr; + const CodeGen::CGBlockInfo *BlockInfo = nullptr; public: - /// \brief Constructs region for combined constructs. + /// Constructs region for combined constructs. /// \param CodeGen Code generation sequence for combined directives. Includes /// a list of functions used for code generation of implicitly inlined /// regions. @@ -412,6 +414,8 @@ public: std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); LambdaThisCaptureField = CGF.LambdaThisCaptureField; CGF.LambdaThisCaptureField = nullptr; + BlockInfo = CGF.BlockInfo; + CGF.BlockInfo = nullptr; } ~InlinedOpenMPRegionRAII() { @@ -422,28 +426,29 @@ public: CGF.CapturedStmtInfo = OldCSI; std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); CGF.LambdaThisCaptureField = LambdaThisCaptureField; + CGF.BlockInfo = BlockInfo; } }; -/// \brief Values for bit flags used in the ident_t to describe the fields. +/// Values for bit flags used in the ident_t to describe the fields. /// All enumeric elements are named and described in accordance with the code /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h enum OpenMPLocationFlags : unsigned { - /// \brief Use trampoline for internal microtask. + /// Use trampoline for internal microtask. OMP_IDENT_IMD = 0x01, - /// \brief Use c-style ident structure. + /// Use c-style ident structure. OMP_IDENT_KMPC = 0x02, - /// \brief Atomic reduction option for kmpc_reduce. + /// Atomic reduction option for kmpc_reduce. OMP_ATOMIC_REDUCE = 0x10, - /// \brief Explicit 'barrier' directive. + /// Explicit 'barrier' directive. OMP_IDENT_BARRIER_EXPL = 0x20, - /// \brief Implicit barrier in code. + /// Implicit barrier in code. OMP_IDENT_BARRIER_IMPL = 0x40, - /// \brief Implicit barrier in 'for' directive. + /// Implicit barrier in 'for' directive. 
OMP_IDENT_BARRIER_IMPL_FOR = 0x40, - /// \brief Implicit barrier in 'sections' directive. + /// Implicit barrier in 'sections' directive. OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, - /// \brief Implicit barrier in 'single' directive. + /// Implicit barrier in 'single' directive. OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, /// Call of __kmp_for_static_init for static loop. OMP_IDENT_WORK_LOOP = 0x200, @@ -454,7 +459,7 @@ enum OpenMPLocationFlags : unsigned { LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) }; -/// \brief Describes ident structure that describes a source location. +/// Describes ident structure that describes a source location. /// All descriptions are taken from /// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h /// Original structure: @@ -481,24 +486,24 @@ enum OpenMPLocationFlags : unsigned { /// */ /// } ident_t; enum IdentFieldIndex { - /// \brief might be used in Fortran + /// might be used in Fortran IdentField_Reserved_1, - /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. + /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. IdentField_Flags, - /// \brief Not really used in Fortran any more + /// Not really used in Fortran any more IdentField_Reserved_2, - /// \brief Source[4] in Fortran, do not use for C++ + /// Source[4] in Fortran, do not use for C++ IdentField_Reserved_3, - /// \brief String describing the source location. The string is composed of + /// String describing the source location. The string is composed of /// semi-colon separated fields which describe the source file, the function /// and a pair of line numbers that delimit the construct. IdentField_PSource }; -/// \brief Schedule types for 'omp for' loops (these enumerators are taken from +/// Schedule types for 'omp for' loops (these enumerators are taken from /// the enum sched_type in kmp.h). enum OpenMPSchedType { - /// \brief Lower bound for default (unordered) versions. 
+ /// Lower bound for default (unordered) versions. OMP_sch_lower = 32, OMP_sch_static_chunked = 33, OMP_sch_static = 34, @@ -508,7 +513,7 @@ enum OpenMPSchedType { OMP_sch_auto = 38, /// static with chunk adjustment (e.g., simd) OMP_sch_static_balanced_chunked = 45, - /// \brief Lower bound for 'ordered' versions. + /// Lower bound for 'ordered' versions. OMP_ord_lower = 64, OMP_ord_static_chunked = 65, OMP_ord_static = 66, @@ -517,7 +522,7 @@ enum OpenMPSchedType { OMP_ord_runtime = 69, OMP_ord_auto = 70, OMP_sch_default = OMP_sch_static, - /// \brief dist_schedule types + /// dist_schedule types OMP_dist_sch_static_chunked = 91, OMP_dist_sch_static = 92, /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. @@ -528,13 +533,13 @@ enum OpenMPSchedType { }; enum OpenMPRTLFunction { - /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, + /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, /// kmpc_micro microtask, ...); OMPRTL__kmpc_fork_call, - /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc, + /// Call to void *__kmpc_threadprivate_cached(ident_t *loc, /// kmp_int32 global_tid, void *data, size_t size, void ***cache); OMPRTL__kmpc_threadprivate_cached, - /// \brief Call to void __kmpc_threadprivate_register( ident_t *, + /// Call to void __kmpc_threadprivate_register( ident_t *, /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); OMPRTL__kmpc_threadprivate_register, // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc); @@ -742,11 +747,11 @@ void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { /// UDR decl used for reduction. 
static const OMPDeclareReductionDecl * getReductionInit(const Expr *ReductionOp) { - if (auto *CE = dyn_cast<CallExpr>(ReductionOp)) - if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) - if (auto *DRE = + if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) + if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) + if (const auto *DRE = dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) - if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) + if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) return DRD; return nullptr; } @@ -759,48 +764,51 @@ static void emitInitWithReductionInitializer(CodeGenFunction &CGF, if (DRD->getInitializer()) { std::pair<llvm::Function *, llvm::Function *> Reduction = CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); - auto *CE = cast<CallExpr>(InitOp); - auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); + const auto *CE = cast<CallExpr>(InitOp); + const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); - auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); - auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); + const auto *LHSDRE = + cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); + const auto *RHSDRE = + cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); CodeGenFunction::OMPPrivateScope PrivateScope(CGF); PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), - [=]() -> Address { return Private; }); + [=]() { return Private; }); PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), - [=]() -> Address { return Original; }); + [=]() { return Original; }); (void)PrivateScope.Privatize(); RValue Func = RValue::get(Reduction.second); CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); CGF.EmitIgnoredExpr(InitOp); } else { llvm::Constant *Init = 
CGF.CGM.EmitNullConstant(Ty); + std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); auto *GV = new llvm::GlobalVariable( CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, - llvm::GlobalValue::PrivateLinkage, Init, ".init"); + llvm::GlobalValue::PrivateLinkage, Init, Name); LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); RValue InitRVal; switch (CGF.getEvaluationKind(Ty)) { case TEK_Scalar: - InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation()); + InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); break; case TEK_Complex: InitRVal = - RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation())); + RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); break; case TEK_Aggregate: InitRVal = RValue::getAggregate(LV.getAddress()); break; } - OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue); + OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), /*IsInitializer=*/false); } } -/// \brief Emit initialization of arrays of complex types. +/// Emit initialization of arrays of complex types. /// \param DestAddr Address of the array. /// \param Type Type of array. /// \param Init Initial expression of array. @@ -814,8 +822,8 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, QualType ElementTy; // Drill down to the base element type on both arrays. 
- auto ArrayTy = Type->getAsArrayTypeUnsafe(); - auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); + const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); + llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); DestAddr = CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); if (DRD) @@ -825,18 +833,18 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, llvm::Value *SrcBegin = nullptr; if (DRD) SrcBegin = SrcAddr.getPointer(); - auto DestBegin = DestAddr.getPointer(); + llvm::Value *DestBegin = DestAddr.getPointer(); // Cast from pointer to array type to pointer to single element. - auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); + llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); // The basic structure here is a while-do loop. - auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); - auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); - auto IsEmpty = + llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); + llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); + llvm::Value *IsEmpty = CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); // Enter the loop body, making that address the current address. - auto EntryBB = CGF.Builder.GetInsertBlock(); + llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); CGF.EmitBlock(BodyBB); CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); @@ -871,16 +879,16 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, if (DRD) { // Shift the address forward by one element. 
- auto SrcElementNext = CGF.Builder.CreateConstGEP1_32( + llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); } // Shift the address forward by one element. - auto DestElementNext = CGF.Builder.CreateConstGEP1_32( + llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); // Check whether we've reached the end. - auto Done = + llvm::Value *Done = CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); @@ -889,6 +897,25 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, CGF.EmitBlock(DoneBB, /*IsFinished=*/true); } +static llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> +isDeclareTargetDeclaration(const ValueDecl *VD) { + for (const Decl *D : VD->redecls()) { + if (!D->hasAttrs()) + continue; + if (const auto *Attr = D->getAttr<OMPDeclareTargetDeclAttr>()) + return Attr->getMapType(); + } + if (const auto *V = dyn_cast<VarDecl>(VD)) { + if (const VarDecl *TD = V->getTemplateInstantiationPattern()) + return isDeclareTargetDeclaration(TD); + } else if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { + if (const auto *TD = FD->getTemplateInstantiationPattern()) + return isDeclareTargetDeclaration(TD); + } + + return llvm::None; +} + LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { return CGF.EmitOMPSharedLValue(E); } @@ -906,7 +933,7 @@ void ReductionCodeGen::emitAggregateInitialization( // Emit VarDecl with copy init for arrays. // Get the address of the original variable captured in current // captured region. 
- auto *PrivateVD = + const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); bool EmitDeclareReductionInit = DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); @@ -926,7 +953,7 @@ ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, BaseDecls.reserve(Shareds.size()); auto IPriv = Privates.begin(); auto IRed = ReductionOps.begin(); - for (const auto *Ref : Shareds) { + for (const Expr *Ref : Shareds) { ClausesData.emplace_back(Ref, *IPriv, *IRed); std::advance(IPriv, 1); std::advance(IRed, 1); @@ -942,7 +969,7 @@ void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) { } void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { - auto *PrivateVD = + const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); QualType PrivateType = PrivateVD->getType(); bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); @@ -955,7 +982,7 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { } llvm::Value *Size; llvm::Value *SizeInChars; - llvm::Type *ElemType = + auto *ElemType = cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType()) ->getElementType(); auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); @@ -981,7 +1008,7 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, llvm::Value *Size) { - auto *PrivateVD = + const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); QualType PrivateType = PrivateVD->getType(); if (!PrivateType->isVariablyModifiedType()) { @@ -1002,9 +1029,10 @@ void ReductionCodeGen::emitInitialization( CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { assert(SharedAddresses.size() > N && "No variable was generated"); - auto *PrivateVD = + const auto 
*PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); - auto *DRD = getReductionInit(ClausesData[N].ReductionOp); + const OMPDeclareReductionDecl *DRD = + getReductionInit(ClausesData[N].ReductionOp); QualType PrivateType = PrivateVD->getType(); PrivateAddr = CGF.Builder.CreateElementBitCast( PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); @@ -1029,7 +1057,7 @@ void ReductionCodeGen::emitInitialization( } bool ReductionCodeGen::needCleanups(unsigned N) { - auto *PrivateVD = + const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); QualType PrivateType = PrivateVD->getType(); QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); @@ -1038,7 +1066,7 @@ bool ReductionCodeGen::needCleanups(unsigned N) { void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr) { - auto *PrivateVD = + const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); QualType PrivateType = PrivateVD->getType(); QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); @@ -1054,9 +1082,9 @@ static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, BaseTy = BaseTy.getNonReferenceType(); while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && !CGF.getContext().hasSameType(BaseTy, ElTy)) { - if (auto *PtrTy = BaseTy->getAs<PointerType>()) + if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); - else { + } else { LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy); BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); } @@ -1097,28 +1125,32 @@ static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, return Address(Addr, BaseLVAlignment); } -Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, - Address PrivateAddr) { - const DeclRefExpr *DE; +static const VarDecl *getBaseDecl(const Expr 
*Ref, const DeclRefExpr *&DE) { const VarDecl *OrigVD = nullptr; - if (auto *OASE = dyn_cast<OMPArraySectionExpr>(ClausesData[N].Ref)) { - auto *Base = OASE->getBase()->IgnoreParenImpCasts(); - while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) + if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { + const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); + while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) Base = TempOASE->getBase()->IgnoreParenImpCasts(); - while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) + while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) Base = TempASE->getBase()->IgnoreParenImpCasts(); DE = cast<DeclRefExpr>(Base); OrigVD = cast<VarDecl>(DE->getDecl()); - } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(ClausesData[N].Ref)) { - auto *Base = ASE->getBase()->IgnoreParenImpCasts(); - while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) + } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { + const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); + while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) Base = TempASE->getBase()->IgnoreParenImpCasts(); DE = cast<DeclRefExpr>(Base); OrigVD = cast<VarDecl>(DE->getDecl()); } - if (OrigVD) { + return OrigVD; +} + +Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, + Address PrivateAddr) { + const DeclRefExpr *DE; + if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { BaseDecls.emplace_back(OrigVD); - auto OriginalBaseLValue = CGF.EmitLValue(DE); + LValue OriginalBaseLValue = CGF.EmitLValue(DE); LValue BaseLValue = loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), OriginalBaseLValue); @@ -1140,7 +1172,8 @@ Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, } bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { - auto *DRD = getReductionInit(ClausesData[N].ReductionOp); + const 
OMPDeclareReductionDecl *DRD = + getReductionInit(ClausesData[N].ReductionOp); return DRD && DRD->getInitializer(); } @@ -1170,12 +1203,38 @@ LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( AlignmentSource::Decl); } -CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) - : CGM(CGM), OffloadEntriesInfoManager(CGM) { - IdentTy = llvm::StructType::create( - "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */, - CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */, - CGM.Int8PtrTy /* psource */); +static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, + QualType FieldTy) { + auto *Field = FieldDecl::Create( + C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, + C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), + /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); + Field->setAccess(AS_public); + DC->addDecl(Field); + return Field; +} + +CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, + StringRef Separator) + : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), + OffloadEntriesInfoManager(CGM) { + ASTContext &C = CGM.getContext(); + RecordDecl *RD = C.buildImplicitRecord("ident_t"); + QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); + RD->startDefinition(); + // reserved_1 + addFieldToRecordDecl(C, RD, KmpInt32Ty); + // flags + addFieldToRecordDecl(C, RD, KmpInt32Ty); + // reserved_2 + addFieldToRecordDecl(C, RD, KmpInt32Ty); + // reserved_3 + addFieldToRecordDecl(C, RD, KmpInt32Ty); + // psource + addFieldToRecordDecl(C, RD, C.VoidPtrTy); + RD->completeDefinition(); + IdentQTy = C.getRecordType(RD); + IdentTy = CGM.getTypes().ConvertRecordDeclType(RD); KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); loadOffloadInfoMetadata(); @@ -1185,12 +1244,23 @@ void CGOpenMPRuntime::clear() { InternalVars.clear(); } +std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { + 
SmallString<128> Buffer; + llvm::raw_svector_ostream OS(Buffer); + StringRef Sep = FirstSeparator; + for (StringRef Part : Parts) { + OS << Sep << Part; + Sep = Separator; + } + return OS.str(); +} + static llvm::Function * emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, const Expr *CombinerInitializer, const VarDecl *In, const VarDecl *Out, bool IsCombiner) { // void .omp_combiner.(Ty *in, Ty *out); - auto &C = CGM.getContext(); + ASTContext &C = CGM.getContext(); QualType PtrTy = C.getPointerType(Ty).withRestrict(); FunctionArgList Args; ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), @@ -1199,28 +1269,30 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); Args.push_back(&OmpOutParm); Args.push_back(&OmpInParm); - auto &FnInfo = + const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); - auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); - auto *Fn = llvm::Function::Create( - FnTy, llvm::GlobalValue::InternalLinkage, - IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule()); - CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); + llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + std::string Name = CGM.getOpenMPRuntime().getName( + {IsCombiner ? "omp_combiner" : "omp_initializer", ""}); + auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, + Name, &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); Fn->removeFnAttr(llvm::Attribute::NoInline); Fn->removeFnAttr(llvm::Attribute::OptimizeNone); Fn->addFnAttr(llvm::Attribute::AlwaysInline); CodeGenFunction CGF(CGM); // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 
- CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), + Out->getLocation()); CodeGenFunction::OMPPrivateScope Scope(CGF); Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); - Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address { + Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) .getAddress(); }); Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); - Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address { + Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) .getAddress(); }); @@ -1242,7 +1314,7 @@ void CGOpenMPRuntime::emitUserDefinedReduction( CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { if (UDRMap.count(D) > 0) return; - auto &C = CGM.getContext(); + ASTContext &C = CGM.getContext(); if (!In || !Out) { In = &C.Idents.get("omp_in"); Out = &C.Idents.get("omp_out"); @@ -1252,7 +1324,7 @@ void CGOpenMPRuntime::emitUserDefinedReduction( cast<VarDecl>(D->lookup(Out).front()), /*IsCombiner=*/true); llvm::Function *Initializer = nullptr; - if (auto *Init = D->getInitializer()) { + if (const Expr *Init = D->getInitializer()) { if (!Priv || !Orig) { Priv = &C.Idents.get("omp_priv"); Orig = &C.Idents.get("omp_orig"); @@ -1265,7 +1337,7 @@ void CGOpenMPRuntime::emitUserDefinedReduction( cast<VarDecl>(D->lookup(Priv).front()), /*IsCombiner=*/false); } - UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer))); + UDRMap.try_emplace(D, Combiner, Initializer); if (CGF) { auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); Decls.second.push_back(D); @@ -1281,25 +1353,6 @@ CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { return UDRMap.lookup(D); } -// Layout information for ident_t. 
-static CharUnits getIdentAlign(CodeGenModule &CGM) { - return CGM.getPointerAlign(); -} -static CharUnits getIdentSize(CodeGenModule &CGM) { - assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign())); - return CharUnits::fromQuantity(16) + CGM.getPointerSize(); -} -static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) { - // All the fields except the last are i32, so this works beautifully. - return unsigned(Field) * CharUnits::fromQuantity(4); -} -static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr, - IdentFieldIndex Field, - const llvm::Twine &Name = "") { - auto Offset = getOffsetOfIdentField(Field); - return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name); -} - static llvm::Value *emitParallelOrTeamsOutlinedFunction( CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, @@ -1308,19 +1361,20 @@ static llvm::Value *emitParallelOrTeamsOutlinedFunction( "thread id variable must be of type kmp_int32 *"); CodeGenFunction CGF(CGM, true); bool HasCancel = false; - if (auto *OPD = dyn_cast<OMPParallelDirective>(&D)) + if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) HasCancel = OPD->hasCancel(); - else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) + else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) HasCancel = OPSD->hasCancel(); - else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) + else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) HasCancel = OPFD->hasCancel(); - else if (auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) + else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) HasCancel = OPFD->hasCancel(); - else if (auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) + else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) HasCancel = OPFD->hasCancel(); - else if (auto *OPFD = 
dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) + else if (const auto *OPFD = + dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) HasCancel = OPFD->hasCancel(); - else if (auto *OPFD = + else if (const auto *OPFD = dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) HasCancel = OPFD->hasCancel(); CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, @@ -1352,8 +1406,8 @@ llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( bool Tied, unsigned &NumberOfParts) { auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, PrePostActionTy &) { - auto *ThreadID = getThreadID(CGF, D.getLocStart()); - auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart()); + llvm::Value *ThreadID = getThreadID(CGF, D.getLocStart()); + llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getLocStart()); llvm::Value *TaskArgs[] = { UpLoc, ThreadID, CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), @@ -1366,21 +1420,69 @@ llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( CodeGen.setAction(Action); assert(!ThreadIDVar->getType()->isPointerType() && "thread id variable must be of type kmp_int32 for tasks"); - auto *CS = cast<CapturedStmt>(D.getAssociatedStmt()); - auto *TD = dyn_cast<OMPTaskDirective>(&D); + const OpenMPDirectiveKind Region = + isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop + : OMPD_task; + const CapturedStmt *CS = D.getCapturedStmt(Region); + const auto *TD = dyn_cast<OMPTaskDirective>(&D); CodeGenFunction CGF(CGM, true); CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, TD ? 
TD->hasCancel() : false, Action); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - auto *Res = CGF.GenerateCapturedStmtFunction(*CS); + llvm::Value *Res = CGF.GenerateCapturedStmtFunction(*CS); if (!Tied) NumberOfParts = Action.getNumberOfParts(); return Res; } +static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, + const RecordDecl *RD, const CGRecordLayout &RL, + ArrayRef<llvm::Constant *> Data) { + llvm::StructType *StructTy = RL.getLLVMType(); + unsigned PrevIdx = 0; + ConstantInitBuilder CIBuilder(CGM); + auto DI = Data.begin(); + for (const FieldDecl *FD : RD->fields()) { + unsigned Idx = RL.getLLVMFieldNo(FD); + // Fill the alignment. + for (unsigned I = PrevIdx; I < Idx; ++I) + Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); + PrevIdx = Idx + 1; + Fields.add(*DI); + ++DI; + } +} + +template <class... As> +static llvm::GlobalVariable * +createConstantGlobalStruct(CodeGenModule &CGM, QualType Ty, + ArrayRef<llvm::Constant *> Data, const Twine &Name, + As &&... 
Args) { + const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); + const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); + ConstantInitBuilder CIBuilder(CGM); + ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); + buildStructValue(Fields, CGM, RD, RL, Data); + return Fields.finishAndCreateGlobal( + Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), + /*isConstant=*/true, std::forward<As>(Args)...); +} + +template <typename T> +static void +createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, + ArrayRef<llvm::Constant *> Data, + T &Parent) { + const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); + const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); + ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); + buildStructValue(Fields, CGM, RD, RL, Data); + Fields.finishAndAddTo(Parent); +} + Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { - CharUnits Align = getIdentAlign(CGM); + CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); if (!Entry) { if (!DefaultOpenMPPSource) { @@ -1394,17 +1496,15 @@ Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); } - ConstantInitBuilder builder(CGM); - auto fields = builder.beginStruct(IdentTy); - fields.addInt(CGM.Int32Ty, 0); - fields.addInt(CGM.Int32Ty, Flags); - fields.addInt(CGM.Int32Ty, 0); - fields.addInt(CGM.Int32Ty, 0); - fields.add(DefaultOpenMPPSource); - auto DefaultOpenMPLocation = - fields.finishAndCreateGlobal("", Align, /*isConstant*/ true, - llvm::GlobalValue::PrivateLinkage); - DefaultOpenMPLocation->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + llvm::Constant *Data[] = {llvm::ConstantInt::getNullValue(CGM.Int32Ty), + llvm::ConstantInt::get(CGM.Int32Ty, Flags), + llvm::ConstantInt::getNullValue(CGM.Int32Ty), + 
llvm::ConstantInt::getNullValue(CGM.Int32Ty), + DefaultOpenMPPSource}; + llvm::GlobalValue *DefaultOpenMPLocation = createConstantGlobalStruct( + CGM, IdentQTy, Data, "", llvm::GlobalValue::PrivateLinkage); + DefaultOpenMPLocation->setUnnamedAddr( + llvm::GlobalValue::UnnamedAddr::Global); OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation; } @@ -1422,17 +1522,17 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, assert(CGF.CurFn && "No function in current CodeGenFunction."); + CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); Address LocValue = Address::invalid(); auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); if (I != OpenMPLocThreadIDMap.end()) - LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM)); + LocValue = Address(I->second.DebugLoc, Align); // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if // GetOpenMPThreadID was called before this routine. if (!LocValue.isValid()) { // Generate "ident_t .kmpc_loc.addr;" - Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM), - ".kmpc_loc.addr"); + Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr"); auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); Elem.second.DebugLoc = AI.getPointer(); LocValue = AI; @@ -1440,29 +1540,30 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, CGBuilderTy::InsertPointGuard IPG(CGF.Builder); CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), - CGM.getSize(getIdentSize(CGF.CGM))); + CGF.getTypeSize(IdentQTy)); } // char **psource = &.kmpc_loc_<flags>.addr.psource; - Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource); + LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy); + auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin(); + LValue PSource = + CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource)); - auto OMPDebugLoc = 
OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); + llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); if (OMPDebugLoc == nullptr) { SmallString<128> Buffer2; llvm::raw_svector_ostream OS2(Buffer2); // Build debug location PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); OS2 << ";" << PLoc.getFilename() << ";"; - if (const FunctionDecl *FD = - dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) { + if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) OS2 << FD->getQualifiedNameAsString(); - } OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; } // *psource = ";<File>;<Function>;<Line>;<Column>;;"; - CGF.Builder.CreateStore(OMPDebugLoc, PSource); + CGF.EmitStoreOfScalar(OMPDebugLoc, PSource); // Our callers always pass this to a runtime function, so for // convenience, go ahead and return a naked pointer. @@ -1490,8 +1591,8 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { if (OMPRegionInfo->getThreadIDVariable()) { // Check if this an outlined function with thread id passed as argument. - auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); - ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); + LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); + ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); // If value loaded in entry block, cache it and use it everywhere in // function. if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { @@ -1509,7 +1610,7 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, // function. 
CGBuilderTy::InsertPointGuard IPG(CGF.Builder); CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); - auto *Call = CGF.Builder.CreateCall( + llvm::CallInst *Call = CGF.Builder.CreateCall( createRuntimeFunction(OMPRTL__kmpc_global_thread_num), emitUpdateLocation(CGF, Loc)); Call->setCallingConv(CGF.getRuntimeCC()); @@ -1523,17 +1624,14 @@ void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { if (OpenMPLocThreadIDMap.count(CGF.CurFn)) OpenMPLocThreadIDMap.erase(CGF.CurFn); if (FunctionUDRMap.count(CGF.CurFn) > 0) { - for(auto *D : FunctionUDRMap[CGF.CurFn]) { + for(auto *D : FunctionUDRMap[CGF.CurFn]) UDRMap.erase(D); - } FunctionUDRMap.erase(CGF.CurFn); } } llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { - if (!IdentTy) { - } - return llvm::PointerType::getUnqual(IdentTy); + return IdentTy->getPointerTo(); } llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { @@ -1555,7 +1653,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // microtask, ...); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, getKmpc_MicroPointerTy()}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); break; @@ -1563,7 +1661,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { case OMPRTL__kmpc_global_thread_num: { // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); break; @@ -1574,7 +1672,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.SizeTy, CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, 
/*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); break; @@ -1585,7 +1683,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { llvm::Type *TypeParams[] = { getIdentTyPointerTy(), CGM.Int32Ty, llvm::PointerType::getUnqual(KmpCriticalNameTy)}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); break; @@ -1596,7 +1694,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, llvm::PointerType::getUnqual(KmpCriticalNameTy), CGM.IntPtrTy}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint"); break; @@ -1605,21 +1703,22 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // Build void __kmpc_threadprivate_register(ident_t *, void *data, // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); // typedef void *(*kmpc_ctor)(void *); - auto KmpcCtorTy = + auto *KmpcCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, /*isVarArg*/ false)->getPointerTo(); // typedef void *(*kmpc_cctor)(void *, void *); llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; - auto KmpcCopyCtorTy = + auto *KmpcCopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, - /*isVarArg*/ false)->getPointerTo(); + /*isVarArg*/ false) + ->getPointerTo(); // typedef void (*kmpc_dtor)(void *); - auto KmpcDtorTy = + auto *KmpcDtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) ->getPointerTo(); llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, KmpcCopyCtorTy, KmpcDtorTy}; - auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, /*isVarArg*/ false); RTLFn = 
CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); break; @@ -1630,7 +1729,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { llvm::Type *TypeParams[] = { getIdentTyPointerTy(), CGM.Int32Ty, llvm::PointerType::getUnqual(KmpCriticalNameTy)}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); break; @@ -1639,7 +1738,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 // global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); break; @@ -1647,7 +1746,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { case OMPRTL__kmpc_barrier: { // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); break; @@ -1655,7 +1754,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { case OMPRTL__kmpc_for_static_fini: { // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); break; @@ -1665,7 +1764,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // kmp_int32 num_threads) llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = 
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); break; @@ -1674,7 +1773,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 // global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); break; @@ -1683,7 +1782,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 // global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); break; @@ -1691,7 +1790,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { case OMPRTL__kmpc_flush: { // Build void __kmpc_flush(ident_t *loc); llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); break; @@ -1699,7 +1798,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { case OMPRTL__kmpc_master: { // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); break; @@ -1707,7 +1806,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { case OMPRTL__kmpc_end_master: { // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); llvm::Type 
*TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); break; @@ -1716,7 +1815,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, // int end_part); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); break; @@ -1724,7 +1823,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { case OMPRTL__kmpc_single: { // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); break; @@ -1732,7 +1831,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { case OMPRTL__kmpc_end_single: { // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); break; @@ -1746,7 +1845,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; // Return void * and then cast to particular kmp_task_t type. 
- llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); break; @@ -1756,7 +1855,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // *new_task); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); break; @@ -1771,7 +1870,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, CGM.VoidPtrTy, CpyFnTy->getPointerTo(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); break; @@ -1787,7 +1886,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), llvm::PointerType::getUnqual(KmpCriticalNameTy)}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); break; @@ -1804,7 +1903,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), llvm::PointerType::getUnqual(KmpCriticalNameTy)}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); break; @@ -1815,7 +1914,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { llvm::Type *TypeParams[] = { getIdentTyPointerTy(), CGM.Int32Ty, 
llvm::PointerType::getUnqual(KmpCriticalNameTy)}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); break; @@ -1826,7 +1925,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { llvm::Type *TypeParams[] = { getIdentTyPointerTy(), CGM.Int32Ty, llvm::PointerType::getUnqual(KmpCriticalNameTy)}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); @@ -1837,7 +1936,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // *new_task); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); @@ -1848,7 +1947,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // *new_task); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_complete_if0"); @@ -1857,7 +1956,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { case OMPRTL__kmpc_ordered: { // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); break; @@ -1865,7 +1964,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { case OMPRTL__kmpc_end_ordered: { // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); llvm::Type 
*TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); break; @@ -1873,7 +1972,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { case OMPRTL__kmpc_omp_taskwait: { // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); break; @@ -1881,7 +1980,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { case OMPRTL__kmpc_taskgroup: { // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); break; @@ -1889,7 +1988,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { case OMPRTL__kmpc_end_taskgroup: { // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); break; @@ -1898,7 +1997,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, // int proc_bind) llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); break; @@ -1910,7 +2009,7 
@@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { llvm::Type *TypeParams[] = { getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); @@ -1923,7 +2022,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); break; @@ -1932,7 +2031,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 // global_tid, kmp_int32 cncl_kind) llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); break; @@ -1941,7 +2040,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, // kmp_int32 cncl_kind) llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); break; @@ -1951,7 +2050,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // kmp_int32 num_teams, kmp_int32 num_threads) llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ 
false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); break; @@ -1961,7 +2060,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // microtask, ...); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, getKmpc_MicroPointerTy()}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); break; @@ -1981,7 +2080,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { CGM.IntTy, CGM.Int64Ty, CGM.VoidPtrTy}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); break; @@ -1993,7 +2092,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { CGM.Int32Ty, CGM.Int32Ty, CGM.VoidPtrTy}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); break; @@ -2001,7 +2100,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { case OMPRTL__kmpc_doacross_fini: { // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); break; @@ -2011,7 +2110,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // *vec); llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int64Ty->getPointerTo()}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); break; @@ -2021,7 +2120,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // *vec); 
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int64Ty->getPointerTo()}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); break; @@ -2030,7 +2129,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void // *data); llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init"); @@ -2040,7 +2139,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void // *d); llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction( FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); @@ -2057,7 +2156,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { CGM.VoidPtrPtrTy, CGM.SizeTy->getPointerTo(), CGM.Int64Ty->getPointerTo()}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); break; @@ -2073,7 +2172,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { CGM.VoidPtrPtrTy, CGM.SizeTy->getPointerTo(), CGM.Int64Ty->getPointerTo()}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait"); break; @@ -2091,7 +2190,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { CGM.Int64Ty->getPointerTo(), CGM.Int32Ty, CGM.Int32Ty}; - llvm::FunctionType *FnTy = + 
auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); break; @@ -2109,7 +2208,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { CGM.Int64Ty->getPointerTo(), CGM.Int32Ty, CGM.Int32Ty}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); break; @@ -2119,7 +2218,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { QualType ParamTy = CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); break; @@ -2129,7 +2228,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { QualType ParamTy = CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); break; @@ -2143,7 +2242,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { CGM.VoidPtrPtrTy, CGM.SizeTy->getPointerTo(), CGM.Int64Ty->getPointerTo()}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); break; @@ -2172,7 +2271,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { CGM.VoidPtrPtrTy, CGM.SizeTy->getPointerTo(), CGM.Int64Ty->getPointerTo()}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, 
"__tgt_target_data_end"); break; @@ -2201,7 +2300,7 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { CGM.VoidPtrPtrTy, CGM.SizeTy->getPointerTo(), CGM.Int64Ty->getPointerTo()}; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); break; @@ -2230,12 +2329,12 @@ llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { assert((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime"); - auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" - : "__kmpc_for_static_init_4u") - : (IVSigned ? "__kmpc_for_static_init_8" - : "__kmpc_for_static_init_8u"); - auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; - auto PtrTy = llvm::PointerType::getUnqual(ITy); + StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" + : "__kmpc_for_static_init_4u") + : (IVSigned ? "__kmpc_for_static_init_8" + : "__kmpc_for_static_init_8u"); + llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; + auto *PtrTy = llvm::PointerType::getUnqual(ITy); llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc CGM.Int32Ty, // tid @@ -2247,7 +2346,7 @@ llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, ITy, // incr ITy // chunk }; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); return CGM.CreateRuntimeFunction(FnTy, Name); } @@ -2256,11 +2355,11 @@ llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { assert((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime"); - auto Name = + StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); - auto ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; + llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc CGM.Int32Ty, // tid CGM.Int32Ty, // schedtype @@ -2269,7 +2368,7 @@ llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, ITy, // stride ITy // chunk }; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); return CGM.CreateRuntimeFunction(FnTy, Name); } @@ -2278,7 +2377,7 @@ llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { assert((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime"); - auto Name = + StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); @@ -2286,7 +2385,7 @@ llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, getIdentTyPointerTy(), // loc CGM.Int32Ty, // tid }; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); return CGM.CreateRuntimeFunction(FnTy, Name); } @@ -2295,12 +2394,12 @@ llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { assert((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime"); - auto Name = + StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); - auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; - auto PtrTy = llvm::PointerType::getUnqual(ITy); + llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; + auto *PtrTy = llvm::PointerType::getUnqual(ITy); llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc CGM.Int32Ty, // tid @@ -2309,18 +2408,48 @@ llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, PtrTy, // p_upper PtrTy // p_stride }; - llvm::FunctionType *FnTy = + auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); return CGM.CreateRuntimeFunction(FnTy, Name); } +Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) { + if (CGM.getLangOpts().OpenMPSimd) + return Address::invalid(); + llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = + isDeclareTargetDeclaration(VD); + if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) { + SmallString<64> PtrName; + { + llvm::raw_svector_ostream OS(PtrName); + OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_link_ptr"; + } + llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); + if (!Ptr) { + QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); + Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), + PtrName); + if (!CGM.getLangOpts().OpenMPIsDevice) { + auto *GV = cast<llvm::GlobalVariable>(Ptr); + GV->setLinkage(llvm::GlobalValue::ExternalLinkage); + GV->setInitializer(CGM.GetAddrOfGlobal(VD)); + } + CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr)); + registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); + } + return Address(Ptr, CGM.getContext().getDeclAlign(VD)); + } + return Address::invalid(); +} + llvm::Constant * CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { assert(!CGM.getLangOpts().OpenMPUseTLS || !CGM.getContext().getTargetInfo().isTLSSupported()); // Lookup the entry, lazily creating it if necessary. 
- return getOrCreateInternalVariable(CGM.Int8PtrPtrTy, - Twine(CGM.getMangledName(VD)) + ".cache."); + std::string Suffix = getName({"cache", ""}); + return getOrCreateInternalVariable( + CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); } Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, @@ -2331,7 +2460,7 @@ Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, CGM.getContext().getTargetInfo().isTLSSupported()) return VDAddr; - auto VarTy = VDAddr.getElementType(); + llvm::Type *VarTy = VDAddr.getElementType(); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy), @@ -2347,15 +2476,14 @@ void CGOpenMPRuntime::emitThreadPrivateVarInit( llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime // library. - auto OMPLoc = emitUpdateLocation(CGF, Loc); + llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), OMPLoc); // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) // to register constructor/destructor for variable. 
- llvm::Value *Args[] = {OMPLoc, - CGF.Builder.CreatePointerCast(VDAddr.getPointer(), - CGM.VoidPtrTy), - Ctor, CopyCtor, Dtor}; + llvm::Value *Args[] = { + OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), + Ctor, CopyCtor, Dtor}; CGF.EmitRuntimeCall( createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); } @@ -2373,29 +2501,31 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( QualType ASTTy = VD->getType(); llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; - auto Init = VD->getAnyInitializer(); + const Expr *Init = VD->getAnyInitializer(); if (CGM.getLangOpts().CPlusPlus && PerformInit) { // Generate function that re-emits the declaration's initializer into the // threadprivate copy of the variable VD CodeGenFunction CtorCGF(CGM); FunctionArgList Args; - ImplicitParamDecl Dst(CGM.getContext(), CGM.getContext().VoidPtrTy, + ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, + /*Id=*/nullptr, CGM.getContext().VoidPtrTy, ImplicitParamDecl::Other); Args.push_back(&Dst); - auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( + const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( CGM.getContext().VoidPtrTy, Args); - auto FTy = CGM.getTypes().GetFunctionType(FI); - auto Fn = CGM.CreateGlobalInitOrDestructFunction( - FTy, ".__kmpc_global_ctor_.", FI, Loc); + llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); + std::string Name = getName({"__kmpc_global_ctor_", ""}); + llvm::Function *Fn = + CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, - Args, SourceLocation()); - auto ArgVal = CtorCGF.EmitLoadOfScalar( + Args, Loc, Loc); + llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); Address Arg = Address(ArgVal, VDAddr.getAlignment()); - Arg = CtorCGF.Builder.CreateElementBitCast(Arg, - 
CtorCGF.ConvertTypeForMem(ASTTy)); + Arg = CtorCGF.Builder.CreateElementBitCast( + Arg, CtorCGF.ConvertTypeForMem(ASTTy)); CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), /*IsInitializer=*/true); ArgVal = CtorCGF.EmitLoadOfScalar( @@ -2410,21 +2540,23 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( // of the variable VD CodeGenFunction DtorCGF(CGM); FunctionArgList Args; - ImplicitParamDecl Dst(CGM.getContext(), CGM.getContext().VoidPtrTy, + ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, + /*Id=*/nullptr, CGM.getContext().VoidPtrTy, ImplicitParamDecl::Other); Args.push_back(&Dst); - auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( + const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( CGM.getContext().VoidTy, Args); - auto FTy = CGM.getTypes().GetFunctionType(FI); - auto Fn = CGM.CreateGlobalInitOrDestructFunction( - FTy, ".__kmpc_global_dtor_.", FI, Loc); + llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); + std::string Name = getName({"__kmpc_global_dtor_", ""}); + llvm::Function *Fn = + CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, - SourceLocation()); + Loc, Loc); // Create a scope with an artificial location for the body of this function. 
auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); - auto ArgVal = DtorCGF.EmitLoadOfScalar( + llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( DtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, @@ -2438,34 +2570,36 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( return nullptr; llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; - auto CopyCtorTy = - llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, - /*isVarArg=*/false)->getPointerTo(); + auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, + /*isVarArg=*/false) + ->getPointerTo(); // Copying constructor for the threadprivate variable. // Must be NULL - reserved by runtime, but currently it requires that this // parameter is always NULL. Otherwise it fires assertion. CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); if (Ctor == nullptr) { - auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, - /*isVarArg=*/false)->getPointerTo(); + auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, + /*isVarArg=*/false) + ->getPointerTo(); Ctor = llvm::Constant::getNullValue(CtorTy); } if (Dtor == nullptr) { - auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, - /*isVarArg=*/false)->getPointerTo(); + auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, + /*isVarArg=*/false) + ->getPointerTo(); Dtor = llvm::Constant::getNullValue(DtorTy); } if (!CGF) { - auto InitFunctionTy = + auto *InitFunctionTy = llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); - auto InitFunction = CGM.CreateGlobalInitOrDestructFunction( - InitFunctionTy, ".__omp_threadprivate_init_.", - CGM.getTypes().arrangeNullaryFunction()); + std::string Name = getName({"__omp_threadprivate_init_", ""}); + llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction( + InitFunctionTy, Name, 
CGM.getTypes().arrangeNullaryFunction()); CodeGenFunction InitCGF(CGM); FunctionArgList ArgList; InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, CGM.getTypes().arrangeNullaryFunction(), ArgList, - Loc); + Loc, Loc); emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); InitCGF.FinishFunction(); return InitFunction; @@ -2475,19 +2609,156 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( return nullptr; } +/// Obtain information that uniquely identifies a target entry. This +/// consists of the file and device IDs as well as line number associated with +/// the relevant entry source location. +static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, + unsigned &DeviceID, unsigned &FileID, + unsigned &LineNum) { + SourceManager &SM = C.getSourceManager(); + + // The loc should be always valid and have a file ID (the user cannot use + // #pragma directives in macros) + + assert(Loc.isValid() && "Source location is expected to be always valid."); + + PresumedLoc PLoc = SM.getPresumedLoc(Loc); + assert(PLoc.isValid() && "Source location is expected to be always valid."); + + llvm::sys::fs::UniqueID ID; + if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) + SM.getDiagnostics().Report(diag::err_cannot_open_file) + << PLoc.getFilename() << EC.message(); + + DeviceID = ID.getDevice(); + FileID = ID.getFile(); + LineNum = PLoc.getLine(); +} + +bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, + llvm::GlobalVariable *Addr, + bool PerformInit) { + Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = + isDeclareTargetDeclaration(VD); + if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) + return false; + VD = VD->getDefinition(CGM.getContext()); + if (VD && !DeclareTargetWithDefinition.insert(VD).second) + return CGM.getLangOpts().OpenMPIsDevice; + + QualType ASTTy = VD->getType(); + + SourceLocation Loc = VD->getCanonicalDecl()->getLocStart(); + // Produce the 
unique prefix to identify the new target regions. We use + // the source location of the variable declaration which we know to not + // conflict with any target region. + unsigned DeviceID; + unsigned FileID; + unsigned Line; + getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); + SmallString<128> Buffer, Out; + { + llvm::raw_svector_ostream OS(Buffer); + OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) + << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; + } + + const Expr *Init = VD->getAnyInitializer(); + if (CGM.getLangOpts().CPlusPlus && PerformInit) { + llvm::Constant *Ctor; + llvm::Constant *ID; + if (CGM.getLangOpts().OpenMPIsDevice) { + // Generate function that re-emits the declaration's initializer into + // the threadprivate copy of the variable VD + CodeGenFunction CtorCGF(CGM); + + const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); + llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); + llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( + FTy, Twine(Buffer, "_ctor"), FI, Loc); + auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); + CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, + FunctionArgList(), Loc, Loc); + auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); + CtorCGF.EmitAnyExprToMem(Init, + Address(Addr, CGM.getContext().getDeclAlign(VD)), + Init->getType().getQualifiers(), + /*IsInitializer=*/true); + CtorCGF.FinishFunction(); + Ctor = Fn; + ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); + CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); + } else { + Ctor = new llvm::GlobalVariable( + CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, + llvm::GlobalValue::PrivateLinkage, + llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); + ID = Ctor; + } + + // Register the information for the entry associated with the constructor. 
+ Out.clear(); + OffloadEntriesInfoManager.registerTargetRegionEntryInfo( + DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, + ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); + } + if (VD->getType().isDestructedType() != QualType::DK_none) { + llvm::Constant *Dtor; + llvm::Constant *ID; + if (CGM.getLangOpts().OpenMPIsDevice) { + // Generate function that emits destructor call for the threadprivate + // copy of the variable VD + CodeGenFunction DtorCGF(CGM); + + const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); + llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); + llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( + FTy, Twine(Buffer, "_dtor"), FI, Loc); + auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); + DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, + FunctionArgList(), Loc, Loc); + // Create a scope with an artificial location for the body of this + // function. + auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); + DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), + ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), + DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); + DtorCGF.FinishFunction(); + Dtor = Fn; + ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); + CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); + } else { + Dtor = new llvm::GlobalVariable( + CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, + llvm::GlobalValue::PrivateLinkage, + llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); + ID = Dtor; + } + // Register the information for the entry associated with the destructor. 
+ Out.clear(); + OffloadEntriesInfoManager.registerTargetRegionEntryInfo( + DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, + ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); + } + return CGM.getLangOpts().OpenMPIsDevice; +} + Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name) { - llvm::Twine VarName(Name, ".artificial."); + std::string Suffix = getName({"artificial", ""}); + std::string CacheSuffix = getName({"cache", ""}); llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); - llvm::Value *GAddr = getOrCreateInternalVariable(VarLVType, VarName); + llvm::Value *GAddr = + getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); llvm::Value *Args[] = { emitUpdateLocation(CGF, SourceLocation()), getThreadID(CGF, SourceLocation()), CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, /*IsSigned=*/false), - getOrCreateInternalVariable(CGM.VoidPtrPtrTy, VarName + ".cache.")}; + getOrCreateInternalVariable( + CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; return Address( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( CGF.EmitRuntimeCall( @@ -2496,13 +2767,6 @@ Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, CGM.getPointerAlign()); } -/// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen -/// function. Here is the logic: -/// if (Cond) { -/// ThenGen(); -/// } else { -/// ElseGen(); -/// } void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen) { @@ -2521,9 +2785,9 @@ void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, // Otherwise, the condition did not fold, or we couldn't elide it. Just // emit the conditional branch. 
- auto ThenBlock = CGF.createBasicBlock("omp_if.then"); - auto ElseBlock = CGF.createBasicBlock("omp_if.else"); - auto ContBlock = CGF.createBasicBlock("omp_if.end"); + llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); + llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); + llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); // Emit the 'then' code. @@ -2548,11 +2812,11 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond) { if (!CGF.HaveInsertPoint()) return; - auto *RTLoc = emitUpdateLocation(CGF, Loc); + llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF, PrePostActionTy &) { // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); - auto &RT = CGF.CGM.getOpenMPRuntime(); + CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); llvm::Value *Args[] = { RTLoc, CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars @@ -2561,13 +2825,13 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, RealArgs.append(std::begin(Args), std::end(Args)); RealArgs.append(CapturedVars.begin(), CapturedVars.end()); - auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); + llvm::Value *RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); CGF.EmitRuntimeCall(RTLFn, RealArgs); }; auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF, PrePostActionTy &) { - auto &RT = CGF.CGM.getOpenMPRuntime(); - auto ThreadID = RT.getThreadID(CGF, Loc); + CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); + llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); // Build calls: // __kmpc_serialized_parallel(&Loc, GTid); llvm::Value *Args[] = {RTLoc, ThreadID}; @@ -2575,13 +2839,12 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 
RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); // OutlinedFn(&GTid, &zero, CapturedStruct); - auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); - Address ZeroAddr = - CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4), - /*Name*/ ".zero.addr"); + Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, + /*Name*/ ".zero.addr"); CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; - OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); + // ThreadId for serialized parallels is 0. + OutlinedFnArgs.push_back(ZeroAddr.getPointer()); OutlinedFnArgs.push_back(ZeroAddr.getPointer()); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); @@ -2592,9 +2855,9 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs); }; - if (IfCond) + if (IfCond) { emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); - else { + } else { RegionCodeGenTy ThenRCG(ThenGen); ThenRCG(CGF); } @@ -2613,10 +2876,10 @@ Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, if (OMPRegionInfo->getThreadIDVariable()) return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); - auto ThreadID = getThreadID(CGF, Loc); - auto Int32Ty = + llvm::Value *ThreadID = getThreadID(CGF, Loc); + QualType Int32Ty = CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); - auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); + Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); CGF.EmitStoreOfScalar(ThreadID, CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); @@ -2629,8 +2892,8 @@ CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty, SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); Out << Name; - auto RuntimeName = Out.str(); - auto &Elem = 
*InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first; + StringRef RuntimeName = Out.str(); + auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; if (Elem.second) { assert(Elem.second->getType()->getPointerElementType() == Ty && "OMP internal variable has different type than requested"); @@ -2644,8 +2907,9 @@ CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty, } llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { - llvm::Twine Name(".gomp_critical_user_", CriticalName); - return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var")); + std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); + std::string Name = getName({Prefix, "var"}); + return getOrCreateInternalVariable(KmpCriticalNameTy, Name); } namespace { @@ -2779,21 +3043,28 @@ static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, static llvm::Value *emitCopyprivateCopyFunction( CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, - ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) { - auto &C = CGM.getContext(); + ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, + SourceLocation Loc) { + ASTContext &C = CGM.getContext(); // void copy_func(void *LHSArg, void *RHSArg); FunctionArgList Args; - ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); - ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, + ImplicitParamDecl::Other); + ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, + ImplicitParamDecl::Other); Args.push_back(&LHSArg); Args.push_back(&RHSArg); - auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); - auto *Fn = llvm::Function::Create( - CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, - 
".omp.copyprivate.copy_func", &CGM.getModule()); - CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI); + const auto &CGFI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + std::string Name = + CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); + auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), + llvm::GlobalValue::InternalLinkage, Name, + &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); + Fn->setDoesNotRecurse(); CodeGenFunction CGF(CGM); - CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); // Dest = (void*[n])(LHSArg); // Src = (void*[n])(RHSArg); Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( @@ -2807,13 +3078,15 @@ static llvm::Value *emitCopyprivateCopyFunction( // ... // *(Typen*)Dst[n] = *(Typen*)Src[n]; for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { - auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); + const auto *DestVar = + cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); - auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); + const auto *SrcVar = + cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); - auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); + const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); QualType Type = VD->getType(); CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); } @@ -2833,7 +3106,7 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, assert(CopyprivateVars.size() == SrcExprs.size() && CopyprivateVars.size() == DstExprs.size() && CopyprivateVars.size() == AssignmentOps.size()); - auto &C = CGM.getContext(); + ASTContext &C = CGM.getContext(); // int32 did_it = 0; // 
if(__kmpc_single(ident_t *, gtid)) { // SingleOpGen(); @@ -2846,7 +3119,8 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, Address DidIt = Address::invalid(); if (!CopyprivateVars.empty()) { // int32 did_it = 0; - auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); + QualType KmpInt32Ty = + C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); } @@ -2866,7 +3140,7 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, // <copy_func>, did_it); if (DidIt.isValid()) { llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); - auto CopyprivateArrayTy = + QualType CopyprivateArrayTy = C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0); // Create a list of all private variables for copyprivate. @@ -2882,14 +3156,14 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, } // Build function that copies private values from single region to all other // threads in the corresponding parallel region. 
- auto *CpyFn = emitCopyprivateCopyFunction( + llvm::Value *CpyFn = emitCopyprivateCopyFunction( CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), - CopyprivateVars, SrcExprs, DstExprs, AssignmentOps); - auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy); + CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); + llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, CGF.VoidPtrTy); - auto *DidItVal = CGF.Builder.CreateLoad(DidIt); + llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc), // ident_t *<loc> getThreadID(CGF, Loc), // i32 <gtid> @@ -2948,19 +3222,19 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, if (auto *OMPRegionInfo = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { - auto *Result = CGF.EmitRuntimeCall( + llvm::Value *Result = CGF.EmitRuntimeCall( createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); if (EmitChecks) { // if (__kmpc_cancel_barrier()) { // exit from construct; // } - auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); - auto *ContBB = CGF.createBasicBlock(".cancel.continue"); - auto *Cmp = CGF.Builder.CreateIsNotNull(Result); + llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); + llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); + llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); CGF.EmitBlock(ExitBB); // exit from construct; - auto CancelDestination = + CodeGenFunction::JumpDest CancelDestination = CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); CGF.EmitBranchThroughCleanup(CancelDestination); CGF.EmitBlock(ContBB, /*IsFinished=*/true); @@ -2971,7 +3245,7 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); } -/// \brief Map the OpenMP loop schedule to the runtime enumeration. +/// Map the OpenMP loop schedule to the runtime enumeration. static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, bool Chunked, bool Ordered) { switch (ScheduleKind) { @@ -2993,7 +3267,7 @@ static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, llvm_unreachable("Unexpected runtime schedule"); } -/// \brief Map the OpenMP distribute schedule to the runtime enumeration. +/// Map the OpenMP distribute schedule to the runtime enumeration. static OpenMPSchedType getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { // only static is allowed for dist_schedule @@ -3002,19 +3276,20 @@ getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const { - auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); + OpenMPSchedType Schedule = + getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); return Schedule == OMP_sch_static; } bool CGOpenMPRuntime::isStaticNonchunked( OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { - auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked); + OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); return Schedule == OMP_dist_sch_static; } bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { - auto Schedule = + OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); return Schedule != OMP_sch_static; @@ -3147,12 +3422,12 @@ void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); assert(isOpenMPWorksharingDirective(DKind) && "Expected 
loop-based or sections-based directive."); - auto *UpdatedLocation = emitUpdateLocation(CGF, Loc, + llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, isOpenMPLoopDirective(DKind) ? OMP_IDENT_WORK_LOOP : OMP_IDENT_WORK_SECTIONS); - auto *ThreadId = getThreadID(CGF, Loc); - auto *StaticInitFunction = + llvm::Value *ThreadId = getThreadID(CGF, Loc); + llvm::Constant *StaticInitFunction = createForStaticInitFunction(Values.IVSize, Values.IVSigned); emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); @@ -3164,10 +3439,10 @@ void CGOpenMPRuntime::emitDistributeStaticInit( const CGOpenMPRuntime::StaticRTInput &Values) { OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); - auto *UpdatedLocation = + llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); - auto *ThreadId = getThreadID(CGF, Loc); - auto *StaticInitFunction = + llvm::Value *ThreadId = getThreadID(CGF, Loc); + llvm::Constant *StaticInitFunction = createForStaticInitFunction(Values.IVSize, Values.IVSigned); emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, @@ -3223,7 +3498,7 @@ llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, llvm::Value *Call = CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); return CGF.EmitScalarConversion( - Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true), + Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), CGF.getContext().BoolTy, Loc); } @@ -3285,13 +3560,13 @@ void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, } namespace { -/// \brief Indexes of fields for type kmp_task_t. +/// Indexes of fields for type kmp_task_t. enum KmpTaskTFields { - /// \brief List of shared variables. + /// List of shared variables. KmpTaskTShareds, - /// \brief Task routine. + /// Task routine. 
KmpTaskTRoutine, - /// \brief Partition id for the untied tasks. + /// Partition id for the untied tasks. KmpTaskTPartId, /// Function with call of destructors for private variables. Data1, @@ -3311,11 +3586,11 @@ enum KmpTaskTFields { } // anonymous namespace bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { - // FIXME: Add other entries type when they become supported. - return OffloadEntriesTargetRegion.empty(); + return OffloadEntriesTargetRegion.empty() && + OffloadEntriesDeviceGlobalVar.empty(); } -/// \brief Initialize target region entry. +/// Initialize target region entry. void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, @@ -3325,7 +3600,7 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: "code generation."); OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, - /*Flags=*/0); + OMPTargetRegionEntryTargetRegion); ++OffloadingEntriesNum; } @@ -3333,22 +3608,27 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, llvm::Constant *Addr, llvm::Constant *ID, - int32_t Flags) { + OMPTargetRegionEntryKind Flags) { // If we are emitting code for a target, the entry is already initialized, // only has to be registered. 
if (CGM.getLangOpts().OpenMPIsDevice) { - assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && - "Entry must exist."); + if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Error, + "Unable to find target region on line '%0' in the device code."); + CGM.getDiags().Report(DiagID) << LineNum; + return; + } auto &Entry = OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; assert(Entry.isValid() && "Entry not initialized!"); Entry.setAddress(Addr); Entry.setID(ID); Entry.setFlags(Flags); - return; } else { - OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID, Flags); + OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; + ++OffloadingEntriesNum; } } @@ -3376,48 +3656,69 @@ bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( const OffloadTargetRegionEntryInfoActTy &Action) { // Scan all target region entries and perform the provided action. - for (auto &D : OffloadEntriesTargetRegion) - for (auto &F : D.second) - for (auto &P : F.second) - for (auto &L : P.second) + for (const auto &D : OffloadEntriesTargetRegion) + for (const auto &F : D.second) + for (const auto &P : F.second) + for (const auto &L : P.second) Action(D.first, F.first, P.first(), L.first, L.second); } -/// \brief Create a Ctor/Dtor-like function whose body is emitted through -/// \a Codegen. This is used to emit the two functions that register and -/// unregister the descriptor of the current compilation unit. 
-static llvm::Function * -createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name, - const RegionCodeGenTy &Codegen) { - auto &C = CGM.getContext(); - FunctionArgList Args; - ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other); - Args.push_back(&DummyPtr); +void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: + initializeDeviceGlobalVarEntryInfo(StringRef Name, + OMPTargetGlobalVarEntryKind Flags, + unsigned Order) { + assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " + "only required for the device " + "code generation."); + OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); + ++OffloadingEntriesNum; +} - CodeGenFunction CGF(CGM); - auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); - auto FTy = CGM.getTypes().GetFunctionType(FI); - auto *Fn = - CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation()); - CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation()); - Codegen(CGF); - CGF.FinishFunction(); - return Fn; +void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: + registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, + CharUnits VarSize, + OMPTargetGlobalVarEntryKind Flags, + llvm::GlobalValue::LinkageTypes Linkage) { + if (CGM.getLangOpts().OpenMPIsDevice) { + auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; + assert(Entry.isValid() && Entry.getFlags() == Flags && + "Entry not initialized!"); + assert((!Entry.getAddress() || Entry.getAddress() == Addr) && + "Resetting with the new address."); + if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) + return; + Entry.setAddress(Addr); + Entry.setVarSize(VarSize); + Entry.setLinkage(Linkage); + } else { + if (hasDeviceGlobalVarEntryInfo(VarName)) + return; + OffloadEntriesDeviceGlobalVar.try_emplace( + VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); + ++OffloadingEntriesNum; + } +} + +void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 
+ actOnDeviceGlobalVarEntriesInfo( + const OffloadDeviceGlobalVarEntryInfoActTy &Action) { + // Scan all target region entries and perform the provided action. + for (const auto &E : OffloadEntriesDeviceGlobalVar) + Action(E.getKey(), E.getValue()); } llvm::Function * CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { - // If we don't have entries or if we are emitting code for the device, we // don't need to do anything. if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty()) return nullptr; - auto &M = CGM.getModule(); - auto &C = CGM.getContext(); + llvm::Module &M = CGM.getModule(); + ASTContext &C = CGM.getContext(); // Get list of devices we care about - auto &Devices = CGM.getLangOpts().OMPTargetTriples; + const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples; // We should be creating an offloading descriptor only if there are devices // specified. @@ -3425,46 +3726,49 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { // Create the external variables that will point to the begin and end of the // host entries section. These will be defined by the linker. 
- auto *OffloadEntryTy = + llvm::Type *OffloadEntryTy = CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()); - llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable( - M, OffloadEntryTy, /*isConstant=*/true, - llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, - ".omp_offloading.entries_begin"); - llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable( + std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"}); + auto *HostEntriesBegin = new llvm::GlobalVariable( M, OffloadEntryTy, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, - ".omp_offloading.entries_end"); + EntriesBeginName); + std::string EntriesEndName = getName({"omp_offloading", "entries_end"}); + auto *HostEntriesEnd = + new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true, + llvm::GlobalValue::ExternalLinkage, + /*Initializer=*/nullptr, EntriesEndName); // Create all device images auto *DeviceImageTy = cast<llvm::StructType>( CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy())); ConstantInitBuilder DeviceImagesBuilder(CGM); - auto DeviceImagesEntries = DeviceImagesBuilder.beginArray(DeviceImageTy); + ConstantArrayBuilder DeviceImagesEntries = + DeviceImagesBuilder.beginArray(DeviceImageTy); - for (unsigned i = 0; i < Devices.size(); ++i) { - StringRef T = Devices[i].getTriple(); + for (const llvm::Triple &Device : Devices) { + StringRef T = Device.getTriple(); + std::string BeginName = getName({"omp_offloading", "img_start", ""}); auto *ImgBegin = new llvm::GlobalVariable( M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, - /*Initializer=*/nullptr, - Twine(".omp_offloading.img_start.") + Twine(T)); + /*Initializer=*/nullptr, Twine(BeginName).concat(T)); + std::string EndName = getName({"omp_offloading", "img_end", ""}); auto *ImgEnd = new llvm::GlobalVariable( M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, - /*Initializer=*/nullptr, 
Twine(".omp_offloading.img_end.") + Twine(T)); + /*Initializer=*/nullptr, Twine(EndName).concat(T)); - auto Dev = DeviceImagesEntries.beginStruct(DeviceImageTy); - Dev.add(ImgBegin); - Dev.add(ImgEnd); - Dev.add(HostEntriesBegin); - Dev.add(HostEntriesEnd); - Dev.finishAndAddTo(DeviceImagesEntries); + llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin, + HostEntriesEnd}; + createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data, + DeviceImagesEntries); } // Create device images global array. + std::string ImagesName = getName({"omp_offloading", "device_images"}); llvm::GlobalVariable *DeviceImages = - DeviceImagesEntries.finishAndCreateGlobal(".omp_offloading.device_images", - CGM.getPointerAlign(), - /*isConstant=*/true); + DeviceImagesEntries.finishAndCreateGlobal(ImagesName, + CGM.getPointerAlign(), + /*isConstant=*/true); DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); // This is a Zero array to be used in the creation of the constant expressions @@ -3472,49 +3776,64 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { llvm::Constant::getNullValue(CGM.Int32Ty)}; // Create the target region descriptor. 
- auto *BinaryDescriptorTy = cast<llvm::StructType>( - CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy())); - ConstantInitBuilder DescBuilder(CGM); - auto DescInit = DescBuilder.beginStruct(BinaryDescriptorTy); - DescInit.addInt(CGM.Int32Ty, Devices.size()); - DescInit.add(llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(), - DeviceImages, - Index)); - DescInit.add(HostEntriesBegin); - DescInit.add(HostEntriesEnd); - - auto *Desc = DescInit.finishAndCreateGlobal(".omp_offloading.descriptor", - CGM.getPointerAlign(), - /*isConstant=*/true); + llvm::Constant *Data[] = { + llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()), + llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(), + DeviceImages, Index), + HostEntriesBegin, HostEntriesEnd}; + std::string Descriptor = getName({"omp_offloading", "descriptor"}); + llvm::GlobalVariable *Desc = createConstantGlobalStruct( + CGM, getTgtBinaryDescriptorQTy(), Data, Descriptor); // Emit code to register or unregister the descriptor at execution // startup or closing, respectively. - // Create a variable to drive the registration and unregistration of the - // descriptor, so we can reuse the logic that emits Ctors and Dtors. 
- auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var"); - ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(), - IdentInfo, C.CharTy, ImplicitParamDecl::Other); - - auto *UnRegFn = createOffloadingBinaryDescriptorFunction( - CGM, ".omp_offloading.descriptor_unreg", - [&](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib), - Desc); - }); - auto *RegFn = createOffloadingBinaryDescriptorFunction( - CGM, ".omp_offloading.descriptor_reg", - [&](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), - Desc); - CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); - }); + llvm::Function *UnRegFn; + { + FunctionArgList Args; + ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other); + Args.push_back(&DummyPtr); + + CodeGenFunction CGF(CGM); + // Disable debug info for global (de-)initializer because they are not part + // of some particular construct. + CGF.disableDebugInfo(); + const auto &FI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); + std::string UnregName = getName({"omp_offloading", "descriptor_unreg"}); + UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI); + CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args); + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib), + Desc); + CGF.FinishFunction(); + } + llvm::Function *RegFn; + { + CodeGenFunction CGF(CGM); + // Disable debug info for global (de-)initializer because they are not part + // of some particular construct. 
+ CGF.disableDebugInfo(); + const auto &FI = CGM.getTypes().arrangeNullaryFunction(); + llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); + std::string Descriptor = getName({"omp_offloading", "descriptor_reg"}); + RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI); + CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList()); + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc); + // Create a variable to drive the registration and unregistration of the + // descriptor, so we can reuse the logic that emits Ctors and Dtors. + ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), + SourceLocation(), nullptr, C.CharTy, + ImplicitParamDecl::Other); + CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); + CGF.FinishFunction(); + } if (CGM.supportsCOMDAT()) { // It is sufficient to call registration function only once, so create a // COMDAT group for registration/unregistration functions and associated // data. That would reduce startup time and code size. Registration // function serves as a COMDAT group key. 
- auto ComdatKey = M.getOrInsertComdat(RegFn->getName()); + llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName()); RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility); RegFn->setComdat(ComdatKey); @@ -3525,48 +3844,35 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { return RegFn; } -void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID, - llvm::Constant *Addr, uint64_t Size, - int32_t Flags) { +void CGOpenMPRuntime::createOffloadEntry( + llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, + llvm::GlobalValue::LinkageTypes Linkage) { StringRef Name = Addr->getName(); - auto *TgtOffloadEntryType = cast<llvm::StructType>( - CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy())); - llvm::LLVMContext &C = CGM.getModule().getContext(); llvm::Module &M = CGM.getModule(); - - // Make sure the address has the right type. - llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy); + llvm::LLVMContext &C = M.getContext(); // Create constant string with the name. llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); - llvm::GlobalVariable *Str = - new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true, - llvm::GlobalValue::InternalLinkage, StrPtrInit, - ".omp_offloading.entry_name"); + std::string StringName = getName({"omp_offloading", "entry_name"}); + auto *Str = new llvm::GlobalVariable( + M, StrPtrInit->getType(), /*isConstant=*/true, + llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy); - - // We can't have any padding between symbols, so we need to have 1-byte - // alignment. - auto Align = CharUnits::fromQuantity(1); - - // Create the entry struct. 
- ConstantInitBuilder EntryBuilder(CGM); - auto EntryInit = EntryBuilder.beginStruct(TgtOffloadEntryType); - EntryInit.add(AddrPtr); - EntryInit.add(StrPtr); - EntryInit.addInt(CGM.SizeTy, Size); - EntryInit.addInt(CGM.Int32Ty, Flags); - EntryInit.addInt(CGM.Int32Ty, 0); - llvm::GlobalVariable *Entry = - EntryInit.finishAndCreateGlobal(".omp_offloading.entry", - Align, - /*constant*/ true, - llvm::GlobalValue::ExternalLinkage); + + llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy), + llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy), + llvm::ConstantInt::get(CGM.SizeTy, Size), + llvm::ConstantInt::get(CGM.Int32Ty, Flags), + llvm::ConstantInt::get(CGM.Int32Ty, 0)}; + std::string EntryName = getName({"omp_offloading", "entry", ""}); + llvm::GlobalVariable *Entry = createConstantGlobalStruct( + CGM, getTgtOffloadEntryQTy(), Data, Twine(EntryName).concat(Name), + llvm::GlobalValue::WeakAnyLinkage); // The entry has to be created in the section the linker expects it to be. - Entry->setSection(".omp_offloading.entries"); + std::string Section = getName({"omp_offloading", "entries"}); + Entry->setSection(Section); } void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { @@ -3579,71 +3885,142 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { // Right now we only generate metadata for function that contain target // regions. - // If we do not have entries, we dont need to do anything. + // If we do not have entries, we don't need to do anything. if (OffloadEntriesInfoManager.empty()) return; llvm::Module &M = CGM.getModule(); llvm::LLVMContext &C = M.getContext(); - SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16> + SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16> OrderedEntries(OffloadEntriesInfoManager.size()); - // Create the offloading info metadata node. 
- llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); - // Auxiliary methods to create metadata values and strings. - auto getMDInt = [&](unsigned v) { + auto &&GetMDInt = [this](unsigned V) { return llvm::ConstantAsMetadata::get( - llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v)); + llvm::ConstantInt::get(CGM.Int32Ty, V)); }; - auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); }; + auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; + + // Create the offloading info metadata node. + llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); // Create function that emits metadata for each target region entry; - auto &&TargetRegionMetadataEmitter = [&]( - unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line, - OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { - llvm::SmallVector<llvm::Metadata *, 32> Ops; - // Generate metadata for target regions. Each entry of this metadata - // contains: - // - Entry 0 -> Kind of this type of metadata (0). - // - Entry 1 -> Device ID of the file where the entry was identified. - // - Entry 2 -> File ID of the file where the entry was identified. - // - Entry 3 -> Mangled name of the function where the entry was identified. - // - Entry 4 -> Line in the file where the entry was identified. - // - Entry 5 -> Order the entry was created. - // The first element of the metadata node is the kind. - Ops.push_back(getMDInt(E.getKind())); - Ops.push_back(getMDInt(DeviceID)); - Ops.push_back(getMDInt(FileID)); - Ops.push_back(getMDString(ParentName)); - Ops.push_back(getMDInt(Line)); - Ops.push_back(getMDInt(E.getOrder())); - - // Save this entry in the right position of the ordered entries array. - OrderedEntries[E.getOrder()] = &E; - - // Add metadata to the named metadata node. 
- MD->addOperand(llvm::MDNode::get(C, Ops)); - }; + auto &&TargetRegionMetadataEmitter = + [&C, MD, &OrderedEntries, &GetMDInt, &GetMDString]( + unsigned DeviceID, unsigned FileID, StringRef ParentName, + unsigned Line, + const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { + // Generate metadata for target regions. Each entry of this metadata + // contains: + // - Entry 0 -> Kind of this type of metadata (0). + // - Entry 1 -> Device ID of the file where the entry was identified. + // - Entry 2 -> File ID of the file where the entry was identified. + // - Entry 3 -> Mangled name of the function where the entry was + // identified. + // - Entry 4 -> Line in the file where the entry was identified. + // - Entry 5 -> Order the entry was created. + // The first element of the metadata node is the kind. + llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), + GetMDInt(FileID), GetMDString(ParentName), + GetMDInt(Line), GetMDInt(E.getOrder())}; + + // Save this entry in the right position of the ordered entries array. + OrderedEntries[E.getOrder()] = &E; + + // Add metadata to the named metadata node. + MD->addOperand(llvm::MDNode::get(C, Ops)); + }; OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( TargetRegionMetadataEmitter); - for (auto *E : OrderedEntries) { + // Create function that emits metadata for each device global variable entry; + auto &&DeviceGlobalVarMetadataEmitter = + [&C, &OrderedEntries, &GetMDInt, &GetMDString, + MD](StringRef MangledName, + const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar + &E) { + // Generate metadata for global variables. Each entry of this metadata + // contains: + // - Entry 0 -> Kind of this type of metadata (1). + // - Entry 1 -> Mangled name of the variable. + // - Entry 2 -> Declare target kind. + // - Entry 3 -> Order the entry was created. + // The first element of the metadata node is the kind. 
+ llvm::Metadata *Ops[] = { + GetMDInt(E.getKind()), GetMDString(MangledName), + GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; + + // Save this entry in the right position of the ordered entries array. + OrderedEntries[E.getOrder()] = &E; + + // Add metadata to the named metadata node. + MD->addOperand(llvm::MDNode::get(C, Ops)); + }; + + OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( + DeviceGlobalVarMetadataEmitter); + + for (const auto *E : OrderedEntries) { assert(E && "All ordered entries must exist!"); - if (auto *CE = + if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( E)) { - assert(CE->getID() && CE->getAddress() && - "Entry ID and Addr are invalid!"); - createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0); - } else + if (!CE->getID() || !CE->getAddress()) { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Error, + "Offloading entry for target region is incorrect: either the " + "address or the ID is invalid."); + CGM.getDiags().Report(DiagID); + continue; + } + createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, + CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); + } else if (const auto *CE = + dyn_cast<OffloadEntriesInfoManagerTy:: + OffloadEntryInfoDeviceGlobalVar>(E)) { + OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = + static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( + CE->getFlags()); + switch (Flags) { + case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { + if (!CE->getAddress()) { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Error, + "Offloading entry for declare target variable is incorrect: the " + "address is invalid."); + CGM.getDiags().Report(DiagID); + continue; + } + break; + } + case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: + assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || + (!CGM.getLangOpts().OpenMPIsDevice && 
CE->getAddress())) && + "Declaret target link address is set."); + if (CGM.getLangOpts().OpenMPIsDevice) + continue; + if (!CE->getAddress()) { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Error, + "Offloading entry for declare target variable is incorrect: the " + "address is invalid."); + CGM.getDiags().Report(DiagID); + continue; + } + break; + } + createOffloadEntry(CE->getAddress(), CE->getAddress(), + CE->getVarSize().getQuantity(), Flags, + CE->getLinkage()); + } else { llvm_unreachable("Unsupported entry kind."); + } } } -/// \brief Loads all the offload entries information from the host IR +/// Loads all the offload entries information from the host IR /// metadata. void CGOpenMPRuntime::loadOffloadInfoMetadata() { // If we are in target mode, load the metadata from the host IR. This code has @@ -3656,44 +4033,57 @@ void CGOpenMPRuntime::loadOffloadInfoMetadata() { return; auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); - if (Buf.getError()) + if (auto EC = Buf.getError()) { + CGM.getDiags().Report(diag::err_cannot_open_file) + << CGM.getLangOpts().OMPHostIRFile << EC.message(); return; + } llvm::LLVMContext C; auto ME = expectedToErrorOrAndEmitErrors( C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); - if (ME.getError()) + if (auto EC = ME.getError()) { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); + CGM.getDiags().Report(DiagID) + << CGM.getLangOpts().OMPHostIRFile << EC.message(); return; + } llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); if (!MD) return; - for (auto I : MD->operands()) { - llvm::MDNode *MN = cast<llvm::MDNode>(I); - - auto getMDInt = [&](unsigned Idx) { - llvm::ConstantAsMetadata *V = - cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); + for (llvm::MDNode *MN : MD->operands()) { + auto &&GetMDInt = [MN](unsigned Idx) { + auto *V = 
cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); }; - auto getMDString = [&](unsigned Idx) { - llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx)); + auto &&GetMDString = [MN](unsigned Idx) { + auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); return V->getString(); }; - switch (getMDInt(0)) { + switch (GetMDInt(0)) { default: llvm_unreachable("Unexpected metadata!"); break; case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: - OFFLOAD_ENTRY_INFO_TARGET_REGION: + OffloadingEntryInfoTargetRegion: OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( - /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2), - /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4), - /*Order=*/getMDInt(5)); + /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), + /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), + /*Order=*/GetMDInt(5)); + break; + case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: + OffloadingEntryInfoDeviceGlobalVar: + OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( + /*MangledName=*/GetMDString(1), + static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( + /*Flags=*/GetMDInt(2)), + /*Order=*/GetMDInt(3)); break; } } @@ -3702,7 +4092,7 @@ void CGOpenMPRuntime::loadOffloadInfoMetadata() { void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { if (!KmpRoutineEntryPtrTy) { // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 
- auto &C = CGM.getContext(); + ASTContext &C = CGM.getContext(); QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; FunctionProtoType::ExtProtoInfo EPI; KmpRoutineEntryPtrQTy = C.getPointerType( @@ -3711,19 +4101,7 @@ void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { } } -static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, - QualType FieldTy) { - auto *Field = FieldDecl::Create( - C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, - C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), - /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); - Field->setAccess(AS_public); - DC->addDecl(Field); - return Field; -} - QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { - // Make sure the type of the entry is already created. This is the type we // have to create: // struct __tgt_offload_entry{ @@ -3736,7 +4114,7 @@ QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { // }; if (TgtOffloadEntryQTy.isNull()) { ASTContext &C = CGM.getContext(); - auto *RD = C.buildImplicitRecord("__tgt_offload_entry"); + RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); RD->startDefinition(); addFieldToRecordDecl(C, RD, C.VoidPtrTy); addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); @@ -3746,6 +4124,7 @@ QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { addFieldToRecordDecl( C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); RD->completeDefinition(); + RD->addAttr(PackedAttr::CreateImplicit(C)); TgtOffloadEntryQTy = C.getRecordType(RD); } return TgtOffloadEntryQTy; @@ -3765,7 +4144,7 @@ QualType CGOpenMPRuntime::getTgtDeviceImageQTy() { // }; if (TgtDeviceImageQTy.isNull()) { ASTContext &C = CGM.getContext(); - auto *RD = C.buildImplicitRecord("__tgt_device_image"); + RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image"); RD->startDefinition(); addFieldToRecordDecl(C, RD, C.VoidPtrTy); addFieldToRecordDecl(C, RD, C.VoidPtrTy); @@ -3789,7 +4168,7 @@ QualType 
CGOpenMPRuntime::getTgtBinaryDescriptorQTy() { // }; if (TgtBinaryDescriptorQTy.isNull()) { ASTContext &C = CGM.getContext(); - auto *RD = C.buildImplicitRecord("__tgt_bin_desc"); + RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc"); RD->startDefinition(); addFieldToRecordDecl( C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); @@ -3818,17 +4197,16 @@ typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; static RecordDecl * createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { if (!Privates.empty()) { - auto &C = CGM.getContext(); + ASTContext &C = CGM.getContext(); // Build struct .kmp_privates_t. { // /* private vars */ // }; - auto *RD = C.buildImplicitRecord(".kmp_privates.t"); + RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); RD->startDefinition(); - for (auto &&Pair : Privates) { - auto *VD = Pair.second.Original; - auto Type = VD->getType(); - Type = Type.getNonReferenceType(); - auto *FD = addFieldToRecordDecl(C, RD, Type); + for (const auto &Pair : Privates) { + const VarDecl *VD = Pair.second.Original; + QualType Type = VD->getType().getNonReferenceType(); + FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); if (VD->hasAttrs()) { for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), E(VD->getAttrs().end()); @@ -3846,7 +4224,7 @@ static RecordDecl * createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpRoutineEntryPointerQTy) { - auto &C = CGM.getContext(); + ASTContext &C = CGM.getContext(); // Build struct kmp_task_t { // void * shareds; // kmp_routine_entry_t routine; @@ -3860,13 +4238,13 @@ createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, // kmp_int32 liter; // void * reductions; // }; - auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); + RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); UD->startDefinition(); addFieldToRecordDecl(C, UD, KmpInt32Ty); 
addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); UD->completeDefinition(); QualType KmpCmplrdataTy = C.getRecordType(UD); - auto *RD = C.buildImplicitRecord("kmp_task_t"); + RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); RD->startDefinition(); addFieldToRecordDecl(C, RD, C.VoidPtrTy); addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); @@ -3891,22 +4269,21 @@ createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, static RecordDecl * createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, ArrayRef<PrivateDataTy> Privates) { - auto &C = CGM.getContext(); + ASTContext &C = CGM.getContext(); // Build struct kmp_task_t_with_privates { // kmp_task_t task_data; // .kmp_privates_t. privates; // }; - auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); + RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); RD->startDefinition(); addFieldToRecordDecl(C, RD, KmpTaskTQTy); - if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) { + if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); - } RD->completeDefinition(); return RD; } -/// \brief Emit a proxy function which accepts kmp_task_t as the second +/// Emit a proxy function which accepts kmp_task_t as the second /// argument. 
/// \code /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { @@ -3924,7 +4301,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, QualType SharedsPtrTy, llvm::Value *TaskFunction, llvm::Value *TaskPrivatesMap) { - auto &C = CGM.getContext(); + ASTContext &C = CGM.getContext(); FunctionArgList Args; ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, ImplicitParamDecl::Other); @@ -3933,49 +4310,53 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, ImplicitParamDecl::Other); Args.push_back(&GtidArg); Args.push_back(&TaskTypeArg); - auto &TaskEntryFnInfo = + const auto &TaskEntryFnInfo = CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); - auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo); - auto *TaskEntry = - llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage, - ".omp_task_entry.", &CGM.getModule()); - CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo); + llvm::FunctionType *TaskEntryTy = + CGM.getTypes().GetFunctionType(TaskEntryFnInfo); + std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); + auto *TaskEntry = llvm::Function::Create( + TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); + TaskEntry->setDoesNotRecurse(); CodeGenFunction CGF(CGM); - CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args); + CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, + Loc, Loc); // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, // tt, // For taskloops: // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, // tt->task_data.shareds); - auto *GtidParam = CGF.EmitLoadOfScalar( + llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 
CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); LValue TDBase = CGF.EmitLoadOfPointerLValue( CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); - auto *KmpTaskTWithPrivatesQTyRD = + const auto *KmpTaskTWithPrivatesQTyRD = cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); LValue Base = CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); - auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); + const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); - auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); - auto *PartidParam = PartIdLVal.getPointer(); + LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); + llvm::Value *PartidParam = PartIdLVal.getPointer(); auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); - auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); - auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(), + LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); + llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.EmitLoadOfScalar(SharedsLVal, Loc), CGF.ConvertTypeForMem(SharedsPtrTy)); auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); llvm::Value *PrivatesParam; if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { - auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); + LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( PrivatesLVal.getPointer(), CGF.VoidPtrTy); - } else + } else { PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); + } llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap, @@ -3987,20 +4368,20 @@ 
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, std::end(CommonArgs)); if (isOpenMPTaskLoopDirective(Kind)) { auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); - auto LBLVal = CGF.EmitLValueForField(Base, *LBFI); - auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, Loc).getScalarVal(); + LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); + llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); - auto UBLVal = CGF.EmitLValueForField(Base, *UBFI); - auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal(); + LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); + llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); - auto StLVal = CGF.EmitLValueForField(Base, *StFI); - auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal(); + LValue StLVal = CGF.EmitLValueForField(Base, *StFI); + llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); - auto LILVal = CGF.EmitLValueForField(Base, *LIFI); - auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal(); + LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); + llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); - auto RLVal = CGF.EmitLValueForField(Base, *RFI); - auto *RParam = CGF.EmitLoadOfLValue(RLVal, Loc).getScalarVal(); + LValue RLVal = CGF.EmitLValueForField(Base, *RFI); + llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); CallArgs.push_back(LBParam); CallArgs.push_back(UBParam); CallArgs.push_back(StParam); @@ -4011,9 +4392,8 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, CallArgs); - CGF.EmitStoreThroughLValue( - RValue::get(CGF.Builder.getInt32(/*C=*/0)), - 
CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); + CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), + CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); CGF.FinishFunction(); return TaskEntry; } @@ -4023,7 +4403,7 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy) { - auto &C = CGM.getContext(); + ASTContext &C = CGM.getContext(); FunctionArgList Args; ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, ImplicitParamDecl::Other); @@ -4032,30 +4412,34 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, ImplicitParamDecl::Other); Args.push_back(&GtidArg); Args.push_back(&TaskTypeArg); - auto &DestructorFnInfo = + const auto &DestructorFnInfo = CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); - auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo); + llvm::FunctionType *DestructorFnTy = + CGM.getTypes().GetFunctionType(DestructorFnInfo); + std::string Name = + CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); auto *DestructorFn = llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, - ".omp_task_destructor.", &CGM.getModule()); - CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn, + Name, &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, DestructorFnInfo); + DestructorFn->setDoesNotRecurse(); CodeGenFunction CGF(CGM); - CGF.disableDebugInfo(); CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, - Args); + Args, Loc, Loc); LValue Base = CGF.EmitLoadOfPointerLValue( CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); - auto *KmpTaskTWithPrivatesQTyRD = + const auto *KmpTaskTWithPrivatesQTyRD = cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); Base = 
CGF.EmitLValueForField(Base, *FI); - for (auto *Field : + for (const auto *Field : cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { - if (auto DtorKind = Field->getType().isDestructedType()) { - auto FieldLValue = CGF.EmitLValueForField(Base, Field); + if (QualType::DestructionKind DtorKind = + Field->getType().isDestructedType()) { + LValue FieldLValue = CGF.EmitLValueForField(Base, Field); CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); } } @@ -4063,7 +4447,7 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, return DestructorFn; } -/// \brief Emit a privates mapping function for correct handling of private and +/// Emit a privates mapping function for correct handling of private and /// firstprivate variables. /// \code /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1> @@ -4080,7 +4464,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, ArrayRef<const Expr *> LastprivateVars, QualType PrivatesQTy, ArrayRef<PrivateDataTy> Privates) { - auto &C = CGM.getContext(); + ASTContext &C = CGM.getContext(); FunctionArgList Args; ImplicitParamDecl TaskPrivatesArg( C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, @@ -4089,67 +4473,69 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, Args.push_back(&TaskPrivatesArg); llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; unsigned Counter = 1; - for (auto *E: PrivateVars) { + for (const Expr *E : PrivateVars) { Args.push_back(ImplicitParamDecl::Create( C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) .withConst() .withRestrict(), ImplicitParamDecl::Other)); - auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); PrivateVarsPos[VD] = Counter; ++Counter; } - for (auto *E : FirstprivateVars) { + for (const Expr *E : FirstprivateVars) { Args.push_back(ImplicitParamDecl::Create( C, /*DC=*/nullptr, Loc, 
/*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) .withConst() .withRestrict(), ImplicitParamDecl::Other)); - auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); PrivateVarsPos[VD] = Counter; ++Counter; } - for (auto *E: LastprivateVars) { + for (const Expr *E : LastprivateVars) { Args.push_back(ImplicitParamDecl::Create( C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) .withConst() .withRestrict(), ImplicitParamDecl::Other)); - auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); PrivateVarsPos[VD] = Counter; ++Counter; } - auto &TaskPrivatesMapFnInfo = + const auto &TaskPrivatesMapFnInfo = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); - auto *TaskPrivatesMapTy = + llvm::FunctionType *TaskPrivatesMapTy = CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); + std::string Name = + CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); auto *TaskPrivatesMap = llvm::Function::Create( - TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, - ".omp_task_privates_map.", &CGM.getModule()); - CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap, + TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, + &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, TaskPrivatesMapFnInfo); TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); CodeGenFunction CGF(CGM); - CGF.disableDebugInfo(); CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, - TaskPrivatesMapFnInfo, Args); + TaskPrivatesMapFnInfo, Args, Loc, Loc); // *privi = &.privates.privi; LValue Base = CGF.EmitLoadOfPointerLValue( CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 
TaskPrivatesArg.getType()->castAs<PointerType>()); - auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); + const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); Counter = 0; - for (auto *Field : PrivatesQTyRD->fields()) { - auto FieldLVal = CGF.EmitLValueForField(Base, Field); - auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; - auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); - auto RefLoadLVal = CGF.EmitLoadOfPointerLValue( + for (const FieldDecl *Field : PrivatesQTyRD->fields()) { + LValue FieldLVal = CGF.EmitLValueForField(Base, Field); + const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; + LValue RefLVal = + CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); + LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>()); CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); ++Counter; @@ -4171,9 +4557,14 @@ static void emitPrivatesInit(CodeGenFunction &CGF, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef<PrivateDataTy> Privates, bool ForDup) { - auto &C = CGF.getContext(); + ASTContext &C = CGF.getContext(); auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); + OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) + ? OMPD_taskloop + : OMPD_task; + const CapturedStmt &CS = *D.getCapturedStmt(Kind); + CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); LValue SrcBase; bool IsTargetTask = isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || @@ -4182,40 +4573,38 @@ static void emitPrivatesInit(CodeGenFunction &CGF, // PointersArray and SizesArray. The original variables for these arrays are // not captured and we get their addresses explicitly. 
if ((!IsTargetTask && !Data.FirstprivateVars.empty()) || - (IsTargetTask && Data.FirstprivateVars.size() > 3)) { + (IsTargetTask && KmpTaskSharedsPtr.isValid())) { SrcBase = CGF.MakeAddrLValue( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), SharedsTy); } - OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) - ? OMPD_taskloop - : OMPD_task; - CodeGenFunction::CGCapturedStmtInfo CapturesInfo(*D.getCapturedStmt(Kind)); FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); - for (auto &&Pair : Privates) { - auto *VD = Pair.second.PrivateCopy; - auto *Init = VD->getAnyInitializer(); + for (const PrivateDataTy &Pair : Privates) { + const VarDecl *VD = Pair.second.PrivateCopy; + const Expr *Init = VD->getAnyInitializer(); if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && !CGF.isTrivialInitializer(Init)))) { LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); - if (auto *Elem = Pair.second.PrivateElemInit) { - auto *OriginalVD = Pair.second.Original; + if (const VarDecl *Elem = Pair.second.PrivateElemInit) { + const VarDecl *OriginalVD = Pair.second.Original; // Check if the variable is the target-based BasePointersArray, // PointersArray or SizesArray. 
LValue SharedRefLValue; QualType Type = OriginalVD->getType(); - if (IsTargetTask && isa<ImplicitParamDecl>(OriginalVD) && - isa<CapturedDecl>(OriginalVD->getDeclContext()) && - cast<CapturedDecl>(OriginalVD->getDeclContext())->getNumParams() == - 0 && - isa<TranslationUnitDecl>( - cast<CapturedDecl>(OriginalVD->getDeclContext()) - ->getDeclContext())) { + const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); + if (IsTargetTask && !SharedField) { + assert(isa<ImplicitParamDecl>(OriginalVD) && + isa<CapturedDecl>(OriginalVD->getDeclContext()) && + cast<CapturedDecl>(OriginalVD->getDeclContext()) + ->getNumParams() == 0 && + isa<TranslationUnitDecl>( + cast<CapturedDecl>(OriginalVD->getDeclContext()) + ->getDeclContext()) && + "Expected artificial target data variable."); SharedRefLValue = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); } else { - auto *SharedField = CapturesInfo.lookup(OriginalVD); SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); SharedRefLValue = CGF.MakeAddrLValue( Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), @@ -4226,8 +4615,7 @@ static void emitPrivatesInit(CodeGenFunction &CGF, // Initialize firstprivate array. if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { // Perform simple memcpy. - CGF.EmitAggregateAssign(PrivateLValue.getAddress(), - SharedRefLValue.getAddress(), Type); + CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); } else { // Initialize firstprivate array using element-by-element // initialization. 
@@ -4258,8 +4646,9 @@ static void emitPrivatesInit(CodeGenFunction &CGF, CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); } - } else + } else { CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); + } } ++FI; } @@ -4269,11 +4658,13 @@ static void emitPrivatesInit(CodeGenFunction &CGF, static bool checkInitIsRequired(CodeGenFunction &CGF, ArrayRef<PrivateDataTy> Privates) { bool InitRequired = false; - for (auto &&Pair : Privates) { - auto *VD = Pair.second.PrivateCopy; - auto *Init = VD->getAnyInitializer(); + for (const PrivateDataTy &Pair : Privates) { + const VarDecl *VD = Pair.second.PrivateCopy; + const Expr *Init = VD->getAnyInitializer(); InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && !CGF.isTrivialInitializer(Init)); + if (InitRequired) + break; } return InitRequired; } @@ -4297,7 +4688,7 @@ emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { - auto &C = CGM.getContext(); + ASTContext &C = CGM.getContext(); FunctionArgList Args; ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy, @@ -4310,16 +4701,17 @@ emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, Args.push_back(&DstArg); Args.push_back(&SrcArg); Args.push_back(&LastprivArg); - auto &TaskDupFnInfo = + const auto &TaskDupFnInfo = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); - auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); - auto *TaskDup = - llvm::Function::Create(TaskDupTy, llvm::GlobalValue::InternalLinkage, - ".omp_task_dup.", &CGM.getModule()); - CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskDup, TaskDupFnInfo); + llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); + std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); + auto 
*TaskDup = llvm::Function::Create( + TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); + TaskDup->setDoesNotRecurse(); CodeGenFunction CGF(CGM); - CGF.disableDebugInfo(); - CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args); + CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, + Loc); LValue TDBase = CGF.EmitLoadOfPointerLValue( CGF.GetAddrOfLocalVar(&DstArg), @@ -4362,9 +4754,9 @@ emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, static bool checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { bool NeedsCleanup = false; - auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); - auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); - for (auto *FD : PrivateRD->fields()) { + auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); + const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); + for (const FieldDecl *FD : PrivateRD->fields()) { NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); if (NeedsCleanup) break; @@ -4377,41 +4769,41 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data) { - auto &C = CGM.getContext(); + ASTContext &C = CGM.getContext(); llvm::SmallVector<PrivateDataTy, 4> Privates; // Aggregate privates and sort them by the alignment. 
auto I = Data.PrivateCopies.begin(); - for (auto *E : Data.PrivateVars) { - auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); - Privates.push_back(std::make_pair( + for (const Expr *E : Data.PrivateVars) { + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + Privates.emplace_back( C.getDeclAlign(VD), PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), - /*PrivateElemInit=*/nullptr))); + /*PrivateElemInit=*/nullptr)); ++I; } I = Data.FirstprivateCopies.begin(); auto IElemInitRef = Data.FirstprivateInits.begin(); - for (auto *E : Data.FirstprivateVars) { - auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); - Privates.push_back(std::make_pair( + for (const Expr *E : Data.FirstprivateVars) { + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + Privates.emplace_back( C.getDeclAlign(VD), PrivateHelpersTy( VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), - cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())))); + cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); ++I; ++IElemInitRef; } I = Data.LastprivateCopies.begin(); - for (auto *E : Data.LastprivateVars) { - auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); - Privates.push_back(std::make_pair( + for (const Expr *E : Data.LastprivateVars) { + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + Privates.emplace_back( C.getDeclAlign(VD), PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), - /*PrivateElemInit=*/nullptr))); + /*PrivateElemInit=*/nullptr)); ++I; } std::stable_sort(Privates.begin(), Privates.end(), stable_sort_comparator); - auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); + QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); // Build type kmp_routine_entry_t (if not built yet). emitKmpRoutineEntryT(KmpInt32Ty); // Build type kmp_task_t (if not built yet). 
@@ -4432,21 +4824,23 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, } KmpTaskTQTy = SavedKmpTaskTQTy; } - auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); + const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); // Build particular struct kmp_task_t for the given task. - auto *KmpTaskTWithPrivatesQTyRD = + const RecordDecl *KmpTaskTWithPrivatesQTyRD = createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); - auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); + QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); QualType KmpTaskTWithPrivatesPtrQTy = C.getPointerType(KmpTaskTWithPrivatesQTy); - auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); - auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo(); - auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy); + llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); + llvm::Type *KmpTaskTWithPrivatesPtrTy = + KmpTaskTWithPrivatesTy->getPointerTo(); + llvm::Value *KmpTaskTWithPrivatesTySize = + CGF.getTypeSize(KmpTaskTWithPrivatesQTy); QualType SharedsPtrTy = C.getPointerType(SharedsTy); // Emit initial values for private copies (if any). 
llvm::Value *TaskPrivatesMap = nullptr; - auto *TaskPrivatesMapTy = + llvm::Type *TaskPrivatesMapTy = std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType(); if (!Privates.empty()) { auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); @@ -4461,7 +4855,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, } // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, // kmp_task_t *tt); - auto *TaskEntry = emitProxyTaskFunction( + llvm::Value *TaskEntry = emitProxyTaskFunction( CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap); @@ -4487,23 +4881,24 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, } if (Data.Priority.getInt()) Flags = Flags | PriorityFlag; - auto *TaskFlags = + llvm::Value *TaskFlags = Data.Final.getPointer() ? CGF.Builder.CreateSelect(Data.Final.getPointer(), CGF.Builder.getInt32(FinalFlag), CGF.Builder.getInt32(/*C=*/0)) : CGF.Builder.getInt32(Data.Final.getInt() ? 
FinalFlag : 0); TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); - auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); + llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( TaskEntry, KmpRoutineEntryPtrTy)}; - auto *NewTask = CGF.EmitRuntimeCall( + llvm::Value *NewTask = CGF.EmitRuntimeCall( createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); - auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - NewTask, KmpTaskTWithPrivatesPtrTy); + llvm::Value *NewTaskNewTaskTTy = + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + NewTask, KmpTaskTWithPrivatesPtrTy); LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, KmpTaskTWithPrivatesQTy); LValue TDBase = @@ -4519,7 +4914,9 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, KmpTaskTShareds)), Loc), CGF.getNaturalTypeAlignment(SharedsTy)); - CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy); + LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); + LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); + CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); } // Emit initial values for private copies (if any). TaskResultTy Result; @@ -4539,7 +4936,8 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, enum { Priority = 0, Destructors = 1 }; // Provide pointer to function with destructors for privates. 
auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); - auto *KmpCmplrdataUD = (*FI)->getType()->getAsUnionType()->getDecl(); + const RecordDecl *KmpCmplrdataUD = + (*FI)->getType()->getAsUnionType()->getDecl(); if (NeedsCleanup) { llvm::Value *DestructorFn = emitDestructorsFunction( CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, @@ -4582,8 +4980,8 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *TaskEntry = Result.TaskEntry; llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; LValue TDBase = Result.TDBase; - RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; - auto &C = CGM.getContext(); + const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; + ASTContext &C = CGM.getContext(); // Process list of dependences. Address DependenciesArray = Address::invalid(); unsigned NumDependencies = Data.Dependences.size(); @@ -4603,8 +5001,9 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); KmpDependInfoRD->completeDefinition(); KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); - } else + } else { KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); + } CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy); // Define type kmp_depend_info[<Dependences.size()>]; QualType KmpDependInfoArrayTy = C.getConstantArrayType( @@ -4613,12 +5012,13 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, // kmp_depend_info[<Dependences.size()>] deps; DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); - for (unsigned i = 0; i < NumDependencies; ++i) { - const Expr *E = Data.Dependences[i].second; - auto Addr = CGF.EmitLValue(E); + for (unsigned I = 0; I < NumDependencies; ++I) { + const Expr *E = Data.Dependences[I].second; + LValue Addr = CGF.EmitLValue(E); llvm::Value *Size; QualType Ty = E->getType(); - if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) 
{ + if (const auto *ASE = + dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { LValue UpAddrLVal = CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false); llvm::Value *UpAddr = @@ -4627,24 +5027,25 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy); llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); - } else + } else { Size = CGF.getTypeSize(Ty); - auto Base = CGF.MakeAddrLValue( - CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize), + } + LValue Base = CGF.MakeAddrLValue( + CGF.Builder.CreateConstArrayGEP(DependenciesArray, I, DependencySize), KmpDependInfoTy); // deps[i].base_addr = &<Dependences[i].second>; - auto BaseAddrLVal = CGF.EmitLValueForField( + LValue BaseAddrLVal = CGF.EmitLValueForField( Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); CGF.EmitStoreOfScalar( CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy), BaseAddrLVal); // deps[i].len = sizeof(<Dependences[i].second>); - auto LenLVal = CGF.EmitLValueForField( + LValue LenLVal = CGF.EmitLValueForField( Base, *std::next(KmpDependInfoRD->field_begin(), Len)); CGF.EmitStoreOfScalar(Size, LenLVal); // deps[i].flags = <Dependences[i].first>; RTLDependenceKindTy DepKind; - switch (Data.Dependences[i].first) { + switch (Data.Dependences[I].first) { case OMPC_DEPEND_in: DepKind = DepIn; break; @@ -4658,7 +5059,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, case OMPC_DEPEND_unknown: llvm_unreachable("Unknown task dependence type"); } - auto FlagsLVal = CGF.EmitLValueForField( + LValue FlagsLVal = CGF.EmitLValueForField( Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), FlagsLVal); @@ -4668,14 +5069,14 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 
CGF.VoidPtrTy); } - // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() + // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() // libcall. // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence // list is not empty - auto *ThreadID = getThreadID(CGF, Loc); - auto *UpLoc = emitUpdateLocation(CGF, Loc); + llvm::Value *ThreadID = getThreadID(CGF, Loc); + llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; llvm::Value *DepTaskArgs[7]; if (NumDependencies) { @@ -4692,7 +5093,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { if (!Data.Tied) { auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); - auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); + LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); } if (NumDependencies) { @@ -4720,7 +5121,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, NumDependencies, &DepWaitTaskArgs, Loc](CodeGenFunction &CGF, PrePostActionTy &) { - auto &RT = CGF.CGM.getOpenMPRuntime(); + CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); CodeGenFunction::RunCleanupsScope LocalScope(CGF); // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 @@ -4750,9 +5151,9 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, RCG(CGF); }; - if (IfCond) + if (IfCond) { emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); - else { + } else { RegionCodeGenTy ThenRCG(ThenCodeGen); ThenRCG(CGF); } @@ -4768,7 
+5169,7 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, return; TaskResultTy Result = emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); - // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() + // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() // libcall. // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int @@ -4779,27 +5180,28 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, if (IfCond) { IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, /*isSigned=*/true); - } else + } else { IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); + } LValue LBLVal = CGF.EmitLValueForField( Result.TDBase, *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); - auto *LBVar = + const auto *LBVar = cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(), /*IsInitializer=*/true); LValue UBLVal = CGF.EmitLValueForField( Result.TDBase, *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); - auto *UBVar = + const auto *UBVar = cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(), /*IsInitializer=*/true); LValue StLVal = CGF.EmitLValueForField( Result.TDBase, *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); - auto *StVar = + const auto *StVar = cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), /*IsInitializer=*/true); @@ -4807,9 +5209,9 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, LValue RedLVal = CGF.EmitLValueForField( Result.TDBase, 
*std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); - if (Data.Reductions) + if (Data.Reductions) { CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); - else { + } else { CGF.EmitNullInitialization(RedLVal.getAddress(), CGF.getContext().VoidPtrTy); } @@ -4821,7 +5223,7 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, IfVal, LBLVal.getPointer(), UBLVal.getPointer(), - CGF.EmitLoadOfScalar(StLVal, SourceLocation()), + CGF.EmitLoadOfScalar(StLVal, Loc), llvm::ConstantInt::getNullValue( CGF.IntTy), // Always 0 because taskgroup emitted by the compiler llvm::ConstantInt::getSigned( @@ -4838,7 +5240,7 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs); } -/// \brief Emit reduction operation for each element of array (required for +/// Emit reduction operation for each element of array (required for /// array sections) LHS op = RHS. /// \param Type Type of array. /// \param LHSVar Variable on the left side of the reduction operation @@ -4860,22 +5262,22 @@ static void EmitOMPAggregateReduction( Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); // Drill down to the base element type on both arrays. - auto ArrayTy = Type->getAsArrayTypeUnsafe(); - auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); + const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); + llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); - auto RHSBegin = RHSAddr.getPointer(); - auto LHSBegin = LHSAddr.getPointer(); + llvm::Value *RHSBegin = RHSAddr.getPointer(); + llvm::Value *LHSBegin = LHSAddr.getPointer(); // Cast from pointer to array type to pointer to single element. - auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); + llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); // The basic structure here is a while-do loop. 
- auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); - auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); - auto IsEmpty = + llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); + llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); + llvm::Value *IsEmpty = CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); // Enter the loop body, making that address the current address. - auto EntryBB = CGF.Builder.GetInsertBlock(); + llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); CGF.EmitBlock(BodyBB); CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); @@ -4896,19 +5298,19 @@ static void EmitOMPAggregateReduction( // Emit copy. CodeGenFunction::OMPPrivateScope Scope(CGF); - Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; }); - Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; }); + Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); + Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); Scope.Privatize(); RedOpGen(CGF, XExpr, EExpr, UpExpr); Scope.ForceCleanup(); // Shift the address forward by one element. - auto LHSElementNext = CGF.Builder.CreateConstGEP1_32( + llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); - auto RHSElementNext = CGF.Builder.CreateConstGEP1_32( + llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); // Check whether we've reached the end. - auto Done = + llvm::Value *Done = CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); @@ -4923,11 +5325,12 @@ static void EmitOMPAggregateReduction( /// UDR combiner function. 
static void emitReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp) { - if (auto *CE = dyn_cast<CallExpr>(ReductionOp)) - if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) - if (auto *DRE = + if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) + if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) + if (const auto *DRE = dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) - if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { + if (const auto *DRD = + dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { std::pair<llvm::Function *, llvm::Function *> Reduction = CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); RValue Func = RValue::get(Reduction.first); @@ -4939,24 +5342,29 @@ static void emitReductionCombiner(CodeGenFunction &CGF, } llvm::Value *CGOpenMPRuntime::emitReductionFunction( - CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, - ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, - ArrayRef<const Expr *> ReductionOps) { - auto &C = CGM.getContext(); + CodeGenModule &CGM, SourceLocation Loc, llvm::Type *ArgsType, + ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs, + ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) { + ASTContext &C = CGM.getContext(); // void reduction_func(void *LHSArg, void *RHSArg); FunctionArgList Args; - ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); - ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, + ImplicitParamDecl::Other); + ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, + ImplicitParamDecl::Other); Args.push_back(&LHSArg); Args.push_back(&RHSArg); - auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); - auto *Fn = llvm::Function::Create( - CGM.getTypes().GetFunctionType(CGFI), 
llvm::GlobalValue::InternalLinkage, - ".omp.reduction.reduction_func", &CGM.getModule()); - CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI); + const auto &CGFI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + std::string Name = getName({"omp", "reduction", "reduction_func"}); + auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), + llvm::GlobalValue::InternalLinkage, Name, + &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); + Fn->setDoesNotRecurse(); CodeGenFunction CGF(CGM); - CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); // Dst = (void*[n])(LHSArg); // Src = (void*[n])(RHSArg); @@ -4974,12 +5382,14 @@ llvm::Value *CGOpenMPRuntime::emitReductionFunction( auto IPriv = Privates.begin(); unsigned Idx = 0; for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { - auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); - Scope.addPrivate(RHSVar, [&]() -> Address { + const auto *RHSVar = + cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); + Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); }); - auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); - Scope.addPrivate(LHSVar, [&]() -> Address { + const auto *LHSVar = + cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); + Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); }); QualType PrivTy = (*IPriv)->getType(); @@ -4989,8 +5399,9 @@ llvm::Value *CGOpenMPRuntime::emitReductionFunction( Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize()); llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); - auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy); - auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); + const VariableArrayType *VLA = + 
CGF.getContext().getAsVariableArrayType(PrivTy); + const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); CodeGenFunction::OpaqueValueMapping OpaqueMap( CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); CGF.EmitVariablyModifiedType(PrivTy); @@ -5000,19 +5411,20 @@ llvm::Value *CGOpenMPRuntime::emitReductionFunction( IPriv = Privates.begin(); auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); - for (auto *E : ReductionOps) { + for (const Expr *E : ReductionOps) { if ((*IPriv)->getType()->isArrayType()) { // Emit reduction for array section. - auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); - auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); + const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); + const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); EmitOMPAggregateReduction( CGF, (*IPriv)->getType(), LHSVar, RHSVar, [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { emitReductionCombiner(CGF, E); }); - } else + } else { // Emit reduction for array subscript or single variable. emitReductionCombiner(CGF, E); + } ++IPriv; ++ILHS; ++IRHS; @@ -5029,16 +5441,17 @@ void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, const DeclRefExpr *RHS) { if (PrivateRef->getType()->isArrayType()) { // Emit reduction for array section. - auto *LHSVar = cast<VarDecl>(LHS->getDecl()); - auto *RHSVar = cast<VarDecl>(RHS->getDecl()); + const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); + const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); EmitOMPAggregateReduction( CGF, PrivateRef->getType(), LHSVar, RHSVar, [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { emitReductionCombiner(CGF, ReductionOp); }); - } else + } else { // Emit reduction for array subscript or single variable. 
emitReductionCombiner(CGF, ReductionOp); + } } void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, @@ -5088,14 +5501,14 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); // ... - auto &C = CGM.getContext(); + ASTContext &C = CGM.getContext(); if (SimpleReduction) { CodeGenFunction::RunCleanupsScope Scope(CGF); auto IPriv = Privates.begin(); auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); - for (auto *E : ReductionOps) { + for (const Expr *E : ReductionOps) { emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), cast<DeclRefExpr>(*IRHS)); ++IPriv; @@ -5108,7 +5521,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, // 1. Build a list of reduction variables. // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; auto Size = RHSExprs.size(); - for (auto *E : Privates) { + for (const Expr *E : Privates) { if (E->getType()->isVariablyModifiedType()) // Reserve place for array size. ++Size; @@ -5136,7 +5549,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *Size = CGF.Builder.CreateIntCast( CGF.getVLASize( CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) - .first, + .NumElts, CGF.SizeTy, /*isSigned=*/false); CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), Elem); @@ -5144,19 +5557,20 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, } // 2. Emit reduce_func(). - auto *ReductionFn = emitReductionFunction( - CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, - LHSExprs, RHSExprs, ReductionOps); + llvm::Value *ReductionFn = emitReductionFunction( + CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), + Privates, LHSExprs, RHSExprs, ReductionOps); // 3. 
Create static kmp_critical_name lock = { 0 }; - auto *Lock = getCriticalRegionLock(".reduction"); + std::string Name = getName({"reduction"}); + llvm::Value *Lock = getCriticalRegionLock(Name); // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), // RedList, reduce_func, &<lock>); - auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); - auto *ThreadId = getThreadID(CGF, Loc); - auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); - auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); + llvm::Value *ThreadId = getThreadID(CGF, Loc); + llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); + llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( ReductionList.getPointer(), CGF.VoidPtrTy); llvm::Value *Args[] = { IdentTLoc, // ident_t *<loc> @@ -5167,14 +5581,15 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ReductionFn, // void (*) (void *, void *) <reduce_func> Lock // kmp_critical_name *&<lock> }; - auto Res = CGF.EmitRuntimeCall( + llvm::Value *Res = CGF.EmitRuntimeCall( createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait : OMPRTL__kmpc_reduce), Args); // 5. Build switch(res) - auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); - auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); + llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); + llvm::SwitchInst *SwInst = + CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); // 6. Build case 1: // ... @@ -5182,7 +5597,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, // ... 
// __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); // break; - auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); + llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); CGF.EmitBlock(Case1BB); @@ -5192,13 +5607,13 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ThreadId, // i32 <gtid> Lock // kmp_critical_name *&<lock> }; - auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps]( - CodeGenFunction &CGF, PrePostActionTy &Action) { - auto &RT = CGF.CGM.getOpenMPRuntime(); + auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( + CodeGenFunction &CGF, PrePostActionTy &Action) { + CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); auto IPriv = Privates.begin(); auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); - for (auto *E : ReductionOps) { + for (const Expr *E : ReductionOps) { RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), cast<DeclRefExpr>(*IRHS)); ++IPriv; @@ -5222,44 +5637,44 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); // ... 
// break; - auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); + llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); CGF.EmitBlock(Case2BB); - auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps]( - CodeGenFunction &CGF, PrePostActionTy &Action) { + auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( + CodeGenFunction &CGF, PrePostActionTy &Action) { auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); auto IPriv = Privates.begin(); - for (auto *E : ReductionOps) { + for (const Expr *E : ReductionOps) { const Expr *XExpr = nullptr; const Expr *EExpr = nullptr; const Expr *UpExpr = nullptr; BinaryOperatorKind BO = BO_Comma; - if (auto *BO = dyn_cast<BinaryOperator>(E)) { + if (const auto *BO = dyn_cast<BinaryOperator>(E)) { if (BO->getOpcode() == BO_Assign) { XExpr = BO->getLHS(); UpExpr = BO->getRHS(); } } // Try to emit update expression as a simple atomic. - auto *RHSExpr = UpExpr; + const Expr *RHSExpr = UpExpr; if (RHSExpr) { // Analyze RHS part of the whole expression. - if (auto *ACO = dyn_cast<AbstractConditionalOperator>( + if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( RHSExpr->IgnoreParenImpCasts())) { // If this is a conditional operator, analyze its condition for // min/max reduction operator. 
RHSExpr = ACO->getCond(); } - if (auto *BORHS = + if (const auto *BORHS = dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { EExpr = BORHS->getRHS(); BO = BORHS->getOpcode(); } } if (XExpr) { - auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); auto &&AtomicRedGen = [BO, VD, Loc](CodeGenFunction &CGF, const Expr *XExpr, const Expr *EExpr, const Expr *UpExpr) { @@ -5273,7 +5688,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, [&CGF, UpExpr, VD, Loc](RValue XRValue) { CodeGenFunction::OMPPrivateScope PrivateScope(CGF); PrivateScope.addPrivate( - VD, [&CGF, VD, XRValue, Loc]() -> Address { + VD, [&CGF, VD, XRValue, Loc]() { Address LHSTemp = CGF.CreateMemTemp(VD->getType()); CGF.emitOMPSimpleStore( CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, @@ -5286,19 +5701,22 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, }; if ((*IPriv)->getType()->isArrayType()) { // Emit atomic reduction for array section. - auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); + const auto *RHSVar = + cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, AtomicRedGen, XExpr, EExpr, UpExpr); - } else + } else { // Emit atomic reduction for array subscript or single variable. AtomicRedGen(CGF, XExpr, EExpr, UpExpr); + } } else { // Emit as a critical region. 
auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, - const Expr *, const Expr *) { - auto &RT = CGF.CGM.getOpenMPRuntime(); + const Expr *, const Expr *) { + CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); + std::string Name = RT.getName({"atomic_reduction"}); RT.emitCriticalRegion( - CGF, ".atomic_reduction", + CGF, Name, [=](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); emitReductionCombiner(CGF, E); @@ -5306,12 +5724,15 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, Loc); }; if ((*IPriv)->getType()->isArrayType()) { - auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); - auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); + const auto *LHSVar = + cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); + const auto *RHSVar = + cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, CritRedGen); - } else + } else { CritRedGen(CGF, nullptr, nullptr, nullptr); + } } ++ILHS; ++IRHS; @@ -5331,20 +5752,29 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, EndArgs); AtomicRCG.setAction(Action); AtomicRCG(CGF); - } else + } else { AtomicRCG(CGF); + } CGF.EmitBranch(DefaultBB); CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); } /// Generates unique name for artificial threadprivate variables. -/// Format is: <Prefix> "." <Loc_raw_encoding> "_" <N> -static std::string generateUniqueName(StringRef Prefix, SourceLocation Loc, - unsigned N) { +/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" +static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, + const Expr *Ref) { SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); - Out << Prefix << "." 
<< Loc.getRawEncoding() << "_" << N; + const clang::DeclRefExpr *DE; + const VarDecl *D = ::getBaseDecl(Ref, DE); + if (!D) + D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); + D = D->getCanonicalDecl(); + std::string Name = CGM.getOpenMPRuntime().getName( + {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); + Out << Prefix << Name << "_" + << D->getCanonicalDecl()->getLocStart().getRawEncoding(); return Out.str(); } @@ -5359,19 +5789,21 @@ static std::string generateUniqueName(StringRef Prefix, SourceLocation Loc, static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N) { - auto &C = CGM.getContext(); + ASTContext &C = CGM.getContext(); FunctionArgList Args; - ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, + ImplicitParamDecl::Other); Args.emplace_back(&Param); - auto &FnInfo = + const auto &FnInfo = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); - auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, - ".red_init.", &CGM.getModule()); - CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); + Name, &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); + Fn->setDoesNotRecurse(); CodeGenFunction CGF(CGM); - CGF.disableDebugInfo(); - CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); Address PrivateAddr = CGF.EmitLoadOfPointer( CGF.GetAddrOfLocalVar(&Param), C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); @@ -5381,10 +5813,9 @@ static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, if (RCG.getSizes(N).second) { Address SizeAddr 
= CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( CGF, CGM.getContext().getSizeType(), - generateUniqueName("reduction_size", Loc, N)); - Size = - CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, - CGM.getContext().getSizeType(), SourceLocation()); + generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); + Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, + CGM.getContext().getSizeType(), Loc); } RCG.emitAggregateType(CGF, N, Size); LValue SharedLVal; @@ -5395,7 +5826,10 @@ static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, Address SharedAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( CGF, CGM.getContext().VoidPtrTy, - generateUniqueName("reduction", Loc, N)); + generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); + SharedAddr = CGF.EmitLoadOfPointer( + SharedAddr, + CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr()); SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); } else { SharedLVal = CGF.MakeNaturalAlignAddrLValue( @@ -5427,40 +5861,42 @@ static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, const Expr *ReductionOp, const Expr *LHS, const Expr *RHS, const Expr *PrivateRef) { - auto &C = CGM.getContext(); - auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); - auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); + ASTContext &C = CGM.getContext(); + const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); + const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); FunctionArgList Args; - ImplicitParamDecl ParamInOut(C, C.VoidPtrTy, ImplicitParamDecl::Other); - ImplicitParamDecl ParamIn(C, C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, + ImplicitParamDecl::Other); Args.emplace_back(&ParamInOut); Args.emplace_back(&ParamIn); - 
auto &FnInfo = + const auto &FnInfo = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); - auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""}); auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, - ".red_comb.", &CGM.getModule()); - CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); + Name, &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); + Fn->setDoesNotRecurse(); CodeGenFunction CGF(CGM); - CGF.disableDebugInfo(); - CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); llvm::Value *Size = nullptr; // If the size of the reduction item is non-constant, load it from global // threadprivate variable. if (RCG.getSizes(N).second) { Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( CGF, CGM.getContext().getSizeType(), - generateUniqueName("reduction_size", Loc, N)); - Size = - CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, - CGM.getContext().getSizeType(), SourceLocation()); + generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); + Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, + CGM.getContext().getSizeType(), Loc); } RCG.emitAggregateType(CGF, N, Size); // Remap lhs and rhs variables to the addresses of the function arguments. // %lhs = bitcast void* %arg0 to <type>* // %rhs = bitcast void* %arg1 to <type>* CodeGenFunction::OMPPrivateScope PrivateScope(CGF); - PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() -> Address { + PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { // Pull out the pointer to the variable. 
Address PtrAddr = CGF.EmitLoadOfPointer( CGF.GetAddrOfLocalVar(&ParamInOut), @@ -5468,7 +5904,7 @@ static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, return CGF.Builder.CreateElementBitCast( PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); }); - PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() -> Address { + PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { // Pull out the pointer to the variable. Address PtrAddr = CGF.EmitLoadOfPointer( CGF.GetAddrOfLocalVar(&ParamIn), @@ -5500,19 +5936,21 @@ static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, ReductionCodeGen &RCG, unsigned N) { if (!RCG.needCleanups(N)) return nullptr; - auto &C = CGM.getContext(); + ASTContext &C = CGM.getContext(); FunctionArgList Args; - ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, + ImplicitParamDecl::Other); Args.emplace_back(&Param); - auto &FnInfo = + const auto &FnInfo = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); - auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, - ".red_fini.", &CGM.getModule()); - CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); + Name, &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); + Fn->setDoesNotRecurse(); CodeGenFunction CGF(CGM); - CGF.disableDebugInfo(); - CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); Address PrivateAddr = CGF.EmitLoadOfPointer( CGF.GetAddrOfLocalVar(&Param), C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); @@ -5522,10 +5960,9 @@ static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, if (RCG.getSizes(N).second) { 
Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( CGF, CGM.getContext().getSizeType(), - generateUniqueName("reduction_size", Loc, N)); - Size = - CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, - CGM.getContext().getSizeType(), SourceLocation()); + generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); + Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, + CGM.getContext().getSizeType(), Loc); } RCG.emitAggregateType(CGF, N, Size); // Emit the finalizer body: @@ -5551,7 +5988,7 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( // kmp_task_red_flags_t flags; // flags for additional info from compiler // } kmp_task_red_input_t; ASTContext &C = CGM.getContext(); - auto *RD = C.buildImplicitRecord("kmp_task_red_input_t"); + RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t"); RD->startDefinition(); const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); @@ -5652,14 +6089,14 @@ void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, /*isSigned=*/false); Address SizeAddr = getAddrOfArtificialThreadPrivate( CGF, CGM.getContext().getSizeType(), - generateUniqueName("reduction_size", Loc, N)); + generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); } // Store address of the original reduction item if custom initializer is used. 
if (RCG.usesReductionInitializer(N)) { Address SharedAddr = getAddrOfArtificialThreadPrivate( CGF, CGM.getContext().VoidPtrTy, - generateUniqueName("reduction", Loc, N)); + generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); CGF.Builder.CreateStore( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy), @@ -5749,18 +6186,18 @@ void CGOpenMPRuntime::emitCancellationPointCall( emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; // Ignore return result until untied tasks are supported. - auto *Result = CGF.EmitRuntimeCall( + llvm::Value *Result = CGF.EmitRuntimeCall( createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args); // if (__kmpc_cancellationpoint()) { // exit from construct; // } - auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); - auto *ContBB = CGF.createBasicBlock(".cancel.continue"); - auto *Cmp = CGF.Builder.CreateIsNotNull(Result); + llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); + llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); + llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); CGF.EmitBlock(ExitBB); // exit from construct; - auto CancelDest = + CodeGenFunction::JumpDest CancelDest = CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); CGF.EmitBranchThroughCleanup(CancelDest); CGF.EmitBlock(ContBB, /*IsFinished=*/true); @@ -5779,70 +6216,42 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) { - auto &RT = CGF.CGM.getOpenMPRuntime(); + CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); llvm::Value *Args[] = { RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; // Ignore return result 
until untied tasks are supported. - auto *Result = CGF.EmitRuntimeCall( + llvm::Value *Result = CGF.EmitRuntimeCall( RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args); // if (__kmpc_cancel()) { // exit from construct; // } - auto *ExitBB = CGF.createBasicBlock(".cancel.exit"); - auto *ContBB = CGF.createBasicBlock(".cancel.continue"); - auto *Cmp = CGF.Builder.CreateIsNotNull(Result); + llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); + llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); + llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); CGF.EmitBlock(ExitBB); // exit from construct; - auto CancelDest = + CodeGenFunction::JumpDest CancelDest = CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); CGF.EmitBranchThroughCleanup(CancelDest); CGF.EmitBlock(ContBB, /*IsFinished=*/true); }; - if (IfCond) + if (IfCond) { emitOMPIfClause(CGF, IfCond, ThenGen, [](CodeGenFunction &, PrePostActionTy &) {}); - else { + } else { RegionCodeGenTy ThenRCG(ThenGen); ThenRCG(CGF); } } } -/// \brief Obtain information that uniquely identifies a target entry. This -/// consists of the file and device IDs as well as line number associated with -/// the relevant entry source location. 
-static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, - unsigned &DeviceID, unsigned &FileID, - unsigned &LineNum) { - - auto &SM = C.getSourceManager(); - - // The loc should be always valid and have a file ID (the user cannot use - // #pragma directives in macros) - - assert(Loc.isValid() && "Source location is expected to be always valid."); - assert(Loc.isFileID() && "Source location is expected to refer to a file."); - - PresumedLoc PLoc = SM.getPresumedLoc(Loc); - assert(PLoc.isValid() && "Source location is expected to be always valid."); - - llvm::sys::fs::UniqueID ID; - if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) - llvm_unreachable("Source file with target region no longer exists!"); - - DeviceID = ID.getDevice(); - FileID = ID.getFile(); - LineNum = PLoc.getLine(); -} - void CGOpenMPRuntime::emitTargetOutlinedFunction( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { assert(!ParentName.empty() && "Invalid target region parent name!"); - emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); } @@ -5872,7 +6281,7 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; } - const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); + const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); CodeGenFunction CGF(CGM, true); CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); @@ -5898,22 +6307,25 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( if (CGM.getLangOpts().OpenMPIsDevice) { OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); - OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage); - } else + OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); + OutlinedFn->setDSOLocal(false); + } else { + std::string Name = getName({EntryFnName, 
"region_id"}); OutlinedFnID = new llvm::GlobalVariable( CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, - llvm::GlobalValue::PrivateLinkage, - llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id"); + llvm::GlobalValue::WeakAnyLinkage, + llvm::Constant::getNullValue(CGM.Int8Ty), Name); + } // Register the information for the entry associated with this target region. OffloadEntriesInfoManager.registerTargetRegionEntryInfo( DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, - /*Flags=*/0); + OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); } /// discard all CompoundStmts intervening between two constructs static const Stmt *ignoreCompoundStmts(const Stmt *Body) { - while (auto *CS = dyn_cast_or_null<CompoundStmt>(Body)) + while (const auto *CS = dyn_cast_or_null<CompoundStmt>(Body)) Body = CS->body_front(); return Body; @@ -5931,12 +6343,11 @@ static llvm::Value * emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime, CodeGenFunction &CGF, const OMPExecutableDirective &D) { - assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " "teams directive expected to be " "emitted only for the host!"); - auto &Bld = CGF.Builder; + CGBuilderTy &Bld = CGF.Builder; // If the target directive is combined with a teams directive: // Return the value in the num_teams clause, if any. 
@@ -5944,8 +6355,8 @@ emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime, if (isOpenMPTeamsDirective(D.getDirectiveKind())) { if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) { CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); - auto NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(), - /*IgnoreResultAssign*/ true); + llvm::Value *NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(), + /*IgnoreResultAssign*/ true); return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, /*IsSigned=*/true); } @@ -5965,12 +6376,12 @@ emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime, // the expression is captured in the enclosing target environment when the // teams directive is not combined with target. - const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); + const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); - if (auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>( + if (const auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>( ignoreCompoundStmts(CS.getCapturedStmt()))) { if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) { - if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) { + if (const auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) { CGOpenMPInnerExprInfo CGInfo(CGF, CS); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams()); @@ -6000,12 +6411,11 @@ static llvm::Value * emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime, CodeGenFunction &CGF, const OMPExecutableDirective &D) { - assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " "teams directive expected to be " "emitted only for the host!"); - auto &Bld = CGF.Builder; + CGBuilderTy &Bld = CGF.Builder; // // If the target directive is combined with a teams directive: @@ -6030,8 +6440,9 @@ emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime, if (const auto *ThreadLimitClause = 
D.getSingleClause<OMPThreadLimitClause>()) { CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); - auto ThreadLimit = CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(), - /*IgnoreResultAssign*/ true); + llvm::Value *ThreadLimit = + CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(), + /*IgnoreResultAssign*/ true); ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/true); } @@ -6068,12 +6479,12 @@ emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime, // the expression is captured in the enclosing target environment when the // teams directive is not combined with target. - const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); + const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); - if (auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>( + if (const auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>( ignoreCompoundStmts(CS.getCapturedStmt()))) { if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) { - if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) { + if (const auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) { CGOpenMPInnerExprInfo CGInfo(CGF, CS); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit()); @@ -6092,42 +6503,50 @@ emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime, } namespace { -// \brief Utility to handle information from clauses associated with a given +LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); + +// Utility to handle information from clauses associated with a given // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). // It provides a convenient interface to obtain the information and generate // code for that information. class MappableExprsHandler { public: - /// \brief Values for bit flags used to specify the mapping type for + /// Values for bit flags used to specify the mapping type for /// offloading. 
- enum OpenMPOffloadMappingFlags { - /// \brief Allocate memory on the device and move data from host to device. + enum OpenMPOffloadMappingFlags : uint64_t { + /// No flags + OMP_MAP_NONE = 0x0, + /// Allocate memory on the device and move data from host to device. OMP_MAP_TO = 0x01, - /// \brief Allocate memory on the device and move data from device to host. + /// Allocate memory on the device and move data from device to host. OMP_MAP_FROM = 0x02, - /// \brief Always perform the requested mapping action on the element, even + /// Always perform the requested mapping action on the element, even /// if it was already mapped before. OMP_MAP_ALWAYS = 0x04, - /// \brief Delete the element from the device environment, ignoring the + /// Delete the element from the device environment, ignoring the /// current reference count associated with the element. OMP_MAP_DELETE = 0x08, - /// \brief The element being mapped is a pointer-pointee pair; both the + /// The element being mapped is a pointer-pointee pair; both the /// pointer and the pointee should be mapped. OMP_MAP_PTR_AND_OBJ = 0x10, - /// \brief This flags signals that the base address of an entry should be + /// This flags signals that the base address of an entry should be /// passed to the target kernel as an argument. OMP_MAP_TARGET_PARAM = 0x20, - /// \brief Signal that the runtime library has to return the device pointer + /// Signal that the runtime library has to return the device pointer /// in the current position for the data being mapped. Used when we have the /// use_device_ptr clause. OMP_MAP_RETURN_PARAM = 0x40, - /// \brief This flag signals that the reference being passed is a pointer to + /// This flag signals that the reference being passed is a pointer to /// private data. OMP_MAP_PRIVATE = 0x80, - /// \brief Pass the element to the device by value. + /// Pass the element to the device by value. 
OMP_MAP_LITERAL = 0x100, /// Implicit map OMP_MAP_IMPLICIT = 0x200, + /// The 16 MSBs of the flags indicate whether the entry is member of some + /// struct/class. + OMP_MAP_MEMBER_OF = 0xffff000000000000, + LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), }; /// Class that associates information with a base pointer to be passed to the @@ -6147,21 +6566,60 @@ public: void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } }; - typedef SmallVector<BasePointerInfo, 16> MapBaseValuesArrayTy; - typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy; - typedef SmallVector<uint64_t, 16> MapFlagsArrayTy; + using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; + using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; + using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; + + /// Map between a struct and the its lowest & highest elements which have been + /// mapped. + /// [ValueDecl *] --> {LE(FieldIndex, Pointer), + /// HE(FieldIndex, Pointer)} + struct StructRangeInfoTy { + std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { + 0, Address::invalid()}; + std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { + 0, Address::invalid()}; + Address Base = Address::invalid(); + }; private: - /// \brief Directive from where the map clauses were extracted. + /// Kind that defines how a device pointer has to be returned. 
+ struct MapInfo { + OMPClauseMappableExprCommon::MappableExprComponentListRef Components; + OpenMPMapClauseKind MapType = OMPC_MAP_unknown; + OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown; + bool ReturnDevicePointer = false; + bool IsImplicit = false; + + MapInfo() = default; + MapInfo( + OMPClauseMappableExprCommon::MappableExprComponentListRef Components, + OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, + bool ReturnDevicePointer, bool IsImplicit) + : Components(Components), MapType(MapType), + MapTypeModifier(MapTypeModifier), + ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {} + }; + + /// If use_device_ptr is used on a pointer which is a struct member and there + /// is no map information about it, then emission of that entry is deferred + /// until the whole struct has been processed. + struct DeferredDevicePtrEntryTy { + const Expr *IE = nullptr; + const ValueDecl *VD = nullptr; + + DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD) + : IE(IE), VD(VD) {} + }; + + /// Directive from where the map clauses were extracted. const OMPExecutableDirective &CurDir; - /// \brief Function the directive is being generated for. + /// Function the directive is being generated for. CodeGenFunction &CGF; - /// \brief Set of all first private variables in the current directive. + /// Set of all first private variables in the current directive. llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls; - /// Set of all reduction variables in the current directive. - llvm::SmallPtrSet<const VarDecl *, 8> ReductionDecls; /// Map between device pointer declarations and their expression components. /// The key value for declarations in 'this' is null. @@ -6171,10 +6629,10 @@ private: DevPointersMap; llvm::Value *getExprTypeSize(const Expr *E) const { - auto ExprTy = E->getType().getCanonicalType(); + QualType ExprTy = E->getType().getCanonicalType(); // Reference types are ignored for mapping purposes. 
- if (auto *RefTy = ExprTy->getAs<ReferenceType>()) + if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) ExprTy = RefTy->getPointeeType().getCanonicalType(); // Given that an array section is considered a built-in type, we need to @@ -6191,10 +6649,10 @@ private: return CGF.getTypeSize(BaseTy); llvm::Value *ElemSize; - if (auto *PTy = BaseTy->getAs<PointerType>()) + if (const auto *PTy = BaseTy->getAs<PointerType>()) { ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); - else { - auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); + } else { + const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); assert(ATy && "Expecting array type if not a pointer type."); ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); } @@ -6204,7 +6662,7 @@ private: if (!OAE->getLength()) return ElemSize; - auto *LengthVal = CGF.EmitScalarExpr(OAE->getLength()); + llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength()); LengthVal = CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false); return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); @@ -6212,14 +6670,16 @@ private: return CGF.getTypeSize(ExprTy); } - /// \brief Return the corresponding bits for a given map clause modifier. Add + /// Return the corresponding bits for a given map clause modifier. Add /// a flag marking the map as a pointer if requested. Add a flag marking the /// map as the first one of a series of maps that relate to the same map /// expression. - uint64_t getMapTypeBits(OpenMPMapClauseKind MapType, - OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag, - bool AddIsTargetParamFlag) const { - uint64_t Bits = 0u; + OpenMPOffloadMappingFlags getMapTypeBits(OpenMPMapClauseKind MapType, + OpenMPMapClauseKind MapTypeModifier, + bool IsImplicit, bool AddPtrFlag, + bool AddIsTargetParamFlag) const { + OpenMPOffloadMappingFlags Bits = + IsImplicit ? 
OMP_MAP_IMPLICIT : OMP_MAP_NONE; switch (MapType) { case OMPC_MAP_alloc: case OMPC_MAP_release: @@ -6229,20 +6689,20 @@ private: // type modifiers. break; case OMPC_MAP_to: - Bits = OMP_MAP_TO; + Bits |= OMP_MAP_TO; break; case OMPC_MAP_from: - Bits = OMP_MAP_FROM; + Bits |= OMP_MAP_FROM; break; case OMPC_MAP_tofrom: - Bits = OMP_MAP_TO | OMP_MAP_FROM; + Bits |= OMP_MAP_TO | OMP_MAP_FROM; break; case OMPC_MAP_delete: - Bits = OMP_MAP_DELETE; + Bits |= OMP_MAP_DELETE; break; - default: + case OMPC_MAP_always: + case OMPC_MAP_unknown: llvm_unreachable("Unexpected map type!"); - break; } if (AddPtrFlag) Bits |= OMP_MAP_PTR_AND_OBJ; @@ -6253,10 +6713,10 @@ private: return Bits; } - /// \brief Return true if the provided expression is a final array section. A + /// Return true if the provided expression is a final array section. A /// final array section, is one whose length can't be proved to be one. bool isFinalArraySectionExpression(const Expr *E) const { - auto *OASE = dyn_cast<OMPArraySectionExpr>(E); + const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); // It is not an array section and therefore not a unity-size one. if (!OASE) @@ -6266,16 +6726,16 @@ private: if (OASE->getColonLoc().isInvalid()) return false; - auto *Length = OASE->getLength(); + const Expr *Length = OASE->getLength(); // If we don't have a length we have to check if the array has size 1 // for this dimension. Also, we should always expect a length if the // base type is pointer. 
if (!Length) { - auto BaseQTy = OMPArraySectionExpr::getBaseOriginalType( - OASE->getBase()->IgnoreParenImpCasts()) - .getCanonicalType(); - if (auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) + QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( + OASE->getBase()->IgnoreParenImpCasts()) + .getCanonicalType(); + if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) return ATy->getSize().getSExtValue() != 1; // If we don't have a constant dimension length, we have to consider // the current section as having any size, so it is not necessarily @@ -6291,7 +6751,7 @@ private: return ConstLength.getSExtValue() != 1; } - /// \brief Generate the base pointers, section pointers, sizes and map type + /// Generate the base pointers, section pointers, sizes and map type /// bits for the provided map type, map modifier, and expression components. /// \a IsFirstComponent should be set to true if the provided set of /// components is the first associated with a capture. @@ -6300,10 +6760,10 @@ private: OMPClauseMappableExprCommon::MappableExprComponentListRef Components, MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, - bool IsFirstComponentList, bool IsImplicit) const { - + StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, + bool IsImplicit) const { // The following summarizes what has to be generated for each map and the - // types bellow. The generated information is expressed in this order: + // types below. The generated information is expressed in this order: // base pointer, section pointer, size, flags // (to add to the ones that come from the map type and modifier). 
// @@ -6326,96 +6786,141 @@ private: // S2 *ps; // // map(d) - // &d, &d, sizeof(double), noflags + // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM // // map(i) - // &i, &i, 100*sizeof(int), noflags + // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM // // map(i[1:23]) - // &i(=&i[0]), &i[1], 23*sizeof(int), noflags + // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM // // map(p) - // &p, &p, sizeof(float*), noflags + // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM // // map(p[1:24]) - // p, &p[1], 24*sizeof(float), noflags + // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM // // map(s) - // &s, &s, sizeof(S2), noflags + // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM // // map(s.i) - // &s, &(s.i), sizeof(int), noflags + // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM // // map(s.s.f) - // &s, &(s.i.f), 50*sizeof(int), noflags + // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM // // map(s.p) - // &s, &(s.p), sizeof(double*), noflags + // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM // - // map(s.p[:22], s.a s.b) - // &s, &(s.p), sizeof(double*), noflags - // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag + // map(to: s.p[:22]) + // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) + // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) + // &(s.p), &(s.p[0]), 22*sizeof(double), + // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) + // (*) alloc space for struct members, only this is a target parameter + // (**) map the pointer (nothing to be mapped in this example) (the compiler + // optimizes this entry out, same in the examples below) + // (***) map the pointee (map: to) // // map(s.ps) - // &s, &(s.ps), sizeof(S2*), noflags + // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM // - // map(s.ps->s.i) - // &s, &(s.ps), sizeof(S2*), noflags - // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag + // map(from: s.ps->s.i) + // &s, &(s.ps), sizeof(S2*), TARGET_PARAM + // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) + // &(s.ps), 
&(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM // - // map(s.ps->ps) - // &s, &(s.ps), sizeof(S2*), noflags - // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + // map(to: s.ps->ps) + // &s, &(s.ps), sizeof(S2*), TARGET_PARAM + // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) + // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO // // map(s.ps->ps->ps) - // &s, &(s.ps), sizeof(S2*), noflags - // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag - // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag + // &s, &(s.ps), sizeof(S2*), TARGET_PARAM + // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) + // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ + // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM // - // map(s.ps->ps->s.f[:22]) - // &s, &(s.ps), sizeof(S2*), noflags - // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag - // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + // map(to: s.ps->ps->s.f[:22]) + // &s, &(s.ps), sizeof(S2*), TARGET_PARAM + // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) + // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ + // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO // // map(ps) - // &ps, &ps, sizeof(S2*), noflags + // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM // // map(ps->i) - // ps, &(ps->i), sizeof(int), noflags + // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM // // map(ps->s.f) - // ps, &(ps->s.f[0]), 50*sizeof(float), noflags + // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM // - // map(ps->p) - // ps, &(ps->p), sizeof(double*), noflags + // map(from: ps->p) + // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM // - // map(ps->p[:22]) - // ps, &(ps->p), sizeof(double*), noflags - // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag + // map(to: ps->p[:22]) + // ps, &(ps->p), sizeof(double*), TARGET_PARAM + // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) + // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) 
| PTR_AND_OBJ | TO // // map(ps->ps) - // ps, &(ps->ps), sizeof(S2*), noflags + // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM // - // map(ps->ps->s.i) - // ps, &(ps->ps), sizeof(S2*), noflags - // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag + // map(from: ps->ps->s.i) + // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM + // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) + // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM // - // map(ps->ps->ps) - // ps, &(ps->ps), sizeof(S2*), noflags - // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + // map(from: ps->ps->ps) + // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM + // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) + // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM // // map(ps->ps->ps->ps) - // ps, &(ps->ps), sizeof(S2*), noflags - // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag - // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag + // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM + // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) + // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ + // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM // - // map(ps->ps->ps->s.f[:22]) - // ps, &(ps->ps), sizeof(S2*), noflags - // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag - // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + // map(to: ps->ps->ps->s.f[:22]) + // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM + // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) + // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ + // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO + // + // map(to: s.f[:22]) map(from: s.p[:33]) + // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + + // sizeof(double*) (**), TARGET_PARAM + // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO + // &s, &(s.p), sizeof(double*), MEMBER_OF(1) + // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM + // (*) 
allocate contiguous space needed to fit all mapped members even if + // we allocate space for members not mapped (in this example, + // s.f[22..49] and s.s are not mapped, yet we must allocate space for + // them as well because they fall between &s.f[0] and &s.p) + // + // map(from: s.f[:22]) map(to: ps->p[:33]) + // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM + // ps, &(ps->p), sizeof(S2*), TARGET_PARAM + // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) + // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO + // (*) the struct this entry pertains to is the 2nd element in the list of + // arguments, hence MEMBER_OF(2) + // + // map(from: s.f[:22], s.s) map(to: ps->p[:33]) + // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM + // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM + // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM + // ps, &(ps->p), sizeof(S2*), TARGET_PARAM + // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) + // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO + // (*) the struct this entry pertains to is the 4th element in the list + // of arguments, hence MEMBER_OF(4) // Track if the map information being generated is the first for a capture. bool IsCaptureFirstInfo = IsFirstComponentList; + bool IsLink = false; // Is this variable a "declare target link"? // Scan the components from the base to the complete expression. auto CI = Components.rbegin(); @@ -6425,16 +6930,25 @@ private: // Track if the map information being generated is the first for a list of // components. bool IsExpressionFirstInfo = true; - llvm::Value *BP = nullptr; + Address BP = Address::invalid(); - if (auto *ME = dyn_cast<MemberExpr>(I->getAssociatedExpression())) { + if (isa<MemberExpr>(I->getAssociatedExpression())) { // The base is the 'this' pointer. The content of the pointer is going // to be the base of the field being mapped. 
- BP = CGF.EmitScalarExpr(ME->getBase()); + BP = CGF.LoadCXXThisAddress(); } else { // The base is the reference to the variable. // BP = &Var. - BP = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer(); + BP = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress(); + if (const auto *VD = + dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { + if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = + isDeclareTargetDeclaration(VD)) + if (*Res == OMPDeclareTargetDeclAttr::MT_Link) { + IsLink = true; + BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD); + } + } // If the variable is a pointer and is being dereferenced (i.e. is not // the last component), the base has to be the pointer itself, not its @@ -6442,10 +6956,7 @@ private: QualType Ty = I->getAssociatedDeclaration()->getType().getNonReferenceType(); if (Ty->isAnyPointerType() && std::next(I) != CE) { - auto PtrAddr = CGF.MakeNaturalAlignAddrLValue(BP, Ty); - BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(), - Ty->castAs<PointerType>()) - .getPointer(); + BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); // We do not need to generate individual map information for the // pointer, it can be associated with the combined storage. @@ -6453,8 +6964,41 @@ private: } } - uint64_t DefaultFlags = IsImplicit ? OMP_MAP_IMPLICIT : 0; + // Track whether a component of the list should be marked as MEMBER_OF some + // combined entry (for partial structs). Only the first PTR_AND_OBJ entry + // in a component list should be marked as MEMBER_OF, all subsequent entries + // do not belong to the base struct. E.g. + // struct S2 s; + // s.ps->ps->ps->f[:] + // (1) (2) (3) (4) + // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a + // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) + // is the pointee of ps(2) which is not member of struct s, so it should not + // be marked as such (it is still PTR_AND_OBJ). 
+ // The variable is initialized to false so that PTR_AND_OBJ entries which + // are not struct members are not considered (e.g. array of pointers to + // data). + bool ShouldBeMemberOf = false; + + // Variable keeping track of whether or not we have encountered a component + // in the component list which is a member expression. Useful when we have a + // pointer or a final array section, in which case it is the previous + // component in the list which tells us whether we have a member expression. + // E.g. X.f[:] + // While processing the final array section "[:]" it is "f" which tells us + // whether we are dealing with a member of a declared struct. + const MemberExpr *EncounteredME = nullptr; + for (; I != CE; ++I) { + // If the current component is member of a struct (parent struct) mark it. + if (!EncounteredME) { + EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); + // If we encounter a PTR_AND_OBJ entry from now on it should be marked + // as MEMBER_OF the parent struct. + if (EncounteredME) + ShouldBeMemberOf = true; + } + auto Next = std::next(I); // We need to generate the addresses and sizes if this is the last @@ -6472,14 +7016,12 @@ private: const auto *OASE = dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); bool IsPointer = - (OASE && - OMPArraySectionExpr::getBaseOriginalType(OASE) - .getCanonicalType() - ->isAnyPointerType()) || + (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) + .getCanonicalType() + ->isAnyPointerType()) || I->getAssociatedExpression()->getType()->isAnyPointerType(); if (Next == CE || IsPointer || IsFinalArraySection) { - // If this is not the last component, we expect the pointer to be // associated with an array expression or member expression. 
assert((Next == CE || @@ -6488,44 +7030,68 @@ private: isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) && "Unexpected expression"); - llvm::Value *LB = - CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer(); - auto *Size = getExprTypeSize(I->getAssociatedExpression()); - - // If we have a member expression and the current component is a - // reference, we have to map the reference too. Whenever we have a - // reference, the section that reference refers to is going to be a - // load instruction from the storage assigned to the reference. - if (isa<MemberExpr>(I->getAssociatedExpression()) && - I->getAssociatedDeclaration()->getType()->isReferenceType()) { - auto *LI = cast<llvm::LoadInst>(LB); - auto *RefAddr = LI->getPointerOperand(); - - BasePointers.push_back(BP); - Pointers.push_back(RefAddr); - Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); - Types.push_back(DefaultFlags | - getMapTypeBits( - /*MapType*/ OMPC_MAP_alloc, - /*MapTypeModifier=*/OMPC_MAP_unknown, - !IsExpressionFirstInfo, IsCaptureFirstInfo)); - IsExpressionFirstInfo = false; - IsCaptureFirstInfo = false; - // The reference will be the next base address. - BP = RefAddr; - } + Address LB = + CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress(); + llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); + + // If this component is a pointer inside the base struct then we don't + // need to create any entry for it - it will be combined with the object + // it is pointing to into a single PTR_AND_OBJ entry. + bool IsMemberPointer = + IsPointer && EncounteredME && + (dyn_cast<MemberExpr>(I->getAssociatedExpression()) == + EncounteredME); + if (!IsMemberPointer) { + BasePointers.push_back(BP.getPointer()); + Pointers.push_back(LB.getPointer()); + Sizes.push_back(Size); + + // We need to add a pointer flag for each map that comes from the + // same expression except for the first one. 
We also need to signal + // this map is the first one that relates with the current capture + // (there is a set of entries for each capture). + OpenMPOffloadMappingFlags Flags = getMapTypeBits( + MapType, MapTypeModifier, IsImplicit, + !IsExpressionFirstInfo || IsLink, IsCaptureFirstInfo && !IsLink); + + if (!IsExpressionFirstInfo) { + // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, + // then we reset the TO/FROM/ALWAYS/DELETE flags. + if (IsPointer) + Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | + OMP_MAP_DELETE); + + if (ShouldBeMemberOf) { + // Set placeholder value MEMBER_OF=FFFF to indicate that the flag + // should be later updated with the correct value of MEMBER_OF. + Flags |= OMP_MAP_MEMBER_OF; + // From now on, all subsequent PTR_AND_OBJ entries should not be + // marked as MEMBER_OF. + ShouldBeMemberOf = false; + } + } - BasePointers.push_back(BP); - Pointers.push_back(LB); - Sizes.push_back(Size); + Types.push_back(Flags); + } - // We need to add a pointer flag for each map that comes from the - // same expression except for the first one. We also need to signal - // this map is the first one that relates with the current capture - // (there is a set of entries for each capture). - Types.push_back(DefaultFlags | getMapTypeBits(MapType, MapTypeModifier, - !IsExpressionFirstInfo, - IsCaptureFirstInfo)); + // If we have encountered a member expression so far, keep track of the + // mapped member. If the parent is "*this", then the value declaration + // is nullptr. 
+ if (EncounteredME) { + const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl()); + unsigned FieldIndex = FD->getFieldIndex(); + + // Update info about the lowest and highest elements for this struct + if (!PartialStruct.Base.isValid()) { + PartialStruct.LowestElem = {FieldIndex, LB}; + PartialStruct.HighestElem = {FieldIndex, LB}; + PartialStruct.Base = BP; + } else if (FieldIndex < PartialStruct.LowestElem.first) { + PartialStruct.LowestElem = {FieldIndex, LB}; + } else if (FieldIndex > PartialStruct.HighestElem.first) { + PartialStruct.HighestElem = {FieldIndex, LB}; + } + } // If we have a final array section, we are done with this expression. if (IsFinalArraySection) @@ -6541,11 +7107,11 @@ private: } } - /// \brief Return the adjusted map modifiers if the declaration a capture - /// refers to appears in a first-private clause. This is expected to be used - /// only with directives that start with 'target'. - unsigned adjustMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap, - unsigned CurrentModifiers) { + /// Return the adjusted map modifiers if the declaration a capture refers to + /// appears in a first-private clause. This is expected to be used only with + /// directives that start with 'target'. + MappableExprsHandler::OpenMPOffloadMappingFlags + getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { assert(Cap.capturesVariable() && "Expected capture by reference only!"); // A first private variable captured by reference will use only the @@ -6554,15 +7120,29 @@ private: if (FirstPrivateDecls.count(Cap.getCapturedVar())) return MappableExprsHandler::OMP_MAP_PRIVATE | MappableExprsHandler::OMP_MAP_TO; - // Reduction variable will use only the 'private ptr' and 'map to_from' - // flag. 
- if (ReductionDecls.count(Cap.getCapturedVar())) { - return MappableExprsHandler::OMP_MAP_TO | - MappableExprsHandler::OMP_MAP_FROM; - } + return MappableExprsHandler::OMP_MAP_TO | + MappableExprsHandler::OMP_MAP_FROM; + } + + static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { + // Member of is given by the 16 MSB of the flag, so rotate by 48 bits. + return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) + << 48); + } + + static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, + OpenMPOffloadMappingFlags MemberOfFlag) { + // If the entry is PTR_AND_OBJ but has not been marked with the special + // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be + // marked as MEMBER_OF. + if ((Flags & OMP_MAP_PTR_AND_OBJ) && + ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) + return; - // We didn't modify anything. - return CurrentModifiers; + // Reset the placeholder value to prepare the flag for the assignment of the + // proper MEMBER_OF value. + Flags &= ~OMP_MAP_MEMBER_OF; + Flags |= MemberOfFlag; } public: @@ -6573,58 +7153,54 @@ public: for (const auto *D : C->varlists()) FirstPrivateDecls.insert( cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); - for (const auto *C : Dir.getClausesOfKind<OMPReductionClause>()) { - for (const auto *D : C->varlists()) { - ReductionDecls.insert( - cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); - } - } // Extract device pointer clause information. for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) for (auto L : C->component_lists()) DevPointersMap[L.first].push_back(L.second); } - /// \brief Generate all the base pointers, section pointers, sizes and map + /// Generate code for the combined entry if we have a partially mapped struct + /// and take care of the mapping flags of the arguments corresponding to + /// individual struct members. 
+ void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers, + MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, + MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes, + const StructRangeInfoTy &PartialStruct) const { + // Base is the base of the struct + BasePointers.push_back(PartialStruct.Base.getPointer()); + // Pointer is the address of the lowest element + llvm::Value *LB = PartialStruct.LowestElem.second.getPointer(); + Pointers.push_back(LB); + // Size is (addr of {highest+1} element) - (addr of lowest element) + llvm::Value *HB = PartialStruct.HighestElem.second.getPointer(); + llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); + llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); + llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); + llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); + llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.SizeTy, + /*isSinged=*/false); + Sizes.push_back(Size); + // Map type is always TARGET_PARAM + Types.push_back(OMP_MAP_TARGET_PARAM); + // Remove TARGET_PARAM flag from the first element + (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; + + // All other current entries will be MEMBER_OF the combined entry + // (except for PTR_AND_OBJ entries which do not have a placeholder value + // 0xFFFF in the MEMBER_OF field). + OpenMPOffloadMappingFlags MemberOfFlag = + getMemberOfFlag(BasePointers.size() - 1); + for (auto &M : CurTypes) + setCorrectMemberOfFlag(M, MemberOfFlag); + } + + /// Generate all the base pointers, section pointers, sizes and map /// types for the extracted mappable expressions. Also, for each item that /// relates with a device pointer, a pair of the relevant declaration and /// index where it occurs is appended to the device pointers info array. 
void generateAllInfo(MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types) const { - BasePointers.clear(); - Pointers.clear(); - Sizes.clear(); - Types.clear(); - - struct MapInfo { - /// Kind that defines how a device pointer has to be returned. - enum ReturnPointerKind { - // Don't have to return any pointer. - RPK_None, - // Pointer is the base of the declaration. - RPK_Base, - // Pointer is a member of the base declaration - 'this' - RPK_Member, - // Pointer is a reference and a member of the base declaration - 'this' - RPK_MemberReference, - }; - OMPClauseMappableExprCommon::MappableExprComponentListRef Components; - OpenMPMapClauseKind MapType = OMPC_MAP_unknown; - OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown; - ReturnPointerKind ReturnDevicePointer = RPK_None; - bool IsImplicit = false; - - MapInfo() = default; - MapInfo( - OMPClauseMappableExprCommon::MappableExprComponentListRef Components, - OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, - ReturnPointerKind ReturnDevicePointer, bool IsImplicit) - : Components(Components), MapType(MapType), - MapTypeModifier(MapTypeModifier), - ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {} - }; - // We have to process the component lists that relate with the same // declaration in a single chunk so that we can generate the map flags // correctly. Therefore, we organize all lists in a map. @@ -6636,7 +7212,7 @@ public: const ValueDecl *D, OMPClauseMappableExprCommon::MappableExprComponentListRef L, OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier, - MapInfo::ReturnPointerKind ReturnDevicePointer, bool IsImplicit) { + bool ReturnDevicePointer, bool IsImplicit) { const ValueDecl *VD = D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; Info[VD].emplace_back(L, MapType, MapModifier, ReturnDevicePointer, @@ -6644,33 +7220,39 @@ public: }; // FIXME: MSVC 2013 seems to require this-> to find member CurDir. 
- for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) - for (auto L : C->component_lists()) { + for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) + for (const auto &L : C->component_lists()) { InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(), - MapInfo::RPK_None, C->isImplicit()); + /*ReturnDevicePointer=*/false, C->isImplicit()); } - for (auto *C : this->CurDir.getClausesOfKind<OMPToClause>()) - for (auto L : C->component_lists()) { + for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>()) + for (const auto &L : C->component_lists()) { InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown, - MapInfo::RPK_None, C->isImplicit()); + /*ReturnDevicePointer=*/false, C->isImplicit()); } - for (auto *C : this->CurDir.getClausesOfKind<OMPFromClause>()) - for (auto L : C->component_lists()) { + for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>()) + for (const auto &L : C->component_lists()) { InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown, - MapInfo::RPK_None, C->isImplicit()); + /*ReturnDevicePointer=*/false, C->isImplicit()); } // Look at the use_device_ptr clause information and mark the existing map // entries as such. If there is no map information for an entry in the // use_device_ptr list, we create one with map type 'alloc' and zero size - // section. It is the user fault if that was not mapped before. + // section. It is the user fault if that was not mapped before. If there is + // no map information and the pointer is a struct member, then we defer the + // emission of that entry until the whole struct has been processed. + llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>> + DeferredInfo; + // FIXME: MSVC 2013 seems to require this-> to find member CurDir. 
- for (auto *C : this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) - for (auto L : C->component_lists()) { + for (const auto *C : + this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) { + for (const auto &L : C->component_lists()) { assert(!L.second.empty() && "Not expecting empty list of components!"); const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); VD = cast<ValueDecl>(VD->getCanonicalDecl()); - auto *IE = L.second.back().getAssociatedExpression(); + const Expr *IE = L.second.back().getAssociatedExpression(); // If the first component is a member expression, we have to look into // 'this', which maps to null in the map of map information. Otherwise // look directly for the information. @@ -6686,113 +7268,135 @@ public: // If we found a map entry, signal that the pointer has to be returned // and move on to the next declaration. if (CI != It->second.end()) { - CI->ReturnDevicePointer = isa<MemberExpr>(IE) - ? (VD->getType()->isReferenceType() - ? MapInfo::RPK_MemberReference - : MapInfo::RPK_Member) - : MapInfo::RPK_Base; + CI->ReturnDevicePointer = true; continue; } } // We didn't find any match in our map information - generate a zero - // size array section. + // size array section - if the pointer is a struct member we defer this + // action until the whole struct has been processed. // FIXME: MSVC 2013 seems to require this-> to find member CGF. - llvm::Value *Ptr = - this->CGF - .EmitLoadOfLValue(this->CGF.EmitLValue(IE), SourceLocation()) - .getScalarVal(); - BasePointers.push_back({Ptr, VD}); - Pointers.push_back(Ptr); - Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy)); - Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); + if (isa<MemberExpr>(IE)) { + // Insert the pointer into Info to be processed by + // generateInfoForComponentList. 
Because it is a member pointer + // without a pointee, no entry will be generated for it, therefore + // we need to generate one after the whole struct has been processed. + // Nonetheless, generateInfoForComponentList must be called to take + // the pointer into account for the calculation of the range of the + // partial struct. + InfoGen(nullptr, L.second, OMPC_MAP_unknown, OMPC_MAP_unknown, + /*ReturnDevicePointer=*/false, C->isImplicit()); + DeferredInfo[nullptr].emplace_back(IE, VD); + } else { + llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( + this->CGF.EmitLValue(IE), IE->getExprLoc()); + BasePointers.emplace_back(Ptr, VD); + Pointers.push_back(Ptr); + Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy)); + Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); + } } + } - for (auto &M : Info) { + for (const auto &M : Info) { // We need to know when we generate information for the first component // associated with a capture, because the mapping flags depend on it. bool IsFirstComponentList = true; - for (MapInfo &L : M.second) { + + // Temporary versions of arrays + MapBaseValuesArrayTy CurBasePointers; + MapValuesArrayTy CurPointers; + MapValuesArrayTy CurSizes; + MapFlagsArrayTy CurTypes; + StructRangeInfoTy PartialStruct; + + for (const MapInfo &L : M.second) { assert(!L.Components.empty() && "Not expecting declaration with no component lists."); // Remember the current base pointer index. - unsigned CurrentBasePointersIdx = BasePointers.size(); + unsigned CurrentBasePointersIdx = CurBasePointers.size(); // FIXME: MSVC 2013 seems to require this-> to find the member method. 
this->generateInfoForComponentList( - L.MapType, L.MapTypeModifier, L.Components, BasePointers, Pointers, - Sizes, Types, IsFirstComponentList, L.IsImplicit); + L.MapType, L.MapTypeModifier, L.Components, CurBasePointers, + CurPointers, CurSizes, CurTypes, PartialStruct, + IsFirstComponentList, L.IsImplicit); // If this entry relates with a device pointer, set the relevant // declaration and add the 'return pointer' flag. - if (IsFirstComponentList && - L.ReturnDevicePointer != MapInfo::RPK_None) { - // If the pointer is not the base of the map, we need to skip the - // base. If it is a reference in a member field, we also need to skip - // the map of the reference. - if (L.ReturnDevicePointer != MapInfo::RPK_Base) { - ++CurrentBasePointersIdx; - if (L.ReturnDevicePointer == MapInfo::RPK_MemberReference) - ++CurrentBasePointersIdx; - } - assert(BasePointers.size() > CurrentBasePointersIdx && + if (L.ReturnDevicePointer) { + assert(CurBasePointers.size() > CurrentBasePointersIdx && "Unexpected number of mapped base pointers."); - auto *RelevantVD = L.Components.back().getAssociatedDeclaration(); + const ValueDecl *RelevantVD = + L.Components.back().getAssociatedDeclaration(); assert(RelevantVD && "No relevant declaration related with device pointer??"); - BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD); - Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; + CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD); + CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; } IsFirstComponentList = false; } + + // Append any pending zero-length pointers which are struct members and + // used with use_device_ptr. 
+ auto CI = DeferredInfo.find(M.first); + if (CI != DeferredInfo.end()) { + for (const DeferredDevicePtrEntryTy &L : CI->second) { + llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(); + llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( + this->CGF.EmitLValue(L.IE), L.IE->getExprLoc()); + CurBasePointers.emplace_back(BasePtr, L.VD); + CurPointers.push_back(Ptr); + CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy)); + // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder + // value MEMBER_OF=FFFF so that the entry is later updated with the + // correct value of MEMBER_OF. + CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | + OMP_MAP_MEMBER_OF); + } + } + + // If there is an entry in PartialStruct it means we have a struct with + // individual members mapped. Emit an extra combined entry. + if (PartialStruct.Base.isValid()) + emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, + PartialStruct); + + // We need to append the results of this capture to what we already have. + BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); + Pointers.append(CurPointers.begin(), CurPointers.end()); + Sizes.append(CurSizes.begin(), CurSizes.end()); + Types.append(CurTypes.begin(), CurTypes.end()); } } - /// \brief Generate the base pointers, section pointers, sizes and map types + /// Generate the base pointers, section pointers, sizes and map types /// associated to a given capture. 
void generateInfoForCapture(const CapturedStmt::Capture *Cap, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, - MapValuesArrayTy &Sizes, - MapFlagsArrayTy &Types) const { + MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, + StructRangeInfoTy &PartialStruct) const { assert(!Cap->capturesVariableArrayType() && "Not expecting to generate map info for a variable array type!"); - BasePointers.clear(); - Pointers.clear(); - Sizes.clear(); - Types.clear(); - // We need to know when we generating information for the first component // associated with a capture, because the mapping flags depend on it. bool IsFirstComponentList = true; - const ValueDecl *VD = - Cap->capturesThis() - ? nullptr - : cast<ValueDecl>(Cap->getCapturedVar()->getCanonicalDecl()); + const ValueDecl *VD = Cap->capturesThis() + ? nullptr + : Cap->getCapturedVar()->getCanonicalDecl(); // If this declaration appears in a is_device_ptr clause we just have to // pass the pointer by value. If it is a reference to a declaration, we just - // pass its value, otherwise, if it is a member expression, we need to map - // 'to' the field. - if (!VD) { - auto It = DevPointersMap.find(VD); - if (It != DevPointersMap.end()) { - for (auto L : It->second) { - generateInfoForComponentList( - /*MapType=*/OMPC_MAP_to, /*MapTypeModifier=*/OMPC_MAP_unknown, L, - BasePointers, Pointers, Sizes, Types, IsFirstComponentList, - /*IsImplicit=*/false); - IsFirstComponentList = false; - } - return; - } - } else if (DevPointersMap.count(VD)) { - BasePointers.push_back({Arg, VD}); + // pass its value. + if (DevPointersMap.count(VD)) { + BasePointers.emplace_back(Arg, VD); Pointers.push_back(Arg); Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM); @@ -6800,35 +7404,63 @@ public: } // FIXME: MSVC 2013 seems to require this-> to find member CurDir. 
- for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) - for (auto L : C->decl_component_lists(VD)) { + for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) + for (const auto &L : C->decl_component_lists(VD)) { assert(L.first == VD && "We got information for the wrong declaration??"); assert(!L.second.empty() && "Not expecting declaration with no component lists."); - generateInfoForComponentList( - C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers, - Pointers, Sizes, Types, IsFirstComponentList, C->isImplicit()); + generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(), + L.second, BasePointers, Pointers, Sizes, + Types, PartialStruct, IsFirstComponentList, + C->isImplicit()); IsFirstComponentList = false; } + } - return; + /// Generate the base pointers, section pointers, sizes and map types + /// associated with the declare target link variables. + void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers, + MapValuesArrayTy &Pointers, + MapValuesArrayTy &Sizes, + MapFlagsArrayTy &Types) const { + // Map other list items in the map clause which are not captured variables + // but "declare target link" global variables., + for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) { + for (const auto &L : C->component_lists()) { + if (!L.first) + continue; + const auto *VD = dyn_cast<VarDecl>(L.first); + if (!VD) + continue; + llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = + isDeclareTargetDeclaration(VD); + if (!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link) + continue; + StructRangeInfoTy PartialStruct; + generateInfoForComponentList( + C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers, + Pointers, Sizes, Types, PartialStruct, + /*IsFirstComponentList=*/true, C->isImplicit()); + assert(!PartialStruct.Base.isValid() && + "No partial structs for declare target link expected."); + } + } } - /// \brief Generate the default map information for a given capture 
\a CI, + /// Generate the default map information for a given capture \a CI, /// record field declaration \a RI and captured value \a CV. void generateDefaultMapInfo(const CapturedStmt::Capture &CI, const FieldDecl &RI, llvm::Value *CV, MapBaseValuesArrayTy &CurBasePointers, MapValuesArrayTy &CurPointers, MapValuesArrayTy &CurSizes, - MapFlagsArrayTy &CurMapTypes) { - + MapFlagsArrayTy &CurMapTypes) const { // Do the default mapping. if (CI.capturesThis()) { CurBasePointers.push_back(CV); CurPointers.push_back(CV); - const PointerType *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); + const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType())); // Default map type. CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM); @@ -6843,7 +7475,7 @@ public: } else { // Pointers are implicitly mapped with a zero size and no flags // (other than first map that is added for all implicit maps). - CurMapTypes.push_back(0u); + CurMapTypes.push_back(OMP_MAP_NONE); CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy)); } } else { @@ -6851,30 +7483,30 @@ public: CurBasePointers.push_back(CV); CurPointers.push_back(CV); - const ReferenceType *PtrTy = - cast<ReferenceType>(RI.getType().getTypePtr()); + const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); QualType ElementType = PtrTy->getPointeeType(); CurSizes.push_back(CGF.getTypeSize(ElementType)); // The default map type for a scalar/complex type is 'to' because by // default the value doesn't have to be retrieved. For an aggregate // type, the default is 'tofrom'. - CurMapTypes.emplace_back(adjustMapModifiersForPrivateClauses( - CI, ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM) - : OMP_MAP_TO)); + CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI)); } // Every default map produces a single argument which is a target parameter. CurMapTypes.back() |= OMP_MAP_TARGET_PARAM; + + // Add flag stating this is an implicit map. 
+ CurMapTypes.back() |= OMP_MAP_IMPLICIT; } }; enum OpenMPOffloadingReservedDeviceIDs { - /// \brief Device ID if the device was not defined, runtime should get it + /// Device ID if the device was not defined, runtime should get it /// from environment variables in the spec. OMP_DEVICEID_UNDEF = -1, }; } // anonymous namespace -/// \brief Emit the arrays used to pass the captures and map information to the +/// Emit the arrays used to pass the captures and map information to the /// offloading runtime library. If there is no map or capture information, /// return nullptr by reference. static void @@ -6884,8 +7516,8 @@ emitOffloadingArrays(CodeGenFunction &CGF, MappableExprsHandler::MapValuesArrayTy &Sizes, MappableExprsHandler::MapFlagsArrayTy &MapTypes, CGOpenMPRuntime::TargetDataInfo &Info) { - auto &CGM = CGF.CGM; - auto &Ctx = CGF.getContext(); + CodeGenModule &CGM = CGF.CGM; + ASTContext &Ctx = CGF.getContext(); // Reset the array information. Info.clearArrayInfo(); @@ -6895,7 +7527,7 @@ emitOffloadingArrays(CodeGenFunction &CGF, // Detect if we have any capture size requiring runtime evaluation of the // size so that a constant array could be eventually used. bool hasRuntimeEvaluationCaptureSize = false; - for (auto *S : Sizes) + for (llvm::Value *S : Sizes) if (!isa<llvm::Constant>(S)) { hasRuntimeEvaluationCaptureSize = true; break; @@ -6924,48 +7556,53 @@ emitOffloadingArrays(CodeGenFunction &CGF, // We expect all the sizes to be constant, so we collect them to create // a constant array. 
SmallVector<llvm::Constant *, 16> ConstSizes; - for (auto S : Sizes) + for (llvm::Value *S : Sizes) ConstSizes.push_back(cast<llvm::Constant>(S)); auto *SizesArrayInit = llvm::ConstantArray::get( llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes); + std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); auto *SizesArrayGbl = new llvm::GlobalVariable( CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, - SizesArrayInit, ".offload_sizes"); + SizesArrayInit, Name); SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); Info.SizesArray = SizesArrayGbl; } // The map types are always constant so we don't need to generate code to // fill arrays. Instead, we create an array constant. + SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0); + llvm::copy(MapTypes, Mapping.begin()); llvm::Constant *MapTypesArrayInit = - llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes); + llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); + std::string MaptypesName = + CGM.getOpenMPRuntime().getName({"offload_maptypes"}); auto *MapTypesArrayGbl = new llvm::GlobalVariable( CGM.getModule(), MapTypesArrayInit->getType(), /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, - MapTypesArrayInit, ".offload_maptypes"); + MapTypesArrayInit, MaptypesName); MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); Info.MapTypesArray = MapTypesArrayGbl; - for (unsigned i = 0; i < Info.NumberOfPtrs; ++i) { - llvm::Value *BPVal = *BasePointers[i]; + for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { + llvm::Value *BPVal = *BasePointers[I]; llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), - Info.BasePointersArray, 0, i); + Info.BasePointersArray, 0, I); BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); Address BPAddr(BP, 
Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); CGF.Builder.CreateStore(BPVal, BPAddr); if (Info.requiresDevicePointerInfo()) - if (auto *DevVD = BasePointers[i].getDevicePtrDecl()) - Info.CaptureDeviceAddrMap.insert(std::make_pair(DevVD, BPAddr)); + if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl()) + Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); - llvm::Value *PVal = Pointers[i]; + llvm::Value *PVal = Pointers[I]; llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), - Info.PointersArray, 0, i); + Info.PointersArray, 0, I); P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); @@ -6976,22 +7613,22 @@ emitOffloadingArrays(CodeGenFunction &CGF, llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray, /*Idx0=*/0, - /*Idx1=*/i); + /*Idx1=*/I); Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType())); CGF.Builder.CreateStore( - CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true), + CGF.Builder.CreateIntCast(Sizes[I], CGM.SizeTy, /*isSigned=*/true), SAddr); } } } } -/// \brief Emit the arguments to be passed to the runtime library based on the +/// Emit the arguments to be passed to the runtime library based on the /// arrays of pointers, sizes and map types. 
static void emitOffloadingArraysArgument( CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) { - auto &CGM = CGF.CGM; + CodeGenModule &CGM = CGF.CGM; if (Info.NumberOfPtrs) { BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), @@ -7023,86 +7660,27 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Value *OutlinedFn, llvm::Value *OutlinedFnID, - const Expr *IfCond, const Expr *Device, - ArrayRef<llvm::Value *> CapturedVars) { + const Expr *IfCond, const Expr *Device) { if (!CGF.HaveInsertPoint()) return; assert(OutlinedFn && "Invalid outlined function!"); - // Fill up the arrays with all the captured variables. - MappableExprsHandler::MapValuesArrayTy KernelArgs; - MappableExprsHandler::MapBaseValuesArrayTy BasePointers; - MappableExprsHandler::MapValuesArrayTy Pointers; - MappableExprsHandler::MapValuesArrayTy Sizes; - MappableExprsHandler::MapFlagsArrayTy MapTypes; - - MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; - MappableExprsHandler::MapValuesArrayTy CurPointers; - MappableExprsHandler::MapValuesArrayTy CurSizes; - MappableExprsHandler::MapFlagsArrayTy CurMapTypes; - - // Get mappable expression information. - MappableExprsHandler MEHandler(D, CGF); - - const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); - auto RI = CS.getCapturedRecordDecl()->field_begin(); - auto CV = CapturedVars.begin(); - for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), - CE = CS.capture_end(); - CI != CE; ++CI, ++RI, ++CV) { - CurBasePointers.clear(); - CurPointers.clear(); - CurSizes.clear(); - CurMapTypes.clear(); - - // VLA sizes are passed to the outlined region by copy and do not have map - // information associated. 
- if (CI->capturesVariableArrayType()) { - CurBasePointers.push_back(*CV); - CurPointers.push_back(*CV); - CurSizes.push_back(CGF.getTypeSize(RI->getType())); - // Copy to the device as an argument. No need to retrieve it. - CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL | - MappableExprsHandler::OMP_MAP_TARGET_PARAM); - } else { - // If we have any information in the map clause, we use it, otherwise we - // just do a default mapping. - MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers, - CurSizes, CurMapTypes); - if (CurBasePointers.empty()) - MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, - CurPointers, CurSizes, CurMapTypes); - } - // We expect to have at least an element of information for this capture. - assert(!CurBasePointers.empty() && "Non-existing map pointer for capture!"); - assert(CurBasePointers.size() == CurPointers.size() && - CurBasePointers.size() == CurSizes.size() && - CurBasePointers.size() == CurMapTypes.size() && - "Inconsistent map information sizes!"); - - // The kernel args are always the first elements of the base pointers - // associated with a capture. - KernelArgs.push_back(*CurBasePointers.front()); - // We need to append the results of this capture to what we already have. 
- BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); - Pointers.append(CurPointers.begin(), CurPointers.end()); - Sizes.append(CurSizes.begin(), CurSizes.end()); - MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); - } + const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>(); + llvm::SmallVector<llvm::Value *, 16> CapturedVars; + const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); + auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, + PrePostActionTy &) { + CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); + }; + emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); + CodeGenFunction::OMPTargetDataInfo InputInfo; + llvm::Value *MapTypesArray = nullptr; // Fill up the pointer arrays and transfer execution to the device. - auto &&ThenGen = [this, &BasePointers, &Pointers, &Sizes, &MapTypes, Device, - OutlinedFn, OutlinedFnID, &D, - &KernelArgs](CodeGenFunction &CGF, PrePostActionTy &) { - auto &RT = CGF.CGM.getOpenMPRuntime(); - // Emit the offloading arrays. - TargetDataInfo Info; - emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); - emitOffloadingArraysArgument(CGF, Info.BasePointersArray, - Info.PointersArray, Info.SizesArray, - Info.MapTypesArray, Info); - + auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, + &MapTypesArray, &CS, RequiresOuterTask, + &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) { // On top of the arrays that were filled up, the target offloading call // takes as arguments the device id as well as the host pointer. The host // pointer is used by the runtime library to identify the current target @@ -7125,13 +7703,14 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, } // Emit the number of elements in the offloading arrays. - llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size()); + llvm::Value *PointerNum = + CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); // Return value of the runtime offloading call. 
llvm::Value *Return; - auto *NumTeams = emitNumTeamsForTargetDirective(RT, CGF, D); - auto *NumThreads = emitNumThreadsForTargetDirective(RT, CGF, D); + llvm::Value *NumTeams = emitNumTeamsForTargetDirective(*this, CGF, D); + llvm::Value *NumThreads = emitNumThreadsForTargetDirective(*this, CGF, D); bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); // The target region is an outlined function launched by the runtime @@ -7169,25 +7748,30 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, // passed to the runtime library - a 32-bit integer with the value zero. assert(NumThreads && "Thread limit expression should be available along " "with number of teams."); - llvm::Value *OffloadingArgs[] = { - DeviceID, OutlinedFnID, - PointerNum, Info.BasePointersArray, - Info.PointersArray, Info.SizesArray, - Info.MapTypesArray, NumTeams, - NumThreads}; + llvm::Value *OffloadingArgs[] = {DeviceID, + OutlinedFnID, + PointerNum, + InputInfo.BasePointersArray.getPointer(), + InputInfo.PointersArray.getPointer(), + InputInfo.SizesArray.getPointer(), + MapTypesArray, + NumTeams, + NumThreads}; Return = CGF.EmitRuntimeCall( - RT.createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait - : OMPRTL__tgt_target_teams), + createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait + : OMPRTL__tgt_target_teams), OffloadingArgs); } else { - llvm::Value *OffloadingArgs[] = { - DeviceID, OutlinedFnID, - PointerNum, Info.BasePointersArray, - Info.PointersArray, Info.SizesArray, - Info.MapTypesArray}; + llvm::Value *OffloadingArgs[] = {DeviceID, + OutlinedFnID, + PointerNum, + InputInfo.BasePointersArray.getPointer(), + InputInfo.PointersArray.getPointer(), + InputInfo.SizesArray.getPointer(), + MapTypesArray}; Return = CGF.EmitRuntimeCall( - RT.createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait - : OMPRTL__tgt_target), + createRuntimeFunction(HasNowait ? 
OMPRTL__tgt_target_nowait + : OMPRTL__tgt_target), OffloadingArgs); } @@ -7200,17 +7784,120 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); CGF.EmitBlock(OffloadFailedBlock); - emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, KernelArgs); + if (RequiresOuterTask) { + CapturedVars.clear(); + CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); + } + emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, CapturedVars); CGF.EmitBranch(OffloadContBlock); CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); }; // Notify that the host version must be executed. - auto &&ElseGen = [this, &D, OutlinedFn, &KernelArgs](CodeGenFunction &CGF, - PrePostActionTy &) { - emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, - KernelArgs); + auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, + RequiresOuterTask](CodeGenFunction &CGF, + PrePostActionTy &) { + if (RequiresOuterTask) { + CapturedVars.clear(); + CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); + } + emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, CapturedVars); + }; + + auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, + &CapturedVars, RequiresOuterTask, + &CS](CodeGenFunction &CGF, PrePostActionTy &) { + // Fill up the arrays with all the captured variables. + MappableExprsHandler::MapBaseValuesArrayTy BasePointers; + MappableExprsHandler::MapValuesArrayTy Pointers; + MappableExprsHandler::MapValuesArrayTy Sizes; + MappableExprsHandler::MapFlagsArrayTy MapTypes; + + // Get mappable expression information. 
+ MappableExprsHandler MEHandler(D, CGF); + + auto RI = CS.getCapturedRecordDecl()->field_begin(); + auto CV = CapturedVars.begin(); + for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), + CE = CS.capture_end(); + CI != CE; ++CI, ++RI, ++CV) { + MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; + MappableExprsHandler::MapValuesArrayTy CurPointers; + MappableExprsHandler::MapValuesArrayTy CurSizes; + MappableExprsHandler::MapFlagsArrayTy CurMapTypes; + MappableExprsHandler::StructRangeInfoTy PartialStruct; + + // VLA sizes are passed to the outlined region by copy and do not have map + // information associated. + if (CI->capturesVariableArrayType()) { + CurBasePointers.push_back(*CV); + CurPointers.push_back(*CV); + CurSizes.push_back(CGF.getTypeSize(RI->getType())); + // Copy to the device as an argument. No need to retrieve it. + CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL | + MappableExprsHandler::OMP_MAP_TARGET_PARAM); + } else { + // If we have any information in the map clause, we use it, otherwise we + // just do a default mapping. + MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers, + CurSizes, CurMapTypes, PartialStruct); + if (CurBasePointers.empty()) + MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, + CurPointers, CurSizes, CurMapTypes); + } + // We expect to have at least an element of information for this capture. + assert(!CurBasePointers.empty() && + "Non-existing map pointer for capture!"); + assert(CurBasePointers.size() == CurPointers.size() && + CurBasePointers.size() == CurSizes.size() && + CurBasePointers.size() == CurMapTypes.size() && + "Inconsistent map information sizes!"); + + // If there is an entry in PartialStruct it means we have a struct with + // individual members mapped. Emit an extra combined entry. 
+ if (PartialStruct.Base.isValid()) + MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes, + CurMapTypes, PartialStruct); + + // We need to append the results of this capture to what we already have. + BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); + Pointers.append(CurPointers.begin(), CurPointers.end()); + Sizes.append(CurSizes.begin(), CurSizes.end()); + MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); + } + // Map other list items in the map clause which are not captured variables + // but "declare target link" global variables. + MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes, + MapTypes); + + TargetDataInfo Info; + // Fill up the arrays and create the arguments. + emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); + emitOffloadingArraysArgument(CGF, Info.BasePointersArray, + Info.PointersArray, Info.SizesArray, + Info.MapTypesArray, Info); + InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; + InputInfo.BasePointersArray = + Address(Info.BasePointersArray, CGM.getPointerAlign()); + InputInfo.PointersArray = + Address(Info.PointersArray, CGM.getPointerAlign()); + InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); + MapTypesArray = Info.MapTypesArray; + if (RequiresOuterTask) + CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); + else + emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); + }; + + auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( + CodeGenFunction &CGF, PrePostActionTy &) { + if (RequiresOuterTask) { + CodeGenFunction::OMPTargetDataInfo InputInfo; + CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); + } else { + emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); + } }; // If we have a target function ID it means that we need to support @@ -7218,14 +7905,14 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, // regardless of the conditional in the if clause if, e.g., the user do 
not // specify target triples. if (OutlinedFnID) { - if (IfCond) - emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); - else { - RegionCodeGenTy ThenRCG(ThenGen); + if (IfCond) { + emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); + } else { + RegionCodeGenTy ThenRCG(TargetThenGen); ThenRCG(CGF); } } else { - RegionCodeGenTy ElseRCG(ElseGen); + RegionCodeGenTy ElseRCG(TargetElseGen); ElseRCG(CGF); } } @@ -7236,13 +7923,13 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, return; // Codegen OMP target directives that offload compute to the device. - bool requiresDeviceCodegen = + bool RequiresDeviceCodegen = isa<OMPExecutableDirective>(S) && isOpenMPTargetExecutionDirective( cast<OMPExecutableDirective>(S)->getDirectiveKind()); - if (requiresDeviceCodegen) { - auto &E = *cast<OMPExecutableDirective>(S); + if (RequiresDeviceCodegen) { + const auto &E = *cast<OMPExecutableDirective>(S); unsigned DeviceID; unsigned FileID; unsigned Line; @@ -7255,66 +7942,118 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, ParentName, Line)) return; - switch (S->getStmtClass()) { - case Stmt::OMPTargetDirectiveClass: - CodeGenFunction::EmitOMPTargetDeviceFunction( - CGM, ParentName, cast<OMPTargetDirective>(*S)); + switch (E.getDirectiveKind()) { + case OMPD_target: + CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, + cast<OMPTargetDirective>(E)); break; - case Stmt::OMPTargetParallelDirectiveClass: + case OMPD_target_parallel: CodeGenFunction::EmitOMPTargetParallelDeviceFunction( - CGM, ParentName, cast<OMPTargetParallelDirective>(*S)); + CGM, ParentName, cast<OMPTargetParallelDirective>(E)); break; - case Stmt::OMPTargetTeamsDirectiveClass: + case OMPD_target_teams: CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( - CGM, ParentName, cast<OMPTargetTeamsDirective>(*S)); + CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); break; - case Stmt::OMPTargetTeamsDistributeDirectiveClass: + case OMPD_target_teams_distribute: 
CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( - CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(*S)); + CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); break; - case Stmt::OMPTargetTeamsDistributeSimdDirectiveClass: + case OMPD_target_teams_distribute_simd: CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( - CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(*S)); + CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); break; - case Stmt::OMPTargetParallelForDirectiveClass: + case OMPD_target_parallel_for: CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( - CGM, ParentName, cast<OMPTargetParallelForDirective>(*S)); + CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); break; - case Stmt::OMPTargetParallelForSimdDirectiveClass: + case OMPD_target_parallel_for_simd: CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( - CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(*S)); + CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); break; - case Stmt::OMPTargetSimdDirectiveClass: + case OMPD_target_simd: CodeGenFunction::EmitOMPTargetSimdDeviceFunction( - CGM, ParentName, cast<OMPTargetSimdDirective>(*S)); + CGM, ParentName, cast<OMPTargetSimdDirective>(E)); break; - default: + case OMPD_target_teams_distribute_parallel_for: + CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( + CGM, ParentName, + cast<OMPTargetTeamsDistributeParallelForDirective>(E)); + break; + case OMPD_target_teams_distribute_parallel_for_simd: + CodeGenFunction:: + EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( + CGM, ParentName, + cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); + break; + case OMPD_parallel: + case OMPD_for: + case OMPD_parallel_for: + case OMPD_parallel_sections: + case OMPD_for_simd: + case OMPD_parallel_for_simd: + case OMPD_cancel: + case OMPD_cancellation_point: + case OMPD_ordered: + case OMPD_threadprivate: + case 
OMPD_task: + case OMPD_simd: + case OMPD_sections: + case OMPD_section: + case OMPD_single: + case OMPD_master: + case OMPD_critical: + case OMPD_taskyield: + case OMPD_barrier: + case OMPD_taskwait: + case OMPD_taskgroup: + case OMPD_atomic: + case OMPD_flush: + case OMPD_teams: + case OMPD_target_data: + case OMPD_target_exit_data: + case OMPD_target_enter_data: + case OMPD_distribute: + case OMPD_distribute_simd: + case OMPD_distribute_parallel_for: + case OMPD_distribute_parallel_for_simd: + case OMPD_teams_distribute: + case OMPD_teams_distribute_simd: + case OMPD_teams_distribute_parallel_for: + case OMPD_teams_distribute_parallel_for_simd: + case OMPD_target_update: + case OMPD_declare_simd: + case OMPD_declare_target: + case OMPD_end_declare_target: + case OMPD_declare_reduction: + case OMPD_taskloop: + case OMPD_taskloop_simd: + case OMPD_unknown: llvm_unreachable("Unknown target directive for OpenMP device codegen."); } return; } - if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) { - if (!E->hasAssociatedStmt()) + if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { + if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) return; scanForTargetRegionsFunctions( - cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(), - ParentName); + E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName); return; } // If this is a lambda function, look into its body. - if (auto *L = dyn_cast<LambdaExpr>(S)) + if (const auto *L = dyn_cast<LambdaExpr>(S)) S = L->getBody(); // Keep looking for target regions recursively. - for (auto *II : S->children()) + for (const Stmt *II : S->children()) scanForTargetRegionsFunctions(II, ParentName); } bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { - auto &FD = *cast<FunctionDecl>(GD.getDecl()); + const auto *FD = cast<FunctionDecl>(GD.getDecl()); // If emitting code for the host, we do not process FD here. Instead we do // the normal code generation. 
@@ -7322,12 +8061,11 @@ bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { return false; // Try to detect target regions in the function. - scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD)); + scanForTargetRegionsFunctions(FD->getBody(), CGM.getMangledName(GD)); - // We should not emit any function other that the ones created during the - // scanning. Therefore, we signal that this function is completely dealt - // with. - return true; + // Do not to emit function if it is not marked as declare target. + return !isDeclareTargetDeclaration(FD) && + AlreadyEmittedTargetFunctions.count(FD->getCanonicalDecl()) == 0; } bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { @@ -7338,33 +8076,101 @@ bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { // regions in it. We use the complete variant to produce the kernel name // mangling. QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); - if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { - for (auto *Ctor : RD->ctors()) { + if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { + for (const CXXConstructorDecl *Ctor : RD->ctors()) { StringRef ParentName = CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); } - auto *Dtor = RD->getDestructor(); - if (Dtor) { + if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { StringRef ParentName = CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); } } - // If we are in target mode, we do not emit any global (declare target is not - // implemented yet). Therefore we signal that GD was processed in this case. - return true; + // Do not to emit variable if it is not marked as declare target. 
+ llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = + isDeclareTargetDeclaration(cast<VarDecl>(GD.getDecl())); + return !Res || *Res == OMPDeclareTargetDeclAttr::MT_Link; +} + +void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, + llvm::Constant *Addr) { + if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = + isDeclareTargetDeclaration(VD)) { + OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; + StringRef VarName; + CharUnits VarSize; + llvm::GlobalValue::LinkageTypes Linkage; + switch (*Res) { + case OMPDeclareTargetDeclAttr::MT_To: + Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; + VarName = CGM.getMangledName(VD); + VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); + Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); + break; + case OMPDeclareTargetDeclAttr::MT_Link: + Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; + if (CGM.getLangOpts().OpenMPIsDevice) { + VarName = Addr->getName(); + Addr = nullptr; + } else { + VarName = getAddrOfDeclareTargetLink(VD).getName(); + Addr = + cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer()); + } + VarSize = CGM.getPointerSize(); + Linkage = llvm::GlobalValue::WeakAnyLinkage; + break; + } + OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( + VarName, Addr, VarSize, Flags, Linkage); + } } bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { - auto *VD = GD.getDecl(); - if (isa<FunctionDecl>(VD)) + if (isa<FunctionDecl>(GD.getDecl())) return emitTargetFunctions(GD); return emitTargetGlobalVariable(GD); } +CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( + CodeGenModule &CGM) + : CGM(CGM) { + if (CGM.getLangOpts().OpenMPIsDevice) { + SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; + CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; + } +} + +CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { + if 
(CGM.getLangOpts().OpenMPIsDevice) + CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; +} + +bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { + if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) + return true; + + const auto *D = cast<FunctionDecl>(GD.getDecl()); + const FunctionDecl *FD = D->getCanonicalDecl(); + // Do not to emit function if it is marked as declare target as it was already + // emitted. + if (isDeclareTargetDeclaration(D)) { + if (D->hasBody() && AlreadyEmittedTargetFunctions.count(FD) == 0) { + if (auto *F = dyn_cast_or_null<llvm::Function>( + CGM.GetGlobalValue(CGM.getMangledName(GD)))) + return !F->isDeclaration(); + return false; + } + return true; + } + + return !AlreadyEmittedTargetFunctions.insert(FD).second; +} + llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { // If we have offloading in the current module, we need to emit the entries // now and register the offloading descriptor. @@ -7384,7 +8190,7 @@ void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, if (!CGF.HaveInsertPoint()) return; - auto *RTLoc = emitUpdateLocation(CGF, Loc); + llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); CodeGenFunction::RunCleanupsScope Scope(CGF); // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); @@ -7396,7 +8202,7 @@ void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, RealArgs.append(std::begin(Args), std::end(Args)); RealArgs.append(CapturedVars.begin(), CapturedVars.end()); - auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); + llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); CGF.EmitRuntimeCall(RTLFn, RealArgs); } @@ -7407,16 +8213,16 @@ void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, if (!CGF.HaveInsertPoint()) return; - auto *RTLoc = emitUpdateLocation(CGF, Loc); + llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); llvm::Value *NumTeamsVal = - (NumTeams) + NumTeams ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), CGF.CGM.Int32Ty, /* isSigned = */ true) : CGF.Builder.getInt32(0); llvm::Value *ThreadLimitVal = - (ThreadLimit) + ThreadLimit ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), CGF.CGM.Int32Ty, /* isSigned = */ true) : CGF.Builder.getInt32(0); @@ -7473,7 +8279,7 @@ void CGOpenMPRuntime::emitTargetDataCalls( } // Emit the number of elements in the offloading arrays. - auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); + llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); llvm::Value *OffloadingArgs[] = { DeviceID, PointerNum, BasePointersArrayArg, @@ -7509,7 +8315,7 @@ void CGOpenMPRuntime::emitTargetDataCalls( } // Emit the number of elements in the offloading arrays. - auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); + llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); llvm::Value *OffloadingArgs[] = { DeviceID, PointerNum, BasePointersArrayArg, @@ -7596,9 +8402,6 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); OpenMPRTLFunction RTLFn; switch (D.getDirectiveKind()) { - default: - llvm_unreachable("Unexpected standalone target data directive."); - break; case OMPD_target_enter_data: RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait : OMPRTL__tgt_target_data_begin; @@ -7611,6 +8414,58 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( RTLFn = HasNowait ? 
OMPRTL__tgt_target_data_update_nowait : OMPRTL__tgt_target_data_update; break; + case OMPD_parallel: + case OMPD_for: + case OMPD_parallel_for: + case OMPD_parallel_sections: + case OMPD_for_simd: + case OMPD_parallel_for_simd: + case OMPD_cancel: + case OMPD_cancellation_point: + case OMPD_ordered: + case OMPD_threadprivate: + case OMPD_task: + case OMPD_simd: + case OMPD_sections: + case OMPD_section: + case OMPD_single: + case OMPD_master: + case OMPD_critical: + case OMPD_taskyield: + case OMPD_barrier: + case OMPD_taskwait: + case OMPD_taskgroup: + case OMPD_atomic: + case OMPD_flush: + case OMPD_teams: + case OMPD_target_data: + case OMPD_distribute: + case OMPD_distribute_simd: + case OMPD_distribute_parallel_for: + case OMPD_distribute_parallel_for_simd: + case OMPD_teams_distribute: + case OMPD_teams_distribute_simd: + case OMPD_teams_distribute_parallel_for: + case OMPD_teams_distribute_parallel_for_simd: + case OMPD_declare_simd: + case OMPD_declare_target: + case OMPD_end_declare_target: + case OMPD_declare_reduction: + case OMPD_taskloop: + case OMPD_taskloop_simd: + case OMPD_target: + case OMPD_target_simd: + case OMPD_target_teams_distribute: + case OMPD_target_teams_distribute_simd: + case OMPD_target_teams_distribute_parallel_for: + case OMPD_target_teams_distribute_parallel_for_simd: + case OMPD_target_teams: + case OMPD_target_parallel: + case OMPD_target_parallel_for: + case OMPD_target_parallel_for_simd: + case OMPD_unknown: + llvm_unreachable("Unexpected standalone target data directive."); + break; } CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs); }; @@ -7644,13 +8499,13 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( if (D.hasClausesOfKind<OMPDependClause>()) CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); else - emitInlinedDirective(CGF, OMPD_target_update, ThenGen); + emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); }; - if (IfCond) + if (IfCond) { emitOMPIfClause(CGF, IfCond, TargetThenGen, 
[](CodeGenFunction &CGF, PrePostActionTy &) {}); - else { + } else { RegionCodeGenTy ThenRCG(TargetThenGen); ThenRCG(CGF); } @@ -7693,11 +8548,11 @@ static unsigned evaluateCDTSize(const FunctionDecl *FD, return 0; ASTContext &C = FD->getASTContext(); QualType CDT; - if (!RetType.isNull() && !RetType->isVoidType()) + if (!RetType.isNull() && !RetType->isVoidType()) { CDT = RetType; - else { + } else { unsigned Offset = 0; - if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) { + if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { if (ParamAttrs[Offset].Kind == Vector) CDT = C.getPointerType(C.getRecordType(MD->getParent())); ++Offset; @@ -7755,17 +8610,18 @@ emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, Masked.push_back('M'); break; } - for (auto Mask : Masked) { - for (auto &Data : ISAData) { + for (char Mask : Masked) { + for (const ISADataTy &Data : ISAData) { SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); Out << "_ZGV" << Data.ISA << Mask; if (!VLENVal) { Out << llvm::APSInt::getUnsigned(Data.VecRegSize / evaluateCDTSize(FD, ParamAttrs)); - } else + } else { Out << VLENVal; - for (auto &ParamAttr : ParamAttrs) { + } + for (const ParamAttrTy &ParamAttr : ParamAttrs) { switch (ParamAttr.Kind){ case LinearWithVarStride: Out << 's' << ParamAttr.StrideOrArg; @@ -7794,90 +8650,95 @@ emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn) { ASTContext &C = CGM.getContext(); - FD = FD->getCanonicalDecl(); + FD = FD->getMostRecentDecl(); // Map params to their positions in function decl. 
llvm::DenseMap<const Decl *, unsigned> ParamPositions; if (isa<CXXMethodDecl>(FD)) - ParamPositions.insert({FD, 0}); + ParamPositions.try_emplace(FD, 0); unsigned ParamPos = ParamPositions.size(); - for (auto *P : FD->parameters()) { - ParamPositions.insert({P->getCanonicalDecl(), ParamPos}); + for (const ParmVarDecl *P : FD->parameters()) { + ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); ++ParamPos; } - for (auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { - llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); - // Mark uniform parameters. - for (auto *E : Attr->uniforms()) { - E = E->IgnoreParenImpCasts(); - unsigned Pos; - if (isa<CXXThisExpr>(E)) - Pos = ParamPositions[FD]; - else { - auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) - ->getCanonicalDecl(); - Pos = ParamPositions[PVD]; - } - ParamAttrs[Pos].Kind = Uniform; - } - // Get alignment info. - auto NI = Attr->alignments_begin(); - for (auto *E : Attr->aligneds()) { - E = E->IgnoreParenImpCasts(); - unsigned Pos; - QualType ParmTy; - if (isa<CXXThisExpr>(E)) { - Pos = ParamPositions[FD]; - ParmTy = E->getType(); - } else { - auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) - ->getCanonicalDecl(); - Pos = ParamPositions[PVD]; - ParmTy = PVD->getType(); + while (FD) { + for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { + llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); + // Mark uniform parameters. + for (const Expr *E : Attr->uniforms()) { + E = E->IgnoreParenImpCasts(); + unsigned Pos; + if (isa<CXXThisExpr>(E)) { + Pos = ParamPositions[FD]; + } else { + const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) + ->getCanonicalDecl(); + Pos = ParamPositions[PVD]; + } + ParamAttrs[Pos].Kind = Uniform; } - ParamAttrs[Pos].Alignment = - (*NI) ? (*NI)->EvaluateKnownConstInt(C) + // Get alignment info. 
+ auto NI = Attr->alignments_begin(); + for (const Expr *E : Attr->aligneds()) { + E = E->IgnoreParenImpCasts(); + unsigned Pos; + QualType ParmTy; + if (isa<CXXThisExpr>(E)) { + Pos = ParamPositions[FD]; + ParmTy = E->getType(); + } else { + const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) + ->getCanonicalDecl(); + Pos = ParamPositions[PVD]; + ParmTy = PVD->getType(); + } + ParamAttrs[Pos].Alignment = + (*NI) + ? (*NI)->EvaluateKnownConstInt(C) : llvm::APSInt::getUnsigned( C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) .getQuantity()); - ++NI; - } - // Mark linear parameters. - auto SI = Attr->steps_begin(); - auto MI = Attr->modifiers_begin(); - for (auto *E : Attr->linears()) { - E = E->IgnoreParenImpCasts(); - unsigned Pos; - if (isa<CXXThisExpr>(E)) - Pos = ParamPositions[FD]; - else { - auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) - ->getCanonicalDecl(); - Pos = ParamPositions[PVD]; + ++NI; } - auto &ParamAttr = ParamAttrs[Pos]; - ParamAttr.Kind = Linear; - if (*SI) { - if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C, - Expr::SE_AllowSideEffects)) { - if (auto *DRE = cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { - if (auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { - ParamAttr.Kind = LinearWithVarStride; - ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( - ParamPositions[StridePVD->getCanonicalDecl()]); + // Mark linear parameters. 
+ auto SI = Attr->steps_begin(); + auto MI = Attr->modifiers_begin(); + for (const Expr *E : Attr->linears()) { + E = E->IgnoreParenImpCasts(); + unsigned Pos; + if (isa<CXXThisExpr>(E)) { + Pos = ParamPositions[FD]; + } else { + const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) + ->getCanonicalDecl(); + Pos = ParamPositions[PVD]; + } + ParamAttrTy &ParamAttr = ParamAttrs[Pos]; + ParamAttr.Kind = Linear; + if (*SI) { + if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C, + Expr::SE_AllowSideEffects)) { + if (const auto *DRE = + cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { + if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { + ParamAttr.Kind = LinearWithVarStride; + ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( + ParamPositions[StridePVD->getCanonicalDecl()]); + } } } } + ++SI; + ++MI; } - ++SI; - ++MI; + llvm::APSInt VLENVal; + if (const Expr *VLEN = Attr->getSimdlen()) + VLENVal = VLEN->EvaluateKnownConstInt(C); + OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); + if (CGM.getTriple().getArch() == llvm::Triple::x86 || + CGM.getTriple().getArch() == llvm::Triple::x86_64) + emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); } - llvm::APSInt VLENVal; - if (const Expr *VLEN = Attr->getSimdlen()) - VLENVal = VLEN->EvaluateKnownConstInt(C); - OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); - if (CGM.getTriple().getArch() == llvm::Triple::x86 || - CGM.getTriple().getArch() == llvm::Triple::x86_64) - emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); + FD = FD->getPreviousDecl(); } } @@ -7926,8 +8787,9 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, addFieldToRecordDecl(C, RD, Int64Ty); RD->completeDefinition(); KmpDimTy = C.getRecordType(RD); - } else + } else { RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); + } Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims"); CGF.EmitNullInitialization(DimsAddr, KmpDimTy); @@ -7979,18 +8841,19 @@ void 
CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, getThreadID(CGF, C->getLocStart()), CntAddr.getPointer()}; llvm::Value *RTLFn; - if (C->getDependencyKind() == OMPC_DEPEND_source) + if (C->getDependencyKind() == OMPC_DEPEND_source) { RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); - else { + } else { assert(C->getDependencyKind() == OMPC_DEPEND_sink); RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); } CGF.EmitRuntimeCall(RTLFn, Args); } -void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, llvm::Value *Callee, - ArrayRef<llvm::Value *> Args, - SourceLocation Loc) const { +void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, + llvm::Value *Callee, + ArrayRef<llvm::Value *> Args) const { + assert(Loc.isValid() && "Outlined function call location must be valid."); auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); if (auto *Fn = dyn_cast<llvm::Function>(Callee)) { @@ -8005,8 +8868,7 @@ void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, llvm::Value *Callee, void CGOpenMPRuntime::emitOutlinedFunctionCall( CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef<llvm::Value *> Args) const { - assert(Loc.isValid() && "Outlined function call location must be valid."); - emitCall(CGF, OutlinedFn, Args, Loc); + emitCall(CGF, Loc, OutlinedFn, Args); } Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, @@ -8014,3 +8876,303 @@ Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, const VarDecl *TargetParam) const { return CGF.GetAddrOfLocalVar(NativeParam); } + +Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, + const VarDecl *VD) { + return Address::invalid(); +} + +llvm::Value *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( + const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +llvm::Value 
*CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( + const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +llvm::Value *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( + const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, + const VarDecl *PartIDVar, const VarDecl *TaskTVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + bool Tied, unsigned &NumberOfParts) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, + SourceLocation Loc, + llvm::Value *OutlinedFn, + ArrayRef<llvm::Value *> CapturedVars, + const Expr *IfCond) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitCriticalRegion( + CodeGenFunction &CGF, StringRef CriticalName, + const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, + const Expr *Hint) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, + const RegionCodeGenTy &MasterOpGen, + SourceLocation Loc) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, + SourceLocation Loc) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitTaskgroupRegion( + CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, + SourceLocation Loc) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitSingleRegion( + CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, + SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, + ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, + ArrayRef<const Expr *> AssignmentOps) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction 
&CGF, + const RegionCodeGenTy &OrderedOpGen, + SourceLocation Loc, + bool IsThreads) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, + SourceLocation Loc, + OpenMPDirectiveKind Kind, + bool EmitChecks, + bool ForceSimpleCall) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitForDispatchInit( + CodeGenFunction &CGF, SourceLocation Loc, + const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, + bool Ordered, const DispatchRTInput &DispatchValues) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitForStaticInit( + CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, + const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitDistributeStaticInit( + CodeGenFunction &CGF, SourceLocation Loc, + OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, + SourceLocation Loc, + unsigned IVSize, + bool IVSigned) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, + SourceLocation Loc, + OpenMPDirectiveKind DKind) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, + SourceLocation Loc, + unsigned IVSize, bool IVSigned, + Address IL, Address LB, + Address UB, Address ST) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, + llvm::Value *NumThreads, + SourceLocation Loc) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, + 
OpenMPProcBindClauseKind ProcBind, + SourceLocation Loc) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, + const VarDecl *VD, + Address VDAddr, + SourceLocation Loc) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( + const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, + CodeGenFunction *CGF) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( + CodeGenFunction &CGF, QualType VarType, StringRef Name) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, + ArrayRef<const Expr *> Vars, + SourceLocation Loc) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, + const OMPExecutableDirective &D, + llvm::Value *TaskFunction, + QualType SharedsTy, Address Shareds, + const Expr *IfCond, + const OMPTaskDataTy &Data) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitTaskLoopCall( + CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, + llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, + const Expr *IfCond, const OMPTaskDataTy &Data) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitReduction( + CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, + ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, + ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { + assert(Options.SimpleReduction && "Only simple reduction is expected."); + CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, + ReductionOps, Options); +} + +llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( + 
CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, + ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, + SourceLocation Loc, + ReductionCodeGen &RCG, + unsigned N) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, + SourceLocation Loc, + llvm::Value *ReductionsPtr, + LValue SharedLVal) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, + SourceLocation Loc) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitCancellationPointCall( + CodeGenFunction &CGF, SourceLocation Loc, + OpenMPDirectiveKind CancelRegion) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, + SourceLocation Loc, const Expr *IfCond, + OpenMPDirectiveKind CancelRegion) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( + const OMPExecutableDirective &D, StringRef ParentName, + llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, + bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + llvm::Value *OutlinedFn, + llvm::Value *OutlinedFnID, + const Expr *IfCond, const Expr *Device) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +bool 
CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { + return false; +} + +llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() { + return nullptr; +} + +void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + SourceLocation Loc, + llvm::Value *OutlinedFn, + ArrayRef<llvm::Value *> CapturedVars) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, + const Expr *NumTeams, + const Expr *ThreadLimit, + SourceLocation Loc) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitTargetDataCalls( + CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, + const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( + CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, + const Expr *Device) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, + const OMPLoopDirective &D) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, + const OMPDependClause *C) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +const VarDecl * +CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, + const VarDecl *NativeParam) const { + llvm_unreachable("Not supported in SIMD-only mode"); +} + +Address +CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, + const VarDecl *NativeParam, + const VarDecl *TargetParam) const { + llvm_unreachable("Not supported in SIMD-only mode"); +} + |
