diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2017-12-18 20:11:37 +0000 | 
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2017-12-18 20:11:37 +0000 | 
| commit | 461a67fa15370a9ec88f8f8a240bf7c123bb2029 (patch) | |
| tree | 6942083d7d56bba40ec790a453ca58ad3baf6832 /lib/CodeGen/CGOpenMPRuntime.cpp | |
| parent | 75c3240472ba6ac2669ee72ca67eb72d4e2851fc (diff) | |
Notes
Diffstat (limited to 'lib/CodeGen/CGOpenMPRuntime.cpp')
| -rw-r--r-- | lib/CodeGen/CGOpenMPRuntime.cpp | 860 | 
1 files changed, 528 insertions, 332 deletions
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index 9f8aa6c8d964c..5db29eb6004d7 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -19,6 +19,7 @@  #include "clang/AST/Decl.h"  #include "clang/AST/StmtOpenMP.h"  #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitmaskEnum.h"  #include "llvm/Bitcode/BitcodeReader.h"  #include "llvm/IR/CallSite.h"  #include "llvm/IR/DerivedTypes.h" @@ -427,7 +428,7 @@ public:  /// \brief Values for bit flags used in the ident_t to describe the fields.  /// All enumeric elements are named and described in accordance with the code  /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h -enum OpenMPLocationFlags { +enum OpenMPLocationFlags : unsigned {    /// \brief Use trampoline for internal microtask.    OMP_IDENT_IMD = 0x01,    /// \brief Use c-style ident structure. @@ -443,7 +444,14 @@ enum OpenMPLocationFlags {    /// \brief Implicit barrier in 'sections' directive.    OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,    /// \brief Implicit barrier in 'single' directive. -  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140 +  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, +  /// Call of __kmp_for_static_init for static loop. +  OMP_IDENT_WORK_LOOP = 0x200, +  /// Call of __kmp_for_static_init for sections. +  OMP_IDENT_WORK_SECTIONS = 0x400, +  /// Call of __kmp_for_static_init for distribute. +  OMP_IDENT_WORK_DISTRIBUTE = 0x800, +  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)  };  /// \brief Describes ident structure that describes a source location. @@ -660,27 +668,47 @@ enum OpenMPRTLFunction {    //    // Offloading related calls    // -  // Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t -  // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t +  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t +  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t    // *arg_types);    OMPRTL__tgt_target, -  // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, -  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, -  // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); +  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, +  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t +  // *arg_types); +  OMPRTL__tgt_target_nowait, +  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, +  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t +  // *arg_types, int32_t num_teams, int32_t thread_limit);    OMPRTL__tgt_target_teams, +  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void +  // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t +  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); +  OMPRTL__tgt_target_teams_nowait,    // Call to void __tgt_register_lib(__tgt_bin_desc *desc);    OMPRTL__tgt_register_lib,    // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);    OMPRTL__tgt_unregister_lib, -  // Call to void __tgt_target_data_begin(int32_t device_id, int32_t arg_num, -  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); +  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, +  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);    OMPRTL__tgt_target_data_begin, -  // Call to void __tgt_target_data_end(int32_t device_id, int32_t arg_num, -  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); +  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t +  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t +  // *arg_types); +  OMPRTL__tgt_target_data_begin_nowait, +  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num, +  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);    OMPRTL__tgt_target_data_end, -  // Call to void __tgt_target_data_update(int32_t device_id, int32_t arg_num, -  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); +  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t +  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t +  // *arg_types); +  OMPRTL__tgt_target_data_end_nowait, +  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num, +  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);    OMPRTL__tgt_target_data_update, +  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t +  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t +  // *arg_types); +  OMPRTL__tgt_target_data_update_nowait,  };  /// A basic class for pre|post-action for advanced codegen sequence for OpenMP @@ -862,18 +890,7 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,  }  LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { -  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) -    return CGF.EmitOMPArraySectionExpr(OASE); -  if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E)) -    return CGF.EmitLValue(ASE); -  auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); -  DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), -                  CGF.CapturedStmtInfo && -                      CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr, -                  E->getType(), VK_LValue, E->getExprLoc()); -  // Store the address of the original variable associated with the LHS -  // implicit variable. -  return CGF.EmitLValue(&DRE); +  return CGF.EmitOMPSharedLValue(E);  }  LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, @@ -919,8 +936,9 @@ ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,  void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {    assert(SharedAddresses.size() == N &&           "Number of generated lvalues must be exactly N."); -  SharedAddresses.emplace_back(emitSharedLValue(CGF, ClausesData[N].Ref), -                               emitSharedLValueUB(CGF, ClausesData[N].Ref)); +  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); +  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); +  SharedAddresses.emplace_back(First, Second);  }  void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { @@ -928,7 +946,7 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {        cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());    QualType PrivateType = PrivateVD->getType();    bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); -  if (!AsArraySection && !PrivateType->isVariablyModifiedType()) { +  if (!PrivateType->isVariablyModifiedType()) {      Sizes.emplace_back(          CGF.getTypeSize(              SharedAddresses[N].first.getType().getNonReferenceType()), @@ -966,10 +984,9 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,    auto *PrivateVD =        cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());    QualType PrivateType = PrivateVD->getType(); -  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); -  if (!AsArraySection && !PrivateType->isVariablyModifiedType()) { +  if (!PrivateType->isVariablyModifiedType()) {      assert(!Size && !Sizes[N].second && -           "Size should be nullptr for non-variably modified redution " +           "Size should be nullptr for non-variably modified reduction "             "items.");      return;    } @@ -995,9 +1012,9 @@ void ReductionCodeGen::emitInitialization(    SharedLVal = CGF.MakeAddrLValue(        CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),                                         CGF.ConvertTypeForMem(SharedType)), -      SharedType, SharedAddresses[N].first.getBaseInfo()); -  if (isa<OMPArraySectionExpr>(ClausesData[N].Ref) || -      CGF.getContext().getAsArrayType(PrivateVD->getType())) { +      SharedType, SharedAddresses[N].first.getBaseInfo(), +      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); +  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {      emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);    } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {      emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, @@ -1040,15 +1057,16 @@ static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,      if (auto *PtrTy = BaseTy->getAs<PointerType>())        BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);      else { -      BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(), -                                             BaseTy->castAs<ReferenceType>()); +      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy); +      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);      }      BaseTy = BaseTy->getPointeeType();    }    return CGF.MakeAddrLValue(        CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),                                         CGF.ConvertTypeForMem(ElTy)), -      BaseLV.getType(), BaseLV.getBaseInfo()); +      BaseLV.getType(), BaseLV.getBaseInfo(), +      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));  }  static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, @@ -1106,11 +1124,14 @@ Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,                      OriginalBaseLValue);      llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(          BaseLValue.getPointer(), SharedAddresses[N].first.getPointer()); -    llvm::Value *Ptr = -        CGF.Builder.CreateGEP(PrivateAddr.getPointer(), Adjustment); +    llvm::Value *PrivatePointer = +        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( +            PrivateAddr.getPointer(), +            SharedAddresses[N].first.getAddress().getType()); +    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);      return castToBase(CGF, OrigVD->getType(),                        SharedAddresses[N].first.getType(), -                      OriginalBaseLValue.getPointer()->getType(), +                      OriginalBaseLValue.getAddress().getType(),                        OriginalBaseLValue.getAlignment(), Ptr);    }    BaseDecls.emplace_back( @@ -1146,7 +1167,7 @@ LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(      CodeGenFunction &CGF) {    return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),                              getThreadIDVariable()->getType(), -                            LValueBaseInfo(AlignmentSource::Decl, false)); +                            AlignmentSource::Decl);  }  CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) @@ -1204,7 +1225,14 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,          .getAddress();    });    (void)Scope.Privatize(); -  CGF.EmitIgnoredExpr(CombinerInitializer); +  if (!IsCombiner && Out->hasInit() && +      !CGF.isTrivialInitializer(Out->getInit())) { +    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), +                         Out->getType().getQualifiers(), +                         /*IsInitializer=*/true); +  } +  if (CombinerInitializer) +    CGF.EmitIgnoredExpr(CombinerInitializer);    Scope.ForceCleanup();    CGF.FinishFunction();    return Fn; @@ -1230,7 +1258,10 @@ void CGOpenMPRuntime::emitUserDefinedReduction(        Orig = &C.Idents.get("omp_orig");      }      Initializer = emitCombinerOrInitializer( -        CGM, D->getType(), Init, cast<VarDecl>(D->lookup(Orig).front()), +        CGM, D->getType(), +        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init +                                                                     : nullptr, +        cast<VarDecl>(D->lookup(Orig).front()),          cast<VarDecl>(D->lookup(Priv).front()),          /*IsCombiner=*/false);    } @@ -1283,6 +1314,15 @@ static llvm::Value *emitParallelOrTeamsOutlinedFunction(      HasCancel = OPSD->hasCancel();    else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))      HasCancel = OPFD->hasCancel(); +  else if (auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) +    HasCancel = OPFD->hasCancel(); +  else if (auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) +    HasCancel = OPFD->hasCancel(); +  else if (auto *OPFD = dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) +    HasCancel = OPFD->hasCancel(); +  else if (auto *OPFD = +               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) +    HasCancel = OPFD->hasCancel();    CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,                                      HasCancel, OutlinedHelperName);    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); @@ -1442,19 +1482,24 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,      if (ThreadID != nullptr)        return ThreadID;    } -  if (auto *OMPRegionInfo = -          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { -    if (OMPRegionInfo->getThreadIDVariable()) { -      // Check if this an outlined function with thread id passed as argument. -      auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); -      ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); -      // If value loaded in entry block, cache it and use it everywhere in -      // function. -      if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { -        auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); -        Elem.second.ThreadID = ThreadID; +  // If exceptions are enabled, do not use parameter to avoid possible crash. +  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || +      !CGF.getLangOpts().CXXExceptions || +      CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { +    if (auto *OMPRegionInfo = +            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { +      if (OMPRegionInfo->getThreadIDVariable()) { +        // Check if this an outlined function with thread id passed as argument. +        auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); +        ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); +        // If value loaded in entry block, cache it and use it everywhere in +        // function. +        if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { +          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); +          Elem.second.ThreadID = ThreadID; +        } +        return ThreadID;        } -      return ThreadID;      }    } @@ -1464,12 +1509,13 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,    // function.    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);    CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); -  ThreadID = -      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), -                          emitUpdateLocation(CGF, Loc)); +  auto *Call = CGF.Builder.CreateCall( +      createRuntimeFunction(OMPRTL__kmpc_global_thread_num), +      emitUpdateLocation(CGF, Loc)); +  Call->setCallingConv(CGF.getRuntimeCC());    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); -  Elem.second.ThreadID = ThreadID; -  return ThreadID; +  Elem.second.ThreadID = Call; +  return Call;  }  void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { @@ -2001,32 +2047,48 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {      break;    }    case OMPRTL__tgt_target: { -    // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t -    // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t +    // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t +    // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t      // *arg_types); -    llvm::Type *TypeParams[] = {CGM.Int32Ty, +    llvm::Type *TypeParams[] = {CGM.Int64Ty,                                  CGM.VoidPtrTy,                                  CGM.Int32Ty,                                  CGM.VoidPtrPtrTy,                                  CGM.VoidPtrPtrTy,                                  CGM.SizeTy->getPointerTo(), -                                CGM.Int32Ty->getPointerTo()}; +                                CGM.Int64Ty->getPointerTo()};      llvm::FunctionType *FnTy =          llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);      RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");      break;    } +  case OMPRTL__tgt_target_nowait: { +    // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, +    // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, +    // int64_t *arg_types); +    llvm::Type *TypeParams[] = {CGM.Int64Ty, +                                CGM.VoidPtrTy, +                                CGM.Int32Ty, +                                CGM.VoidPtrPtrTy, +                                CGM.VoidPtrPtrTy, +                                CGM.SizeTy->getPointerTo(), +                                CGM.Int64Ty->getPointerTo()}; +    llvm::FunctionType *FnTy = +        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); +    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait"); +    break; +  }    case OMPRTL__tgt_target_teams: { -    // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, +    // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,      // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, -    // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); -    llvm::Type *TypeParams[] = {CGM.Int32Ty, +    // int64_t *arg_types, int32_t num_teams, int32_t thread_limit); +    llvm::Type *TypeParams[] = {CGM.Int64Ty,                                  CGM.VoidPtrTy,                                  CGM.Int32Ty,                                  CGM.VoidPtrPtrTy,                                  CGM.VoidPtrPtrTy,                                  CGM.SizeTy->getPointerTo(), -                                CGM.Int32Ty->getPointerTo(), +                                CGM.Int64Ty->getPointerTo(),                                  CGM.Int32Ty,                                  CGM.Int32Ty};      llvm::FunctionType *FnTy = @@ -2034,6 +2096,24 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {      RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");      break;    } +  case OMPRTL__tgt_target_teams_nowait: { +    // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void +    // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t +    // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); +    llvm::Type *TypeParams[] = {CGM.Int64Ty, +                                CGM.VoidPtrTy, +                                CGM.Int32Ty, +                                CGM.VoidPtrPtrTy, +                                CGM.VoidPtrPtrTy, +                                CGM.SizeTy->getPointerTo(), +                                CGM.Int64Ty->getPointerTo(), +                                CGM.Int32Ty, +                                CGM.Int32Ty}; +    llvm::FunctionType *FnTy = +        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); +    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); +    break; +  }    case OMPRTL__tgt_register_lib: {      // Build void __tgt_register_lib(__tgt_bin_desc *desc);      QualType ParamTy = @@ -2055,47 +2135,92 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {      break;    }    case OMPRTL__tgt_target_data_begin: { -    // Build void __tgt_target_data_begin(int32_t device_id, int32_t arg_num, -    // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); -    llvm::Type *TypeParams[] = {CGM.Int32Ty, +    // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, +    // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); +    llvm::Type *TypeParams[] = {CGM.Int64Ty,                                  CGM.Int32Ty,                                  CGM.VoidPtrPtrTy,                                  CGM.VoidPtrPtrTy,                                  CGM.SizeTy->getPointerTo(), -                                CGM.Int32Ty->getPointerTo()}; +                                CGM.Int64Ty->getPointerTo()};      llvm::FunctionType *FnTy =          llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);      RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");      break;    } +  case OMPRTL__tgt_target_data_begin_nowait: { +    // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t +    // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t +    // *arg_types); +    llvm::Type *TypeParams[] = {CGM.Int64Ty, +                                CGM.Int32Ty, +                                CGM.VoidPtrPtrTy, +                                CGM.VoidPtrPtrTy, +                                CGM.SizeTy->getPointerTo(), +                                CGM.Int64Ty->getPointerTo()}; +    auto *FnTy = +        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); +    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait"); +    break; +  }    case OMPRTL__tgt_target_data_end: { -    // Build void __tgt_target_data_end(int32_t device_id, int32_t arg_num, -    // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); -    llvm::Type *TypeParams[] = {CGM.Int32Ty, +    // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num, +    // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); +    llvm::Type *TypeParams[] = {CGM.Int64Ty,                                  CGM.Int32Ty,                                  CGM.VoidPtrPtrTy,                                  CGM.VoidPtrPtrTy,                                  CGM.SizeTy->getPointerTo(), -                                CGM.Int32Ty->getPointerTo()}; +                                CGM.Int64Ty->getPointerTo()};      llvm::FunctionType *FnTy =          llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);      RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");      break;    } +  case OMPRTL__tgt_target_data_end_nowait: { +    // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t +    // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t +    // *arg_types); +    llvm::Type *TypeParams[] = {CGM.Int64Ty, +                                CGM.Int32Ty, +                                CGM.VoidPtrPtrTy, +                                CGM.VoidPtrPtrTy, +                                CGM.SizeTy->getPointerTo(), +                                CGM.Int64Ty->getPointerTo()}; +    auto *FnTy = +        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); +    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait"); +    break; +  }    case OMPRTL__tgt_target_data_update: { -    // Build void __tgt_target_data_update(int32_t device_id, int32_t arg_num, -    // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); -    llvm::Type *TypeParams[] = {CGM.Int32Ty, +    // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num, +    // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); +    llvm::Type *TypeParams[] = {CGM.Int64Ty,                                  CGM.Int32Ty,                                  CGM.VoidPtrPtrTy,                                  CGM.VoidPtrPtrTy,                                  CGM.SizeTy->getPointerTo(), -                                CGM.Int32Ty->getPointerTo()}; +                                CGM.Int64Ty->getPointerTo()};      llvm::FunctionType *FnTy =          llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);      RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");      break;    } +  case OMPRTL__tgt_target_data_update_nowait: { +    // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t +    // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t +    // *arg_types); +    llvm::Type *TypeParams[] = {CGM.Int64Ty, +                                CGM.Int32Ty, +                                CGM.VoidPtrPtrTy, +                                CGM.VoidPtrPtrTy, +                                CGM.SizeTy->getPointerTo(), +                                CGM.Int64Ty->getPointerTo()}; +    auto *FnTy = +        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); +    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); +    break; +  }    }    assert(RTLFn && "Unable to find OpenMP runtime function");    return RTLFn; @@ -2459,7 +2584,7 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,      OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());      OutlinedFnArgs.push_back(ZeroAddr.getPointer());      OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); -    CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); +    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);      // __kmpc_end_serialized_parallel(&Loc, GTid);      llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; @@ -2968,87 +3093,101 @@ static void emitForStaticInitCall(      CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,      llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule,      OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, -    unsigned IVSize, bool Ordered, Address IL, Address LB, Address UB, -    Address ST, llvm::Value *Chunk) { +    const CGOpenMPRuntime::StaticRTInput &Values) {    if (!CGF.HaveInsertPoint()) -     return; - -   assert(!Ordered); -   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || -          Schedule == OMP_sch_static_balanced_chunked || -          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || -          Schedule == OMP_dist_sch_static || -          Schedule == OMP_dist_sch_static_chunked); - -   // Call __kmpc_for_static_init( -   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, -   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, -   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, -   //          kmp_int[32|64] incr, kmp_int[32|64] chunk); -   if (Chunk == nullptr) { -     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || -             Schedule == OMP_dist_sch_static) && -            "expected static non-chunked schedule"); -     // If the Chunk was not specified in the clause - use default value 1. -       Chunk = CGF.Builder.getIntN(IVSize, 1); -   } else { -     assert((Schedule == OMP_sch_static_chunked || -             Schedule == OMP_sch_static_balanced_chunked || -             Schedule == OMP_ord_static_chunked || -             Schedule == OMP_dist_sch_static_chunked) && -            "expected static chunked schedule"); -   } -   llvm::Value *Args[] = { -       UpdateLocation, ThreadId, CGF.Builder.getInt32(addMonoNonMonoModifier( -                                     Schedule, M1, M2)), // Schedule type -       IL.getPointer(),                                  // &isLastIter -       LB.getPointer(),                                  // &LB -       UB.getPointer(),                                  // &UB -       ST.getPointer(),                                  // &Stride -       CGF.Builder.getIntN(IVSize, 1),                   // Incr -       Chunk                                             // Chunk -   }; -   CGF.EmitRuntimeCall(ForStaticInitFunction, Args); +    return; + +  assert(!Values.Ordered); +  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || +         Schedule == OMP_sch_static_balanced_chunked || +         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || +         Schedule == OMP_dist_sch_static || +         Schedule == OMP_dist_sch_static_chunked); + +  // Call __kmpc_for_static_init( +  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, +  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, +  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, +  //          kmp_int[32|64] incr, kmp_int[32|64] chunk); +  llvm::Value *Chunk = Values.Chunk; +  if (Chunk == nullptr) { +    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || +            Schedule == OMP_dist_sch_static) && +           "expected static non-chunked schedule"); +    // If the Chunk was not specified in the clause - use default value 1. +    Chunk = CGF.Builder.getIntN(Values.IVSize, 1); +  } else { +    assert((Schedule == OMP_sch_static_chunked || +            Schedule == OMP_sch_static_balanced_chunked || +            Schedule == OMP_ord_static_chunked || +            Schedule == OMP_dist_sch_static_chunked) && +           "expected static chunked schedule"); +  } +  llvm::Value *Args[] = { +      UpdateLocation, +      ThreadId, +      CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1, +                                                  M2)), // Schedule type +      Values.IL.getPointer(),                           // &isLastIter +      Values.LB.getPointer(),                           // &LB +      Values.UB.getPointer(),                           // &UB +      Values.ST.getPointer(),                           // &Stride +      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr +      Chunk                                             // Chunk +  }; +  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);  }  void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,                                          SourceLocation Loc, +                                        OpenMPDirectiveKind DKind,                                          const OpenMPScheduleTy &ScheduleKind, -                                        unsigned IVSize, bool IVSigned, -                                        bool Ordered, Address IL, Address LB, -                                        Address UB, Address ST, -                                        llvm::Value *Chunk) { -  OpenMPSchedType ScheduleNum = -      getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered); -  auto *UpdatedLocation = emitUpdateLocation(CGF, Loc); +                                        const StaticRTInput &Values) { +  OpenMPSchedType ScheduleNum = getRuntimeSchedule( +      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); +  assert(isOpenMPWorksharingDirective(DKind) && +         "Expected loop-based or sections-based directive."); +  auto *UpdatedLocation = emitUpdateLocation(CGF, Loc, +                                             isOpenMPLoopDirective(DKind) +                                                 ? OMP_IDENT_WORK_LOOP +                                                 : OMP_IDENT_WORK_SECTIONS);    auto *ThreadId = getThreadID(CGF, Loc); -  auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned); +  auto *StaticInitFunction = +      createForStaticInitFunction(Values.IVSize, Values.IVSigned);    emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, -                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, IVSize, -                        Ordered, IL, LB, UB, ST, Chunk); +                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);  }  void CGOpenMPRuntime::emitDistributeStaticInit(      CodeGenFunction &CGF, SourceLocation Loc, -    OpenMPDistScheduleClauseKind SchedKind, unsigned IVSize, bool IVSigned, -    bool Ordered, Address IL, Address LB, Address UB, Address ST, -    llvm::Value *Chunk) { -  OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != nullptr); -  auto *UpdatedLocation = emitUpdateLocation(CGF, Loc); +    OpenMPDistScheduleClauseKind SchedKind, +    const CGOpenMPRuntime::StaticRTInput &Values) { +  OpenMPSchedType ScheduleNum = +      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); +  auto *UpdatedLocation = +      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);    auto *ThreadId = getThreadID(CGF, Loc); -  auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned); +  auto *StaticInitFunction = +      createForStaticInitFunction(Values.IVSize, Values.IVSigned);    emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,                          ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, -                        OMPC_SCHEDULE_MODIFIER_unknown, IVSize, Ordered, IL, LB, -                        UB, ST, Chunk); +                        OMPC_SCHEDULE_MODIFIER_unknown, Values);  }  void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, -                                          SourceLocation Loc) { +                                          SourceLocation Loc, +                                          OpenMPDirectiveKind DKind) {    if (!CGF.HaveInsertPoint())      return;    // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); -  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; +  llvm::Value *Args[] = { +      emitUpdateLocation(CGF, Loc, +                         isOpenMPDistributeDirective(DKind) +                             ? OMP_IDENT_WORK_DISTRIBUTE +                             : isOpenMPLoopDirective(DKind) +                                   ? OMP_IDENT_WORK_LOOP +                                   : OMP_IDENT_WORK_SECTIONS), +      getThreadID(CGF, Loc)};    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),                        Args);  } @@ -3360,14 +3499,14 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {    auto *UnRegFn = createOffloadingBinaryDescriptorFunction(        CGM, ".omp_offloading.descriptor_unreg",        [&](CodeGenFunction &CGF, PrePostActionTy &) { -        CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib), -                             Desc); +        CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib), +                            Desc);        });    auto *RegFn = createOffloadingBinaryDescriptorFunction(        CGM, ".omp_offloading.descriptor_reg",        [&](CodeGenFunction &CGF, PrePostActionTy &) { -        CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib), -                             Desc); +        CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), +                            Desc);          CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);        });    if (CGM.supportsCOMDAT()) { @@ -3802,7 +3941,6 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,                               ".omp_task_entry.", &CGM.getModule());    CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);    CodeGenFunction CGF(CGM); -  CGF.disableDebugInfo();    CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);    // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, @@ -3871,7 +4009,8 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,    }    CallArgs.push_back(SharedsParam); -  CGF.EmitCallOrInvoke(TaskFunction, CallArgs); +  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, +                                                  CallArgs);    CGF.EmitStoreThroughLValue(        RValue::get(CGF.Builder.getInt32(/*C=*/0)),        CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); @@ -3893,7 +4032,6 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,                                  ImplicitParamDecl::Other);    Args.push_back(&GtidArg);    Args.push_back(&TaskTypeArg); -  FunctionType::ExtInfo Info;    auto &DestructorFnInfo =        CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);    auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo); @@ -4020,9 +4158,9 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,    return TaskPrivatesMap;  } -static int array_pod_sort_comparator(const PrivateDataTy *P1, -                                     const PrivateDataTy *P2) { -  return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0); +static bool stable_sort_comparator(const PrivateDataTy P1, +                                   const PrivateDataTy P2) { +  return P1.first > P2.first;  }  /// Emit initialization for private variables in task-based directives. @@ -4059,8 +4197,8 @@ static void emitPrivatesInit(CodeGenFunction &CGF,          SharedRefLValue = CGF.MakeAddrLValue(              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),              SharedRefLValue.getType(), -            LValueBaseInfo(AlignmentSource::Decl, -                           SharedRefLValue.getBaseInfo().getMayAlias())); +            LValueBaseInfo(AlignmentSource::Decl), +            SharedRefLValue.getTBAAInfo());          QualType Type = OriginalVD->getType();          if (Type->isArrayType()) {            // Initialize firstprivate array. @@ -4250,8 +4388,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,                           /*PrivateElemInit=*/nullptr)));      ++I;    } -  llvm::array_pod_sort(Privates.begin(), Privates.end(), -                       array_pod_sort_comparator); +  std::stable_sort(Privates.begin(), Privates.end(), stable_sort_comparator);    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);    // Build type kmp_routine_entry_t (if not built yet).    emitKmpRoutineEntryT(KmpInt32Ty); @@ -4262,7 +4399,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,            CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));      }      KmpTaskTQTy = SavedKmpTaskloopTQTy; -  } else if (D.getDirectiveKind() == OMPD_task) { +  } else {      assert(D.getDirectiveKind() == OMPD_task &&             "Expected taskloop or task directive");      if (SavedKmpTaskTQTy.isNull()) { @@ -4557,8 +4694,8 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);    }    auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, -                        NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF, -                                                           PrePostActionTy &) { +                        NumDependencies, &DepWaitTaskArgs, +                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {      auto &RT = CGF.CGM.getOpenMPRuntime();      CodeGenFunction::RunCleanupsScope LocalScope(CGF);      // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, @@ -4569,11 +4706,12 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,        CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),                            DepWaitTaskArgs);      // Call proxy_task_entry(gtid, new_task); -    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy]( -        CodeGenFunction &CGF, PrePostActionTy &Action) { +    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, +                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {        Action.Enter(CGF);        llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; -      CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs); +      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, +                                                          OutlinedFnArgs);      };      // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, @@ -5805,21 +5943,21 @@ emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime,    const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); -  // FIXME: Accommodate other combined directives with teams when they become -  // available. -  if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>( +  if (auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(            ignoreCompoundStmts(CS.getCapturedStmt()))) { -    if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) { -      CGOpenMPInnerExprInfo CGInfo(CGF, CS); -      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); -      llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams()); -      return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, -                               /*IsSigned=*/true); -    } +    if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) { +      if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) { +        CGOpenMPInnerExprInfo CGInfo(CGF, CS); +        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); +        llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams()); +        return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, +                                 /*IsSigned=*/true); +      } -    // If we have an enclosed teams directive but no num_teams clause we use -    // the default value 0. -    return Bld.getInt32(0); +      // If we have an enclosed teams directive but no num_teams clause we use +      // the default value 0. +      return Bld.getInt32(0); +    }    }    // No teams associated with the directive. @@ -5908,21 +6046,21 @@ emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime,    const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); -  // FIXME: Accommodate other combined directives with teams when they become -  // available. -  if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>( +  if (auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(            ignoreCompoundStmts(CS.getCapturedStmt()))) { -    if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) { -      CGOpenMPInnerExprInfo CGInfo(CGF, CS); -      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); -      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit()); -      return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty, -                                       /*IsSigned=*/true); -    } +    if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) { +      if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) { +        CGOpenMPInnerExprInfo CGInfo(CGF, CS); +        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); +        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit()); +        return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty, +                                         /*IsSigned=*/true); +      } -    // If we have an enclosed teams directive but no thread_limit clause we use -    // the default value 0. -    return CGF.Builder.getInt32(0); +      // If we have an enclosed teams directive but no thread_limit clause we +      // use the default value 0. +      return CGF.Builder.getInt32(0); +    }    }    // No teams associated with the directive. @@ -5949,22 +6087,23 @@ public:      /// \brief Delete the element from the device environment, ignoring the      /// current reference count associated with the element.      OMP_MAP_DELETE = 0x08, -    /// \brief The element being mapped is a pointer, therefore the pointee -    /// should be mapped as well. -    OMP_MAP_IS_PTR = 0x10, -    /// \brief This flags signals that an argument is the first one relating to -    /// a map/private clause expression. For some cases a single -    /// map/privatization results in multiple arguments passed to the runtime -    /// library. -    OMP_MAP_FIRST_REF = 0x20, +    /// \brief The element being mapped is a pointer-pointee pair; both the +    /// pointer and the pointee should be mapped. +    OMP_MAP_PTR_AND_OBJ = 0x10, +    /// \brief This flags signals that the base address of an entry should be +    /// passed to the target kernel as an argument. +    OMP_MAP_TARGET_PARAM = 0x20,      /// \brief Signal that the runtime library has to return the device pointer -    /// in the current position for the data being mapped. -    OMP_MAP_RETURN_PTR = 0x40, +    /// in the current position for the data being mapped. Used when we have the +    /// use_device_ptr clause. +    OMP_MAP_RETURN_PARAM = 0x40,      /// \brief This flag signals that the reference being passed is a pointer to      /// private data. -    OMP_MAP_PRIVATE_PTR = 0x80, +    OMP_MAP_PRIVATE = 0x80,      /// \brief Pass the element to the device by value. -    OMP_MAP_PRIVATE_VAL = 0x100, +    OMP_MAP_LITERAL = 0x100, +    /// Implicit map +    OMP_MAP_IMPLICIT = 0x200,    };    /// Class that associates information with a base pointer to be passed to the @@ -5986,7 +6125,7 @@ public:    typedef SmallVector<BasePointerInfo, 16> MapBaseValuesArrayTy;    typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy; -  typedef SmallVector<unsigned, 16> MapFlagsArrayTy; +  typedef SmallVector<uint64_t, 16> MapFlagsArrayTy;  private:    /// \brief Directive from where the map clauses were extracted. @@ -5997,6 +6136,8 @@ private:    /// \brief Set of all first private variables in the current directive.    llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls; +  /// Set of all reduction variables in the current directive. +  llvm::SmallPtrSet<const VarDecl *, 8> ReductionDecls;    /// Map between device pointer declarations and their expression components.    /// The key value for declarations in 'this' is null. @@ -6051,10 +6192,10 @@ private:    /// a flag marking the map as a pointer if requested. Add a flag marking the    /// map as the first one of a series of maps that relate to the same map    /// expression. -  unsigned getMapTypeBits(OpenMPMapClauseKind MapType, +  uint64_t getMapTypeBits(OpenMPMapClauseKind MapType,                            OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag, -                          bool AddIsFirstFlag) const { -    unsigned Bits = 0u; +                          bool AddIsTargetParamFlag) const { +    uint64_t Bits = 0u;      switch (MapType) {      case OMPC_MAP_alloc:      case OMPC_MAP_release: @@ -6080,9 +6221,9 @@ private:        break;      }      if (AddPtrFlag) -      Bits |= OMP_MAP_IS_PTR; -    if (AddIsFirstFlag) -      Bits |= OMP_MAP_FIRST_REF; +      Bits |= OMP_MAP_PTR_AND_OBJ; +    if (AddIsTargetParamFlag) +      Bits |= OMP_MAP_TARGET_PARAM;      if (MapTypeModifier == OMPC_MAP_always)        Bits |= OMP_MAP_ALWAYS;      return Bits; @@ -6135,7 +6276,7 @@ private:        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,        MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,        MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, -      bool IsFirstComponentList) const { +      bool IsFirstComponentList, bool IsImplicit) const {      // The following summarizes what has to be generated for each map and the      // types bellow. The generated information is expressed in this order: @@ -6189,28 +6330,28 @@ private:      //      // map(s.p[:22], s.a s.b)      // &s, &(s.p), sizeof(double*), noflags -    // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag + extra_flag +    // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag      //      // map(s.ps)      // &s, &(s.ps), sizeof(S2*), noflags      //      // map(s.ps->s.i)      // &s, &(s.ps), sizeof(S2*), noflags -    // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag + extra_flag +    // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag      //      // map(s.ps->ps)      // &s, &(s.ps), sizeof(S2*), noflags -    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag +    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag      //      // map(s.ps->ps->ps)      // &s, &(s.ps), sizeof(S2*), noflags -    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag -    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag +    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag +    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag      //      // map(s.ps->ps->s.f[:22])      // &s, &(s.ps), sizeof(S2*), noflags -    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag -    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + extra_flag +    // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag +    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag      //      // map(ps)      // &ps, &ps, sizeof(S2*), noflags @@ -6226,29 +6367,28 @@ private:      //      // map(ps->p[:22])      // ps, &(ps->p), sizeof(double*), noflags -    // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag + extra_flag +    // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag      //      // map(ps->ps)      // ps, &(ps->ps), sizeof(S2*), noflags      //      // map(ps->ps->s.i)      // ps, &(ps->ps), sizeof(S2*), noflags -    // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag + extra_flag +    // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag      //      // map(ps->ps->ps)      // ps, &(ps->ps), sizeof(S2*), noflags -    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag +    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag      //      // map(ps->ps->ps->ps)      // ps, &(ps->ps), sizeof(S2*), noflags -    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag -    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag +    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag +    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag      //      // map(ps->ps->ps->s.f[:22])      // ps, &(ps->ps), sizeof(S2*), noflags -    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag -    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + -    // extra_flag +    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag +    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag      // Track if the map information being generated is the first for a capture.      bool IsCaptureFirstInfo = IsFirstComponentList; @@ -6270,8 +6410,7 @@ private:      } else {        // The base is the reference to the variable.        // BP = &Var. -      BP = CGF.EmitLValue(cast<DeclRefExpr>(I->getAssociatedExpression())) -               .getPointer(); +      BP = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer();        // If the variable is a pointer and is being dereferenced (i.e. is not        // the last component), the base has to be the pointer itself, not its @@ -6290,6 +6429,7 @@ private:        }      } +    uint64_t DefaultFlags = IsImplicit ? OMP_MAP_IMPLICIT : 0;      for (; I != CE; ++I) {        auto Next = std::next(I); @@ -6324,7 +6464,8 @@ private:                  isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&                 "Unexpected expression"); -        auto *LB = CGF.EmitLValue(I->getAssociatedExpression()).getPointer(); +        llvm::Value *LB = +            CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer();          auto *Size = getExprTypeSize(I->getAssociatedExpression());          // If we have a member expression and the current component is a @@ -6339,9 +6480,11 @@ private:            BasePointers.push_back(BP);            Pointers.push_back(RefAddr);            Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); -          Types.push_back(getMapTypeBits( -              /*MapType*/ OMPC_MAP_alloc, /*MapTypeModifier=*/OMPC_MAP_unknown, -              !IsExpressionFirstInfo, IsCaptureFirstInfo)); +          Types.push_back(DefaultFlags | +                          getMapTypeBits( +                              /*MapType*/ OMPC_MAP_alloc, +                              /*MapTypeModifier=*/OMPC_MAP_unknown, +                              !IsExpressionFirstInfo, IsCaptureFirstInfo));            IsExpressionFirstInfo = false;            IsCaptureFirstInfo = false;            // The reference will be the next base address. @@ -6356,9 +6499,9 @@ private:          // same expression except for the first one. We also need to signal          // this map is the first one that relates with the current capture          // (there is a set of entries for each capture). -        Types.push_back(getMapTypeBits(MapType, MapTypeModifier, -                                       !IsExpressionFirstInfo, -                                       IsCaptureFirstInfo)); +        Types.push_back(DefaultFlags | getMapTypeBits(MapType, MapTypeModifier, +                                                      !IsExpressionFirstInfo, +                                                      IsCaptureFirstInfo));          // If we have a final array section, we are done with this expression.          if (IsFinalArraySection) @@ -6370,7 +6513,6 @@ private:          IsExpressionFirstInfo = false;          IsCaptureFirstInfo = false; -        continue;        }      }    } @@ -6386,8 +6528,14 @@ private:      // 'private ptr' and 'map to' flag. Return the right flags if the captured      // declaration is known as first-private in this handler.      if (FirstPrivateDecls.count(Cap.getCapturedVar())) -      return MappableExprsHandler::OMP_MAP_PRIVATE_PTR | +      return MappableExprsHandler::OMP_MAP_PRIVATE |               MappableExprsHandler::OMP_MAP_TO; +    // Reduction variable  will use only the 'private ptr' and 'map to_from' +    // flag. +    if (ReductionDecls.count(Cap.getCapturedVar())) { +      return MappableExprsHandler::OMP_MAP_TO | +             MappableExprsHandler::OMP_MAP_FROM; +    }      // We didn't modify anything.      return CurrentModifiers; @@ -6401,6 +6549,12 @@ public:        for (const auto *D : C->varlists())          FirstPrivateDecls.insert(              cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); +    for (const auto *C : Dir.getClausesOfKind<OMPReductionClause>()) { +      for (const auto *D : C->varlists()) { +        ReductionDecls.insert( +            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); +      } +    }      // Extract device pointer clause information.      for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())        for (auto L : C->component_lists()) @@ -6432,20 +6586,19 @@ public:          RPK_MemberReference,        };        OMPClauseMappableExprCommon::MappableExprComponentListRef Components; -      OpenMPMapClauseKind MapType; -      OpenMPMapClauseKind MapTypeModifier; -      ReturnPointerKind ReturnDevicePointer; +      OpenMPMapClauseKind MapType = OMPC_MAP_unknown; +      OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown; +      ReturnPointerKind ReturnDevicePointer = RPK_None; +      bool IsImplicit = false; -      MapInfo() -          : MapType(OMPC_MAP_unknown), MapTypeModifier(OMPC_MAP_unknown), -            ReturnDevicePointer(RPK_None) {} +      MapInfo() = default;        MapInfo(            OMPClauseMappableExprCommon::MappableExprComponentListRef Components,            OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, -          ReturnPointerKind ReturnDevicePointer) +          ReturnPointerKind ReturnDevicePointer, bool IsImplicit)            : Components(Components), MapType(MapType),              MapTypeModifier(MapTypeModifier), -            ReturnDevicePointer(ReturnDevicePointer) {} +            ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}      };      // We have to process the component lists that relate with the same @@ -6459,25 +6612,29 @@ public:          const ValueDecl *D,          OMPClauseMappableExprCommon::MappableExprComponentListRef L,          OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier, -        MapInfo::ReturnPointerKind ReturnDevicePointer) { +        MapInfo::ReturnPointerKind ReturnDevicePointer, bool IsImplicit) {        const ValueDecl *VD =            D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; -      Info[VD].push_back({L, MapType, MapModifier, ReturnDevicePointer}); +      Info[VD].emplace_back(L, MapType, MapModifier, ReturnDevicePointer, +                            IsImplicit);      };      // FIXME: MSVC 2013 seems to require this-> to find member CurDir.      for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) -      for (auto L : C->component_lists()) +      for (auto L : C->component_lists()) {          InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(), -                MapInfo::RPK_None); +                MapInfo::RPK_None, C->isImplicit()); +      }      for (auto *C : this->CurDir.getClausesOfKind<OMPToClause>()) -      for (auto L : C->component_lists()) +      for (auto L : C->component_lists()) {          InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown, -                MapInfo::RPK_None); +                MapInfo::RPK_None, C->isImplicit()); +      }      for (auto *C : this->CurDir.getClausesOfKind<OMPFromClause>()) -      for (auto L : C->component_lists()) +      for (auto L : C->component_lists()) {          InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown, -                MapInfo::RPK_None); +                MapInfo::RPK_None, C->isImplicit()); +      }      // Look at the use_device_ptr clause information and mark the existing map      // entries as such. If there is no map information for an entry in the @@ -6524,7 +6681,7 @@ public:          BasePointers.push_back({Ptr, VD});          Pointers.push_back(Ptr);          Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy)); -        Types.push_back(OMP_MAP_RETURN_PTR | OMP_MAP_FIRST_REF); +        Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);        }      for (auto &M : Info) { @@ -6538,9 +6695,9 @@ public:          // Remember the current base pointer index.          unsigned CurrentBasePointersIdx = BasePointers.size();          // FIXME: MSVC 2013 seems to require this-> to find the member method. -        this->generateInfoForComponentList(L.MapType, L.MapTypeModifier, -                                           L.Components, BasePointers, Pointers, -                                           Sizes, Types, IsFirstComponentList); +        this->generateInfoForComponentList( +            L.MapType, L.MapTypeModifier, L.Components, BasePointers, Pointers, +            Sizes, Types, IsFirstComponentList, L.IsImplicit);          // If this entry relates with a device pointer, set the relevant          // declaration and add the 'return pointer' flag. @@ -6562,7 +6719,7 @@ public:                   "No relevant declaration related with device pointer??");            BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD); -          Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PTR; +          Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;          }          IsFirstComponentList = false;        } @@ -6604,7 +6761,8 @@ public:          for (auto L : It->second) {            generateInfoForComponentList(                /*MapType=*/OMPC_MAP_to, /*MapTypeModifier=*/OMPC_MAP_unknown, L, -              BasePointers, Pointers, Sizes, Types, IsFirstComponentList); +              BasePointers, Pointers, Sizes, Types, IsFirstComponentList, +              /*IsImplicit=*/false);            IsFirstComponentList = false;          }          return; @@ -6613,7 +6771,7 @@ public:        BasePointers.push_back({Arg, VD});        Pointers.push_back(Arg);        Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); -      Types.push_back(OMP_MAP_PRIVATE_VAL | OMP_MAP_FIRST_REF); +      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);        return;      } @@ -6624,9 +6782,9 @@ public:                 "We got information for the wrong declaration??");          assert(!L.second.empty() &&                 "Not expecting declaration with no component lists."); -        generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(), -                                     L.second, BasePointers, Pointers, Sizes, -                                     Types, IsFirstComponentList); +        generateInfoForComponentList( +            C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers, +            Pointers, Sizes, Types, IsFirstComponentList, C->isImplicit());          IsFirstComponentList = false;        } @@ -6656,7 +6814,7 @@ public:        if (!RI.getType()->isAnyPointerType()) {          // We have to signal to the runtime captures passed by value that are          // not pointers. -        CurMapTypes.push_back(OMP_MAP_PRIVATE_VAL); +        CurMapTypes.push_back(OMP_MAP_LITERAL);          CurSizes.push_back(CGF.getTypeSize(RI.getType()));        } else {          // Pointers are implicitly mapped with a zero size and no flags @@ -6676,19 +6834,12 @@ public:        // The default map type for a scalar/complex type is 'to' because by        // default the value doesn't have to be retrieved. For an aggregate        // type, the default is 'tofrom'. -      CurMapTypes.push_back(ElementType->isAggregateType() -                                ? (OMP_MAP_TO | OMP_MAP_FROM) -                                : OMP_MAP_TO); - -      // If we have a capture by reference we may need to add the private -      // pointer flag if the base declaration shows in some first-private -      // clause. -      CurMapTypes.back() = -          adjustMapModifiersForPrivateClauses(CI, CurMapTypes.back()); +      CurMapTypes.emplace_back(adjustMapModifiersForPrivateClauses( +          CI, ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM) +                                             : OMP_MAP_TO));      } -    // Every default map produces a single argument, so, it is always the -    // first one. -    CurMapTypes.back() |= OMP_MAP_FIRST_REF; +    // Every default map produces a single argument which is a target parameter. +    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;    }  }; @@ -6831,7 +6982,7 @@ static void emitOffloadingArraysArgument(          llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray,          /*Idx0=*/0, /*Idx1=*/0);      MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( -        llvm::ArrayType::get(CGM.Int32Ty, Info.NumberOfPtrs), +        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),          Info.MapTypesArray,          /*Idx0=*/0,          /*Idx1=*/0); @@ -6840,7 +6991,7 @@ static void emitOffloadingArraysArgument(      PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);      SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());      MapTypesArrayArg = -        llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()); +        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());    }  } @@ -6855,8 +7006,6 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,    assert(OutlinedFn && "Invalid outlined function!"); -  auto &Ctx = CGF.getContext(); -    // Fill up the arrays with all the captured variables.    MappableExprsHandler::MapValuesArrayTy KernelArgs;    MappableExprsHandler::MapBaseValuesArrayTy BasePointers; @@ -6878,9 +7027,6 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),                                              CE = CS.capture_end();         CI != CE; ++CI, ++RI, ++CV) { -    StringRef Name; -    QualType Ty; -      CurBasePointers.clear();      CurPointers.clear();      CurSizes.clear(); @@ -6893,8 +7039,8 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,        CurPointers.push_back(*CV);        CurSizes.push_back(CGF.getTypeSize(RI->getType()));        // Copy to the device as an argument. No need to retrieve it. -      CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL | -                            MappableExprsHandler::OMP_MAP_FIRST_REF); +      CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL | +                            MappableExprsHandler::OMP_MAP_TARGET_PARAM);      } else {        // If we have any information in the map clause, we use it, otherwise we        // just do a default mapping. @@ -6921,19 +7067,10 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());    } -  // Keep track on whether the host function has to be executed. -  auto OffloadErrorQType = -      Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true); -  auto OffloadError = CGF.MakeAddrLValue( -      CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"), -      OffloadErrorQType); -  CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty), -                        OffloadError); -    // Fill up the pointer arrays and transfer execution to the device. -  auto &&ThenGen = [&BasePointers, &Pointers, &Sizes, &MapTypes, Device, -                    OutlinedFnID, OffloadError, -                    &D](CodeGenFunction &CGF, PrePostActionTy &) { +  auto &&ThenGen = [this, &BasePointers, &Pointers, &Sizes, &MapTypes, Device, +                    OutlinedFn, OutlinedFnID, &D, +                    &KernelArgs](CodeGenFunction &CGF, PrePostActionTy &) {      auto &RT = CGF.CGM.getOpenMPRuntime();      // Emit the offloading arrays.      TargetDataInfo Info; @@ -6956,11 +7093,12 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,      // Emit device ID if any.      llvm::Value *DeviceID; -    if (Device) +    if (Device) {        DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), -                                           CGF.Int32Ty, /*isSigned=*/true); -    else -      DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); +                                           CGF.Int64Ty, /*isSigned=*/true); +    } else { +      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); +    }      // Emit the number of elements in the offloading arrays.      llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size()); @@ -6971,6 +7109,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,      auto *NumTeams = emitNumTeamsForTargetDirective(RT, CGF, D);      auto *NumThreads = emitNumThreadsForTargetDirective(RT, CGF, D); +    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();      // The target region is an outlined function launched by the runtime      // via calls __tgt_target() or __tgt_target_teams().      // @@ -7013,24 +7152,41 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,            Info.MapTypesArray, NumTeams,            NumThreads};        Return = CGF.EmitRuntimeCall( -          RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs); +          RT.createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait +                                             : OMPRTL__tgt_target_teams), +          OffloadingArgs);      } else {        llvm::Value *OffloadingArgs[] = {            DeviceID,           OutlinedFnID,            PointerNum,         Info.BasePointersArray,            Info.PointersArray, Info.SizesArray,            Info.MapTypesArray}; -      Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target), -                                   OffloadingArgs); +      Return = CGF.EmitRuntimeCall( +          RT.createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait +                                             : OMPRTL__tgt_target), +          OffloadingArgs);      } -    CGF.EmitStoreOfScalar(Return, OffloadError); +    // Check the error code and execute the host version if required. +    llvm::BasicBlock *OffloadFailedBlock = +        CGF.createBasicBlock("omp_offload.failed"); +    llvm::BasicBlock *OffloadContBlock = +        CGF.createBasicBlock("omp_offload.cont"); +    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); +    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); + +    CGF.EmitBlock(OffloadFailedBlock); +    emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, KernelArgs); +    CGF.EmitBranch(OffloadContBlock); + +    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);    };    // Notify that the host version must be executed. -  auto &&ElseGen = [OffloadError](CodeGenFunction &CGF, PrePostActionTy &) { -    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/-1u), -                          OffloadError); +  auto &&ElseGen = [this, &D, OutlinedFn, &KernelArgs](CodeGenFunction &CGF, +                                                      PrePostActionTy &) { +    emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, +                             KernelArgs);    };    // If we have a target function ID it means that we need to support @@ -7048,19 +7204,6 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,      RegionCodeGenTy ElseRCG(ElseGen);      ElseRCG(CGF);    } - -  // Check the error code and execute the host version if required. -  auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed"); -  auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont"); -  auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation()); -  auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal); -  CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); - -  CGF.EmitBlock(OffloadFailedBlock); -  CGF.Builder.CreateCall(OutlinedFn, KernelArgs); -  CGF.EmitBranch(OffloadContBlock); - -  CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);  }  void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, @@ -7101,6 +7244,26 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,        CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(            CGM, ParentName, cast<OMPTargetTeamsDirective>(*S));        break; +    case Stmt::OMPTargetTeamsDistributeDirectiveClass: +      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( +          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(*S)); +      break; +    case Stmt::OMPTargetTeamsDistributeSimdDirectiveClass: +      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( +          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(*S)); +      break; +    case Stmt::OMPTargetParallelForDirectiveClass: +      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( +          CGM, ParentName, cast<OMPTargetParallelForDirective>(*S)); +      break; +    case Stmt::OMPTargetParallelForSimdDirectiveClass: +      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( +          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(*S)); +      break; +    case Stmt::OMPTargetSimdDirectiveClass: +      CodeGenFunction::EmitOMPTargetSimdDeviceFunction( +          CGM, ParentName, cast<OMPTargetSimdDirective>(*S)); +      break;      default:        llvm_unreachable("Unknown target directive for OpenMP device codegen.");      } @@ -7278,11 +7441,12 @@ void CGOpenMPRuntime::emitTargetDataCalls(      // Emit device ID if any.      llvm::Value *DeviceID = nullptr; -    if (Device) +    if (Device) {        DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), -                                           CGF.Int32Ty, /*isSigned=*/true); -    else -      DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); +                                           CGF.Int64Ty, /*isSigned=*/true); +    } else { +      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); +    }      // Emit the number of elements in the offloading arrays.      auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); @@ -7313,11 +7477,12 @@ void CGOpenMPRuntime::emitTargetDataCalls(      // Emit device ID if any.      llvm::Value *DeviceID = nullptr; -    if (Device) +    if (Device) {        DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), -                                           CGF.Int32Ty, /*isSigned=*/true); -    else -      DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); +                                           CGF.Int64Ty, /*isSigned=*/true); +    } else { +      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); +    }      // Emit the number of elements in the offloading arrays.      auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); @@ -7399,11 +7564,12 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(      // Emit device ID if any.      llvm::Value *DeviceID = nullptr; -    if (Device) +    if (Device) {        DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), -                                           CGF.Int32Ty, /*isSigned=*/true); -    else -      DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); +                                           CGF.Int64Ty, /*isSigned=*/true); +    } else { +      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); +    }      // Emit the number of elements in the offloading arrays.      auto *PointerNum = CGF.Builder.getInt32(BasePointers.size()); @@ -7415,19 +7581,23 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(      auto &RT = CGF.CGM.getOpenMPRuntime();      // Select the right runtime function call for each expected standalone      // directive. +    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();      OpenMPRTLFunction RTLFn;      switch (D.getDirectiveKind()) {      default:        llvm_unreachable("Unexpected standalone target data directive.");        break;      case OMPD_target_enter_data: -      RTLFn = OMPRTL__tgt_target_data_begin; +      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait +                        : OMPRTL__tgt_target_data_begin;        break;      case OMPD_target_exit_data: -      RTLFn = OMPRTL__tgt_target_data_end; +      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait +                        : OMPRTL__tgt_target_data_end;        break;      case OMPD_target_update: -      RTLFn = OMPRTL__tgt_target_data_update; +      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait +                        : OMPRTL__tgt_target_data_update;        break;      }      CGF.EmitRuntimeCall(RT.createRuntimeFunction(RTLFn), OffloadingArgs); @@ -7777,3 +7947,29 @@ void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,    CGF.EmitRuntimeCall(RTLFn, Args);  } +void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, llvm::Value *Callee, +                               ArrayRef<llvm::Value *> Args, +                               SourceLocation Loc) const { +  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); + +  if (auto *Fn = dyn_cast<llvm::Function>(Callee)) { +    if (Fn->doesNotThrow()) { +      CGF.EmitNounwindRuntimeCall(Fn, Args); +      return; +    } +  } +  CGF.EmitRuntimeCall(Callee, Args); +} + +void CGOpenMPRuntime::emitOutlinedFunctionCall( +    CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, +    ArrayRef<llvm::Value *> Args) const { +  assert(Loc.isValid() && "Outlined function call location must be valid."); +  emitCall(CGF, OutlinedFn, Args, Loc); +} + +Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, +                                             const VarDecl *NativeParam, +                                             const VarDecl *TargetParam) const { +  return CGF.GetAddrOfLocalVar(NativeParam); +}  | 
