diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2020-07-26 19:36:28 +0000 | 
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2020-07-26 19:36:28 +0000 | 
| commit | cfca06d7963fa0909f90483b42a6d7d194d01e08 (patch) | |
| tree | 209fb2a2d68f8f277793fc8df46c753d31bc853b /clang/lib/CodeGen/CGStmtOpenMP.cpp | |
| parent | 706b4fc47bbc608932d3b491ae19a3b9cde9497b (diff) | |
Notes
Diffstat (limited to 'clang/lib/CodeGen/CGStmtOpenMP.cpp')
| -rw-r--r-- | clang/lib/CodeGen/CGStmtOpenMP.cpp | 1726 | 
1 files changed, 1426 insertions, 300 deletions
| diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index dc3899f0e4ea..cfd5eda8cc80 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -18,14 +18,22 @@  #include "clang/AST/ASTContext.h"  #include "clang/AST/Attr.h"  #include "clang/AST/DeclOpenMP.h" +#include "clang/AST/OpenMPClause.h"  #include "clang/AST/Stmt.h"  #include "clang/AST/StmtOpenMP.h" +#include "clang/Basic/OpenMPKinds.h"  #include "clang/Basic/PrettyStackTrace.h" +#include "llvm/Frontend/OpenMP/OMPConstants.h"  #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/AtomicOrdering.h"  using namespace clang;  using namespace CodeGen;  using namespace llvm::omp; +static const VarDecl *getBaseDecl(const Expr *Ref); +  namespace {  /// Lexical scope for OpenMP executable constructs, that handles correct codegen  /// for captured expressions. @@ -53,7 +61,8 @@ class OMPLexicalScope : public CodeGenFunction::LexicalScope {    static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {      return CGF.LambdaCaptureFields.lookup(VD) ||             (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) || -           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl)); +           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) && +            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));    }  public: @@ -214,6 +223,12 @@ public:            if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))              CGF.EmitVarDecl(*OED);          } +      } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) { +        for (const Expr *E : UDP->varlists()) { +          const Decl *D = getBaseDecl(E); +          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D)) +            CGF.EmitVarDecl(*OED); +        }        }      }      if (!isOpenMPSimdDirective(S.getDirectiveKind())) @@ -365,26 +380,28 @@ static QualType getCanonicalParamType(ASTContext &C, QualType T) {  }  namespace { -  /// Contains required data for proper outlined function codegen. -  struct FunctionOptions { -    /// Captured statement for which the function is generated. -    const CapturedStmt *S = nullptr; -    /// true if cast to/from  UIntPtr is required for variables captured by -    /// value. -    const bool UIntPtrCastRequired = true; -    /// true if only casted arguments must be registered as local args or VLA -    /// sizes. -    const bool RegisterCastedArgsOnly = false; -    /// Name of the generated function. -    const StringRef FunctionName; -    explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired, -                             bool RegisterCastedArgsOnly, -                             StringRef FunctionName) -        : S(S), UIntPtrCastRequired(UIntPtrCastRequired), -          RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly), -          FunctionName(FunctionName) {} -  }; -} +/// Contains required data for proper outlined function codegen. +struct FunctionOptions { +  /// Captured statement for which the function is generated. +  const CapturedStmt *S = nullptr; +  /// true if cast to/from  UIntPtr is required for variables captured by +  /// value. +  const bool UIntPtrCastRequired = true; +  /// true if only casted arguments must be registered as local args or VLA +  /// sizes. +  const bool RegisterCastedArgsOnly = false; +  /// Name of the generated function. +  const StringRef FunctionName; +  /// Location of the non-debug version of the outlined function. +  SourceLocation Loc; +  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired, +                           bool RegisterCastedArgsOnly, StringRef FunctionName, +                           SourceLocation Loc) +      : S(S), UIntPtrCastRequired(UIntPtrCastRequired), +        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly), +        FunctionName(FunctionName), Loc(Loc) {} +}; +} // namespace  static llvm::Function *emitOutlinedFunctionPrologue(      CodeGenFunction &CGF, FunctionArgList &Args, @@ -485,7 +502,9 @@ static llvm::Function *emitOutlinedFunctionPrologue(    // Generate the function.    CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs, -                    FO.S->getBeginLoc(), CD->getBody()->getBeginLoc()); +                    FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(), +                    FO.UIntPtrCastRequired ? FO.Loc +                                           : CD->getBody()->getBeginLoc());    unsigned Cnt = CD->getContextParamPosition();    I = FO.S->captures().begin();    for (const FieldDecl *FD : RD->fields()) { @@ -560,7 +579,8 @@ static llvm::Function *emitOutlinedFunctionPrologue(  }  llvm::Function * -CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { +CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, +                                                    SourceLocation Loc) {    assert(        CapturedStmtInfo &&        "CapturedStmtInfo should be set when generating the captured function"); @@ -577,7 +597,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {    if (NeedWrapperFunction)      Out << "_debug__";    FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false, -                     Out.str()); +                     Out.str(), Loc);    llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,                                                     VLASizes, CXXThisValue, FO);    CodeGenFunction::OMPPrivateScope LocalScope(*this); @@ -600,7 +620,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {    FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,                              /*RegisterCastedArgsOnly=*/true, -                            CapturedStmtInfo->getHelperName()); +                            CapturedStmtInfo->getHelperName(), Loc);    CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);    WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;    Args.clear(); @@ -632,8 +652,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {      }      CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));    } -  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getBeginLoc(), -                                                  F, CallArgs); +  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);    WrapperCGF.FinishFunction();    return WrapperF;  } @@ -747,11 +766,12 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,        getLangOpts().OpenMPIsDevice &&        isOpenMPTargetExecutionDirective(D.getDirectiveKind());    bool FirstprivateIsLastprivate = false; -  llvm::DenseSet<const VarDecl *> Lastprivates; +  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;    for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {      for (const auto *D : C->varlists()) -      Lastprivates.insert( -          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); +      Lastprivates.try_emplace( +          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(), +          C->getKind());    }    llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;    llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; @@ -761,8 +781,8 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,    bool MustEmitFirstprivateCopy =        CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;    for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) { -    auto IRef = C->varlist_begin(); -    auto InitsRef = C->inits().begin(); +    const auto *IRef = C->varlist_begin(); +    const auto *InitsRef = C->inits().begin();      for (const Expr *IInit : C->private_copies()) {        const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());        bool ThisFirstprivateIsLastprivate = @@ -853,14 +873,34 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,                });          } else {            Address OriginalAddr = OriginalLVal.getAddress(*this); -          IsRegistered = PrivateScope.addPrivate( -              OrigVD, [this, VDInit, OriginalAddr, VD]() { +          IsRegistered = +              PrivateScope.addPrivate(OrigVD, [this, VDInit, OriginalAddr, VD, +                                               ThisFirstprivateIsLastprivate, +                                               OrigVD, &Lastprivates, IRef]() {                  // Emit private VarDecl with copy init.                  // Remap temp VDInit variable to the address of the original                  // variable (for proper handling of captured global variables).                  setAddrOfLocalVar(VDInit, OriginalAddr);                  EmitDecl(*VD);                  LocalDeclMap.erase(VDInit); +                if (ThisFirstprivateIsLastprivate && +                    Lastprivates[OrigVD->getCanonicalDecl()] == +                        OMPC_LASTPRIVATE_conditional) { +                  // Create/init special variable for lastprivate conditionals. +                  Address VDAddr = +                      CGM.getOpenMPRuntime().emitLastprivateConditionalInit( +                          *this, OrigVD); +                  llvm::Value *V = EmitLoadOfScalar( +                      MakeAddrLValue(GetAddrOfLocalVar(VD), (*IRef)->getType(), +                                     AlignmentSource::Decl), +                      (*IRef)->getExprLoc()); +                  EmitStoreOfScalar(V, +                                    MakeAddrLValue(VDAddr, (*IRef)->getType(), +                                                   AlignmentSource::Decl)); +                  LocalDeclMap.erase(VD); +                  setAddrOfLocalVar(VD, VDAddr); +                  return VDAddr; +                }                  return GetAddrOfLocalVar(VD);                });          } @@ -990,8 +1030,8 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit(      if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&          !getLangOpts().OpenMPSimd)        break; -    auto IRef = C->varlist_begin(); -    auto IDestRef = C->destination_exprs().begin(); +    const auto *IRef = C->varlist_begin(); +    const auto *IDestRef = C->destination_exprs().begin();      for (const Expr *IInit : C->private_copies()) {        // Keep the address of the original variable for future update at the end        // of the loop. @@ -1013,7 +1053,15 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit(          // for 'firstprivate' clause.          if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {            const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); -          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() { +          bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD, C, +                                                               OrigVD]() { +            if (C->getKind() == OMPC_LASTPRIVATE_conditional) { +              Address VDAddr = +                  CGM.getOpenMPRuntime().emitLastprivateConditionalInit(*this, +                                                                        OrigVD); +              setAddrOfLocalVar(VD, VDAddr); +              return VDAddr; +            }              // Emit private VarDecl with copy init.              EmitDecl(*VD);              return GetAddrOfLocalVar(VD); @@ -1099,7 +1147,7 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal(          if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())            PrivateAddr =                Address(Builder.CreateLoad(PrivateAddr), -                      getNaturalTypeAlignment(RefTy->getPointeeType())); +                      CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));          // Store the last value to the private copy in the last iteration.          if (C->getKind() == OMPC_LASTPRIVATE_conditional)            CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate( @@ -1122,7 +1170,7 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal(  void CodeGenFunction::EmitOMPReductionClauseInit(      const OMPExecutableDirective &D, -    CodeGenFunction::OMPPrivateScope &PrivateScope) { +    CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {    if (!HaveInsertPoint())      return;    SmallVector<const Expr *, 4> Shareds; @@ -1130,32 +1178,36 @@ void CodeGenFunction::EmitOMPReductionClauseInit(    SmallVector<const Expr *, 4> ReductionOps;    SmallVector<const Expr *, 4> LHSs;    SmallVector<const Expr *, 4> RHSs; +  OMPTaskDataTy Data; +  SmallVector<const Expr *, 4> TaskLHSs; +  SmallVector<const Expr *, 4> TaskRHSs;    for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { -    auto IPriv = C->privates().begin(); -    auto IRed = C->reduction_ops().begin(); -    auto ILHS = C->lhs_exprs().begin(); -    auto IRHS = C->rhs_exprs().begin(); -    for (const Expr *Ref : C->varlists()) { -      Shareds.emplace_back(Ref); -      Privates.emplace_back(*IPriv); -      ReductionOps.emplace_back(*IRed); -      LHSs.emplace_back(*ILHS); -      RHSs.emplace_back(*IRHS); -      std::advance(IPriv, 1); -      std::advance(IRed, 1); -      std::advance(ILHS, 1); -      std::advance(IRHS, 1); +    if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan)) +      continue; +    Shareds.append(C->varlist_begin(), C->varlist_end()); +    Privates.append(C->privates().begin(), C->privates().end()); +    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); +    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); +    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); +    if (C->getModifier() == OMPC_REDUCTION_task) { +      Data.ReductionVars.append(C->privates().begin(), C->privates().end()); +      Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); +      Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); +      Data.ReductionOps.append(C->reduction_ops().begin(), +                               C->reduction_ops().end()); +      TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); +      TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());      }    } -  ReductionCodeGen RedCG(Shareds, Privates, ReductionOps); +  ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);    unsigned Count = 0; -  auto ILHS = LHSs.begin(); -  auto IRHS = RHSs.begin(); -  auto IPriv = Privates.begin(); +  auto *ILHS = LHSs.begin(); +  auto *IRHS = RHSs.begin(); +  auto *IPriv = Privates.begin();    for (const Expr *IRef : Shareds) {      const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());      // Emit private VarDecl with reduction init. -    RedCG.emitSharedLValue(*this, Count); +    RedCG.emitSharedOrigLValue(*this, Count);      RedCG.emitAggregateType(*this, Count);      AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);      RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(), @@ -1222,6 +1274,118 @@ void CodeGenFunction::EmitOMPReductionClauseInit(      ++IPriv;      ++Count;    } +  if (!Data.ReductionVars.empty()) { +    Data.IsReductionWithTaskMod = true; +    Data.IsWorksharingReduction = +        isOpenMPWorksharingDirective(D.getDirectiveKind()); +    llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit( +        *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data); +    const Expr *TaskRedRef = nullptr; +    switch (D.getDirectiveKind()) { +    case OMPD_parallel: +      TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr(); +      break; +    case OMPD_for: +      TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr(); +      break; +    case OMPD_sections: +      TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr(); +      break; +    case OMPD_parallel_for: +      TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr(); +      break; +    case OMPD_parallel_master: +      TaskRedRef = +          cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr(); +      break; +    case OMPD_parallel_sections: +      TaskRedRef = +          cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr(); +      break; +    case OMPD_target_parallel: +      TaskRedRef = +          cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr(); +      break; +    case OMPD_target_parallel_for: +      TaskRedRef = +          cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr(); +      break; +    case OMPD_distribute_parallel_for: +      TaskRedRef = +          cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr(); +      break; +    case OMPD_teams_distribute_parallel_for: +      TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D) +                       .getTaskReductionRefExpr(); +      break; +    case OMPD_target_teams_distribute_parallel_for: +      TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D) +                       .getTaskReductionRefExpr(); +      break; +    case OMPD_simd: +    case OMPD_for_simd: +    case OMPD_section: +    case OMPD_single: +    case OMPD_master: +    case OMPD_critical: +    case OMPD_parallel_for_simd: +    case OMPD_task: +    case OMPD_taskyield: +    case OMPD_barrier: +    case OMPD_taskwait: +    case OMPD_taskgroup: +    case OMPD_flush: +    case OMPD_depobj: +    case OMPD_scan: +    case OMPD_ordered: +    case OMPD_atomic: +    case OMPD_teams: +    case OMPD_target: +    case OMPD_cancellation_point: +    case OMPD_cancel: +    case OMPD_target_data: +    case OMPD_target_enter_data: +    case OMPD_target_exit_data: +    case OMPD_taskloop: +    case OMPD_taskloop_simd: +    case OMPD_master_taskloop: +    case OMPD_master_taskloop_simd: +    case OMPD_parallel_master_taskloop: +    case OMPD_parallel_master_taskloop_simd: +    case OMPD_distribute: +    case OMPD_target_update: +    case OMPD_distribute_parallel_for_simd: +    case OMPD_distribute_simd: +    case OMPD_target_parallel_for_simd: +    case OMPD_target_simd: +    case OMPD_teams_distribute: +    case OMPD_teams_distribute_simd: +    case OMPD_teams_distribute_parallel_for_simd: +    case OMPD_target_teams: +    case OMPD_target_teams_distribute: +    case OMPD_target_teams_distribute_parallel_for_simd: +    case OMPD_target_teams_distribute_simd: +    case OMPD_declare_target: +    case OMPD_end_declare_target: +    case OMPD_threadprivate: +    case OMPD_allocate: +    case OMPD_declare_reduction: +    case OMPD_declare_mapper: +    case OMPD_declare_simd: +    case OMPD_requires: +    case OMPD_declare_variant: +    case OMPD_begin_declare_variant: +    case OMPD_end_declare_variant: +    case OMPD_unknown: +    default: +      llvm_unreachable("Enexpected directive with task reductions."); +    } + +    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl()); +    EmitVarDecl(*VD); +    EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD), +                      /*Volatile=*/false, TaskRedRef->getType()); +  }  }  void CodeGenFunction::EmitOMPReductionClauseFinal( @@ -1233,14 +1397,25 @@ void CodeGenFunction::EmitOMPReductionClauseFinal(    llvm::SmallVector<const Expr *, 8> RHSExprs;    llvm::SmallVector<const Expr *, 8> ReductionOps;    bool HasAtLeastOneReduction = false; +  bool IsReductionWithTaskMod = false;    for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { +    // Do not emit for inscan reductions. +    if (C->getModifier() == OMPC_REDUCTION_inscan) +      continue;      HasAtLeastOneReduction = true;      Privates.append(C->privates().begin(), C->privates().end());      LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());      RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());      ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); +    IsReductionWithTaskMod = +        IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;    }    if (HasAtLeastOneReduction) { +    if (IsReductionWithTaskMod) { +      CGM.getOpenMPRuntime().emitTaskReductionFini( +          *this, D.getBeginLoc(), +          isOpenMPWorksharingDirective(D.getDirectiveKind())); +    }      bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||                        isOpenMPParallelDirective(D.getDirectiveKind()) ||                        ReductionKind == OMPD_simd; @@ -1288,6 +1463,63 @@ typedef llvm::function_ref<void(CodeGenFunction &,      CodeGenBoundParametersTy;  } // anonymous namespace +static void +checkForLastprivateConditionalUpdate(CodeGenFunction &CGF, +                                     const OMPExecutableDirective &S) { +  if (CGF.getLangOpts().OpenMP < 50) +    return; +  llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls; +  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { +    for (const Expr *Ref : C->varlists()) { +      if (!Ref->getType()->isScalarType()) +        continue; +      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); +      if (!DRE) +        continue; +      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); +      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); +    } +  } +  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { +    for (const Expr *Ref : C->varlists()) { +      if (!Ref->getType()->isScalarType()) +        continue; +      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); +      if (!DRE) +        continue; +      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); +      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); +    } +  } +  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { +    for (const Expr *Ref : C->varlists()) { +      if (!Ref->getType()->isScalarType()) +        continue; +      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); +      if (!DRE) +        continue; +      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); +      CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); +    } +  } +  // Privates should ne analyzed since they are not captured at all. +  // Task reductions may be skipped - tasks are ignored. +  // Firstprivates do not return value but may be passed by reference - no need +  // to check for updated lastprivate conditional. +  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { +    for (const Expr *Ref : C->varlists()) { +      if (!Ref->getType()->isScalarType()) +        continue; +      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); +      if (!DRE) +        continue; +      PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); +    } +  } +  CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional( +      CGF, S, PrivateDecls); +} +  static void emitCommonOMPParallelDirective(      CodeGenFunction &CGF, const OMPExecutableDirective &S,      OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, @@ -1334,9 +1566,97 @@ static void emitEmptyBoundParameters(CodeGenFunction &,                                       const OMPExecutableDirective &,                                       llvm::SmallVectorImpl<llvm::Value *> &) {} -void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { +Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable( +    CodeGenFunction &CGF, const VarDecl *VD) { +  CodeGenModule &CGM = CGF.CGM; +  auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); + +  if (!VD) +    return Address::invalid(); +  const VarDecl *CVD = VD->getCanonicalDecl(); +  if (!CVD->hasAttr<OMPAllocateDeclAttr>()) +    return Address::invalid(); +  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); +  // Use the default allocation. +  if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && +      !AA->getAllocator()) +    return Address::invalid(); +  llvm::Value *Size; +  CharUnits Align = CGM.getContext().getDeclAlign(CVD); +  if (CVD->getType()->isVariablyModifiedType()) { +    Size = CGF.getTypeSize(CVD->getType()); +    // Align the size: ((size + align - 1) / align) * align +    Size = CGF.Builder.CreateNUWAdd( +        Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); +    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); +    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); +  } else { +    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); +    Size = CGM.getSize(Sz.alignTo(Align)); +  } + +  assert(AA->getAllocator() && +         "Expected allocator expression for non-default allocator."); +  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); +  // According to the standard, the original allocator type is a enum (integer). +  // Convert to pointer type, if required. +  if (Allocator->getType()->isIntegerTy()) +    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); +  else if (Allocator->getType()->isPointerTy()) +    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, +                                                                CGM.VoidPtrTy); + +  llvm::Value *Addr = OMPBuilder.CreateOMPAlloc( +      CGF.Builder, Size, Allocator, +      getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", ".")); +  llvm::CallInst *FreeCI = +      OMPBuilder.CreateOMPFree(CGF.Builder, Addr, Allocator); + +  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI); +  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( +      Addr, +      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), +      getNameWithSeparators({CVD->getName(), ".addr"}, ".", ".")); +  return Address(Addr, Align); +} + +Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate( +    CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, +    SourceLocation Loc) { +  CodeGenModule &CGM = CGF.CGM; +  if (CGM.getLangOpts().OpenMPUseTLS && +      CGM.getContext().getTargetInfo().isTLSSupported()) +    return VDAddr; + +  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); + +  llvm::Type *VarTy = VDAddr.getElementType(); +  llvm::Value *Data = +      CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy); +  llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)); +  std::string Suffix = getNameWithSeparators({"cache", ""}); +  llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix); + +  llvm::CallInst *ThreadPrivateCacheCall = +      OMPBuilder.CreateCachedThreadPrivate(CGF.Builder, Data, Size, CacheName); + +  return Address(ThreadPrivateCacheCall, VDAddr.getAlignment()); +} -  if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) { +std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators( +    ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) { +  SmallString<128> Buffer; +  llvm::raw_svector_ostream OS(Buffer); +  StringRef Sep = FirstSeparator; +  for (StringRef Part : Parts) { +    OS << Sep << Part; +    Sep = Separator; +  } +  return OS.str().str(); +} +void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { +  if (CGM.getLangOpts().OpenMPIRBuilder) { +    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();      // Check if we have any if clause associated with the directive.      llvm::Value *IfCond = nullptr;      if (const auto *C = S.getSingleClause<OMPIfClause>()) @@ -1357,15 +1677,7 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {      // The cleanup callback that finalizes all variabels at the given location,      // thus calls destructors etc.      auto FiniCB = [this](InsertPointTy IP) { -      CGBuilderTy::InsertPointGuard IPG(Builder); -      assert(IP.getBlock()->end() != IP.getPoint() && -             "OpenMP IR Builder should cause terminated block!"); -      llvm::BasicBlock *IPBB = IP.getBlock(); -      llvm::BasicBlock *DestBB = IPBB->splitBasicBlock(IP.getPoint()); -      IPBB->getTerminator()->eraseFromParent(); -      Builder.SetInsertPoint(IPBB); -      CodeGenFunction::JumpDest Dest = getJumpDestInCurrentScope(DestBB); -      EmitBranchThroughCleanup(Dest); +      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);      };      // Privatization callback that performs appropriate action for @@ -1387,32 +1699,17 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {      auto BodyGenCB = [ParallelRegionBodyStmt,                        this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,                              llvm::BasicBlock &ContinuationBB) { -      auto OldAllocaIP = AllocaInsertPt; -      AllocaInsertPt = &*AllocaIP.getPoint(); - -      auto OldReturnBlock = ReturnBlock; -      ReturnBlock = getJumpDestInCurrentScope(&ContinuationBB); - -      llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); -      CodeGenIPBB->splitBasicBlock(CodeGenIP.getPoint()); -      llvm::Instruction *CodeGenIPBBTI = CodeGenIPBB->getTerminator(); -      CodeGenIPBBTI->removeFromParent(); - -      Builder.SetInsertPoint(CodeGenIPBB); - -      EmitStmt(ParallelRegionBodyStmt); - -      Builder.Insert(CodeGenIPBBTI); - -      AllocaInsertPt = OldAllocaIP; -      ReturnBlock = OldReturnBlock; +      OMPBuilderCBHelpers::OutlinedRegionBodyRAII ORB(*this, AllocaIP, +                                                      ContinuationBB); +      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, ParallelRegionBodyStmt, +                                             CodeGenIP, ContinuationBB);      };      CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); -    Builder.restoreIP(OMPBuilder->CreateParallel(Builder, BodyGenCB, PrivCB, -                                                 FiniCB, IfCond, NumThreads, -                                                 ProcBind, S.hasCancel())); +    Builder.restoreIP(OMPBuilder.CreateParallel(Builder, BodyGenCB, PrivCB, +                                                FiniCB, IfCond, NumThreads, +                                                ProcBind, S.hasCancel()));      return;    } @@ -1436,10 +1733,16 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {      CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());      CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);    }; -  emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen, -                                 emitEmptyBoundParameters); -  emitPostUpdateForReductionClause(*this, S, -                                   [](CodeGenFunction &) { return nullptr; }); +  { +    auto LPCRegion = +        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); +    emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen, +                                   emitEmptyBoundParameters); +    emitPostUpdateForReductionClause(*this, S, +                                     [](CodeGenFunction &) { return nullptr; }); +  } +  // Check for outer lastprivate conditional update. +  checkForLastprivateConditionalUpdate(*this, S);  }  static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop, @@ -1506,6 +1809,27 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,                           getProfileCount(D.getBody()));      EmitBlock(NextBB);    } + +  OMPPrivateScope InscanScope(*this); +  EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true); +  bool IsInscanRegion = InscanScope.Privatize(); +  if (IsInscanRegion) { +    // Need to remember the block before and after scan directive +    // to dispatch them correctly depending on the clause used in +    // this directive, inclusive or exclusive. For inclusive scan the natural +    // order of the blocks is used, for exclusive clause the blocks must be +    // executed in reverse order. +    OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb"); +    OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb"); +    // No need to allocate inscan exit block, in simd mode it is selected in the +    // codegen for the scan directive. +    if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd) +      OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb"); +    OMPScanDispatch = createBasicBlock("omp.inscan.dispatch"); +    EmitBranch(OMPScanDispatch); +    EmitBlock(OMPBeforeScanBlock); +  } +    // Emit loop variables for C++ range loops.    const Stmt *Body =        D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); @@ -1515,13 +1839,17 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,                 Body, /*TryImperfectlyNestedLoops=*/true),             D.getCollapsedNumber()); +  // Jump to the dispatcher at the end of the loop body. +  if (IsInscanRegion) +    EmitBranch(OMPScanExitBlock); +    // The end (updates/cleanups).    EmitBlock(Continue.getBlock());    BreakContinueStack.pop_back();  }  void CodeGenFunction::EmitOMPInnerLoop( -    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond, +    const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,      const Expr *IncExpr,      const llvm::function_ref<void(CodeGenFunction &)> BodyGen,      const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) { @@ -1531,8 +1859,19 @@ void CodeGenFunction::EmitOMPInnerLoop(    auto CondBlock = createBasicBlock("omp.inner.for.cond");    EmitBlock(CondBlock);    const SourceRange R = S.getSourceRange(); -  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), -                 SourceLocToDebugLoc(R.getEnd())); + +  // If attributes are attached, push to the basic block with them. +  const auto &OMPED = cast<OMPExecutableDirective>(S); +  const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt(); +  const Stmt *SS = ICS->getCapturedStmt(); +  const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS); +  if (AS) +    LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), +                   AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()), +                   SourceLocToDebugLoc(R.getEnd())); +  else +    LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), +                   SourceLocToDebugLoc(R.getEnd()));    // If there are any cleanups between here and the loop-exit scope,    // create a block to stage a loop exit along. @@ -1671,7 +2010,7 @@ static void emitAlignedClause(CodeGenFunction &CGF,               "alignment is not power of 2");        if (Alignment != 0) {          llvm::Value *PtrValue = CGF.EmitScalarExpr(E); -        CGF.EmitAlignmentAssumption( +        CGF.emitAlignmentAssumption(              PtrValue, E, /*No second loc needed*/ SourceLocation(),              llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment));        } @@ -1835,6 +2174,18 @@ void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,    LoopStack.setParallel(!IsMonotonic);    LoopStack.setVectorizeEnable();    emitSimdlenSafelenClause(*this, D, IsMonotonic); +  if (const auto *C = D.getSingleClause<OMPOrderClause>()) +    if (C->getKind() == OMPC_ORDER_concurrent) +      LoopStack.setParallel(/*Enable=*/true); +  if ((D.getDirectiveKind() == OMPD_simd || +       (getLangOpts().OpenMPSimd && +        isOpenMPSimdDirective(D.getDirectiveKind()))) && +      llvm::any_of(D.getClausesOfKind<OMPReductionClause>(), +                   [](const OMPReductionClause *C) { +                     return C->getModifier() == OMPC_REDUCTION_inscan; +                   })) +    // Disable parallel access in case of prefix sum. +    LoopStack.setParallel(/*Enable=*/false);  }  void CodeGenFunction::EmitOMPSimdFinal( @@ -1886,7 +2237,6 @@ void CodeGenFunction::EmitOMPSimdFinal(  static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,                                           const OMPLoopDirective &S,                                           CodeGenFunction::JumpDest LoopExit) { -  CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter(CGF, S);    CGF.EmitOMPLoopBody(S, LoopExit);    CGF.EmitStopPoint(&S);  } @@ -1917,12 +2267,14 @@ static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,      BodyCodeGen(CGF);    };    const Expr *IfCond = nullptr; -  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { -    if (CGF.getLangOpts().OpenMP >= 50 && -        (C->getNameModifier() == OMPD_unknown || -         C->getNameModifier() == OMPD_simd)) { -      IfCond = C->getCondition(); -      break; +  if (isOpenMPSimdDirective(S.getDirectiveKind())) { +    for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { +      if (CGF.getLangOpts().OpenMP >= 50 && +          (C->getNameModifier() == OMPD_unknown || +           C->getNameModifier() == OMPD_simd)) { +        IfCond = C->getCondition(); +        break; +      }      }    }    if (IfCond) { @@ -2007,10 +2359,8 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,            CGF.EmitOMPInnerLoop(                S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),                [&S](CodeGenFunction &CGF) { -                CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter( -                    CGF, S); -                CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest()); -                CGF.EmitStopPoint(&S); +                emitOMPLoopBodyWithStopPoint(CGF, S, +                                             CodeGenFunction::JumpDest());                },                [](CodeGenFunction &) {});          }); @@ -2031,11 +2381,19 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,  }  void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { +  ParentLoopDirectiveForScanRegion ScanRegion(*this, S); +  OMPFirstScanLoop = true;    auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {      emitOMPSimdRegion(CGF, S, Action);    }; -  OMPLexicalScope Scope(*this, S, OMPD_unknown); -  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); +  { +    auto LPCRegion = +        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); +    OMPLexicalScope Scope(*this, S, OMPD_unknown); +    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); +  } +  // Check for outer lastprivate conditional update. +  checkForLastprivateConditionalUpdate(*this, S);  }  void CodeGenFunction::EmitOMPOuterLoop( @@ -2103,10 +2461,14 @@ void CodeGenFunction::EmitOMPOuterLoop(        [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {          // Generate !llvm.loop.parallel metadata for loads and stores for loops          // with dynamic/guided scheduling and without ordered clause. -        if (!isOpenMPSimdDirective(S.getDirectiveKind())) +        if (!isOpenMPSimdDirective(S.getDirectiveKind())) {            CGF.LoopStack.setParallel(!IsMonotonic); -        else +          if (const auto *C = S.getSingleClause<OMPOrderClause>()) +            if (C->getKind() == OMPC_ORDER_concurrent) +              CGF.LoopStack.setParallel(/*Enable=*/true); +        } else {            CGF.EmitOMPSimdInit(S, IsMonotonic); +        }        },        [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,         &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { @@ -2612,6 +2974,14 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(        bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule,            /* Chunked */ Chunk != nullptr) && HasChunkSizeOne &&            isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); +      bool IsMonotonic = +          Ordered || +          ((ScheduleKind.Schedule == OMPC_SCHEDULE_static || +            ScheduleKind.Schedule == OMPC_SCHEDULE_unknown) && +           !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic || +             ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) || +          ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || +          ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;        if ((RT.isStaticNonchunked(ScheduleKind.Schedule,                                   /* Chunked */ Chunk != nullptr) ||             StaticChunkedOne) && @@ -2620,9 +2990,13 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(              getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));          emitCommonSimdLoop(              *this, S, -            [&S](CodeGenFunction &CGF, PrePostActionTy &) { -              if (isOpenMPSimdDirective(S.getDirectiveKind())) -                CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true); +            [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) { +              if (isOpenMPSimdDirective(S.getDirectiveKind())) { +                CGF.EmitOMPSimdInit(S, IsMonotonic); +              } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) { +                if (C->getKind() == OMPC_ORDER_concurrent) +                  CGF.LoopStack.setParallel(/*Enable=*/true); +              }              },              [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,               &S, ScheduleKind, LoopExit, @@ -2663,10 +3037,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(                                     : S.getCond(),                    StaticChunkedOne ? S.getDistInc() : S.getInc(),                    [&S, LoopExit](CodeGenFunction &CGF) { -                    CGF.CGM.getOpenMPRuntime() -                        .initLastprivateConditionalCounter(CGF, S); -                    CGF.EmitOMPLoopBody(S, LoopExit); -                    CGF.EmitStopPoint(&S); +                    emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);                    },                    [](CodeGenFunction &) {});              }); @@ -2678,11 +3049,6 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(          };          OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);        } else { -        const bool IsMonotonic = -            Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static || -            ScheduleKind.Schedule == OMPC_SCHEDULE_unknown || -            ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || -            ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;          // Emit the outer loop, which requests its work chunk [LB..UB] from          // runtime and runs the inner loop to process it.          const OMPLoopArguments LoopArguments( @@ -2755,16 +3121,233 @@ emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,    return {LBVal, UBVal};  } +/// Emits the code for the directive with inscan reductions. +/// The code is the following: +/// \code +/// size num_iters = <num_iters>; +/// <type> buffer[num_iters]; +/// #pragma omp ... +/// for (i: 0..<num_iters>) { +///   <input phase>; +///   buffer[i] = red; +/// } +/// for (int k = 0; k != ceil(log2(num_iters)); ++k) +/// for (size cnt = last_iter; cnt >= pow(2, k); --k) +///   buffer[i] op= buffer[i-pow(2,k)]; +/// #pragma omp ... +/// for (0..<num_iters>) { +///   red = InclusiveScan ? buffer[i] : buffer[i-1]; +///   <scan phase>; +/// } +/// \endcode +static void emitScanBasedDirective( +    CodeGenFunction &CGF, const OMPLoopDirective &S, +    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen, +    llvm::function_ref<void(CodeGenFunction &)> FirstGen, +    llvm::function_ref<void(CodeGenFunction &)> SecondGen) { +  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast( +      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false); +  SmallVector<const Expr *, 4> Shareds; +  SmallVector<const Expr *, 4> Privates; +  SmallVector<const Expr *, 4> ReductionOps; +  SmallVector<const Expr *, 4> LHSs; +  SmallVector<const Expr *, 4> RHSs; +  SmallVector<const Expr *, 4> CopyOps; +  SmallVector<const Expr *, 4> CopyArrayTemps; +  SmallVector<const Expr *, 4> CopyArrayElems; +  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { +    assert(C->getModifier() == OMPC_REDUCTION_inscan && +           "Only inscan reductions are expected."); +    Shareds.append(C->varlist_begin(), C->varlist_end()); +    Privates.append(C->privates().begin(), C->privates().end()); +    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); +    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); +    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); +    CopyOps.append(C->copy_ops().begin(), C->copy_ops().end()); +    CopyArrayTemps.append(C->copy_array_temps().begin(), +                          C->copy_array_temps().end()); +    CopyArrayElems.append(C->copy_array_elems().begin(), +                          C->copy_array_elems().end()); +  } +  { +    // Emit buffers for each reduction variables. +    // ReductionCodeGen is required to emit correctly the code for array +    // reductions. +    ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps); +    unsigned Count = 0; +    auto *ITA = CopyArrayTemps.begin(); +    for (const Expr *IRef : Privates) { +      const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); +      // Emit variably modified arrays, used for arrays/array sections +      // reductions. +      if (PrivateVD->getType()->isVariablyModifiedType()) { +        RedCG.emitSharedOrigLValue(CGF, Count); +        RedCG.emitAggregateType(CGF, Count); +      } +      CodeGenFunction::OpaqueValueMapping DimMapping( +          CGF, +          cast<OpaqueValueExpr>( +              cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe()) +                  ->getSizeExpr()), +          RValue::get(OMPScanNumIterations)); +      // Emit temp buffer. +      CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl())); +      ++ITA; +      ++Count; +    } +  } +  CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S); +  { +    // Emit loop with input phase: +    // #pragma omp ... +    // for (i: 0..<num_iters>) { +    //   <input phase>; +    //   buffer[i] = red; +    // } +    CGF.OMPFirstScanLoop = true; +    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); +    FirstGen(CGF); +  } +  // Emit prefix reduction: +  // for (int k = 0; k <= ceil(log2(n)); ++k) +  llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock(); +  llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body"); +  llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit"); +  llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy); +  llvm::Value *Arg = +      CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy); +  llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg); +  F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy); +  LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal); +  LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy); +  llvm::Value *NMin1 = CGF.Builder.CreateNUWSub( +      OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1)); +  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc()); +  CGF.EmitBlock(LoopBB); +  auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2); +  // size pow2k = 1; +  auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2); +  Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB); +  Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB); +  // for (size i = n - 1; i >= 2 ^ k; --i) +  //   tmp[i] op= tmp[i-pow2k]; +  llvm::BasicBlock *InnerLoopBB = +      CGF.createBasicBlock("omp.inner.log.scan.body"); +  llvm::BasicBlock *InnerExitBB = +      CGF.createBasicBlock("omp.inner.log.scan.exit"); +  llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K); +  CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB); +  CGF.EmitBlock(InnerLoopBB); +  auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2); +  IVal->addIncoming(NMin1, LoopBB); +  { +    CodeGenFunction::OMPPrivateScope PrivScope(CGF); +    auto *ILHS = LHSs.begin(); +    auto *IRHS = RHSs.begin(); +    for (const Expr *CopyArrayElem : CopyArrayElems) { +      const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); +      const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); +      Address LHSAddr = Address::invalid(); +      { +        CodeGenFunction::OpaqueValueMapping IdxMapping( +            CGF, +            cast<OpaqueValueExpr>( +                cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), +            RValue::get(IVal)); +        LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF); +      } +      PrivScope.addPrivate(LHSVD, [LHSAddr]() { return LHSAddr; }); +      Address RHSAddr = Address::invalid(); +      { +        llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K); +        CodeGenFunction::OpaqueValueMapping IdxMapping( +            CGF, +            cast<OpaqueValueExpr>( +                cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), +            RValue::get(OffsetIVal)); +        RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF); +      } +      PrivScope.addPrivate(RHSVD, [RHSAddr]() { return RHSAddr; }); +      ++ILHS; +      ++IRHS; +    } +    PrivScope.Privatize(); +    CGF.CGM.getOpenMPRuntime().emitReduction( +        CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps, +        {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown}); +  } +  llvm::Value *NextIVal = +      CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1)); +  IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock()); +  CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K); +  CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB); +  CGF.EmitBlock(InnerExitBB); +  llvm::Value *Next = +      CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1)); +  Counter->addIncoming(Next, CGF.Builder.GetInsertBlock()); +  // pow2k <<= 1; +  llvm::Value *NextPow2K = CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true); +  Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock()); +  llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal); +  CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB); +  auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc()); +  CGF.EmitBlock(ExitBB); + +  CGF.OMPFirstScanLoop = false; +  SecondGen(CGF); +} + +static bool emitWorksharingDirective(CodeGenFunction &CGF, +                                     const OMPLoopDirective &S, +                                     bool HasCancel) { +  bool HasLastprivates; +  if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(), +                   [](const OMPReductionClause *C) { +                     return C->getModifier() == OMPC_REDUCTION_inscan; +                   })) { +    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { +      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); +      OMPLoopScope LoopScope(CGF, S); +      return CGF.EmitScalarExpr(S.getNumIterations()); +    }; +    const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) { +      CodeGenFunction::OMPCancelStackRAII CancelRegion( +          CGF, S.getDirectiveKind(), HasCancel); +      (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), +                                       emitForLoopBounds, +                                       emitDispatchForLoopBounds); +      // Emit an implicit barrier at the end. +      CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(), +                                                 OMPD_for); +    }; +    const auto &&SecondGen = [&S, HasCancel, +                              &HasLastprivates](CodeGenFunction &CGF) { +      CodeGenFunction::OMPCancelStackRAII CancelRegion( +          CGF, S.getDirectiveKind(), HasCancel); +      HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), +                                                   emitForLoopBounds, +                                                   emitDispatchForLoopBounds); +    }; +    emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen); +  } else { +    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(), +                                                     HasCancel); +    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), +                                                 emitForLoopBounds, +                                                 emitDispatchForLoopBounds); +  } +  return HasLastprivates; +} +  void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {    bool HasLastprivates = false;    auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,                                            PrePostActionTy &) { -    OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); -    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), -                                                 emitForLoopBounds, -                                                 emitDispatchForLoopBounds); +    HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel());    };    { +    auto LPCRegion = +        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);      OMPLexicalScope Scope(*this, S, OMPD_unknown);      CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,                                                  S.hasCancel()); @@ -2773,17 +3356,19 @@ void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {    // Emit an implicit barrier at the end.    if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)      CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); +  // Check for outer lastprivate conditional update. +  checkForLastprivateConditionalUpdate(*this, S);  }  void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {    bool HasLastprivates = false;    auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,                                            PrePostActionTy &) { -    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), -                                                 emitForLoopBounds, -                                                 emitDispatchForLoopBounds); +    HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);    };    { +    auto LPCRegion = +        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);      OMPLexicalScope Scope(*this, S, OMPD_unknown);      CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);    } @@ -2791,6 +3376,8 @@ void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {    // Emit an implicit barrier at the end.    if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)      CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); +  // Check for outer lastprivate conditional update. +  checkForLastprivateConditionalUpdate(*this, S);  }  static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, @@ -2808,7 +3395,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {    bool HasLastprivates = false;    auto &&CodeGen = [&S, CapturedStmt, CS,                      &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { -    ASTContext &C = CGF.getContext(); +    const ASTContext &C = CGF.getContext();      QualType KmpInt32Ty =          C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);      // Emit helper vars inits. @@ -2830,11 +3417,13 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {      OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);      CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);      // Generate condition for loop. -    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, -                        OK_Ordinary, S.getBeginLoc(), FPOptions()); +    BinaryOperator *Cond = BinaryOperator::Create( +        C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, OK_Ordinary, +        S.getBeginLoc(), FPOptionsOverride());      // Increment for loop counter. -    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, -                      S.getBeginLoc(), true); +    UnaryOperator *Inc = UnaryOperator::Create( +        C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, +        S.getBeginLoc(), true, FPOptionsOverride());      auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {        // Iterate through all sections and emit a switch construct:        // switch (IV) { @@ -2847,7 +3436,6 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {        //     break;        // }        // .omp.sections.exit: -      CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter(CGF, S);        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");        llvm::SwitchInst *SwitchStmt =            CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()), @@ -2905,7 +3493,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {      // IV = LB;      CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);      // while (idx <= UB) { BODY; ++idx; } -    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen, +    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen,                           [](CodeGenFunction &) {});      // Tell the runtime we are done.      auto &&CodeGen = [&S](CodeGenFunction &CGF) { @@ -2949,6 +3537,8 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {  void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {    { +    auto LPCRegion = +        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);      OMPLexicalScope Scope(*this, S, OMPD_unknown);      EmitSections(S);    } @@ -2957,6 +3547,8 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {      CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),                                             OMPD_sections);    } +  // Check for outer lastprivate conditional update. +  checkForLastprivateConditionalUpdate(*this, S);  }  void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { @@ -2995,6 +3587,8 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {      CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());    };    { +    auto LPCRegion = +        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);      OMPLexicalScope Scope(*this, S, OMPD_unknown);      CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),                                              CopyprivateVars, DestExprs, @@ -3007,6 +3601,8 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {          *this, S.getBeginLoc(),          S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);    } +  // Check for outer lastprivate conditional update. +  checkForLastprivateConditionalUpdate(*this, S);  }  static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) { @@ -3018,11 +3614,75 @@ static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {  }  void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { +  if (CGM.getLangOpts().OpenMPIRBuilder) { +    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); +    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + +    const CapturedStmt *CS = S.getInnermostCapturedStmt(); +    const Stmt *MasterRegionBodyStmt = CS->getCapturedStmt(); + +    auto FiniCB = [this](InsertPointTy IP) { +      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); +    }; + +    auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP, +                                                  InsertPointTy CodeGenIP, +                                                  llvm::BasicBlock &FiniBB) { +      OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); +      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MasterRegionBodyStmt, +                                             CodeGenIP, FiniBB); +    }; + +    CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); +    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); +    Builder.restoreIP(OMPBuilder.CreateMaster(Builder, BodyGenCB, FiniCB)); + +    return; +  }    OMPLexicalScope Scope(*this, S, OMPD_unknown);    emitMaster(*this, S);  }  void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { +  if (CGM.getLangOpts().OpenMPIRBuilder) { +    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); +    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + +    const CapturedStmt *CS = S.getInnermostCapturedStmt(); +    const Stmt *CriticalRegionBodyStmt = CS->getCapturedStmt(); +    const Expr *Hint = nullptr; +    if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) +      Hint = HintClause->getHint(); + +    // TODO: This is slightly different from what's currently being done in +    // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything +    // about typing is final. +    llvm::Value *HintInst = nullptr; +    if (Hint) +      HintInst = +          Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false); + +    auto FiniCB = [this](InsertPointTy IP) { +      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); +    }; + +    auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP, +                                                    InsertPointTy CodeGenIP, +                                                    llvm::BasicBlock &FiniBB) { +      OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); +      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CriticalRegionBodyStmt, +                                             CodeGenIP, FiniBB); +    }; + +    CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); +    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); +    Builder.restoreIP(OMPBuilder.CreateCritical( +        Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(), +        HintInst)); + +    return; +  } +    auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {      Action.Enter(CGF);      CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); @@ -3042,12 +3702,16 @@ void CodeGenFunction::EmitOMPParallelForDirective(    // directives: 'parallel' with 'for' directive.    auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {      Action.Enter(CGF); -    OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel()); -    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, -                               emitDispatchForLoopBounds); +    (void)emitWorksharingDirective(CGF, S, S.hasCancel());    }; -  emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, -                                 emitEmptyBoundParameters); +  { +    auto LPCRegion = +        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); +    emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, +                                   emitEmptyBoundParameters); +  } +  // Check for outer lastprivate conditional update. +  checkForLastprivateConditionalUpdate(*this, S);  }  void CodeGenFunction::EmitOMPParallelForSimdDirective( @@ -3056,11 +3720,16 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective(    // directives: 'parallel' with 'for' directive.    auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {      Action.Enter(CGF); -    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, -                               emitDispatchForLoopBounds); +    (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);    }; -  emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen, -                                 emitEmptyBoundParameters); +  { +    auto LPCRegion = +        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); +    emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen, +                                   emitEmptyBoundParameters); +  } +  // Check for outer lastprivate conditional update. +  checkForLastprivateConditionalUpdate(*this, S);  }  void CodeGenFunction::EmitOMPParallelMasterDirective( @@ -3086,10 +3755,16 @@ void CodeGenFunction::EmitOMPParallelMasterDirective(      emitMaster(CGF, S);      CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);    }; -  emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen, -                                 emitEmptyBoundParameters); -  emitPostUpdateForReductionClause(*this, S, -                                   [](CodeGenFunction &) { return nullptr; }); +  { +    auto LPCRegion = +        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); +    emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen, +                                   emitEmptyBoundParameters); +    emitPostUpdateForReductionClause(*this, S, +                                     [](CodeGenFunction &) { return nullptr; }); +  } +  // Check for outer lastprivate conditional update. +  checkForLastprivateConditionalUpdate(*this, S);  }  void CodeGenFunction::EmitOMPParallelSectionsDirective( @@ -3100,8 +3775,14 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective(      Action.Enter(CGF);      CGF.EmitSections(S);    }; -  emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, -                                 emitEmptyBoundParameters); +  { +    auto LPCRegion = +        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); +    emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, +                                   emitEmptyBoundParameters); +  } +  // Check for outer lastprivate conditional update. +  checkForLastprivateConditionalUpdate(*this, S);  }  void CodeGenFunction::EmitOMPTaskBasedDirective( @@ -3188,33 +3869,28 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(    SmallVector<const Expr *, 4> LHSs;    SmallVector<const Expr *, 4> RHSs;    for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { -    auto IPriv = C->privates().begin(); -    auto IRed = C->reduction_ops().begin(); -    auto ILHS = C->lhs_exprs().begin(); -    auto IRHS = C->rhs_exprs().begin(); -    for (const Expr *Ref : C->varlists()) { -      Data.ReductionVars.emplace_back(Ref); -      Data.ReductionCopies.emplace_back(*IPriv); -      Data.ReductionOps.emplace_back(*IRed); -      LHSs.emplace_back(*ILHS); -      RHSs.emplace_back(*IRHS); -      std::advance(IPriv, 1); -      std::advance(IRed, 1); -      std::advance(ILHS, 1); -      std::advance(IRHS, 1); -    } +    Data.ReductionVars.append(C->varlist_begin(), C->varlist_end()); +    Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); +    Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); +    Data.ReductionOps.append(C->reduction_ops().begin(), +                             C->reduction_ops().end()); +    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); +    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());    }    Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(        *this, S.getBeginLoc(), LHSs, RHSs, Data);    // Build list of dependences. -  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) -    for (const Expr *IRef : C->varlists()) -      Data.Dependences.emplace_back(C->getDependencyKind(), IRef); +  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { +    OMPTaskDataTy::DependData &DD = +        Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier()); +    DD.DepExprs.append(C->varlist_begin(), C->varlist_end()); +  }    auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,                      CapturedRegion](CodeGenFunction &CGF,                                      PrePostActionTy &Action) {      // Set proper addresses for generated private copies.      OMPPrivateScope Scope(CGF); +    llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;      if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||          !Data.LastprivateVars.empty()) {        llvm::FunctionType *CopyFnTy = llvm::FunctionType::get( @@ -3241,6 +3917,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(              CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),                                ".firstpriv.ptr.addr");          PrivatePtrs.emplace_back(VD, PrivatePtr); +        FirstprivatePtrs.emplace_back(VD, PrivatePtr);          CallArgs.push_back(PrivatePtr.getPointer());        }        for (const Expr *E : Data.LastprivateVars) { @@ -3271,13 +3948,21 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(        }      }      if (Data.Reductions) { +      OMPPrivateScope FirstprivateScope(CGF); +      for (const auto &Pair : FirstprivatePtrs) { +        Address Replacement(CGF.Builder.CreateLoad(Pair.second), +                            CGF.getContext().getDeclAlign(Pair.first)); +        FirstprivateScope.addPrivate(Pair.first, +                                     [Replacement]() { return Replacement; }); +      } +      (void)FirstprivateScope.Privatize();        OMPLexicalScope LexScope(CGF, S, CapturedRegion); -      ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies, -                             Data.ReductionOps); +      ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars, +                             Data.ReductionCopies, Data.ReductionOps);        llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(            CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));        for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) { -        RedCG.emitSharedLValue(CGF, Cnt); +        RedCG.emitSharedOrigLValue(CGF, Cnt);          RedCG.emitAggregateType(CGF, Cnt);          // FIXME: This must removed once the runtime library is fixed.          // Emit required threadprivate variables for @@ -3322,9 +4007,9 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(      // privatized earlier.      OMPPrivateScope InRedScope(CGF);      if (!InRedVars.empty()) { -      ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps); +      ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);        for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) { -        RedCG.emitSharedLValue(CGF, Cnt); +        RedCG.emitSharedOrigLValue(CGF, Cnt);          RedCG.emitAggregateType(CGF, Cnt);          // The taskgroup descriptor variable is always implicit firstprivate and          // privatized already during processing of the firstprivates. @@ -3333,9 +4018,13 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(          // initializer/combiner/finalizer.          CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),                                                             RedCG, Cnt); -        llvm::Value *ReductionsPtr = -            CGF.EmitLoadOfScalar(CGF.EmitLValue(TaskgroupDescriptors[Cnt]), -                                 TaskgroupDescriptors[Cnt]->getExprLoc()); +        llvm::Value *ReductionsPtr; +        if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) { +          ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr), +                                               TRExpr->getExprLoc()); +        } else { +          ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); +        }          Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(              CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));          Replacement = Address( @@ -3448,9 +4137,11 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(    }    (void)TargetScope.Privatize();    // Build list of dependences. -  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) -    for (const Expr *IRef : C->varlists()) -      Data.Dependences.emplace_back(C->getDependencyKind(), IRef); +  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { +    OMPTaskDataTy::DependData &DD = +        Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier()); +    DD.DepExprs.append(C->varlist_begin(), C->varlist_end()); +  }    auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD,                      &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {      // Set proper addresses for generated private copies. @@ -3537,6 +4228,8 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {                                              SharedsTy, CapturedStruct, IfCond,                                              Data);    }; +  auto LPCRegion = +      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);    EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);  } @@ -3562,21 +4255,13 @@ void CodeGenFunction::EmitOMPTaskgroupDirective(        SmallVector<const Expr *, 4> RHSs;        OMPTaskDataTy Data;        for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) { -        auto IPriv = C->privates().begin(); -        auto IRed = C->reduction_ops().begin(); -        auto ILHS = C->lhs_exprs().begin(); -        auto IRHS = C->rhs_exprs().begin(); -        for (const Expr *Ref : C->varlists()) { -          Data.ReductionVars.emplace_back(Ref); -          Data.ReductionCopies.emplace_back(*IPriv); -          Data.ReductionOps.emplace_back(*IRed); -          LHSs.emplace_back(*ILHS); -          RHSs.emplace_back(*IRHS); -          std::advance(IPriv, 1); -          std::advance(IRed, 1); -          std::advance(ILHS, 1); -          std::advance(IRHS, 1); -        } +        Data.ReductionVars.append(C->varlist_begin(), C->varlist_end()); +        Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); +        Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); +        Data.ReductionOps.append(C->reduction_ops().begin(), +                                 C->reduction_ops().end()); +        LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); +        RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());        }        llvm::Value *ReductionDesc =            CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(), @@ -3593,6 +4278,9 @@ void CodeGenFunction::EmitOMPTaskgroupDirective(  }  void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { +  llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>() +                                ? llvm::AtomicOrdering::NotAtomic +                                : llvm::AtomicOrdering::AcquireRelease;    CGM.getOpenMPRuntime().emitFlush(        *this,        [&S]() -> ArrayRef<const Expr *> { @@ -3601,7 +4289,233 @@ void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {                                      FlushClause->varlist_end());          return llvm::None;        }(), -      S.getBeginLoc()); +      S.getBeginLoc(), AO); +} + +void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) { +  const auto *DO = S.getSingleClause<OMPDepobjClause>(); +  LValue DOLVal = EmitLValue(DO->getDepobj()); +  if (const auto *DC = S.getSingleClause<OMPDependClause>()) { +    OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(), +                                           DC->getModifier()); +    Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end()); +    Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause( +        *this, Dependencies, DC->getBeginLoc()); +    EmitStoreOfScalar(DepAddr.getPointer(), DOLVal); +    return; +  } +  if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) { +    CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc()); +    return; +  } +  if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) { +    CGM.getOpenMPRuntime().emitUpdateClause( +        *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc()); +    return; +  } +} + +void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) { +  if (!OMPParentLoopDirectiveForScan) +    return; +  const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan; +  bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>(); +  SmallVector<const Expr *, 4> Shareds; +  SmallVector<const Expr *, 4> Privates; +  SmallVector<const Expr *, 4> LHSs; +  SmallVector<const Expr *, 4> RHSs; +  SmallVector<const Expr *, 4> ReductionOps; +  SmallVector<const Expr *, 4> CopyOps; +  SmallVector<const Expr *, 4> CopyArrayTemps; +  SmallVector<const Expr *, 4> CopyArrayElems; +  for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) { +    if (C->getModifier() != OMPC_REDUCTION_inscan) +      continue; +    Shareds.append(C->varlist_begin(), C->varlist_end()); +    Privates.append(C->privates().begin(), C->privates().end()); +    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); +    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); +    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); +    CopyOps.append(C->copy_ops().begin(), C->copy_ops().end()); +    CopyArrayTemps.append(C->copy_array_temps().begin(), +                          C->copy_array_temps().end()); +    CopyArrayElems.append(C->copy_array_elems().begin(), +                          C->copy_array_elems().end()); +  } +  if (ParentDir.getDirectiveKind() == OMPD_simd || +      (getLangOpts().OpenMPSimd && +       isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) { +    // For simd directive and simd-based directives in simd only mode, use the +    // following codegen: +    // int x = 0; +    // #pragma omp simd reduction(inscan, +: x) +    // for (..) { +    //   <first part> +    //   #pragma omp scan inclusive(x) +    //   <second part> +    //  } +    // is transformed to: +    // int x = 0; +    // for (..) { +    //   int x_priv = 0; +    //   <first part> +    //   x = x_priv + x; +    //   x_priv = x; +    //   <second part> +    // } +    // and +    // int x = 0; +    // #pragma omp simd reduction(inscan, +: x) +    // for (..) { +    //   <first part> +    //   #pragma omp scan exclusive(x) +    //   <second part> +    // } +    // to +    // int x = 0; +    // for (..) { +    //   int x_priv = 0; +    //   <second part> +    //   int temp = x; +    //   x = x_priv + x; +    //   x_priv = temp; +    //   <first part> +    // } +    llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce"); +    EmitBranch(IsInclusive +                   ? OMPScanReduce +                   : BreakContinueStack.back().ContinueBlock.getBlock()); +    EmitBlock(OMPScanDispatch); +    { +      // New scope for correct construction/destruction of temp variables for +      // exclusive scan. +      LexicalScope Scope(*this, S.getSourceRange()); +      EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock); +      EmitBlock(OMPScanReduce); +      if (!IsInclusive) { +        // Create temp var and copy LHS value to this temp value. +        // TMP = LHS; +        for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { +          const Expr *PrivateExpr = Privates[I]; +          const Expr *TempExpr = CopyArrayTemps[I]; +          EmitAutoVarDecl( +              *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl())); +          LValue DestLVal = EmitLValue(TempExpr); +          LValue SrcLVal = EmitLValue(LHSs[I]); +          EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this), +                      SrcLVal.getAddress(*this), +                      cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), +                      cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), +                      CopyOps[I]); +        } +      } +      CGM.getOpenMPRuntime().emitReduction( +          *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps, +          {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd}); +      for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { +        const Expr *PrivateExpr = Privates[I]; +        LValue DestLVal; +        LValue SrcLVal; +        if (IsInclusive) { +          DestLVal = EmitLValue(RHSs[I]); +          SrcLVal = EmitLValue(LHSs[I]); +        } else { +          const Expr *TempExpr = CopyArrayTemps[I]; +          DestLVal = EmitLValue(RHSs[I]); +          SrcLVal = EmitLValue(TempExpr); +        } +        EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this), +                    SrcLVal.getAddress(*this), +                    cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), +                    cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), +                    CopyOps[I]); +      } +    } +    EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock); +    OMPScanExitBlock = IsInclusive +                           ? BreakContinueStack.back().ContinueBlock.getBlock() +                           : OMPScanReduce; +    EmitBlock(OMPAfterScanBlock); +    return; +  } +  if (!IsInclusive) { +    EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock()); +    EmitBlock(OMPScanExitBlock); +  } +  if (OMPFirstScanLoop) { +    // Emit buffer[i] = red; at the end of the input phase. +    const auto *IVExpr = cast<OMPLoopDirective>(ParentDir) +                             .getIterationVariable() +                             ->IgnoreParenImpCasts(); +    LValue IdxLVal = EmitLValue(IVExpr); +    llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc()); +    IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false); +    for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { +      const Expr *PrivateExpr = Privates[I]; +      const Expr *OrigExpr = Shareds[I]; +      const Expr *CopyArrayElem = CopyArrayElems[I]; +      OpaqueValueMapping IdxMapping( +          *this, +          cast<OpaqueValueExpr>( +              cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), +          RValue::get(IdxVal)); +      LValue DestLVal = EmitLValue(CopyArrayElem); +      LValue SrcLVal = EmitLValue(OrigExpr); +      EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this), +                  SrcLVal.getAddress(*this), +                  cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), +                  cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), +                  CopyOps[I]); +    } +  } +  EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock()); +  if (IsInclusive) { +    EmitBlock(OMPScanExitBlock); +    EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock()); +  } +  EmitBlock(OMPScanDispatch); +  if (!OMPFirstScanLoop) { +    // Emit red = buffer[i]; at the entrance to the scan phase. +    const auto *IVExpr = cast<OMPLoopDirective>(ParentDir) +                             .getIterationVariable() +                             ->IgnoreParenImpCasts(); +    LValue IdxLVal = EmitLValue(IVExpr); +    llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc()); +    IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false); +    llvm::BasicBlock *ExclusiveExitBB = nullptr; +    if (!IsInclusive) { +      llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec"); +      ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit"); +      llvm::Value *Cmp = Builder.CreateIsNull(IdxVal); +      Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB); +      EmitBlock(ContBB); +      // Use idx - 1 iteration for exclusive scan. +      IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1)); +    } +    for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { +      const Expr *PrivateExpr = Privates[I]; +      const Expr *OrigExpr = Shareds[I]; +      const Expr *CopyArrayElem = CopyArrayElems[I]; +      OpaqueValueMapping IdxMapping( +          *this, +          cast<OpaqueValueExpr>( +              cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), +          RValue::get(IdxVal)); +      LValue SrcLVal = EmitLValue(CopyArrayElem); +      LValue DestLVal = EmitLValue(OrigExpr); +      EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this), +                  SrcLVal.getAddress(*this), +                  cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), +                  cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), +                  CopyOps[I]); +    } +    if (!IsInclusive) { +      EmitBlock(ExclusiveExitBB); +    } +  } +  EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock +                                               : OMPAfterScanBlock); +  EmitBlock(OMPAfterScanBlock);  }  void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, @@ -3790,7 +4704,7 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,              });          EmitBlock(LoopExit.getBlock());          // Tell the runtime we are done. -        RT.emitForStaticFinish(*this, S.getBeginLoc(), S.getDirectiveKind()); +        RT.emitForStaticFinish(*this, S.getEndLoc(), S.getDirectiveKind());        } else {          // Emit the outer loop, which requests its work chunk [LB..UB] from          // runtime and runs the inner loop to process it. @@ -3843,11 +4757,12 @@ void CodeGenFunction::EmitOMPDistributeDirective(  }  static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, -                                                   const CapturedStmt *S) { +                                                   const CapturedStmt *S, +                                                   SourceLocation Loc) {    CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);    CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;    CGF.CapturedStmtInfo = &CapStmtInfo; -  llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S); +  llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc);    Fn->setDoesNotRecurse();    return Fn;  } @@ -3867,7 +4782,8 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {      if (C) {        llvm::SmallVector<llvm::Value *, 16> CapturedVars;        CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); -      llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); +      llvm::Function *OutlinedFn = +          emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());        CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),                                                        OutlinedFn, CapturedVars);      } else { @@ -3918,16 +4834,22 @@ convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,    return ComplexVal;  } -static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst, +static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,                                    LValue LVal, RValue RVal) { -  if (LVal.isGlobalReg()) { +  if (LVal.isGlobalReg())      CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal); -  } else { -    CGF.EmitAtomicStore(RVal, LVal, -                        IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent -                                 : llvm::AtomicOrdering::Monotonic, -                        LVal.isVolatile(), /*isInit=*/false); -  } +  else +    CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false); +} + +static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF, +                                   llvm::AtomicOrdering AO, LValue LVal, +                                   SourceLocation Loc) { +  if (LVal.isGlobalReg()) +    return CGF.EmitLoadOfLValue(LVal, Loc); +  return CGF.EmitAtomicLoad( +      LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO), +      LVal.isVolatile());  }  void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, @@ -3948,7 +4870,7 @@ void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,    }  } -static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, +static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,                                    const Expr *X, const Expr *V,                                    SourceLocation Loc) {    // v = x; @@ -3956,34 +4878,54 @@ static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,    assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");    LValue XLValue = CGF.EmitLValue(X);    LValue VLValue = CGF.EmitLValue(V); -  RValue Res = XLValue.isGlobalReg() -                   ? CGF.EmitLoadOfLValue(XLValue, Loc) -                   : CGF.EmitAtomicLoad( -                         XLValue, Loc, -                         IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent -                                  : llvm::AtomicOrdering::Monotonic, -                         XLValue.isVolatile()); -  // OpenMP, 2.12.6, atomic Construct -  // Any atomic construct with a seq_cst clause forces the atomically -  // performed operation to include an implicit flush operation without a -  // list. -  if (IsSeqCst) -    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); +  RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc); +  // OpenMP, 2.17.7, atomic Construct +  // If the read or capture clause is specified and the acquire, acq_rel, or +  // seq_cst clause is specified then the strong flush on exit from the atomic +  // operation is also an acquire flush. +  switch (AO) { +  case llvm::AtomicOrdering::Acquire: +  case llvm::AtomicOrdering::AcquireRelease: +  case llvm::AtomicOrdering::SequentiallyConsistent: +    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, +                                         llvm::AtomicOrdering::Acquire); +    break; +  case llvm::AtomicOrdering::Monotonic: +  case llvm::AtomicOrdering::Release: +    break; +  case llvm::AtomicOrdering::NotAtomic: +  case llvm::AtomicOrdering::Unordered: +    llvm_unreachable("Unexpected ordering."); +  }    CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc); +  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);  } -static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst, -                                   const Expr *X, const Expr *E, -                                   SourceLocation Loc) { +static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF, +                                   llvm::AtomicOrdering AO, const Expr *X, +                                   const Expr *E, SourceLocation Loc) {    // x = expr;    assert(X->isLValue() && "X of 'omp atomic write' is not lvalue"); -  emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E)); -  // OpenMP, 2.12.6, atomic Construct -  // Any atomic construct with a seq_cst clause forces the atomically -  // performed operation to include an implicit flush operation without a -  // list. -  if (IsSeqCst) -    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); +  emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E)); +  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X); +  // OpenMP, 2.17.7, atomic Construct +  // If the write, update, or capture clause is specified and the release, +  // acq_rel, or seq_cst clause is specified then the strong flush on entry to +  // the atomic operation is also a release flush. +  switch (AO) { +  case llvm::AtomicOrdering::Release: +  case llvm::AtomicOrdering::AcquireRelease: +  case llvm::AtomicOrdering::SequentiallyConsistent: +    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, +                                         llvm::AtomicOrdering::Release); +    break; +  case llvm::AtomicOrdering::Acquire: +  case llvm::AtomicOrdering::Monotonic: +    break; +  case llvm::AtomicOrdering::NotAtomic: +  case llvm::AtomicOrdering::Unordered: +    llvm_unreachable("Unexpected ordering."); +  }  }  static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, @@ -4104,10 +5046,10 @@ std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(    return Res;  } -static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst, -                                    const Expr *X, const Expr *E, -                                    const Expr *UE, bool IsXLHSInRHSPart, -                                    SourceLocation Loc) { +static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, +                                    llvm::AtomicOrdering AO, const Expr *X, +                                    const Expr *E, const Expr *UE, +                                    bool IsXLHSInRHSPart, SourceLocation Loc) {    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&           "Update expr in 'atomic update' must be a binary operator.");    const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); @@ -4120,9 +5062,6 @@ static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,    assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");    LValue XLValue = CGF.EmitLValue(X);    RValue ExprRValue = CGF.EmitAnyExpr(E); -  llvm::AtomicOrdering AO = IsSeqCst -                                ? llvm::AtomicOrdering::SequentiallyConsistent -                                : llvm::AtomicOrdering::Monotonic;    const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());    const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());    const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; @@ -4134,12 +5073,25 @@ static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,    };    (void)CGF.EmitOMPAtomicSimpleUpdateExpr(        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); -  // OpenMP, 2.12.6, atomic Construct -  // Any atomic construct with a seq_cst clause forces the atomically -  // performed operation to include an implicit flush operation without a -  // list. -  if (IsSeqCst) -    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); +  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X); +  // OpenMP, 2.17.7, atomic Construct +  // If the write, update, or capture clause is specified and the release, +  // acq_rel, or seq_cst clause is specified then the strong flush on entry to +  // the atomic operation is also a release flush. +  switch (AO) { +  case llvm::AtomicOrdering::Release: +  case llvm::AtomicOrdering::AcquireRelease: +  case llvm::AtomicOrdering::SequentiallyConsistent: +    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, +                                         llvm::AtomicOrdering::Release); +    break; +  case llvm::AtomicOrdering::Acquire: +  case llvm::AtomicOrdering::Monotonic: +    break; +  case llvm::AtomicOrdering::NotAtomic: +  case llvm::AtomicOrdering::Unordered: +    llvm_unreachable("Unexpected ordering."); +  }  }  static RValue convertToType(CodeGenFunction &CGF, RValue Value, @@ -4159,7 +5111,8 @@ static RValue convertToType(CodeGenFunction &CGF, RValue Value,    llvm_unreachable("Must be a scalar or complex.");  } -static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, +static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, +                                     llvm::AtomicOrdering AO,                                       bool IsPostfixUpdate, const Expr *V,                                       const Expr *X, const Expr *E,                                       const Expr *UE, bool IsXLHSInRHSPart, @@ -4170,9 +5123,6 @@ static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,    LValue VLValue = CGF.EmitLValue(V);    LValue XLValue = CGF.EmitLValue(X);    RValue ExprRValue = CGF.EmitAnyExpr(E); -  llvm::AtomicOrdering AO = IsSeqCst -                                ? llvm::AtomicOrdering::SequentiallyConsistent -                                : llvm::AtomicOrdering::Monotonic;    QualType NewVValType;    if (UE) {      // 'x' is updated with some additional value. @@ -4200,6 +5150,7 @@ static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,      };      auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(          XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); +    CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);      if (Res.first) {        // 'atomicrmw' instruction was generated.        if (IsPostfixUpdate) { @@ -4226,6 +5177,7 @@ static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,      auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(          XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,          Loc, Gen); +    CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);      if (Res.first) {        // 'atomicrmw' instruction was generated.        NewVVal = IsPostfixUpdate ? Res.second : ExprRValue; @@ -4233,32 +5185,54 @@ static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,    }    // Emit post-update store to 'v' of old/new 'x' value.    CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc); -  // OpenMP, 2.12.6, atomic Construct -  // Any atomic construct with a seq_cst clause forces the atomically -  // performed operation to include an implicit flush operation without a -  // list. -  if (IsSeqCst) -    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); +  CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V); +  // OpenMP, 2.17.7, atomic Construct +  // If the write, update, or capture clause is specified and the release, +  // acq_rel, or seq_cst clause is specified then the strong flush on entry to +  // the atomic operation is also a release flush. +  // If the read or capture clause is specified and the acquire, acq_rel, or +  // seq_cst clause is specified then the strong flush on exit from the atomic +  // operation is also an acquire flush. +  switch (AO) { +  case llvm::AtomicOrdering::Release: +    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, +                                         llvm::AtomicOrdering::Release); +    break; +  case llvm::AtomicOrdering::Acquire: +    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, +                                         llvm::AtomicOrdering::Acquire); +    break; +  case llvm::AtomicOrdering::AcquireRelease: +  case llvm::AtomicOrdering::SequentiallyConsistent: +    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, +                                         llvm::AtomicOrdering::AcquireRelease); +    break; +  case llvm::AtomicOrdering::Monotonic: +    break; +  case llvm::AtomicOrdering::NotAtomic: +  case llvm::AtomicOrdering::Unordered: +    llvm_unreachable("Unexpected ordering."); +  }  }  static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, -                              bool IsSeqCst, bool IsPostfixUpdate, +                              llvm::AtomicOrdering AO, bool IsPostfixUpdate,                                const Expr *X, const Expr *V, const Expr *E,                                const Expr *UE, bool IsXLHSInRHSPart,                                SourceLocation Loc) {    switch (Kind) {    case OMPC_read: -    emitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc); +    emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);      break;    case OMPC_write: -    emitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc); +    emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);      break;    case OMPC_unknown:    case OMPC_update: -    emitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc); +    emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);      break;    case OMPC_capture: -    emitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE, +    emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,                               IsXLHSInRHSPart, Loc);      break;    case OMPC_if: @@ -4277,12 +5251,17 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,    case OMPC_collapse:    case OMPC_default:    case OMPC_seq_cst: +  case OMPC_acq_rel: +  case OMPC_acquire: +  case OMPC_release: +  case OMPC_relaxed:    case OMPC_shared:    case OMPC_linear:    case OMPC_aligned:    case OMPC_copyin:    case OMPC_copyprivate:    case OMPC_flush: +  case OMPC_depobj:    case OMPC_proc_bind:    case OMPC_schedule:    case OMPC_ordered: @@ -4308,6 +5287,7 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,    case OMPC_to:    case OMPC_from:    case OMPC_use_device_ptr: +  case OMPC_use_device_addr:    case OMPC_is_device_ptr:    case OMPC_unified_address:    case OMPC_unified_shared_memory: @@ -4317,38 +5297,76 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,    case OMPC_device_type:    case OMPC_match:    case OMPC_nontemporal: +  case OMPC_order: +  case OMPC_destroy: +  case OMPC_detach: +  case OMPC_inclusive: +  case OMPC_exclusive: +  case OMPC_uses_allocators: +  case OMPC_affinity: +  default:      llvm_unreachable("Clause is not allowed in 'omp atomic'.");    }  }  void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { -  bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>(); +  llvm::AtomicOrdering AO = llvm::AtomicOrdering::Monotonic; +  bool MemOrderingSpecified = false; +  if (S.getSingleClause<OMPSeqCstClause>()) { +    AO = llvm::AtomicOrdering::SequentiallyConsistent; +    MemOrderingSpecified = true; +  } else if (S.getSingleClause<OMPAcqRelClause>()) { +    AO = llvm::AtomicOrdering::AcquireRelease; +    MemOrderingSpecified = true; +  } else if (S.getSingleClause<OMPAcquireClause>()) { +    AO = llvm::AtomicOrdering::Acquire; +    MemOrderingSpecified = true; +  } else if (S.getSingleClause<OMPReleaseClause>()) { +    AO = llvm::AtomicOrdering::Release; +    MemOrderingSpecified = true; +  } else if (S.getSingleClause<OMPRelaxedClause>()) { +    AO = llvm::AtomicOrdering::Monotonic; +    MemOrderingSpecified = true; +  }    OpenMPClauseKind Kind = OMPC_unknown;    for (const OMPClause *C : S.clauses()) { -    // Find first clause (skip seq_cst clause, if it is first). -    if (C->getClauseKind() != OMPC_seq_cst) { +    // Find first clause (skip seq_cst|acq_rel|aqcuire|release|relaxed clause, +    // if it is first). +    if (C->getClauseKind() != OMPC_seq_cst && +        C->getClauseKind() != OMPC_acq_rel && +        C->getClauseKind() != OMPC_acquire && +        C->getClauseKind() != OMPC_release && +        C->getClauseKind() != OMPC_relaxed) {        Kind = C->getClauseKind();        break;      }    } - -  const Stmt *CS = S.getInnermostCapturedStmt()->IgnoreContainers(); -  if (const auto *FE = dyn_cast<FullExpr>(CS)) -    enterFullExpression(FE); -  // Processing for statements under 'atomic capture'. -  if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) { -    for (const Stmt *C : Compound->body()) { -      if (const auto *FE = dyn_cast<FullExpr>(C)) -        enterFullExpression(FE); +  if (!MemOrderingSpecified) { +    llvm::AtomicOrdering DefaultOrder = +        CGM.getOpenMPRuntime().getDefaultMemoryOrdering(); +    if (DefaultOrder == llvm::AtomicOrdering::Monotonic || +        DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent || +        (DefaultOrder == llvm::AtomicOrdering::AcquireRelease && +         Kind == OMPC_capture)) { +      AO = DefaultOrder; +    } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) { +      if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) { +        AO = llvm::AtomicOrdering::Release; +      } else if (Kind == OMPC_read) { +        assert(Kind == OMPC_read && "Unexpected atomic kind."); +        AO = llvm::AtomicOrdering::Acquire; +      }      }    } -  auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF, +  const Stmt *CS = S.getInnermostCapturedStmt()->IgnoreContainers(); + +  auto &&CodeGen = [&S, Kind, AO, CS](CodeGenFunction &CGF,                                              PrePostActionTy &) {      CGF.EmitStopPoint(CS); -    emitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(), -                      S.getV(), S.getExpr(), S.getUpdateExpr(), -                      S.isXLHSInRHSPart(), S.getBeginLoc()); +    emitOMPAtomicExpr(CGF, Kind, AO, S.isPostfixUpdate(), S.getX(), S.getV(), +                      S.getExpr(), S.getUpdateExpr(), S.isXLHSInRHSPart(), +                      S.getBeginLoc());    };    OMPLexicalScope Scope(*this, S, OMPD_unknown);    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); @@ -4370,6 +5388,8 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,      return;    } +  auto LPCRegion = +      CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);    llvm::Function *Fn = nullptr;    llvm::Constant *FnID = nullptr; @@ -4384,9 +5404,10 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,    }    // Check if we have any device clause associated with the directive. -  const Expr *Device = nullptr; +  llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device( +      nullptr, OMPC_DEVICE_unknown);    if (auto *C = S.getSingleClause<OMPDeviceClause>()) -    Device = C->getDevice(); +    Device.setPointerAndInt(C->getDevice(), C->getModifier());    // Check if we have an if clause whose conditional always evaluates to false    // or if we do not have any targets specified. If so the target region is not @@ -4856,7 +5877,8 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {        break;      }    } -  if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) { +  if (CGM.getLangOpts().OpenMPIRBuilder) { +    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();      // TODO: This check is necessary as we only generate `omp parallel` through      // the OpenMPIRBuilder for now.      if (S.getCancelRegion() == OMPD_parallel) { @@ -4865,7 +5887,7 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {          IfCondition = EmitScalarExpr(IfCond,                                       /*IgnoreResultAssign=*/true);        return Builder.restoreIP( -          OMPBuilder->CreateCancel(Builder, IfCondition, S.getCancelRegion())); +          OMPBuilder.CreateCancel(Builder, IfCondition, S.getCancelRegion()));      }    } @@ -4876,7 +5898,8 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {  CodeGenFunction::JumpDest  CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {    if (Kind == OMPD_parallel || Kind == OMPD_task || -      Kind == OMPD_target_parallel) +      Kind == OMPD_target_parallel || Kind == OMPD_taskloop || +      Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)      return ReturnBlock;    assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||           Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for || @@ -4888,9 +5911,8 @@ CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {  }  void CodeGenFunction::EmitOMPUseDevicePtrClause( -    const OMPClause &NC, OMPPrivateScope &PrivateScope, +    const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,      const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) { -  const auto &C = cast<OMPUseDevicePtrClause>(NC);    auto OrigVarIt = C.varlist_begin();    auto InitIt = C.inits().begin();    for (const Expr *PvtVarIt : C.private_copies()) { @@ -4951,6 +5973,60 @@ void CodeGenFunction::EmitOMPUseDevicePtrClause(    }  } +static const VarDecl *getBaseDecl(const Expr *Ref) { +  const Expr *Base = Ref->IgnoreParenImpCasts(); +  while (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Base)) +    Base = OASE->getBase()->IgnoreParenImpCasts(); +  while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base)) +    Base = ASE->getBase()->IgnoreParenImpCasts(); +  return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl()); +} + +void CodeGenFunction::EmitOMPUseDeviceAddrClause( +    const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope, +    const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) { +  llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; +  for (const Expr *Ref : C.varlists()) { +    const VarDecl *OrigVD = getBaseDecl(Ref); +    if (!Processed.insert(OrigVD).second) +      continue; +    // In order to identify the right initializer we need to match the +    // declaration used by the mapping logic. In some cases we may get +    // OMPCapturedExprDecl that refers to the original declaration. +    const ValueDecl *MatchingVD = OrigVD; +    if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) { +      // OMPCapturedExprDecl are used to privative fields of the current +      // structure. +      const auto *ME = cast<MemberExpr>(OED->getInit()); +      assert(isa<CXXThisExpr>(ME->getBase()) && +             "Base should be the current struct!"); +      MatchingVD = ME->getMemberDecl(); +    } + +    // If we don't have information about the current list item, move on to +    // the next one. +    auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD); +    if (InitAddrIt == CaptureDeviceAddrMap.end()) +      continue; + +    Address PrivAddr = InitAddrIt->getSecond(); +    // For declrefs and variable length array need to load the pointer for +    // correct mapping, since the pointer to the data was passed to the runtime. +    if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) || +        MatchingVD->getType()->isArrayType()) +      PrivAddr = +          EmitLoadOfPointer(PrivAddr, getContext() +                                          .getPointerType(OrigVD->getType()) +                                          ->castAs<PointerType>()); +    llvm::Type *RealTy = +        ConvertTypeForMem(OrigVD->getType().getNonReferenceType()) +            ->getPointerTo(); +    PrivAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(PrivAddr, RealTy); + +    (void)PrivateScope.addPrivate(OrigVD, [PrivAddr]() { return PrivAddr; }); +  } +} +  // Generate the instructions for '#pragma omp target data' directive.  void CodeGenFunction::EmitOMPTargetDataDirective(      const OMPTargetDataDirective &S) { @@ -4995,9 +6071,13 @@ void CodeGenFunction::EmitOMPTargetDataDirective(          for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())            CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,                                          Info.CaptureDeviceAddrMap); +        for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>()) +          CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope, +                                         Info.CaptureDeviceAddrMap);          (void)PrivateScope.Privatize();          RCG(CGF);        } else { +        OMPLexicalScope Scope(CGF, S, OMPD_unknown);          RCG(CGF);        }      }; @@ -5222,7 +6302,11 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {    assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));    // Emit outlined function for task construct.    const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop); -  Address CapturedStruct = GenerateCapturedStmtArgument(*CS); +  Address CapturedStruct = Address::invalid(); +  { +    OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false); +    CapturedStruct = GenerateCapturedStmtArgument(*CS); +  }    QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());    const Expr *IfCond = nullptr;    for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { @@ -5322,8 +6406,8 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {              CGF.EmitOMPInnerLoop(                  S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),                  [&S](CodeGenFunction &CGF) { -                  CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest()); -                  CGF.EmitStopPoint(&S); +                  emitOMPLoopBodyWithStopPoint(CGF, S, +                                               CodeGenFunction::JumpDest());                  },                  [](CodeGenFunction &) {});            }); @@ -5376,11 +6460,15 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {  }  void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { +  auto LPCRegion = +      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);    EmitOMPTaskLoopBasedDirective(S);  }  void CodeGenFunction::EmitOMPTaskLoopSimdDirective(      const OMPTaskLoopSimdDirective &S) { +  auto LPCRegion = +      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);    OMPLexicalScope Scope(*this, S);    EmitOMPTaskLoopBasedDirective(S);  } @@ -5391,6 +6479,8 @@ void CodeGenFunction::EmitOMPMasterTaskLoopDirective(      Action.Enter(CGF);      EmitOMPTaskLoopBasedDirective(S);    }; +  auto LPCRegion = +      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);    OMPLexicalScope Scope(*this, S, llvm::None, /*EmitPreInitStmt=*/false);    CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());  } @@ -5401,6 +6491,8 @@ void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(      Action.Enter(CGF);      EmitOMPTaskLoopBasedDirective(S);    }; +  auto LPCRegion = +      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);    OMPLexicalScope Scope(*this, S);    CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());  } @@ -5413,10 +6505,12 @@ void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(        Action.Enter(CGF);        CGF.EmitOMPTaskLoopBasedDirective(S);      }; -    OMPLexicalScope Scope(CGF, S, llvm::None, /*EmitPreInitStmt=*/false); +    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);      CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,                                              S.getBeginLoc());    }; +  auto LPCRegion = +      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);    emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen,                                   emitEmptyBoundParameters);  } @@ -5433,6 +6527,8 @@ void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(      CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,                                              S.getBeginLoc());    }; +  auto LPCRegion = +      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);    emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen,                                   emitEmptyBoundParameters);  } @@ -5461,19 +6557,43 @@ void CodeGenFunction::EmitOMPTargetUpdateDirective(  void CodeGenFunction::EmitSimpleOMPExecutableDirective(      const OMPExecutableDirective &D) { +  if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) { +    EmitOMPScanDirective(*SD); +    return; +  }    if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())      return;    auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) { +    OMPPrivateScope GlobalsScope(CGF); +    if (isOpenMPTaskingDirective(D.getDirectiveKind())) { +      // Capture global firstprivates to avoid crash. +      for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) { +        for (const Expr *Ref : C->varlists()) { +          const auto *DRE = cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); +          if (!DRE) +            continue; +          const auto *VD = dyn_cast<VarDecl>(DRE->getDecl()); +          if (!VD || VD->hasLocalStorage()) +            continue; +          if (!CGF.LocalDeclMap.count(VD)) { +            LValue GlobLVal = CGF.EmitLValue(Ref); +            GlobalsScope.addPrivate( +                VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); }); +          } +        } +      } +    }      if (isOpenMPSimdDirective(D.getDirectiveKind())) { +      (void)GlobalsScope.Privatize(); +      ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);        emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);      } else { -      OMPPrivateScope LoopGlobals(CGF);        if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {          for (const Expr *E : LD->counters()) {            const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());            if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {              LValue GlobLVal = CGF.EmitLValue(E); -            LoopGlobals.addPrivate( +            GlobalsScope.addPrivate(                  VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); });            }            if (isa<OMPCapturedExprDecl>(VD)) { @@ -5497,14 +6617,20 @@ void CodeGenFunction::EmitSimpleOMPExecutableDirective(            }          }        } -      LoopGlobals.Privatize(); +      (void)GlobalsScope.Privatize();        CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());      }    }; -  OMPSimdLexicalScope Scope(*this, D); -  CGM.getOpenMPRuntime().emitInlinedDirective( -      *this, -      isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd -                                                  : D.getDirectiveKind(), -      CodeGen); +  { +    auto LPCRegion = +        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D); +    OMPSimdLexicalScope Scope(*this, D); +    CGM.getOpenMPRuntime().emitInlinedDirective( +        *this, +        isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd +                                                    : D.getDirectiveKind(), +        CodeGen); +  } +  // Check for outer lastprivate conditional update. +  checkForLastprivateConditionalUpdate(*this, D);  } | 
