diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2016-01-06 20:02:26 +0000 | 
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2016-01-06 20:02:26 +0000 | 
| commit | 97b17066aaac3f1590a809d79abe98fde03821ec (patch) | |
| tree | 955a1295c3fd4378a49478ad5835ca21b769417e /lib/CodeGen/CGStmtOpenMP.cpp | |
| parent | 45b533945f0851ec234ca846e1af5ee1e4df0b6e (diff) | |
Notes
Diffstat (limited to 'lib/CodeGen/CGStmtOpenMP.cpp')
| -rw-r--r-- | lib/CodeGen/CGStmtOpenMP.cpp | 116 | 
1 files changed, 81 insertions, 35 deletions
diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index bcd2ac51929d1..14917c20c535e 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -1125,7 +1125,8 @@ emitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,  }  static void emitSimdlenSafelenClause(CodeGenFunction &CGF, -                                     const OMPExecutableDirective &D) { +                                     const OMPExecutableDirective &D, +                                     bool IsMonotonic) {    if (!CGF.HaveInsertPoint())      return;    if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) { @@ -1136,7 +1137,8 @@ static void emitSimdlenSafelenClause(CodeGenFunction &CGF,      // In presence of finite 'safelen', it may be unsafe to mark all      // the memory instructions parallel, because loop-carried      // dependences of 'safelen' iterations are possible. -    CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>()); +    if (!IsMonotonic) +      CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());    } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {      RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),                                   /*ignoreResult=*/true); @@ -1149,11 +1151,12 @@ static void emitSimdlenSafelenClause(CodeGenFunction &CGF,    }  } -void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) { +void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D, +                                      bool IsMonotonic) {    // Walk clauses and process safelen/lastprivate. -  LoopStack.setParallel(); +  LoopStack.setParallel(!IsMonotonic);    LoopStack.setVectorizeEnable(true); -  emitSimdlenSafelenClause(*this, D); +  emitSimdlenSafelenClause(*this, D, IsMonotonic);  }  void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &D) { @@ -1255,12 +1258,10 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);  } -void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind, -                                          const OMPLoopDirective &S, -                                          OMPPrivateScope &LoopScope, -                                          bool Ordered, Address LB, -                                          Address UB, Address ST, -                                          Address IL, llvm::Value *Chunk) { +void CodeGenFunction::EmitOMPForOuterLoop( +    OpenMPScheduleClauseKind ScheduleKind, bool IsMonotonic, +    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, +    Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {    auto &RT = CGM.getOpenMPRuntime();    // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). @@ -1378,13 +1379,10 @@ void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,    // Generate !llvm.loop.parallel metadata for loads and stores for loops    // with dynamic/guided scheduling and without ordered clause. -  if (!isOpenMPSimdDirective(S.getDirectiveKind())) { -    LoopStack.setParallel((ScheduleKind == OMPC_SCHEDULE_dynamic || -                           ScheduleKind == OMPC_SCHEDULE_guided) && -                          !Ordered); -  } else { -    EmitOMPSimdInit(S); -  } +  if (!isOpenMPSimdDirective(S.getDirectiveKind())) +    LoopStack.setParallel(!IsMonotonic); +  else +    EmitOMPSimdInit(S, IsMonotonic);    SourceLocation Loc = S.getLocStart();    EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), @@ -1425,14 +1423,30 @@ static LValue EmitOMPHelperVar(CodeGenFunction &CGF,    return CGF.EmitLValue(Helper);  } -static std::pair<llvm::Value * /*Chunk*/, OpenMPScheduleClauseKind> +namespace { +  struct ScheduleKindModifiersTy { +    OpenMPScheduleClauseKind Kind; +    OpenMPScheduleClauseModifier M1; +    OpenMPScheduleClauseModifier M2; +    ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind, +                            OpenMPScheduleClauseModifier M1, +                            OpenMPScheduleClauseModifier M2) +        : Kind(Kind), M1(M1), M2(M2) {} +  }; +} // namespace + +static std::pair<llvm::Value * /*Chunk*/, ScheduleKindModifiersTy>  emitScheduleClause(CodeGenFunction &CGF, const OMPLoopDirective &S,                     bool OuterRegion) {    // Detect the loop schedule kind and chunk.    auto ScheduleKind = OMPC_SCHEDULE_unknown; +  OpenMPScheduleClauseModifier M1 = OMPC_SCHEDULE_MODIFIER_unknown; +  OpenMPScheduleClauseModifier M2 = OMPC_SCHEDULE_MODIFIER_unknown;    llvm::Value *Chunk = nullptr;    if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {      ScheduleKind = C->getScheduleKind(); +    M1 = C->getFirstScheduleModifier(); +    M2 = C->getSecondScheduleModifier();      if (const auto *Ch = C->getChunkSize()) {        if (auto *ImpRef = cast_or_null<DeclRefExpr>(C->getHelperChunkSize())) {          if (OuterRegion) { @@ -1454,7 +1468,7 @@ emitScheduleClause(CodeGenFunction &CGF, const OMPLoopDirective &S,        }      }    } -  return std::make_pair(Chunk, ScheduleKind); +  return std::make_pair(Chunk, ScheduleKindModifiersTy(ScheduleKind, M1, M2));  }  bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { @@ -1530,16 +1544,21 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {        auto ScheduleInfo =            emitScheduleClause(*this, S, /*OuterRegion=*/false);        Chunk = ScheduleInfo.first; -      ScheduleKind = ScheduleInfo.second; +      ScheduleKind = ScheduleInfo.second.Kind; +      const OpenMPScheduleClauseModifier M1 = ScheduleInfo.second.M1; +      const OpenMPScheduleClauseModifier M2 = ScheduleInfo.second.M2;        const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());        const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();        const bool Ordered = S.getSingleClause<OMPOrderedClause>() != nullptr; +      // OpenMP 4.5, 2.7.1 Loop Construct, Description. +      // If the static schedule kind is specified or if the ordered clause is +      // specified, and if no monotonic modifier is specified, the effect will +      // be as if the monotonic modifier was specified.        if (RT.isStaticNonchunked(ScheduleKind,                                  /* Chunked */ Chunk != nullptr) &&            !Ordered) { -        if (isOpenMPSimdDirective(S.getDirectiveKind())) { -          EmitOMPSimdInit(S); -        } +        if (isOpenMPSimdDirective(S.getDirectiveKind())) +          EmitOMPSimdInit(S, /*IsMonotonic=*/true);          // OpenMP [2.7.1, Loop Construct, Description, table 2-1]          // When no chunk_size is specified, the iteration space is divided into          // chunks that are approximately equal in size, and at most one chunk is @@ -1549,7 +1568,8 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {                               IVSize, IVSigned, Ordered,                               IL.getAddress(), LB.getAddress(),                               UB.getAddress(), ST.getAddress()); -        auto LoopExit = getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); +        auto LoopExit = +            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));          // UB = min(UB, GlobalUB);          EmitIgnoredExpr(S.getEnsureUpperBound());          // IV = LB; @@ -1566,9 +1586,14 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {          // Tell the runtime we are done.          RT.emitForStaticFinish(*this, S.getLocStart());        } else { +        const bool IsMonotonic = Ordered || +                                 ScheduleKind == OMPC_SCHEDULE_static || +                                 ScheduleKind == OMPC_SCHEDULE_unknown || +                                 M1 == OMPC_SCHEDULE_MODIFIER_monotonic || +                                 M2 == OMPC_SCHEDULE_MODIFIER_monotonic;          // Emit the outer loop, which requests its work chunk [LB..UB] from          // runtime and runs the inner loop to process it. -        EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, Ordered, +        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,                              LB.getAddress(), UB.getAddress(), ST.getAddress(),                              IL.getAddress(), Chunk);        } @@ -2546,14 +2571,8 @@ void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {    llvm::SmallVector<llvm::Value *, 16> CapturedVars;    GenerateOpenMPCapturedVars(CS, CapturedVars); -  // Emit target region as a standalone region. -  auto &&CodeGen = [&CS](CodeGenFunction &CGF) { -    CGF.EmitStmt(CS.getCapturedStmt()); -  }; - -  // Obtain the target region outlined function. -  llvm::Value *Fn = -      CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, CodeGen); +  llvm::Function *Fn = nullptr; +  llvm::Constant *FnID = nullptr;    // Check if we have any if clause associated with the directive.    const Expr *IfCond = nullptr; @@ -2568,7 +2587,34 @@ void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {      Device = C->getDevice();    } -  CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, IfCond, Device, +  // Check if we have an if clause whose conditional always evaluates to false +  // or if we do not have any targets specified. If so the target region is not +  // an offload entry point. +  bool IsOffloadEntry = true; +  if (IfCond) { +    bool Val; +    if (ConstantFoldsToSimpleInteger(IfCond, Val) && !Val) +      IsOffloadEntry = false; +  } +  if (CGM.getLangOpts().OMPTargetTriples.empty()) +    IsOffloadEntry = false; + +  assert(CurFuncDecl && "No parent declaration for target region!"); +  StringRef ParentName; +  // In case we have Ctors/Dtors we use the complete type variant to produce +  // the mangling of the device outlined kernel. +  if (auto *D = dyn_cast<CXXConstructorDecl>(CurFuncDecl)) +    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete)); +  else if (auto *D = dyn_cast<CXXDestructorDecl>(CurFuncDecl)) +    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete)); +  else +    ParentName = +        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl))); + +  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, +                                                    IsOffloadEntry); + +  CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device,                                          CapturedVars);  }  | 
