diff options
Diffstat (limited to 'contrib/llvm-project/openmp/runtime/src/kmp_csupport.cpp')
-rw-r--r-- | contrib/llvm-project/openmp/runtime/src/kmp_csupport.cpp | 154 |
1 files changed, 85 insertions, 69 deletions
diff --git a/contrib/llvm-project/openmp/runtime/src/kmp_csupport.cpp b/contrib/llvm-project/openmp/runtime/src/kmp_csupport.cpp index a31490cbd162..4b188763a58a 100644 --- a/contrib/llvm-project/openmp/runtime/src/kmp_csupport.cpp +++ b/contrib/llvm-project/openmp/runtime/src/kmp_csupport.cpp @@ -231,13 +231,12 @@ void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads) { KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n", global_tid, num_threads)); - + __kmp_assert_valid_gtid(global_tid); __kmp_push_num_threads(loc, global_tid, num_threads); } void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) { KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n")); - /* the num_threads are automatically popped */ } @@ -245,7 +244,7 @@ void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, kmp_int32 proc_bind) { KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid, proc_bind)); - + __kmp_assert_valid_gtid(global_tid); __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind); } @@ -298,8 +297,8 @@ void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) { parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame); } ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(gtid); } + OMPT_STORE_RETURN_ADDRESS(gtid); #endif #if INCLUDE_SSC_MARKS @@ -325,6 +324,7 @@ void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) { #if KMP_STATS_ENABLED if (previous_state == stats_state_e::SERIAL_REGION) { KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial); + KMP_SET_THREAD_STATE(previous_state); } else { KMP_POP_PARTITIONED_TIMER(); } @@ -347,7 +347,7 @@ void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, KA_TRACE(20, ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n", global_tid, num_teams, num_threads)); - + __kmp_assert_valid_gtid(global_tid); __kmp_push_num_teams(loc, global_tid, num_teams, num_threads); } @@ -437,6 +437,7 @@ void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, #if KMP_STATS_ENABLED if (previous_state == stats_state_e::SERIAL_REGION) { KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial); + KMP_SET_THREAD_STATE(previous_state); } else { KMP_POP_PARTITIONED_TIMER(); } @@ -462,9 +463,10 @@ conditional parallel region, like this, when the condition is false. */ void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { -// The implementation is now in kmp_runtime.cpp so that it can share static -// functions with kmp_fork_call since the tasks to be done are similar in -// each case. + // The implementation is now in kmp_runtime.cpp so that it can share static + // functions with kmp_fork_call since the tasks to be done are similar in + // each case. + __kmp_assert_valid_gtid(global_tid); #if OMPT_SUPPORT OMPT_STORE_RETURN_ADDRESS(global_tid); #endif @@ -492,6 +494,7 @@ void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { return; // Not autopar code + __kmp_assert_valid_gtid(global_tid); if (!TCR_4(__kmp_init_parallel)) __kmp_parallel_initialize(); @@ -667,17 +670,6 @@ void __kmpc_flush(ident_t *loc) { // Nothing to see here move along #elif KMP_ARCH_PPC64 // Nothing needed here (we have a real MB above). -#if KMP_OS_CNK - // The flushing thread needs to yield here; this prevents a - // busy-waiting thread from saturating the pipeline. flush is - // often used in loops like this: - // while (!flag) { - // #pragma omp flush(flag) - // } - // and adding the yield here is good for at least a 10x speedup - // when running >2 threads per core (on the NAS LU benchmark). - __kmp_yield(); -#endif #else #error Unknown or unsupported architecture #endif @@ -701,6 +693,7 @@ Execute a barrier. void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) { KMP_COUNT_BLOCK(OMP_BARRIER); KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid)); + __kmp_assert_valid_gtid(global_tid); if (!TCR_4(__kmp_init_parallel)) __kmp_parallel_initialize(); @@ -720,8 +713,8 @@ void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) { __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); if (ompt_frame->enter_frame.ptr == NULL) ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(global_tid); } + OMPT_STORE_RETURN_ADDRESS(global_tid); #endif __kmp_threads[global_tid]->th.th_ident = loc; // TODO: explicit barrier_wait_id: @@ -750,6 +743,7 @@ kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) { int status = 0; KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid)); + __kmp_assert_valid_gtid(global_tid); if (!TCR_4(__kmp_init_parallel)) __kmp_parallel_initialize(); @@ -764,12 +758,12 @@ kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) { #if OMPT_SUPPORT && OMPT_OPTIONAL if (status) { - if (ompt_enabled.ompt_callback_master) { + if (ompt_enabled.ompt_callback_masked) { kmp_info_t *this_thr = __kmp_threads[global_tid]; kmp_team_t *team = this_thr->th.th_team; int tid = __kmp_tid_from_gtid(global_tid); - ompt_callbacks.ompt_callback(ompt_callback_master)( + ompt_callbacks.ompt_callback(ompt_callback_masked)( ompt_scope_begin, &(team->t.ompt_team_info.parallel_data), &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), OMPT_GET_RETURN_ADDRESS(0)); @@ -804,16 +798,16 @@ thread that executes the <tt>master</tt> region. */ void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) { KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid)); - + __kmp_assert_valid_gtid(global_tid); KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid)); KMP_POP_PARTITIONED_TIMER(); #if OMPT_SUPPORT && OMPT_OPTIONAL kmp_info_t *this_thr = __kmp_threads[global_tid]; kmp_team_t *team = this_thr->th.th_team; - if (ompt_enabled.ompt_callback_master) { + if (ompt_enabled.ompt_callback_masked) { int tid = __kmp_tid_from_gtid(global_tid); - ompt_callbacks.ompt_callback(ompt_callback_master)( + ompt_callbacks.ompt_callback(ompt_callback_masked)( ompt_scope_end, &(team->t.ompt_team_info.parallel_data), &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), OMPT_GET_RETURN_ADDRESS(0)); @@ -821,9 +815,6 @@ void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) { #endif if (__kmp_env_consistency_check) { - if (global_tid < 0) - KMP_WARNING(ThreadIdentInvalid); - if (KMP_MASTER_GTID(global_tid)) __kmp_pop_sync(global_tid, ct_master, loc); } @@ -842,6 +833,7 @@ void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) { KMP_DEBUG_ASSERT(__kmp_init_serial); KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid)); + __kmp_assert_valid_gtid(gtid); if (!TCR_4(__kmp_init_parallel)) __kmp_parallel_initialize(); @@ -859,8 +851,8 @@ void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) { kmp_team_t *team; ompt_wait_id_t lck; void *codeptr_ra; + OMPT_STORE_RETURN_ADDRESS(gtid); if (ompt_enabled.enabled) { - OMPT_STORE_RETURN_ADDRESS(gtid); team = __kmp_team_from_gtid(gtid); lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value; /* OMPT state update */ @@ -913,6 +905,7 @@ void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) { kmp_info_t *th; KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid)); + __kmp_assert_valid_gtid(gtid); #if USE_ITT_BUILD __kmp_itt_ordered_end(gtid); @@ -1135,7 +1128,7 @@ static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit, /*! @ingroup WORK_SHARING @param loc source location information. -@param global_tid global thread number . +@param global_tid global thread number. @param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or some other suitably unique value. @@ -1158,6 +1151,7 @@ void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, kmp_user_lock_p lck; KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); + __kmp_assert_valid_gtid(global_tid); // TODO: add THR_OVHD_STATE @@ -1255,7 +1249,7 @@ static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) { if (hint & kmp_lock_hint_hle) return KMP_TSX_LOCK(hle); if (hint & kmp_lock_hint_rtm) - return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq; + return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq; if (hint & kmp_lock_hint_adaptive) return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq; @@ -1274,9 +1268,9 @@ static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) { if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative)) return lockseq_tas; - // HLE lock for speculation + // Use RTM lock for speculation if (hint & omp_lock_hint_speculative) - return KMP_TSX_LOCK(hle); + return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq; return __kmp_user_lock_seq; } @@ -1297,6 +1291,7 @@ __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) { return kmp_mutex_impl_spin; #if KMP_USE_TSX case locktag_hle: + case locktag_rtm_spin: return kmp_mutex_impl_speculative; #endif default: @@ -1308,7 +1303,7 @@ __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) { switch (ilock->type) { #if KMP_USE_TSX case locktag_adaptive: - case locktag_rtm: + case locktag_rtm_queuing: return kmp_mutex_impl_speculative; #endif case locktag_nested_tas: @@ -1342,7 +1337,8 @@ static kmp_mutex_impl_t __ompt_get_mutex_impl_type() { return kmp_mutex_impl_queuing; #if KMP_USE_TSX case lk_hle: - case lk_rtm: + case lk_rtm_queuing: + case lk_rtm_spin: case lk_adaptive: return kmp_mutex_impl_speculative; #endif @@ -1380,6 +1376,7 @@ void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, #endif KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); + __kmp_assert_valid_gtid(global_tid); kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; // Check if it is initialized. @@ -1595,8 +1592,8 @@ this function. */ kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) { int status; - KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid)); + __kmp_assert_valid_gtid(global_tid); if (!TCR_4(__kmp_init_parallel)) __kmp_parallel_initialize(); @@ -1612,8 +1609,8 @@ kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) { __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); if (ompt_frame->enter_frame.ptr == NULL) ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(global_tid); } + OMPT_STORE_RETURN_ADDRESS(global_tid); #endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; @@ -1639,7 +1636,7 @@ still be waiting at the barrier and this call releases them. */ void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) { KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid)); - + __kmp_assert_valid_gtid(global_tid); __kmp_end_split_barrier(bs_plain_barrier, global_tid); } @@ -1655,8 +1652,8 @@ There is no equivalent "end" function, since the */ kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) { kmp_int32 ret; - KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid)); + __kmp_assert_valid_gtid(global_tid); if (!TCR_4(__kmp_init_parallel)) __kmp_parallel_initialize(); @@ -1676,8 +1673,8 @@ kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) { __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); if (ompt_frame->enter_frame.ptr == NULL) ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(global_tid); } + OMPT_STORE_RETURN_ADDRESS(global_tid); #endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; @@ -1694,14 +1691,9 @@ kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) { if (__kmp_env_consistency_check) { /* there's no __kmpc_end_master called; so the (stats) */ /* actions of __kmpc_end_master are done here */ - - if (global_tid < 0) { - KMP_WARNING(ThreadIdentInvalid); - } if (ret) { /* only one thread should do the pop since only */ /* one did the push (see __kmpc_master()) */ - __kmp_pop_sync(global_tid, ct_master, loc); } } @@ -1722,6 +1714,7 @@ should introduce an explicit barrier if it is required. */ kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) { + __kmp_assert_valid_gtid(global_tid); kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE); if (rc) { @@ -1774,6 +1767,7 @@ only be called by the thread that executed the block of code protected by the `single` construct. */ void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { + __kmp_assert_valid_gtid(global_tid); __kmp_exit_single(global_tid); KMP_POP_PARTITIONED_TIMER(); @@ -1847,7 +1841,7 @@ void ompc_set_dynamic(int flag) { __kmp_save_internal_controls(thread); - set__dynamic(thread, flag ? TRUE : FALSE); + set__dynamic(thread, flag ? true : false); } void ompc_set_nested(int flag) { @@ -2053,8 +2047,8 @@ void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, void *cpy_data, void (*cpy_func)(void *, void *), kmp_int32 didit) { void **data_ptr; - KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid)); + __kmp_assert_valid_gtid(gtid); KMP_MB(); @@ -2077,8 +2071,8 @@ void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); if (ompt_frame->enter_frame.ptr == NULL) ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(gtid); } + OMPT_STORE_RETURN_ADDRESS(gtid); #endif /* This barrier is not a barrier region boundary */ #if USE_ITT_NOTIFY @@ -2091,11 +2085,9 @@ void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, // Consider next barrier a user-visible barrier for barrier region boundaries // Nesting checks are already handled by the single construct checks - + { #if OMPT_SUPPORT - if (ompt_enabled.enabled) { OMPT_STORE_RETURN_ADDRESS(gtid); - } #endif #if USE_ITT_NOTIFY __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. @@ -2107,6 +2099,7 @@ void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, ompt_frame->enter_frame = ompt_data_none; } #endif + } } /* -------------------------------------------------------------------------- */ @@ -2153,7 +2146,8 @@ __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock, kmp_dyna_lockseq_t seq) { #if KMP_USE_TSX // Don't have nested lock implementation for speculative locks - if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive) + if (seq == lockseq_hle || seq == lockseq_rtm_queuing || + seq == lockseq_rtm_spin || seq == lockseq_adaptive) seq = __kmp_user_lock_seq; #endif switch (seq) { @@ -3338,7 +3332,7 @@ __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { th->th.th_team = team; th->th.th_team_nproc = team->t.t_nproc; th->th.th_task_team = team->t.t_task_team[task_state]; - th->th.th_task_state = task_state; + __kmp_type_convert(task_state, &(th->th.th_task_state)); } /* 2.a.i. Reduce Block without a terminating barrier */ @@ -3370,6 +3364,7 @@ __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, kmp_team_t *team; int teams_swapped = 0, task_state; KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); + __kmp_assert_valid_gtid(global_tid); // why do we need this initialization here at all? // Reduction clause can not be used as a stand-alone directive. @@ -3469,8 +3464,8 @@ __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); if (ompt_frame->enter_frame.ptr == NULL) ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(global_tid); } + OMPT_STORE_RETURN_ADDRESS(global_tid); #endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; @@ -3523,6 +3518,7 @@ void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, PACKED_REDUCTION_METHOD_T packed_reduction_method; KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid)); + __kmp_assert_valid_gtid(global_tid); packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); @@ -3597,6 +3593,7 @@ kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, int teams_swapped = 0, task_state; KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid)); + __kmp_assert_valid_gtid(global_tid); // why do we need this initialization here at all? // Reduction clause can not be a stand-alone directive. @@ -3656,8 +3653,8 @@ kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); if (ompt_frame->enter_frame.ptr == NULL) ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(global_tid); } + OMPT_STORE_RETURN_ADDRESS(global_tid); #endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = @@ -3715,6 +3712,7 @@ void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, int teams_swapped = 0, task_state; KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid)); + __kmp_assert_valid_gtid(global_tid); th = __kmp_thread_from_gtid(global_tid); teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); @@ -3737,8 +3735,8 @@ void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); if (ompt_frame->enter_frame.ptr == NULL) ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(global_tid); } + OMPT_STORE_RETURN_ADDRESS(global_tid); #endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; @@ -3763,8 +3761,8 @@ void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); if (ompt_frame->enter_frame.ptr == NULL) ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(global_tid); } + OMPT_STORE_RETURN_ADDRESS(global_tid); #endif #if USE_ITT_NOTIFY __kmp_threads[global_tid]->th.th_ident = loc; @@ -3784,8 +3782,8 @@ void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); if (ompt_frame->enter_frame.ptr == NULL) ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); - OMPT_STORE_RETURN_ADDRESS(global_tid); } + OMPT_STORE_RETURN_ADDRESS(global_tid); #endif // TODO: implicit barrier: should be exposed #if USE_ITT_NOTIFY @@ -3871,6 +3869,7 @@ e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2. */ void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, const struct kmp_dim *dims) { + __kmp_assert_valid_gtid(gtid); int j, idx; kmp_int64 last, trace_count; kmp_info_t *th = __kmp_threads[gtid]; @@ -3966,7 +3965,8 @@ void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, #endif if (flags == NULL) { // we are the first thread, allocate the array of flags - size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration + size_t size = + (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); KMP_MB(); sh_buf->doacross_flags = flags; @@ -3990,7 +3990,9 @@ void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, } void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { - kmp_int32 shft, num_dims, i; + __kmp_assert_valid_gtid(gtid); + kmp_int64 shft; + size_t num_dims, i; kmp_uint32 flag; kmp_int64 iter_number; // iteration number of "collapsed" loop nest kmp_info_t *th = __kmp_threads[gtid]; @@ -4007,7 +4009,7 @@ void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { // calculate sequential iteration number and check out-of-bounds condition pr_buf = th->th.th_dispatch; KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); - num_dims = pr_buf->th_doacross_info[0]; + num_dims = (size_t)pr_buf->th_doacross_info[0]; lo = pr_buf->th_doacross_info[2]; up = pr_buf->th_doacross_info[3]; st = pr_buf->th_doacross_info[4]; @@ -4045,7 +4047,7 @@ void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { #endif for (i = 1; i < num_dims; ++i) { kmp_int64 iter, ln; - kmp_int32 j = i * 4; + size_t j = i * 4; ln = pr_buf->th_doacross_info[j + 1]; lo = pr_buf->th_doacross_info[j + 2]; up = pr_buf->th_doacross_info[j + 3]; @@ -4091,7 +4093,7 @@ void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { #if OMPT_SUPPORT && OMPT_OPTIONAL if (ompt_enabled.ompt_callback_dependences) { ompt_callbacks.ompt_callback(ompt_callback_dependences)( - &(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims); + &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); } #endif KA_TRACE(20, @@ -4100,7 +4102,9 @@ void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { } void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { - kmp_int32 shft, num_dims, i; + __kmp_assert_valid_gtid(gtid); + kmp_int64 shft; + size_t num_dims, i; kmp_uint32 flag; kmp_int64 iter_number; // iteration number of "collapsed" loop nest kmp_info_t *th = __kmp_threads[gtid]; @@ -4118,7 +4122,7 @@ void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { // out-of-bounds checks) pr_buf = th->th.th_dispatch; KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); - num_dims = pr_buf->th_doacross_info[0]; + num_dims = (size_t)pr_buf->th_doacross_info[0]; lo = pr_buf->th_doacross_info[2]; st = pr_buf->th_doacross_info[4]; #if OMPT_SUPPORT && OMPT_OPTIONAL @@ -4137,7 +4141,7 @@ void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { #endif for (i = 1; i < num_dims; ++i) { kmp_int64 iter, ln; - kmp_int32 j = i * 4; + size_t j = i * 4; ln = pr_buf->th_doacross_info[j + 1]; lo = pr_buf->th_doacross_info[j + 2]; st = pr_buf->th_doacross_info[j + 4]; @@ -4157,7 +4161,7 @@ void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { #if OMPT_SUPPORT && OMPT_OPTIONAL if (ompt_enabled.ompt_callback_dependences) { ompt_callbacks.ompt_callback(ompt_callback_dependences)( - &(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims); + &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); } #endif shft = iter_number % 32; // use 32-bit granularity @@ -4171,6 +4175,7 @@ void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { } void __kmpc_doacross_fini(ident_t *loc, int gtid) { + __kmp_assert_valid_gtid(gtid); kmp_int32 num_done; kmp_info_t *th = __kmp_threads[gtid]; kmp_team_t *team = th->th.th_team; @@ -4181,7 +4186,8 @@ void __kmpc_doacross_fini(ident_t *loc, int gtid) { KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team)); return; // nothing to do } - num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1; + num_done = + KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1; if (num_done == th->th.th_team_nproc) { // we are the last thread, need to free shared resources int idx = pr_buf->th_doacross_buf_idx - 1; @@ -4204,11 +4210,21 @@ void __kmpc_doacross_fini(ident_t *loc, int gtid) { KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid)); } -/* omp_alloc/omp_free only defined for C/C++, not for Fortran */ +/* omp_alloc/omp_calloc/omp_free only defined for C/C++, not for Fortran */ void *omp_alloc(size_t size, omp_allocator_handle_t allocator) { return __kmpc_alloc(__kmp_entry_gtid(), size, allocator); } +void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) { + return __kmpc_calloc(__kmp_entry_gtid(), nmemb, size, allocator); +} + +void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator, + omp_allocator_handle_t free_allocator) { + return __kmpc_realloc(__kmp_entry_gtid(), ptr, size, allocator, + free_allocator); +} + void omp_free(void *ptr, omp_allocator_handle_t allocator) { __kmpc_free(__kmp_entry_gtid(), ptr, allocator); } |