Diffstat (limited to 'contrib/llvm-project/openmp/runtime/src/kmp_csupport.cpp')
-rw-r--r--  contrib/llvm-project/openmp/runtime/src/kmp_csupport.cpp | 154
1 file changed, 85 insertions(+), 69 deletions(-)
diff --git a/contrib/llvm-project/openmp/runtime/src/kmp_csupport.cpp b/contrib/llvm-project/openmp/runtime/src/kmp_csupport.cpp
index a31490cbd162..4b188763a58a 100644
--- a/contrib/llvm-project/openmp/runtime/src/kmp_csupport.cpp
+++ b/contrib/llvm-project/openmp/runtime/src/kmp_csupport.cpp
@@ -231,13 +231,12 @@ void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
kmp_int32 num_threads) {
KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
global_tid, num_threads));
-
+ __kmp_assert_valid_gtid(global_tid);
__kmp_push_num_threads(loc, global_tid, num_threads);
}
void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
-
/* the num_threads are automatically popped */
}
@@ -245,7 +244,7 @@ void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
kmp_int32 proc_bind) {
KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
proc_bind));
-
+ __kmp_assert_valid_gtid(global_tid);
__kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}
@@ -298,8 +297,8 @@ void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
}
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
- OMPT_STORE_RETURN_ADDRESS(gtid);
}
+ OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
#if INCLUDE_SSC_MARKS
@@ -325,6 +324,7 @@ void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
#if KMP_STATS_ENABLED
if (previous_state == stats_state_e::SERIAL_REGION) {
KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
+ KMP_SET_THREAD_STATE(previous_state);
} else {
KMP_POP_PARTITIONED_TIMER();
}
@@ -347,7 +347,7 @@ void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
KA_TRACE(20,
("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
global_tid, num_teams, num_threads));
-
+ __kmp_assert_valid_gtid(global_tid);
__kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}
@@ -437,6 +437,7 @@ void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
#if KMP_STATS_ENABLED
if (previous_state == stats_state_e::SERIAL_REGION) {
KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
+ KMP_SET_THREAD_STATE(previous_state);
} else {
KMP_POP_PARTITIONED_TIMER();
}
@@ -462,9 +463,10 @@ conditional parallel region, like this,
when the condition is false.
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
-// The implementation is now in kmp_runtime.cpp so that it can share static
-// functions with kmp_fork_call since the tasks to be done are similar in
-// each case.
+ // The implementation is now in kmp_runtime.cpp so that it can share static
+ // functions with kmp_fork_call since the tasks to be done are similar in
+ // each case.
+ __kmp_assert_valid_gtid(global_tid);
#if OMPT_SUPPORT
OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
@@ -492,6 +494,7 @@ void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
return;
// Not autopar code
+ __kmp_assert_valid_gtid(global_tid);
if (!TCR_4(__kmp_init_parallel))
__kmp_parallel_initialize();
@@ -667,17 +670,6 @@ void __kmpc_flush(ident_t *loc) {
// Nothing to see here move along
#elif KMP_ARCH_PPC64
// Nothing needed here (we have a real MB above).
-#if KMP_OS_CNK
- // The flushing thread needs to yield here; this prevents a
- // busy-waiting thread from saturating the pipeline. flush is
- // often used in loops like this:
- // while (!flag) {
- // #pragma omp flush(flag)
- // }
- // and adding the yield here is good for at least a 10x speedup
- // when running >2 threads per core (on the NAS LU benchmark).
- __kmp_yield();
-#endif
#else
#error Unknown or unsupported architecture
#endif
@@ -701,6 +693,7 @@ Execute a barrier.
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
KMP_COUNT_BLOCK(OMP_BARRIER);
KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
+ __kmp_assert_valid_gtid(global_tid);
if (!TCR_4(__kmp_init_parallel))
__kmp_parallel_initialize();
@@ -720,8 +713,8 @@ void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
- OMPT_STORE_RETURN_ADDRESS(global_tid);
}
+ OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
__kmp_threads[global_tid]->th.th_ident = loc;
// TODO: explicit barrier_wait_id:
@@ -750,6 +743,7 @@ kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
int status = 0;
KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
+ __kmp_assert_valid_gtid(global_tid);
if (!TCR_4(__kmp_init_parallel))
__kmp_parallel_initialize();
@@ -764,12 +758,12 @@ kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (status) {
- if (ompt_enabled.ompt_callback_master) {
+ if (ompt_enabled.ompt_callback_masked) {
kmp_info_t *this_thr = __kmp_threads[global_tid];
kmp_team_t *team = this_thr->th.th_team;
int tid = __kmp_tid_from_gtid(global_tid);
- ompt_callbacks.ompt_callback(ompt_callback_master)(
+ ompt_callbacks.ompt_callback(ompt_callback_masked)(
ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
OMPT_GET_RETURN_ADDRESS(0));
@@ -804,16 +798,16 @@ thread that executes the <tt>master</tt> region.
*/
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
-
+ __kmp_assert_valid_gtid(global_tid);
KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
KMP_POP_PARTITIONED_TIMER();
#if OMPT_SUPPORT && OMPT_OPTIONAL
kmp_info_t *this_thr = __kmp_threads[global_tid];
kmp_team_t *team = this_thr->th.th_team;
- if (ompt_enabled.ompt_callback_master) {
+ if (ompt_enabled.ompt_callback_masked) {
int tid = __kmp_tid_from_gtid(global_tid);
- ompt_callbacks.ompt_callback(ompt_callback_master)(
+ ompt_callbacks.ompt_callback(ompt_callback_masked)(
ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
OMPT_GET_RETURN_ADDRESS(0));
@@ -821,9 +815,6 @@ void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
#endif
if (__kmp_env_consistency_check) {
- if (global_tid < 0)
- KMP_WARNING(ThreadIdentInvalid);
-
if (KMP_MASTER_GTID(global_tid))
__kmp_pop_sync(global_tid, ct_master, loc);
}
@@ -842,6 +833,7 @@ void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
KMP_DEBUG_ASSERT(__kmp_init_serial);
KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
+ __kmp_assert_valid_gtid(gtid);
if (!TCR_4(__kmp_init_parallel))
__kmp_parallel_initialize();
@@ -859,8 +851,8 @@ void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
kmp_team_t *team;
ompt_wait_id_t lck;
void *codeptr_ra;
+ OMPT_STORE_RETURN_ADDRESS(gtid);
if (ompt_enabled.enabled) {
- OMPT_STORE_RETURN_ADDRESS(gtid);
team = __kmp_team_from_gtid(gtid);
lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
/* OMPT state update */
@@ -913,6 +905,7 @@ void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
kmp_info_t *th;
KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
+ __kmp_assert_valid_gtid(gtid);
#if USE_ITT_BUILD
__kmp_itt_ordered_end(gtid);
@@ -1135,7 +1128,7 @@ static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
/*!
@ingroup WORK_SHARING
@param loc source location information.
-@param global_tid global thread number .
+@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.
@@ -1158,6 +1151,7 @@ void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
kmp_user_lock_p lck;
KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
+ __kmp_assert_valid_gtid(global_tid);
// TODO: add THR_OVHD_STATE
@@ -1255,7 +1249,7 @@ static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
if (hint & kmp_lock_hint_hle)
return KMP_TSX_LOCK(hle);
if (hint & kmp_lock_hint_rtm)
- return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
+ return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
if (hint & kmp_lock_hint_adaptive)
return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
@@ -1274,9 +1268,9 @@ static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
return lockseq_tas;
- // HLE lock for speculation
+ // Use RTM lock for speculation
if (hint & omp_lock_hint_speculative)
- return KMP_TSX_LOCK(hle);
+ return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;
return __kmp_user_lock_seq;
}
@@ -1297,6 +1291,7 @@ __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
return kmp_mutex_impl_spin;
#if KMP_USE_TSX
case locktag_hle:
+ case locktag_rtm_spin:
return kmp_mutex_impl_speculative;
#endif
default:
@@ -1308,7 +1303,7 @@ __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
switch (ilock->type) {
#if KMP_USE_TSX
case locktag_adaptive:
- case locktag_rtm:
+ case locktag_rtm_queuing:
return kmp_mutex_impl_speculative;
#endif
case locktag_nested_tas:
@@ -1342,7 +1337,8 @@ static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
return kmp_mutex_impl_queuing;
#if KMP_USE_TSX
case lk_hle:
- case lk_rtm:
+ case lk_rtm_queuing:
+ case lk_rtm_spin:
case lk_adaptive:
return kmp_mutex_impl_speculative;
#endif
@@ -1380,6 +1376,7 @@ void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
#endif
KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
+ __kmp_assert_valid_gtid(global_tid);
kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
// Check if it is initialized.
@@ -1595,8 +1592,8 @@ this function.
*/
kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
int status;
-
KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
+ __kmp_assert_valid_gtid(global_tid);
if (!TCR_4(__kmp_init_parallel))
__kmp_parallel_initialize();
@@ -1612,8 +1609,8 @@ kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
- OMPT_STORE_RETURN_ADDRESS(global_tid);
}
+ OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
__kmp_threads[global_tid]->th.th_ident = loc;
@@ -1639,7 +1636,7 @@ still be waiting at the barrier and this call releases them.
*/
void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
-
+ __kmp_assert_valid_gtid(global_tid);
__kmp_end_split_barrier(bs_plain_barrier, global_tid);
}
@@ -1655,8 +1652,8 @@ There is no equivalent "end" function, since the
*/
kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
kmp_int32 ret;
-
KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
+ __kmp_assert_valid_gtid(global_tid);
if (!TCR_4(__kmp_init_parallel))
__kmp_parallel_initialize();
@@ -1676,8 +1673,8 @@ kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
- OMPT_STORE_RETURN_ADDRESS(global_tid);
}
+ OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
__kmp_threads[global_tid]->th.th_ident = loc;
@@ -1694,14 +1691,9 @@ kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
if (__kmp_env_consistency_check) {
/* there's no __kmpc_end_master called; so the (stats) */
/* actions of __kmpc_end_master are done here */
-
- if (global_tid < 0) {
- KMP_WARNING(ThreadIdentInvalid);
- }
if (ret) {
/* only one thread should do the pop since only */
/* one did the push (see __kmpc_master()) */
-
__kmp_pop_sync(global_tid, ct_master, loc);
}
}
@@ -1722,6 +1714,7 @@ should introduce an explicit barrier if it is required.
*/
kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
+ __kmp_assert_valid_gtid(global_tid);
kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
if (rc) {
@@ -1774,6 +1767,7 @@ only be called by the thread that executed the block of code protected
by the `single` construct.
*/
void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
+ __kmp_assert_valid_gtid(global_tid);
__kmp_exit_single(global_tid);
KMP_POP_PARTITIONED_TIMER();
@@ -1847,7 +1841,7 @@ void ompc_set_dynamic(int flag) {
__kmp_save_internal_controls(thread);
- set__dynamic(thread, flag ? TRUE : FALSE);
+ set__dynamic(thread, flag ? true : false);
}
void ompc_set_nested(int flag) {
@@ -2053,8 +2047,8 @@ void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
void *cpy_data, void (*cpy_func)(void *, void *),
kmp_int32 didit) {
void **data_ptr;
-
KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
+ __kmp_assert_valid_gtid(gtid);
KMP_MB();
@@ -2077,8 +2071,8 @@ void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
- OMPT_STORE_RETURN_ADDRESS(gtid);
}
+ OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
/* This barrier is not a barrier region boundary */
#if USE_ITT_NOTIFY
@@ -2091,11 +2085,9 @@ void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
// Consider next barrier a user-visible barrier for barrier region boundaries
// Nesting checks are already handled by the single construct checks
-
+ {
#if OMPT_SUPPORT
- if (ompt_enabled.enabled) {
OMPT_STORE_RETURN_ADDRESS(gtid);
- }
#endif
#if USE_ITT_NOTIFY
__kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
@@ -2107,6 +2099,7 @@ void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
ompt_frame->enter_frame = ompt_data_none;
}
#endif
+ }
}
/* -------------------------------------------------------------------------- */
@@ -2153,7 +2146,8 @@ __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
kmp_dyna_lockseq_t seq) {
#if KMP_USE_TSX
// Don't have nested lock implementation for speculative locks
- if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
+ if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
+ seq == lockseq_rtm_spin || seq == lockseq_adaptive)
seq = __kmp_user_lock_seq;
#endif
switch (seq) {
@@ -3338,7 +3332,7 @@ __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
th->th.th_team = team;
th->th.th_team_nproc = team->t.t_nproc;
th->th.th_task_team = team->t.t_task_team[task_state];
- th->th.th_task_state = task_state;
+ __kmp_type_convert(task_state, &(th->th.th_task_state));
}
/* 2.a.i. Reduce Block without a terminating barrier */
@@ -3370,6 +3364,7 @@ __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
kmp_team_t *team;
int teams_swapped = 0, task_state;
KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
+ __kmp_assert_valid_gtid(global_tid);
// why do we need this initialization here at all?
// Reduction clause can not be used as a stand-alone directive.
@@ -3469,8 +3464,8 @@ __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
- OMPT_STORE_RETURN_ADDRESS(global_tid);
}
+ OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
__kmp_threads[global_tid]->th.th_ident = loc;
@@ -3523,6 +3518,7 @@ void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
PACKED_REDUCTION_METHOD_T packed_reduction_method;
KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
+ __kmp_assert_valid_gtid(global_tid);
packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
@@ -3597,6 +3593,7 @@ kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
int teams_swapped = 0, task_state;
KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
+ __kmp_assert_valid_gtid(global_tid);
// why do we need this initialization here at all?
// Reduction clause can not be a stand-alone directive.
@@ -3656,8 +3653,8 @@ kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
- OMPT_STORE_RETURN_ADDRESS(global_tid);
}
+ OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
__kmp_threads[global_tid]->th.th_ident =
@@ -3715,6 +3712,7 @@ void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
int teams_swapped = 0, task_state;
KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
+ __kmp_assert_valid_gtid(global_tid);
th = __kmp_thread_from_gtid(global_tid);
teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
@@ -3737,8 +3735,8 @@ void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
- OMPT_STORE_RETURN_ADDRESS(global_tid);
}
+ OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
__kmp_threads[global_tid]->th.th_ident = loc;
@@ -3763,8 +3761,8 @@ void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
- OMPT_STORE_RETURN_ADDRESS(global_tid);
}
+ OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
__kmp_threads[global_tid]->th.th_ident = loc;
@@ -3784,8 +3782,8 @@ void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
- OMPT_STORE_RETURN_ADDRESS(global_tid);
}
+ OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
// TODO: implicit barrier: should be exposed
#if USE_ITT_NOTIFY
@@ -3871,6 +3869,7 @@ e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
*/
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
const struct kmp_dim *dims) {
+ __kmp_assert_valid_gtid(gtid);
int j, idx;
kmp_int64 last, trace_count;
kmp_info_t *th = __kmp_threads[gtid];
@@ -3966,7 +3965,8 @@ void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
#endif
if (flags == NULL) {
// we are the first thread, allocate the array of flags
- size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
+ size_t size =
+ (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration
flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
KMP_MB();
sh_buf->doacross_flags = flags;
@@ -3990,7 +3990,9 @@ void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
}
void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
- kmp_int32 shft, num_dims, i;
+ __kmp_assert_valid_gtid(gtid);
+ kmp_int64 shft;
+ size_t num_dims, i;
kmp_uint32 flag;
kmp_int64 iter_number; // iteration number of "collapsed" loop nest
kmp_info_t *th = __kmp_threads[gtid];
@@ -4007,7 +4009,7 @@ void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
// calculate sequential iteration number and check out-of-bounds condition
pr_buf = th->th.th_dispatch;
KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
- num_dims = pr_buf->th_doacross_info[0];
+ num_dims = (size_t)pr_buf->th_doacross_info[0];
lo = pr_buf->th_doacross_info[2];
up = pr_buf->th_doacross_info[3];
st = pr_buf->th_doacross_info[4];
@@ -4045,7 +4047,7 @@ void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
#endif
for (i = 1; i < num_dims; ++i) {
kmp_int64 iter, ln;
- kmp_int32 j = i * 4;
+ size_t j = i * 4;
ln = pr_buf->th_doacross_info[j + 1];
lo = pr_buf->th_doacross_info[j + 2];
up = pr_buf->th_doacross_info[j + 3];
@@ -4091,7 +4093,7 @@ void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_dependences) {
ompt_callbacks.ompt_callback(ompt_callback_dependences)(
- &(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims);
+ &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
}
#endif
KA_TRACE(20,
@@ -4100,7 +4102,9 @@ void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
}
void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
- kmp_int32 shft, num_dims, i;
+ __kmp_assert_valid_gtid(gtid);
+ kmp_int64 shft;
+ size_t num_dims, i;
kmp_uint32 flag;
kmp_int64 iter_number; // iteration number of "collapsed" loop nest
kmp_info_t *th = __kmp_threads[gtid];
@@ -4118,7 +4122,7 @@ void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
// out-of-bounds checks)
pr_buf = th->th.th_dispatch;
KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
- num_dims = pr_buf->th_doacross_info[0];
+ num_dims = (size_t)pr_buf->th_doacross_info[0];
lo = pr_buf->th_doacross_info[2];
st = pr_buf->th_doacross_info[4];
#if OMPT_SUPPORT && OMPT_OPTIONAL
@@ -4137,7 +4141,7 @@ void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
#endif
for (i = 1; i < num_dims; ++i) {
kmp_int64 iter, ln;
- kmp_int32 j = i * 4;
+ size_t j = i * 4;
ln = pr_buf->th_doacross_info[j + 1];
lo = pr_buf->th_doacross_info[j + 2];
st = pr_buf->th_doacross_info[j + 4];
@@ -4157,7 +4161,7 @@ void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_dependences) {
ompt_callbacks.ompt_callback(ompt_callback_dependences)(
- &(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims);
+ &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
}
#endif
shft = iter_number % 32; // use 32-bit granularity
@@ -4171,6 +4175,7 @@ void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
}
void __kmpc_doacross_fini(ident_t *loc, int gtid) {
+ __kmp_assert_valid_gtid(gtid);
kmp_int32 num_done;
kmp_info_t *th = __kmp_threads[gtid];
kmp_team_t *team = th->th.th_team;
@@ -4181,7 +4186,8 @@ void __kmpc_doacross_fini(ident_t *loc, int gtid) {
KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
return; // nothing to do
}
- num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
+ num_done =
+ KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
if (num_done == th->th.th_team_nproc) {
// we are the last thread, need to free shared resources
int idx = pr_buf->th_doacross_buf_idx - 1;
@@ -4204,11 +4210,21 @@ void __kmpc_doacross_fini(ident_t *loc, int gtid) {
KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}
-/* omp_alloc/omp_free only defined for C/C++, not for Fortran */
+/* omp_alloc/omp_calloc/omp_free only defined for C/C++, not for Fortran */
void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
return __kmpc_alloc(__kmp_entry_gtid(), size, allocator);
}
+void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
+ return __kmpc_calloc(__kmp_entry_gtid(), nmemb, size, allocator);
+}
+
+void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
+ omp_allocator_handle_t free_allocator) {
+ return __kmpc_realloc(__kmp_entry_gtid(), ptr, size, allocator,
+ free_allocator);
+}
+
void omp_free(void *ptr, omp_allocator_handle_t allocator) {
__kmpc_free(__kmp_entry_gtid(), ptr, allocator);
}
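
Note (not part of the diff): the tail of this change adds omp_calloc and omp_realloc entry points next to the existing omp_alloc/omp_free. A minimal C sketch of how these OpenMP 5.x allocator routines are called from user code, assuming <omp.h> and the predefined omp_default_mem_alloc allocator:

/* Illustrative sketch only; omp_default_mem_alloc is the predefined
   default-memory-space allocator from the OpenMP API. */
#include <omp.h>
#include <stdio.h>

int main(void) {
  /* Uninitialized allocation of 8 doubles. */
  double *a = (double *)omp_alloc(8 * sizeof(double), omp_default_mem_alloc);
  /* Zero-initialized allocation (routed to __kmpc_calloc by this change). */
  double *b = (double *)omp_calloc(8, sizeof(double), omp_default_mem_alloc);
  if (!a || !b)
    return 1;
  /* Grow the first buffer; the new and old allocators may differ. */
  a = (double *)omp_realloc(a, 16 * sizeof(double), omp_default_mem_alloc,
                            omp_default_mem_alloc);
  printf("b[0] = %f\n", b[0]); /* prints 0.0: omp_calloc zero-fills */
  omp_free(a, omp_default_mem_alloc);
  omp_free(b, omp_default_mem_alloc);
  return 0;
}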