Diffstat (limited to 'runtime/src/kmp_csupport.cpp')
-rw-r--r-- | runtime/src/kmp_csupport.cpp | 269
1 file changed, 146 insertions(+), 123 deletions(-)
diff --git a/runtime/src/kmp_csupport.cpp b/runtime/src/kmp_csupport.cpp
index 61d4a93011205..c778c97022f5b 100644
--- a/runtime/src/kmp_csupport.cpp
+++ b/runtime/src/kmp_csupport.cpp
@@ -4,10 +4,9 @@
 //===----------------------------------------------------------------------===//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
@@ -245,8 +244,6 @@ void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
   /* the num_threads are automatically popped */
 }

-#if OMP_40_ENABLED
-
 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                            kmp_int32 proc_bind) {
   KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
@@ -255,8 +252,6 @@ void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
   __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
 }

-#endif /* OMP_40_ENABLED */
-
 /*!
 @ingroup PARALLEL
 @param loc source location information
@@ -345,7 +340,6 @@ void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
 #endif // KMP_STATS_ENABLED
 }

-#if OMP_40_ENABLED
 /*!
 @ingroup PARALLEL
 @param loc source location information
@@ -383,7 +377,15 @@ void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
   va_list ap;
   va_start(ap, microtask);

+#if KMP_STATS_ENABLED
   KMP_COUNT_BLOCK(OMP_TEAMS);
+  stats_state_e previous_state = KMP_GET_THREAD_STATE();
+  if (previous_state == stats_state_e::SERIAL_REGION) {
+    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
+  } else {
+    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
+  }
+#endif

   // remember teams entry point and nesting level
   this_thr->th.th_teams_microtask = microtask;
@@ -426,12 +428,35 @@ void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
 #endif
                   );

+  // Pop current CG root off list
+  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
+  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
+  this_thr->th.th_cg_roots = tmp->up;
+  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
+                 " to node %p. cg_nthreads was %d\n",
+                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
+  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
+  int i = tmp->cg_nthreads--;
+  if (i == 1) { // check is we are the last thread in CG (not always the case)
+    __kmp_free(tmp);
+  }
+  // Restore current task's thread_limit from CG root
+  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
+  this_thr->th.th_current_task->td_icvs.thread_limit =
+      this_thr->th.th_cg_roots->cg_thread_limit;
+
   this_thr->th.th_teams_microtask = NULL;
   this_thr->th.th_teams_level = 0;
   *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
   va_end(ap);
+#if KMP_STATS_ENABLED
+  if (previous_state == stats_state_e::SERIAL_REGION) {
+    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
+  } else {
+    KMP_POP_PARTITIONED_TIMER();
+  }
+#endif // KMP_STATS_ENABLED
 }
-#endif /* OMP_40_ENABLED */

 // I don't think this function should ever have been exported.
 // The __kmpc_ prefix was misapplied.  I'm fairly certain that no generated
@@ -485,16 +510,15 @@ void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
   if (!TCR_4(__kmp_init_parallel))
     __kmp_parallel_initialize();

+  __kmp_resume_if_soft_paused();
+
   this_thr = __kmp_threads[global_tid];
   serial_team = this_thr->th.th_serial_team;

-#if OMP_45_ENABLED
   kmp_task_team_t *task_team = this_thr->th.th_task_team;
-
   // we need to wait for the proxy tasks before finishing the thread
   if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
     __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
-#endif

   KMP_MB();
   KMP_DEBUG_ASSERT(serial_team);
@@ -549,9 +573,7 @@ void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
           serial_team->t.t_dispatch->th_disp_buffer->next;
       __kmp_free(disp_buffer);
     }
-#if OMP_50_ENABLED
     this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
-#endif

     --serial_team->t.t_serialized;
     if (serial_team->t.t_serialized == 0) {
@@ -667,7 +689,7 @@ void __kmpc_flush(ident_t *loc) {
 // }
 // and adding the yield here is good for at least a 10x speedup
 // when running >2 threads per core (on the NAS LU benchmark).
-  __kmp_yield(TRUE);
+  __kmp_yield();
 #endif
 #else
 #error Unknown or unsupported architecture
@@ -696,11 +718,12 @@ void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
   if (!TCR_4(__kmp_init_parallel))
     __kmp_parallel_initialize();

+  __kmp_resume_if_soft_paused();
+
   if (__kmp_env_consistency_check) {
     if (loc == 0) {
       KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
     }
-
     __kmp_check_barrier(global_tid, ct_barrier, loc);
   }
@@ -744,6 +767,8 @@ kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
   if (!TCR_4(__kmp_init_parallel))
     __kmp_parallel_initialize();

+  __kmp_resume_if_soft_paused();
+
   if (KMP_MASTER_GTID(global_tid)) {
     KMP_COUNT_BLOCK(OMP_MASTER);
     KMP_PUSH_PARTITIONED_TIMER(OMP_master);
@@ -834,6 +859,8 @@ void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
   if (!TCR_4(__kmp_init_parallel))
     __kmp_parallel_initialize();

+  __kmp_resume_if_soft_paused();
+
 #if USE_ITT_BUILD
   __kmp_itt_ordered_prep(gtid);
 // TODO: ordered_wait_id
@@ -848,7 +875,7 @@ void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
   if (ompt_enabled.enabled) {
     OMPT_STORE_RETURN_ADDRESS(gtid);
     team = __kmp_team_from_gtid(gtid);
-    lck = (ompt_wait_id_t)&team->t.t_ordered.dt.t_value;
+    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
     /* OMPT state update */
     th->th.ompt_thread_info.wait_id = lck;
     th->th.ompt_thread_info.state = ompt_state_wait_ordered;
@@ -857,8 +884,8 @@ void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
     codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
     if (ompt_enabled.ompt_callback_mutex_acquire) {
       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
-          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin,
-          (ompt_wait_id_t)lck, codeptr_ra);
+          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
+          codeptr_ra);
     }
   }
 #endif
@@ -877,7 +904,7 @@ void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
     /* OMPT event callback */
     if (ompt_enabled.ompt_callback_mutex_acquired) {
       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
-          ompt_mutex_ordered, (ompt_wait_id_t)lck, codeptr_ra);
+          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
     }
   }
 #endif
@@ -917,7 +944,8 @@ void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
   if (ompt_enabled.ompt_callback_mutex_released) {
     ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
         ompt_mutex_ordered,
-        (ompt_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value,
+        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
+            ->t.t_ordered.dt.t_value,
         OMPT_LOAD_RETURN_ADDRESS(gtid));
   }
 #endif
@@ -965,24 +993,18 @@ __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
       kmp_uint32 spins; \
       KMP_FSYNC_PREPARE(l); \
       KMP_INIT_YIELD(spins); \
-      if (TCR_4(__kmp_nth) > \
-          (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
-        KMP_YIELD(TRUE); \
-      } else { \
-        KMP_YIELD_SPIN(spins); \
-      } \
       kmp_backoff_t backoff = __kmp_spin_backoff_params; \
-      while ( \
-          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
-          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
-        __kmp_spin_backoff(&backoff); \
+      do { \
         if (TCR_4(__kmp_nth) > \
             (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
           KMP_YIELD(TRUE); \
         } else { \
           KMP_YIELD_SPIN(spins); \
         } \
-      } \
+        __kmp_spin_backoff(&backoff); \
+      } while ( \
+          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
+          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
     } \
     KMP_FSYNC_ACQUIRED(l); \
   }
@@ -1068,8 +1090,7 @@ __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                  KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
     } \
     KMP_MB(); \
-    KMP_YIELD(TCR_4(__kmp_nth) > \
-              (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \
+    KMP_YIELD_OVERSUB(); \
   }

 #endif // KMP_USE_FUTEX
@@ -1188,7 +1209,7 @@ void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
     ti = __kmp_threads[global_tid]->th.ompt_thread_info;
     /* OMPT state update */
     prev_state = ti.state;
-    ti.wait_id = (ompt_wait_id_t)lck;
+    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
     ti.state = ompt_state_wait_critical;

     /* OMPT event callback */
@@ -1196,7 +1217,7 @@ void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
     if (ompt_enabled.ompt_callback_mutex_acquire) {
       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
           ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
-          (ompt_wait_id_t)crit, codeptr_ra);
+          (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
     }
   }
 #endif
@@ -1216,7 +1237,7 @@ void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
     /* OMPT event callback */
     if (ompt_enabled.ompt_callback_mutex_acquired) {
       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
-          ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr_ra);
+          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
     }
   }
 #endif
@@ -1402,14 +1423,15 @@ void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
     ti = __kmp_threads[global_tid]->th.ompt_thread_info;
     /* OMPT state update */
     prev_state = ti.state;
-    ti.wait_id = (ompt_wait_id_t)lck;
+    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
     ti.state = ompt_state_wait_critical;

     /* OMPT event callback */
     if (ompt_enabled.ompt_callback_mutex_acquire) {
       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
           ompt_mutex_critical, (unsigned int)hint,
-          __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)crit, codeptr);
+          __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
+          codeptr);
     }
   }
 #endif
@@ -1440,14 +1462,15 @@ void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
     ti = __kmp_threads[global_tid]->th.ompt_thread_info;
     /* OMPT state update */
     prev_state = ti.state;
-    ti.wait_id = (ompt_wait_id_t)lck;
+    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
     ti.state = ompt_state_wait_critical;

     /* OMPT event callback */
     if (ompt_enabled.ompt_callback_mutex_acquire) {
       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
           ompt_mutex_critical, (unsigned int)hint,
-          __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)crit, codeptr);
+          __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
+          codeptr);
     }
   }
 #endif
@@ -1467,7 +1490,7 @@ void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
     /* OMPT event callback */
     if (ompt_enabled.ompt_callback_mutex_acquired) {
       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
-          ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr);
+          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
     }
   }
 #endif
@@ -1565,7 +1588,8 @@ void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
   OMPT_STORE_RETURN_ADDRESS(global_tid);
   if (ompt_enabled.ompt_callback_mutex_released) {
     ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
-        ompt_mutex_critical, (ompt_wait_id_t)crit, OMPT_LOAD_RETURN_ADDRESS(0));
+        ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
+        OMPT_LOAD_RETURN_ADDRESS(0));
   }
 #endif
@@ -1590,6 +1614,8 @@ kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
   if (!TCR_4(__kmp_init_parallel))
     __kmp_parallel_initialize();

+  __kmp_resume_if_soft_paused();
+
   if (__kmp_env_consistency_check)
     __kmp_check_barrier(global_tid, ct_barrier, loc);
@@ -1648,6 +1674,8 @@ kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
   if (!TCR_4(__kmp_init_parallel))
     __kmp_parallel_initialize();

+  __kmp_resume_if_soft_paused();
+
   if (__kmp_env_consistency_check) {
     if (loc == 0) {
       KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
@@ -1843,7 +1871,7 @@ void ompc_set_nested(int flag) {

   __kmp_save_internal_controls(thread);

-  set__nested(thread, flag ? TRUE : FALSE);
+  set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
 }

 void ompc_set_max_active_levels(int max_active_levels) {
@@ -1867,7 +1895,6 @@ int ompc_get_team_size(int level) {
   return __kmp_get_team_size(__kmp_entry_gtid(), level);
 }

-#if OMP_50_ENABLED
 /* OpenMP 5.0 Affinity Format API */

 void ompc_set_affinity_format(char const *format) {
@@ -1918,7 +1945,6 @@ size_t ompc_capture_affinity(char *buffer, size_t buf_size,
   __kmp_str_buf_free(&capture_buf);
   return num_required;
 }
-#endif /* OMP_50_ENABLED */

 void kmpc_set_stacksize(int arg) {
   // __kmp_aux_set_stacksize initializes the library if needed
@@ -2189,8 +2215,8 @@ void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
   if (ompt_enabled.ompt_callback_lock_init) {
     ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
         ompt_mutex_lock, (omp_lock_hint_t)hint,
-        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
-        codeptr);
+        __ompt_get_mutex_impl_type(user_lock),
+        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
   }
 #endif
 }
@@ -2213,8 +2239,8 @@ void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
   if (ompt_enabled.ompt_callback_lock_init) {
     ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
         ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
-        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
-        codeptr);
+        __ompt_get_mutex_impl_type(user_lock),
+        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
   }
 #endif
 }
@@ -2239,8 +2265,8 @@ void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
     if (ompt_enabled.ompt_callback_lock_init) {
       ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
           ompt_mutex_lock, omp_lock_hint_none,
-          __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
-          codeptr);
+          __ompt_get_mutex_impl_type(user_lock),
+          (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
     }
 #endif
@@ -2282,7 +2308,7 @@ void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
   if (ompt_enabled.ompt_callback_lock_init) {
     ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
         ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
-        (ompt_wait_id_t)user_lock, codeptr);
+        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
   }
 #endif
@@ -2311,8 +2337,8 @@ void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
     if (ompt_enabled.ompt_callback_lock_init) {
       ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
           ompt_mutex_nest_lock, omp_lock_hint_none,
-          __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
-          codeptr);
+          __ompt_get_mutex_impl_type(user_lock),
+          (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
     }
 #endif
@@ -2357,7 +2383,7 @@ void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
   if (ompt_enabled.ompt_callback_lock_init) {
     ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
         ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
-        (ompt_wait_id_t)user_lock, codeptr);
+        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
   }
 #endif
@@ -2393,7 +2419,7 @@ void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
       lck = (kmp_user_lock_p)user_lock;
     }
     ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
-        ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
+        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
   }
 #endif
   KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
@@ -2421,7 +2447,7 @@ void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
   codeptr = OMPT_GET_RETURN_ADDRESS(0);
   if (ompt_enabled.ompt_callback_lock_destroy) {
     ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
-        ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
+        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
   }
 #endif
@@ -2461,7 +2487,7 @@ void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
   codeptr = OMPT_GET_RETURN_ADDRESS(0);
   if (ompt_enabled.ompt_callback_lock_destroy) {
     ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
-        ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
+        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
   }
 #endif
   KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
@@ -2493,7 +2519,7 @@ void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
   codeptr = OMPT_GET_RETURN_ADDRESS(0);
   if (ompt_enabled.ompt_callback_lock_destroy) {
     ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
-        ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
+        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
   }
 #endif
@@ -2538,8 +2564,8 @@ void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
   if (ompt_enabled.ompt_callback_mutex_acquire) {
     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
         ompt_mutex_lock, omp_lock_hint_none,
-        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
-        codeptr);
+        __ompt_get_mutex_impl_type(user_lock),
+        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
   }
 #endif
 #if KMP_USE_INLINED_TAS
@@ -2560,7 +2586,7 @@ void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   if (ompt_enabled.ompt_callback_mutex_acquired) {
     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
-        ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
+        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
   }
 #endif
@@ -2593,7 +2619,7 @@ void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
   if (ompt_enabled.ompt_callback_mutex_acquire) {
     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
         ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
-        (ompt_wait_id_t)lck, codeptr);
+        (ompt_wait_id_t)(uintptr_t)lck, codeptr);
   }
 #endif
@@ -2606,7 +2632,7 @@ void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   if (ompt_enabled.ompt_callback_mutex_acquired) {
     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
-        ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
+        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
   }
 #endif
@@ -2628,8 +2654,8 @@ void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
     if (ompt_enabled.ompt_callback_mutex_acquire) {
       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
           ompt_mutex_nest_lock, omp_lock_hint_none,
-          __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
-          codeptr);
+          __ompt_get_mutex_impl_type(user_lock),
+          (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
     }
   }
 #endif
@@ -2646,13 +2672,14 @@ void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
       if (ompt_enabled.ompt_callback_mutex_acquired) {
         // lock_first
         ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
-            ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
+            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
+            codeptr);
       }
     } else {
       if (ompt_enabled.ompt_callback_nest_lock) {
         // lock_next
         ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
-            ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
+            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
       }
     }
   }
@@ -2690,7 +2717,8 @@ void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
     if (ompt_enabled.ompt_callback_mutex_acquire) {
       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
           ompt_mutex_nest_lock, omp_lock_hint_none,
-          __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
+          __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
+          codeptr);
     }
   }
 #endif
@@ -2707,13 +2735,13 @@ void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
       if (ompt_enabled.ompt_callback_mutex_acquired) {
         // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
-            ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
+            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
       }
     } else {
       if (ompt_enabled.ompt_callback_nest_lock) {
         // lock_next
         ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
-            ompt_scope_begin, (ompt_wait_id_t)lck, codeptr);
+            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
       }
     }
   }
@@ -2749,7 +2777,7 @@ void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
   codeptr = OMPT_GET_RETURN_ADDRESS(0);
   if (ompt_enabled.ompt_callback_mutex_released) {
     ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
-        ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
+        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
   }
 #endif
@@ -2778,7 +2806,7 @@ void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
   codeptr = OMPT_GET_RETURN_ADDRESS(0);
   if (ompt_enabled.ompt_callback_mutex_released) {
     ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
-        ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
+        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
   }
 #endif
@@ -2810,7 +2838,7 @@ void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
   codeptr = OMPT_GET_RETURN_ADDRESS(0);
   if (ompt_enabled.ompt_callback_mutex_released) {
     ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
-        ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
+        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
   }
 #endif
@@ -2838,12 +2866,13 @@ void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
     if (ompt_enabled.ompt_callback_mutex_released) {
       // release_lock_last
       ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
-          ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
+          ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
+          codeptr);
     }
   } else if (ompt_enabled.ompt_callback_nest_lock) {
     // release_lock_prev
     ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
-        ompt_scope_end, (ompt_wait_id_t)user_lock, codeptr);
+        ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
   }
 }
 #endif
@@ -2887,12 +2916,12 @@ void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
       if (ompt_enabled.ompt_callback_mutex_released) {
         // release_lock_last
         ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
-            ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
+            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
       }
     } else if (ompt_enabled.ompt_callback_nest_lock) {
       // release_lock_previous
       ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
-          ompt_mutex_scope_end, (ompt_wait_id_t)lck, codeptr);
+          ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
     }
   }
 #endif
@@ -2929,12 +2958,12 @@ void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
     if (ompt_enabled.ompt_callback_mutex_released) {
       // release_lock_last
       ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
-          ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
+          ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
     }
   } else if (ompt_enabled.ompt_callback_nest_lock) {
     // release_lock_previous
     ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
-        ompt_mutex_scope_end, (ompt_wait_id_t)lck, codeptr);
+        ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
   }
 }
 #endif
@@ -2960,8 +2989,8 @@ int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
   if (ompt_enabled.ompt_callback_mutex_acquire) {
     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
         ompt_mutex_lock, omp_lock_hint_none,
-        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
-        codeptr);
+        __ompt_get_mutex_impl_type(user_lock),
+        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
   }
 #endif
 #if KMP_USE_INLINED_TAS
@@ -2983,7 +3012,7 @@ int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
 #if OMPT_SUPPORT && OMPT_OPTIONAL
     if (ompt_enabled.ompt_callback_mutex_acquired) {
       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
-          ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
+          ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
     }
 #endif
     return FTN_TRUE;
@@ -3024,7 +3053,7 @@ int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
   if (ompt_enabled.ompt_callback_mutex_acquire) {
     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
         ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
-        (ompt_wait_id_t)lck, codeptr);
+        (ompt_wait_id_t)(uintptr_t)lck, codeptr);
   }
 #endif
@@ -3039,7 +3068,7 @@ int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
-        ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
+        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
   }
 #endif
@@ -3065,8 +3094,8 @@ int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
   if (ompt_enabled.ompt_callback_mutex_acquire) {
     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
         ompt_mutex_nest_lock, omp_lock_hint_none,
-        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
-        codeptr);
+        __ompt_get_mutex_impl_type(user_lock),
+        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
   }
 #endif
   rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
@@ -3083,13 +3112,14 @@ int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
-            ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
+            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
+            codeptr);
       }
     } else {
       if (ompt_enabled.ompt_callback_nest_lock) {
         // lock_next
         ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
-            ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
+            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
       }
     }
   }
@@ -3130,7 +3160,8 @@ int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
         ompt_enabled.ompt_callback_mutex_acquire) {
       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
           ompt_mutex_nest_lock, omp_lock_hint_none,
-          __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
+          __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
+          codeptr);
     }
 #endif
@@ -3148,13 +3179,13 @@ int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
       if (ompt_enabled.ompt_callback_mutex_acquired) {
         // lock_first
         ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
-            ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
+            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
       }
     } else {
       if (ompt_enabled.ompt_callback_nest_lock) {
         // lock_next
         ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
-            ompt_mutex_scope_begin, (ompt_wait_id_t)lck, codeptr);
+            ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
       }
     }
   }
@@ -3288,7 +3319,6 @@ __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
 #endif // KMP_USE_DYNAMIC_LOCK
 } // __kmp_end_critical_section_reduce_block

-#if OMP_40_ENABLED
 static __forceinline int
 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
                                      int *task_state) {
@@ -3323,7 +3353,6 @@ __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
   th->th.th_task_team = team->t.t_task_team[task_state];
   th->th.th_task_state = task_state;
 }
-#endif

 /* 2.a.i. Reduce Block without a terminating barrier */
 /*!
@@ -3350,11 +3379,9 @@ __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
   KMP_COUNT_BLOCK(REDUCE_nowait);
   int retval = 0;
   PACKED_REDUCTION_METHOD_T packed_reduction_method;
-#if OMP_40_ENABLED
   kmp_info_t *th;
   kmp_team_t *team;
   int teams_swapped = 0, task_state;
-#endif
   KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));

   // why do we need this initialization here at all?
@@ -3366,6 +3393,8 @@ __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
   if (!TCR_4(__kmp_init_parallel))
     __kmp_parallel_initialize();

+  __kmp_resume_if_soft_paused();
+
   // check correctness of reduce block nesting
 #if KMP_USE_DYNAMIC_LOCK
   if (__kmp_env_consistency_check)
@@ -3375,10 +3404,8 @@ __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
     __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
 #endif

-#if OMP_40_ENABLED
   th = __kmp_thread_from_gtid(global_tid);
   teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
-#endif // OMP_40_ENABLED

   // packed_reduction_method value will be reused by __kmp_end_reduce* function,
   // the value should be kept in a variable
@@ -3479,11 +3506,9 @@ __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
     // should never reach this block
     KMP_ASSERT(0); // "unexpected method"
   }
-#if OMP_40_ENABLED
   if (teams_swapped) {
     __kmp_restore_swapped_teams(th, team, task_state);
   }
-#endif
   KA_TRACE(
       10,
       ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
@@ -3569,11 +3594,9 @@ kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
   KMP_COUNT_BLOCK(REDUCE_wait);
   int retval = 0;
   PACKED_REDUCTION_METHOD_T packed_reduction_method;
-#if OMP_40_ENABLED
   kmp_info_t *th;
   kmp_team_t *team;
   int teams_swapped = 0, task_state;
-#endif

   KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
@@ -3586,6 +3609,8 @@ kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
   if (!TCR_4(__kmp_init_parallel))
     __kmp_parallel_initialize();

+  __kmp_resume_if_soft_paused();
+
   // check correctness of reduce block nesting
 #if KMP_USE_DYNAMIC_LOCK
   if (__kmp_env_consistency_check)
@@ -3595,10 +3620,8 @@ kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
     __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
 #endif

-#if OMP_40_ENABLED
   th = __kmp_thread_from_gtid(global_tid);
   teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
-#endif // OMP_40_ENABLED

   packed_reduction_method = __kmp_determine_reduction_method(
       loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
@@ -3661,16 +3684,13 @@ kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
     // should never reach this block
     KMP_ASSERT(0); // "unexpected method"
   }
-#if OMP_40_ENABLED
   if (teams_swapped) {
     __kmp_restore_swapped_teams(th, team, task_state);
   }
-#endif

   KA_TRACE(10,
            ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
            global_tid, packed_reduction_method, retval));
-
   return retval;
 }
@@ -3688,18 +3708,14 @@ void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
                        kmp_critical_name *lck) {

   PACKED_REDUCTION_METHOD_T packed_reduction_method;
-#if OMP_40_ENABLED
   kmp_info_t *th;
   kmp_team_t *team;
   int teams_swapped = 0, task_state;
-#endif

   KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));

-#if OMP_40_ENABLED
   th = __kmp_thread_from_gtid(global_tid);
   teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
-#endif // OMP_40_ENABLED

   packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
@@ -3707,7 +3723,6 @@ void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
   // tool (it's a terminating barrier on constructs if NOWAIT not specified)

   if (packed_reduction_method == critical_reduce_block) {
-
     __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

     // TODO: implicit barrier: should be exposed
@@ -3788,11 +3803,9 @@ void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
     // should never reach this block
     KMP_ASSERT(0); // "unexpected method"
   }
-#if OMP_40_ENABLED
   if (teams_swapped) {
     __kmp_restore_swapped_teams(th, team, task_state);
   }
-#endif

   if (__kmp_env_consistency_check)
     __kmp_pop_sync(global_tid, ct_reduce, loc);
@@ -3838,7 +3851,6 @@ kmp_uint64 __kmpc_get_parent_taskid() {

 } // __kmpc_get_parent_taskid

-#if OMP_45_ENABLED
 /*!
 @ingroup WORK_SHARING
 @param loc source location information.
@@ -3932,8 +3944,8 @@ void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
   // __kmp_dispatch_num_buffers)
   if (idx != sh_buf->doacross_buf_idx) {
     // Shared buffer is occupied, wait for it to be free
-    __kmp_wait_yield_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
-                       __kmp_eq_4, NULL);
+    __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
+                 __kmp_eq_4, NULL);
   }
 #if KMP_32_BIT_ARCH
   // Check if we are the first thread. After the CAS the first thread gets 0,
@@ -4150,15 +4162,26 @@ void __kmpc_doacross_fini(ident_t *loc, int gtid) {
   pr_buf->th_doacross_info = NULL;
   KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
 }
-#endif

-#if OMP_50_ENABLED
+/* omp_alloc/omp_free only defined for C/C++, not for Fortran */
+void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
+  return __kmpc_alloc(__kmp_entry_gtid(), size, allocator);
+}
+
+void omp_free(void *ptr, omp_allocator_handle_t allocator) {
+  __kmpc_free(__kmp_entry_gtid(), ptr, allocator);
+}
+
 int __kmpc_get_target_offload(void) {
   if (!__kmp_init_serial) {
     __kmp_serial_initialize();
   }
   return __kmp_target_offload;
 }
-#endif // OMP_50_ENABLED

-// end of file //
+int __kmpc_pause_resource(kmp_pause_status_t level) {
+  if (!__kmp_init_serial) {
+    return 1; // Can't pause if runtime is not initialized
+  }
+  return __kmp_pause_resource(level);
+}
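Note on the new user-facing entry points: omp_alloc/omp_free (added at the bottom of this patch) are thin C/C++-only wrappers that forward to __kmpc_alloc/__kmpc_free with the calling thread's gtid. The sketch below shows how user code would reach them through the standard OpenMP 5.0 allocator API; omp_default_mem_alloc is a predefined allocator from the 5.0 spec, not something this patch introduces, and the example assumes a toolchain with OpenMP 5.0 headers rather than anything guaranteed by the diff itself.

    #include <omp.h>
    #include <stdio.h>

    int main(void) {
      /* omp_alloc forwards to __kmpc_alloc with the current thread's gtid. */
      double *buf =
          (double *)omp_alloc(1000 * sizeof(double), omp_default_mem_alloc);
      if (!buf)
        return 1;

    #pragma omp parallel for
      for (int i = 0; i < 1000; i++)
        buf[i] = 2.0 * i;

      printf("buf[999] = %f\n", buf[999]);
      /* Release through the same allocator that performed the allocation. */
      omp_free(buf, omp_default_mem_alloc);
      return 0;
    }

Built with clang -fopenmp, this exercises exactly the two wrappers added here. The other new entry point, __kmpc_pause_resource, simply reports failure (returns 1) when the runtime has not been serially initialized and otherwise defers to __kmp_pause_resource.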