Diffstat (limited to 'runtime/src/kmp_runtime.cpp')
-rw-r--r--  runtime/src/kmp_runtime.cpp | 741
1 file changed, 383 insertions(+), 358 deletions(-)
diff --git a/runtime/src/kmp_runtime.cpp b/runtime/src/kmp_runtime.cpp
index 0db376159fc7a..7f6c149c792e7 100644
--- a/runtime/src/kmp_runtime.cpp
+++ b/runtime/src/kmp_runtime.cpp
@@ -4,10 +4,9 @@
//===----------------------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -47,16 +46,8 @@ char const __kmp_version_alt_comp[] =
KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */
-char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "
-#if OMP_50_ENABLED
- "5.0 (201611)";
-#elif OMP_45_ENABLED
- "4.5 (201511)";
-#elif OMP_40_ENABLED
- "4.0 (201307)";
-#else
- "3.1 (201107)";
-#endif
+char const __kmp_version_omp_api[] =
+ KMP_VERSION_PREFIX "API version: 5.0 (201611)";
#ifdef KMP_DEBUG
char const __kmp_version_lock[] =
@@ -80,7 +71,7 @@ static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
kmp_internal_control_t *new_icvs,
ident_t *loc);
-#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
+#if KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
int update_master_only = 0);
#endif
@@ -328,7 +319,7 @@ void __kmp_infinite_loop(void) {
static int done = FALSE;
while (!done) {
- KMP_YIELD(1);
+ KMP_YIELD(TRUE);
}
}
@@ -533,22 +524,10 @@ static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
&team->t.t_disp_buffer[num_disp_buff],
sizeof(dispatch_shared_info_t) * num_disp_buff,
"%s_%d.t_disp_buffer", header, team_id);
-
- __kmp_print_storage_map_gtid(-1, &team->t.t_taskq, &team->t.t_copypriv_data,
- sizeof(kmp_taskq_t), "%s_%d.t_taskq", header,
- team_id);
}
-static void __kmp_init_allocator() {
-#if OMP_50_ENABLED
- __kmp_init_memkind();
-#endif
-}
-static void __kmp_fini_allocator() {
-#if OMP_50_ENABLED
- __kmp_fini_memkind();
-#endif
-}
+static void __kmp_init_allocator() { __kmp_init_memkind(); }
+static void __kmp_fini_allocator() { __kmp_fini_memkind(); }
/* ------------------------------------------------------------------------ */
@@ -673,24 +652,6 @@ BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_DYNAMIC_LIB */
-/* Change the library type to "status" and return the old type */
-/* called from within initialization routines where __kmp_initz_lock is held */
-int __kmp_change_library(int status) {
- int old_status;
-
- old_status = __kmp_yield_init &
- 1; // check whether KMP_LIBRARY=throughput (even init count)
-
- if (status) {
- __kmp_yield_init |= 1; // throughput => turnaround (odd init count)
- } else {
- __kmp_yield_init &= ~1; // turnaround => throughput (even init count)
- }
-
- return old_status; // return previous setting of whether
- // KMP_LIBRARY=throughput
-}
-
/* __kmp_parallel_deo -- Wait until it's our turn. */
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
int gtid = *gtid_ref;
@@ -709,8 +670,8 @@ void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
#ifdef BUILD_PARALLEL_ORDERED
if (!team->t.t_serialized) {
KMP_MB();
- KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid),
- KMP_EQ, NULL);
+ KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
+ NULL);
KMP_MB();
}
#endif /* BUILD_PARALLEL_ORDERED */
@@ -751,6 +712,7 @@ int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
if (!TCR_4(__kmp_init_parallel))
__kmp_parallel_initialize();
+ __kmp_resume_if_soft_paused();
th = __kmp_threads[gtid];
team = th->th.th_team;
@@ -773,10 +735,7 @@ int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
}
#if USE_ITT_BUILD
if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
- KMP_MASTER_GTID(gtid) &&
-#if OMP_40_ENABLED
- th->th.th_teams_microtask == NULL &&
-#endif
+ KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
team->t.t_active_level ==
1) { // Only report metadata by master of active team at level 1
__kmp_itt_metadata_single(id_ref);
@@ -814,16 +773,13 @@ void __kmp_exit_single(int gtid) {
* otherwise the number of threads to use
* The forkjoin lock is held by the caller. */
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
- int master_tid, int set_nthreads
-#if OMP_40_ENABLED
- ,
- int enter_teams
-#endif /* OMP_40_ENABLED */
- ) {
+ int master_tid, int set_nthreads,
+ int enter_teams) {
int capacity;
int new_nthreads;
KMP_DEBUG_ASSERT(__kmp_init_serial);
KMP_DEBUG_ASSERT(root && parent_team);
+ kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];
// If dyn-var is set, dynamically adjust the number of desired threads,
// according to the method specified by dynamic_mode.
@@ -913,10 +869,12 @@ static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
}
// Respect OMP_THREAD_LIMIT
- if (root->r.r_cg_nthreads + new_nthreads -
+ int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
+ int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
+ if (cg_nthreads + new_nthreads -
(root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
- __kmp_cg_max_nth) {
- int tl_nthreads = __kmp_cg_max_nth - root->r.r_cg_nthreads +
+ max_cg_threads) {
+ int tl_nthreads = max_cg_threads - cg_nthreads +
(root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
if (tl_nthreads <= 0) {
tl_nthreads = 1;
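The clamp above is easier to follow with concrete numbers; the values below are hypothetical, chosen only to illustrate the arithmetic.

// Hypothetical walk-through of the OMP_THREAD_LIMIT clamp (values invented):
//   cg_nthreads    = 5   (threads already counted in this contention group)
//   new_nthreads   = 8   (requested team size)
//   root inactive, hot team t_nproc = 4, max_cg_threads = 8
// Then 5 + 8 - 4 = 9 > 8, so the request is reduced to
//   tl_nthreads = 8 - 5 + 4 = 7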
@@ -1069,11 +1027,9 @@ static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
__kmp_gtid_from_tid(i, team), team->t.t_id, i,
team->t.t_bar[bs_forkjoin_barrier].b_arrived,
team->t.t_bar[bs_plain_barrier].b_arrived));
-#if OMP_40_ENABLED
thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
thr->th.th_teams_level = master_th->th.th_teams_level;
thr->th.th_teams_size = master_th->th.th_teams_size;
-#endif
{ // Initialize threads' barrier data.
int b;
kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
@@ -1087,12 +1043,11 @@ static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
}
}
-#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
+#if KMP_AFFINITY_SUPPORTED
__kmp_partition_places(team);
#endif
}
-#if OMP_50_ENABLED
if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
for (i = 0; i < team->t.t_nproc; i++) {
kmp_info_t *thr = team->t.t_threads[i];
@@ -1103,7 +1058,6 @@ static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
}
}
}
-#endif
KMP_MB();
}
@@ -1188,6 +1142,7 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
if (!TCR_4(__kmp_init_parallel))
__kmp_parallel_initialize();
+ __kmp_resume_if_soft_paused();
this_thr = __kmp_threads[global_tid];
serial_team = this_thr->th.th_serial_team;
@@ -1208,7 +1163,6 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
this_thr->th.th_task_team = NULL;
}
-#if OMP_40_ENABLED
kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
proc_bind = proc_bind_false;
@@ -1219,7 +1173,6 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
}
// Reset for next parallel region
this_thr->th.th_set_proc_bind = proc_bind_default;
-#endif /* OMP_40_ENABLED */
#if OMPT_SUPPORT
ompt_data_t ompt_parallel_data = ompt_data_none;
@@ -1254,15 +1207,13 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
__kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
- new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
+ new_team =
+ __kmp_allocate_team(this_thr->th.th_root, 1, 1,
#if OMPT_SUPPORT
- ompt_parallel_data,
+ ompt_parallel_data,
#endif
-#if OMP_40_ENABLED
- proc_bind,
-#endif
- &this_thr->th.th_current_task->td_icvs,
- 0 USE_NESTED_HOT_ARG(NULL));
+ proc_bind, &this_thr->th.th_current_task->td_icvs,
+ 0 USE_NESTED_HOT_ARG(NULL));
__kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
KMP_ASSERT(new_team);
@@ -1319,13 +1270,11 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
__kmp_nested_nth.nth[level + 1];
}
-#if OMP_40_ENABLED
if (__kmp_nested_proc_bind.used &&
(level + 1 < __kmp_nested_proc_bind.used)) {
this_thr->th.th_current_task->td_icvs.proc_bind =
__kmp_nested_proc_bind.bind_types[level + 1];
}
-#endif /* OMP_40_ENABLED */
#if USE_DEBUGGER
serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
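The bind_types indexing above is the nesting-level lookup for the OMP_PROC_BIND list; a hedged illustration of the correspondence (standard list semantics assumed, not taken from this patch):

// Illustration (assumed OMP_PROC_BIND list semantics):
//   OMP_PROC_BIND=spread,close gives
//     __kmp_nested_proc_bind.bind_types[0] == proc_bind_spread  // outer
//     __kmp_nested_proc_bind.bind_types[1] == proc_bind_close   // nested
// so a region entered at depth `level` picks up bind_types[level + 1]
// as its proc-bind ICV, exactly as coded above.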
@@ -1339,9 +1288,7 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
-#if OMP_50_ENABLED
serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save
-#endif
propagateFPControl(serial_team);
@@ -1391,11 +1338,8 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
KMP_MB();
}
-#if OMP_40_ENABLED
KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);
-#endif
-#if OMP_50_ENABLED
// Perform the display affinity functionality for
// serialized parallel regions
if (__kmp_display_affinity) {
@@ -1407,7 +1351,6 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
this_thr->th.th_prev_num_threads = 1;
}
}
-#endif
if (__kmp_env_consistency_check)
__kmp_push_parallel(global_tid, NULL);
@@ -1465,10 +1408,8 @@ int __kmp_fork_call(ident_t *loc, int gtid,
int master_active;
int master_set_numthreads;
int level;
-#if OMP_40_ENABLED
int active_level;
int teams_level;
-#endif
#if KMP_NESTED_HOT_TEAMS
kmp_hot_team_ptr_t **p_hot_teams;
#endif
@@ -1491,6 +1432,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
__kmp_init_serial); // AC: potentially unsafe, not in sync with shutdown
if (!TCR_4(__kmp_init_parallel))
__kmp_parallel_initialize();
+ __kmp_resume_if_soft_paused();
/* setup current data */
master_th = __kmp_threads[gtid]; // AC: potentially unsafe, not in sync with
@@ -1520,10 +1462,8 @@ int __kmp_fork_call(ident_t *loc, int gtid,
level = parent_team->t.t_level;
// used to launch non-serial teams even if nested is not allowed
active_level = parent_team->t.t_active_level;
-#if OMP_40_ENABLED
// needed to check nesting inside the teams
teams_level = master_th->th.th_teams_level;
-#endif
#if KMP_NESTED_HOT_TEAMS
p_hot_teams = &master_th->th.th_hot_teams;
if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
@@ -1551,7 +1491,6 @@ int __kmp_fork_call(ident_t *loc, int gtid,
master_th->th.th_ident = loc;
-#if OMP_40_ENABLED
if (master_th->th.th_teams_microtask && ap &&
microtask != (microtask_t)__kmp_teams_master && level == teams_level) {
// AC: This is start of parallel that is nested inside teams construct.
@@ -1645,9 +1584,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
KMP_ATOMIC_INC(&root->r.r_in_parallel);
parent_team->t.t_active_level++;
parent_team->t.t_level++;
-#if OMP_50_ENABLED
parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save
-#endif
/* Change number of threads in the team if requested */
if (master_set_numthreads) { // The parallel has num_threads clause
@@ -1695,7 +1632,6 @@ int __kmp_fork_call(ident_t *loc, int gtid,
return TRUE;
} // Parallel closely nested in teams construct
-#endif /* OMP_40_ENABLED */
#if KMP_DEBUG
if (__kmp_tasking_mode != tskm_immediate_exec) {
@@ -1708,10 +1644,8 @@ int __kmp_fork_call(ident_t *loc, int gtid,
master_th->th.th_current_task->td_icvs.max_active_levels) {
nthreads = 1;
} else {
-#if OMP_40_ENABLED
int enter_teams = ((ap == NULL && active_level == 0) ||
(ap && teams_level > 0 && teams_level == level));
-#endif
nthreads =
master_set_numthreads
? master_set_numthreads
@@ -1723,11 +1657,8 @@ int __kmp_fork_call(ident_t *loc, int gtid,
// parallel out of teams construct). This code moved here from
// __kmp_reserve_threads() to speedup nested serialized parallels.
if (nthreads > 1) {
- if ((!get__nested(master_th) && (root->r.r_in_parallel
-#if OMP_40_ENABLED
- && !enter_teams
-#endif /* OMP_40_ENABLED */
- )) ||
+ if ((get__max_active_levels(master_th) == 1 &&
+ (root->r.r_in_parallel && !enter_teams)) ||
(__kmp_library == library_serial)) {
KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d"
" threads\n",
@@ -1738,18 +1669,12 @@ int __kmp_fork_call(ident_t *loc, int gtid,
if (nthreads > 1) {
/* determine how many new threads we can use */
__kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
- nthreads = __kmp_reserve_threads(
- root, parent_team, master_tid, nthreads
-#if OMP_40_ENABLED
- /* AC: If we execute teams from parallel region (on host), then
- teams should be created but each can only have 1 thread if
- nesting is disabled. If teams called from serial region, then
- teams and their threads should be created regardless of the
- nesting setting. */
- ,
- enter_teams
-#endif /* OMP_40_ENABLED */
- );
+ /* AC: If we execute teams from parallel region (on host), then teams
+ should be created but each can only have 1 thread if nesting is
+ disabled. If teams called from serial region, then teams and their
+ threads should be created regardless of the nesting setting. */
+ nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
+ nthreads, enter_teams);
if (nthreads == 1) {
// Free lock for single thread execution here; for multi-thread
// execution it will be freed later after team of threads created
@@ -1782,7 +1707,6 @@ int __kmp_fork_call(ident_t *loc, int gtid,
if (call_context == fork_context_intel) {
/* TODO this sucks, use the compiler itself to pass args! :) */
master_th->th.th_serial_team->t.t_ident = loc;
-#if OMP_40_ENABLED
if (!ap) {
// revert change made in __kmpc_serialized_parallel()
master_th->th.th_serial_team->t.t_level--;
@@ -1877,14 +1801,13 @@ int __kmp_fork_call(ident_t *loc, int gtid,
// AC: call special invoker for outer "parallel" of teams construct
invoker(gtid);
} else {
-#endif /* OMP_40_ENABLED */
argv = args;
for (i = argc - 1; i >= 0; --i)
// TODO: revert workaround for Intel(R) 64 tracker #96
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
*argv++ = va_arg(*ap, void *);
#else
- *argv++ = va_arg(ap, void *);
+ *argv++ = va_arg(ap, void *);
#endif
KMP_MB();
@@ -1950,9 +1873,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
master_th->th.ompt_thread_info.state = ompt_state_overhead;
}
#endif
-#if OMP_40_ENABLED
}
-#endif /* OMP_40_ENABLED */
} else if (call_context == fork_context_gnu) {
#if OMPT_SUPPORT
ompt_lw_taskteam_t lwt;
@@ -1989,10 +1910,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
// KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
master_th->th.th_current_task->td_flags.executing = 0;
-#if OMP_40_ENABLED
- if (!master_th->th.th_teams_microtask || level > teams_level)
-#endif /* OMP_40_ENABLED */
- {
+ if (!master_th->th.th_teams_microtask || level > teams_level) {
/* Increment our nested depth level */
KMP_ATOMIC_INC(&root->r.r_in_parallel);
}
@@ -2006,7 +1924,6 @@ int __kmp_fork_call(ident_t *loc, int gtid,
nthreads_icv = 0; // don't update
}
-#if OMP_40_ENABLED
// Figure out the proc_bind_policy for the new team.
kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
kmp_proc_bind_t proc_bind_icv =
@@ -2032,25 +1949,17 @@ int __kmp_fork_call(ident_t *loc, int gtid,
// Reset for next parallel region
master_th->th.th_set_proc_bind = proc_bind_default;
-#endif /* OMP_40_ENABLED */
- if ((nthreads_icv > 0)
-#if OMP_40_ENABLED
- || (proc_bind_icv != proc_bind_default)
-#endif /* OMP_40_ENABLED */
- ) {
+ if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
kmp_internal_control_t new_icvs;
copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
new_icvs.next = NULL;
if (nthreads_icv > 0) {
new_icvs.nproc = nthreads_icv;
}
-
-#if OMP_40_ENABLED
if (proc_bind_icv != proc_bind_default) {
new_icvs.proc_bind = proc_bind_icv;
}
-#endif /* OMP_40_ENABLED */
/* allocate a new parallel team */
KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
@@ -2058,10 +1967,8 @@ int __kmp_fork_call(ident_t *loc, int gtid,
#if OMPT_SUPPORT
ompt_parallel_data,
#endif
-#if OMP_40_ENABLED
- proc_bind,
-#endif
- &new_icvs, argc USE_NESTED_HOT_ARG(master_th));
+ proc_bind, &new_icvs,
+ argc USE_NESTED_HOT_ARG(master_th));
} else {
/* allocate a new parallel team */
KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
@@ -2069,9 +1976,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
#if OMPT_SUPPORT
ompt_parallel_data,
#endif
-#if OMP_40_ENABLED
proc_bind,
-#endif
&master_th->th.th_current_task->td_icvs,
argc USE_NESTED_HOT_ARG(master_th));
}
@@ -2089,15 +1994,12 @@ int __kmp_fork_call(ident_t *loc, int gtid,
return_address);
#endif
KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe
-// TODO: parent_team->t.t_level == INT_MAX ???
-#if OMP_40_ENABLED
+ // TODO: parent_team->t.t_level == INT_MAX ???
if (!master_th->th.th_teams_microtask || level > teams_level) {
-#endif /* OMP_40_ENABLED */
int new_level = parent_team->t.t_level + 1;
KMP_CHECK_UPDATE(team->t.t_level, new_level);
new_level = parent_team->t.t_active_level + 1;
KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
-#if OMP_40_ENABLED
} else {
// AC: Do not increase parallel level at start of the teams construct
int new_level = parent_team->t.t_level;
@@ -2105,17 +2007,12 @@ int __kmp_fork_call(ident_t *loc, int gtid,
new_level = parent_team->t.t_active_level;
KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
}
-#endif /* OMP_40_ENABLED */
kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
// set master's schedule as new run-time schedule
KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
-#if OMP_40_ENABLED
KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
-#endif
-#if OMP_50_ENABLED
KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);
-#endif
// Update the floating point rounding in the team if required.
propagateFPControl(team);
@@ -2191,27 +2088,23 @@ int __kmp_fork_call(ident_t *loc, int gtid,
/* now, setup the arguments */
argv = (void **)team->t.t_argv;
-#if OMP_40_ENABLED
if (ap) {
-#endif /* OMP_40_ENABLED */
for (i = argc - 1; i >= 0; --i) {
// TODO: revert workaround for Intel(R) 64 tracker #96
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
void *new_argv = va_arg(*ap, void *);
#else
- void *new_argv = va_arg(ap, void *);
+ void *new_argv = va_arg(ap, void *);
#endif
KMP_CHECK_UPDATE(*argv, new_argv);
argv++;
}
-#if OMP_40_ENABLED
} else {
for (i = 0; i < argc; ++i) {
// Get args from parent team for teams construct
KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
}
}
-#endif /* OMP_40_ENABLED */
/* now actually fork the threads */
KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
@@ -2230,10 +2123,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
#if USE_ITT_BUILD
if (team->t.t_active_level == 1 // only report frames at level 1
-#if OMP_40_ENABLED
- && !master_th->th.th_teams_microtask // not in teams construct
-#endif /* OMP_40_ENABLED */
- ) {
+ && !master_th->th.th_teams_microtask) { // not in teams construct
#if USE_ITT_NOTIFY
if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
(__kmp_forkjoin_frames_mode == 3 ||
@@ -2271,12 +2161,9 @@ int __kmp_fork_call(ident_t *loc, int gtid,
}
#endif /* USE_ITT_BUILD */
-#if OMP_40_ENABLED
// AC: skip __kmp_internal_fork at teams construct, let only master
// threads execute
- if (ap)
-#endif /* OMP_40_ENABLED */
- {
+ if (ap) {
__kmp_internal_fork(loc, gtid, team);
KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
"master_th=%p, gtid=%d\n",
@@ -2293,9 +2180,25 @@ int __kmp_fork_call(ident_t *loc, int gtid,
team->t.t_id, team->t.t_pkfn));
} // END of timer KMP_fork_call block
+#if KMP_STATS_ENABLED
+ // If beginning a teams construct, then change thread state
+ stats_state_e previous_state = KMP_GET_THREAD_STATE();
+ if (!ap) {
+ KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);
+ }
+#endif
+
if (!team->t.t_invoke(gtid)) {
KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
}
+
+#if KMP_STATS_ENABLED
+ // If was beginning of a teams construct, then reset thread state
+ if (!ap) {
+ KMP_SET_THREAD_STATE(previous_state);
+ }
+#endif
+
KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
team->t.t_id, team->t.t_pkfn));
KMP_MB(); /* Flush all pending memory write invalidates. */
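The paired KMP_GET_THREAD_STATE/KMP_SET_THREAD_STATE calls above form a plain save/restore bracket around t_invoke; a minimal scope-guard sketch of the same idea, assuming only that the two macros act as a getter/setter pair (the patch itself keeps the explicit form and restores conditionally on !ap):

#if KMP_STATS_ENABLED
// Sketch only: RAII equivalent of the explicit save/restore above.
struct ThreadStateGuard {
  stats_state_e saved;
  explicit ThreadStateGuard(stats_state_e s) : saved(KMP_GET_THREAD_STATE()) {
    KMP_SET_THREAD_STATE(s);
  }
  ~ThreadStateGuard() { KMP_SET_THREAD_STATE(saved); }
};
#endif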
@@ -2340,18 +2243,14 @@ void __kmp_join_call(ident_t *loc, int gtid
,
enum fork_context_e fork_context
#endif
-#if OMP_40_ENABLED
,
- int exit_teams
-#endif /* OMP_40_ENABLED */
- ) {
+ int exit_teams) {
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
kmp_team_t *team;
kmp_team_t *parent_team;
kmp_info_t *master_th;
kmp_root_t *root;
int master_active;
- int i;
KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));
@@ -2382,7 +2281,6 @@ void __kmp_join_call(ident_t *loc, int gtid
#endif
if (team->t.t_serialized) {
-#if OMP_40_ENABLED
if (master_th->th.th_teams_microtask) {
// We are in teams construct
int level = team->t.t_level;
@@ -2398,7 +2296,6 @@ void __kmp_join_call(ident_t *loc, int gtid
team->t.t_serialized++;
}
}
-#endif /* OMP_40_ENABLED */
__kmpc_end_serialized_parallel(loc, gtid);
#if OMPT_SUPPORT
@@ -2412,20 +2309,14 @@ void __kmp_join_call(ident_t *loc, int gtid
master_active = team->t.t_master_active;
-#if OMP_40_ENABLED
- if (!exit_teams)
-#endif /* OMP_40_ENABLED */
- {
+ if (!exit_teams) {
// AC: No barrier for internal teams at exit from teams construct.
// But there is barrier for external team (league).
__kmp_internal_join(loc, gtid, team);
- }
-#if OMP_40_ENABLED
- else {
+ } else {
master_th->th.th_task_state =
0; // AC: no tasking in teams (out of any parallel)
}
-#endif /* OMP_40_ENABLED */
KMP_MB();
@@ -2442,11 +2333,8 @@ void __kmp_join_call(ident_t *loc, int gtid
}
// Mark end of "parallel" region for Intel(R) VTune(TM) analyzer.
- if (team->t.t_active_level == 1
-#if OMP_40_ENABLED
- && !master_th->th.th_teams_microtask /* not in teams construct */
-#endif /* OMP_40_ENABLED */
- ) {
+ if (team->t.t_active_level == 1 &&
+ !master_th->th.th_teams_microtask) { /* not in teams construct */
master_th->th.th_ident = loc;
// only one notification scheme (either "submit" or "forking/joined", not
// both)
@@ -2461,7 +2349,6 @@ void __kmp_join_call(ident_t *loc, int gtid
} // active_level == 1
#endif /* USE_ITT_BUILD */
-#if OMP_40_ENABLED
if (master_th->th.th_teams_microtask && !exit_teams &&
team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
team->t.t_level == master_th->th.th_teams_level + 1) {
@@ -2474,21 +2361,24 @@ void __kmp_join_call(ident_t *loc, int gtid
team->t.t_active_level--;
KMP_ATOMIC_DEC(&root->r.r_in_parallel);
- /* Restore number of threads in the team if needed */
+ // Restore number of threads in the team if needed. This code relies on
+ // the proper adjustment of th_teams_size.nth after the fork in
+ // __kmp_teams_master on each teams master in the case that
+ // __kmp_reserve_threads reduced it.
if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
int old_num = master_th->th.th_team_nproc;
int new_num = master_th->th.th_teams_size.nth;
kmp_info_t **other_threads = team->t.t_threads;
team->t.t_nproc = new_num;
- for (i = 0; i < old_num; ++i) {
+ for (int i = 0; i < old_num; ++i) {
other_threads[i]->th.th_team_nproc = new_num;
}
// Adjust states of non-used threads of the team
- for (i = old_num; i < new_num; ++i) {
+ for (int i = old_num; i < new_num; ++i) {
// Re-initialize thread's barrier data.
- int b;
+ KMP_DEBUG_ASSERT(other_threads[i]);
kmp_balign_t *balign = other_threads[i]->th.th_bar;
- for (b = 0; b < bs_last_barrier; ++b) {
+ for (int b = 0; b < bs_last_barrier; ++b) {
balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
@@ -2511,7 +2401,6 @@ void __kmp_join_call(ident_t *loc, int gtid
return;
}
-#endif /* OMP_40_ENABLED */
/* do cleanup and restore the parent team */
master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
@@ -2524,11 +2413,8 @@ void __kmp_join_call(ident_t *loc, int gtid
from the serial user code called after this function returns. */
__kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
-#if OMP_40_ENABLED
if (!master_th->th.th_teams_microtask ||
- team->t.t_level > master_th->th.th_teams_level)
-#endif /* OMP_40_ENABLED */
- {
+ team->t.t_level > master_th->th.th_teams_level) {
/* Decrement our nested depth level */
KMP_ATOMIC_DEC(&root->r.r_in_parallel);
}
@@ -2553,14 +2439,12 @@ void __kmp_join_call(ident_t *loc, int gtid
master_th, team));
__kmp_pop_current_task_from_thread(master_th);
-#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
+#if KMP_AFFINITY_SUPPORTED
// Restore master thread's partition.
master_th->th.th_first_place = team->t.t_first_place;
master_th->th.th_last_place = team->t.t_last_place;
-#endif /* OMP_40_ENABLED */
-#if OMP_50_ENABLED
+#endif // KMP_AFFINITY_SUPPORTED
master_th->th.th_def_allocator = team->t.t_def_allocator;
-#endif
updateHWFPControl(team);
@@ -2791,9 +2675,13 @@ int __kmp_get_max_active_levels(int gtid) {
return thread->th.th_current_task->td_icvs.max_active_levels;
}
+KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
+KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
+
/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
kmp_info_t *thread;
+ kmp_sched_t orig_kind;
// kmp_team_t *team;
KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
@@ -2804,6 +2692,9 @@ void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
// Valid parameters should fit in one of two intervals - standard or extended:
// <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
// 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
+ orig_kind = kind;
+ kind = __kmp_sched_without_mods(kind);
+
if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
(kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
// TODO: Hint needs attention in case we change the default schedule.
@@ -2834,6 +2725,8 @@ void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
__kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
kmp_sched_lower - 2];
}
+ __kmp_sched_apply_mods_intkind(
+ orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
if (kind == kmp_sched_auto || chunk < 1) {
// ignore parameter chunk for schedule auto
thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
@@ -2853,12 +2746,12 @@ void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
thread = __kmp_threads[gtid];
th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
-
- switch (th_type) {
+ switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
case kmp_sch_static:
case kmp_sch_static_greedy:
case kmp_sch_static_balanced:
*kind = kmp_sched_static;
+ __kmp_sched_apply_mods_stdkind(kind, th_type);
*chunk = 0; // chunk was not set, try to show this fact via zero value
return;
case kmp_sch_static_chunked:
@@ -2887,6 +2780,7 @@ void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
KMP_FATAL(UnknownSchedulingType, th_type);
}
+ __kmp_sched_apply_mods_stdkind(kind, th_type);
*chunk = thread->th.th_current_task->td_icvs.sched.chunk;
}
@@ -2910,7 +2804,6 @@ int __kmp_get_ancestor_thread_num(int gtid, int level) {
if (level > ii)
return -1;
-#if OMP_40_ENABLED
if (thr->th.th_teams_microtask) {
// AC: we are in teams region where multiple nested teams have same level
int tlevel = thr->th.th_teams_level; // the level of the teams construct
@@ -2926,7 +2819,6 @@ int __kmp_get_ancestor_thread_num(int gtid, int level) {
}
}
}
-#endif
if (ii == level)
return __kmp_tid_from_gtid(gtid);
@@ -2970,7 +2862,6 @@ int __kmp_get_team_size(int gtid, int level) {
if (level > ii)
return -1;
-#if OMP_40_ENABLED
if (thr->th.th_teams_microtask) {
// AC: we are in teams region where multiple nested teams have same level
int tlevel = thr->th.th_teams_level; // the level of the teams construct
@@ -2986,7 +2877,6 @@ int __kmp_get_team_size(int gtid, int level) {
}
}
}
-#endif
while (ii > level) {
for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
@@ -3015,15 +2905,18 @@ kmp_r_sched_t __kmp_get_schedule_global() {
// __kmp_guided. __kmp_sched should keep original value, so that user can set
// KMP_SCHEDULE multiple times, and thus have different run-time schedules in
// different roots (even in OMP 2.5)
- if (__kmp_sched == kmp_sch_static) {
+ enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
+ enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
+ if (s == kmp_sch_static) {
// replace STATIC with more detailed schedule (balanced or greedy)
r_sched.r_sched_type = __kmp_static;
- } else if (__kmp_sched == kmp_sch_guided_chunked) {
+ } else if (s == kmp_sch_guided_chunked) {
// replace GUIDED with more detailed schedule (iterative or analytical)
r_sched.r_sched_type = __kmp_guided;
} else { // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
r_sched.r_sched_type = __kmp_sched;
}
+ SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);
if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
// __kmp_chunk may be wrong here (if it was not ever set)
@@ -3099,9 +2992,7 @@ static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
/* setup dispatch buffers */
for (i = 0; i < num_disp_buff; ++i) {
team->t.t_disp_buffer[i].buffer_index = i;
-#if OMP_45_ENABLED
team->t.t_disp_buffer[i].doacross_buf_idx = i;
-#endif
}
}
@@ -3146,14 +3037,10 @@ static kmp_internal_control_t __kmp_get_global_icvs(void) {
kmp_r_sched_t r_sched =
__kmp_get_schedule_global(); // get current state of scheduling globals
-#if OMP_40_ENABLED
KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
-#endif /* OMP_40_ENABLED */
kmp_internal_control_t g_icvs = {
0, // int serial_nesting_level; //corresponds to value of th_team_serialized
- (kmp_int8)__kmp_dflt_nested, // int nested; //internal control
- // for nested parallelism (per thread)
(kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic
// adjustment of threads (per thread)
(kmp_int8)__kmp_env_blocktime, // int bt_set; //internal control for
@@ -3166,14 +3053,13 @@ static kmp_internal_control_t __kmp_get_global_icvs(void) {
__kmp_dflt_team_nth, // int nproc; //internal control for # of threads for
// next parallel region (per thread)
// (use a max ub on value if __kmp_parallel_initialize not called yet)
+ __kmp_cg_max_nth, // int thread_limit;
__kmp_dflt_max_active_levels, // int max_active_levels; //internal control
// for max_active_levels
r_sched, // kmp_r_sched_t sched; //internal control for runtime schedule
-// {sched,chunk} pair
-#if OMP_40_ENABLED
+ // {sched,chunk} pair
__kmp_nested_proc_bind.bind_types[0],
__kmp_default_device,
-#endif /* OMP_40_ENABLED */
NULL // struct kmp_internal_control *next;
};
@@ -3208,8 +3094,6 @@ static void __kmp_initialize_root(kmp_root_t *root) {
root->r.r_active = FALSE;
root->r.r_in_parallel = 0;
root->r.r_blocktime = __kmp_dflt_blocktime;
- root->r.r_nested = __kmp_dflt_nested;
- root->r.r_cg_nthreads = 1;
/* setup the root team for this task */
/* allocate the root team structure */
@@ -3222,10 +3106,7 @@ static void __kmp_initialize_root(kmp_root_t *root) {
#if OMPT_SUPPORT
ompt_data_none, // root parallel id
#endif
-#if OMP_40_ENABLED
- __kmp_nested_proc_bind.bind_types[0],
-#endif
- &r_icvs,
+ __kmp_nested_proc_bind.bind_types[0], &r_icvs,
0 // argc
USE_NESTED_HOT_ARG(NULL) // master thread is unknown
);
@@ -3262,10 +3143,7 @@ static void __kmp_initialize_root(kmp_root_t *root) {
#if OMPT_SUPPORT
ompt_data_none, // root parallel id
#endif
-#if OMP_40_ENABLED
- __kmp_nested_proc_bind.bind_types[0],
-#endif
- &r_icvs,
+ __kmp_nested_proc_bind.bind_types[0], &r_icvs,
0 // argc
USE_NESTED_HOT_ARG(NULL) // master thread is unknown
);
@@ -3406,9 +3284,7 @@ void __kmp_print_structure(void) {
thread->th.th_team_master);
__kmp_printf(" Serialized?: %2d\n", thread->th.th_team_serialized);
__kmp_printf(" Set NProc: %2d\n", thread->th.th_set_nproc);
-#if OMP_40_ENABLED
__kmp_printf(" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
-#endif
__kmp_print_structure_thread(" Next in pool: ",
thread->th.th_next_pool);
__kmp_printf("\n");
@@ -3434,7 +3310,6 @@ void __kmp_print_structure(void) {
__kmp_print_structure_thread(" Uber Thread: ",
root->r.r_uber_thread);
__kmp_printf(" Active?: %2d\n", root->r.r_active);
- __kmp_printf(" Nested?: %2d\n", root->r.r_nested);
__kmp_printf(" In Parallel: %2d\n",
KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
__kmp_printf("\n");
@@ -3775,15 +3650,12 @@ int __kmp_register_root(int initial_thread) {
if (!root_thread->th.th_serial_team) {
kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
KF_TRACE(10, ("__kmp_register_root: before serial_team\n"));
- root_thread->th.th_serial_team =
- __kmp_allocate_team(root, 1, 1,
+ root_thread->th.th_serial_team = __kmp_allocate_team(
+ root, 1, 1,
#if OMPT_SUPPORT
- ompt_data_none, // root parallel id
-#endif
-#if OMP_40_ENABLED
- proc_bind_default,
+ ompt_data_none, // root parallel id
#endif
- &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
+ proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
}
KMP_ASSERT(root_thread->th.th_serial_team);
KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n",
@@ -3834,21 +3706,27 @@ int __kmp_register_root(int initial_thread) {
KMP_INIT_BARRIER_STATE);
#if KMP_AFFINITY_SUPPORTED
-#if OMP_40_ENABLED
root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
-#endif
if (TCR_4(__kmp_init_middle)) {
__kmp_affinity_set_init_mask(gtid, TRUE);
}
#endif /* KMP_AFFINITY_SUPPORTED */
-#if OMP_50_ENABLED
root_thread->th.th_def_allocator = __kmp_def_allocator;
root_thread->th.th_prev_level = 0;
root_thread->th.th_prev_num_threads = 1;
-#endif
+
+ kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
+ tmp->cg_root = root_thread;
+ tmp->cg_thread_limit = __kmp_cg_max_nth;
+ tmp->cg_nthreads = 1;
+ KA_TRACE(100, ("__kmp_register_root: Thread %p created node %p with"
+ " cg_nthreads init to 1\n",
+ root_thread, tmp));
+ tmp->up = NULL;
+ root_thread->th.th_cg_roots = tmp;
__kmp_root_counter++;
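The new th_cg_roots field behaves as a per-thread stack of contention-group descriptors; a sketch of the node lifecycle as it appears across this patch:

// kmp_cg_root_t lifecycle (as visible in this patch):
//   __kmp_register_root():   root thread allocates the bottom node
//                            (up == NULL, cg_thread_limit = __kmp_cg_max_nth,
//                            cg_nthreads = 1)
//   __kmp_initialize_info(): workers adopt the master's node and increment
//                            cg_nthreads
//   __kmp_teams_master():    each teams master pushes a fresh node on top,
//                            becoming a new CG root with its own thread limit
//   __kmp_free_thread() / __kmp_free_team() / __kmp_reset_root():
//                            nodes are popped and freed once cg_nthreads
//                            reaches zero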
@@ -3864,11 +3742,11 @@ int __kmp_register_root(int initial_thread) {
ompt_thread_initial, __ompt_get_thread_data_internal());
}
ompt_data_t *task_data;
- __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
- if (ompt_enabled.ompt_callback_task_create) {
- ompt_callbacks.ompt_callback(ompt_callback_task_create)(
- NULL, NULL, task_data, ompt_task_initial, 0, NULL);
- // initial task has nothing to return to
+ ompt_data_t *parallel_data;
+ __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data, NULL);
+ if (ompt_enabled.ompt_callback_implicit_task) {
+ ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+ ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
}
ompt_set_thread_state(root_thread, ompt_state_work_serial);
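The initial task is now reported through the implicit-task callback rather than task_create; a minimal tool-side sketch matching the dispatch above, assuming the standard ompt_callback_implicit_task_t signature from omp-tools.h:

// Sketch only: tool receiver for the callback invoked above.
static void on_implicit_task(ompt_scope_endpoint_t endpoint,
                             ompt_data_t *parallel_data,
                             ompt_data_t *task_data,
                             unsigned int actual_parallelism,
                             unsigned int index, int flags) {
  if ((flags & ompt_task_initial) && endpoint == ompt_scope_begin) {
    // Initial task of a newly registered root begins; the matching
    // ompt_scope_end arrives from __kmp_reset_root (next hunk).
  }
}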
@@ -3958,6 +3836,13 @@ static int __kmp_reset_root(int gtid, kmp_root_t *root) {
#endif /* KMP_OS_WINDOWS */
#if OMPT_SUPPORT
+ ompt_data_t *task_data;
+ ompt_data_t *parallel_data;
+ __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data, NULL);
+ if (ompt_enabled.ompt_callback_implicit_task) {
+ ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
+ ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
+ }
if (ompt_enabled.ompt_callback_thread_end) {
ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
&(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
@@ -3966,8 +3851,19 @@ static int __kmp_reset_root(int gtid, kmp_root_t *root) {
TCW_4(__kmp_nth,
__kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
- root->r.r_cg_nthreads--;
-
+ i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
+ KA_TRACE(100, ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
+ " to %d\n",
+ root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
+ root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
+ if (i == 1) {
+ // need to free contention group structure
+ KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
+ root->r.r_uber_thread->th.th_cg_roots->cg_root);
+ KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
+ __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
+ root->r.r_uber_thread->th.th_cg_roots = NULL;
+ }
__kmp_reap_thread(root->r.r_uber_thread, 1);
// We cannot put root thread to __kmp_thread_pool, so we have to reap it instead
@@ -4001,7 +3897,6 @@ void __kmp_unregister_root_current_thread(int gtid) {
KMP_MB();
-#if OMP_45_ENABLED
kmp_info_t *thread = __kmp_threads[gtid];
kmp_team_t *team = thread->th.th_team;
kmp_task_team_t *task_team = thread->th.th_task_team;
@@ -4014,7 +3909,6 @@ void __kmp_unregister_root_current_thread(int gtid) {
#endif
__kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
}
-#endif
__kmp_reset_root(gtid, root);
@@ -4098,12 +3992,10 @@ static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
else // no tasking --> always safe to reap
this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
-#if OMP_40_ENABLED
this_thr->th.th_set_proc_bind = proc_bind_default;
#if KMP_AFFINITY_SUPPORTED
this_thr->th.th_new_place = this_thr->th.th_current_place;
#endif
-#endif
this_thr->th.th_root = master->th.th_root;
/* setup the thread's cache of the team structure */
@@ -4141,6 +4033,33 @@ static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
this_thr->th.th_pri_head = NULL;
}
+ if (this_thr != master && // Master's CG root is initialized elsewhere
+ this_thr->th.th_cg_roots != master->th.th_cg_roots) { // CG root not set
+ // Make new thread's CG root same as master's
+ KMP_DEBUG_ASSERT(master->th.th_cg_roots);
+ kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
+ if (tmp) {
+ // worker changes CG, need to check if old CG should be freed
+ int i = tmp->cg_nthreads--;
+ KA_TRACE(100, ("__kmp_initialize_info: Thread %p decrement cg_nthreads"
+ " on node %p of thread %p to %d\n",
+ this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
+ if (i == 1) {
+ __kmp_free(tmp); // last thread left CG --> free it
+ }
+ }
+ this_thr->th.th_cg_roots = master->th.th_cg_roots;
+ // Increment new thread's CG root's counter to add the new thread
+ this_thr->th.th_cg_roots->cg_nthreads++;
+ KA_TRACE(100, ("__kmp_initialize_info: Thread %p increment cg_nthreads on"
+ " node %p of thread %p to %d\n",
+ this_thr, this_thr->th.th_cg_roots,
+ this_thr->th.th_cg_roots->cg_root,
+ this_thr->th.th_cg_roots->cg_nthreads));
+ this_thr->th.th_current_task->td_icvs.thread_limit =
+ this_thr->th.th_cg_roots->cg_thread_limit;
+ }
+
/* Initialize dynamic dispatch */
{
volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
@@ -4155,9 +4074,7 @@ static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
dispatch->th_disp_index = 0;
-#if OMP_45_ENABLED
dispatch->th_doacross_buf_idx = 0;
-#endif
if (!dispatch->th_disp_buffer) {
dispatch->th_disp_buffer =
(dispatch_private_info_t *)__kmp_allocate(disp_size);
@@ -4222,22 +4139,25 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
/* first, try to get one from the thread pool */
if (__kmp_thread_pool) {
-
new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
__kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
if (new_thr == __kmp_thread_pool_insert_pt) {
__kmp_thread_pool_insert_pt = NULL;
}
TCW_4(new_thr->th.th_in_pool, FALSE);
- // Don't touch th_active_in_pool or th_active.
- // The worker thread adjusts those flags as it sleeps/awakens.
- __kmp_thread_pool_nth--;
+ __kmp_suspend_initialize_thread(new_thr);
+ __kmp_lock_suspend_mx(new_thr);
+ if (new_thr->th.th_active_in_pool == TRUE) {
+ KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
+ KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
+ new_thr->th.th_active_in_pool = FALSE;
+ }
+ __kmp_unlock_suspend_mx(new_thr);
KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
__kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
KMP_ASSERT(!new_thr->th.th_team);
KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
- KMP_DEBUG_ASSERT(__kmp_thread_pool_nth >= 0);
/* setup the thread structure */
__kmp_initialize_info(new_thr, team, new_tid,
@@ -4245,7 +4165,6 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
TCW_4(__kmp_nth, __kmp_nth + 1);
- root->r.r_cg_nthreads++;
new_thr->th.th_task_state = 0;
new_thr->th.th_task_state_top = 0;
@@ -4334,10 +4253,8 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
#if OMPT_SUPPORT
ompt_data_none, // root parallel id
#endif
-#if OMP_40_ENABLED
- proc_bind_default,
-#endif
- &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
+ proc_bind_default, &r_icvs,
+ 0 USE_NESTED_HOT_ARG(NULL));
}
KMP_ASSERT(serial_team);
serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for
@@ -4381,17 +4298,15 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
new_thr->th.th_blocking = false;
#endif
-#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
+#if KMP_AFFINITY_SUPPORTED
new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
#endif
-#if OMP_50_ENABLED
new_thr->th.th_def_allocator = __kmp_def_allocator;
new_thr->th.th_prev_level = 0;
new_thr->th.th_prev_num_threads = 1;
-#endif
TCW_4(new_thr->th.th_in_pool, FALSE);
new_thr->th.th_active_in_pool = FALSE;
@@ -4401,8 +4316,6 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
__kmp_all_nth++;
__kmp_nth++;
- root->r.r_cg_nthreads++;
-
// if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
// numbers of procs, and method #2 (keyed API call) for higher numbers.
if (__kmp_adjust_gtid_mode) {
@@ -4504,8 +4417,6 @@ static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
team->t.t_ordered.dt.t_value = 0;
team->t.t_master_active = FALSE;
- memset(&team->t.t_taskq, '\0', sizeof(kmp_taskq_t));
-
#ifdef KMP_DEBUG
team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
#endif
@@ -4540,7 +4451,7 @@ __kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
}
#endif
-#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
+#if KMP_AFFINITY_SUPPORTED
// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
// It calculates the worker + master thread's partition based upon the parent
@@ -4579,12 +4490,10 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
th->th.th_first_place = first_place;
th->th.th_last_place = last_place;
th->th.th_new_place = masters_place;
-#if OMP_50_ENABLED
if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
team->t.t_display_affinity != 1) {
team->t.t_display_affinity = 1;
}
-#endif
KA_TRACE(100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n",
@@ -4618,12 +4527,10 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
th->th.th_first_place = first_place;
th->th.th_last_place = last_place;
th->th.th_new_place = place;
-#if OMP_50_ENABLED
if (__kmp_display_affinity && place != th->th.th_current_place &&
team->t.t_display_affinity != 1) {
team->t.t_display_affinity = 1;
}
-#endif
KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n",
@@ -4645,12 +4552,10 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
th->th.th_first_place = first_place;
th->th.th_last_place = last_place;
th->th.th_new_place = place;
-#if OMP_50_ENABLED
if (__kmp_display_affinity && place != th->th.th_current_place &&
team->t.t_display_affinity != 1) {
team->t.t_display_affinity = 1;
}
-#endif
s_count++;
if ((s_count == S) && rem && (gap_ct == gap)) {
@@ -4719,12 +4624,10 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
th->th.th_first_place = place;
th->th.th_new_place = place;
-#if OMP_50_ENABLED
if (__kmp_display_affinity && place != th->th.th_current_place &&
team->t.t_display_affinity != 1) {
team->t.t_display_affinity = 1;
}
-#endif
s_count = 1;
while (s_count < S) {
if (place == last_place) {
@@ -4816,12 +4719,10 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
th->th.th_first_place = first;
th->th.th_new_place = place;
th->th.th_last_place = last;
-#if OMP_50_ENABLED
if (__kmp_display_affinity && place != th->th.th_current_place &&
team->t.t_display_affinity != 1) {
team->t.t_display_affinity = 1;
}
-#endif
KA_TRACE(100,
("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], spacing = %.4f\n",
@@ -4850,12 +4751,10 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
th->th.th_first_place = place;
th->th.th_last_place = place;
th->th.th_new_place = place;
-#if OMP_50_ENABLED
if (__kmp_display_affinity && place != th->th.th_current_place &&
team->t.t_display_affinity != 1) {
team->t.t_display_affinity = 1;
}
-#endif
s_count++;
if ((s_count == S) && rem && (gap_ct == gap)) {
@@ -4901,7 +4800,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
}
-#endif /* OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED */
+#endif // KMP_AFFINITY_SUPPORTED
/* allocate a new team data structure to use. take one off of the free pool if
available */
@@ -4910,9 +4809,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#if OMPT_SUPPORT
ompt_data_t ompt_parallel_data,
#endif
-#if OMP_40_ENABLED
kmp_proc_bind_t new_proc_bind,
-#endif
kmp_internal_control_t *new_icvs,
int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
@@ -4954,7 +4851,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#endif
// Optimization to use a "hot" team
if (use_hot_team && new_nproc > 1) {
- KMP_DEBUG_ASSERT(new_nproc == max_nproc);
+ KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
#if KMP_NESTED_HOT_TEAMS
team = hot_teams[level].hot_team;
#else
@@ -4993,7 +4890,6 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
team->t.t_threads[0], team));
__kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
-#if OMP_40_ENABLED
#if KMP_AFFINITY_SUPPORTED
if ((team->t.t_size_changed == 0) &&
(team->t.t_proc_bind == new_proc_bind)) {
@@ -5012,7 +4908,6 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#else
KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
#endif /* KMP_AFFINITY_SUPPORTED */
-#endif /* OMP_40_ENABLED */
} else if (team->t.t_nproc > new_nproc) {
KA_TRACE(20,
("__kmp_allocate_team: decreasing hot team thread count to %d\n",
@@ -5060,10 +4955,11 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
__kmp_reinitialize_team(team, new_icvs,
root->r.r_uber_thread->th.th_ident);
- /* update the remaining threads */
+ // Update remaining threads
for (f = 0; f < new_nproc; ++f) {
team->t.t_threads[f]->th.th_team_nproc = new_nproc;
}
+
// restore the current task state of the master thread: should be the
// implicit task
KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
@@ -5079,12 +4975,10 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
}
#endif
-#if OMP_40_ENABLED
KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
#if KMP_AFFINITY_SUPPORTED
__kmp_partition_places(team);
#endif
-#endif
} else { // team->t.t_nproc < new_nproc
#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
kmp_affin_mask_t *old_mask;
@@ -5191,6 +5085,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
for (f = 0; f < team->t.t_nproc; ++f)
__kmp_initialize_info(team->t.t_threads[f], team, f,
__kmp_gtid_from_tid(f, team));
+
if (level) { // set th_task_state for new threads in nested hot team
// __kmp_initialize_info() no longer zeroes th_task_state, so we should
// only need to set the th_task_state for the new threads. th_task_state
@@ -5215,15 +5110,12 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
}
#endif
-#if OMP_40_ENABLED
KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
#if KMP_AFFINITY_SUPPORTED
__kmp_partition_places(team);
#endif
-#endif
} // Check changes in number of threads
-#if OMP_40_ENABLED
kmp_info_t *master = team->t.t_threads[0];
if (master->th.th_teams_microtask) {
for (f = 1; f < new_nproc; ++f) {
@@ -5234,7 +5126,6 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
thr->th.th_teams_size = master->th.th_teams_size;
}
}
-#endif /* OMP_40_ENABLED */
#if KMP_NESTED_HOT_TEAMS
if (level) {
// Sync barrier state for nested hot teams, not needed for outermost hot
@@ -5315,9 +5206,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
}
}
-#if OMP_40_ENABLED
team->t.t_proc_bind = new_proc_bind;
-#endif
KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",
team->t.t_id));
@@ -5382,9 +5271,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
}
}
-#if OMP_40_ENABLED
team->t.t_proc_bind = new_proc_bind;
-#endif
#if OMPT_SUPPORT
__ompt_team_assign_id(team, ompt_parallel_data);
@@ -5479,8 +5366,8 @@ void __kmp_free_team(kmp_root_t *root,
for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
if (task_team != NULL) {
- for (f = 0; f < team->t.t_nproc;
- ++f) { // Have all threads unref task teams
+ for (f = 0; f < team->t.t_nproc; ++f) { // threads unref task teams
+ KMP_DEBUG_ASSERT(team->t.t_threads[f]);
team->t.t_threads[f]->th.th_task_team = NULL;
}
KA_TRACE(
@@ -5511,6 +5398,32 @@ void __kmp_free_team(kmp_root_t *root,
/* TODO limit size of team pool, call reap_team if pool too large */
team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
__kmp_team_pool = (volatile kmp_team_t *)team;
+ } else { // Check if team was created for the masters in a teams construct
+ // See if first worker is a CG root
+ KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
+ team->t.t_threads[1]->th.th_cg_roots);
+ if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
+ // Clean up the CG root nodes on workers so that this team can be re-used
+ for (f = 1; f < team->t.t_nproc; ++f) {
+ kmp_info_t *thr = team->t.t_threads[f];
+ KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
+ thr->th.th_cg_roots->cg_root == thr);
+ // Pop current CG root off list
+ kmp_cg_root_t *tmp = thr->th.th_cg_roots;
+ thr->th.th_cg_roots = tmp->up;
+ KA_TRACE(100, ("__kmp_free_team: Thread %p popping node %p and moving"
+ " up to node %p. cg_nthreads was %d\n",
+ thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
+ int i = tmp->cg_nthreads--;
+ if (i == 1) {
+ __kmp_free(tmp); // free CG if we are the last thread in it
+ }
+ // Restore current task's thread_limit from CG root
+ if (thr->th.th_cg_roots)
+ thr->th.th_current_task->td_icvs.thread_limit =
+ thr->th.th_cg_roots->cg_thread_limit;
+ }
+ }
}
KMP_MB();
@@ -5566,7 +5479,6 @@ kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
void __kmp_free_thread(kmp_info_t *this_th) {
int gtid;
kmp_info_t **scan;
- kmp_root_t *root = this_th->th.th_root;
KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
__kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
@@ -5591,6 +5503,29 @@ void __kmp_free_thread(kmp_info_t *this_th) {
TCW_PTR(this_th->th.th_root, NULL);
TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */
+ while (this_th->th.th_cg_roots) {
+ this_th->th.th_cg_roots->cg_nthreads--;
+ KA_TRACE(100, ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
+ " %p of thread %p to %d\n",
+ this_th, this_th->th.th_cg_roots,
+ this_th->th.th_cg_roots->cg_root,
+ this_th->th.th_cg_roots->cg_nthreads));
+ kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
+ if (tmp->cg_root == this_th) { // Thread is a cg_root
+ KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
+ KA_TRACE(
+ 5, ("__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
+ this_th->th.th_cg_roots = tmp->up;
+ __kmp_free(tmp);
+ } else { // Worker thread
+ if (tmp->cg_nthreads == 0) { // last thread leaves contention group
+ __kmp_free(tmp);
+ }
+ this_th->th.th_cg_roots = NULL;
+ break;
+ }
+ }
+
/* If the implicit task assigned to this thread can be used by other threads
* -> multiple threads can share the data and try to free the task at
* __kmp_reap_thread at exit. This duplicate use of the task data can happen
@@ -5631,10 +5566,20 @@ void __kmp_free_thread(kmp_info_t *this_th) {
(this_th->th.th_info.ds.ds_gtid <
this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
TCW_4(this_th->th.th_in_pool, TRUE);
- __kmp_thread_pool_nth++;
+ __kmp_suspend_initialize_thread(this_th);
+ __kmp_lock_suspend_mx(this_th);
+ if (this_th->th.th_active == TRUE) {
+ KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
+ this_th->th.th_active_in_pool = TRUE;
+ }
+#if KMP_DEBUG
+ else {
+ KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
+ }
+#endif
+ __kmp_unlock_suspend_mx(this_th);
TCW_4(__kmp_nth, __kmp_nth - 1);
- root->r.r_cg_nthreads--;
#ifdef KMP_ADJUST_BLOCKTIME
/* Adjust blocktime back to user setting or default if necessary */
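Taken together with the allocate-side hunk earlier, the two pool transitions now maintain a single invariant; the summary below is an interpretation of those hunks, not text from the patch:

// Inferred invariant: __kmp_thread_pool_active_nth counts pool threads whose
// th_active is still TRUE (i.e. not yet suspended).
//   enter pool (__kmp_free_thread):     th_active == TRUE  => atomic INC,
//                                       th_active_in_pool = TRUE
//   leave pool (__kmp_allocate_thread): th_active_in_pool == TRUE => atomic
//                                       DEC, th_active_in_pool = FALSE
// Both updates run under the thread's suspend mutex, replacing the plain
// __kmp_thread_pool_nth counter this patch deletes.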
@@ -5851,7 +5796,6 @@ static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
gtid = thread->th.th_info.ds.ds_gtid;
if (!is_root) {
-
if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
/* Assume the threads are at the fork barrier here */
KA_TRACE(
@@ -5882,10 +5826,6 @@ static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
}
-
- // Decrement # of [worker] threads in the pool.
- KMP_DEBUG_ASSERT(__kmp_thread_pool_nth > 0);
- --__kmp_thread_pool_nth;
}
__kmp_free_implicit_task(thread);
@@ -6264,16 +6204,13 @@ void __kmp_internal_end_thread(int gtid_req) {
}
}
#if KMP_DYNAMIC_LIB
- // AC: lets not shutdown the Linux* OS dynamic library at the exit of uber
- // thread, because we will better shutdown later in the library destructor.
- // The reason of this change is performance problem when non-openmp thread in
- // a loop forks and joins many openmp threads. We can save a lot of time
- // keeping worker threads alive until the program shutdown.
- // OM: Removed Linux* OS restriction to fix the crash on OS X* (DPD200239966)
- // and Windows(DPD200287443) that occurs when using critical sections from
- // foreign threads.
- KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
- return;
+ if (__kmp_pause_status != kmp_hard_paused)
+ // AC: let's not shut down the dynamic library at the exit of uber thread,
+ // because it is better to shut down later in the library destructor.
+ {
+ KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
+ return;
+ }
#endif
/* synchronize the termination process */
__kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
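The kmp_hard_paused test connects to the OpenMP 5.0 pause API; a hedged user-level sketch of how the hard-pause state is reached (API names from the OpenMP 5.0 spec, not from this file):

// Sketch only: a hard pause releases runtime resources, which is why the
// early return above must be skipped when __kmp_pause_status is hard paused.
#include <omp.h>
int rc = omp_pause_resource_all(omp_pause_hard); // returns 0 on success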
@@ -6409,6 +6346,7 @@ void __kmp_register_library_startup(void) {
// library. Assume the other library is alive.
// WARN( ... ); // TODO: Issue a warning.
file_name = "unknown library";
+ KMP_FALLTHROUGH();
// Attention! Falling through to the next case. That's intentional.
case 1: { // Neighbor is alive.
// Check it is allowed.
@@ -6747,11 +6685,9 @@ static void __kmp_do_serial_initialize(void) {
__kmp_env_print();
}
-#if OMP_40_ENABLED
if (__kmp_display_env || __kmp_display_env_verbose) {
__kmp_env_print_2();
}
-#endif // OMP_40_ENABLED
#if OMPT_SUPPORT
ompt_post_init();
@@ -6919,6 +6855,7 @@ void __kmp_parallel_initialize(void) {
if (!__kmp_init_middle) {
__kmp_do_middle_initialize();
}
+ __kmp_resume_if_hard_paused();
/* begin initialization */
KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));
@@ -6984,10 +6921,7 @@ void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
// this_thr->th.th_info.ds.ds_tid ] );
dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
-#if OMP_45_ENABLED
- dispatch->th_doacross_buf_idx =
- 0; /* reset the doacross dispatch buffer counter */
-#endif
+ dispatch->th_doacross_buf_idx = 0; // reset doacross dispatch buffer counter
if (__kmp_env_consistency_check)
__kmp_push_parallel(gtid, team->t.t_ident);
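[Editor's note: th_doacross_buf_idx backs OpenMP 4.5 doacross loops. As an illustration, hypothetical user code (not part of this file) that drives that counter:

  #include <cstdio>
  int main() {
    const int N = 8, M = 8;
    static double a[8][8] = {{1.0}};
  #pragma omp parallel for ordered(2)
    for (int i = 1; i < N; ++i)
      for (int j = 1; j < M; ++j) {
        // Each sink/source pair is served through the doacross dispatch
        // buffer whose index the runtime resets above.
  #pragma omp ordered depend(sink : i - 1, j) depend(sink : i, j - 1)
        a[i][j] = 0.5 * (a[i - 1][j] + a[i][j - 1]);
  #pragma omp ordered depend(source)
      }
    std::printf("%f\n", a[N - 1][M - 1]);
  }
]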
@@ -7046,21 +6980,33 @@ int __kmp_invoke_task_func(int gtid) {
}
#endif
- {
- KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
- KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
- rc =
- __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
- tid, (int)team->t.t_argc, (void **)team->t.t_argv
+#if KMP_STATS_ENABLED
+ stats_state_e previous_state = KMP_GET_THREAD_STATE();
+ if (previous_state == stats_state_e::TEAMS_REGION) {
+ KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
+ } else {
+ KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
+ }
+ KMP_SET_THREAD_STATE(IMPLICIT_TASK);
+#endif
+
+ rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
+ tid, (int)team->t.t_argc, (void **)team->t.t_argv
#if OMPT_SUPPORT
- ,
- exit_runtime_p
+ ,
+ exit_runtime_p
#endif
- );
+ );
#if OMPT_SUPPORT
- *exit_runtime_p = NULL;
+ *exit_runtime_p = NULL;
#endif
+
+#if KMP_STATS_ENABLED
+ if (previous_state == stats_state_e::TEAMS_REGION) {
+ KMP_SET_THREAD_STATE(previous_state);
}
+ KMP_POP_PARTITIONED_TIMER();
+#endif
#if USE_ITT_BUILD
if (__itt_stack_caller_create_ptr) {
@@ -7074,7 +7020,6 @@ int __kmp_invoke_task_func(int gtid) {
return rc;
}
-#if OMP_40_ENABLED
void __kmp_teams_master(int gtid) {
// This routine is called by all master threads in teams construct
kmp_info_t *thr = __kmp_threads[gtid];
@@ -7085,6 +7030,19 @@ void __kmp_teams_master(int gtid) {
KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
__kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
+
+ // This thread is a new CG root. Set up the proper variables.
+ kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
+ tmp->cg_root = thr; // Make thr the CG root
+ // Init to thread limit that was stored when league masters were forked
+ tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
+ tmp->cg_nthreads = 1; // Init counter to one active thread, this one
+ KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"
+ " cg_nthreads to 1\n",
+ thr, tmp));
+ tmp->up = thr->th.th_cg_roots;
+ thr->th.th_cg_roots = tmp;
+
// Launch league of teams now, but not let workers execute
// (they hang on fork barrier until next parallel)
#if INCLUDE_SSC_MARKS
@@ -7096,7 +7054,9 @@ void __kmp_teams_master(int gtid) {
#if INCLUDE_SSC_MARKS
SSC_MARK_JOINING();
#endif
-
+ // If the team size was reduced from the limit, set it to the new size
+ if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
+ thr->th.th_teams_size.nth = thr->th.th_team_nproc;
// AC: last parameter "1" eliminates join barrier which won't work because
// worker threads are in a fork barrier waiting for more parallel regions
__kmp_join_call(loc, gtid
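[Editor's note: for orientation, a stand-alone model of the contention-group bookkeeping the block above performs. Field names follow the usage here; the authoritative definition of kmp_cg_root_t lives elsewhere in the runtime:

  struct cg_root_model {
    void *owner;        // thread heading this contention group (cg_root)
    int thread_limit;   // thread-limit ICV captured at league fork
    int nthreads;       // live threads accounted to this group (cg_nthreads)
    cg_root_model *up;  // enclosing contention group
  };
  // Push a new CG root, mirroring "tmp->up = thr->th.th_cg_roots;
  // thr->th.th_cg_roots = tmp;" above. Each teams master thus carries a
  // stack of CG roots, one per nesting level of contention groups.
  void cg_push(cg_root_model *&top, void *thr, int limit) {
    top = new cg_root_model{thr, limit, /*nthreads=*/1, top};
  }
]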
@@ -7121,7 +7081,6 @@ int __kmp_invoke_teams_master(int gtid) {
__kmp_run_after_invoked_task(gtid, 0, this_thr, team);
return 1;
}
-#endif /* OMP_40_ENABLED */
/* this sets the requested number of threads for the next parallel region
encountered by this team. since this should be enclosed in the forkjoin
@@ -7135,8 +7094,6 @@ void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
thr->th.th_set_nproc = num_threads;
}
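[Editor's note: the usual caller here is compiler-generated code via the __kmpc_push_num_threads entry point, issued immediately before the fork call. A minimal user-level sketch; the lowering comment is approximate and exact codegen varies by compiler version:

  #include <omp.h>
  #include <cstdio>
  int main() {
    // For this directive, compilers emit (roughly):
    //   __kmpc_push_num_threads(&loc, __kmpc_global_thread_num(&loc), 4);
    //   __kmpc_fork_call(&loc, 0, outlined_body);
    // where __kmpc_push_num_threads lands in __kmp_push_num_threads above.
  #pragma omp parallel num_threads(4)
    std::printf("thread %d of %d\n", omp_get_thread_num(),
                omp_get_num_threads());
  }
]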
-#if OMP_40_ENABLED
-
/* this sets the requested number of teams for the teams region and/or
the number of threads for the next parallel region encountered */
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
@@ -7170,10 +7127,14 @@ void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
num_threads = __kmp_teams_max_nth / num_teams;
}
} else {
+ // This thread will be the master of the league masters
+ // Store new thread limit; old limit is saved in th_cg_roots list
+ thr->th.th_current_task->td_icvs.thread_limit = num_threads;
+
if (num_teams * num_threads > __kmp_teams_max_nth) {
int new_threads = __kmp_teams_max_nth / num_teams;
if (!__kmp_reserve_warn) { // user asked for too many threads
- __kmp_reserve_warn = 1; // that conflicts with KMP_TEAMS_THREAD_LIMIT
+ __kmp_reserve_warn = 1; // conflicts with KMP_TEAMS_THREAD_LIMIT
__kmp_msg(kmp_ms_warning,
KMP_MSG(CantFormThrTeam, num_threads, new_threads),
KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
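[Editor's note: a worked instance of the clamp above, using a hypothetical cap; the real __kmp_teams_max_nth is computed at startup and influenced by KMP_TEAMS_THREAD_LIMIT:

  int cap = 256;      // stands in for __kmp_teams_max_nth (hypothetical value)
  int num_teams = 50; // user asked for 50 teams ...
  int num_threads = 8; // ... of 8 threads each: 400 > 256
  if (num_teams * num_threads > cap)
    num_threads = cap / num_teams; // integer division: 256 / 50 == 5
  // Result: 50 teams x 5 threads, plus a one-time CantFormThrTeam warning.
]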
@@ -7190,8 +7151,6 @@ void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
thr->th.th_set_proc_bind = proc_bind;
}
-#endif /* OMP_40_ENABLED */
-
/* Launch the worker threads into the microtask. */
void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
@@ -7216,15 +7175,11 @@ void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
int i;
for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
team->t.t_disp_buffer[i].buffer_index = i;
-#if OMP_45_ENABLED
team->t.t_disp_buffer[i].doacross_buf_idx = i;
-#endif
}
} else {
team->t.t_disp_buffer[0].buffer_index = 0;
-#if OMP_45_ENABLED
team->t.t_disp_buffer[0].doacross_buf_idx = 0;
-#endif
}
KMP_MB(); /* Flush all pending memory write invalidates. */
@@ -7282,11 +7237,13 @@ void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
if (ompt_enabled.ompt_callback_sync_region_wait) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
- ompt_sync_region_barrier, ompt_scope_end, NULL, task_data, codeptr);
+ ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
+ codeptr);
}
if (ompt_enabled.ompt_callback_sync_region) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
- ompt_sync_region_barrier, ompt_scope_end, NULL, task_data, codeptr);
+ ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
+ codeptr);
}
#endif
if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
@@ -7476,12 +7433,10 @@ void __kmp_cleanup(void) {
__kmp_nested_proc_bind.bind_types = NULL;
__kmp_nested_proc_bind.size = 0;
__kmp_nested_proc_bind.used = 0;
-#if OMP_50_ENABLED
if (__kmp_affinity_format) {
KMP_INTERNAL_FREE(__kmp_affinity_format);
__kmp_affinity_format = NULL;
}
-#endif
__kmp_i18n_catclose();
@@ -7625,13 +7580,14 @@ void __kmp_aux_set_library(enum library_type arg) {
switch (__kmp_library) {
case library_serial: {
KMP_INFORM(LibraryIsSerial);
- (void)__kmp_change_library(TRUE);
} break;
case library_turnaround:
- (void)__kmp_change_library(TRUE);
+ if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
+ __kmp_use_yield = 2; // only yield when oversubscribed
break;
case library_throughput:
- (void)__kmp_change_library(FALSE);
+ if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
+ __kmp_dflt_blocktime = 200;
break;
default:
KMP_FATAL(UnknownLibraryType, arg);
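[Editor's note: two user-visible routes into this switch; the kmp_* extension entry points are declared in this runtime's omp.h:

  // Route 1: environment, before the program starts:
  //   KMP_LIBRARY=throughput ./a.out
  // Route 2: the kmp_* extension API, at run time:
  #include <omp.h>
  int main() {
    kmp_set_library_throughput(); // same switch case as KMP_LIBRARY=throughput
  #pragma omp parallel
    { /* ... */ }
  }
]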
@@ -7696,7 +7652,6 @@ int __kmp_aux_get_num_teams() {
/* ------------------------------------------------------------------------ */
-#if OMP_50_ENABLED
/*
* Affinity Format Parser
*
@@ -7969,7 +7924,6 @@ void __kmp_aux_display_affinity(int gtid, const char *format) {
__kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
__kmp_str_buf_free(&buf);
}
-#endif // OMP_50_ENABLED
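[Editor's note: the user-level view of the parser above, via the OpenMP 5.0 affinity-display API. Field specifiers per the 5.0 spec: %H host, %P process id, %n thread number, %N team size, %A affinity mask:

  #include <omp.h>
  int main() {
    // Equivalent to setting OMP_AFFINITY_FORMAT in the environment.
    omp_set_affinity_format("host=%H pid=%P thread=%n/%N affinity=%A");
  #pragma omp parallel
    omp_display_affinity(NULL); // NULL: use the format installed above
  }
]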
/* ------------------------------------------------------------------------ */
@@ -8023,11 +7977,7 @@ void __kmp_aux_set_defaults(char const *str, int len) {
}
__kmp_env_initialize(str);
- if (__kmp_settings
-#if OMP_40_ENABLED
- || __kmp_display_env || __kmp_display_env_verbose
-#endif // OMP_40_ENABLED
- ) {
+ if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
__kmp_env_print();
}
} // __kmp_aux_set_defaults
@@ -8104,7 +8054,7 @@ __kmp_determine_reduction_method(
#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS
-#if KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_HURD
+#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_HURD
// basic tuning
@@ -8190,3 +8140,78 @@ __kmp_determine_reduction_method(
kmp_int32 __kmp_get_reduce_method(void) {
return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
}
+
+// Soft pause sets up threads to ignore blocktime and just go to sleep.
+// Spin-wait code checks __kmp_pause_status and reacts accordingly.
+void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }
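[Editor's note: the "reacts accordingly" part lives in the wait code, not here. Schematically, with hypothetical helper names, not the runtime's real wait loop:

  // while (!flag_is_set()) {
  //   if (__kmp_pause_status == kmp_soft_paused)
  //     suspend_now();                   // skip the blocktime spin phase
  //   else if (spun_longer_than_blocktime())
  //     suspend_now();                   // normal KMP_BLOCKTIME path
  //   // else keep spinning
  // }
]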
+
+// Hard pause shuts down the runtime completely. Resume happens naturally when
+// OpenMP is used subsequently.
+void __kmp_hard_pause() {
+ __kmp_pause_status = kmp_hard_paused;
+ __kmp_internal_end_thread(-1);
+}
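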
+
+// Soft resume resets __kmp_pause_status to kmp_not_paused and wakes up any
+// threads that went to sleep while the runtime was paused.
+void __kmp_resume_if_soft_paused() {
+ if (__kmp_pause_status == kmp_soft_paused) {
+ __kmp_pause_status = kmp_not_paused;
+
+ for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
+ kmp_info_t *thread = __kmp_threads[gtid];
+ if (thread) { // Wake it if sleeping
+ kmp_flag_64 fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
+ if (fl.is_sleeping())
+ fl.resume(gtid);
+ else if (__kmp_try_suspend_mx(thread)) { // got suspend lock
+ __kmp_unlock_suspend_mx(thread); // unlock it; it won't sleep
+ } else { // thread holds the lock and may sleep soon
+ do { // until either the thread sleeps, or we can get the lock
+ if (fl.is_sleeping()) {
+ fl.resume(gtid);
+ break;
+ } else if (__kmp_try_suspend_mx(thread)) {
+ __kmp_unlock_suspend_mx(thread);
+ break;
+ }
+ } while (1);
+ }
+ }
+ }
+ }
+}
+
+// This function is called via __kmpc_pause_resource. Returns 0 if successful.
+// TODO: add warning messages
+int __kmp_pause_resource(kmp_pause_status_t level) {
+ if (level == kmp_not_paused) { // requesting resume
+ if (__kmp_pause_status == kmp_not_paused) {
+ // error message about runtime not being paused, so can't resume
+ return 1;
+ } else {
+ KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
+ __kmp_pause_status == kmp_hard_paused);
+ __kmp_pause_status = kmp_not_paused;
+ return 0;
+ }
+ } else if (level == kmp_soft_paused) { // requesting soft pause
+ if (__kmp_pause_status != kmp_not_paused) {
+ // error message about already being paused
+ return 1;
+ } else {
+ __kmp_soft_pause();
+ return 0;
+ }
+ } else if (level == kmp_hard_paused) { // requesting hard pause
+ if (__kmp_pause_status != kmp_not_paused) {
+ // error message about already being paused
+ return 1;
+ } else {
+ __kmp_hard_pause();
+ return 0;
+ }
+ } else {
+ // error message about invalid level
+ return 1;
+ }
+}
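[Editor's note: __kmpc_pause_resource forwards here, and the OpenMP 5.0 API wraps that entry point. A sketch assuming the 5.0 omp_pause_resource_all interface this patch series targets:

  #include <omp.h>
  #include <cstdio>
  int main() {
  #pragma omp parallel
    { /* warm up the runtime */ }
    // 0 == success, non-zero == refused, matching the returns above.
    if (omp_pause_resource_all(omp_pause_hard) != 0)
      std::printf("hard pause refused\n");
  #pragma omp parallel // triggers __kmp_parallel_initialize, which calls
    { /* ... */ }      // __kmp_resume_if_hard_paused (see hunk above)
  }
]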