author    Dimitry Andric <dim@FreeBSD.org>  2020-07-26 19:36:28 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2020-07-26 19:36:28 +0000
commit    cfca06d7963fa0909f90483b42a6d7d194d01e08 (patch)
tree      209fb2a2d68f8f277793fc8df46c753d31bc853b /openmp
parent    706b4fc47bbc608932d3b491ae19a3b9cde9497b (diff)
Diffstat (limited to 'openmp')
-rw-r--r--  openmp/runtime/src/dllexports | 1
-rw-r--r--  openmp/runtime/src/exports_so.txt | 2
-rw-r--r--  openmp/runtime/src/i18n/en_US.txt | 2
-rw-r--r--  openmp/runtime/src/include/omp.h.var | 57
-rw-r--r--  openmp/runtime/src/include/omp_lib.f.var | 5
-rw-r--r--  openmp/runtime/src/include/omp_lib.f90.var | 6
-rw-r--r--  openmp/runtime/src/include/omp_lib.h.var | 5
-rw-r--r--  openmp/runtime/src/kmp.h | 74
-rw-r--r--  openmp/runtime/src/kmp_affinity.cpp | 24
-rw-r--r--  openmp/runtime/src/kmp_affinity.h | 6
-rw-r--r--  openmp/runtime/src/kmp_alloc.cpp | 66
-rw-r--r--  openmp/runtime/src/kmp_barrier.cpp | 2
-rw-r--r--  openmp/runtime/src/kmp_csupport.cpp | 36
-rw-r--r--  openmp/runtime/src/kmp_dispatch.cpp | 90
-rw-r--r--  openmp/runtime/src/kmp_dispatch.h | 2
-rw-r--r--  openmp/runtime/src/kmp_dispatch_hier.h | 2
-rw-r--r--  openmp/runtime/src/kmp_environment.h | 2
-rw-r--r--  openmp/runtime/src/kmp_ftn_entry.h | 7
-rw-r--r--  openmp/runtime/src/kmp_ftn_os.h | 26
-rw-r--r--  openmp/runtime/src/kmp_gsupport.cpp | 53
-rw-r--r--  openmp/runtime/src/kmp_i18n.cpp | 2
-rw-r--r--  openmp/runtime/src/kmp_i18n.h | 2
-rw-r--r--  openmp/runtime/src/kmp_lock.cpp | 5
-rw-r--r--  openmp/runtime/src/kmp_lock.h | 10
-rw-r--r--  openmp/runtime/src/kmp_omp.h | 2
-rw-r--r--  openmp/runtime/src/kmp_runtime.cpp | 22
-rw-r--r--  openmp/runtime/src/kmp_sched.cpp | 2
-rw-r--r--  openmp/runtime/src/kmp_settings.cpp | 19
-rw-r--r--  openmp/runtime/src/kmp_settings.h | 1
-rw-r--r--  openmp/runtime/src/kmp_stats.cpp | 4
-rw-r--r--  openmp/runtime/src/kmp_stats.h | 6
-rw-r--r--  openmp/runtime/src/kmp_str.h | 4
-rw-r--r--  openmp/runtime/src/kmp_stub.cpp | 2
-rw-r--r--  openmp/runtime/src/kmp_taskdeps.cpp | 164
-rw-r--r--  openmp/runtime/src/kmp_tasking.cpp | 207
-rw-r--r--  openmp/runtime/src/kmp_utility.cpp | 2
-rw-r--r--  openmp/runtime/src/kmp_version.h | 2
-rw-r--r--  openmp/runtime/src/kmp_wrapper_malloc.h | 8
-rw-r--r--  openmp/runtime/src/ompt-internal.h | 2
-rw-r--r--  openmp/runtime/src/ompt-specific.cpp | 2
-rw-r--r--  openmp/runtime/src/ompt-specific.h | 2
-rw-r--r--  openmp/runtime/src/thirdparty/ittnotify/ittnotify.h | 4
-rw-r--r--  openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.cpp | 4
-rw-r--r--  openmp/runtime/src/z_Linux_util.cpp | 6
44 files changed, 620 insertions(+), 332 deletions(-)
diff --git a/openmp/runtime/src/dllexports b/openmp/runtime/src/dllexports
index f76619ec0e3c..45a294b666fa 100644
--- a/openmp/runtime/src/dllexports
+++ b/openmp/runtime/src/dllexports
@@ -533,6 +533,7 @@ kmp_set_disp_num_buffers 890
omp_pause_resource_all 757
omp_get_supported_active_levels 758
omp_fulfill_event 759
+ omp_display_env 733
omp_null_allocator DATA
omp_default_mem_alloc DATA
diff --git a/openmp/runtime/src/exports_so.txt b/openmp/runtime/src/exports_so.txt
index f7de5fd6474f..30222418163d 100644
--- a/openmp/runtime/src/exports_so.txt
+++ b/openmp/runtime/src/exports_so.txt
@@ -119,5 +119,7 @@ GOMP_4.0 {
} GOMP_3.0;
GOMP_4.5 {
} GOMP_4.0;
+GOMP_5.0 {
+} GOMP_4.5;
# end of file #
diff --git a/openmp/runtime/src/i18n/en_US.txt b/openmp/runtime/src/i18n/en_US.txt
index 3a3035b26673..b2ba63c02870 100644
--- a/openmp/runtime/src/i18n/en_US.txt
+++ b/openmp/runtime/src/i18n/en_US.txt
@@ -324,7 +324,7 @@ WrongMessageCatalog "Incompatible message catalog \"%1$s\": Version \"%
StgIgnored "%1$s: ignored because %2$s has been defined"
# %1, -- name of ignored variable, %2 -- name of variable with higher priority.
OBSOLETE "%1$s: overrides %3$s specified before"
- # %1, %2 -- name and value of the overriding variable, %3 -- name of overriden variable.
+ # %1, %2 -- name and value of the overriding variable, %3 -- name of overridden variable.
AffTilesNoHWLOC "%1$s: Tiles are only supported if KMP_TOPOLOGY_METHOD=hwloc, using granularity=package instead"
AffTilesNoTiles "%1$s: Tiles requested but were not detected on this HW, using granularity=package instead"
TopologyExtraTile "%1$s: %2$d packages x %3$d tiles/pkg x %4$d cores/tile x %5$d threads/core (%6$d total cores)"
diff --git a/openmp/runtime/src/include/omp.h.var b/openmp/runtime/src/include/omp.h.var
index 2246e7012bee..f62afc2b693d 100644
--- a/openmp/runtime/src/include/omp.h.var
+++ b/openmp/runtime/src/include/omp.h.var
@@ -228,36 +228,36 @@
typedef uintptr_t omp_uintptr_t;
typedef enum {
- OMP_ATK_THREADMODEL = 1,
- OMP_ATK_ALIGNMENT = 2,
- OMP_ATK_ACCESS = 3,
- OMP_ATK_POOL_SIZE = 4,
- OMP_ATK_FALLBACK = 5,
- OMP_ATK_FB_DATA = 6,
- OMP_ATK_PINNED = 7,
- OMP_ATK_PARTITION = 8
+ omp_atk_threadmodel = 1,
+ omp_atk_alignment = 2,
+ omp_atk_access = 3,
+ omp_atk_pool_size = 4,
+ omp_atk_fallback = 5,
+ omp_atk_fb_data = 6,
+ omp_atk_pinned = 7,
+ omp_atk_partition = 8
} omp_alloctrait_key_t;
typedef enum {
- OMP_ATV_FALSE = 0,
- OMP_ATV_TRUE = 1,
- OMP_ATV_DEFAULT = 2,
- OMP_ATV_CONTENDED = 3,
- OMP_ATV_UNCONTENDED = 4,
- OMP_ATV_SEQUENTIAL = 5,
- OMP_ATV_PRIVATE = 6,
- OMP_ATV_ALL = 7,
- OMP_ATV_THREAD = 8,
- OMP_ATV_PTEAM = 9,
- OMP_ATV_CGROUP = 10,
- OMP_ATV_DEFAULT_MEM_FB = 11,
- OMP_ATV_NULL_FB = 12,
- OMP_ATV_ABORT_FB = 13,
- OMP_ATV_ALLOCATOR_FB = 14,
- OMP_ATV_ENVIRONMENT = 15,
- OMP_ATV_NEAREST = 16,
- OMP_ATV_BLOCKED = 17,
- OMP_ATV_INTERLEAVED = 18
+ omp_atv_false = 0,
+ omp_atv_true = 1,
+ omp_atv_default = 2,
+ omp_atv_contended = 3,
+ omp_atv_uncontended = 4,
+ omp_atv_sequential = 5,
+ omp_atv_private = 6,
+ omp_atv_all = 7,
+ omp_atv_thread = 8,
+ omp_atv_pteam = 9,
+ omp_atv_cgroup = 10,
+ omp_atv_default_mem_fb = 11,
+ omp_atv_null_fb = 12,
+ omp_atv_abort_fb = 13,
+ omp_atv_allocator_fb = 14,
+ omp_atv_environment = 15,
+ omp_atv_nearest = 16,
+ omp_atv_blocked = 17,
+ omp_atv_interleaved = 18
} omp_alloctrait_value_t;
typedef struct {
@@ -355,6 +355,9 @@
extern int __KAI_KMPC_CONVENTION omp_get_supported_active_levels(void);
+ /* OpenMP 5.1 Display Environment */
+ extern void omp_display_env(int verbose);
+
# undef __KAI_KMPC_CONVENTION
# undef __KMP_IMP
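
A minimal sketch (not part of the patch) of exercising the entry point declared
above; per the __kmp_omp_display_env() wrapper added in kmp_runtime.cpp further
down, a zero argument prints the standard OMP_* settings and a nonzero argument
requests the verbose listing that also covers the vendor-specific settings:

    #include <omp.h>

    int main(void) {
      omp_display_env(1); /* nonzero: verbose, vendor-specific settings included */
      return 0;
    }
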
diff --git a/openmp/runtime/src/include/omp_lib.f.var b/openmp/runtime/src/include/omp_lib.f.var
index d631438f55ad..bf40c78707a8 100644
--- a/openmp/runtime/src/include/omp_lib.f.var
+++ b/openmp/runtime/src/include/omp_lib.f.var
@@ -488,6 +488,11 @@
integer (kind=kmp_size_t_kind) omp_capture_affinity
end function omp_capture_affinity
+ subroutine omp_display_env(verbose) bind(c)
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind), value :: verbose
+ end subroutine omp_display_env
+
! ***
! *** kmp_* entry points
! ***
diff --git a/openmp/runtime/src/include/omp_lib.f90.var b/openmp/runtime/src/include/omp_lib.f90.var
index ac568486d204..fbbb7b9df94d 100644
--- a/openmp/runtime/src/include/omp_lib.f90.var
+++ b/openmp/runtime/src/include/omp_lib.f90.var
@@ -503,6 +503,12 @@
integer (kind=kmp_size_t_kind) :: omp_capture_affinity
end function omp_capture_affinity
+ subroutine omp_display_env(verbose) bind(c)
+ use omp_lib_kinds
+ logical (kind=omp_logical_kind), value :: verbose
+ end subroutine omp_display_env
+
+
! ***
! *** kmp_* entry points
! ***
diff --git a/openmp/runtime/src/include/omp_lib.h.var b/openmp/runtime/src/include/omp_lib.h.var
index 8775128157bd..f1b6b03f7725 100644
--- a/openmp/runtime/src/include/omp_lib.h.var
+++ b/openmp/runtime/src/include/omp_lib.h.var
@@ -580,6 +580,11 @@
integer (kind=kmp_size_t_kind) :: omp_capture_affinity
end function omp_capture_affinity
+ subroutine omp_display_env(verbose) bind(c)
+ import
+ logical (kind=omp_logical_kind), value :: verbose
+ end subroutine omp_display_env
+
! ***
! *** kmp_* entry points
! ***
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 086ab3bb011e..5f9b7c895619 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -872,36 +872,36 @@ extern int __kmp_hws_abs_flag; // absolute or per-item number requested
typedef uintptr_t omp_uintptr_t;
typedef enum {
- OMP_ATK_THREADMODEL = 1,
- OMP_ATK_ALIGNMENT = 2,
- OMP_ATK_ACCESS = 3,
- OMP_ATK_POOL_SIZE = 4,
- OMP_ATK_FALLBACK = 5,
- OMP_ATK_FB_DATA = 6,
- OMP_ATK_PINNED = 7,
- OMP_ATK_PARTITION = 8
+ omp_atk_threadmodel = 1,
+ omp_atk_alignment = 2,
+ omp_atk_access = 3,
+ omp_atk_pool_size = 4,
+ omp_atk_fallback = 5,
+ omp_atk_fb_data = 6,
+ omp_atk_pinned = 7,
+ omp_atk_partition = 8
} omp_alloctrait_key_t;
typedef enum {
- OMP_ATV_FALSE = 0,
- OMP_ATV_TRUE = 1,
- OMP_ATV_DEFAULT = 2,
- OMP_ATV_CONTENDED = 3,
- OMP_ATV_UNCONTENDED = 4,
- OMP_ATV_SEQUENTIAL = 5,
- OMP_ATV_PRIVATE = 6,
- OMP_ATV_ALL = 7,
- OMP_ATV_THREAD = 8,
- OMP_ATV_PTEAM = 9,
- OMP_ATV_CGROUP = 10,
- OMP_ATV_DEFAULT_MEM_FB = 11,
- OMP_ATV_NULL_FB = 12,
- OMP_ATV_ABORT_FB = 13,
- OMP_ATV_ALLOCATOR_FB = 14,
- OMP_ATV_ENVIRONMENT = 15,
- OMP_ATV_NEAREST = 16,
- OMP_ATV_BLOCKED = 17,
- OMP_ATV_INTERLEAVED = 18
+ omp_atv_false = 0,
+ omp_atv_true = 1,
+ omp_atv_default = 2,
+ omp_atv_contended = 3,
+ omp_atv_uncontended = 4,
+ omp_atv_sequential = 5,
+ omp_atv_private = 6,
+ omp_atv_all = 7,
+ omp_atv_thread = 8,
+ omp_atv_pteam = 9,
+ omp_atv_cgroup = 10,
+ omp_atv_default_mem_fb = 11,
+ omp_atv_null_fb = 12,
+ omp_atv_abort_fb = 13,
+ omp_atv_allocator_fb = 14,
+ omp_atv_environment = 15,
+ omp_atv_nearest = 16,
+ omp_atv_blocked = 17,
+ omp_atv_interleaved = 18
} omp_alloctrait_value_t;
typedef void *omp_memspace_handle_t;
@@ -1548,7 +1548,7 @@ typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
kmp_int32 tc;
kmp_int32 static_steal_counter; /* for static_steal only; maybe better to put
after ub */
-
+ kmp_lock_t *th_steal_lock; // lock used for chunk stealing
// KMP_ALIGN( 16 ) ensures ( if the KMP_ALIGN macro is turned on )
// a) parm3 is properly aligned and
// b) all parm1-4 are in the same cache line.
@@ -1581,7 +1581,7 @@ typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
kmp_int64 tc; /* trip count (number of iterations) */
kmp_int64 static_steal_counter; /* for static_steal only; maybe better to put
after ub */
-
+ kmp_lock_t *th_steal_lock; // lock used for chunk stealing
/* parm[1-4] are used in different ways by different scheduling algorithms */
// KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on )
@@ -1722,11 +1722,7 @@ typedef struct kmp_disp {
kmp_int32 th_disp_index;
kmp_int32 th_doacross_buf_idx; // thread's doacross buffer index
volatile kmp_uint32 *th_doacross_flags; // pointer to shared array of flags
- union { // we can use union here because doacross cannot be used in
- // nonmonotonic loops
- kmp_int64 *th_doacross_info; // info on loop bounds
- kmp_lock_t *th_steal_lock; // lock used for chunk stealing (8-byte variable)
- };
+ kmp_int64 *th_doacross_info; // info on loop bounds
#if KMP_USE_INTERNODE_ALIGNMENT
char more_padding[INTERNODE_CACHE_LINE];
#endif
@@ -2435,10 +2431,10 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info {
int th_teams_level; /* save initial level of teams construct */
/* it is 0 on device but may be any on host */
-/* The blocktime info is copied from the team struct to the thread sruct */
-/* at the start of a barrier, and the values stored in the team are used */
-/* at points in the code where the team struct is no longer guaranteed */
-/* to exist (from the POV of worker threads). */
+/* The blocktime info is copied from the team struct to the thread struct */
+/* at the start of a barrier, and the values stored in the team are used */
+/* at points in the code where the team struct is no longer guaranteed */
+/* to exist (from the POV of worker threads). */
#if KMP_USE_MONITOR
int th_team_bt_intervals;
int th_team_bt_set;
@@ -3908,6 +3904,8 @@ static inline void __kmp_resume_if_hard_paused() {
}
}
+extern void __kmp_omp_display_env(int verbose);
+
#ifdef __cplusplus
}
#endif
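
The kmp.h hunks above are two halves of one fix: th_steal_lock previously
shared a union slot with th_doacross_info in kmp_disp, one lock per thread,
which cannot distinguish between concurrently active loops once victims are
looked up per dispatch buffer (see the th_disp_buffer indexing added in
kmp_dispatch.cpp below). A condensed view of the layout change, with field
names taken from the hunks above and all other members elided:

    /* before: one pointer-sized slot in kmp_disp, two owners */
    union {
      kmp_int64 *th_doacross_info; /* doacross loop bounds */
      kmp_lock_t *th_steal_lock;   /* static_steal chunk lock */
    };

    /* after: kmp_disp keeps only th_doacross_info, and each loop instance
       carries its own lock in dispatch_private_info{32,64} */
    kmp_int64 *th_doacross_info;
    kmp_lock_t *th_steal_lock;
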
diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp
index 650e9ff35e1b..47e70477ced6 100644
--- a/openmp/runtime/src/kmp_affinity.cpp
+++ b/openmp/runtime/src/kmp_affinity.cpp
@@ -601,7 +601,7 @@ static int __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
int depth = 3;
int levels[5] = {0, 1, 2, 3, 4}; // package, [node,] [tile,] core, thread
- int labels[3] = {0}; // package [,node] [,tile] - head of lables array
+ int labels[3] = {0}; // package [,node] [,tile] - head of labels array
if (__kmp_numa_detected)
++depth;
if (__kmp_tile_depth)
@@ -828,7 +828,7 @@ static int __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
}
int depth_full = depth; // number of levels before compressing
- // Find any levels with radiix 1, and remove them from the map
+ // Find any levels with radix 1, and remove them from the map
// (except for the package level).
depth = __kmp_affinity_remove_radix_one_levels(retval, nActiveThreads, depth,
levels);
@@ -918,7 +918,7 @@ static int __kmp_affinity_create_flat_map(AddrUnsPair **address2os,
return 0;
}
- // Contruct the data structure to be returned.
+ // Construct the data structure to be returned.
*address2os =
(AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
int avail_ct = 0;
@@ -967,7 +967,7 @@ static int __kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
return -1;
}
- // Contruct the data structure to be returned.
+ // Construct the data structure to be returned.
*address2os =
(AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
@@ -1849,7 +1849,7 @@ static int __kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
return 0;
}
- // Find any levels with radiix 1, and remove them from the map
+ // Find any levels with radix 1, and remove them from the map
// (except for the package level).
int new_depth = 0;
for (level = 0; level < depth; level++) {
@@ -1968,7 +1968,8 @@ static void __kmp_dispatch_set_hierarchy_values() {
__kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1] =
nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
__kmp_hier_max_units[kmp_hier_layer_e::LAYER_L1 + 1] = __kmp_ncores;
-#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS)
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS) && \
+ KMP_MIC_SUPPORTED
if (__kmp_mic_type >= mic3)
__kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores / 2;
else
@@ -1982,7 +1983,8 @@ static void __kmp_dispatch_set_hierarchy_values() {
__kmp_hier_threads_per[kmp_hier_layer_e::LAYER_THREAD + 1] = 1;
__kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L1 + 1] =
__kmp_nThreadsPerCore;
-#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS)
+#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS) && \
+ KMP_MIC_SUPPORTED
if (__kmp_mic_type >= mic3)
__kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
2 * __kmp_nThreadsPerCore;
@@ -4328,7 +4330,7 @@ static void __kmp_aux_affinity_initialize(void) {
}
#endif // KMP_USE_HWLOC
-// If the user has specified that a paricular topology discovery method is to be
+// If the user has specified that a particular topology discovery method is to be
// used, then we abort if that method fails. The exception is group affinity,
// which might have been implicitly set.
@@ -4647,7 +4649,7 @@ static void __kmp_aux_affinity_initialize(void) {
#undef KMP_EXIT_AFF_NONE
void __kmp_affinity_initialize(void) {
- // Much of the code above was written assumming that if a machine was not
+ // Much of the code above was written assuming that if a machine was not
// affinity capable, then __kmp_affinity_type == affinity_none. We now
// explicitly represent this as __kmp_affinity_type == affinity_disabled.
// There are too many checks for __kmp_affinity_type == affinity_none
@@ -4713,7 +4715,7 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
KMP_CPU_ZERO(th->th.th_affin_mask);
}
- // Copy the thread mask to the kmp_info_t strucuture. If
+ // Copy the thread mask to the kmp_info_t structure. If
// __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one that
// has all of the OS proc ids set, or if __kmp_affinity_respect_mask is set,
// then the full mask is the same as the mask of the initialization thread.
@@ -4823,7 +4825,7 @@ void __kmp_affinity_set_place(int gtid) {
(th->th.th_new_place >= th->th.th_last_place));
}
- // Copy the thread mask to the kmp_info_t strucuture,
+ // Copy the thread mask to the kmp_info_t structure,
// and set this thread's affinity.
kmp_affin_mask_t *mask =
KMP_CPU_INDEX(__kmp_affinity_masks, th->th.th_new_place);
diff --git a/openmp/runtime/src/kmp_affinity.h b/openmp/runtime/src/kmp_affinity.h
index f270bb6dbb8d..664a42393191 100644
--- a/openmp/runtime/src/kmp_affinity.h
+++ b/openmp/runtime/src/kmp_affinity.h
@@ -303,8 +303,9 @@ class KMPNativeAffinity : public KMPAffinity {
int retval =
syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
- int retval =
+ int r =
pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size, reinterpret_cast<cpuset_t *>(mask));
+ int retval = (r == 0 ? 0 : -1);
#endif
if (retval >= 0) {
return 0;
@@ -322,8 +323,9 @@ class KMPNativeAffinity : public KMPAffinity {
int retval =
syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
- int retval =
+ int r =
pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size, reinterpret_cast<cpuset_t *>(mask));
+ int retval = (r == 0 ? 0 : -1);
#endif
if (retval >= 0) {
return 0;
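
The extra step in the FreeBSD branches above exists because
pthread_getaffinity_np() and pthread_setaffinity_np() report failure with a
positive errno-style return value rather than the -1 convention of the Linux
syscalls, while the code that follows tests retval >= 0 for success. A plain-C
sketch of the same normalization, assuming only the return convention
documented in the FreeBSD manual pages:

    #include <sys/param.h>
    #include <sys/cpuset.h>
    #include <pthread_np.h>

    int set_mask(cpuset_t *mask, size_t size) {
      int r = pthread_setaffinity_np(pthread_self(), size, mask);
      return (r == 0 ? 0 : -1); /* fold any positive error code into -1 */
    }
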
diff --git a/openmp/runtime/src/kmp_alloc.cpp b/openmp/runtime/src/kmp_alloc.cpp
index 16893d0ffca5..314f56d9b5c6 100644
--- a/openmp/runtime/src/kmp_alloc.cpp
+++ b/openmp/runtime/src/kmp_alloc.cpp
@@ -186,7 +186,7 @@ typedef struct thr_data {
-1: not all pool blocks are the same size
>0: (common) block size for all bpool calls made so far
*/
- bfhead_t *last_pool; /* Last pool owned by this thread (delay dealocation) */
+ bfhead_t *last_pool; /* Last pool owned by this thread (delay deallocation) */
} thr_data_t;
/* Minimum allocation quantum: */
@@ -195,7 +195,7 @@ typedef struct thr_data {
#define MaxSize \
(bufsize)( \
~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1)))
-// Maximun for the requested size.
+// Maximum for the requested size.
/* End sentinel: value placed in bsize field of dummy block delimiting
end of pool block. The most negative number which will fit in a
@@ -577,7 +577,7 @@ static void *bget(kmp_info_t *th, bufsize requested_size) {
if (thr->acqfcn != 0) {
if (size > (bufsize)(thr->exp_incr - sizeof(bhead_t))) {
/* Request is too large to fit in a single expansion block.
- Try to satisy it by a direct buffer acquisition. */
+ Try to satisfy it by a direct buffer acquisition. */
bdhead_t *bdh;
size += sizeof(bdhead_t) - sizeof(bhead_t);
@@ -1348,27 +1348,27 @@ omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
al->memspace = ms; // not used currently
for (i = 0; i < ntraits; ++i) {
switch (traits[i].key) {
- case OMP_ATK_THREADMODEL:
- case OMP_ATK_ACCESS:
- case OMP_ATK_PINNED:
+ case omp_atk_threadmodel:
+ case omp_atk_access:
+ case omp_atk_pinned:
break;
- case OMP_ATK_ALIGNMENT:
+ case omp_atk_alignment:
al->alignment = traits[i].value;
KMP_ASSERT(IS_POWER_OF_TWO(al->alignment));
break;
- case OMP_ATK_POOL_SIZE:
+ case omp_atk_pool_size:
al->pool_size = traits[i].value;
break;
- case OMP_ATK_FALLBACK:
+ case omp_atk_fallback:
al->fb = (omp_alloctrait_value_t)traits[i].value;
KMP_DEBUG_ASSERT(
- al->fb == OMP_ATV_DEFAULT_MEM_FB || al->fb == OMP_ATV_NULL_FB ||
- al->fb == OMP_ATV_ABORT_FB || al->fb == OMP_ATV_ALLOCATOR_FB);
+ al->fb == omp_atv_default_mem_fb || al->fb == omp_atv_null_fb ||
+ al->fb == omp_atv_abort_fb || al->fb == omp_atv_allocator_fb);
break;
- case OMP_ATK_FB_DATA:
+ case omp_atk_fb_data:
al->fb_data = RCAST(kmp_allocator_t *, traits[i].value);
break;
- case OMP_ATK_PARTITION:
+ case omp_atk_partition:
al->memkind = RCAST(void **, traits[i].value);
break;
default:
@@ -1377,17 +1377,17 @@ omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
}
if (al->fb == 0) {
// set default allocator
- al->fb = OMP_ATV_DEFAULT_MEM_FB;
+ al->fb = omp_atv_default_mem_fb;
al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
- } else if (al->fb == OMP_ATV_ALLOCATOR_FB) {
+ } else if (al->fb == omp_atv_allocator_fb) {
KMP_ASSERT(al->fb_data != NULL);
- } else if (al->fb == OMP_ATV_DEFAULT_MEM_FB) {
+ } else if (al->fb == omp_atv_default_mem_fb) {
al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
}
if (__kmp_memkind_available) {
// Let's use memkind library if available
if (ms == omp_high_bw_mem_space) {
- if (al->memkind == (void *)OMP_ATV_INTERLEAVED && mk_hbw_interleave) {
+ if (al->memkind == (void *)omp_atv_interleaved && mk_hbw_interleave) {
al->memkind = mk_hbw_interleave;
} else if (mk_hbw_preferred) {
// AC: do not try to use MEMKIND_HBW for now, because memkind library
@@ -1402,7 +1402,7 @@ omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
return omp_null_allocator;
}
} else {
- if (al->memkind == (void *)OMP_ATV_INTERLEAVED && mk_interleave) {
+ if (al->memkind == (void *)omp_atv_interleaved && mk_interleave) {
al->memkind = mk_interleave;
} else {
al->memkind = mk_default;
@@ -1477,12 +1477,12 @@ void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
if (used + desc.size_a > al->pool_size) {
// not enough space, need to go fallback path
KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
- if (al->fb == OMP_ATV_DEFAULT_MEM_FB) {
+ if (al->fb == omp_atv_default_mem_fb) {
al = (kmp_allocator_t *)omp_default_mem_alloc;
ptr = kmp_mk_alloc(*mk_default, desc.size_a);
- } else if (al->fb == OMP_ATV_ABORT_FB) {
+ } else if (al->fb == omp_atv_abort_fb) {
KMP_ASSERT(0); // abort fallback requested
- } else if (al->fb == OMP_ATV_ALLOCATOR_FB) {
+ } else if (al->fb == omp_atv_allocator_fb) {
KMP_ASSERT(al != al->fb_data);
al = al->fb_data;
return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al);
@@ -1491,12 +1491,12 @@ void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
// pool has enough space
ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
if (ptr == NULL) {
- if (al->fb == OMP_ATV_DEFAULT_MEM_FB) {
+ if (al->fb == omp_atv_default_mem_fb) {
al = (kmp_allocator_t *)omp_default_mem_alloc;
ptr = kmp_mk_alloc(*mk_default, desc.size_a);
- } else if (al->fb == OMP_ATV_ABORT_FB) {
+ } else if (al->fb == omp_atv_abort_fb) {
KMP_ASSERT(0); // abort fallback requested
- } else if (al->fb == OMP_ATV_ALLOCATOR_FB) {
+ } else if (al->fb == omp_atv_allocator_fb) {
KMP_ASSERT(al != al->fb_data);
al = al->fb_data;
return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al);
@@ -1507,12 +1507,12 @@ void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
// custom allocator, pool size not requested
ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
if (ptr == NULL) {
- if (al->fb == OMP_ATV_DEFAULT_MEM_FB) {
+ if (al->fb == omp_atv_default_mem_fb) {
al = (kmp_allocator_t *)omp_default_mem_alloc;
ptr = kmp_mk_alloc(*mk_default, desc.size_a);
- } else if (al->fb == OMP_ATV_ABORT_FB) {
+ } else if (al->fb == omp_atv_abort_fb) {
KMP_ASSERT(0); // abort fallback requested
- } else if (al->fb == OMP_ATV_ALLOCATOR_FB) {
+ } else if (al->fb == omp_atv_allocator_fb) {
KMP_ASSERT(al != al->fb_data);
al = al->fb_data;
return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al);
@@ -1533,12 +1533,12 @@ void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
if (used + desc.size_a > al->pool_size) {
// not enough space, need to go fallback path
KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
- if (al->fb == OMP_ATV_DEFAULT_MEM_FB) {
+ if (al->fb == omp_atv_default_mem_fb) {
al = (kmp_allocator_t *)omp_default_mem_alloc;
ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
- } else if (al->fb == OMP_ATV_ABORT_FB) {
+ } else if (al->fb == omp_atv_abort_fb) {
KMP_ASSERT(0); // abort fallback requested
- } else if (al->fb == OMP_ATV_ALLOCATOR_FB) {
+ } else if (al->fb == omp_atv_allocator_fb) {
KMP_ASSERT(al != al->fb_data);
al = al->fb_data;
return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al);
@@ -1546,14 +1546,14 @@ void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
} else {
// pool has enough space
ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
- if (ptr == NULL && al->fb == OMP_ATV_ABORT_FB) {
+ if (ptr == NULL && al->fb == omp_atv_abort_fb) {
KMP_ASSERT(0); // abort fallback requested
} // no sense to look for another fallback because of same internal alloc
}
} else {
// custom allocator, pool size not requested
ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
- if (ptr == NULL && al->fb == OMP_ATV_ABORT_FB) {
+ if (ptr == NULL && al->fb == omp_atv_abort_fb) {
KMP_ASSERT(0); // abort fallback requested
} // no sense to look for another fallback because of same internal alloc
}
@@ -1961,7 +1961,7 @@ void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL) {
this_thr->th.th_free_lists[index].th_free_list_other = ptr;
} else {
// either queue blocks owner is changing or size limit exceeded
- // return old queue to allocating thread (q_th) synchroneously,
+ // return old queue to allocating thread (q_th) synchronously,
// and start new list for alloc_thr's tasks
void *old_ptr;
void *tail = head;
diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp
index a6d87b5d7a2e..4aa7a084f53a 100644
--- a/openmp/runtime/src/kmp_barrier.cpp
+++ b/openmp/runtime/src/kmp_barrier.cpp
@@ -549,6 +549,7 @@ __kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
if (((tid >> level) & (branch_factor - 1)) != 0) {
kmp_int32 parent_tid = tid & ~((1 << (level + branch_bits)) - 1);
+ KMP_MB(); // Synchronize parent and child threads.
KA_TRACE(20,
("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
"arrived(%p): %llu => %llu\n",
@@ -590,6 +591,7 @@ __kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
kmp_flag_64 c_flag(&child_bar->b_arrived, new_state);
c_flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
ANNOTATE_BARRIER_END(child_thr);
+ KMP_MB(); // Synchronize parent and child threads.
#if USE_ITT_BUILD && USE_ITT_NOTIFY
// Barrier imbalance - write min of the thread time and a child time to
// the thread.
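
The two KMP_MB() additions above pair a child's release of its arrived flag
with the parent's subsequent reads of data the child wrote before arriving. A
reduced model of the ordering requirement, using C11 atomics in place of the
runtime's flag machinery (all names here are illustrative, not from the
runtime):

    #include <stdatomic.h>

    long user_data;     /* written by the child inside the parallel region */
    atomic_int arrived; /* stand-in for the b_arrived barrier flag */

    void child(void) {
      user_data = 42; /* user-code write */
      atomic_store_explicit(&arrived, 1, memory_order_release);
    }

    void parent(void) {
      while (!atomic_load_explicit(&arrived, memory_order_acquire))
        ; /* c_flag.wait(...) in the gather loop */
      /* the acquire above plays the role of the added KMP_MB(): without it,
         the read below could observe a stale user_data */
      long v = user_data;
      (void)v;
    }
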
diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp
index ac9a93590ad0..9cfa64d6ff9e 100644
--- a/openmp/runtime/src/kmp_csupport.cpp
+++ b/openmp/runtime/src/kmp_csupport.cpp
@@ -92,7 +92,7 @@ construct, since the master thread is necessarily thread zero).
If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
-the OpenMP runtime (but the value cannote be defined in terms of
+the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
@@ -4023,6 +4023,9 @@ void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
lo = pr_buf->th_doacross_info[2];
up = pr_buf->th_doacross_info[3];
st = pr_buf->th_doacross_info[4];
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+ ompt_dependence_t deps[num_dims];
+#endif
if (st == 1) { // most common case
if (vec[0] < lo || vec[0] > up) {
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
@@ -4048,6 +4051,10 @@ void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
}
iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
}
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+ deps[0].variable.value = iter_number;
+ deps[0].dependence_type = ompt_dependence_type_sink;
+#endif
for (i = 1; i < num_dims; ++i) {
kmp_int64 iter, ln;
kmp_int32 j = i * 4;
@@ -4081,6 +4088,10 @@ void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
iter = (kmp_uint64)(lo - vec[i]) / (-st);
}
iter_number = iter + ln * iter_number;
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+ deps[i].variable.value = iter;
+ deps[i].dependence_type = ompt_dependence_type_sink;
+#endif
}
shft = iter_number % 32; // use 32-bit granularity
iter_number >>= 5; // divided by 32
@@ -4089,6 +4100,12 @@ void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
KMP_YIELD(TRUE);
}
KMP_MB();
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+ if (ompt_enabled.ompt_callback_dependences) {
+ ompt_callbacks.ompt_callback(ompt_callback_dependences)(
+ &(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims);
+ }
+#endif
KA_TRACE(20,
("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
gtid, (iter_number << 5) + shft));
@@ -4116,6 +4133,9 @@ void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
num_dims = pr_buf->th_doacross_info[0];
lo = pr_buf->th_doacross_info[2];
st = pr_buf->th_doacross_info[4];
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+ ompt_dependence_t deps[num_dims];
+#endif
if (st == 1) { // most common case
iter_number = vec[0] - lo;
} else if (st > 0) {
@@ -4123,6 +4143,10 @@ void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
} else { // negative increment
iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
}
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+ deps[0].variable.value = iter_number;
+ deps[0].dependence_type = ompt_dependence_type_source;
+#endif
for (i = 1; i < num_dims; ++i) {
kmp_int64 iter, ln;
kmp_int32 j = i * 4;
@@ -4137,7 +4161,17 @@ void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
iter = (kmp_uint64)(lo - vec[i]) / (-st);
}
iter_number = iter + ln * iter_number;
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+ deps[i].variable.value = iter;
+ deps[i].dependence_type = ompt_dependence_type_source;
+#endif
+ }
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+ if (ompt_enabled.ompt_callback_dependences) {
+ ompt_callbacks.ompt_callback(ompt_callback_dependences)(
+ &(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims);
}
+#endif
shft = iter_number % 32; // use 32-bit granularity
iter_number >>= 5; // divided by 32
flag = 1 << shft;
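
With the hunks above, __kmpc_doacross_wait and __kmpc_doacross_post now report
their cross-iteration dependences to OMPT tools, one sink or source entry per
dimension. A minimal sketch of a tool-side consumer; the callback type and the
ompt_dependence_t fields are those of the standard omp-tools.h interface, and
the body is illustrative:

    #include <omp-tools.h>
    #include <stdio.h>

    /* registered via ompt_set_callback(ompt_callback_dependences, ...) */
    static void on_dependences(ompt_data_t *task_data,
                               const ompt_dependence_t *deps, int ndeps) {
      for (int i = 0; i < ndeps; ++i)
        printf("dim %d: iter %llu (%s)\n", i,
               (unsigned long long)deps[i].variable.value,
               deps[i].dependence_type == ompt_dependence_type_sink
                   ? "sink" : "source");
      (void)task_data;
    }
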
diff --git a/openmp/runtime/src/kmp_dispatch.cpp b/openmp/runtime/src/kmp_dispatch.cpp
index a91ffa2ba299..9d7b81733eba 100644
--- a/openmp/runtime/src/kmp_dispatch.cpp
+++ b/openmp/runtime/src/kmp_dispatch.cpp
@@ -372,10 +372,10 @@ void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
// before spending time on this).
// For now use dynamically allocated per-thread lock,
// free memory in __kmp_dispatch_next when status==0.
- KMP_DEBUG_ASSERT(th->th.th_dispatch->th_steal_lock == NULL);
- th->th.th_dispatch->th_steal_lock =
+ KMP_DEBUG_ASSERT(pr->u.p.th_steal_lock == NULL);
+ pr->u.p.th_steal_lock =
(kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t));
- __kmp_init_lock(th->th.th_dispatch->th_steal_lock);
+ __kmp_init_lock(pr->u.p.th_steal_lock);
}
break;
} else {
@@ -968,7 +968,7 @@ __kmp_dispatch_init(ident_t *loc, int gtid, enum sched_type schedule, T lb,
// all parm3 will be the same, it still exists a bad case like using 0 and 1
// rather than program life-time increment. So the dedicated variable is
// required. The 'static_steal_counter' is used.
- if (schedule == kmp_sch_static_steal) {
+ if (pr->schedule == kmp_sch_static_steal) {
// Other threads will inspect this variable when searching for a victim.
// This is a flag showing that other threads may steal from this thread
// since then.
@@ -1195,7 +1195,7 @@ int __kmp_dispatch_next_algorithm(int gtid,
if (traits_t<T>::type_size > 4) {
// use lock for 8-byte and CAS for 4-byte induction
// variable. TODO (optional): check and use 16-byte CAS
- kmp_lock_t *lck = th->th.th_dispatch->th_steal_lock;
+ kmp_lock_t *lck = pr->u.p.th_steal_lock;
KMP_DEBUG_ASSERT(lck != NULL);
if (pr->u.p.count < (UT)pr->u.p.ub) {
__kmp_acquire_lock(lck, gtid);
@@ -1210,37 +1210,38 @@ int __kmp_dispatch_next_algorithm(int gtid,
kmp_info_t **other_threads = team->t.t_threads;
int while_limit = pr->u.p.parm3;
int while_index = 0;
+ T id = pr->u.p.static_steal_counter; // loop id
+ int idx = (th->th.th_dispatch->th_disp_index - 1) %
+ __kmp_dispatch_num_buffers; // current loop index
+ // note: victim thread can potentially execute another loop
// TODO: algorithm of searching for a victim
// should be cleaned up and measured
while ((!status) && (while_limit != ++while_index)) {
+ dispatch_private_info_template<T> *victim;
T remaining;
T victimIdx = pr->u.p.parm4;
T oldVictimIdx = victimIdx ? victimIdx - 1 : nproc - 1;
- dispatch_private_info_template<T> *victim =
- reinterpret_cast<dispatch_private_info_template<T> *>(
- other_threads[victimIdx]
- ->th.th_dispatch->th_dispatch_pr_current);
- while ((victim == NULL || victim == pr ||
- (*(volatile T *)&victim->u.p.static_steal_counter !=
- *(volatile T *)&pr->u.p.static_steal_counter)) &&
+ victim = reinterpret_cast<dispatch_private_info_template<T> *>(
+ &other_threads[victimIdx]->th.th_dispatch->th_disp_buffer[idx]);
+ KMP_DEBUG_ASSERT(victim);
+ while ((victim == pr || id != victim->u.p.static_steal_counter) &&
oldVictimIdx != victimIdx) {
victimIdx = (victimIdx + 1) % nproc;
victim = reinterpret_cast<dispatch_private_info_template<T> *>(
- other_threads[victimIdx]
- ->th.th_dispatch->th_dispatch_pr_current);
+ &other_threads[victimIdx]->th.th_dispatch->th_disp_buffer[idx]);
+ KMP_DEBUG_ASSERT(victim);
}
- if (!victim || (*(volatile T *)&victim->u.p.static_steal_counter !=
- *(volatile T *)&pr->u.p.static_steal_counter)) {
+ if (victim == pr || id != victim->u.p.static_steal_counter) {
continue; // try once more (nproc attempts in total)
// no victim is ready yet to participate in stealing
- // because all victims are still in kmp_init_dispatch
+ // because no victim passed kmp_init_dispatch yet
}
if (victim->u.p.count + 2 > (UT)victim->u.p.ub) {
pr->u.p.parm4 = (victimIdx + 1) % nproc; // shift start tid
continue; // not enough chunks to steal, goto next victim
}
- lck = other_threads[victimIdx]->th.th_dispatch->th_steal_lock;
+ lck = victim->u.p.th_steal_lock;
KMP_ASSERT(lck != NULL);
__kmp_acquire_lock(lck, gtid);
limit = victim->u.p.ub; // keep initial ub
@@ -1250,7 +1251,7 @@ int __kmp_dispatch_next_algorithm(int gtid,
pr->u.p.parm4 = (victimIdx + 1) % nproc; // next victim
continue; // not enough chunks to steal
}
- // stealing succeded, reduce victim's ub by 1/4 of undone chunks or
+ // stealing succeeded, reduce victim's ub by 1/4 of undone chunks or
// by 1
if (remaining > 3) {
// steal 1/4 of remaining
@@ -1268,10 +1269,10 @@ int __kmp_dispatch_next_algorithm(int gtid,
status = 1;
while_index = 0;
// now update own count and ub with stolen range but init chunk
- __kmp_acquire_lock(th->th.th_dispatch->th_steal_lock, gtid);
+ __kmp_acquire_lock(pr->u.p.th_steal_lock, gtid);
pr->u.p.count = init + 1;
pr->u.p.ub = limit;
- __kmp_release_lock(th->th.th_dispatch->th_steal_lock, gtid);
+ __kmp_release_lock(pr->u.p.th_steal_lock, gtid);
} // while (search for victim)
} // if (try to find victim and steal)
} else {
@@ -1308,32 +1309,32 @@ int __kmp_dispatch_next_algorithm(int gtid,
kmp_info_t **other_threads = team->t.t_threads;
int while_limit = pr->u.p.parm3;
int while_index = 0;
-
+ T id = pr->u.p.static_steal_counter; // loop id
+ int idx = (th->th.th_dispatch->th_disp_index - 1) %
+ __kmp_dispatch_num_buffers; // current loop index
+ // note: victim thread can potentially execute another loop
// TODO: algorithm of searching for a victim
// should be cleaned up and measured
while ((!status) && (while_limit != ++while_index)) {
+ dispatch_private_info_template<T> *victim;
union_i4 vold, vnew;
kmp_int32 remaining;
T victimIdx = pr->u.p.parm4;
T oldVictimIdx = victimIdx ? victimIdx - 1 : nproc - 1;
- dispatch_private_info_template<T> *victim =
- reinterpret_cast<dispatch_private_info_template<T> *>(
- other_threads[victimIdx]
- ->th.th_dispatch->th_dispatch_pr_current);
- while ((victim == NULL || victim == pr ||
- (*(volatile T *)&victim->u.p.static_steal_counter !=
- *(volatile T *)&pr->u.p.static_steal_counter)) &&
+ victim = reinterpret_cast<dispatch_private_info_template<T> *>(
+ &other_threads[victimIdx]->th.th_dispatch->th_disp_buffer[idx]);
+ KMP_DEBUG_ASSERT(victim);
+ while ((victim == pr || id != victim->u.p.static_steal_counter) &&
oldVictimIdx != victimIdx) {
victimIdx = (victimIdx + 1) % nproc;
victim = reinterpret_cast<dispatch_private_info_template<T> *>(
- other_threads[victimIdx]
- ->th.th_dispatch->th_dispatch_pr_current);
+ &other_threads[victimIdx]->th.th_dispatch->th_disp_buffer[idx]);
+ KMP_DEBUG_ASSERT(victim);
}
- if (!victim || (*(volatile T *)&victim->u.p.static_steal_counter !=
- *(volatile T *)&pr->u.p.static_steal_counter)) {
+ if (victim == pr || id != victim->u.p.static_steal_counter) {
continue; // try once more (nproc attempts in total)
// no victim is ready yet to participate in stealing
- // because all victims are still in kmp_init_dispatch
+ // because no victim passed kmp_init_dispatch yet
}
pr->u.p.parm4 = victimIdx; // new victim found
while (1) { // CAS loop if victim has enough chunks to steal
@@ -1357,7 +1358,7 @@ int __kmp_dispatch_next_algorithm(int gtid,
(volatile kmp_int64 *)&victim->u.p.count,
*VOLATILE_CAST(kmp_int64 *) & vold.b,
*VOLATILE_CAST(kmp_int64 *) & vnew.b)) {
- // stealing succedded
+ // stealing succeeded
KMP_COUNT_DEVELOPER_VALUE(FOR_static_steal_stolen,
vold.p.ub - vnew.p.ub);
status = 1;
@@ -1372,7 +1373,7 @@ int __kmp_dispatch_next_algorithm(int gtid,
#endif
break;
} // if (check CAS result)
- KMP_CPU_PAUSE(); // CAS failed, repeate attempt
+ KMP_CPU_PAUSE(); // CAS failed, repeatedly attempt
} // while (try to steal from particular victim)
} // while (search for victim)
} // if (try to find victim and steal)
@@ -1532,7 +1533,7 @@ int __kmp_dispatch_next_algorithm(int gtid,
}
if ((T)remaining <
pr->u.p.parm2) { // compare with K*nproc*(chunk+1), K=2 by default
- // use dynamic-style shcedule
+ // use dynamic-style schedule
// atomically increment iterations, get old value
init = test_then_add<ST>(RCAST(volatile ST *, &sh->u.s.iteration),
(ST)chunkspec);
@@ -1601,7 +1602,7 @@ int __kmp_dispatch_next_algorithm(int gtid,
KMP_DEBUG_ASSERT(init % chunk == 0);
// compare with K*nproc*(chunk+1), K=2 by default
if ((T)remaining < pr->u.p.parm2) {
- // use dynamic-style shcedule
+ // use dynamic-style schedule
// atomically increment iterations, get old value
init = test_then_add<ST>(RCAST(volatile ST *, &sh->u.s.iteration),
(ST)chunk);
@@ -1892,7 +1893,7 @@ static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last,
typedef typename traits_t<T>::unsigned_t UT;
typedef typename traits_t<T>::signed_t ST;
// This is potentially slightly misleading, schedule(runtime) will appear here
- // even if the actual runtme schedule is static. (Which points out a
+ // even if the actual runtime schedule is static. (Which points out a
// disadvantage of schedule(runtime): even when static scheduling is used it
// costs more than a compile time choice to use static scheduling would.)
KMP_TIME_PARTITIONED_BLOCK(OMP_loop_dynamic_scheduling);
@@ -1909,7 +1910,7 @@ static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last,
gtid, p_lb, p_ub, p_st, p_last));
if (team->t.t_serialized) {
- /* NOTE: serialize this dispatch becase we are not at the active level */
+ /* NOTE: serialize this dispatch because we are not at the active level */
pr = reinterpret_cast<dispatch_private_info_template<T> *>(
th->th.th_dispatch->th_disp_buffer); /* top of the stack */
KMP_DEBUG_ASSERT(pr);
@@ -2068,14 +2069,19 @@ static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last,
if (pr->schedule == kmp_sch_static_steal &&
traits_t<T>::type_size > 4) {
int i;
+ int idx = (th->th.th_dispatch->th_disp_index - 1) %
+ __kmp_dispatch_num_buffers; // current loop index
kmp_info_t **other_threads = team->t.t_threads;
// loop complete, safe to destroy locks used for stealing
for (i = 0; i < th->th.th_team_nproc; ++i) {
- kmp_lock_t *lck = other_threads[i]->th.th_dispatch->th_steal_lock;
+ dispatch_private_info_template<T> *buf =
+ reinterpret_cast<dispatch_private_info_template<T> *>(
+ &other_threads[i]->th.th_dispatch->th_disp_buffer[idx]);
+ kmp_lock_t *lck = buf->u.p.th_steal_lock;
KMP_ASSERT(lck != NULL);
__kmp_destroy_lock(lck);
__kmp_free(lck);
- other_threads[i]->th.th_dispatch->th_steal_lock = NULL;
+ buf->u.p.th_steal_lock = NULL;
}
}
#endif
diff --git a/openmp/runtime/src/kmp_dispatch.h b/openmp/runtime/src/kmp_dispatch.h
index 8b3e98435a3f..1f98e4b80a79 100644
--- a/openmp/runtime/src/kmp_dispatch.h
+++ b/openmp/runtime/src/kmp_dispatch.h
@@ -75,7 +75,7 @@ template <typename T> struct dispatch_private_infoXX_template {
ST st; // signed
UT tc; // unsigned
T static_steal_counter; // for static_steal only; maybe better to put after ub
-
+ kmp_lock_t *th_steal_lock; // lock used for chunk stealing
/* parm[1-4] are used in different ways by different scheduling algorithms */
// KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on )
diff --git a/openmp/runtime/src/kmp_dispatch_hier.h b/openmp/runtime/src/kmp_dispatch_hier.h
index 3d7faea04272..c615b7b08958 100644
--- a/openmp/runtime/src/kmp_dispatch_hier.h
+++ b/openmp/runtime/src/kmp_dispatch_hier.h
@@ -993,7 +993,7 @@ void __kmp_dispatch_init_hierarchy(ident_t *loc, int n,
th->th.th_hier_bar_data = (kmp_hier_private_bdata_t *)__kmp_allocate(
sizeof(kmp_hier_private_bdata_t) * kmp_hier_layer_e::LAYER_LAST);
}
- // Have threads "register" themselves by modifiying the active count for each
+ // Have threads "register" themselves by modifying the active count for each
// level they are involved in. The active count will act as nthreads for that
// level regarding the scheduling algorithms
for (int i = 0; i < n; ++i) {
diff --git a/openmp/runtime/src/kmp_environment.h b/openmp/runtime/src/kmp_environment.h
index 76a9672f3240..a7ea9e955788 100644
--- a/openmp/runtime/src/kmp_environment.h
+++ b/openmp/runtime/src/kmp_environment.h
@@ -1,5 +1,5 @@
/*
- * kmp_environment.h -- Handle environment varoiables OS-independently.
+ * kmp_environment.h -- Handle environment variables OS-independently.
*/
//===----------------------------------------------------------------------===//
diff --git a/openmp/runtime/src/kmp_ftn_entry.h b/openmp/runtime/src/kmp_ftn_entry.h
index 89172c0b704c..ab57907e088e 100644
--- a/openmp/runtime/src/kmp_ftn_entry.h
+++ b/openmp/runtime/src/kmp_ftn_entry.h
@@ -1371,6 +1371,13 @@ void FTN_STDCALL FTN_FULFILL_EVENT(kmp_event_t *event) {
#endif
}
+// display environment variables when requested
+void FTN_STDCALL FTN_DISPLAY_ENV(int verbose) {
+#ifndef KMP_STUB
+ __kmp_omp_display_env(verbose);
+#endif
+}
+
// GCC compatibility (versioned symbols)
#ifdef KMP_USE_VERSION_SYMBOLS
diff --git a/openmp/runtime/src/kmp_ftn_os.h b/openmp/runtime/src/kmp_ftn_os.h
index 41cafab12537..22fb2bb2f5ca 100644
--- a/openmp/runtime/src/kmp_ftn_os.h
+++ b/openmp/runtime/src/kmp_ftn_os.h
@@ -133,6 +133,7 @@
#define FTN_PAUSE_RESOURCE omp_pause_resource
#define FTN_PAUSE_RESOURCE_ALL omp_pause_resource_all
#define FTN_GET_SUPPORTED_ACTIVE_LEVELS omp_get_supported_active_levels
+#define FTN_DISPLAY_ENV omp_display_env
#define FTN_FULFILL_EVENT omp_fulfill_event
#endif /* KMP_FTN_PLAIN */
@@ -256,6 +257,7 @@
#define FTN_PAUSE_RESOURCE omp_pause_resource_
#define FTN_PAUSE_RESOURCE_ALL omp_pause_resource_all_
#define FTN_GET_SUPPORTED_ACTIVE_LEVELS omp_get_supported_active_levels_
+#define FTN_DISPLAY_ENV omp_display_env_
#define FTN_FULFILL_EVENT omp_fulfill_event_
#endif /* KMP_FTN_APPEND */
@@ -377,6 +379,7 @@
#define FTN_PAUSE_RESOURCE OMP_PAUSE_RESOURCE
#define FTN_PAUSE_RESOURCE_ALL OMP_PAUSE_RESOURCE_ALL
#define FTN_GET_SUPPORTED_ACTIVE_LEVELS OMP_GET_SUPPORTED_ACTIVE_LEVELS
+#define FTN_DISPLAY_ENV OMP_DISPLAY_ENV
#define FTN_FULFILL_EVENT OMP_FULFILL_EVENT
#endif /* KMP_FTN_UPPER */
@@ -500,6 +503,7 @@
#define FTN_PAUSE_RESOURCE OMP_PAUSE_RESOURCE_
#define FTN_PAUSE_RESOURCE_ALL OMP_PAUSE_RESOURCE_ALL_
#define FTN_GET_SUPPORTED_ACTIVE_LEVELS OMP_GET_SUPPORTED_ACTIVE_LEVELS_
+#define FTN_DISPLAY_ENV OMP_DISPLAY_ENV_
#define FTN_FULFILL_EVENT OMP_FULFILL_EVENT_
#endif /* KMP_FTN_UAPPEND */
@@ -654,4 +658,26 @@
#define KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_GUIDED \
GOMP_parallel_loop_nonmonotonic_guided
+// All GOMP_5.0 symbols
+#define KMP_API_NAME_GOMP_LOOP_MAYBE_NONMONOTONIC_RUNTIME_NEXT \
+ GOMP_loop_maybe_nonmonotonic_runtime_next
+#define KMP_API_NAME_GOMP_LOOP_MAYBE_NONMONOTONIC_RUNTIME_START \
+ GOMP_loop_maybe_nonmonotonic_runtime_start
+#define KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_RUNTIME_NEXT \
+ GOMP_loop_nonmonotonic_runtime_next
+#define KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_RUNTIME_START \
+ GOMP_loop_nonmonotonic_runtime_start
+#define KMP_API_NAME_GOMP_LOOP_ULL_MAYBE_NONMONOTONIC_RUNTIME_NEXT \
+ GOMP_loop_ull_maybe_nonmonotonic_runtime_next
+#define KMP_API_NAME_GOMP_LOOP_ULL_MAYBE_NONMONOTONIC_RUNTIME_START \
+ GOMP_loop_ull_maybe_nonmonotonic_runtime_start
+#define KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_RUNTIME_NEXT \
+ GOMP_loop_ull_nonmonotonic_runtime_next
+#define KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_RUNTIME_START \
+ GOMP_loop_ull_nonmonotonic_runtime_start
+#define KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_RUNTIME \
+ GOMP_parallel_loop_nonmonotonic_runtime
+#define KMP_API_NAME_GOMP_PARALLEL_LOOP_MAYBE_NONMONOTONIC_RUNTIME \
+ GOMP_parallel_loop_maybe_nonmonotonic_runtime
+
#endif /* KMP_FTN_OS_H */
diff --git a/openmp/runtime/src/kmp_gsupport.cpp b/openmp/runtime/src/kmp_gsupport.cpp
index e0739a737d9c..ab4f27bfc067 100644
--- a/openmp/runtime/src/kmp_gsupport.cpp
+++ b/openmp/runtime/src/kmp_gsupport.cpp
@@ -275,7 +275,7 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ORDERED_END)(void) {
#define KMP_DISPATCH_FINI_CHUNK_ULL __kmp_aux_dispatch_fini_chunk_8u
#define KMP_DISPATCH_NEXT_ULL __kmpc_dispatch_next_8u
-// The parallel contruct
+// The parallel construct
#ifndef KMP_DEBUG
static
@@ -325,7 +325,7 @@ static
enum sched_type schedule, long start,
long end, long incr,
long chunk_size) {
- // Intialize the loop worksharing construct.
+ // Initialize the loop worksharing construct.
KMP_DISPATCH_INIT(loc, *gtid, schedule, start, end, incr, chunk_size,
schedule != kmp_sch_static);
@@ -635,6 +635,15 @@ LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_NEXT), {})
LOOP_RUNTIME_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_RUNTIME_START),
kmp_sch_runtime)
LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT), {})
+LOOP_RUNTIME_START(
+ KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_MAYBE_NONMONOTONIC_RUNTIME_START),
+ kmp_sch_runtime)
+LOOP_RUNTIME_START(
+ KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_RUNTIME_START),
+ kmp_sch_runtime)
+LOOP_NEXT(
+ KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_MAYBE_NONMONOTONIC_RUNTIME_NEXT), {})
+LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_RUNTIME_NEXT), {})
LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START),
kmp_ord_static)
@@ -911,6 +920,18 @@ LOOP_NEXT_ULL(
LOOP_RUNTIME_START_ULL(
KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START), kmp_sch_runtime)
LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT), {})
+LOOP_RUNTIME_START_ULL(
+ KMP_EXPAND_NAME(
+ KMP_API_NAME_GOMP_LOOP_ULL_MAYBE_NONMONOTONIC_RUNTIME_START),
+ kmp_sch_runtime)
+LOOP_RUNTIME_START_ULL(
+ KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_RUNTIME_START),
+ kmp_sch_runtime)
+LOOP_NEXT_ULL(
+ KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_MAYBE_NONMONOTONIC_RUNTIME_NEXT),
+ {})
+LOOP_NEXT_ULL(
+ KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_RUNTIME_NEXT), {})
LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START),
kmp_ord_static)
@@ -1513,6 +1534,12 @@ PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED),
kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)
PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME),
kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST)
+PARALLEL_LOOP(
+ KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_MAYBE_NONMONOTONIC_RUNTIME),
+ kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST)
+PARALLEL_LOOP(
+ KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_RUNTIME),
+ kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST)
void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_START)(void) {
int gtid = __kmp_entry_gtid();
@@ -1985,6 +2012,28 @@ KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_DYNAMIC, 45,
KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_GUIDED, 45,
"GOMP_4.5");
+// GOMP_5.0 versioned symbols
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_MAYBE_NONMONOTONIC_RUNTIME_NEXT, 50,
+ "GOMP_5.0");
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_MAYBE_NONMONOTONIC_RUNTIME_START, 50,
+ "GOMP_5.0");
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_RUNTIME_NEXT, 50,
+ "GOMP_5.0");
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_RUNTIME_START, 50,
+ "GOMP_5.0");
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_MAYBE_NONMONOTONIC_RUNTIME_NEXT,
+ 50, "GOMP_5.0");
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_MAYBE_NONMONOTONIC_RUNTIME_START,
+ 50, "GOMP_5.0");
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_RUNTIME_NEXT, 50,
+ "GOMP_5.0");
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_RUNTIME_START, 50,
+ "GOMP_5.0");
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_RUNTIME, 50,
+ "GOMP_5.0");
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_MAYBE_NONMONOTONIC_RUNTIME,
+ 50, "GOMP_5.0");
+
#endif // KMP_USE_VERSION_SYMBOLS
#ifdef __cplusplus
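
The KMP_VERSION_SYMBOL entries above, together with the new GOMP_5.0 node in
exports_so.txt, are what let binaries linked against libgomp resolve these
loop entry points from libomp. The macro ultimately rests on GNU assembler
symbol versioning; a simplified, hand-expanded sketch of the mechanism (the
implementation name below is invented for illustration, and the real macro in
the runtime differs in detail):

    /* implementation compiled into the library */
    void my_gomp_entry_impl(void) { /* ... */ }

    /* publish it under the versioned name that libgomp clients reference */
    __asm__(".symver my_gomp_entry_impl,"
            "GOMP_parallel_loop_nonmonotonic_runtime@@GOMP_5.0");
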
diff --git a/openmp/runtime/src/kmp_i18n.cpp b/openmp/runtime/src/kmp_i18n.cpp
index 53c442715b0b..d2651cfabdf3 100644
--- a/openmp/runtime/src/kmp_i18n.cpp
+++ b/openmp/runtime/src/kmp_i18n.cpp
@@ -639,7 +639,7 @@ kmp_msg_t __kmp_msg_format(unsigned id_arg, ...) {
// numbers, for example: "%2$s %1$s".
__kmp_str_buf_vprint(&buffer, __kmp_i18n_catgets(id), args);
#elif KMP_OS_WINDOWS
- // On Winodws, printf() family functions does not recognize GNU style
+ // On Windows, printf() family functions does not recognize GNU style
// parameter numbers, so we have to use FormatMessage() instead. It recognizes
// parameter numbers, e. g.: "%2!s! "%1!s!".
{
diff --git a/openmp/runtime/src/kmp_i18n.h b/openmp/runtime/src/kmp_i18n.h
index 9d79a21bb2df..3fd6099ad149 100644
--- a/openmp/runtime/src/kmp_i18n.h
+++ b/openmp/runtime/src/kmp_i18n.h
@@ -32,7 +32,7 @@ extern "C" {
__kmp_i18n_catgets() returns read-only string. It should not be freed.
- KMP_I18N_STR macro simplifies acces to strings in message catalog a bit.
+ KMP_I18N_STR macro simplifies access to strings in message catalog a bit.
Following two lines are equivalent:
__kmp_i18n_catgets( kmp_i18n_str_Warning )
diff --git a/openmp/runtime/src/kmp_lock.cpp b/openmp/runtime/src/kmp_lock.cpp
index 2cc9e08278c4..8bf7ef2deb71 100644
--- a/openmp/runtime/src/kmp_lock.cpp
+++ b/openmp/runtime/src/kmp_lock.cpp
@@ -1239,6 +1239,9 @@ __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
KMP_MB();
// ToDo: Use __kmp_wait_sleep or similar when blocktime != inf
KMP_WAIT(spin_here_p, FALSE, KMP_EQ, lck);
+ // Synchronize writes to both runtime thread structures
+ // and writes in user code.
+ KMP_MB();
#ifdef DEBUG_QUEUING_LOCKS
TRACE_LOCK(gtid + 1, "acq spin");
@@ -3018,7 +3021,7 @@ kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(
static kmp_indirect_lock_t *__kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = {0};
// User lock allocator for dynamically dispatched indirect locks. Every entry of
-// the indirect lock table holds the address and type of the allocated indrect
+// the indirect lock table holds the address and type of the allocated indirect
// lock (kmp_indirect_lock_t), and the size of the table doubles when it is
// full. A destroyed indirect lock object is returned to the reusable pool of
// locks, unique to each lock type.
diff --git a/openmp/runtime/src/kmp_lock.h b/openmp/runtime/src/kmp_lock.h
index 75a15f084c69..e54f6812b8b3 100644
--- a/openmp/runtime/src/kmp_lock.h
+++ b/openmp/runtime/src/kmp_lock.h
@@ -42,7 +42,7 @@ typedef struct ident ident_t;
// ----------------------------------------------------------------------------
// We need to know the size of the area we can assume that the compiler(s)
-// allocated for obects of type omp_lock_t and omp_nest_lock_t. The Intel
+// allocated for objects of type omp_lock_t and omp_nest_lock_t. The Intel
// compiler always allocates a pointer-sized area, as does visual studio.
//
// gcc however, only allocates 4 bytes for regular locks, even on 64-bit
@@ -861,11 +861,11 @@ __kmp_destroy_nested_user_lock_with_checks(kmp_user_lock_p lck) {
//
// In other cases, the calling code really should differentiate between an
// unimplemented function and one that is implemented but returning NULL /
-// invalied value. If this is the case, no get function wrapper exists.
+// invalid value. If this is the case, no get function wrapper exists.
extern int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck);
-// no set function; fields set durining local allocation
+// no set function; fields set during local allocation
extern const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck);
@@ -899,7 +899,7 @@ static inline void __kmp_set_user_lock_flags(kmp_user_lock_p lck,
}
}
-// The fuction which sets up all of the vtbl pointers for kmp_user_lock_t.
+// The function which sets up all of the vtbl pointers for kmp_user_lock_t.
extern void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind);
// Macros for binding user lock functions.
@@ -1128,7 +1128,7 @@ extern int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32);
extern int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32);
// Function tables for indirect locks. Set/unset/test differentiate functions
-// with/withuot consistency checking.
+// with/without consistency checking.
extern void (*__kmp_indirect_init[])(kmp_user_lock_p);
extern void (**__kmp_indirect_destroy)(kmp_user_lock_p);
extern int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32);
diff --git a/openmp/runtime/src/kmp_omp.h b/openmp/runtime/src/kmp_omp.h
index 27b550d1f663..c7ba32a14338 100644
--- a/openmp/runtime/src/kmp_omp.h
+++ b/openmp/runtime/src/kmp_omp.h
@@ -47,7 +47,7 @@ typedef struct {
} kmp_omp_nthr_item_t;
typedef struct {
- kmp_int32 num; // Number of items in the arrray.
+ kmp_int32 num; // Number of items in the array.
kmp_uint64 array; // Address of array of kmp_omp_num_threads_item_t.
} kmp_omp_nthr_info_t;
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index acd157db8e52..e0c8cf241044 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -3529,7 +3529,7 @@ static int __kmp_expand_threads(int nNeed) {
// > __kmp_max_nth in one of two ways:
//
// 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]
- // may not be resused by another thread, so we may need to increase
+ // may not be reused by another thread, so we may need to increase
// __kmp_threads_capacity to __kmp_max_nth + 1.
//
// 2) New foreign root(s) are encountered. We always register new foreign
@@ -4515,11 +4515,11 @@ __kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
#if KMP_AFFINITY_SUPPORTED
// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
-// It calculats the worker + master thread's partition based upon the parent
+// It calculates the worker + master thread's partition based upon the parent
// thread's partition, and binds each worker to a thread in their partition.
// The master thread's partition should already include its current binding.
static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
- // Copy the master thread's place partion to the team struct
+ // Copy the master thread's place partition to the team struct
kmp_info_t *master_th = team->t.t_threads[0];
KMP_DEBUG_ASSERT(master_th != NULL);
kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
@@ -5536,7 +5536,7 @@ kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
// locality problems on programs where the size of the hot team regularly
// grew and shrunk.
//
-// Now, for single-level parallelism, the OMP tid is alway == gtid.
+// Now, for single-level parallelism, the OMP tid is always == gtid.
void __kmp_free_thread(kmp_info_t *this_th) {
int gtid;
kmp_info_t **scan;
@@ -5609,7 +5609,7 @@ void __kmp_free_thread(kmp_info_t *this_th) {
// scan is the address of a link in the list, possibly the address of
// __kmp_thread_pool itself.
//
- // In the absence of nested parallism, the for loop will have 0 iterations.
+ // In the absence of nested parallelism, the for loop will have 0 iterations.
if (__kmp_thread_pool_insert_pt != NULL) {
scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
} else {
@@ -6088,7 +6088,7 @@ void __kmp_internal_end_library(int gtid_req) {
only place to clear __kmp_serial_init */
/* we'll check this later too, after we get the lock */
// 2009-09-06: We do not set g_abort without setting g_done. This check looks
- // redundaant, because the next check will work in any case.
+ // redundant, because the next check will work in any case.
if (__kmp_global.g.g_abort) {
KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));
/* TODO abort? */
@@ -8217,7 +8217,6 @@ __kmp_determine_reduction_method(
return (retval);
}
-
// this function is for testing set/get/determine reduce method
kmp_int32 __kmp_get_reduce_method(void) {
return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
@@ -8297,3 +8296,12 @@ int __kmp_pause_resource(kmp_pause_status_t level) {
return 1;
}
}
+
+
+void __kmp_omp_display_env(int verbose) {
+ __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
+ if (__kmp_init_serial == 0)
+ __kmp_do_serial_initialize();
+ __kmp_display_env_impl(!verbose, verbose);
+ __kmp_release_bootstrap_lock(&__kmp_initz_lock);
+}
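
This worker presumably backs the standard omp_display_env(int) API (the entry
point itself is declared elsewhere); note how !verbose/verbose selects between
the terse and VERBOSE output of __kmp_display_env_impl. A minimal usage
sketch, assuming an omp.h that declares omp_display_env(int):

    #include <omp.h>
    #include <stdio.h>

    int main(void) {
      /* Print the version banner and OMP_* settings once, as if
         OMP_DISPLAY_ENV=TRUE were set; a nonzero argument selects the
         VERBOSE form. */
      omp_display_env(0);
      printf("max threads: %d\n", omp_get_max_threads());
      return 0;
    }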
diff --git a/openmp/runtime/src/kmp_sched.cpp b/openmp/runtime/src/kmp_sched.cpp
index 17c149806c89..28d0ffe0fb9d 100644
--- a/openmp/runtime/src/kmp_sched.cpp
+++ b/openmp/runtime/src/kmp_sched.cpp
@@ -667,7 +667,7 @@ static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
// stride for next chunks calculation.
// Last iteration flag set for the team that will execute
// the last iteration of the loop.
- // The routine is called for dist_schedue(static,chunk) only.
+ // The routine is called for dist_schedule(static,chunk) only.
typedef typename traits_t<T>::unsigned_t UT;
typedef typename traits_t<T>::signed_t ST;
kmp_uint32 team_id;
diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
index c7dec4d218c6..5745cbba585f 100644
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -364,7 +364,7 @@ static void __kmp_stg_parse_int(
char const
*name, // I: Name of environment variable (used in warning messages).
char const *value, // I: Value of environment variable to parse.
- int min, // I: Miminal allowed value.
+ int min, // I: Minimum allowed value.
int max, // I: Maximum allowed value.
int *out // O: Output (parsed) value.
) {
@@ -1305,7 +1305,7 @@ static void __kmp_stg_print_max_task_priority(kmp_str_buf_t *buffer,
} // __kmp_stg_print_max_task_priority
// KMP_TASKLOOP_MIN_TASKS
-// taskloop threashold to switch from recursive to linear tasks creation
+// taskloop threshold to switch from recursive to linear task creation
static void __kmp_stg_parse_taskloop_min_tasks(char const *name,
char const *value, void *data) {
int tmp;
@@ -2041,7 +2041,7 @@ static void __kmp_parse_affinity_env(char const *name, char const *value,
// If we see a parse error, emit a warning and scan to the next ",".
//
// FIXME - there's got to be a better way to print an error
-// message, hopefully without overwritting peices of buf.
+// message, hopefully without overwriting pieces of buf.
#define EMIT_WARN(skip, errlist) \
{ \
char ch; \
@@ -4395,7 +4395,7 @@ static void __kmp_stg_print_speculative_statsfile(kmp_str_buf_t *buffer,
// -----------------------------------------------------------------------------
// KMP_HW_SUBSET (was KMP_PLACE_THREADS)
-// The longest observable sequense of items is
+// The longest observable sequence of items is
// Socket-Node-Tile-Core-Thread
// So, let's limit to 5 levels for now
// The input string is usually short enough, let's use 512 limit for now
@@ -5720,7 +5720,11 @@ void __kmp_env_print() {
} // __kmp_env_print
void __kmp_env_print_2() {
+ __kmp_display_env_impl(__kmp_display_env, __kmp_display_env_verbose);
+} // __kmp_env_print_2
+
+void __kmp_display_env_impl(int display_env, int display_env_verbose) {
kmp_env_blk_t block;
kmp_str_buf_t buffer;
@@ -5737,9 +5741,9 @@ void __kmp_env_print_2() {
for (int i = 0; i < __kmp_stg_count; ++i) {
if (__kmp_stg_table[i].print != NULL &&
- ((__kmp_display_env &&
+ ((display_env &&
strncmp(__kmp_stg_table[i].name, "OMP_", 4) == 0) ||
- __kmp_display_env_verbose)) {
+ display_env_verbose)) {
__kmp_stg_table[i].print(&buffer, __kmp_stg_table[i].name,
__kmp_stg_table[i].data);
}
@@ -5754,7 +5758,6 @@ void __kmp_env_print_2() {
__kmp_str_buf_free(&buffer);
__kmp_printf("\n");
-
-} // __kmp_env_print_2
+}
// end of file
diff --git a/openmp/runtime/src/kmp_settings.h b/openmp/runtime/src/kmp_settings.h
index 3247ffc6af74..d61c40694cf6 100644
--- a/openmp/runtime/src/kmp_settings.h
+++ b/openmp/runtime/src/kmp_settings.h
@@ -17,6 +17,7 @@ void __kmp_reset_global_vars(void);
void __kmp_env_initialize(char const *);
void __kmp_env_print();
void __kmp_env_print_2();
+void __kmp_display_env_impl(int display_env, int display_env_verbose);
int __kmp_initial_threads_capacity(int req_nproc);
void __kmp_init_dflt_team_nth();
diff --git a/openmp/runtime/src/kmp_stats.cpp b/openmp/runtime/src/kmp_stats.cpp
index dabd0c35b85c..55ac18a4312c 100644
--- a/openmp/runtime/src/kmp_stats.cpp
+++ b/openmp/runtime/src/kmp_stats.cpp
@@ -270,7 +270,7 @@ void explicitTimer::stop(tsc_tick_count tick,
/* ************* partitionedTimers member functions ************* */
partitionedTimers::partitionedTimers() { timer_stack.reserve(8); }
-// initialize the paritioned timers to an initial timer
+// initialize the partitioned timers to an initial timer
void partitionedTimers::init(explicitTimer timer) {
KMP_DEBUG_ASSERT(this->timer_stack.size() == 0);
timer_stack.push_back(timer);
@@ -609,7 +609,7 @@ void kmp_stats_output_module::printTimerStats(FILE *statsOut,
totalStats[s].format(tag, true).c_str());
}
- // Print historgram of statistics
+ // Print histogram of statistics
if (theStats[0].haveHist()) {
fprintf(statsOut, "\nTimer distributions\n");
for (int s = 0; s < TIMER_LAST; s++) {
diff --git a/openmp/runtime/src/kmp_stats.h b/openmp/runtime/src/kmp_stats.h
index ee95658fd9b7..a36528f3fca3 100644
--- a/openmp/runtime/src/kmp_stats.h
+++ b/openmp/runtime/src/kmp_stats.h
@@ -195,7 +195,7 @@ enum stats_state_e {
// from a dynamically scheduled loop
// OMP_critical -- Time thread spends executing critical section
// OMP_critical_wait -- Time thread spends waiting to enter
-// a critcal seciton
+// a critical section
// OMP_single -- Time spent executing a "single" region
// OMP_master -- Time spent executing a "master" region
// OMP_task_immediate -- Time spent executing non-deferred tasks
@@ -522,7 +522,7 @@ public:
void windup();
};
-// Special wrapper around the partioned timers to aid timing code blocks
+// Special wrapper around the partitioned timers to aid timing code blocks
// It avoids the need to have an explicit end, leaving the scope suffices.
class blockPartitionedTimer {
partitionedTimers *part_timers;
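
The scope-based idea described above can be illustrated with a plain RAII
sketch (ScopedTimer is hypothetical, not the runtime's class; it only mirrors
the start-in-constructor/stop-in-destructor shape):

    #include <chrono>
    #include <cstdio>

    struct ScopedTimer {
      explicit ScopedTimer(const char *name)
          : name(name), start(std::chrono::steady_clock::now()) {}
      ~ScopedTimer() { // stopping happens automatically when the scope ends
        auto us = std::chrono::duration_cast<std::chrono::microseconds>(
                      std::chrono::steady_clock::now() - start).count();
        std::fprintf(stderr, "%s: %lld us\n", name, (long long)us);
      }
      const char *name;
      std::chrono::steady_clock::time_point start;
    };

    void work() {
      ScopedTimer t("work"); // no explicit end needed; leaving scope suffices
      /* ... timed block ... */
    }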
@@ -920,7 +920,7 @@ extern kmp_stats_output_module __kmp_stats_output;
#define KMP_OUTPUT_STATS(heading_string) __kmp_output_stats(heading_string)
/*!
- * \brief Initializes the paritioned timers to begin with name.
+ * \brief Initializes the partitioned timers to begin with name.
*
* @param name timer which you want this thread to begin with
*
diff --git a/openmp/runtime/src/kmp_str.h b/openmp/runtime/src/kmp_str.h
index 09faadb68f1a..9e669bbe4742 100644
--- a/openmp/runtime/src/kmp_str.h
+++ b/openmp/runtime/src/kmp_str.h
@@ -72,12 +72,12 @@ struct kmp_str_fname {
typedef struct kmp_str_fname kmp_str_fname_t;
void __kmp_str_fname_init(kmp_str_fname_t *fname, char const *path);
void __kmp_str_fname_free(kmp_str_fname_t *fname);
-// Compares file name with specified patern. If pattern is NULL, any fname
+// Compares file name with specified pattern. If pattern is NULL, any fname
// matched.
int __kmp_str_fname_match(kmp_str_fname_t const *fname, char const *pattern);
/* The compiler provides source locations in string form
- ";file;func;line;col;;". It is not convenient for manupulation. This
+ ";file;func;line;col;;". It is not convenient for manipulation. This
structure keeps source location in more convenient form.
Usage:
diff --git a/openmp/runtime/src/kmp_stub.cpp b/openmp/runtime/src/kmp_stub.cpp
index 6b5041988d5c..0fc022a03a2d 100644
--- a/openmp/runtime/src/kmp_stub.cpp
+++ b/openmp/runtime/src/kmp_stub.cpp
@@ -147,7 +147,7 @@ void *kmp_malloc(size_t size) {
i;
void *res;
#if KMP_OS_WINDOWS
- // If succesfull returns a pointer to the memory block, otherwise returns
+ // If successful returns a pointer to the memory block, otherwise returns
// NULL.
// Sets errno to ENOMEM or EINVAL if memory allocation failed or parameter
// validation failed.
diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index e1618f5cd9df..a654951f5b3b 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -35,7 +35,7 @@ static std::atomic<kmp_int32> kmp_node_id_seed = ATOMIC_VAR_INIT(0);
static void __kmp_init_node(kmp_depnode_t *node) {
node->dn.successors = NULL;
- node->dn.task = NULL; // will point to the rigth task
+ node->dn.task = NULL; // will point to the right task
// once dependences have been processed
for (int i = 0; i < MAX_MTX_DEPS; ++i)
node->dn.mtx_locks[i] = NULL;
@@ -205,7 +205,7 @@ static kmp_depnode_list_t *__kmp_add_node(kmp_info_t *thread,
return new_head;
}
-static inline void __kmp_track_dependence(kmp_depnode_t *source,
+static inline void __kmp_track_dependence(kmp_int32 gtid, kmp_depnode_t *source,
kmp_depnode_t *sink,
kmp_task_t *sink_task) {
#ifdef KMP_SUPPORT_GRAPH_OUTPUT
@@ -224,11 +224,14 @@ static inline void __kmp_track_dependence(kmp_depnode_t *source,
*/
if (ompt_enabled.ompt_callback_task_dependence) {
kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
- kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task);
+ ompt_data_t *sink_data;
+ if (sink_task)
+ sink_data = &(KMP_TASK_TO_TASKDATA(sink_task)->ompt_task_info.task_data);
+ else
+ sink_data = &__kmp_threads[gtid]->th.ompt_thread_info.task_data;
ompt_callbacks.ompt_callback(ompt_callback_task_dependence)(
- &(task_source->ompt_task_info.task_data),
- &(task_sink->ompt_task_info.task_data));
+ &(task_source->ompt_task_info.task_data), sink_data);
}
#endif /* OMPT_SUPPORT && OMPT_OPTIONAL */
}
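
A tool observes these edges through the OMPT task-dependence callback; with
this change the sink may now be the thread's taskwait data rather than a real
task. A hedged sketch of the callback side only (registration via
ompt_set_callback in the tool's initializer is omitted), assuming the
standard omp-tools.h header:

    #include <omp-tools.h>
    #include <stdio.h>

    /* Print each dependence edge the runtime reports. */
    static void on_task_dependence(ompt_data_t *src_task_data,
                                   ompt_data_t *sink_task_data) {
      printf("dependence: %p -> %p\n", (void *)src_task_data,
             (void *)sink_task_data);
    }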
@@ -246,7 +249,7 @@ __kmp_depnode_link_successor(kmp_int32 gtid, kmp_info_t *thread,
if (dep->dn.task) {
KMP_ACQUIRE_DEPNODE(gtid, dep);
if (dep->dn.task) {
- __kmp_track_dependence(dep, node, task);
+ __kmp_track_dependence(gtid, dep, node, task);
dep->dn.successors = __kmp_add_node(thread, dep->dn.successors, node);
KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
"%p\n",
@@ -272,7 +275,7 @@ static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid,
// synchronously add source to sink' list of successors
KMP_ACQUIRE_DEPNODE(gtid, sink);
if (sink->dn.task) {
- __kmp_track_dependence(sink, source, task);
+ __kmp_track_dependence(gtid, sink, source, task);
sink->dn.successors = __kmp_add_node(thread, sink->dn.successors, source);
KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
"%p\n",
@@ -473,8 +476,8 @@ static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
npredecessors++;
// Update predecessors and obtain current value to check if there are still
- // any outstandig dependences (some tasks may have finished while we processed
- // the dependences)
+ // any outstanding dependences (some tasks may have finished while we
+ // processed the dependences)
npredecessors =
node->dn.npredecessors.fetch_add(npredecessors) + npredecessors;
@@ -498,7 +501,7 @@ task''
@param noalias_dep_list List of depend items with no aliasing
@return Returns either TASK_CURRENT_NOT_QUEUED if the current task was not
-suspendend and queued, or TASK_CURRENT_QUEUED if it was suspended and queued
+suspended and queued, or TASK_CURRENT_QUEUED if it was suspended and queued
Schedule a non-thread-switchable task with dependences for execution
*/
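
For reference, the dependence kinds mapped below (in, out, inout, and the
mutexinoutset flag this change adds) correspond to user code along these
lines; a sketch assuming OpenMP 5.0 syntax:

    void deps_demo(void) {
      int x = 0, y = 0;
    #pragma omp parallel shared(x, y)
    #pragma omp single
      {
    #pragma omp task depend(out : x)
        x = 1; /* producer */
    #pragma omp task depend(in : x) depend(inout : y)
        y += x; /* ordered after the producer */
    #pragma omp task depend(mutexinoutset : y)
        y *= 2; /* mutually exclusive with other mutexinoutset tasks on y */
      }
    }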
@@ -540,47 +543,40 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
ompt_enabled.ompt_callback_dependences) {
kmp_int32 i;
- new_taskdata->ompt_task_info.ndeps = ndeps + ndeps_noalias;
- new_taskdata->ompt_task_info.deps =
- (ompt_dependence_t *)KMP_OMPT_DEPS_ALLOC(
- thread, (ndeps + ndeps_noalias) * sizeof(ompt_dependence_t));
+ int ompt_ndeps = ndeps + ndeps_noalias;
+ ompt_dependence_t *ompt_deps = (ompt_dependence_t *)KMP_OMPT_DEPS_ALLOC(
+ thread, (ndeps + ndeps_noalias) * sizeof(ompt_dependence_t));
- KMP_ASSERT(new_taskdata->ompt_task_info.deps != NULL);
+ KMP_ASSERT(ompt_deps != NULL);
for (i = 0; i < ndeps; i++) {
- new_taskdata->ompt_task_info.deps[i].variable.ptr =
- (void *)dep_list[i].base_addr;
+ ompt_deps[i].variable.ptr = (void *)dep_list[i].base_addr;
if (dep_list[i].flags.in && dep_list[i].flags.out)
- new_taskdata->ompt_task_info.deps[i].dependence_type =
- ompt_dependence_type_inout;
+ ompt_deps[i].dependence_type = ompt_dependence_type_inout;
else if (dep_list[i].flags.out)
- new_taskdata->ompt_task_info.deps[i].dependence_type =
- ompt_dependence_type_out;
+ ompt_deps[i].dependence_type = ompt_dependence_type_out;
else if (dep_list[i].flags.in)
- new_taskdata->ompt_task_info.deps[i].dependence_type =
- ompt_dependence_type_in;
+ ompt_deps[i].dependence_type = ompt_dependence_type_in;
+ else if (dep_list[i].flags.mtx)
+ ompt_deps[i].dependence_type = ompt_dependence_type_mutexinoutset;
}
for (i = 0; i < ndeps_noalias; i++) {
- new_taskdata->ompt_task_info.deps[ndeps + i].variable.ptr =
- (void *)noalias_dep_list[i].base_addr;
+ ompt_deps[ndeps + i].variable.ptr = (void *)noalias_dep_list[i].base_addr;
if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out)
- new_taskdata->ompt_task_info.deps[ndeps + i].dependence_type =
- ompt_dependence_type_inout;
+ ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inout;
else if (noalias_dep_list[i].flags.out)
- new_taskdata->ompt_task_info.deps[ndeps + i].dependence_type =
- ompt_dependence_type_out;
+ ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_out;
else if (noalias_dep_list[i].flags.in)
- new_taskdata->ompt_task_info.deps[ndeps + i].dependence_type =
- ompt_dependence_type_in;
+ ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_in;
+ else if (noalias_dep_list[i].flags.mtx)
+ ompt_deps[ndeps + i].dependence_type =
+ ompt_dependence_type_mutexinoutset;
}
ompt_callbacks.ompt_callback(ompt_callback_dependences)(
- &(new_taskdata->ompt_task_info.task_data),
- new_taskdata->ompt_task_info.deps, new_taskdata->ompt_task_info.ndeps);
+ &(new_taskdata->ompt_task_info.task_data), ompt_deps, ompt_ndeps);
/* We can now free the allocated memory for the dependencies */
- /* For OMPD we might want to delay the free until task_end */
- KMP_OMPT_DEPS_FREE(thread, new_taskdata->ompt_task_info.deps);
- new_taskdata->ompt_task_info.deps = NULL;
- new_taskdata->ompt_task_info.ndeps = 0;
+  /* For OMPD we might want to delay the free until the end of this function */
+ KMP_OMPT_DEPS_FREE(thread, ompt_deps);
}
#endif /* OMPT_OPTIONAL */
#endif /* OMPT_SUPPORT */
@@ -642,6 +638,23 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
return ret;
}
+#if OMPT_SUPPORT
+void __ompt_taskwait_dep_finish(kmp_taskdata_t *current_task,
+ ompt_data_t *taskwait_task_data) {
+ if (ompt_enabled.ompt_callback_task_schedule) {
+ ompt_data_t task_data = ompt_data_none;
+ ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
+ current_task ? &(current_task->ompt_task_info.task_data) : &task_data,
+ ompt_task_switch, taskwait_task_data);
+ ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
+ taskwait_task_data, ompt_task_complete,
+ current_task ? &(current_task->ompt_task_info.task_data) : &task_data);
+ }
+ current_task->ompt_task_info.frame.enter_frame.ptr = NULL;
+ *taskwait_task_data = ompt_data_none;
+}
+#endif /* OMPT_SUPPORT */
+
/*!
@ingroup TASKING
@param loc_ref location of the original task directive
@@ -668,6 +681,74 @@ void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
kmp_info_t *thread = __kmp_threads[gtid];
kmp_taskdata_t *current_task = thread->th.th_current_task;
+#if OMPT_SUPPORT
+ // this function represents a taskwait construct with depend clause
+ // We signal 4 events:
+ // - creation of the taskwait task
+ // - dependences of the taskwait task
+ // - schedule and finish of the taskwait task
+ ompt_data_t *taskwait_task_data = &thread->th.ompt_thread_info.task_data;
+ KMP_ASSERT(taskwait_task_data->ptr == NULL);
+ if (ompt_enabled.enabled) {
+ if (!current_task->ompt_task_info.frame.enter_frame.ptr)
+ current_task->ompt_task_info.frame.enter_frame.ptr =
+ OMPT_GET_FRAME_ADDRESS(0);
+ if (ompt_enabled.ompt_callback_task_create) {
+ ompt_data_t task_data = ompt_data_none;
+ ompt_callbacks.ompt_callback(ompt_callback_task_create)(
+ current_task ? &(current_task->ompt_task_info.task_data) : &task_data,
+ current_task ? &(current_task->ompt_task_info.frame) : NULL,
+ taskwait_task_data,
+ ompt_task_explicit | ompt_task_undeferred | ompt_task_mergeable, 1,
+ OMPT_GET_RETURN_ADDRESS(0));
+ }
+ }
+
+#if OMPT_OPTIONAL
+ /* OMPT grab all dependences if requested by the tool */
+ if (ndeps + ndeps_noalias > 0 && ompt_enabled.ompt_callback_dependences) {
+ kmp_int32 i;
+
+ int ompt_ndeps = ndeps + ndeps_noalias;
+ ompt_dependence_t *ompt_deps = (ompt_dependence_t *)KMP_OMPT_DEPS_ALLOC(
+ thread, (ndeps + ndeps_noalias) * sizeof(ompt_dependence_t));
+
+ KMP_ASSERT(ompt_deps != NULL);
+
+ for (i = 0; i < ndeps; i++) {
+ ompt_deps[i].variable.ptr = (void *)dep_list[i].base_addr;
+ if (dep_list[i].flags.in && dep_list[i].flags.out)
+ ompt_deps[i].dependence_type = ompt_dependence_type_inout;
+ else if (dep_list[i].flags.out)
+ ompt_deps[i].dependence_type = ompt_dependence_type_out;
+ else if (dep_list[i].flags.in)
+ ompt_deps[i].dependence_type = ompt_dependence_type_in;
+ else if (dep_list[i].flags.mtx)
+        ompt_deps[i].dependence_type = ompt_dependence_type_mutexinoutset;
+ }
+ for (i = 0; i < ndeps_noalias; i++) {
+ ompt_deps[ndeps + i].variable.ptr = (void *)noalias_dep_list[i].base_addr;
+ if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out)
+ ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inout;
+ else if (noalias_dep_list[i].flags.out)
+ ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_out;
+ else if (noalias_dep_list[i].flags.in)
+ ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_in;
+ else if (noalias_dep_list[i].flags.mtx)
+ ompt_deps[ndeps + i].dependence_type =
+ ompt_dependence_type_mutexinoutset;
+ }
+ ompt_callbacks.ompt_callback(ompt_callback_dependences)(
+ taskwait_task_data, ompt_deps, ompt_ndeps);
+ /* We can now free the allocated memory for the dependencies */
+    /* For OMPD we might want to delay the free until the end of this function */
+ KMP_OMPT_DEPS_FREE(thread, ompt_deps);
+ ompt_deps = NULL;
+ }
+#endif /* OMPT_OPTIONAL */
+#endif /* OMPT_SUPPORT */
+
// We can return immediately as:
// - dependences are not computed in serial teams (except with proxy tasks)
// - if the dephash is not yet created it means we have nothing to wait for
@@ -682,6 +763,9 @@ void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking "
"dependencies : loc=%p\n",
gtid, loc_ref));
+#if OMPT_SUPPORT
+ __ompt_taskwait_dep_finish(current_task, taskwait_task_data);
+#endif /* OMPT_SUPPORT */
return;
}
@@ -694,6 +778,9 @@ void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking "
"dependencies : loc=%p\n",
gtid, loc_ref));
+#if OMPT_SUPPORT
+ __ompt_taskwait_dep_finish(current_task, taskwait_task_data);
+#endif /* OMPT_SUPPORT */
return;
}
@@ -705,6 +792,9 @@ void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
__kmp_task_stealing_constraint);
}
+#if OMPT_SUPPORT
+ __ompt_taskwait_dep_finish(current_task, taskwait_task_data);
+#endif /* OMPT_SUPPORT */
KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d finished waiting : loc=%p\n",
gtid, loc_ref));
}
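
At the user level, __kmpc_omp_wait_deps backs a taskwait that carries a
depend clause (which, per the new OMPT code above, is reported as a hidden
taskwait task). A sketch with hypothetical produce()/consume() helpers,
assuming OpenMP 5.0 taskwait-depend support in the compiler:

    extern double produce(void); /* hypothetical */
    extern void consume(double); /* hypothetical */

    void stage(double *buf) {
    #pragma omp parallel
    #pragma omp single
      {
    #pragma omp task depend(out : buf[0])
        buf[0] = produce();
        /* Blocks only until tasks with conflicting dependences on buf[0]
           complete, not until all child tasks complete. */
    #pragma omp taskwait depend(in : buf[0])
        consume(buf[0]);
      }
    }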
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 15ffc1454fe9..2ddc2e7a6fd7 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -298,6 +298,7 @@ static bool __kmp_task_is_allowed(int gtid, const kmp_int32 is_constrained,
static void __kmp_realloc_task_deque(kmp_info_t *thread,
kmp_thread_data_t *thread_data) {
kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
+ KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == size);
kmp_int32 new_size = 2 * size;
KE_TRACE(10, ("__kmp_realloc_task_deque: T#%d reallocating deque[from %d to "
@@ -381,8 +382,11 @@ static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
} else {
__kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
locked = 1;
- // expand deque to push the task which is not allowed to execute
- __kmp_realloc_task_deque(thread, thread_data);
+ if (TCR_4(thread_data->td.td_deque_ntasks) >=
+ TASK_DEQUE_SIZE(thread_data->td)) {
+ // expand deque to push the task which is not allowed to execute
+ __kmp_realloc_task_deque(thread, thread_data);
+ }
}
}
// Lock the deque for the task push operation
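
The shape of this fix is the usual lock-then-revalidate pattern: the deque
may have been expanded by another thread between the unlocked test and the
lock acquisition, so the fullness condition is checked again before growing.
A generic sketch (not the runtime's data structure):

    #include <cstddef>
    #include <mutex>
    #include <vector>

    struct Deque {
      std::mutex lock;
      std::vector<int> tasks;
      size_t capacity = 8;

      void push(int t) {
        std::lock_guard<std::mutex> g(lock);
        // Re-check under the lock before growing: another thread may have
        // already expanded the deque while we waited.
        if (tasks.size() >= capacity)
          capacity *= 2; // doubling, as in __kmp_realloc_task_deque
        tasks.push_back(t);
      }
    };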
@@ -547,8 +551,6 @@ static inline void __ompt_task_init(kmp_taskdata_t *task, int tid) {
task->ompt_task_info.frame.enter_frame = ompt_data_none;
task->ompt_task_info.frame.exit_frame_flags = ompt_frame_runtime | ompt_frame_framepointer;
task->ompt_task_info.frame.enter_frame_flags = ompt_frame_runtime | ompt_frame_framepointer;
- task->ompt_task_info.ndeps = 0;
- task->ompt_task_info.deps = NULL;
}
// __ompt_task_start:
@@ -573,24 +575,20 @@ static inline void __ompt_task_start(kmp_task_t *task,
// __ompt_task_finish:
// Build and trigger final task-schedule event
-static inline void
-__ompt_task_finish(kmp_task_t *task, kmp_taskdata_t *resumed_task,
- ompt_task_status_t status = ompt_task_complete) {
- kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
- if (__kmp_omp_cancellation && taskdata->td_taskgroup &&
- taskdata->td_taskgroup->cancel_request == cancel_taskgroup) {
- status = ompt_task_cancel;
- }
-
- /* let OMPT know that we're returning to the callee task */
+static inline void __ompt_task_finish(kmp_task_t *task,
+ kmp_taskdata_t *resumed_task,
+ ompt_task_status_t status) {
if (ompt_enabled.ompt_callback_task_schedule) {
+ kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
+ if (__kmp_omp_cancellation && taskdata->td_taskgroup &&
+ taskdata->td_taskgroup->cancel_request == cancel_taskgroup) {
+ status = ompt_task_cancel;
+ }
+
+ /* let OMPT know that we're returning to the callee task */
ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
&(taskdata->ompt_task_info.task_data), status,
- &((resumed_task ? resumed_task
- : (taskdata->ompt_task_info.scheduling_parent
- ? taskdata->ompt_task_info.scheduling_parent
- : taskdata->td_parent))
- ->ompt_task_info.task_data));
+ (resumed_task ? &(resumed_task->ompt_task_info.task_data) : NULL));
}
}
#endif
@@ -799,6 +797,10 @@ static void __kmp_free_task_and_ancestors(kmp_int32 gtid,
// gtid: global thread ID for calling thread
// task: task to be finished
// resumed_task: task to be resumed. (may be NULL if task is serialized)
+//
+// template<ompt>: effectively ompt_enabled.enabled!=0
+// the version with ompt=false is inlined, allowing all OMPT code to be
+// optimized away in this case
template <bool ompt>
static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
kmp_taskdata_t *resumed_task) {
@@ -845,10 +847,6 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
return;
}
}
-#if OMPT_SUPPORT
- if (ompt)
- __ompt_task_finish(task, resumed_task);
-#endif
// Check mutexinoutset dependencies, release locks
kmp_depnode_t *node = taskdata->td_depnode;
@@ -861,7 +859,37 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
}
}
+ // bookkeeping for resuming task:
+ // GEH - note tasking_ser => task_serial
+ KMP_DEBUG_ASSERT(
+ (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
+ taskdata->td_flags.task_serial);
+ if (taskdata->td_flags.task_serial) {
+ if (resumed_task == NULL) {
+ resumed_task = taskdata->td_parent; // In a serialized task, the resumed
+ // task is the parent
+ }
+ } else {
+ KMP_DEBUG_ASSERT(resumed_task !=
+ NULL); // verify that resumed task is passed as argument
+ }
+
+  /* If the task's destructor thunk flag has been set, we need to invoke the
+ destructor thunk that has been generated by the compiler. The code is
+ placed here, since at this point other tasks might have been released
+ hence overlapping the destructor invocations with some other work in the
+ released tasks. The OpenMP spec is not specific on when the destructors
+ are invoked, so we should be free to choose. */
+ if (taskdata->td_flags.destructors_thunk) {
+ kmp_routine_entry_t destr_thunk = task->data1.destructors;
+ KMP_ASSERT(destr_thunk);
+ destr_thunk(gtid, task);
+ }
+
KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
+ KMP_DEBUG_ASSERT(taskdata->td_flags.started == 1);
+ KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
+
bool detach = false;
if (taskdata->td_flags.detachable == TASK_DETACHABLE) {
if (taskdata->td_allow_completion_event.type ==
@@ -870,21 +898,41 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
__kmp_acquire_tas_lock(&taskdata->td_allow_completion_event.lock, gtid);
if (taskdata->td_allow_completion_event.type ==
KMP_EVENT_ALLOW_COMPLETION) {
+ // task finished execution
+ KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
+ taskdata->td_flags.executing = 0; // suspend the finishing task
+
+#if OMPT_SUPPORT
+      // For a detached task that is not yet completed, we report
+      // ompt_task_detach here; a later omp_fulfill_event signals completion.
+      // Locking is necessary to avoid a race with ompt_task_late_fulfill.
+ if (ompt)
+ __ompt_task_finish(task, resumed_task, ompt_task_detach);
+#endif
+
+ // no access to taskdata after this point!
+ // __kmp_fulfill_event might free taskdata at any time from now
+
taskdata->td_flags.proxy = TASK_PROXY; // proxify!
detach = true;
}
__kmp_release_tas_lock(&taskdata->td_allow_completion_event.lock, gtid);
}
}
- KMP_DEBUG_ASSERT(taskdata->td_flags.started == 1);
- KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
if (!detach) {
taskdata->td_flags.complete = 1; // mark the task as completed
+#if OMPT_SUPPORT
+ // This is not a detached task, we are done here
+ if (ompt)
+ __ompt_task_finish(task, resumed_task, ompt_task_complete);
+#endif
+
// Only need to keep track of count if team parallel and tasking not
- // serialized
- if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
+ // serialized, or task is detachable and event has already been fulfilled
+ if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) ||
+ taskdata->td_flags.detachable == TASK_DETACHABLE) {
// Predecrement simulated by "- 1" calculation
children =
KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks) - 1;
@@ -897,45 +945,19 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
// with the proxy task as origin
__kmp_release_deps(gtid, taskdata);
}
+ // td_flags.executing must be marked as 0 after __kmp_release_deps has been
+    // called. Otherwise, if a task is executed immediately from the
+ // release_deps code, the flag will be reset to 1 again by this same
+ // function
+ KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
+ taskdata->td_flags.executing = 0; // suspend the finishing task
}
- // td_flags.executing must be marked as 0 after __kmp_release_deps has been
- // called. Othertwise, if a task is executed immediately from the release_deps
- // code, the flag will be reset to 1 again by this same function
- KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
- taskdata->td_flags.executing = 0; // suspend the finishing task
KA_TRACE(
20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
gtid, taskdata, children));
- /* If the tasks' destructor thunk flag has been set, we need to invoke the
- destructor thunk that has been generated by the compiler. The code is
- placed here, since at this point other tasks might have been released
- hence overlapping the destructor invokations with some other work in the
- released tasks. The OpenMP spec is not specific on when the destructors
- are invoked, so we should be free to choose. */
- if (taskdata->td_flags.destructors_thunk) {
- kmp_routine_entry_t destr_thunk = task->data1.destructors;
- KMP_ASSERT(destr_thunk);
- destr_thunk(gtid, task);
- }
-
- // bookkeeping for resuming task:
- // GEH - note tasking_ser => task_serial
- KMP_DEBUG_ASSERT(
- (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
- taskdata->td_flags.task_serial);
- if (taskdata->td_flags.task_serial) {
- if (resumed_task == NULL) {
- resumed_task = taskdata->td_parent; // In a serialized task, the resumed
- // task is the parent
- }
- } else {
- KMP_DEBUG_ASSERT(resumed_task !=
- NULL); // verify that resumed task is passed as argument
- }
-
// Free this task and then ancestor tasks if they have no children.
// Restore th_current_task first as suggested by John:
// johnmc: if an asynchronous inquiry peers into the runtime system
@@ -1304,7 +1326,7 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
taskdata->td_flags.task_serial =
(parent_task->td_flags.final || taskdata->td_flags.team_serial ||
- taskdata->td_flags.tasking_ser);
+ taskdata->td_flags.tasking_ser || flags->merged_if0);
taskdata->td_flags.started = 0;
taskdata->td_flags.executing = 0;
@@ -1411,7 +1433,7 @@ __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, kmp_int32 gtid,
//
// gtid: global thread ID of caller
// task: the task to invoke
-// current_task: the task to resume after task invokation
+// current_task: the task to resume after task invocation
static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
kmp_taskdata_t *current_task) {
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
@@ -2911,7 +2933,7 @@ static inline int __kmp_execute_tasks_template(
// met, then return now, so that the barrier gather/release pattern can
// proceed. If this thread is in the last spin loop in the barrier,
// waiting to be released, we know that the termination condition will not
- // be satisified, so don't waste any cycles checking it.
+ // be satisfied, so don't waste any cycles checking it.
if (flag == NULL || (!final_spin && flag->done_check())) {
KA_TRACE(
15,
@@ -3096,7 +3118,7 @@ static void __kmp_enable_tasking(kmp_task_team_t *task_team,
* to each thread in the team, so that it can steal work from it.
*
* Enter the existence of the kmp_task_team_t struct. It employs a reference
- * counting mechanims, and is allocated by the master thread before calling
+ * counting mechanism, and is allocated by the master thread before calling
* __kmp_<barrier_kind>_release, and then is release by the last thread to
* exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
* of the kmp_task_team_t structs for consecutive barriers can overlap
@@ -3107,7 +3129,7 @@ static void __kmp_enable_tasking(kmp_task_team_t *task_team,
* We currently use the existence of the threads array as an indicator that
* tasks were spawned since the last barrier. If the structure is to be
* useful outside the context of tasking, then this will have to change, but
- * not settting the field minimizes the performance impact of tasking on
+ * not setting the field minimizes the performance impact of tasking on
* barriers, when no explicit tasks were spawned (pushed, actually).
*/
@@ -3651,7 +3673,11 @@ static bool __kmp_give_task(kmp_info_t *thread, kmp_int32 tid, kmp_task_t *task,
return result;
__kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
- __kmp_realloc_task_deque(thread, thread_data);
+ if (TCR_4(thread_data->td.td_deque_ntasks) >=
+ TASK_DEQUE_SIZE(thread_data->td)) {
+ // expand deque to push the task which is not allowed to execute
+ __kmp_realloc_task_deque(thread, thread_data);
+ }
} else {
@@ -3847,22 +3873,30 @@ void __kmp_fulfill_event(kmp_event_t *event) {
bool detached = false;
int gtid = __kmp_get_gtid();
+ // The associated task might have completed or could be completing at this
+ // point.
+ // We need to take the lock to avoid races
+ __kmp_acquire_tas_lock(&event->lock, gtid);
if (taskdata->td_flags.proxy == TASK_PROXY) {
- // The associated task code completed before this call and detached.
detached = true;
- event->type = KMP_EVENT_UNINITIALIZED;
} else {
- // The associated task has not completed but could be completing at this
- // point.
- // We need to take the lock to avoid races
- __kmp_acquire_tas_lock(&event->lock, gtid);
- if (taskdata->td_flags.proxy == TASK_PROXY)
- detached = true;
- event->type = KMP_EVENT_UNINITIALIZED;
- __kmp_release_tas_lock(&event->lock, gtid);
+#if OMPT_SUPPORT
+ // The OMPT event must occur under mutual exclusion,
+ // otherwise the tool might access ptask after free
+ if (UNLIKELY(ompt_enabled.enabled))
+ __ompt_task_finish(ptask, NULL, ompt_task_early_fulfill);
+#endif
}
+ event->type = KMP_EVENT_UNINITIALIZED;
+ __kmp_release_tas_lock(&event->lock, gtid);
if (detached) {
+#if OMPT_SUPPORT
+ // We free ptask afterwards and know the task is finished,
+ // so locking is not necessary
+ if (UNLIKELY(ompt_enabled.enabled))
+ __ompt_task_finish(ptask, NULL, ompt_task_late_fulfill);
+#endif
// If the task detached complete the proxy task
if (gtid >= 0) {
kmp_team_t *team = taskdata->td_team;
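
This locking protocol backs the OpenMP 5.0 detach clause: the task only
completes once omp_fulfill_event() is called, possibly from a non-OpenMP
thread. A sketch with a hypothetical asynchronous helper:

    #include <omp.h>

    extern void start_async_read(char *dst, omp_event_handle_t ev);
    /* hypothetical; its completion callback calls omp_fulfill_event(ev) */

    void reader(char *dst) {
    #pragma omp parallel
    #pragma omp single
      {
        omp_event_handle_t ev;
    #pragma omp task detach(ev)
        start_async_read(dst, ev);
    #pragma omp taskwait /* returns only once the event has been fulfilled */
      }
    }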
@@ -3888,14 +3922,13 @@ void __kmp_fulfill_event(kmp_event_t *event) {
kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src) {
kmp_task_t *task;
kmp_taskdata_t *taskdata;
- kmp_taskdata_t *taskdata_src;
- kmp_taskdata_t *parent_task = thread->th.th_current_task;
+ kmp_taskdata_t *taskdata_src = KMP_TASK_TO_TASKDATA(task_src);
+ kmp_taskdata_t *parent_task = taskdata_src->td_parent; // same parent task
size_t shareds_offset;
size_t task_size;
KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread,
task_src));
- taskdata_src = KMP_TASK_TO_TASKDATA(task_src);
KMP_DEBUG_ASSERT(taskdata_src->td_flags.proxy ==
TASK_FULL); // it should not be proxy task
KMP_DEBUG_ASSERT(taskdata_src->td_flags.tasktype == TASK_EXPLICIT);
@@ -3923,9 +3956,12 @@ kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src) {
}
taskdata->td_alloc_thread = thread;
taskdata->td_parent = parent_task;
- taskdata->td_taskgroup =
- parent_task
- ->td_taskgroup; // task inherits the taskgroup from the parent task
+ // task inherits the taskgroup from the parent task
+ taskdata->td_taskgroup = parent_task->td_taskgroup;
+ // tied task needs to initialize the td_last_tied at creation,
+ // untied one does this when it is scheduled for execution
+ if (taskdata->td_flags.tiedness == TASK_TIED)
+ taskdata->td_last_tied = taskdata;
// Only need to keep track of child task counts if team parallel and tasking
// not serialized
@@ -4255,7 +4291,7 @@ int __kmp_taskloop_task(int gtid, void *ptask) {
// grainsize Number of loop iterations per task
// extras Number of chunks with grainsize+1 iterations
// tc Iterations count
-// num_t_min Threashold to launch tasks recursively
+// num_t_min Threshold to launch tasks recursively
// task_dup Tasks duplication routine
// codeptr_ra Return address for OMPT events
void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
@@ -4267,7 +4303,6 @@ void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
void *codeptr_ra,
#endif
void *task_dup) {
-#if KMP_DEBUG
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
KMP_DEBUG_ASSERT(task != NULL);
KMP_DEBUG_ASSERT(num_tasks > num_t_min);
@@ -4275,7 +4310,6 @@ void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
" %lld, extras %lld, i=%lld,%lld(%d), dup %p\n",
gtid, taskdata, num_tasks, grainsize, extras, *lb, *ub, st,
task_dup));
-#endif
p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
kmp_uint64 lower = *lb;
kmp_info_t *thread = __kmp_threads[gtid];
@@ -4319,9 +4353,14 @@ void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
*ub = ub0; // adjust upper bound for the 1st half
// create auxiliary task for 2nd half of the loop
+ // make sure new task has same parent task as the pattern task
+ kmp_taskdata_t *current_task = thread->th.th_current_task;
+ thread->th.th_current_task = taskdata->td_parent;
kmp_task_t *new_task =
__kmpc_omp_task_alloc(loc, gtid, 1, 3 * sizeof(void *),
sizeof(__taskloop_params_t), &__kmp_taskloop_task);
+ // restore current task
+ thread->th.th_current_task = current_task;
__taskloop_params_t *p = (__taskloop_params_t *)new_task->shareds;
p->task = next_task;
p->lb = (kmp_uint64 *)((char *)next_task + lower_offset);
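
The recursive half-splitting above serves the taskloop construct: the second
half of the range goes to an auxiliary task (now created with the pattern
task's parent), and the first half recurses. In user code, assuming OpenMP
4.5 syntax:

    void scale(double *a, int n) {
      /* grainsize/num_tasks control the chunking; large task counts take
         the recursive splitting path guarded by num_t_min. */
    #pragma omp parallel
    #pragma omp single
    #pragma omp taskloop grainsize(64)
      for (int i = 0; i < n; i++)
        a[i] *= 2.0;
    }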
diff --git a/openmp/runtime/src/kmp_utility.cpp b/openmp/runtime/src/kmp_utility.cpp
index 44a99d0455b3..6e6785deb445 100644
--- a/openmp/runtime/src/kmp_utility.cpp
+++ b/openmp/runtime/src/kmp_utility.cpp
@@ -194,7 +194,7 @@ void __kmp_query_cpuid(kmp_cpuinfo_t *p) {
KA_TRACE(trace_level, (" PSN"));
}
if ((buf.edx >> 19) & 1) {
- /* CLFULSH - Cache Flush Instruction Available */
+ /* CLFLUSH - Cache Flush Instruction Available */
cflush_size =
data[1] * 8; /* Bits 15-08: CLFLUSH line size = 8 (64 bytes) */
KA_TRACE(trace_level, (" CLFLUSH(%db)", cflush_size));
diff --git a/openmp/runtime/src/kmp_version.h b/openmp/runtime/src/kmp_version.h
index 9e726b3805b2..6ce40eecb5de 100644
--- a/openmp/runtime/src/kmp_version.h
+++ b/openmp/runtime/src/kmp_version.h
@@ -30,7 +30,7 @@ extern "C" {
just before version string. */
#define KMP_VERSION_MAGIC_STR "\x00@(#) "
#define KMP_VERSION_MAGIC_LEN 6 // Length of KMP_VERSION_MAGIC_STR.
-#define KMP_VERSION_PREF_STR "Intel(R) OMP "
+#define KMP_VERSION_PREF_STR "LLVM OMP "
#define KMP_VERSION_PREFIX KMP_VERSION_MAGIC_STR KMP_VERSION_PREF_STR
/* declare all the version string constants for KMP_VERSION env. variable */
diff --git a/openmp/runtime/src/kmp_wrapper_malloc.h b/openmp/runtime/src/kmp_wrapper_malloc.h
index 1544c5df3d64..c027e0b297d0 100644
--- a/openmp/runtime/src/kmp_wrapper_malloc.h
+++ b/openmp/runtime/src/kmp_wrapper_malloc.h
@@ -15,11 +15,11 @@
#define KMP_WRAPPER_MALLOC_H
/* This header serves for 3 purposes:
- 1. Declaring standard memory allocation rourines in OS-independent way.
+   1. Declaring standard memory allocation routines in an OS-independent way.
2. Passing source location info through memory allocation wrappers.
3. Enabling native memory debugging capabilities.
- 1. Declaring standard memory allocation rourines in OS-independent way.
+   1. Declaring standard memory allocation routines in an OS-independent way.
-----------------------------------------------------------------------
On Linux* OS, alloca() function is declared in <alloca.h> header, while on
Windows* OS there is no <alloca.h> header, function _alloca() (note
@@ -103,9 +103,9 @@
#error Unknown or unsupported OS.
#endif
-/* KMP_SRC_LOC_DECL -- Declaring source location paramemters, to be used in
+/* KMP_SRC_LOC_DECL -- Declaring source location parameters, to be used in
function declaration.
- KMP_SRC_LOC_PARM -- Source location paramemters, to be used to pass
+ KMP_SRC_LOC_PARM -- Source location parameters, to be used to pass
parameters to underlying levels.
KMP_SRC_LOC_CURR -- Source location arguments describing current location,
to be used at top-level.
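
A sketch of the pattern those three macros enable (debug flavor; the names
and exact expansions here are illustrative, and the release flavor defines
all three to nothing):

    #include <stddef.h>

    #define KMP_SRC_LOC_DECL , char const *file_, int line_
    #define KMP_SRC_LOC_PARM , file_, line_
    #define KMP_SRC_LOC_CURR , __FILE__, __LINE__

    void *wrapped_alloc(size_t size KMP_SRC_LOC_DECL); /* declaration */

    void *wrapper(size_t size KMP_SRC_LOC_DECL) {
      /* an intermediate level forwards the caller's location */
      return wrapped_alloc(size KMP_SRC_LOC_PARM);
    }
    /* a top-level call site records its own location:
         void *p = wrapper(128 KMP_SRC_LOC_CURR); */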
diff --git a/openmp/runtime/src/ompt-internal.h b/openmp/runtime/src/ompt-internal.h
index 958b5943af38..f753ab4ebc6d 100644
--- a/openmp/runtime/src/ompt-internal.h
+++ b/openmp/runtime/src/ompt-internal.h
@@ -57,8 +57,6 @@ typedef struct {
ompt_data_t task_data;
struct kmp_taskdata *scheduling_parent;
int thread_num;
- int ndeps;
- ompt_dependence_t *deps;
} ompt_task_info_t;
typedef struct {
diff --git a/openmp/runtime/src/ompt-specific.cpp b/openmp/runtime/src/ompt-specific.cpp
index 7fb81bb7d1a0..a7288f08a661 100644
--- a/openmp/runtime/src/ompt-specific.cpp
+++ b/openmp/runtime/src/ompt-specific.cpp
@@ -262,8 +262,6 @@ void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, int gtid,
lwt->ompt_task_info.frame.enter_frame = ompt_data_none;
lwt->ompt_task_info.frame.exit_frame = ompt_data_none;
lwt->ompt_task_info.scheduling_parent = NULL;
- lwt->ompt_task_info.deps = NULL;
- lwt->ompt_task_info.ndeps = 0;
lwt->heap = 0;
lwt->parent = 0;
}
diff --git a/openmp/runtime/src/ompt-specific.h b/openmp/runtime/src/ompt-specific.h
index 5ba240c1a950..fa5c5662c649 100644
--- a/openmp/runtime/src/ompt-specific.h
+++ b/openmp/runtime/src/ompt-specific.h
@@ -102,7 +102,7 @@ inline void ompt_set_thread_state(kmp_info_t *thread, ompt_state_t state) {
inline const char *ompt_get_runtime_version() {
return &__kmp_version_lib_ver[KMP_VERSION_MAGIC_LEN];
}
-#endif // OMPT_SUPPRORT
+#endif // OMPT_SUPPORT
// macros providing the OMPT callbacks for reduction clause
#if OMPT_SUPPORT && OMPT_OPTIONAL
diff --git a/openmp/runtime/src/thirdparty/ittnotify/ittnotify.h b/openmp/runtime/src/thirdparty/ittnotify/ittnotify.h
index d730c48ec705..db1c0d0d9d21 100644
--- a/openmp/runtime/src/thirdparty/ittnotify/ittnotify.h
+++ b/openmp/runtime/src/thirdparty/ittnotify/ittnotify.h
@@ -2303,7 +2303,7 @@ ITT_STUBV(ITTAPI, void, marker, (const __itt_domain *domain, __itt_id id, __itt_
* name of the metadata), and a value (the actual data). The encoding of
* the value depends on the type of the metadata.
*
- * The type of metadata is specified by an enumerated type __itt_metdata_type.
+ * The type of metadata is specified by an enumerated type __itt_metadata_type.
* @{
*/
@@ -3196,7 +3196,7 @@ ITT_STUBV(ITTAPI, void, relation_add_ex, (const __itt_domain *domain,
#define __itt_relation_add_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(relation_add_ex,d,x,y,z,a,b)
#define __itt_relation_add_ex_ptr ITTNOTIFY_NAME(relation_add_ex)
#else /* INTEL_NO_ITTNOTIFY_API */
-#define __itt_relation_add_to_current_ex(domain,clock_domain,timestame,relation,tail)
+#define __itt_relation_add_to_current_ex(domain,clock_domain,timestamp,relation,tail)
#define __itt_relation_add_to_current_ex_ptr 0
#define __itt_relation_add_ex(domain,clock_domain,timestamp,head,relation,tail)
#define __itt_relation_add_ex_ptr 0
diff --git a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.cpp b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.cpp
index 8f9e2a655ae4..4936b9baaf80 100644
--- a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.cpp
+++ b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.cpp
@@ -762,7 +762,7 @@ static const char* __itt_fsplit(const char* s, const char* sep, const char** out
/* This function return value of env variable that placed into static buffer.
* !!! The same static buffer is used for subsequent calls. !!!
- * This was done to aviod dynamic allocation for few calls.
+ * This was done to avoid dynamic allocation for a few calls.
* Actually we need this function only four times.
*/
static const char* __itt_get_env_var(const char* name)
@@ -1012,7 +1012,7 @@ static void __itt_reinit_all_pointers(void)
static void __itt_nullify_all_pointers(void)
{
int i;
- /* Nulify all pointers except domain_create, string_handle_create and counter_create */
+ /* Nullify all pointers except domain_create, string_handle_create and counter_create */
for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
*_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func;
}
diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp
index 1daa3d31047e..3b5910fc95e8 100644
--- a/openmp/runtime/src/z_Linux_util.cpp
+++ b/openmp/runtime/src/z_Linux_util.cpp
@@ -164,7 +164,7 @@ void __kmp_affinity_determine_capable(const char *env_var) {
if (gCode > 0) { // Linux* OS only
// The optimal situation: the OS returns the size of the buffer it expects.
//
- // A verification of correct behavior is that Isetaffinity on a NULL
+ // A verification of correct behavior is that setaffinity on a NULL
// buffer with the same size fails with errno set to EFAULT.
sCode = syscall(__NR_sched_setaffinity, 0, gCode, NULL);
KA_TRACE(30, ("__kmp_affinity_determine_capable: "
@@ -286,7 +286,7 @@ void __kmp_affinity_determine_capable(const char *env_var) {
if (gCode == 0) {
KMP_AFFINITY_ENABLE(KMP_CPU_SET_SIZE_LIMIT);
KA_TRACE(10, ("__kmp_affinity_determine_capable: "
- "affinity supported (mask size %d)\n"<
+ "affinity supported (mask size %d)\n",
(int)__kmp_affin_mask_size));
KMP_INTERNAL_FREE(buf);
return;
@@ -2207,7 +2207,7 @@ int __kmp_get_load_balance(int max) {
#else // Linux* OS
-// The fuction returns number of running (not sleeping) threads, or -1 in case
+// The function returns number of running (not sleeping) threads, or -1 in case
// of error. Error could be reported if Linux* OS kernel too old (without
// "/proc" support). Counting running threads stops if max running threads
// encountered.
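
A simplified sketch of that /proc walk (hedged: this illustrates the idea,
not the runtime's actual parser, and it ignores comm fields that contain
spaces):

    #include <dirent.h>
    #include <stdio.h>

    /* Count this process's tasks whose state in /proc/self/task/<tid>/stat
       is 'R' (running); stop once `max` running tasks have been seen. */
    static int count_running_threads(int max) {
      DIR *dir = opendir("/proc/self/task");
      if (!dir)
        return -1; /* e.g. a kernel without /proc support */
      int running = 0;
      struct dirent *de;
      while (running < max && (de = readdir(dir)) != NULL) {
        if (de->d_name[0] == '.')
          continue;
        char path[64], state;
        snprintf(path, sizeof(path), "/proc/self/task/%s/stat", de->d_name);
        FILE *f = fopen(path, "r");
        if (!f)
          continue; /* the thread may have exited meanwhile */
        /* stat format: pid (comm) state ... */
        if (fscanf(f, "%*d %*s %c", &state) == 1 && state == 'R')
          ++running;
        fclose(f);
      }
      closedir(dir);
      return running;
    }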