summary | refs | log | tree | commit | diff
path: root/openmp/runtime/src/kmp_csupport.cpp
diff options
context:
space:
mode:
author Dimitry Andric <dim@FreeBSD.org> 2020-07-26 19:36:28 +0000
committer Dimitry Andric <dim@FreeBSD.org> 2020-07-26 19:36:28 +0000
commit cfca06d7963fa0909f90483b42a6d7d194d01e08 (patch)
tree 209fb2a2d68f8f277793fc8df46c753d31bc853b /openmp/runtime/src/kmp_csupport.cpp
parent 706b4fc47bbc608932d3b491ae19a3b9cde9497b (diff)
Notes
Diffstat (limited to 'openmp/runtime/src/kmp_csupport.cpp')
-rw-r--r-- openmp/runtime/src/kmp_csupport.cpp 36
1 files changed, 35 insertions, 1 deletions
diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp
index ac9a93590ad0..9cfa64d6ff9e 100644
--- a/openmp/runtime/src/kmp_csupport.cpp
+++ b/openmp/runtime/src/kmp_csupport.cpp
@@ -92,7 +92,7 @@ construct, since the master thread is necessarily thread zero).
If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
-the OpenMP runtime (but the value cannote be defined in terms of
+the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
@@ -4023,6 +4023,9 @@ void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
lo = pr_buf->th_doacross_info[2];
up = pr_buf->th_doacross_info[3];
st = pr_buf->th_doacross_info[4];
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+ ompt_dependence_t deps[num_dims];
+#endif
if (st == 1) { // most common case
if (vec[0] < lo || vec[0] > up) {
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
@@ -4048,6 +4051,10 @@ void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
}
iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
}
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+ deps[0].variable.value = iter_number;
+ deps[0].dependence_type = ompt_dependence_type_sink;
+#endif
for (i = 1; i < num_dims; ++i) {
kmp_int64 iter, ln;
kmp_int32 j = i * 4;
@@ -4081,6 +4088,10 @@ void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
iter = (kmp_uint64)(lo - vec[i]) / (-st);
}
iter_number = iter + ln * iter_number;
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+ deps[i].variable.value = iter;
+ deps[i].dependence_type = ompt_dependence_type_sink;
+#endif
}
shft = iter_number % 32; // use 32-bit granularity
iter_number >>= 5; // divided by 32
@@ -4089,6 +4100,12 @@ void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
KMP_YIELD(TRUE);
}
KMP_MB();
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+ if (ompt_enabled.ompt_callback_dependences) {
+ ompt_callbacks.ompt_callback(ompt_callback_dependences)(
+ &(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims);
+ }
+#endif
KA_TRACE(20,
("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
gtid, (iter_number << 5) + shft));
@@ -4116,6 +4133,9 @@ void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
num_dims = pr_buf->th_doacross_info[0];
lo = pr_buf->th_doacross_info[2];
st = pr_buf->th_doacross_info[4];
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+ ompt_dependence_t deps[num_dims];
+#endif
if (st == 1) { // most common case
iter_number = vec[0] - lo;
} else if (st > 0) {
@@ -4123,6 +4143,10 @@ void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
} else { // negative increment
iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
}
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+ deps[0].variable.value = iter_number;
+ deps[0].dependence_type = ompt_dependence_type_source;
+#endif
for (i = 1; i < num_dims; ++i) {
kmp_int64 iter, ln;
kmp_int32 j = i * 4;
@@ -4137,7 +4161,17 @@ void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
iter = (kmp_uint64)(lo - vec[i]) / (-st);
}
iter_number = iter + ln * iter_number;
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+ deps[i].variable.value = iter;
+ deps[i].dependence_type = ompt_dependence_type_source;
+#endif
+ }
+#if OMPT_SUPPORT && OMPT_OPTIONAL
+ if (ompt_enabled.ompt_callback_dependences) {
+ ompt_callbacks.ompt_callback(ompt_callback_dependences)(
+ &(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims);
}
+#endif
shft = iter_number % 32; // use 32-bit granularity
iter_number >>= 5; // divided by 32
flag = 1 << shft;