From 53265afff0dd5ba32642c444583e7d2bb25ca2e9 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Wed, 23 Oct 2019 17:53:14 +0000 Subject: Vendor import of stripped LLVM openmp trunk r375505, the last commit before the upstream Subversion repository was made read-only, and the LLVM project migrated to GitHub: https://llvm.org/svn/llvm-project/openmp/trunk@375505 --- runtime/src/kmp.h | 5 +- runtime/src/kmp_affinity.cpp | 4 +- runtime/src/kmp_affinity.h | 17 +- runtime/src/kmp_csupport.cpp | 6 +- runtime/src/kmp_ftn_os.h | 20 + runtime/src/kmp_global.cpp | 2 +- runtime/src/kmp_gsupport.cpp | 44 +- runtime/src/kmp_lock.cpp | 16 +- runtime/src/kmp_lock.h | 16 +- runtime/src/kmp_os.h | 7 +- runtime/src/kmp_platform.h | 6 +- runtime/src/kmp_runtime.cpp | 236 ++-- runtime/src/kmp_stub.cpp | 4 +- runtime/src/kmp_taskdeps.cpp | 73 +- runtime/src/kmp_taskq.cpp | 0 runtime/src/kmp_wait_release.h | 5 +- runtime/src/ompt-general.cpp | 23 +- runtime/src/ompt-internal.h | 1 + runtime/src/ompt-specific.cpp | 7 +- runtime/src/ompt-specific.h | 2 +- .../src/thirdparty/ittnotify/ittnotify_config.h | 10 +- .../src/thirdparty/ittnotify/ittnotify_static.c | 1201 -------------------- .../src/thirdparty/ittnotify/ittnotify_static.cpp | 1201 ++++++++++++++++++++ runtime/src/z_Linux_asm.S | 263 ++++- runtime/src/z_Linux_util.cpp | 89 +- 25 files changed, 1884 insertions(+), 1374 deletions(-) delete mode 100644 runtime/src/kmp_taskq.cpp delete mode 100644 runtime/src/thirdparty/ittnotify/ittnotify_static.c create mode 100644 runtime/src/thirdparty/ittnotify/ittnotify_static.cpp (limited to 'runtime/src') diff --git a/runtime/src/kmp.h b/runtime/src/kmp.h index fdb9dbb519810..23eebe673126b 100644 --- a/runtime/src/kmp.h +++ b/runtime/src/kmp.h @@ -2181,10 +2181,9 @@ struct kmp_dephash_entry { typedef struct kmp_dephash { kmp_dephash_entry_t **buckets; size_t size; -#ifdef KMP_DEBUG + size_t generation; kmp_uint32 nelements; kmp_uint32 nconflicts; -#endif } kmp_dephash_t; typedef struct 
kmp_task_affinity_info { @@ -3342,7 +3341,7 @@ extern int __kmp_aux_set_affinity_mask_proc(int proc, void **mask); extern int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask); extern int __kmp_aux_get_affinity_mask_proc(int proc, void **mask); extern void __kmp_balanced_affinity(kmp_info_t *th, int team_size); -#if KMP_OS_LINUX +#if KMP_OS_LINUX || KMP_OS_FREEBSD extern int kmp_set_thread_affinity_mask_initial(void); #endif #endif /* KMP_AFFINITY_SUPPORTED */ diff --git a/runtime/src/kmp_affinity.cpp b/runtime/src/kmp_affinity.cpp index 372c300d44ebb..4c7ed3181197e 100644 --- a/runtime/src/kmp_affinity.cpp +++ b/runtime/src/kmp_affinity.cpp @@ -1968,7 +1968,7 @@ static void __kmp_dispatch_set_hierarchy_values() { __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1] = nPackages * nCoresPerPkg * __kmp_nThreadsPerCore; __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L1 + 1] = __kmp_ncores; -#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) +#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS) if (__kmp_mic_type >= mic3) __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores / 2; else @@ -1982,7 +1982,7 @@ static void __kmp_dispatch_set_hierarchy_values() { __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_THREAD + 1] = 1; __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L1 + 1] = __kmp_nThreadsPerCore; -#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) +#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS) if (__kmp_mic_type >= mic3) __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] = 2 * __kmp_nThreadsPerCore; diff --git a/runtime/src/kmp_affinity.h b/runtime/src/kmp_affinity.h index c00ad36456612..f270bb6dbb8d3 100644 --- a/runtime/src/kmp_affinity.h +++ b/runtime/src/kmp_affinity.h @@ -160,6 +160,7 @@ public: }; #endif /* KMP_USE_HWLOC */ +#if KMP_OS_LINUX || KMP_OS_FREEBSD #if KMP_OS_LINUX /* On some of the older OS's that we build on, these constants aren't present in #included from . 
They must be the same on @@ -234,6 +235,10 @@ public: #endif /* __NR_sched_getaffinity */ #error Unknown or unsupported architecture #endif /* KMP_ARCH_* */ +#elif KMP_OS_FREEBSD +#include +#include +#endif class KMPNativeAffinity : public KMPAffinity { class Mask : public KMPAffinity::Mask { typedef unsigned char mask_t; @@ -294,8 +299,13 @@ class KMPNativeAffinity : public KMPAffinity { int get_system_affinity(bool abort_on_error) override { KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), "Illegal get affinity operation when not capable"); +#if KMP_OS_LINUX int retval = syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask); +#elif KMP_OS_FREEBSD + int retval = + pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size, reinterpret_cast(mask)); +#endif if (retval >= 0) { return 0; } @@ -308,8 +318,13 @@ class KMPNativeAffinity : public KMPAffinity { int set_system_affinity(bool abort_on_error) const override { KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), "Illegal get affinity operation when not capable"); +#if KMP_OS_LINUX int retval = syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask); +#elif KMP_OS_FREEBSD + int retval = + pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size, reinterpret_cast(mask)); +#endif if (retval >= 0) { return 0; } @@ -347,7 +362,7 @@ class KMPNativeAffinity : public KMPAffinity { } api_type get_api_type() const override { return NATIVE_OS; } }; -#endif /* KMP_OS_LINUX */ +#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */ #if KMP_OS_WINDOWS class KMPNativeAffinity : public KMPAffinity { diff --git a/runtime/src/kmp_csupport.cpp b/runtime/src/kmp_csupport.cpp index c778c97022f5b..d39bf9af43341 100644 --- a/runtime/src/kmp_csupport.cpp +++ b/runtime/src/kmp_csupport.cpp @@ -545,7 +545,8 @@ void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { if (ompt_enabled.ompt_callback_parallel_end) { ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( &(serial_team->t.ompt_team_info.parallel_data), parent_task_data, - 
ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid)); + ompt_parallel_invoker_program | ompt_parallel_team, + OMPT_LOAD_RETURN_ADDRESS(global_tid)); } __ompt_lw_taskteam_unlink(this_thr); this_thr->th.ompt_thread_info.state = ompt_state_overhead; @@ -676,7 +677,8 @@ void __kmpc_flush(ident_t *loc) { #endif // KMP_COMPILER_ICC } #endif // KMP_MIC -#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64) +#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \ + KMP_ARCH_RISCV64) // Nothing to see here move along #elif KMP_ARCH_PPC64 // Nothing needed here (we have a real MB above). diff --git a/runtime/src/kmp_ftn_os.h b/runtime/src/kmp_ftn_os.h index 856479cbe2123..41cafab125375 100644 --- a/runtime/src/kmp_ftn_os.h +++ b/runtime/src/kmp_ftn_os.h @@ -633,5 +633,25 @@ GOMP_loop_ull_doacross_guided_start #define KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START \ GOMP_loop_ull_doacross_runtime_start +#define KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_NEXT \ + GOMP_loop_nonmonotonic_dynamic_next +#define KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_START \ + GOMP_loop_nonmonotonic_dynamic_start +#define KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_NEXT \ + GOMP_loop_nonmonotonic_guided_next +#define KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_START \ + GOMP_loop_nonmonotonic_guided_start +#define KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_NEXT \ + GOMP_loop_ull_nonmonotonic_dynamic_next +#define KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_START \ + GOMP_loop_ull_nonmonotonic_dynamic_start +#define KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_NEXT \ + GOMP_loop_ull_nonmonotonic_guided_next +#define KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_START \ + GOMP_loop_ull_nonmonotonic_guided_start +#define KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_DYNAMIC \ + GOMP_parallel_loop_nonmonotonic_dynamic +#define KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_GUIDED \ + GOMP_parallel_loop_nonmonotonic_guided 
#endif /* KMP_FTN_OS_H */ diff --git a/runtime/src/kmp_global.cpp b/runtime/src/kmp_global.cpp index 1ec73b82e99c2..6e636dc394eae 100644 --- a/runtime/src/kmp_global.cpp +++ b/runtime/src/kmp_global.cpp @@ -431,7 +431,7 @@ std::atomic __kmp_thread_pool_active_nth = ATOMIC_VAR_INIT(0); /* ------------------------------------------------- * GLOBAL/ROOT STATE */ KMP_ALIGN_CACHE -kmp_global_t __kmp_global = {{0}}; +kmp_global_t __kmp_global; /* ----------------------------------------------- */ /* GLOBAL SYNCHRONIZATION LOCKS */ diff --git a/runtime/src/kmp_gsupport.cpp b/runtime/src/kmp_gsupport.cpp index d41e027eb2b00..10841d265958a 100644 --- a/runtime/src/kmp_gsupport.cpp +++ b/runtime/src/kmp_gsupport.cpp @@ -22,7 +22,7 @@ extern "C" { #endif // __cplusplus #define MKLOC(loc, routine) \ - static ident_t(loc) = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"}; + static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"}; #include "kmp_ftn_os.h" @@ -622,10 +622,16 @@ LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_STATIC_START), kmp_sch_static) LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT), {}) LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START), kmp_sch_dynamic_chunked) +LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_START), + kmp_sch_dynamic_chunked) LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT), {}) +LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_NEXT), {}) LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_GUIDED_START), kmp_sch_guided_chunked) +LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_START), + kmp_sch_guided_chunked) LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT), {}) +LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_NEXT), {}) LOOP_RUNTIME_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_RUNTIME_START), kmp_sch_runtime) LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT), {}) @@ 
-892,6 +898,16 @@ LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT), {}) LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START), kmp_sch_guided_chunked) LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT), {}) +LOOP_START_ULL( + KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_START), + kmp_sch_dynamic_chunked) +LOOP_NEXT_ULL( + KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_NEXT), {}) +LOOP_START_ULL( + KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_START), + kmp_sch_guided_chunked) +LOOP_NEXT_ULL( + KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_NEXT), {}) LOOP_RUNTIME_START_ULL( KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START), kmp_sch_runtime) LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT), {}) @@ -1487,6 +1503,12 @@ PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC), kmp_sch_static, OMPT_LOOP_PRE, OMPT_LOOP_POST) PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC), kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST) +PARALLEL_LOOP( + KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_GUIDED), + kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST) +PARALLEL_LOOP( + KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_DYNAMIC), + kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST) PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED), kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST) PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME), @@ -1942,6 +1964,26 @@ KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_GUIDED_START, 45, "GOMP_4.5"); KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START, 45, "GOMP_4.5"); +KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_START, 45, + "GOMP_4.5"); +KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_NEXT, 45, + "GOMP_4.5"); 
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_START, 45, + "GOMP_4.5"); +KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_NEXT, 45, + "GOMP_4.5"); +KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_START, 45, + "GOMP_4.5"); +KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_NEXT, 45, + "GOMP_4.5"); +KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_START, 45, + "GOMP_4.5"); +KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_NEXT, 45, + "GOMP_4.5"); +KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_DYNAMIC, 45, + "GOMP_4.5"); +KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_GUIDED, 45, + "GOMP_4.5"); #endif // KMP_USE_VERSION_SYMBOLS diff --git a/runtime/src/kmp_lock.cpp b/runtime/src/kmp_lock.cpp index 78d63c67852d6..2cc9e08278c4e 100644 --- a/runtime/src/kmp_lock.cpp +++ b/runtime/src/kmp_lock.cpp @@ -2943,10 +2943,10 @@ static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = { #undef expand // Exposes only one set of jump tables (*lock or *lock_with_checks). -void (*(*__kmp_direct_destroy))(kmp_dyna_lock_t *) = 0; -int (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0; -int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0; -int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0; +void (**__kmp_direct_destroy)(kmp_dyna_lock_t *) = 0; +int (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32) = 0; +int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32) = 0; +int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32) = 0; // Jump tables for the indirect lock functions #define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock, @@ -2993,10 +2993,10 @@ static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = { #undef expand // Exposes only one jump tables (*lock or *lock_with_checks). 
-void (*(*__kmp_indirect_destroy))(kmp_user_lock_p) = 0; -int (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0; -int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0; -int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0; +void (**__kmp_indirect_destroy)(kmp_user_lock_p) = 0; +int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32) = 0; +int (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32) = 0; +int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32) = 0; // Lock index table. kmp_indirect_lock_table_t __kmp_i_lock_table; diff --git a/runtime/src/kmp_lock.h b/runtime/src/kmp_lock.h index ccd84eb821d97..9ad86a51657de 100644 --- a/runtime/src/kmp_lock.h +++ b/runtime/src/kmp_lock.h @@ -1122,18 +1122,18 @@ typedef struct { // Function tables for direct locks. Set/unset/test differentiate functions // with/without consistency checking. extern void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t); -extern void (*(*__kmp_direct_destroy))(kmp_dyna_lock_t *); -extern int (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32); -extern int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32); -extern int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32); +extern void (**__kmp_direct_destroy)(kmp_dyna_lock_t *); +extern int (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32); +extern int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32); +extern int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32); // Function tables for indirect locks. Set/unset/test differentiate functions // with/withuot consistency checking. 
extern void (*__kmp_indirect_init[])(kmp_user_lock_p); -extern void (*(*__kmp_indirect_destroy))(kmp_user_lock_p); -extern int (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32); -extern int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32); -extern int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32); +extern void (**__kmp_indirect_destroy)(kmp_user_lock_p); +extern int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32); +extern int (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32); +extern int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32); // Extracts direct lock tag from a user lock pointer #define KMP_EXTRACT_D_TAG(l) \ diff --git a/runtime/src/kmp_os.h b/runtime/src/kmp_os.h index c4c7bcf6cc578..cd942a9c4430f 100644 --- a/runtime/src/kmp_os.h +++ b/runtime/src/kmp_os.h @@ -69,7 +69,7 @@ #error Unknown compiler #endif -#if (KMP_OS_LINUX || KMP_OS_WINDOWS) && !KMP_OS_CNK +#if (KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_FREEBSD) && !KMP_OS_CNK #define KMP_AFFINITY_SUPPORTED 1 #if KMP_OS_WINDOWS && KMP_ARCH_X86_64 #define KMP_GROUP_AFFINITY 1 @@ -165,7 +165,8 @@ typedef unsigned long long kmp_uint64; #if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS #define KMP_SIZE_T_SPEC KMP_UINT32_SPEC -#elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 +#elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \ + KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 #define KMP_SIZE_T_SPEC KMP_UINT64_SPEC #else #error "Can't determine size_t printf format specifier." 
@@ -840,7 +841,7 @@ extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v); #endif /* KMP_OS_WINDOWS */ #if KMP_ARCH_PPC64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || \ - KMP_ARCH_MIPS64 + KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 #define KMP_MB() __sync_synchronize() #endif diff --git a/runtime/src/kmp_platform.h b/runtime/src/kmp_platform.h index e4f2e06b962ce..3238deafc01be 100644 --- a/runtime/src/kmp_platform.h +++ b/runtime/src/kmp_platform.h @@ -98,6 +98,7 @@ #define KMP_ARCH_PPC64 (KMP_ARCH_PPC64_LE || KMP_ARCH_PPC64_BE) #define KMP_ARCH_MIPS 0 #define KMP_ARCH_MIPS64 0 +#define KMP_ARCH_RISCV64 0 #if KMP_OS_WINDOWS #if defined(_M_AMD64) || defined(__x86_64) @@ -135,6 +136,9 @@ #undef KMP_ARCH_MIPS #define KMP_ARCH_MIPS 1 #endif +#elif defined __riscv && __riscv_xlen == 64 +#undef KMP_ARCH_RISCV64 +#define KMP_ARCH_RISCV64 1 #endif #endif @@ -199,7 +203,7 @@ // TODO: Fixme - This is clever, but really fugly #if (1 != \ KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM + KMP_ARCH_PPC64 + \ - KMP_ARCH_AARCH64 + KMP_ARCH_MIPS + KMP_ARCH_MIPS64) + KMP_ARCH_AARCH64 + KMP_ARCH_MIPS + KMP_ARCH_MIPS64 + KMP_ARCH_RISCV64) #error Unknown or unsupported architecture #endif diff --git a/runtime/src/kmp_runtime.cpp b/runtime/src/kmp_runtime.cpp index 7f6c149c792e7..dd6e0ff70193a 100644 --- a/runtime/src/kmp_runtime.cpp +++ b/runtime/src/kmp_runtime.cpp @@ -1190,8 +1190,8 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)( &(parent_task_info->task_data), &(parent_task_info->frame), - &ompt_parallel_data, team_size, ompt_parallel_invoker_program, - codeptr); + &ompt_parallel_data, team_size, + ompt_parallel_invoker_program | ompt_parallel_team, codeptr); } } #endif // OMPT_SUPPORT @@ -1481,9 +1481,13 @@ int __kmp_fork_call(ident_t *loc, int gtid, int team_size = master_set_numthreads ? 
master_set_numthreads : get__nproc_2(parent_team, master_tid); + int flags = OMPT_INVOKER(call_context) | + ((microtask == (microtask_t)__kmp_teams_master) + ? ompt_parallel_league + : ompt_parallel_team); ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)( - parent_task_data, ompt_frame, &ompt_parallel_data, team_size, - OMPT_INVOKER(call_context), return_address); + parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags, + return_address); } master_th->th.ompt_thread_info.state = ompt_state_overhead; } @@ -1512,19 +1516,17 @@ int __kmp_fork_call(ident_t *loc, int gtid, // AC: we are in serialized parallel __kmpc_serialized_parallel(loc, gtid); KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1); - // AC: need this in order enquiry functions work - // correctly, will restore at join time - parent_team->t.t_serialized--; + #if OMPT_SUPPORT void *dummy; - void **exit_runtime_p; + void **exit_frame_p; ompt_lw_taskteam_t lw_taskteam; if (ompt_enabled.enabled) { __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, &ompt_parallel_data, return_address); - exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr); + exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr); __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0); // don't use lw_taskteam after linking. content was swaped @@ -1532,19 +1534,23 @@ int __kmp_fork_call(ident_t *loc, int gtid, /* OMPT implicit task begin */ implicit_task_data = OMPT_CUR_TASK_DATA(master_th); if (ompt_enabled.ompt_callback_implicit_task) { - ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( - ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), - implicit_task_data, 1, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial? 
OMPT_CUR_TASK_INFO(master_th) ->thread_num = __kmp_tid_from_gtid(gtid); + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), + implicit_task_data, 1, + OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); } /* OMPT state */ master_th->th.ompt_thread_info.state = ompt_state_work_parallel; } else { - exit_runtime_p = &dummy; + exit_frame_p = &dummy; } #endif + // AC: need to decrement t_serialized for enquiry functions to work + // correctly, will restore at join time + parent_team->t.t_serialized--; { KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); @@ -1552,26 +1558,27 @@ int __kmp_fork_call(ident_t *loc, int gtid, __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv #if OMPT_SUPPORT , - exit_runtime_p + exit_frame_p #endif ); } #if OMPT_SUPPORT - *exit_runtime_p = NULL; if (ompt_enabled.enabled) { + *exit_frame_p = NULL; OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none; if (ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_end, NULL, implicit_task_data, 1, - OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial? 
+ OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); } + ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th); __ompt_lw_taskteam_unlink(master_th); - if (ompt_enabled.ompt_callback_parallel_end) { ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( - OMPT_CUR_TEAM_DATA(master_th), OMPT_CUR_TASK_DATA(master_th), - OMPT_INVOKER(call_context), return_address); + &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th), + OMPT_INVOKER(call_context) | ompt_parallel_team, + return_address); } master_th->th.ompt_thread_info.state = ompt_state_overhead; } @@ -1586,6 +1593,15 @@ int __kmp_fork_call(ident_t *loc, int gtid, parent_team->t.t_level++; parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save +#if OMPT_SUPPORT + if (ompt_enabled.enabled) { + ompt_lw_taskteam_t lw_taskteam; + __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, + &ompt_parallel_data, return_address); + __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true); + } +#endif + /* Change number of threads in the team if requested */ if (master_set_numthreads) { // The parallel has num_threads clause if (master_set_numthreads < master_th->th.th_teams_size.nth) { @@ -1714,7 +1730,7 @@ int __kmp_fork_call(ident_t *loc, int gtid, #if OMPT_SUPPORT void *dummy; - void **exit_runtime_p; + void **exit_frame_p; ompt_task_info_t *task_info; ompt_lw_taskteam_t lw_taskteam; @@ -1727,19 +1743,21 @@ int __kmp_fork_call(ident_t *loc, int gtid, // don't use lw_taskteam after linking. content was swaped task_info = OMPT_CUR_TASK_INFO(master_th); - exit_runtime_p = &(task_info->frame.exit_frame.ptr); + exit_frame_p = &(task_info->frame.exit_frame.ptr); if (ompt_enabled.ompt_callback_implicit_task) { - ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( - ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), - &(task_info->task_data), 1, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial? 
OMPT_CUR_TASK_INFO(master_th) ->thread_num = __kmp_tid_from_gtid(gtid); + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), + &(task_info->task_data), 1, + OMPT_CUR_TASK_INFO(master_th)->thread_num, + ompt_task_implicit); } /* OMPT state */ master_th->th.ompt_thread_info.state = ompt_state_work_parallel; } else { - exit_runtime_p = &dummy; + exit_frame_p = &dummy; } #endif @@ -1750,25 +1768,27 @@ int __kmp_fork_call(ident_t *loc, int gtid, parent_team->t.t_argv #if OMPT_SUPPORT , - exit_runtime_p + exit_frame_p #endif ); } #if OMPT_SUPPORT if (ompt_enabled.enabled) { - exit_runtime_p = NULL; + *exit_frame_p = NULL; if (ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_end, NULL, &(task_info->task_data), 1, - OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial? + OMPT_CUR_TASK_INFO(master_th)->thread_num, + ompt_task_implicit); } - + ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th); __ompt_lw_taskteam_unlink(master_th); if (ompt_enabled.ompt_callback_parallel_end) { ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( - OMPT_CUR_TEAM_DATA(master_th), parent_task_data, - OMPT_INVOKER(call_context), return_address); + &ompt_parallel_data, parent_task_data, + OMPT_INVOKER(call_context) | ompt_parallel_team, + return_address); } master_th->th.ompt_thread_info.state = ompt_state_overhead; } @@ -1800,6 +1820,23 @@ int __kmp_fork_call(ident_t *loc, int gtid, team->t.t_level--; // AC: call special invoker for outer "parallel" of teams construct invoker(gtid); +#if OMPT_SUPPORT + if (ompt_enabled.enabled) { + ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th); + if (ompt_enabled.ompt_callback_implicit_task) { + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_end, NULL, &(task_info->task_data), 0, + OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial); + 
} + if (ompt_enabled.ompt_callback_parallel_end) { + ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( + &ompt_parallel_data, parent_task_data, + OMPT_INVOKER(call_context) | ompt_parallel_league, + return_address); + } + master_th->th.ompt_thread_info.state = ompt_state_overhead; + } +#endif } else { argv = args; for (i = argc - 1; i >= 0; --i) @@ -1813,7 +1850,7 @@ int __kmp_fork_call(ident_t *loc, int gtid, #if OMPT_SUPPORT void *dummy; - void **exit_runtime_p; + void **exit_frame_p; ompt_task_info_t *task_info; ompt_lw_taskteam_t lw_taskteam; @@ -1824,14 +1861,15 @@ int __kmp_fork_call(ident_t *loc, int gtid, __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0); // don't use lw_taskteam after linking. content was swaped task_info = OMPT_CUR_TASK_INFO(master_th); - exit_runtime_p = &(task_info->frame.exit_frame.ptr); + exit_frame_p = &(task_info->frame.exit_frame.ptr); /* OMPT implicit task begin */ implicit_task_data = OMPT_CUR_TASK_DATA(master_th); if (ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), - implicit_task_data, 1, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial? 
+ implicit_task_data, 1, __kmp_tid_from_gtid(gtid), + ompt_task_implicit); OMPT_CUR_TASK_INFO(master_th) ->thread_num = __kmp_tid_from_gtid(gtid); } @@ -1839,7 +1877,7 @@ int __kmp_fork_call(ident_t *loc, int gtid, /* OMPT state */ master_th->th.ompt_thread_info.state = ompt_state_work_parallel; } else { - exit_runtime_p = &dummy; + exit_frame_p = &dummy; } #endif @@ -1849,18 +1887,19 @@ int __kmp_fork_call(ident_t *loc, int gtid, __kmp_invoke_microtask(microtask, gtid, 0, argc, args #if OMPT_SUPPORT , - exit_runtime_p + exit_frame_p #endif ); } #if OMPT_SUPPORT if (ompt_enabled.enabled) { - *exit_runtime_p = NULL; + *exit_frame_p = NULL; if (ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_end, NULL, &(task_info->task_data), 1, - OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial? + OMPT_CUR_TASK_INFO(master_th)->thread_num, + ompt_task_implicit); } ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th); @@ -1868,7 +1907,8 @@ int __kmp_fork_call(ident_t *loc, int gtid, if (ompt_enabled.ompt_callback_parallel_end) { ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( &ompt_parallel_data, parent_task_data, - OMPT_INVOKER(call_context), return_address); + OMPT_INVOKER(call_context) | ompt_parallel_team, + return_address); } master_th->th.ompt_thread_info.state = ompt_state_overhead; } @@ -2225,12 +2265,11 @@ static inline void __kmp_join_restore_state(kmp_info_t *thread, static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread, kmp_team_t *team, ompt_data_t *parallel_data, - fork_context_e fork_context, void *codeptr) { + int flags, void *codeptr) { ompt_task_info_t *task_info = __ompt_get_task_info_object(0); if (ompt_enabled.ompt_callback_parallel_end) { ompt_callbacks.ompt_callback(ompt_callback_parallel_end)( - parallel_data, &(task_info->task_data), OMPT_INVOKER(fork_context), - codeptr); + parallel_data, &(task_info->task_data), flags, 
codeptr); } task_info->frame.enter_frame = ompt_data_none; @@ -2263,6 +2302,7 @@ void __kmp_join_call(ident_t *loc, int gtid master_th->th.th_ident = loc; #if OMPT_SUPPORT + void *team_microtask = (void *)team->t.t_pkfn; if (ompt_enabled.enabled) { master_th->th.ompt_thread_info.state = ompt_state_overhead; } @@ -2352,10 +2392,25 @@ void __kmp_join_call(ident_t *loc, int gtid if (master_th->th.th_teams_microtask && !exit_teams && team->t.t_pkfn != (microtask_t)__kmp_teams_master && team->t.t_level == master_th->th.th_teams_level + 1) { - // AC: We need to leave the team structure intact at the end of parallel - // inside the teams construct, so that at the next parallel same (hot) team - // works, only adjust nesting levels - +// AC: We need to leave the team structure intact at the end of parallel +// inside the teams construct, so that at the next parallel same (hot) team +// works, only adjust nesting levels +#if OMPT_SUPPORT + ompt_data_t ompt_parallel_data = ompt_data_none; + if (ompt_enabled.enabled) { + ompt_task_info_t *task_info = __ompt_get_task_info_object(0); + if (ompt_enabled.ompt_callback_implicit_task) { + int ompt_team_size = team->t.t_nproc; + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size, + OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); + } + task_info->frame.exit_frame = ompt_data_none; + task_info->task_data = ompt_data_none; + ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th); + __ompt_lw_taskteam_unlink(master_th); + } +#endif /* Decrement our nested depth level */ team->t.t_level--; team->t.t_active_level--; @@ -2394,8 +2449,8 @@ void __kmp_join_call(ident_t *loc, int gtid #if OMPT_SUPPORT if (ompt_enabled.enabled) { - __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context, - codeptr); + __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data, + OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr); } #endif @@ 
-2424,12 +2479,14 @@ void __kmp_join_call(ident_t *loc, int gtid if (ompt_enabled.enabled) { ompt_task_info_t *task_info = __ompt_get_task_info_object(0); if (ompt_enabled.ompt_callback_implicit_task) { - int ompt_team_size = team->t.t_nproc; + int flags = (team_microtask == (void *)__kmp_teams_master) + ? ompt_task_initial + : ompt_task_implicit; + int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc; ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size, - OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial? + OMPT_CUR_TASK_INFO(master_th)->thread_num, flags); } - task_info->frame.exit_frame = ompt_data_none; task_info->task_data = ompt_data_none; } @@ -2503,8 +2560,12 @@ void __kmp_join_call(ident_t *loc, int gtid __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); #if OMPT_SUPPORT + int flags = + OMPT_INVOKER(fork_context) | + ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league + : ompt_parallel_team); if (ompt_enabled.enabled) { - __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context, + __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags, codeptr); } #endif @@ -4432,7 +4493,7 @@ static void __kmp_initialize_team(kmp_team_t *team, int new_nproc, KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team)); } -#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED +#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED /* Sets full mask for thread and returns old mask, no changes to structures. 
*/ static void __kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) { @@ -4980,7 +5041,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, __kmp_partition_places(team); #endif } else { // team->t.t_nproc < new_nproc -#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED +#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED kmp_affin_mask_t *old_mask; if (KMP_AFFINITY_CAPABLE()) { KMP_CPU_ALLOC(old_mask); @@ -5029,7 +5090,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, __kmp_reinitialize_team(team, new_icvs, NULL); } -#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED +#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED /* Temporarily set full mask for master thread before creation of workers. The reason is that workers inherit the affinity from master, so if a lot of workers are created on the single core quickly, they @@ -5064,7 +5125,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, } } -#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED +#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED if (KMP_AFFINITY_CAPABLE()) { /* Restore initial master thread's affinity mask */ __kmp_set_system_affinity(old_mask, TRUE); @@ -5600,7 +5661,7 @@ void __kmp_free_thread(kmp_info_t *this_th) { void *__kmp_launch_thread(kmp_info_t *this_thr) { int gtid = this_thr->th.th_info.ds.ds_gtid; /* void *stack_data;*/ - kmp_team_t *(*volatile pteam); + kmp_team_t **volatile pteam; KMP_MB(); KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid)); @@ -5618,18 +5679,15 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) { this_thr->th.ompt_thread_info.state = ompt_state_overhead; this_thr->th.ompt_thread_info.wait_id = 0; this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0); + this_thr->th.ompt_thread_info.parallel_flags = 0; if (ompt_enabled.ompt_callback_thread_begin) { ompt_callbacks.ompt_callback(ompt_callback_thread_begin)( ompt_thread_worker, thread_data); } - } -#endif - -#if 
OMPT_SUPPORT - if (ompt_enabled.enabled) { this_thr->th.ompt_thread_info.state = ompt_state_idle; } #endif + /* This is the place where threads wait for work */ while (!TCR_4(__kmp_global.g.g_done)) { KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]); @@ -5647,7 +5705,7 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) { } #endif - pteam = (kmp_team_t * (*))(&this_thr->th.th_team); + pteam = &this_thr->th.th_team; /* have we been allocated? */ if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) { @@ -6956,16 +7014,16 @@ int __kmp_invoke_task_func(int gtid) { #if OMPT_SUPPORT void *dummy; - void **exit_runtime_p; + void **exit_frame_p; ompt_data_t *my_task_data; ompt_data_t *my_parallel_data; int ompt_team_size; if (ompt_enabled.enabled) { - exit_runtime_p = &( + exit_frame_p = &( team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_frame.ptr); } else { - exit_runtime_p = &dummy; + exit_frame_p = &dummy; } my_task_data = @@ -6975,7 +7033,7 @@ int __kmp_invoke_task_func(int gtid) { ompt_team_size = team->t.t_nproc; ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size, - __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial? 
+ __kmp_tid_from_gtid(gtid), ompt_task_implicit); OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid); } #endif @@ -6994,11 +7052,12 @@ int __kmp_invoke_task_func(int gtid) { tid, (int)team->t.t_argc, (void **)team->t.t_argv #if OMPT_SUPPORT , - exit_runtime_p + exit_frame_p #endif ); #if OMPT_SUPPORT - *exit_runtime_p = NULL; + *exit_frame_p = NULL; + this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team; #endif #if KMP_STATS_ENABLED @@ -7077,7 +7136,22 @@ int __kmp_invoke_teams_master(int gtid) { (void *)__kmp_teams_master); #endif __kmp_run_before_invoked_task(gtid, 0, this_thr, team); +#if OMPT_SUPPORT + int tid = __kmp_tid_from_gtid(gtid); + ompt_data_t *task_data = + &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data; + ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data; + if (ompt_enabled.ompt_callback_implicit_task) { + ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( + ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid, + ompt_task_initial); + OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid; + } +#endif __kmp_teams_master(gtid); +#if OMPT_SUPPORT + this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league; +#endif __kmp_run_after_invoked_task(gtid, 0, this_thr, team); return 1; } @@ -7118,19 +7192,32 @@ void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams, thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams; // Remember the number of threads for inner parallel regions + if (!TCR_4(__kmp_init_middle)) + __kmp_middle_initialize(); // get internal globals calculated + KMP_DEBUG_ASSERT(__kmp_avail_proc); + KMP_DEBUG_ASSERT(__kmp_dflt_team_nth); if (num_threads == 0) { - if (!TCR_4(__kmp_init_middle)) - __kmp_middle_initialize(); // get __kmp_avail_proc calculated num_threads = __kmp_avail_proc / num_teams; + // adjust num_threads w/o warning as it is not user setting + // num_threads = min(num_threads, nthreads-var, thread-limit-var) + // no 
thread_limit clause specified - do not change thread-limit-var ICV + if (num_threads > __kmp_dflt_team_nth) { + num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV + } + if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) { + num_threads = thr->th.th_current_task->td_icvs.thread_limit; + } // prevent team size to exceed thread-limit-var if (num_teams * num_threads > __kmp_teams_max_nth) { - // adjust num_threads w/o warning as it is not user setting num_threads = __kmp_teams_max_nth / num_teams; } } else { // This thread will be the master of the league masters // Store new thread limit; old limit is saved in th_cg_roots list thr->th.th_current_task->td_icvs.thread_limit = num_threads; - + // num_threads = min(num_threads, nthreads-var) + if (num_threads > __kmp_dflt_team_nth) { + num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV + } if (num_teams * num_threads > __kmp_teams_max_nth) { int new_threads = __kmp_teams_max_nth / num_teams; if (!__kmp_reserve_warn) { // user asked for too many threads @@ -8023,7 +8110,8 @@ __kmp_determine_reduction_method( int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED; -#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 +#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \ + KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD diff --git a/runtime/src/kmp_stub.cpp b/runtime/src/kmp_stub.cpp index badbbde7c967b..6b5041988d5c3 100644 --- a/runtime/src/kmp_stub.cpp +++ b/runtime/src/kmp_stub.cpp @@ -164,7 +164,7 @@ void *kmp_aligned_malloc(size_t sz, size_t a) { #if KMP_OS_WINDOWS res = _aligned_malloc(sz, a); #else - if (err = posix_memalign(&res, a, sz)) { + if ((err = posix_memalign(&res, a, sz))) { errno = err; // can be EINVAL or ENOMEM res = NULL; } @@ -277,7 +277,7 @@ void __kmps_get_schedule(kmp_sched_t *kind, int *modifier) 
{ kmp_proc_bind_t __kmps_get_proc_bind(void) { i; - return 0; + return proc_bind_false; } // __kmps_get_proc_bind double __kmps_get_wtime(void) { diff --git a/runtime/src/kmp_taskdeps.cpp b/runtime/src/kmp_taskdeps.cpp index db79deac3127f..f8aa51dd904a2 100644 --- a/runtime/src/kmp_taskdeps.cpp +++ b/runtime/src/kmp_taskdeps.cpp @@ -54,12 +54,64 @@ static inline kmp_depnode_t *__kmp_node_ref(kmp_depnode_t *node) { enum { KMP_DEPHASH_OTHER_SIZE = 97, KMP_DEPHASH_MASTER_SIZE = 997 }; +size_t sizes[] = { 997, 2003, 4001, 8191, 16001, 32003, 64007, 131071, 270029 }; +const size_t MAX_GEN = 8; + static inline kmp_int32 __kmp_dephash_hash(kmp_intptr_t addr, size_t hsize) { // TODO alternate to try: set = (((Addr64)(addrUsefulBits * 9.618)) % // m_num_sets ); return ((addr >> 6) ^ (addr >> 2)) % hsize; } +static kmp_dephash_t *__kmp_dephash_extend(kmp_info_t *thread, + kmp_dephash_t *current_dephash) { + kmp_dephash_t *h; + + size_t gen = current_dephash->generation + 1; + if (gen >= MAX_GEN) + return current_dephash; + size_t new_size = sizes[gen]; + + kmp_int32 size_to_allocate = + new_size * sizeof(kmp_dephash_entry_t *) + sizeof(kmp_dephash_t); + +#if USE_FAST_MEMORY + h = (kmp_dephash_t *)__kmp_fast_allocate(thread, size_to_allocate); +#else + h = (kmp_dephash_t *)__kmp_thread_malloc(thread, size_to_allocate); +#endif + + h->size = new_size; + h->nelements = current_dephash->nelements; + h->buckets = (kmp_dephash_entry **)(h + 1); + h->generation = gen; + + // insert existing elements in the new table + for (size_t i = 0; i < current_dephash->size; i++) { + kmp_dephash_entry_t *next; + for (kmp_dephash_entry_t *entry = current_dephash->buckets[i]; entry; entry = next) { + next = entry->next_in_bucket; + // Compute the new hash using the new size, and insert the entry in + // the new bucket. 
+ kmp_int32 new_bucket = __kmp_dephash_hash(entry->addr, h->size); + if (entry->next_in_bucket) { + h->nconflicts++; + } + entry->next_in_bucket = h->buckets[new_bucket]; + h->buckets[new_bucket] = entry; + } + } + + // Free old hash table +#if USE_FAST_MEMORY + __kmp_fast_free(thread, current_dephash); +#else + __kmp_thread_free(thread, current_dephash); +#endif + + return h; +} + static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread, kmp_taskdata_t *current_task) { kmp_dephash_t *h; @@ -81,10 +133,9 @@ static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread, #endif h->size = h_size; -#ifdef KMP_DEBUG + h->generation = 0; h->nelements = 0; h->nconflicts = 0; -#endif h->buckets = (kmp_dephash_entry **)(h + 1); for (size_t i = 0; i < h_size; i++) @@ -97,7 +148,13 @@ static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread, #define ENTRY_LAST_MTXS 1 static kmp_dephash_entry * -__kmp_dephash_find(kmp_info_t *thread, kmp_dephash_t *h, kmp_intptr_t addr) { +__kmp_dephash_find(kmp_info_t *thread, kmp_dephash_t **hash, kmp_intptr_t addr) { + kmp_dephash_t *h = *hash; + if (h->nelements != 0 + && h->nconflicts/h->size >= 1) { + *hash = __kmp_dephash_extend(thread, h); + h = *hash; + } kmp_int32 bucket = __kmp_dephash_hash(addr, h->size); kmp_dephash_entry_t *entry; @@ -122,11 +179,9 @@ __kmp_dephash_find(kmp_info_t *thread, kmp_dephash_t *h, kmp_intptr_t addr) { entry->mtx_lock = NULL; entry->next_in_bucket = h->buckets[bucket]; h->buckets[bucket] = entry; -#ifdef KMP_DEBUG h->nelements++; if (entry->next_in_bucket) h->nconflicts++; -#endif } return entry; } @@ -232,7 +287,7 @@ static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid, template static inline kmp_int32 -__kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash, +__kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash, bool dep_barrier, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_task_t *task) { KA_TRACE(30, ("__kmp_process_deps<%d>: 
T#%d processing %d dependencies : " @@ -352,7 +407,7 @@ __kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash, // returns true if the task has any outstanding dependence static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node, - kmp_task_t *task, kmp_dephash_t *hash, + kmp_task_t *task, kmp_dephash_t **hash, bool dep_barrier, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, @@ -552,7 +607,7 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, __kmp_init_node(node); new_taskdata->td_depnode = node; - if (__kmp_check_deps(gtid, node, new_task, current_task->td_dephash, + if (__kmp_check_deps(gtid, node, new_task, &current_task->td_dephash, NO_DEP_BARRIER, ndeps, dep_list, ndeps_noalias, noalias_dep_list)) { KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had blocking " @@ -633,7 +688,7 @@ void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depnode_t node = {0}; __kmp_init_node(&node); - if (!__kmp_check_deps(gtid, &node, NULL, current_task->td_dephash, + if (!__kmp_check_deps(gtid, &node, NULL, &current_task->td_dephash, DEP_BARRIER, ndeps, dep_list, ndeps_noalias, noalias_dep_list)) { KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking " diff --git a/runtime/src/kmp_taskq.cpp b/runtime/src/kmp_taskq.cpp deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/runtime/src/kmp_wait_release.h b/runtime/src/kmp_wait_release.h index bb6bdf5d8fa58..b235be3cf64ad 100644 --- a/runtime/src/kmp_wait_release.h +++ b/runtime/src/kmp_wait_release.h @@ -140,8 +140,11 @@ static void __ompt_implicit_task_end(kmp_info_t *this_thr, #endif if (!KMP_MASTER_TID(ds_tid)) { if (ompt_enabled.ompt_callback_implicit_task) { + int flags = this_thr->th.ompt_thread_info.parallel_flags; + flags = (flags & ompt_parallel_league) ?
ompt_task_initial + : ompt_task_implicit; ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( - ompt_scope_end, NULL, tId, 0, ds_tid, ompt_task_implicit); + ompt_scope_end, NULL, tId, 0, ds_tid, flags); } // return to idle state this_thr->th.ompt_thread_info.state = ompt_state_idle; diff --git a/runtime/src/ompt-general.cpp b/runtime/src/ompt-general.cpp index 00bf606bb1d5c..41b2827007b6d 100644 --- a/runtime/src/ompt-general.cpp +++ b/runtime/src/ompt-general.cpp @@ -430,10 +430,8 @@ OMPT_API_ROUTINE ompt_set_result_t ompt_set_callback(ompt_callbacks_t which, #define ompt_event_macro(event_name, callback_type, event_id) \ case event_name: \ - if (ompt_event_implementation_status(event_name)) { \ - ompt_callbacks.ompt_callback(event_name) = (callback_type)callback; \ - ompt_enabled.event_name = (callback != 0); \ - } \ + ompt_callbacks.ompt_callback(event_name) = (callback_type)callback; \ + ompt_enabled.event_name = (callback != 0); \ if (callback) \ return ompt_event_implementation_status(event_name); \ else \ @@ -456,16 +454,15 @@ OMPT_API_ROUTINE int ompt_get_callback(ompt_callbacks_t which, switch (which) { #define ompt_event_macro(event_name, callback_type, event_id) \ - case event_name: \ - if (ompt_event_implementation_status(event_name)) { \ - ompt_callback_t mycb = \ - (ompt_callback_t)ompt_callbacks.ompt_callback(event_name); \ - if (ompt_enabled.event_name && mycb) { \ - *callback = mycb; \ - return ompt_get_callback_success; \ - } \ + case event_name: { \ + ompt_callback_t mycb = \ + (ompt_callback_t)ompt_callbacks.ompt_callback(event_name); \ + if (ompt_enabled.event_name && mycb) { \ + *callback = mycb; \ + return ompt_get_callback_success; \ } \ - return ompt_get_callback_failure; + return ompt_get_callback_failure; \ + } FOREACH_OMPT_EVENT(ompt_event_macro) diff --git a/runtime/src/ompt-internal.h b/runtime/src/ompt-internal.h index 5a6beaf88caec..958b5943af389 100644 --- a/runtime/src/ompt-internal.h +++ b/runtime/src/ompt-internal.h @@ 
-81,6 +81,7 @@ typedef struct { ompt_state_t state; ompt_wait_id_t wait_id; int ompt_task_yielded; + int parallel_flags; // information for the last parallel region invoked void *idle_frame; } ompt_thread_info_t; diff --git a/runtime/src/ompt-specific.cpp b/runtime/src/ompt-specific.cpp index 63153d274efb3..7fb81bb7d1a01 100644 --- a/runtime/src/ompt-specific.cpp +++ b/runtime/src/ompt-specific.cpp @@ -269,10 +269,11 @@ void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, int gtid, } void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, - int on_heap) { + int on_heap, bool always) { ompt_lw_taskteam_t *link_lwt = lwt; - if (thr->th.th_team->t.t_serialized > - 1) { // we already have a team, so link the new team and swap values + if (always || + thr->th.th_team->t.t_serialized > + 1) { // we already have a team, so link the new team and swap values if (on_heap) { // the lw_taskteam cannot stay on stack, allocate it on heap link_lwt = (ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t)); diff --git a/runtime/src/ompt-specific.h b/runtime/src/ompt-specific.h index 86fd928d03789..47d8a1669846f 100644 --- a/runtime/src/ompt-specific.h +++ b/runtime/src/ompt-specific.h @@ -26,7 +26,7 @@ void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, int gtid, ompt_data_t *ompt_pid, void *codeptr); void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, - int on_heap); + int on_heap, bool always = false); void __ompt_lw_taskteam_unlink(kmp_info_t *thr); diff --git a/runtime/src/thirdparty/ittnotify/ittnotify_config.h b/runtime/src/thirdparty/ittnotify/ittnotify_config.h index cc494cb4db432..f231e70d181f3 100644 --- a/runtime/src/thirdparty/ittnotify/ittnotify_config.h +++ b/runtime/src/thirdparty/ittnotify/ittnotify_config.h @@ -161,6 +161,10 @@ # define ITT_ARCH_MIPS64 6 #endif /* ITT_ARCH_MIPS64 */ +#ifndef ITT_ARCH_RISCV64 +# define ITT_ARCH_RISCV64 7 +#endif /* ITT_ARCH_RISCV64 */ + #ifndef ITT_ARCH 
# if defined _M_IX86 || defined __i386__ # define ITT_ARCH ITT_ARCH_IA32 @@ -178,6 +182,8 @@ # define ITT_ARCH ITT_ARCH_MIPS # elif defined __mips__ && defined __mips64 # define ITT_ARCH ITT_ARCH_MIPS64 +# elif defined __riscv && __riscv_xlen == 64 +# define ITT_ARCH ITT_ARCH_RISCV64 # endif #endif @@ -330,7 +336,9 @@ ITT_INLINE long __TBB_machine_fetchadd4(volatile void* ptr, long addend) : "memory"); return result; } -#elif ITT_ARCH==ITT_ARCH_ARM || ITT_ARCH==ITT_ARCH_PPC64 || ITT_ARCH==ITT_ARCH_AARCH64 || ITT_ARCH==ITT_ARCH_MIPS || ITT_ARCH==ITT_ARCH_MIPS64 +#elif ITT_ARCH == ITT_ARCH_ARM || ITT_ARCH == ITT_ARCH_PPC64 || \ + ITT_ARCH == ITT_ARCH_AARCH64 || ITT_ARCH == ITT_ARCH_MIPS || \ + ITT_ARCH == ITT_ARCH_MIPS64 || ITT_ARCH == ITT_ARCH_RISCV64 #define __TBB_machine_fetchadd4(addr, val) __sync_fetch_and_add(addr, val) #endif /* ITT_ARCH==ITT_ARCH_IA64 */ #ifndef ITT_SIMPLE_INIT diff --git a/runtime/src/thirdparty/ittnotify/ittnotify_static.c b/runtime/src/thirdparty/ittnotify/ittnotify_static.c deleted file mode 100644 index a2a73ada2e0c1..0000000000000 --- a/runtime/src/thirdparty/ittnotify/ittnotify_static.c +++ /dev/null @@ -1,1201 +0,0 @@ - -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "kmp_config.h" -#include "ittnotify_config.h" - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#if defined(__MINGW32__) -#include -#else -#define PATH_MAX 512 -#endif -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -#include -#include -#include -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#include -#include -#include -#include - -#define INTEL_NO_MACRO_BODY -#define INTEL_ITTNOTIFY_API_PRIVATE -#include "ittnotify.h" -#include "legacy/ittnotify.h" - -#if KMP_MSVC_COMPAT -#include "disable_warnings.h" -#endif - -static const char api_version[] = API_VERSION "\0\n@(#) $Revision: 481659 $\n"; - -#define _N_(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n) - -#if ITT_OS==ITT_OS_WIN -static const char* ittnotify_lib_name = "libittnotify.dll"; -#elif ITT_OS==ITT_OS_LINUX || ITT_OS==ITT_OS_FREEBSD -static const char* ittnotify_lib_name = "libittnotify.so"; -#elif ITT_OS==ITT_OS_MAC -static const char* ittnotify_lib_name = "libittnotify.dylib"; -#else -#error Unsupported or unknown OS. -#endif - -#ifdef __ANDROID__ -#include -#include -#include -#include -#include -#include -#include - -#ifdef ITT_ANDROID_LOG - #define ITT_ANDROID_LOG_TAG "INTEL_VTUNE_USERAPI" - #define ITT_ANDROID_LOGI(...) ((void)__android_log_print(ANDROID_LOG_INFO, ITT_ANDROID_LOG_TAG, __VA_ARGS__)) - #define ITT_ANDROID_LOGW(...) ((void)__android_log_print(ANDROID_LOG_WARN, ITT_ANDROID_LOG_TAG, __VA_ARGS__)) - #define ITT_ANDROID_LOGE(...) ((void)__android_log_print(ANDROID_LOG_ERROR,ITT_ANDROID_LOG_TAG, __VA_ARGS__)) - #define ITT_ANDROID_LOGD(...) ((void)__android_log_print(ANDROID_LOG_DEBUG,ITT_ANDROID_LOG_TAG, __VA_ARGS__)) -#else - #define ITT_ANDROID_LOGI(...) - #define ITT_ANDROID_LOGW(...) - #define ITT_ANDROID_LOGE(...) - #define ITT_ANDROID_LOGD(...) 
-#endif - -/* default location of userapi collector on Android */ -#define ANDROID_ITTNOTIFY_DEFAULT_PATH_MASK(x) "/data/data/com.intel.vtune/perfrun/lib" \ - #x "/runtime/libittnotify.so" - -#if ITT_ARCH==ITT_ARCH_IA32 || ITT_ARCH==ITT_ARCH_ARM -#define ANDROID_ITTNOTIFY_DEFAULT_PATH ANDROID_ITTNOTIFY_DEFAULT_PATH_MASK(32) -#else -#define ANDROID_ITTNOTIFY_DEFAULT_PATH ANDROID_ITTNOTIFY_DEFAULT_PATH_MASK(64) -#endif - -#endif - -#ifndef PATH_MAX -#define PATH_MAX 4096 -#endif - - -#ifndef LIB_VAR_NAME -#if ITT_ARCH==ITT_ARCH_IA32 || ITT_ARCH==ITT_ARCH_ARM || ITT_ARCH==ITT_ARCH_MIPS -#define LIB_VAR_NAME INTEL_LIBITTNOTIFY32 -#else -#define LIB_VAR_NAME INTEL_LIBITTNOTIFY64 -#endif -#endif /* LIB_VAR_NAME */ - -#define ITT_MUTEX_INIT_AND_LOCK(p) { \ - if (PTHREAD_SYMBOLS) \ - { \ - if (!p.mutex_initialized) \ - { \ - if (__itt_interlocked_increment(&p.atomic_counter) == 1) \ - { \ - __itt_mutex_init(&p.mutex); \ - p.mutex_initialized = 1; \ - } \ - else \ - while (!p.mutex_initialized) \ - __itt_thread_yield(); \ - } \ - __itt_mutex_lock(&p.mutex); \ - } \ -} - -typedef int (__itt_init_ittlib_t)(const char*, __itt_group_id); - -/* this define used to control initialization function name. */ -#ifndef __itt_init_ittlib_name -ITT_EXTERN_C int _N_(init_ittlib)(const char*, __itt_group_id); -static __itt_init_ittlib_t* __itt_init_ittlib_ptr = _N_(init_ittlib); -#define __itt_init_ittlib_name __itt_init_ittlib_ptr -#endif /* __itt_init_ittlib_name */ - -typedef void (__itt_fini_ittlib_t)(void); - -/* this define used to control finalization function name. 
*/ -#ifndef __itt_fini_ittlib_name -ITT_EXTERN_C void _N_(fini_ittlib)(void); -static __itt_fini_ittlib_t* __itt_fini_ittlib_ptr = _N_(fini_ittlib); -#define __itt_fini_ittlib_name __itt_fini_ittlib_ptr -#endif /* __itt_fini_ittlib_name */ - -/* building pointers to imported funcs */ -#undef ITT_STUBV -#undef ITT_STUB -#define ITT_STUB(api,type,name,args,params,ptr,group,format) \ -static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\ -typedef type api ITT_JOIN(_N_(name),_t) args; \ -ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END \ -static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args \ -{ \ - __itt_init_ittlib_name(NULL, __itt_group_all); \ - if (ITTNOTIFY_NAME(name) && ITTNOTIFY_NAME(name) != ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init))) \ - return ITTNOTIFY_NAME(name) params; \ - else \ - return (type)0; \ -} - -#define ITT_STUBV(api,type,name,args,params,ptr,group,format) \ -static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\ -typedef type api ITT_JOIN(_N_(name),_t) args; \ -ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END \ -static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args \ -{ \ - __itt_init_ittlib_name(NULL, __itt_group_all); \ - if (ITTNOTIFY_NAME(name) && ITTNOTIFY_NAME(name) != ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init))) \ - ITTNOTIFY_NAME(name) params; \ - else \ - return; \ -} - -#undef __ITT_INTERNAL_INIT -#include "ittnotify_static.h" - -#undef ITT_STUB -#undef ITT_STUBV -#define ITT_STUB(api,type,name,args,params,ptr,group,format) \ -static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\ -typedef type api ITT_JOIN(_N_(name),_t) args; \ -ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END - -#define ITT_STUBV(api,type,name,args,params,ptr,group,format) \ -static type api 
ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\ -typedef type api ITT_JOIN(_N_(name),_t) args; \ -ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END - -#define __ITT_INTERNAL_INIT -#include "ittnotify_static.h" -#undef __ITT_INTERNAL_INIT - -ITT_GROUP_LIST(group_list); - -#pragma pack(push, 8) - -typedef struct ___itt_group_alias -{ - const char* env_var; - __itt_group_id groups; -} __itt_group_alias; - -static __itt_group_alias group_alias[] = { - { "KMP_FOR_TPROFILE", (__itt_group_id)(__itt_group_control | __itt_group_thread | __itt_group_sync | __itt_group_mark) }, - { "KMP_FOR_TCHECK", (__itt_group_id)(__itt_group_control | __itt_group_thread | __itt_group_sync | __itt_group_fsync | __itt_group_mark | __itt_group_suppress) }, - { NULL, (__itt_group_none) }, - { api_version, (__itt_group_none) } /* !!! Just to avoid unused code elimination !!! */ -}; - -#pragma pack(pop) - -#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT -#pragma warning(push) -#pragma warning(disable: 4054) /* warning C4054: 'type cast' : from function pointer 'XXX' to data pointer 'void *' */ -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -static __itt_api_info api_list[] = { -/* Define functions with static implementation */ -#undef ITT_STUB -#undef ITT_STUBV -#define ITT_STUB(api,type,name,args,params,nameindll,group,format) { ITT_TO_STR(ITT_JOIN(__itt_,nameindll)), (void**)(void*)&ITTNOTIFY_NAME(name), (void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), (void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), (__itt_group_id)(group)}, -#define ITT_STUBV ITT_STUB -#define __ITT_INTERNAL_INIT -#include "ittnotify_static.h" -#undef __ITT_INTERNAL_INIT -/* Define functions without static implementation */ -#undef ITT_STUB -#undef ITT_STUBV -#define ITT_STUB(api,type,name,args,params,nameindll,group,format) {ITT_TO_STR(ITT_JOIN(__itt_,nameindll)), (void**)(void*)&ITTNOTIFY_NAME(name), 
(void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), NULL, (__itt_group_id)(group)}, -#define ITT_STUBV ITT_STUB -#include "ittnotify_static.h" - {NULL, NULL, NULL, NULL, __itt_group_none} -}; - -#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT -#pragma warning(pop) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -static const char dll_path[PATH_MAX] = { 0 }; - -/* static part descriptor which handles. all notification api attributes. */ -__itt_global _N_(_ittapi_global) = { - ITT_MAGIC, /* identification info */ - ITT_MAJOR, ITT_MINOR, API_VERSION_BUILD, /* version info */ - 0, /* api_initialized */ - 0, /* mutex_initialized */ - 0, /* atomic_counter */ - MUTEX_INITIALIZER, /* mutex */ - NULL, /* dynamic library handle */ - NULL, /* error_handler */ - (const char**)&dll_path, /* dll_path_ptr */ - (__itt_api_info*)&api_list, /* api_list_ptr */ - NULL, /* next __itt_global */ - NULL, /* thread_list */ - NULL, /* domain_list */ - NULL, /* string_list */ - __itt_collection_normal, /* collection state */ - NULL /* counter_list */ -}; - -typedef void (__itt_api_init_t)(__itt_global*, __itt_group_id); -typedef void (__itt_api_fini_t)(__itt_global*); - -/* ========================================================================= */ - -#ifdef ITT_NOTIFY_EXT_REPORT -ITT_EXTERN_C void _N_(error_handler)(__itt_error_code, va_list args); -#endif /* ITT_NOTIFY_EXT_REPORT */ - -#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT -#pragma warning(push) -#pragma warning(disable: 4055) /* warning C4055: 'type cast' : from data pointer 'void *' to function pointer 'XXX' */ -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -static void __itt_report_error(unsigned code_arg, ...) -{ - va_list args; - va_start(args, code_arg); - - // We use unsigned for the code argument and explicitly cast it here to the - // right enumerator because variadic functions are not compatible with - // default promotions. 
- __itt_error_code code = (__itt_error_code)code_arg; - - if (_N_(_ittapi_global).error_handler != NULL) - { - __itt_error_handler_t* handler = (__itt_error_handler_t*)(size_t)_N_(_ittapi_global).error_handler; - handler(code, args); - } -#ifdef ITT_NOTIFY_EXT_REPORT - _N_(error_handler)(code, args); -#endif /* ITT_NOTIFY_EXT_REPORT */ - va_end(args); -} - -#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT -#pragma warning(pop) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createW),_init))(const wchar_t* name) -{ - __itt_domain *h_tail = NULL, *h = NULL; - - if (name == NULL) - { - return NULL; - } - - ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); - if (_N_(_ittapi_global).api_initialized) - { - if (ITTNOTIFY_NAME(domain_createW) && ITTNOTIFY_NAME(domain_createW) != ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createW),_init))) - { - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return ITTNOTIFY_NAME(domain_createW)(name); - } - } - for (h_tail = NULL, h = _N_(_ittapi_global).domain_list; h != NULL; h_tail = h, h = h->next) - { - if (h->nameW != NULL && !wcscmp(h->nameW, name)) break; - } - if (h == NULL) - { - NEW_DOMAIN_W(&_N_(_ittapi_global),h,h_tail,name); - } - if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return h; -} - -static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createA),_init))(const char* name) -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_create),_init))(const char* name) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -{ - __itt_domain *h_tail = NULL, *h = NULL; - - if (name == NULL) - { - return NULL; - } - - ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); - if (_N_(_ittapi_global).api_initialized) - { -#if ITT_PLATFORM==ITT_PLATFORM_WIN - if (ITTNOTIFY_NAME(domain_createA) && ITTNOTIFY_NAME(domain_createA) != 
ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createA),_init))) - { - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return ITTNOTIFY_NAME(domain_createA)(name); - } -#else - if (ITTNOTIFY_NAME(domain_create) && ITTNOTIFY_NAME(domain_create) != ITT_VERSIONIZE(ITT_JOIN(_N_(domain_create),_init))) - { - if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return ITTNOTIFY_NAME(domain_create)(name); - } -#endif - } - for (h_tail = NULL, h = _N_(_ittapi_global).domain_list; h != NULL; h_tail = h, h = h->next) - { - if (h->nameA != NULL && !__itt_fstrcmp(h->nameA, name)) break; - } - if (h == NULL) - { - NEW_DOMAIN_A(&_N_(_ittapi_global),h,h_tail,name); - } - if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return h; -} - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createW),_init))(const wchar_t* name) -{ - __itt_string_handle *h_tail = NULL, *h = NULL; - - if (name == NULL) - { - return NULL; - } - - ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); - if (_N_(_ittapi_global).api_initialized) - { - if (ITTNOTIFY_NAME(string_handle_createW) && ITTNOTIFY_NAME(string_handle_createW) != ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createW),_init))) - { - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return ITTNOTIFY_NAME(string_handle_createW)(name); - } - } - for (h_tail = NULL, h = _N_(_ittapi_global).string_list; h != NULL; h_tail = h, h = h->next) - { - if (h->strW != NULL && !wcscmp(h->strW, name)) break; - } - if (h == NULL) - { - NEW_STRING_HANDLE_W(&_N_(_ittapi_global),h,h_tail,name); - } - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return h; -} - -static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createA),_init))(const char* name) -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_create),_init))(const char* name) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -{ - 
__itt_string_handle *h_tail = NULL, *h = NULL; - - if (name == NULL) - { - return NULL; - } - - ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); - if (_N_(_ittapi_global).api_initialized) - { -#if ITT_PLATFORM==ITT_PLATFORM_WIN - if (ITTNOTIFY_NAME(string_handle_createA) && ITTNOTIFY_NAME(string_handle_createA) != ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createA),_init))) - { - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return ITTNOTIFY_NAME(string_handle_createA)(name); - } -#else - if (ITTNOTIFY_NAME(string_handle_create) && ITTNOTIFY_NAME(string_handle_create) != ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_create),_init))) - { - if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return ITTNOTIFY_NAME(string_handle_create)(name); - } -#endif - } - for (h_tail = NULL, h = _N_(_ittapi_global).string_list; h != NULL; h_tail = h, h = h->next) - { - if (h->strA != NULL && !__itt_fstrcmp(h->strA, name)) break; - } - if (h == NULL) - { - NEW_STRING_HANDLE_A(&_N_(_ittapi_global),h,h_tail,name); - } - if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return h; -} - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createW),_init))(const wchar_t *name, const wchar_t *domain) -{ - __itt_counter_info_t *h_tail = NULL, *h = NULL; - __itt_metadata_type type = __itt_metadata_u64; - - if (name == NULL) - { - return NULL; - } - - ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); - if (_N_(_ittapi_global).api_initialized) - { - if (ITTNOTIFY_NAME(counter_createW) && ITTNOTIFY_NAME(counter_createW) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createW),_init))) - { - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return ITTNOTIFY_NAME(counter_createW)(name, domain); - } - } - for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) - { - if (h->nameW != NULL && h->type == type && !wcscmp(h->nameW, name) && ((h->domainW == NULL && domain == NULL) || - 
(h->domainW != NULL && domain != NULL && !wcscmp(h->domainW, domain)))) break; - - } - if (h == NULL) - { - NEW_COUNTER_W(&_N_(_ittapi_global),h,h_tail,name,domain,type); - } - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return (__itt_counter)h; -} - -static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createA),_init))(const char *name, const char *domain) -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create),_init))(const char *name, const char *domain) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -{ - __itt_counter_info_t *h_tail = NULL, *h = NULL; - __itt_metadata_type type = __itt_metadata_u64; - - if (name == NULL) - { - return NULL; - } - - ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); - if (_N_(_ittapi_global).api_initialized) - { -#if ITT_PLATFORM==ITT_PLATFORM_WIN - if (ITTNOTIFY_NAME(counter_createA) && ITTNOTIFY_NAME(counter_createA) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createA),_init))) - { - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return ITTNOTIFY_NAME(counter_createA)(name, domain); - } -#else - if (ITTNOTIFY_NAME(counter_create) && ITTNOTIFY_NAME(counter_create) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create),_init))) - { - if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return ITTNOTIFY_NAME(counter_create)(name, domain); - } -#endif - } - for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) - { - if (h->nameA != NULL && h->type == type && !__itt_fstrcmp(h->nameA, name) && ((h->domainA == NULL && domain == NULL) || - (h->domainA != NULL && domain != NULL && !__itt_fstrcmp(h->domainA, domain)))) break; - } - if (h == NULL) - { - NEW_COUNTER_A(&_N_(_ittapi_global),h,h_tail,name,domain,type); - } - if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return (__itt_counter)h; -} - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -static __itt_counter ITTAPI 
ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typedW),_init))(const wchar_t *name, const wchar_t *domain, __itt_metadata_type type) -{ - __itt_counter_info_t *h_tail = NULL, *h = NULL; - - if (name == NULL) - { - return NULL; - } - - ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); - if (_N_(_ittapi_global).api_initialized) - { - if (ITTNOTIFY_NAME(counter_create_typedW) && ITTNOTIFY_NAME(counter_create_typedW) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typedW),_init))) - { - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return ITTNOTIFY_NAME(counter_create_typedW)(name, domain, type); - } - } - for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) - { - if (h->nameW != NULL && h->type == type && !wcscmp(h->nameW, name) && ((h->domainW == NULL && domain == NULL) || - (h->domainW != NULL && domain != NULL && !wcscmp(h->domainW, domain)))) break; - - } - if (h == NULL) - { - NEW_COUNTER_W(&_N_(_ittapi_global),h,h_tail,name,domain,type); - } - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return (__itt_counter)h; -} - -static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typedA),_init))(const char *name, const char *domain, __itt_metadata_type type) -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typed),_init))(const char *name, const char *domain, __itt_metadata_type type) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -{ - __itt_counter_info_t *h_tail = NULL, *h = NULL; - - if (name == NULL) - { - return NULL; - } - - ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); - if (_N_(_ittapi_global).api_initialized) - { -#if ITT_PLATFORM==ITT_PLATFORM_WIN - if (ITTNOTIFY_NAME(counter_create_typedA) && ITTNOTIFY_NAME(counter_create_typedA) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typedA),_init))) - { - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return ITTNOTIFY_NAME(counter_create_typedA)(name, domain, type); - } -#else - if 
(ITTNOTIFY_NAME(counter_create_typed) && ITTNOTIFY_NAME(counter_create_typed) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typed),_init))) - { - if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return ITTNOTIFY_NAME(counter_create_typed)(name, domain, type); - } -#endif - } - for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) - { - if (h->nameA != NULL && h->type == type && !__itt_fstrcmp(h->nameA, name) && ((h->domainA == NULL && domain == NULL) || - (h->domainA != NULL && domain != NULL && !__itt_fstrcmp(h->domainA, domain)))) break; - } - if (h == NULL) - { - NEW_COUNTER_A(&_N_(_ittapi_global),h,h_tail,name,domain,type); - } - if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return (__itt_counter)h; -} - -/* -------------------------------------------------------------------------- */ - -static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(pause),_init))(void) -{ - if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL) - { - __itt_init_ittlib_name(NULL, __itt_group_all); - } - if (ITTNOTIFY_NAME(pause) && ITTNOTIFY_NAME(pause) != ITT_VERSIONIZE(ITT_JOIN(_N_(pause),_init))) - { - ITTNOTIFY_NAME(pause)(); - } - else - { - _N_(_ittapi_global).state = __itt_collection_paused; - } -} - -static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(resume),_init))(void) -{ - if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL) - { - __itt_init_ittlib_name(NULL, __itt_group_all); - } - if (ITTNOTIFY_NAME(resume) && ITTNOTIFY_NAME(resume) != ITT_VERSIONIZE(ITT_JOIN(_N_(resume),_init))) - { - ITTNOTIFY_NAME(resume)(); - } - else - { - _N_(_ittapi_global).state = __itt_collection_normal; - } -} - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init))(const wchar_t* name) -{ - if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL) - { - 
__itt_init_ittlib_name(NULL, __itt_group_all); - } - if (ITTNOTIFY_NAME(thread_set_nameW) && ITTNOTIFY_NAME(thread_set_nameW) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init))) - { - ITTNOTIFY_NAME(thread_set_nameW)(name); - } -} - -static int ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_name_setW),_init))(const wchar_t* name, int namelen) -{ - (void)namelen; - ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init))(name); - return 0; -} - -static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameA),_init))(const char* name) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_name),_init))(const char* name) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -{ - if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL) - { - __itt_init_ittlib_name(NULL, __itt_group_all); - } -#if ITT_PLATFORM==ITT_PLATFORM_WIN - if (ITTNOTIFY_NAME(thread_set_nameA) && ITTNOTIFY_NAME(thread_set_nameA) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameA),_init))) - { - ITTNOTIFY_NAME(thread_set_nameA)(name); - } -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - if (ITTNOTIFY_NAME(thread_set_name) && ITTNOTIFY_NAME(thread_set_name) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_name),_init))) - { - ITTNOTIFY_NAME(thread_set_name)(name); - } -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -} - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -static int ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_name_setA),_init))(const char* name, int namelen) -{ - (void)namelen; - ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameA),_init))(name); - return 0; -} -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -static int ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_name_set),_init))(const char* name, int namelen) -{ - (void)namelen; - ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_name),_init))(name); - return 0; -} -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init))(void) -{ - if (!_N_(_ittapi_global).api_initialized 
&& _N_(_ittapi_global).thread_list == NULL) - { - __itt_init_ittlib_name(NULL, __itt_group_all); - } - if (ITTNOTIFY_NAME(thread_ignore) && ITTNOTIFY_NAME(thread_ignore) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init))) - { - ITTNOTIFY_NAME(thread_ignore)(); - } -} - -static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_ignore),_init))(void) -{ - ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init))(); -} - -static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(enable_attach),_init))(void) -{ -#ifdef __ANDROID__ - /* - * if LIB_VAR_NAME env variable were set before then stay previous value - * else set default path - */ - setenv(ITT_TO_STR(LIB_VAR_NAME), ANDROID_ITTNOTIFY_DEFAULT_PATH, 0); -#endif -} - -/* -------------------------------------------------------------------------- */ - -static const char* __itt_fsplit(const char* s, const char* sep, const char** out, int* len) -{ - int i; - int j; - - if (!s || !sep || !out || !len) - return NULL; - - for (i = 0; s[i]; i++) - { - int b = 0; - for (j = 0; sep[j]; j++) - if (s[i] == sep[j]) - { - b = 1; - break; - } - if (!b) - break; - } - - if (!s[i]) - return NULL; - - *len = 0; - *out = &s[i]; - - for (; s[i]; i++, (*len)++) - { - int b = 0; - for (j = 0; sep[j]; j++) - if (s[i] == sep[j]) - { - b = 1; - break; - } - if (b) - break; - } - - for (; s[i]; i++) - { - int b = 0; - for (j = 0; sep[j]; j++) - if (s[i] == sep[j]) - { - b = 1; - break; - } - if (!b) - break; - } - - return &s[i]; -} - -/* This function return value of env variable that placed into static buffer. - * !!! The same static buffer is used for subsequent calls. !!! - * This was done to aviod dynamic allocation for few calls. - * Actually we need this function only four times. 
- */ -static const char* __itt_get_env_var(const char* name) -{ -#define MAX_ENV_VALUE_SIZE 4086 - static char env_buff[MAX_ENV_VALUE_SIZE]; - static char* env_value = (char*)env_buff; - - if (name != NULL) - { -#if ITT_PLATFORM==ITT_PLATFORM_WIN - size_t max_len = MAX_ENV_VALUE_SIZE - (size_t)(env_value - env_buff); - DWORD rc = GetEnvironmentVariableA(name, env_value, (DWORD)max_len); - if (rc >= max_len) - __itt_report_error(__itt_error_env_too_long, name, (size_t)rc - 1, (size_t)(max_len - 1)); - else if (rc > 0) - { - const char* ret = (const char*)env_value; - env_value += rc + 1; - return ret; - } - else - { - /* If environment variable is empty, GetEnvirornmentVariables() - * returns zero (number of characters (not including terminating null), - * and GetLastError() returns ERROR_SUCCESS. */ - DWORD err = GetLastError(); - if (err == ERROR_SUCCESS) - return env_value; - - if (err != ERROR_ENVVAR_NOT_FOUND) - __itt_report_error(__itt_error_cant_read_env, name, (int)err); - } -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ - char* env = getenv(name); - if (env != NULL) - { - size_t len = __itt_fstrnlen(env, MAX_ENV_VALUE_SIZE); - size_t max_len = MAX_ENV_VALUE_SIZE - (size_t)(env_value - env_buff); - if (len < max_len) - { - const char* ret = (const char*)env_value; - __itt_fstrcpyn(env_value, max_len, env, len + 1); - env_value += len + 1; - return ret; - } else - __itt_report_error(__itt_error_env_too_long, name, (size_t)len, (size_t)(max_len - 1)); - } -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - } - return NULL; -} - -static const char* __itt_get_lib_name(void) -{ - const char* lib_name = __itt_get_env_var(ITT_TO_STR(LIB_VAR_NAME)); - -#ifdef __ANDROID__ - if (lib_name == NULL) - { - -#if ITT_ARCH==ITT_ARCH_IA32 || ITT_ARCH==ITT_ARCH_ARM - const char* const marker_filename = "com.intel.itt.collector_lib_32"; -#else - const char* const marker_filename = "com.intel.itt.collector_lib_64"; -#endif - - char system_wide_marker_filename[PATH_MAX] = {0}; - int 
itt_marker_file_fd = -1; - ssize_t res = 0; - - res = snprintf(system_wide_marker_filename, PATH_MAX - 1, "%s%s", "/data/local/tmp/", marker_filename); - if (res < 0) - { - ITT_ANDROID_LOGE("Unable to concatenate marker file string."); - return lib_name; - } - itt_marker_file_fd = open(system_wide_marker_filename, O_RDONLY); - - if (itt_marker_file_fd == -1) - { - const pid_t my_pid = getpid(); - char cmdline_path[PATH_MAX] = {0}; - char package_name[PATH_MAX] = {0}; - char app_sandbox_file[PATH_MAX] = {0}; - int cmdline_fd = 0; - - ITT_ANDROID_LOGI("Unable to open system-wide marker file."); - res = snprintf(cmdline_path, PATH_MAX - 1, "/proc/%d/cmdline", my_pid); - if (res < 0) - { - ITT_ANDROID_LOGE("Unable to get cmdline path string."); - return lib_name; - } - - ITT_ANDROID_LOGI("CMD file: %s\n", cmdline_path); - cmdline_fd = open(cmdline_path, O_RDONLY); - if (cmdline_fd == -1) - { - ITT_ANDROID_LOGE("Unable to open %s file!", cmdline_path); - return lib_name; - } - res = read(cmdline_fd, package_name, PATH_MAX - 1); - if (res == -1) - { - ITT_ANDROID_LOGE("Unable to read %s file!", cmdline_path); - res = close(cmdline_fd); - if (res == -1) - { - ITT_ANDROID_LOGE("Unable to close %s file!", cmdline_path); - } - return lib_name; - } - res = close(cmdline_fd); - if (res == -1) - { - ITT_ANDROID_LOGE("Unable to close %s file!", cmdline_path); - return lib_name; - } - ITT_ANDROID_LOGI("Package name: %s\n", package_name); - res = snprintf(app_sandbox_file, PATH_MAX - 1, "/data/data/%s/%s", package_name, marker_filename); - if (res < 0) - { - ITT_ANDROID_LOGE("Unable to concatenate marker file string."); - return lib_name; - } - - ITT_ANDROID_LOGI("Lib marker file name: %s\n", app_sandbox_file); - itt_marker_file_fd = open(app_sandbox_file, O_RDONLY); - if (itt_marker_file_fd == -1) - { - ITT_ANDROID_LOGE("Unable to open app marker file!"); - return lib_name; - } - } - - { - char itt_lib_name[PATH_MAX] = {0}; - - res = read(itt_marker_file_fd, itt_lib_name, 
PATH_MAX - 1); - if (res == -1) - { - ITT_ANDROID_LOGE("Unable to read %s file!", itt_marker_file_fd); - res = close(itt_marker_file_fd); - if (res == -1) - { - ITT_ANDROID_LOGE("Unable to close %s file!", itt_marker_file_fd); - } - return lib_name; - } - ITT_ANDROID_LOGI("ITT Lib path: %s", itt_lib_name); - res = close(itt_marker_file_fd); - if (res == -1) - { - ITT_ANDROID_LOGE("Unable to close %s file!", itt_marker_file_fd); - return lib_name; - } - ITT_ANDROID_LOGI("Set env %s to %s", ITT_TO_STR(LIB_VAR_NAME), itt_lib_name); - res = setenv(ITT_TO_STR(LIB_VAR_NAME), itt_lib_name, 0); - if (res == -1) - { - ITT_ANDROID_LOGE("Unable to set env var!"); - return lib_name; - } - lib_name = __itt_get_env_var(ITT_TO_STR(LIB_VAR_NAME)); - ITT_ANDROID_LOGI("ITT Lib path from env: %s", lib_name); - } - } -#endif - - return lib_name; -} - -/* Avoid clashes with std::min, reported by tbb team */ -#define __itt_min(a,b) (a) < (b) ? (a) : (b) - -static __itt_group_id __itt_get_groups(void) -{ - int i; - __itt_group_id res = __itt_group_none; - const char* var_name = "INTEL_ITTNOTIFY_GROUPS"; - const char* group_str = __itt_get_env_var(var_name); - - if (group_str != NULL) - { - int len; - char gr[255]; - const char* chunk; - while ((group_str = __itt_fsplit(group_str, ",; ", &chunk, &len)) != NULL) - { - int min_len = __itt_min(len, (int)(sizeof(gr) - 1)); - __itt_fstrcpyn(gr, sizeof(gr) - 1, chunk, min_len); - gr[min_len] = 0; - - for (i = 0; group_list[i].name != NULL; i++) - { - if (!__itt_fstrcmp(gr, group_list[i].name)) - { - res = (__itt_group_id)(res | group_list[i].id); - break; - } - } - } - /* TODO: !!! Workaround for bug with warning for unknown group !!! - * Should be fixed in new initialization scheme. - * Now the following groups should be set always. 
*/ - for (i = 0; group_list[i].id != __itt_group_none; i++) - if (group_list[i].id != __itt_group_all && - group_list[i].id > __itt_group_splitter_min && - group_list[i].id < __itt_group_splitter_max) - res = (__itt_group_id)(res | group_list[i].id); - return res; - } - else - { - for (i = 0; group_alias[i].env_var != NULL; i++) - if (__itt_get_env_var(group_alias[i].env_var) != NULL) - return group_alias[i].groups; - } - - return res; -} - -#undef __itt_min - -static int __itt_lib_version(lib_t lib) -{ - if (lib == NULL) - return 0; - if (__itt_get_proc(lib, "__itt_api_init")) - return 2; - if (__itt_get_proc(lib, "__itt_api_version")) - return 1; - return 0; -} - -/* It's not used right now! Comment it out to avoid warnings. -static void __itt_reinit_all_pointers(void) -{ - int i; - // Fill all pointers with initial stubs - for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) - *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].init_func; -} -*/ - -static void __itt_nullify_all_pointers(void) -{ - int i; - /* Nulify all pointers except domain_create, string_handle_create and counter_create */ - for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) - *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func; -} - -#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT -#pragma warning(push) -#pragma warning(disable: 4054) /* warning C4054: 'type cast' : from function pointer 'XXX' to data pointer 'void *' */ -#pragma warning(disable: 4055) /* warning C4055: 'type cast' : from data pointer 'void *' to function pointer 'XXX' */ -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -ITT_EXTERN_C void _N_(fini_ittlib)(void) -{ - __itt_api_fini_t* __itt_api_fini_ptr = NULL; - static volatile TIDT current_thread = 0; - - if (_N_(_ittapi_global).api_initialized) - { - ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); - if (_N_(_ittapi_global).api_initialized) - { - if (current_thread == 
0) - { - if (PTHREAD_SYMBOLS) current_thread = __itt_thread_id(); - if (_N_(_ittapi_global).lib != NULL) - { - __itt_api_fini_ptr = (__itt_api_fini_t*)(size_t)__itt_get_proc(_N_(_ittapi_global).lib, "__itt_api_fini"); - } - if (__itt_api_fini_ptr) - { - __itt_api_fini_ptr(&_N_(_ittapi_global)); - } - - __itt_nullify_all_pointers(); - - /* TODO: !!! not safe !!! don't support unload so far. - * if (_N_(_ittapi_global).lib != NULL) - * __itt_unload_lib(_N_(_ittapi_global).lib); - * _N_(_ittapi_global).lib = NULL; - */ - _N_(_ittapi_global).api_initialized = 0; - current_thread = 0; - } - } - if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - } -} - -ITT_EXTERN_C int _N_(init_ittlib)(const char* lib_name, __itt_group_id init_groups) -{ - int i; - __itt_group_id groups; -#ifdef ITT_COMPLETE_GROUP - __itt_group_id zero_group = __itt_group_none; -#endif /* ITT_COMPLETE_GROUP */ - static volatile TIDT current_thread = 0; - - if (!_N_(_ittapi_global).api_initialized) - { -#ifndef ITT_SIMPLE_INIT - ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); -#endif /* ITT_SIMPLE_INIT */ - - if (!_N_(_ittapi_global).api_initialized) - { - if (current_thread == 0) - { - if (PTHREAD_SYMBOLS) current_thread = __itt_thread_id(); - if (lib_name == NULL) - { - lib_name = __itt_get_lib_name(); - } - groups = __itt_get_groups(); - if (DL_SYMBOLS && (groups != __itt_group_none || lib_name != NULL)) - { - _N_(_ittapi_global).lib = __itt_load_lib((lib_name == NULL) ? 
ittnotify_lib_name : lib_name); - - if (_N_(_ittapi_global).lib != NULL) - { - __itt_api_init_t* __itt_api_init_ptr; - int lib_version = __itt_lib_version(_N_(_ittapi_global).lib); - - switch (lib_version) { - case 0: - groups = __itt_group_legacy; - case 1: - /* Fill all pointers from dynamic library */ - for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) - { - if (_N_(_ittapi_global).api_list_ptr[i].group & groups & init_groups) - { - *_N_(_ittapi_global).api_list_ptr[i].func_ptr = (void*)__itt_get_proc(_N_(_ittapi_global).lib, _N_(_ittapi_global).api_list_ptr[i].name); - if (*_N_(_ittapi_global).api_list_ptr[i].func_ptr == NULL) - { - /* Restore pointers for function with static implementation */ - *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func; - __itt_report_error(__itt_error_no_symbol, lib_name, _N_(_ittapi_global).api_list_ptr[i].name); -#ifdef ITT_COMPLETE_GROUP - zero_group = (__itt_group_id)(zero_group | _N_(_ittapi_global).api_list_ptr[i].group); -#endif /* ITT_COMPLETE_GROUP */ - } - } - else - *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func; - } - - if (groups == __itt_group_legacy) - { - /* Compatibility with legacy tools */ - ITTNOTIFY_NAME(thread_ignore) = ITTNOTIFY_NAME(thr_ignore); -#if ITT_PLATFORM==ITT_PLATFORM_WIN - ITTNOTIFY_NAME(sync_createA) = ITTNOTIFY_NAME(sync_set_nameA); - ITTNOTIFY_NAME(sync_createW) = ITTNOTIFY_NAME(sync_set_nameW); -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ - ITTNOTIFY_NAME(sync_create) = ITTNOTIFY_NAME(sync_set_name); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - ITTNOTIFY_NAME(sync_prepare) = ITTNOTIFY_NAME(notify_sync_prepare); - ITTNOTIFY_NAME(sync_cancel) = ITTNOTIFY_NAME(notify_sync_cancel); - ITTNOTIFY_NAME(sync_acquired) = ITTNOTIFY_NAME(notify_sync_acquired); - ITTNOTIFY_NAME(sync_releasing) = ITTNOTIFY_NAME(notify_sync_releasing); - } - -#ifdef ITT_COMPLETE_GROUP - for (i = 0; 
_N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) - if (_N_(_ittapi_global).api_list_ptr[i].group & zero_group) - *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func; -#endif /* ITT_COMPLETE_GROUP */ - break; - case 2: - __itt_api_init_ptr = (__itt_api_init_t*)(size_t)__itt_get_proc(_N_(_ittapi_global).lib, "__itt_api_init"); - if (__itt_api_init_ptr) - __itt_api_init_ptr(&_N_(_ittapi_global), init_groups); - break; - } - } - else - { - __itt_nullify_all_pointers(); - - __itt_report_error(__itt_error_no_module, lib_name, -#if ITT_PLATFORM==ITT_PLATFORM_WIN - __itt_system_error() -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - dlerror() -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - ); - } - } - else - { - __itt_nullify_all_pointers(); - } - _N_(_ittapi_global).api_initialized = 1; - current_thread = 0; - /* !!! Just to avoid unused code elimination !!! */ - if (__itt_fini_ittlib_ptr == _N_(fini_ittlib)) current_thread = 0; - } - } - -#ifndef ITT_SIMPLE_INIT - if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); -#endif /* ITT_SIMPLE_INIT */ - } - - /* Evaluating if any function ptr is non empty and it's in init_groups */ - for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) - { - if (*_N_(_ittapi_global).api_list_ptr[i].func_ptr != _N_(_ittapi_global).api_list_ptr[i].null_func && - _N_(_ittapi_global).api_list_ptr[i].group & init_groups) - { - return 1; - } - } - return 0; -} - -ITT_EXTERN_C __itt_error_handler_t* _N_(set_error_handler)(__itt_error_handler_t* handler) -{ - __itt_error_handler_t* prev = (__itt_error_handler_t*)(size_t)_N_(_ittapi_global).error_handler; - _N_(_ittapi_global).error_handler = (void*)(size_t)handler; - return prev; -} - -#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT -#pragma warning(pop) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ diff --git a/runtime/src/thirdparty/ittnotify/ittnotify_static.cpp b/runtime/src/thirdparty/ittnotify/ittnotify_static.cpp 
new file mode 100644 index 0000000000000..c48b3f420bb78 --- /dev/null +++ b/runtime/src/thirdparty/ittnotify/ittnotify_static.cpp @@ -0,0 +1,1201 @@ + +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "kmp_config.h" +#include "kmp_os.h" +#include "ittnotify_config.h" + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#if defined(__MINGW32__) +#include <limits.h> +#else +#define PATH_MAX 512 +#endif +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +#include <limits.h> +#include <dlfcn.h> +#include <errno.h> +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> + +#define INTEL_NO_MACRO_BODY +#define INTEL_ITTNOTIFY_API_PRIVATE +#include "ittnotify.h" +#include "legacy/ittnotify.h" + +#if KMP_MSVC_COMPAT +#include "disable_warnings.h" +#endif + +static const char api_version[] = API_VERSION "\0\n@(#) $Revision: 481659 $\n"; + +#define _N_(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n) + +#if ITT_OS==ITT_OS_WIN +static const char* ittnotify_lib_name = "libittnotify.dll"; +#elif ITT_OS==ITT_OS_LINUX || ITT_OS==ITT_OS_FREEBSD +static const char* ittnotify_lib_name = "libittnotify.so"; +#elif ITT_OS==ITT_OS_MAC +static const char* ittnotify_lib_name = "libittnotify.dylib"; +#else +#error Unsupported or unknown OS. +#endif + +#ifdef __ANDROID__ +#include <android/log.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <linux/limits.h> + +#ifdef ITT_ANDROID_LOG + #define ITT_ANDROID_LOG_TAG "INTEL_VTUNE_USERAPI" + #define ITT_ANDROID_LOGI(...) ((void)__android_log_print(ANDROID_LOG_INFO, ITT_ANDROID_LOG_TAG, __VA_ARGS__)) + #define ITT_ANDROID_LOGW(...) ((void)__android_log_print(ANDROID_LOG_WARN, ITT_ANDROID_LOG_TAG, __VA_ARGS__)) + #define ITT_ANDROID_LOGE(...)
((void)__android_log_print(ANDROID_LOG_ERROR,ITT_ANDROID_LOG_TAG, __VA_ARGS__)) + #define ITT_ANDROID_LOGD(...) ((void)__android_log_print(ANDROID_LOG_DEBUG,ITT_ANDROID_LOG_TAG, __VA_ARGS__)) +#else + #define ITT_ANDROID_LOGI(...) + #define ITT_ANDROID_LOGW(...) + #define ITT_ANDROID_LOGE(...) + #define ITT_ANDROID_LOGD(...) +#endif + +/* default location of userapi collector on Android */ +#define ANDROID_ITTNOTIFY_DEFAULT_PATH_MASK(x) "/data/data/com.intel.vtune/perfrun/lib" \ + #x "/runtime/libittnotify.so" + +#if ITT_ARCH==ITT_ARCH_IA32 || ITT_ARCH==ITT_ARCH_ARM +#define ANDROID_ITTNOTIFY_DEFAULT_PATH ANDROID_ITTNOTIFY_DEFAULT_PATH_MASK(32) +#else +#define ANDROID_ITTNOTIFY_DEFAULT_PATH ANDROID_ITTNOTIFY_DEFAULT_PATH_MASK(64) +#endif + +#endif + +#ifndef PATH_MAX +#define PATH_MAX 4096 +#endif + + +#ifndef LIB_VAR_NAME +#if ITT_ARCH==ITT_ARCH_IA32 || ITT_ARCH==ITT_ARCH_ARM || ITT_ARCH==ITT_ARCH_MIPS +#define LIB_VAR_NAME INTEL_LIBITTNOTIFY32 +#else +#define LIB_VAR_NAME INTEL_LIBITTNOTIFY64 +#endif +#endif /* LIB_VAR_NAME */ + +#define ITT_MUTEX_INIT_AND_LOCK(p) { \ + if (PTHREAD_SYMBOLS) \ + { \ + if (!p.mutex_initialized) \ + { \ + if (__itt_interlocked_increment(&p.atomic_counter) == 1) \ + { \ + __itt_mutex_init(&p.mutex); \ + p.mutex_initialized = 1; \ + } \ + else \ + while (!p.mutex_initialized) \ + __itt_thread_yield(); \ + } \ + __itt_mutex_lock(&p.mutex); \ + } \ +} + +typedef int (__itt_init_ittlib_t)(const char*, __itt_group_id); + +/* this define used to control initialization function name. */ +#ifndef __itt_init_ittlib_name +ITT_EXTERN_C int _N_(init_ittlib)(const char*, __itt_group_id); +static __itt_init_ittlib_t* __itt_init_ittlib_ptr = _N_(init_ittlib); +#define __itt_init_ittlib_name __itt_init_ittlib_ptr +#endif /* __itt_init_ittlib_name */ + +typedef void (__itt_fini_ittlib_t)(void); + +/* this define used to control finalization function name. 
*/ +#ifndef __itt_fini_ittlib_name +ITT_EXTERN_C void _N_(fini_ittlib)(void); +static __itt_fini_ittlib_t* __itt_fini_ittlib_ptr = _N_(fini_ittlib); +#define __itt_fini_ittlib_name __itt_fini_ittlib_ptr +#endif /* __itt_fini_ittlib_name */ + +/* building pointers to imported funcs */ +#undef ITT_STUBV +#undef ITT_STUB +#define ITT_STUB(api,type,name,args,params,ptr,group,format) \ +static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\ +typedef type api ITT_JOIN(_N_(name),_t) args; \ +ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END \ +static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args \ +{ \ + __itt_init_ittlib_name(NULL, __itt_group_all); \ + if (ITTNOTIFY_NAME(name) && ITTNOTIFY_NAME(name) != ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init))) \ + return ITTNOTIFY_NAME(name) params; \ + else \ + return (type)0; \ +} + +#define ITT_STUBV(api,type,name,args,params,ptr,group,format) \ +static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\ +typedef type api ITT_JOIN(_N_(name),_t) args; \ +ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END \ +static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args \ +{ \ + __itt_init_ittlib_name(NULL, __itt_group_all); \ + if (ITTNOTIFY_NAME(name) && ITTNOTIFY_NAME(name) != ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init))) \ + ITTNOTIFY_NAME(name) params; \ + else \ + return; \ +} + +#undef __ITT_INTERNAL_INIT +#include "ittnotify_static.h" + +#undef ITT_STUB +#undef ITT_STUBV +#define ITT_STUB(api,type,name,args,params,ptr,group,format) \ +static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\ +typedef type api ITT_JOIN(_N_(name),_t) args; \ +ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END + +#define ITT_STUBV(api,type,name,args,params,ptr,group,format) \ +static type api 
ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\ +typedef type api ITT_JOIN(_N_(name),_t) args; \ +ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END + +#define __ITT_INTERNAL_INIT +#include "ittnotify_static.h" +#undef __ITT_INTERNAL_INIT + +ITT_GROUP_LIST(group_list); + +#pragma pack(push, 8) + +typedef struct ___itt_group_alias +{ + const char* env_var; + __itt_group_id groups; +} __itt_group_alias; + +static __itt_group_alias group_alias[] = { + { "KMP_FOR_TPROFILE", (__itt_group_id)(__itt_group_control | __itt_group_thread | __itt_group_sync | __itt_group_mark) }, + { "KMP_FOR_TCHECK", (__itt_group_id)(__itt_group_control | __itt_group_thread | __itt_group_sync | __itt_group_fsync | __itt_group_mark | __itt_group_suppress) }, + { NULL, (__itt_group_none) }, + { api_version, (__itt_group_none) } /* !!! Just to avoid unused code elimination !!! */ +}; + +#pragma pack(pop) + +#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT +#pragma warning(push) +#pragma warning(disable: 4054) /* warning C4054: 'type cast' : from function pointer 'XXX' to data pointer 'void *' */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +static __itt_api_info api_list[] = { +/* Define functions with static implementation */ +#undef ITT_STUB +#undef ITT_STUBV +#define ITT_STUB(api,type,name,args,params,nameindll,group,format) { ITT_TO_STR(ITT_JOIN(__itt_,nameindll)), (void**)(void*)&ITTNOTIFY_NAME(name), (void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), (void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), (__itt_group_id)(group)}, +#define ITT_STUBV ITT_STUB +#define __ITT_INTERNAL_INIT +#include "ittnotify_static.h" +#undef __ITT_INTERNAL_INIT +/* Define functions without static implementation */ +#undef ITT_STUB +#undef ITT_STUBV +#define ITT_STUB(api,type,name,args,params,nameindll,group,format) {ITT_TO_STR(ITT_JOIN(__itt_,nameindll)), (void**)(void*)&ITTNOTIFY_NAME(name), 
(void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), NULL, (__itt_group_id)(group)}, +#define ITT_STUBV ITT_STUB +#include "ittnotify_static.h" + {NULL, NULL, NULL, NULL, __itt_group_none} +}; + +#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT +#pragma warning(pop) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/* static part descriptor which handles. all notification api attributes. */ +__itt_global _N_(_ittapi_global) = { + ITT_MAGIC, /* identification info */ + ITT_MAJOR, ITT_MINOR, API_VERSION_BUILD, /* version info */ + 0, /* api_initialized */ + 0, /* mutex_initialized */ + 0, /* atomic_counter */ + MUTEX_INITIALIZER, /* mutex */ + NULL, /* dynamic library handle */ + NULL, /* error_handler */ + NULL, /* dll_path_ptr */ + (__itt_api_info*)&api_list, /* api_list_ptr */ + NULL, /* next __itt_global */ + NULL, /* thread_list */ + NULL, /* domain_list */ + NULL, /* string_list */ + __itt_collection_normal, /* collection state */ + NULL /* counter_list */ +}; + +typedef void (__itt_api_init_t)(__itt_global*, __itt_group_id); +typedef void (__itt_api_fini_t)(__itt_global*); + +/* ========================================================================= */ + +#ifdef ITT_NOTIFY_EXT_REPORT +ITT_EXTERN_C void _N_(error_handler)(__itt_error_code, va_list args); +#endif /* ITT_NOTIFY_EXT_REPORT */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT +#pragma warning(push) +#pragma warning(disable: 4055) /* warning C4055: 'type cast' : from data pointer 'void *' to function pointer 'XXX' */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +static void __itt_report_error(unsigned code_arg, ...) +{ + va_list args; + va_start(args, code_arg); + + // We use unsigned for the code argument and explicitly cast it here to the + // right enumerator because variadic functions are not compatible with + // default promotions. 
+ __itt_error_code code = (__itt_error_code)code_arg; + + if (_N_(_ittapi_global).error_handler != NULL) + { + __itt_error_handler_t* handler = (__itt_error_handler_t*)(size_t)_N_(_ittapi_global).error_handler; + handler(code, args); + } +#ifdef ITT_NOTIFY_EXT_REPORT + _N_(error_handler)(code, args); +#endif /* ITT_NOTIFY_EXT_REPORT */ + va_end(args); +} + +#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT +#pragma warning(pop) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createW),_init))(const wchar_t* name) +{ + __itt_domain *h_tail = NULL, *h = NULL; + + if (name == NULL) + { + return NULL; + } + + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + if (_N_(_ittapi_global).api_initialized) + { + if (ITTNOTIFY_NAME(domain_createW) && ITTNOTIFY_NAME(domain_createW) != ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createW),_init))) + { + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(domain_createW)(name); + } + } + for (h_tail = NULL, h = _N_(_ittapi_global).domain_list; h != NULL; h_tail = h, h = h->next) + { + if (h->nameW != NULL && !wcscmp(h->nameW, name)) break; + } + if (h == NULL) + { + NEW_DOMAIN_W(&_N_(_ittapi_global),h,h_tail,name); + } + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return h; +} + +static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createA),_init))(const char* name) +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_create),_init))(const char* name) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +{ + __itt_domain *h_tail = NULL, *h = NULL; + + if (name == NULL) + { + return NULL; + } + + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + if (_N_(_ittapi_global).api_initialized) + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + if (ITTNOTIFY_NAME(domain_createA) && ITTNOTIFY_NAME(domain_createA) != 
ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createA),_init))) + { + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(domain_createA)(name); + } +#else + if (ITTNOTIFY_NAME(domain_create) && ITTNOTIFY_NAME(domain_create) != ITT_VERSIONIZE(ITT_JOIN(_N_(domain_create),_init))) + { + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(domain_create)(name); + } +#endif + } + for (h_tail = NULL, h = _N_(_ittapi_global).domain_list; h != NULL; h_tail = h, h = h->next) + { + if (h->nameA != NULL && !__itt_fstrcmp(h->nameA, name)) break; + } + if (h == NULL) + { + NEW_DOMAIN_A(&_N_(_ittapi_global),h,h_tail,name); + } + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return h; +} + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createW),_init))(const wchar_t* name) +{ + __itt_string_handle *h_tail = NULL, *h = NULL; + + if (name == NULL) + { + return NULL; + } + + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + if (_N_(_ittapi_global).api_initialized) + { + if (ITTNOTIFY_NAME(string_handle_createW) && ITTNOTIFY_NAME(string_handle_createW) != ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createW),_init))) + { + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(string_handle_createW)(name); + } + } + for (h_tail = NULL, h = _N_(_ittapi_global).string_list; h != NULL; h_tail = h, h = h->next) + { + if (h->strW != NULL && !wcscmp(h->strW, name)) break; + } + if (h == NULL) + { + NEW_STRING_HANDLE_W(&_N_(_ittapi_global),h,h_tail,name); + } + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return h; +} + +static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createA),_init))(const char* name) +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_create),_init))(const char* name) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +{ + 
__itt_string_handle *h_tail = NULL, *h = NULL; + + if (name == NULL) + { + return NULL; + } + + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + if (_N_(_ittapi_global).api_initialized) + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + if (ITTNOTIFY_NAME(string_handle_createA) && ITTNOTIFY_NAME(string_handle_createA) != ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createA),_init))) + { + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(string_handle_createA)(name); + } +#else + if (ITTNOTIFY_NAME(string_handle_create) && ITTNOTIFY_NAME(string_handle_create) != ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_create),_init))) + { + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(string_handle_create)(name); + } +#endif + } + for (h_tail = NULL, h = _N_(_ittapi_global).string_list; h != NULL; h_tail = h, h = h->next) + { + if (h->strA != NULL && !__itt_fstrcmp(h->strA, name)) break; + } + if (h == NULL) + { + NEW_STRING_HANDLE_A(&_N_(_ittapi_global),h,h_tail,name); + } + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return h; +} + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createW),_init))(const wchar_t *name, const wchar_t *domain) +{ + __itt_counter_info_t *h_tail = NULL, *h = NULL; + __itt_metadata_type type = __itt_metadata_u64; + + if (name == NULL) + { + return NULL; + } + + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + if (_N_(_ittapi_global).api_initialized) + { + if (ITTNOTIFY_NAME(counter_createW) && ITTNOTIFY_NAME(counter_createW) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createW),_init))) + { + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(counter_createW)(name, domain); + } + } + for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) + { + if (h->nameW != NULL && h->type == type && !wcscmp(h->nameW, name) && ((h->domainW == NULL && domain == NULL) || + 
(h->domainW != NULL && domain != NULL && !wcscmp(h->domainW, domain)))) break; + + } + if (h == NULL) + { + NEW_COUNTER_W(&_N_(_ittapi_global),h,h_tail,name,domain,type); + } + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return (__itt_counter)h; +} + +static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createA),_init))(const char *name, const char *domain) +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create),_init))(const char *name, const char *domain) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +{ + __itt_counter_info_t *h_tail = NULL, *h = NULL; + __itt_metadata_type type = __itt_metadata_u64; + + if (name == NULL) + { + return NULL; + } + + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + if (_N_(_ittapi_global).api_initialized) + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + if (ITTNOTIFY_NAME(counter_createA) && ITTNOTIFY_NAME(counter_createA) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createA),_init))) + { + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(counter_createA)(name, domain); + } +#else + if (ITTNOTIFY_NAME(counter_create) && ITTNOTIFY_NAME(counter_create) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create),_init))) + { + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(counter_create)(name, domain); + } +#endif + } + for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) + { + if (h->nameA != NULL && h->type == type && !__itt_fstrcmp(h->nameA, name) && ((h->domainA == NULL && domain == NULL) || + (h->domainA != NULL && domain != NULL && !__itt_fstrcmp(h->domainA, domain)))) break; + } + if (h == NULL) + { + NEW_COUNTER_A(&_N_(_ittapi_global),h,h_tail,name,domain,type); + } + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return (__itt_counter)h; +} + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +static __itt_counter ITTAPI 
ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typedW),_init))(const wchar_t *name, const wchar_t *domain, __itt_metadata_type type) +{ + __itt_counter_info_t *h_tail = NULL, *h = NULL; + + if (name == NULL) + { + return NULL; + } + + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + if (_N_(_ittapi_global).api_initialized) + { + if (ITTNOTIFY_NAME(counter_create_typedW) && ITTNOTIFY_NAME(counter_create_typedW) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typedW),_init))) + { + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(counter_create_typedW)(name, domain, type); + } + } + for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) + { + if (h->nameW != NULL && h->type == type && !wcscmp(h->nameW, name) && ((h->domainW == NULL && domain == NULL) || + (h->domainW != NULL && domain != NULL && !wcscmp(h->domainW, domain)))) break; + + } + if (h == NULL) + { + NEW_COUNTER_W(&_N_(_ittapi_global),h,h_tail,name,domain,type); + } + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return (__itt_counter)h; +} + +static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typedA),_init))(const char *name, const char *domain, __itt_metadata_type type) +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typed),_init))(const char *name, const char *domain, __itt_metadata_type type) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +{ + __itt_counter_info_t *h_tail = NULL, *h = NULL; + + if (name == NULL) + { + return NULL; + } + + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + if (_N_(_ittapi_global).api_initialized) + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + if (ITTNOTIFY_NAME(counter_create_typedA) && ITTNOTIFY_NAME(counter_create_typedA) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typedA),_init))) + { + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(counter_create_typedA)(name, domain, type); + } +#else + if 
(ITTNOTIFY_NAME(counter_create_typed) && ITTNOTIFY_NAME(counter_create_typed) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typed),_init))) + { + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(counter_create_typed)(name, domain, type); + } +#endif + } + for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) + { + if (h->nameA != NULL && h->type == type && !__itt_fstrcmp(h->nameA, name) && ((h->domainA == NULL && domain == NULL) || + (h->domainA != NULL && domain != NULL && !__itt_fstrcmp(h->domainA, domain)))) break; + } + if (h == NULL) + { + NEW_COUNTER_A(&_N_(_ittapi_global),h,h_tail,name,domain,type); + } + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return (__itt_counter)h; +} + +/* -------------------------------------------------------------------------- */ + +static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(pause),_init))(void) +{ + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL) + { + __itt_init_ittlib_name(NULL, __itt_group_all); + } + if (ITTNOTIFY_NAME(pause) && ITTNOTIFY_NAME(pause) != ITT_VERSIONIZE(ITT_JOIN(_N_(pause),_init))) + { + ITTNOTIFY_NAME(pause)(); + } + else + { + _N_(_ittapi_global).state = __itt_collection_paused; + } +} + +static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(resume),_init))(void) +{ + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL) + { + __itt_init_ittlib_name(NULL, __itt_group_all); + } + if (ITTNOTIFY_NAME(resume) && ITTNOTIFY_NAME(resume) != ITT_VERSIONIZE(ITT_JOIN(_N_(resume),_init))) + { + ITTNOTIFY_NAME(resume)(); + } + else + { + _N_(_ittapi_global).state = __itt_collection_normal; + } +} + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init))(const wchar_t* name) +{ + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL) + { + 
__itt_init_ittlib_name(NULL, __itt_group_all); + } + if (ITTNOTIFY_NAME(thread_set_nameW) && ITTNOTIFY_NAME(thread_set_nameW) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init))) + { + ITTNOTIFY_NAME(thread_set_nameW)(name); + } +} + +static int ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_name_setW),_init))(const wchar_t* name, int namelen) +{ + (void)namelen; + ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init))(name); + return 0; +} + +static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameA),_init))(const char* name) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_name),_init))(const char* name) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +{ + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL) + { + __itt_init_ittlib_name(NULL, __itt_group_all); + } +#if ITT_PLATFORM==ITT_PLATFORM_WIN + if (ITTNOTIFY_NAME(thread_set_nameA) && ITTNOTIFY_NAME(thread_set_nameA) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameA),_init))) + { + ITTNOTIFY_NAME(thread_set_nameA)(name); + } +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + if (ITTNOTIFY_NAME(thread_set_name) && ITTNOTIFY_NAME(thread_set_name) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_name),_init))) + { + ITTNOTIFY_NAME(thread_set_name)(name); + } +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +} + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +static int ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_name_setA),_init))(const char* name, int namelen) +{ + (void)namelen; + ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameA),_init))(name); + return 0; +} +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +static int ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_name_set),_init))(const char* name, int namelen) +{ + (void)namelen; + ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_name),_init))(name); + return 0; +} +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init))(void) +{ + if (!_N_(_ittapi_global).api_initialized 
/* Return 1 when character c occurs in the NUL-terminated set sep, else 0. */
static int __itt_fsplit_is_sep(char c, const char* sep)
{
    int j;

    for (j = 0; sep[j]; j++)
        if (c == sep[j])
            return 1;
    return 0;
}

/*
 * Extract the next token from s, where any character of sep is a delimiter.
 *
 * s    string to scan.
 * sep  set of delimiter characters.
 * out  receives a pointer to the first character of the token (inside s).
 * len  receives the token length; the token is NOT NUL-terminated.
 *
 * Returns a pointer to where the next scan should start (trailing delimiters
 * are already skipped; it may point at the terminating NUL), or NULL when any
 * argument is NULL or s holds no further token. *out and *len are left
 * untouched when NULL is returned.
 */
static const char* __itt_fsplit(const char* s, const char* sep, const char** out, int* len)
{
    int i = 0;

    if (!s || !sep || !out || !len)
        return NULL;

    /* Skip delimiters in front of the token. */
    while (s[i] && __itt_fsplit_is_sep(s[i], sep))
        i++;

    if (!s[i])
        return NULL;

    *len = 0;
    *out = &s[i];

    /* Measure the token itself. */
    while (s[i] && !__itt_fsplit_is_sep(s[i], sep))
    {
        i++;
        (*len)++;
    }

    /* Skip delimiters behind the token so the next call starts on a token. */
    while (s[i] && __itt_fsplit_is_sep(s[i], sep))
        i++;

    return &s[i];
}
+ */ +static const char* __itt_get_env_var(const char* name) +{ +#define MAX_ENV_VALUE_SIZE 4086 + static char env_buff[MAX_ENV_VALUE_SIZE]; + static char* env_value = (char*)env_buff; + + if (name != NULL) + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + size_t max_len = MAX_ENV_VALUE_SIZE - (size_t)(env_value - env_buff); + DWORD rc = GetEnvironmentVariableA(name, env_value, (DWORD)max_len); + if (rc >= max_len) + __itt_report_error(__itt_error_env_too_long, name, (size_t)rc - 1, (size_t)(max_len - 1)); + else if (rc > 0) + { + const char* ret = (const char*)env_value; + env_value += rc + 1; + return ret; + } + else + { + /* If environment variable is empty, GetEnvirornmentVariables() + * returns zero (number of characters (not including terminating null), + * and GetLastError() returns ERROR_SUCCESS. */ + DWORD err = GetLastError(); + if (err == ERROR_SUCCESS) + return env_value; + + if (err != ERROR_ENVVAR_NOT_FOUND) + __itt_report_error(__itt_error_cant_read_env, name, (int)err); + } +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ + char* env = getenv(name); + if (env != NULL) + { + size_t len = __itt_fstrnlen(env, MAX_ENV_VALUE_SIZE); + size_t max_len = MAX_ENV_VALUE_SIZE - (size_t)(env_value - env_buff); + if (len < max_len) + { + const char* ret = (const char*)env_value; + __itt_fstrcpyn(env_value, max_len, env, len + 1); + env_value += len + 1; + return ret; + } else + __itt_report_error(__itt_error_env_too_long, name, (size_t)len, (size_t)(max_len - 1)); + } +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + } + return NULL; +} + +static const char* __itt_get_lib_name(void) +{ + const char* lib_name = __itt_get_env_var(ITT_TO_STR(LIB_VAR_NAME)); + +#ifdef __ANDROID__ + if (lib_name == NULL) + { + +#if ITT_ARCH==ITT_ARCH_IA32 || ITT_ARCH==ITT_ARCH_ARM + const char* const marker_filename = "com.intel.itt.collector_lib_32"; +#else + const char* const marker_filename = "com.intel.itt.collector_lib_64"; +#endif + + char system_wide_marker_filename[PATH_MAX] = {0}; + int 
itt_marker_file_fd = -1; + ssize_t res = 0; + + res = snprintf(system_wide_marker_filename, PATH_MAX - 1, "%s%s", "/data/local/tmp/", marker_filename); + if (res < 0) + { + ITT_ANDROID_LOGE("Unable to concatenate marker file string."); + return lib_name; + } + itt_marker_file_fd = open(system_wide_marker_filename, O_RDONLY); + + if (itt_marker_file_fd == -1) + { + const pid_t my_pid = getpid(); + char cmdline_path[PATH_MAX] = {0}; + char package_name[PATH_MAX] = {0}; + char app_sandbox_file[PATH_MAX] = {0}; + int cmdline_fd = 0; + + ITT_ANDROID_LOGI("Unable to open system-wide marker file."); + res = snprintf(cmdline_path, PATH_MAX - 1, "/proc/%d/cmdline", my_pid); + if (res < 0) + { + ITT_ANDROID_LOGE("Unable to get cmdline path string."); + return lib_name; + } + + ITT_ANDROID_LOGI("CMD file: %s\n", cmdline_path); + cmdline_fd = open(cmdline_path, O_RDONLY); + if (cmdline_fd == -1) + { + ITT_ANDROID_LOGE("Unable to open %s file!", cmdline_path); + return lib_name; + } + res = read(cmdline_fd, package_name, PATH_MAX - 1); + if (res == -1) + { + ITT_ANDROID_LOGE("Unable to read %s file!", cmdline_path); + res = close(cmdline_fd); + if (res == -1) + { + ITT_ANDROID_LOGE("Unable to close %s file!", cmdline_path); + } + return lib_name; + } + res = close(cmdline_fd); + if (res == -1) + { + ITT_ANDROID_LOGE("Unable to close %s file!", cmdline_path); + return lib_name; + } + ITT_ANDROID_LOGI("Package name: %s\n", package_name); + res = snprintf(app_sandbox_file, PATH_MAX - 1, "/data/data/%s/%s", package_name, marker_filename); + if (res < 0) + { + ITT_ANDROID_LOGE("Unable to concatenate marker file string."); + return lib_name; + } + + ITT_ANDROID_LOGI("Lib marker file name: %s\n", app_sandbox_file); + itt_marker_file_fd = open(app_sandbox_file, O_RDONLY); + if (itt_marker_file_fd == -1) + { + ITT_ANDROID_LOGE("Unable to open app marker file!"); + return lib_name; + } + } + + { + char itt_lib_name[PATH_MAX] = {0}; + + res = read(itt_marker_file_fd, itt_lib_name, 
/* Avoid clashes with std::min, reported by tbb team.
 * The whole expansion is parenthesized so the conditional expression keeps
 * its meaning when the macro is used inside a larger expression. Each
 * argument may still be evaluated twice, so pass side-effect-free operands. */
#define __itt_min(a,b) ((a) < (b) ? (a) : (b))
*/ + for (i = 0; group_list[i].id != __itt_group_none; i++) + if (group_list[i].id != __itt_group_all && + group_list[i].id > __itt_group_splitter_min && + group_list[i].id < __itt_group_splitter_max) + res = (__itt_group_id)(res | group_list[i].id); + return res; + } + else + { + for (i = 0; group_alias[i].env_var != NULL; i++) + if (__itt_get_env_var(group_alias[i].env_var) != NULL) + return group_alias[i].groups; + } + + return res; +} + +#undef __itt_min + +static int __itt_lib_version(lib_t lib) +{ + if (lib == NULL) + return 0; + if (__itt_get_proc(lib, "__itt_api_init")) + return 2; + if (__itt_get_proc(lib, "__itt_api_version")) + return 1; + return 0; +} + +/* It's not used right now! Comment it out to avoid warnings. +static void __itt_reinit_all_pointers(void) +{ + int i; + // Fill all pointers with initial stubs + for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) + *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].init_func; +} +*/ + +static void __itt_nullify_all_pointers(void) +{ + int i; + /* Nulify all pointers except domain_create, string_handle_create and counter_create */ + for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) + *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func; +} + +#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT +#pragma warning(push) +#pragma warning(disable: 4054) /* warning C4054: 'type cast' : from function pointer 'XXX' to data pointer 'void *' */ +#pragma warning(disable: 4055) /* warning C4055: 'type cast' : from data pointer 'void *' to function pointer 'XXX' */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +ITT_EXTERN_C void _N_(fini_ittlib)(void) +{ + __itt_api_fini_t* __itt_api_fini_ptr = NULL; + static volatile TIDT current_thread = 0; + + if (_N_(_ittapi_global).api_initialized) + { + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + if (_N_(_ittapi_global).api_initialized) + { + if (current_thread == 
0) + { + if (PTHREAD_SYMBOLS) current_thread = __itt_thread_id(); + if (_N_(_ittapi_global).lib != NULL) + { + __itt_api_fini_ptr = (__itt_api_fini_t*)(size_t)__itt_get_proc(_N_(_ittapi_global).lib, "__itt_api_fini"); + } + if (__itt_api_fini_ptr) + { + __itt_api_fini_ptr(&_N_(_ittapi_global)); + } + + __itt_nullify_all_pointers(); + + /* TODO: !!! not safe !!! don't support unload so far. + * if (_N_(_ittapi_global).lib != NULL) + * __itt_unload_lib(_N_(_ittapi_global).lib); + * _N_(_ittapi_global).lib = NULL; + */ + _N_(_ittapi_global).api_initialized = 0; + current_thread = 0; + } + } + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + } +} + +ITT_EXTERN_C int _N_(init_ittlib)(const char* lib_name, __itt_group_id init_groups) +{ + int i; + __itt_group_id groups; +#ifdef ITT_COMPLETE_GROUP + __itt_group_id zero_group = __itt_group_none; +#endif /* ITT_COMPLETE_GROUP */ + static volatile TIDT current_thread = 0; + + if (!_N_(_ittapi_global).api_initialized) + { +#ifndef ITT_SIMPLE_INIT + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); +#endif /* ITT_SIMPLE_INIT */ + + if (!_N_(_ittapi_global).api_initialized) + { + if (current_thread == 0) + { + if (PTHREAD_SYMBOLS) current_thread = __itt_thread_id(); + if (lib_name == NULL) + { + lib_name = __itt_get_lib_name(); + } + groups = __itt_get_groups(); + if (DL_SYMBOLS && (groups != __itt_group_none || lib_name != NULL)) + { + _N_(_ittapi_global).lib = __itt_load_lib((lib_name == NULL) ? 
ittnotify_lib_name : lib_name); + + if (_N_(_ittapi_global).lib != NULL) + { + __itt_api_init_t* __itt_api_init_ptr; + int lib_version = __itt_lib_version(_N_(_ittapi_global).lib); + + switch (lib_version) { + case 0: + groups = __itt_group_legacy; + KMP_FALLTHROUGH(); + case 1: + /* Fill all pointers from dynamic library */ + for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) + { + if (_N_(_ittapi_global).api_list_ptr[i].group & groups & init_groups) + { + *_N_(_ittapi_global).api_list_ptr[i].func_ptr = (void*)__itt_get_proc(_N_(_ittapi_global).lib, _N_(_ittapi_global).api_list_ptr[i].name); + if (*_N_(_ittapi_global).api_list_ptr[i].func_ptr == NULL) + { + /* Restore pointers for function with static implementation */ + *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func; + __itt_report_error(__itt_error_no_symbol, lib_name, _N_(_ittapi_global).api_list_ptr[i].name); +#ifdef ITT_COMPLETE_GROUP + zero_group = (__itt_group_id)(zero_group | _N_(_ittapi_global).api_list_ptr[i].group); +#endif /* ITT_COMPLETE_GROUP */ + } + } + else + *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func; + } + + if (groups == __itt_group_legacy) + { + /* Compatibility with legacy tools */ + ITTNOTIFY_NAME(thread_ignore) = ITTNOTIFY_NAME(thr_ignore); +#if ITT_PLATFORM==ITT_PLATFORM_WIN + ITTNOTIFY_NAME(sync_createA) = ITTNOTIFY_NAME(sync_set_nameA); + ITTNOTIFY_NAME(sync_createW) = ITTNOTIFY_NAME(sync_set_nameW); +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ + ITTNOTIFY_NAME(sync_create) = ITTNOTIFY_NAME(sync_set_name); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + ITTNOTIFY_NAME(sync_prepare) = ITTNOTIFY_NAME(notify_sync_prepare); + ITTNOTIFY_NAME(sync_cancel) = ITTNOTIFY_NAME(notify_sync_cancel); + ITTNOTIFY_NAME(sync_acquired) = ITTNOTIFY_NAME(notify_sync_acquired); + ITTNOTIFY_NAME(sync_releasing) = ITTNOTIFY_NAME(notify_sync_releasing); + } + +#ifdef ITT_COMPLETE_GROUP + for (i = 0; 
_N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) + if (_N_(_ittapi_global).api_list_ptr[i].group & zero_group) + *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func; +#endif /* ITT_COMPLETE_GROUP */ + break; + case 2: + __itt_api_init_ptr = (__itt_api_init_t*)(size_t)__itt_get_proc(_N_(_ittapi_global).lib, "__itt_api_init"); + if (__itt_api_init_ptr) + __itt_api_init_ptr(&_N_(_ittapi_global), init_groups); + break; + } + } + else + { + __itt_nullify_all_pointers(); + + __itt_report_error(__itt_error_no_module, lib_name, +#if ITT_PLATFORM==ITT_PLATFORM_WIN + __itt_system_error() +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + dlerror() +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + ); + } + } + else + { + __itt_nullify_all_pointers(); + } + _N_(_ittapi_global).api_initialized = 1; + current_thread = 0; + /* !!! Just to avoid unused code elimination !!! */ + if (__itt_fini_ittlib_ptr == _N_(fini_ittlib)) current_thread = 0; + } + } + +#ifndef ITT_SIMPLE_INIT + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); +#endif /* ITT_SIMPLE_INIT */ + } + + /* Evaluating if any function ptr is non empty and it's in init_groups */ + for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) + { + if (*_N_(_ittapi_global).api_list_ptr[i].func_ptr != _N_(_ittapi_global).api_list_ptr[i].null_func && + _N_(_ittapi_global).api_list_ptr[i].group & init_groups) + { + return 1; + } + } + return 0; +} + +ITT_EXTERN_C __itt_error_handler_t* _N_(set_error_handler)(__itt_error_handler_t* handler) +{ + __itt_error_handler_t* prev = (__itt_error_handler_t*)(size_t)_N_(_ittapi_global).error_handler; + _N_(_ittapi_global).error_handler = (void*)(size_t)handler; + return prev; +} + +#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT +#pragma warning(pop) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ diff --git a/runtime/src/z_Linux_asm.S b/runtime/src/z_Linux_asm.S index 0d8885eca1c74..b491fcf186aaa 100644 --- 
a/runtime/src/z_Linux_asm.S +++ b/runtime/src/z_Linux_asm.S @@ -495,13 +495,21 @@ __kmp_unnamed_critical_addr: # endif /* !KMP_ASM_INTRINS */ //------------------------------------------------------------------------ -// typedef void (*microtask_t)( int *gtid, int *tid, ... ); -// // int -// __kmp_invoke_microtask( microtask_t pkfn, int gtid, int tid, -// int argc, void *p_argv[] ) { -// (*pkfn)( & gtid, & gtid, argv[0], ... ); -// return 1; +// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), +// int gtid, int tid, +// int argc, void *p_argv[] +// #if OMPT_SUPPORT +// , +// void **exit_frame_ptr +// #endif +// ) { +// #if OMPT_SUPPORT +// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); +// #endif +// +// (*pkfn)( & gtid, & tid, argv[0], ... ); +// return 1; // } // -- Begin __kmp_invoke_microtask @@ -991,14 +999,21 @@ KMP_LABEL(invoke_3): # endif /* !KMP_ASM_INTRINS */ //------------------------------------------------------------------------ -// typedef void (*microtask_t)( int *gtid, int *tid, ... ); -// // int // __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), -// int gtid, int tid, -// int argc, void *p_argv[] ) { -// (*pkfn)( & gtid, & tid, argv[0], ... ); -// return 1; +// int gtid, int tid, +// int argc, void *p_argv[] +// #if OMPT_SUPPORT +// , +// void **exit_frame_ptr +// #endif +// ) { +// #if OMPT_SUPPORT +// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); +// #endif +// +// (*pkfn)( & gtid, & tid, argv[0], ... ); +// return 1; // } // // note: at call to pkfn must have %rsp 128-byte aligned for compiler @@ -1192,15 +1207,27 @@ KMP_LABEL(kmp_1_exit): #if (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 //------------------------------------------------------------------------ -// -// typedef void (*microtask_t)( int *gtid, int *tid, ... ); -// // int // __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), -// int gtid, int tid, -// int argc, void *p_argv[] ) { -// (*pkfn)( & gtid, & tid, argv[0], ... 
); -// return 1; +// int gtid, int tid, +// int argc, void *p_argv[] +// #if OMPT_SUPPORT +// , +// void **exit_frame_ptr +// #endif +// ) { +// #if OMPT_SUPPORT +// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); +// #endif +// +// (*pkfn)( & gtid, & tid, argv[0], ... ); +// +// // FIXME: This is done at call-site and can be removed here. +// #if OMPT_SUPPORT +// *exit_frame_ptr = 0; +// #endif +// +// return 1; // } // // parameters: @@ -1306,15 +1333,27 @@ KMP_LABEL(kmp_1): #if KMP_ARCH_PPC64 //------------------------------------------------------------------------ -// -// typedef void (*microtask_t)( int *gtid, int *tid, ... ); -// // int // __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), -// int gtid, int tid, -// int argc, void *p_argv[] ) { -// (*pkfn)( & gtid, & tid, argv[0], ... ); -// return 1; +// int gtid, int tid, +// int argc, void *p_argv[] +// #if OMPT_SUPPORT +// , +// void **exit_frame_ptr +// #endif +// ) { +// #if OMPT_SUPPORT +// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); +// #endif +// +// (*pkfn)( & gtid, & tid, argv[0], ... ); +// +// // FIXME: This is done at call-site and can be removed here. 
+// #if OMPT_SUPPORT +// *exit_frame_ptr = 0; +// #endif +// +// return 1; // } // // parameters: @@ -1524,6 +1563,173 @@ __kmp_invoke_microtask: #endif /* KMP_ARCH_PPC64 */ +#if KMP_ARCH_RISCV64 + +//------------------------------------------------------------------------ +// +// typedef void (*microtask_t)(int *gtid, int *tid, ...); +// +// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, +// void *p_argv[] +// #if OMPT_SUPPORT +// , +// void **exit_frame_ptr +// #endif +// ) { +// #if OMPT_SUPPORT +// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); +// #endif +// +// (*pkfn)(>id, &tid, argv[0], ...); +// +// return 1; +// } +// +// Parameters: +// a0: pkfn +// a1: gtid +// a2: tid +// a3: argc +// a4: p_argv +// a5: exit_frame_ptr +// +// Locals: +// __gtid: gtid param pushed on stack so can pass >id to pkfn +// __tid: tid param pushed on stack so can pass &tid to pkfn +// +// Temp. registers: +// +// t0: used to calculate the dynamic stack size / used to hold pkfn address +// t1: used as temporary for stack placement calculation +// t2: used as temporary for stack arguments +// t3: used as temporary for number of remaining pkfn parms +// t4: used to traverse p_argv array +// +// return: a0 (always 1/TRUE) +// + +__gtid = -20 +__tid = -24 + +// -- Begin __kmp_invoke_microtask +// mark_begin; + .text + .globl __kmp_invoke_microtask + .p2align 1 + .type __kmp_invoke_microtask,@function +__kmp_invoke_microtask: + .cfi_startproc + + // First, save ra and fp + addi sp, sp, -16 + sd ra, 8(sp) + sd fp, 0(sp) + addi fp, sp, 16 + .cfi_def_cfa fp, 0 + .cfi_offset ra, -8 + .cfi_offset fp, -16 + + // Compute the dynamic stack size: + // + // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by + // reference + // - We need 8 bytes for each argument that cannot be passed to the 'pkfn' + // function by register. 
Given that we have 8 of such registers (a[0-7]) + // and two + 'argc' arguments (consider &gtid and &tid), we need to + // reserve max(0, argc - 6)*8 extra bytes + // + // The total number of bytes is then max(0, argc - 6)*8 + 8 + + // Compute max(0, argc - 6) using the following bithack: + // max(0, x) = x - (x & (x >> 31)), where x := argc - 6 + // Source: http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax + addi t0, a3, -6 + srai t1, t0, 31 + and t1, t0, t1 + sub t0, t0, t1 + + addi t0, t0, 1 + + slli t0, t0, 3 + sub sp, sp, t0 + + // Align the stack to 16 bytes + andi sp, sp, -16 + + mv t0, a0 + mv t3, a3 + mv t4, a4 + +#if OMPT_SUPPORT + // Save frame pointer into exit_frame + sd fp, 0(a5) +#endif + + // Prepare arguments for the pkfn function (first 8 using a0-a7 registers) + + sw a1, __gtid(fp) + sw a2, __tid(fp) + + addi a0, fp, __gtid + addi a1, fp, __tid + + beqz t3, .L_kmp_3 + ld a2, 0(t4) + + addi t3, t3, -1 + beqz t3, .L_kmp_3 + ld a3, 8(t4) + + addi t3, t3, -1 + beqz t3, .L_kmp_3 + ld a4, 16(t4) + + addi t3, t3, -1 + beqz t3, .L_kmp_3 + ld a5, 24(t4) + + addi t3, t3, -1 + beqz t3, .L_kmp_3 + ld a6, 32(t4) + + addi t3, t3, -1 + beqz t3, .L_kmp_3 + ld a7, 40(t4) + + // Prepare any additional argument passed through the stack + addi t4, t4, 48 + mv t1, sp + j .L_kmp_2 +.L_kmp_1: + ld t2, 0(t4) + sd t2, 0(t1) + addi t4, t4, 8 + addi t1, t1, 8 +.L_kmp_2: + addi t3, t3, -1 + bnez t3, .L_kmp_1 + +.L_kmp_3: + // Call pkfn function + jalr t0 + + // Restore stack and return + + addi a0, zero, 1 + + addi sp, fp, -16 + ld fp, 0(sp) + ld ra, 8(sp) + addi sp, sp, 16 + ret +.Lfunc_end0: + .size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask + .cfi_endproc + +// -- End __kmp_invoke_microtask + +#endif /* KMP_ARCH_RISCV64 */ + #if KMP_ARCH_ARM || KMP_ARCH_MIPS .data .comm .gomp_critical_user_,32,8 @@ -1535,7 +1741,7 @@ __kmp_unnamed_critical_addr: .size __kmp_unnamed_critical_addr,4 #endif /* KMP_ARCH_ARM */ -#if KMP_ARCH_PPC64 ||
KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 +#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 .data .comm .gomp_critical_user_,32,8 .data @@ -1544,7 +1750,8 @@ __kmp_unnamed_critical_addr: __kmp_unnamed_critical_addr: .8byte .gomp_critical_user_ .size __kmp_unnamed_critical_addr,8 -#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 */ +#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || + KMP_ARCH_RISCV64 */ #if KMP_OS_LINUX # if KMP_ARCH_ARM diff --git a/runtime/src/z_Linux_util.cpp b/runtime/src/z_Linux_util.cpp index 1983fc2b98341..0ee12927e4bf8 100644 --- a/runtime/src/z_Linux_util.cpp +++ b/runtime/src/z_Linux_util.cpp @@ -50,6 +50,9 @@ #include <mach/mach.h> #include <sys/sysctl.h> #elif KMP_OS_DRAGONFLY || KMP_OS_FREEBSD +#include <sys/types.h> +#include <sys/sysctl.h> +#include <sys/user.h> #include <pthread_np.h> #elif KMP_OS_NETBSD #include <sys/types.h> @@ -97,7 +100,7 @@ static void __kmp_print_cond(char *buffer, kmp_cond_align_t *cond) { } #endif -#if (KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED) +#if ((KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED) /* Affinity support */ @@ -119,16 +122,21 @@ void __kmp_affinity_bind_thread(int which) { void __kmp_affinity_determine_capable(const char *env_var) { // Check and see if the OS supports thread affinity. +#if KMP_OS_LINUX #define KMP_CPU_SET_SIZE_LIMIT (1024 * 1024) +#elif KMP_OS_FREEBSD +#define KMP_CPU_SET_SIZE_LIMIT (sizeof(cpuset_t)) +#endif - int gCode; - int sCode; - unsigned char *buf; - buf = (unsigned char *)KMP_INTERNAL_MALLOC(KMP_CPU_SET_SIZE_LIMIT); +#if KMP_OS_LINUX // If Linux* OS: // If the syscall fails or returns a suggestion for the size, // then we don't have to search for an appropriate size.
+ int gCode; + int sCode; + unsigned char *buf; + buf = (unsigned char *)KMP_INTERNAL_MALLOC(KMP_CPU_SET_SIZE_LIMIT); gCode = syscall(__NR_sched_getaffinity, 0, KMP_CPU_SET_SIZE_LIMIT, buf); KA_TRACE(30, ("__kmp_affinity_determine_capable: " "initial getaffinity call returned %d errno = %d\n", @@ -267,6 +275,23 @@ void __kmp_affinity_determine_capable(const char *env_var) { } } } +#elif KMP_OS_FREEBSD + int gCode; + unsigned char *buf; + buf = (unsigned char *)KMP_INTERNAL_MALLOC(KMP_CPU_SET_SIZE_LIMIT); + gCode = pthread_getaffinity_np(pthread_self(), KMP_CPU_SET_SIZE_LIMIT, reinterpret_cast<cpuset_t *>(buf)); + KA_TRACE(30, ("__kmp_affinity_determine_capable: " + "initial getaffinity call returned %d errno = %d\n", + gCode, errno)); + if (gCode == 0) { + KMP_AFFINITY_ENABLE(KMP_CPU_SET_SIZE_LIMIT); + KA_TRACE(10, ("__kmp_affinity_determine_capable: " + "affinity supported (mask size %d)\n"< + (int)__kmp_affin_mask_size)); + KMP_INTERNAL_FREE(buf); + return; + } +#endif // save uncaught error code // int error = errno; KMP_INTERNAL_FREE(buf); @@ -802,6 +827,13 @@ void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size) { and also gives the user the stack space they requested for all threads */ stack_size += gtid * __kmp_stkoffset * 2; +#if defined(__ANDROID__) && __ANDROID_API__ < 19 + // Round the stack size to a multiple of the page size. Older versions of + // Android (until KitKat) would fail pthread_attr_setstacksize with EINVAL + // if the stack size was not a multiple of the page size.
+ stack_size = (stack_size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); +#endif + KA_TRACE(10, ("__kmp_create_worker: T#%d, default stacksize = %lu bytes, " "__kmp_stksize = %lu bytes, final stacksize = %lu bytes\n", gtid, KMP_DEFAULT_STKSIZE, __kmp_stksize, stack_size)); @@ -1972,7 +2004,7 @@ int __kmp_is_address_mapped(void *addr) { int found = 0; int rc; -#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_HURD +#if KMP_OS_LINUX || KMP_OS_HURD /* On GNUish OSes, read the /proc/<pid>/maps pseudo-file to get all the address ranges mapped into the address space. */ @@ -2010,6 +2042,44 @@ int __kmp_is_address_mapped(void *addr) { // Free resources. fclose(file); KMP_INTERNAL_FREE(name); +#elif KMP_OS_FREEBSD + char *buf; + size_t lstsz; + int mib[] = {CTL_KERN, KERN_PROC, KERN_PROC_VMMAP, getpid()}; + rc = sysctl(mib, 4, NULL, &lstsz, NULL, 0); + if (rc < 0) + return 0; + // We pass from number of vm entry's semantic + // to size of whole entry map list. + lstsz = lstsz * 4 / 3; + buf = reinterpret_cast<char *>(kmpc_malloc(lstsz)); + rc = sysctl(mib, 4, buf, &lstsz, NULL, 0); + if (rc < 0) { + kmpc_free(buf); + return 0; + } + + char *lw = buf; + char *up = buf + lstsz; + + while (lw < up) { + struct kinfo_vmentry *cur = reinterpret_cast<struct kinfo_vmentry *>(lw); + size_t cursz = cur->kve_structsize; + if (cursz == 0) + break; + void *start = reinterpret_cast<void *>(cur->kve_start); + void *end = reinterpret_cast<void *>(cur->kve_end); + // Readable/Writable addresses within current map entry + if ((addr >= start) && (addr < end)) { + if ((cur->kve_protection & KVME_PROT_READ) != 0 && + (cur->kve_protection & KVME_PROT_WRITE) != 0) { + found = 1; + break; + } + } + lw += cursz; + } + kmpc_free(buf); #elif KMP_OS_DARWIN @@ -2331,7 +2401,8 @@ finish: // Clean up and exit.
#endif // USE_LOAD_BALANCE #if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || \ - ((KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64) || KMP_ARCH_PPC64) + ((KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64) || \ + KMP_ARCH_PPC64 || KMP_ARCH_RISCV64) // we really only need the case with 1 argument, because CLANG always build // a struct of pointers to shared variables referenced in the outlined function @@ -2415,10 +2486,6 @@ int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, break; } -#if OMPT_SUPPORT - *exit_frame_ptr = 0; -#endif - return 1; } -- cgit v1.2.3