Diffstat (limited to 'runtime/src/kmp_alloc.cpp')
-rw-r--r--  runtime/src/kmp_alloc.cpp  397
1 file changed, 326 insertions, 71 deletions
diff --git a/runtime/src/kmp_alloc.cpp b/runtime/src/kmp_alloc.cpp
index ae60fef02d128..861940120e908 100644
--- a/runtime/src/kmp_alloc.cpp
+++ b/runtime/src/kmp_alloc.cpp
@@ -4,10 +4,9 @@
//===----------------------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source Licenses. See LICENSE.txt for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -34,7 +33,7 @@ typedef kmp_int64 bufsize;
#endif
#else
typedef ssize_t bufsize;
-#endif
+#endif // KMP_OS_WINDOWS
/* The three modes of operation are, fifo search, lifo search, and best-fit */
@@ -1221,74 +1220,216 @@ void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL) {
KE_TRACE(30, ("<- __kmp_thread_free()\n"));
}
-#if OMP_50_ENABLED
/* OMP 5.0 Memory Management support */
-static int (*p_hbw_check)(void);
-static void *(*p_hbw_malloc)(size_t);
-static void (*p_hbw_free)(void *);
-static int (*p_hbw_set_policy)(int);
static const char *kmp_mk_lib_name;
static void *h_memkind;
+/* memkind experimental API: */
+// memkind_alloc
+static void *(*kmp_mk_alloc)(void *k, size_t sz);
+// memkind_free
+static void (*kmp_mk_free)(void *kind, void *ptr);
+// memkind_check_available
+static int (*kmp_mk_check)(void *kind);
+// kinds we are going to use
+static void **mk_default;
+static void **mk_interleave;
+static void **mk_hbw;
+static void **mk_hbw_interleave;
+static void **mk_hbw_preferred;
+static void **mk_hugetlb;
+static void **mk_hbw_hugetlb;
+static void **mk_hbw_preferred_hugetlb;
+
+#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
+static inline void chk_kind(void ***pkind) {
+ KMP_DEBUG_ASSERT(pkind);
+ if (*pkind) // symbol found
+ if (kmp_mk_check(**pkind)) // kind not available or error
+ *pkind = NULL;
+}
+#endif
void __kmp_init_memkind() {
+// as of 2018-07-31 memkind does not support Windows*, exclude it for now
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
+ // use of statically linked memkind is problematic, as it depends on libnuma
kmp_mk_lib_name = "libmemkind.so";
h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY);
if (h_memkind) {
- p_hbw_check = (int (*)())dlsym(h_memkind, "hbw_check_available");
- p_hbw_malloc = (void *(*)(size_t))dlsym(h_memkind, "hbw_malloc");
- p_hbw_free = (void (*)(void *))dlsym(h_memkind, "hbw_free");
- p_hbw_set_policy = (int (*)(int))dlsym(h_memkind, "hbw_set_policy");
- if (p_hbw_check && p_hbw_malloc && p_hbw_free && p_hbw_set_policy) {
+ kmp_mk_check = (int (*)(void *))dlsym(h_memkind, "memkind_check_available");
+ kmp_mk_alloc =
+ (void *(*)(void *, size_t))dlsym(h_memkind, "memkind_malloc");
+ kmp_mk_free = (void (*)(void *, void *))dlsym(h_memkind, "memkind_free");
+ mk_default = (void **)dlsym(h_memkind, "MEMKIND_DEFAULT");
+ if (kmp_mk_check && kmp_mk_alloc && kmp_mk_free && mk_default &&
+ !kmp_mk_check(*mk_default)) {
__kmp_memkind_available = 1;
- if (p_hbw_check() == 0) {
- p_hbw_set_policy(1); // return NULL is not enough memory
- __kmp_hbw_mem_available = 1; // found HBW memory available
- }
- return; // success - all symbols resolved
+ mk_interleave = (void **)dlsym(h_memkind, "MEMKIND_INTERLEAVE");
+ chk_kind(&mk_interleave);
+ mk_hbw = (void **)dlsym(h_memkind, "MEMKIND_HBW");
+ chk_kind(&mk_hbw);
+ mk_hbw_interleave = (void **)dlsym(h_memkind, "MEMKIND_HBW_INTERLEAVE");
+ chk_kind(&mk_hbw_interleave);
+ mk_hbw_preferred = (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED");
+ chk_kind(&mk_hbw_preferred);
+ mk_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HUGETLB");
+ chk_kind(&mk_hugetlb);
+ mk_hbw_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HBW_HUGETLB");
+ chk_kind(&mk_hbw_hugetlb);
+ mk_hbw_preferred_hugetlb =
+ (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED_HUGETLB");
+ chk_kind(&mk_hbw_preferred_hugetlb);
+ KE_TRACE(25, ("__kmp_init_memkind: memkind library initialized\n"));
+ return; // success
}
dlclose(h_memkind); // failure
h_memkind = NULL;
}
- p_hbw_check = NULL;
- p_hbw_malloc = NULL;
- p_hbw_free = NULL;
- p_hbw_set_policy = NULL;
+ kmp_mk_check = NULL;
+ kmp_mk_alloc = NULL;
+ kmp_mk_free = NULL;
+ mk_default = NULL;
+ mk_interleave = NULL;
+ mk_hbw = NULL;
+ mk_hbw_interleave = NULL;
+ mk_hbw_preferred = NULL;
+ mk_hugetlb = NULL;
+ mk_hbw_hugetlb = NULL;
+ mk_hbw_preferred_hugetlb = NULL;
#else
kmp_mk_lib_name = "";
h_memkind = NULL;
- p_hbw_check = NULL;
- p_hbw_malloc = NULL;
- p_hbw_free = NULL;
- p_hbw_set_policy = NULL;
+ kmp_mk_check = NULL;
+ kmp_mk_alloc = NULL;
+ kmp_mk_free = NULL;
+ mk_default = NULL;
+ mk_interleave = NULL;
+ mk_hbw = NULL;
+ mk_hbw_interleave = NULL;
+ mk_hbw_preferred = NULL;
+ mk_hugetlb = NULL;
+ mk_hbw_hugetlb = NULL;
+ mk_hbw_preferred_hugetlb = NULL;
#endif
}
void __kmp_fini_memkind() {
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
+ if (__kmp_memkind_available)
+ KE_TRACE(25, ("__kmp_fini_memkind: finalize memkind library\n"));
if (h_memkind) {
dlclose(h_memkind);
h_memkind = NULL;
}
- p_hbw_check = NULL;
- p_hbw_malloc = NULL;
- p_hbw_free = NULL;
- p_hbw_set_policy = NULL;
+ kmp_mk_check = NULL;
+ kmp_mk_alloc = NULL;
+ kmp_mk_free = NULL;
+ mk_default = NULL;
+ mk_interleave = NULL;
+ mk_hbw = NULL;
+ mk_hbw_interleave = NULL;
+ mk_hbw_preferred = NULL;
+ mk_hugetlb = NULL;
+ mk_hbw_hugetlb = NULL;
+ mk_hbw_preferred_hugetlb = NULL;
#endif
}
-void __kmpc_set_default_allocator(int gtid, const omp_allocator_t *allocator) {
- if (allocator == OMP_NULL_ALLOCATOR)
+omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
+ int ntraits,
+ omp_alloctrait_t traits[]) {
+ // OpenMP 5.0 only allows predefined memspaces
+ KMP_DEBUG_ASSERT(ms == omp_default_mem_space || ms == omp_low_lat_mem_space ||
+ ms == omp_large_cap_mem_space || ms == omp_const_mem_space ||
+ ms == omp_high_bw_mem_space);
+ kmp_allocator_t *al;
+ int i;
+ al = (kmp_allocator_t *)__kmp_allocate(sizeof(kmp_allocator_t)); // zeroed
+ al->memspace = ms; // not used currently
+ for (i = 0; i < ntraits; ++i) {
+ switch (traits[i].key) {
+ case OMP_ATK_THREADMODEL:
+ case OMP_ATK_ACCESS:
+ case OMP_ATK_PINNED:
+ break;
+ case OMP_ATK_ALIGNMENT:
+ al->alignment = traits[i].value;
+ KMP_ASSERT(IS_POWER_OF_TWO(al->alignment));
+ break;
+ case OMP_ATK_POOL_SIZE:
+ al->pool_size = traits[i].value;
+ break;
+ case OMP_ATK_FALLBACK:
+ al->fb = (omp_alloctrait_value_t)traits[i].value;
+ KMP_DEBUG_ASSERT(
+ al->fb == OMP_ATV_DEFAULT_MEM_FB || al->fb == OMP_ATV_NULL_FB ||
+ al->fb == OMP_ATV_ABORT_FB || al->fb == OMP_ATV_ALLOCATOR_FB);
+ break;
+ case OMP_ATK_FB_DATA:
+ al->fb_data = RCAST(kmp_allocator_t *, traits[i].value);
+ break;
+ case OMP_ATK_PARTITION:
+ al->memkind = RCAST(void **, traits[i].value);
+ break;
+ default:
+ KMP_ASSERT2(0, "Unexpected allocator trait");
+ }
+ }
+ if (al->fb == 0) {
+ // set default allocator
+ al->fb = OMP_ATV_DEFAULT_MEM_FB;
+ al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
+ } else if (al->fb == OMP_ATV_ALLOCATOR_FB) {
+ KMP_ASSERT(al->fb_data != NULL);
+ } else if (al->fb == OMP_ATV_DEFAULT_MEM_FB) {
+ al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
+ }
+ if (__kmp_memkind_available) {
+ // Let's use memkind library if available
+ if (ms == omp_high_bw_mem_space) {
+ if (al->memkind == (void *)OMP_ATV_INTERLEAVED && mk_hbw_interleave) {
+ al->memkind = mk_hbw_interleave;
+ } else if (mk_hbw_preferred) {
+ // AC: do not try to use MEMKIND_HBW for now, because memkind library
+ // cannot reliably detect exhaustion of HBW memory.
+ // It could be possible using hbw_verify_memory_region() but memkind
+ // manual says: "Using this function in production code may result in
+ // serious performance penalty".
+ al->memkind = mk_hbw_preferred;
+ } else {
+ // HBW is requested but not available --> return NULL allocator
+ __kmp_free(al);
+ return omp_null_allocator;
+ }
+ } else {
+ if (al->memkind == (void *)OMP_ATV_INTERLEAVED && mk_interleave) {
+ al->memkind = mk_interleave;
+ } else {
+ al->memkind = mk_default;
+ }
+ }
+ } else {
+ if (ms == omp_high_bw_mem_space) {
+ // cannot detect HBW memory presence without memkind library
+ __kmp_free(al);
+ return omp_null_allocator;
+ }
+ }
+ return (omp_allocator_handle_t)al;
+}
+
+void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t allocator) {
+ if (allocator > kmp_max_mem_alloc)
+ __kmp_free(allocator);
+}
+
+void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t allocator) {
+ if (allocator == omp_null_allocator)
allocator = omp_default_mem_alloc;
- KMP_DEBUG_ASSERT(
- allocator == omp_default_mem_alloc ||
- allocator == omp_large_cap_mem_alloc ||
- allocator == omp_const_mem_alloc || allocator == omp_high_bw_mem_alloc ||
- allocator == omp_low_lat_mem_alloc || allocator == omp_cgroup_mem_alloc ||
- allocator == omp_pteam_mem_alloc || allocator == omp_thread_mem_alloc);
__kmp_threads[gtid]->th.th_def_allocator = allocator;
}
-const omp_allocator_t *__kmpc_get_default_allocator(int gtid) {
+
+omp_allocator_handle_t __kmpc_get_default_allocator(int gtid) {
return __kmp_threads[gtid]->th.th_def_allocator;
}
@@ -1296,44 +1437,137 @@ typedef struct kmp_mem_desc { // Memory block descriptor
void *ptr_alloc; // Pointer returned by allocator
size_t size_a; // Size of allocated memory block (initial+descriptor+align)
void *ptr_align; // Pointer to aligned memory, returned
- const omp_allocator_t *allocator; // allocator
+ kmp_allocator_t *allocator; // allocator
} kmp_mem_desc_t;
static int alignment = sizeof(void *); // let's align to pointer size
-void *__kmpc_alloc(int gtid, size_t size, const omp_allocator_t *allocator) {
+void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
+ void *ptr = NULL;
+ kmp_allocator_t *al;
KMP_DEBUG_ASSERT(__kmp_init_serial);
- if (allocator == OMP_NULL_ALLOCATOR)
+ if (allocator == omp_null_allocator)
allocator = __kmp_threads[gtid]->th.th_def_allocator;
+ KE_TRACE(25, ("__kmpc_alloc: T#%d (%d, %p)\n", gtid, (int)size, allocator));
+ al = RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator));
+
int sz_desc = sizeof(kmp_mem_desc_t);
- void *ptr = NULL;
kmp_mem_desc_t desc;
kmp_uintptr_t addr; // address returned by allocator
kmp_uintptr_t addr_align; // address to return to caller
kmp_uintptr_t addr_descr; // address of memory block descriptor
-
- KE_TRACE(25, ("__kmpc_alloc: T#%d (%d, %p)\n", gtid, (int)size, allocator));
-
- desc.size_a = size + sz_desc + alignment;
- if (allocator == omp_default_mem_alloc)
- ptr = __kmp_allocate(desc.size_a);
- if (allocator == omp_high_bw_mem_alloc && __kmp_hbw_mem_available) {
- KMP_DEBUG_ASSERT(p_hbw_malloc != NULL);
- ptr = p_hbw_malloc(desc.size_a);
+ int align = alignment; // default alignment
+ if (allocator > kmp_max_mem_alloc && al->alignment > 0) {
+ align = al->alignment; // alignment requested by user
}
+ desc.size_a = size + sz_desc + align;
- KE_TRACE(10, ("__kmpc_alloc: T#%d %p=alloc(%d) hbw %d\n", gtid, ptr,
- desc.size_a, __kmp_hbw_mem_available));
+ if (__kmp_memkind_available) {
+ if (allocator < kmp_max_mem_alloc) {
+ // pre-defined allocator
+ if (allocator == omp_high_bw_mem_alloc && mk_hbw_preferred) {
+ ptr = kmp_mk_alloc(*mk_hbw_preferred, desc.size_a);
+ } else {
+ ptr = kmp_mk_alloc(*mk_default, desc.size_a);
+ }
+ } else if (al->pool_size > 0) {
+ // custom allocator with pool size requested
+ kmp_uint64 used =
+ KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
+ if (used + desc.size_a > al->pool_size) {
+ // not enough space, need to go fallback path
+ KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
+ if (al->fb == OMP_ATV_DEFAULT_MEM_FB) {
+ al = (kmp_allocator_t *)omp_default_mem_alloc;
+ ptr = kmp_mk_alloc(*mk_default, desc.size_a);
+ } else if (al->fb == OMP_ATV_ABORT_FB) {
+ KMP_ASSERT(0); // abort fallback requested
+ } else if (al->fb == OMP_ATV_ALLOCATOR_FB) {
+ KMP_ASSERT(al != al->fb_data);
+ al = al->fb_data;
+ return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al);
+ } // else ptr == NULL;
+ } else {
+ // pool has enough space
+ ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
+ if (ptr == NULL) {
+ if (al->fb == OMP_ATV_DEFAULT_MEM_FB) {
+ al = (kmp_allocator_t *)omp_default_mem_alloc;
+ ptr = kmp_mk_alloc(*mk_default, desc.size_a);
+ } else if (al->fb == OMP_ATV_ABORT_FB) {
+ KMP_ASSERT(0); // abort fallback requested
+ } else if (al->fb == OMP_ATV_ALLOCATOR_FB) {
+ KMP_ASSERT(al != al->fb_data);
+ al = al->fb_data;
+ return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al);
+ }
+ }
+ }
+ } else {
+ // custom allocator, pool size not requested
+ ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
+ if (ptr == NULL) {
+ if (al->fb == OMP_ATV_DEFAULT_MEM_FB) {
+ al = (kmp_allocator_t *)omp_default_mem_alloc;
+ ptr = kmp_mk_alloc(*mk_default, desc.size_a);
+ } else if (al->fb == OMP_ATV_ABORT_FB) {
+ KMP_ASSERT(0); // abort fallback requested
+ } else if (al->fb == OMP_ATV_ALLOCATOR_FB) {
+ KMP_ASSERT(al != al->fb_data);
+ al = al->fb_data;
+ return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al);
+ }
+ }
+ }
+ } else if (allocator < kmp_max_mem_alloc) {
+ // pre-defined allocator
+ if (allocator == omp_high_bw_mem_alloc) {
+ // ptr = NULL;
+ } else {
+ ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
+ }
+ } else if (al->pool_size > 0) {
+ // custom allocator with pool size requested
+ kmp_uint64 used =
+ KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
+ if (used + desc.size_a > al->pool_size) {
+ // not enough space, need to go fallback path
+ KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
+ if (al->fb == OMP_ATV_DEFAULT_MEM_FB) {
+ al = (kmp_allocator_t *)omp_default_mem_alloc;
+ ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
+ } else if (al->fb == OMP_ATV_ABORT_FB) {
+ KMP_ASSERT(0); // abort fallback requested
+ } else if (al->fb == OMP_ATV_ALLOCATOR_FB) {
+ KMP_ASSERT(al != al->fb_data);
+ al = al->fb_data;
+ return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al);
+ } // else ptr == NULL;
+ } else {
+ // pool has enough space
+ ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
+ if (ptr == NULL && al->fb == OMP_ATV_ABORT_FB) {
+ KMP_ASSERT(0); // abort fallback requested
+ } // no sense to look for another fallback because of same internal alloc
+ }
+ } else {
+ // custom allocator, pool size not requested
+ ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
+ if (ptr == NULL && al->fb == OMP_ATV_ABORT_FB) {
+ KMP_ASSERT(0); // abort fallback requested
+ } // no sense to look for another fallback because of same internal alloc
+ }
+ KE_TRACE(10, ("__kmpc_alloc: T#%d %p=alloc(%d)\n", gtid, ptr, desc.size_a));
if (ptr == NULL)
return NULL;
addr = (kmp_uintptr_t)ptr;
- addr_align = (addr + sz_desc + alignment - 1) & ~(alignment - 1);
+ addr_align = (addr + sz_desc + align - 1) & ~(align - 1);
addr_descr = addr_align - sz_desc;
desc.ptr_alloc = ptr;
desc.ptr_align = (void *)addr_align;
- desc.allocator = allocator;
+ desc.allocator = al;
*((kmp_mem_desc_t *)addr_descr) = desc; // save descriptor contents
KMP_MB();
@@ -1341,11 +1575,14 @@ void *__kmpc_alloc(int gtid, size_t size, const omp_allocator_t *allocator) {
return desc.ptr_align;
}
-void __kmpc_free(int gtid, void *ptr, const omp_allocator_t *allocator) {
+void __kmpc_free(int gtid, void *ptr, const omp_allocator_handle_t allocator) {
KE_TRACE(25, ("__kmpc_free: T#%d free(%p,%p)\n", gtid, ptr, allocator));
if (ptr == NULL)
return;
+ kmp_allocator_t *al;
+ omp_allocator_handle_t oal;
+ al = RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator));
kmp_mem_desc_t desc;
kmp_uintptr_t addr_align; // address to return to caller
kmp_uintptr_t addr_descr; // address of memory block descriptor
@@ -1356,24 +1593,42 @@ void __kmpc_free(int gtid, void *ptr, const omp_allocator_t *allocator) {
KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
if (allocator) {
- KMP_DEBUG_ASSERT(desc.allocator == allocator);
- } else {
- allocator = desc.allocator;
+ KMP_DEBUG_ASSERT(desc.allocator == al || desc.allocator == al->fb_data);
}
- KMP_DEBUG_ASSERT(allocator);
-
- if (allocator == omp_default_mem_alloc)
- __kmp_free(desc.ptr_alloc);
- if (allocator == omp_high_bw_mem_alloc && __kmp_hbw_mem_available) {
- KMP_DEBUG_ASSERT(p_hbw_free != NULL);
- p_hbw_free(desc.ptr_alloc);
+ al = desc.allocator;
+ oal = (omp_allocator_handle_t)al; // cast to void* for comparisons
+ KMP_DEBUG_ASSERT(al);
+
+ if (__kmp_memkind_available) {
+ if (oal < kmp_max_mem_alloc) {
+ // pre-defined allocator
+ if (oal == omp_high_bw_mem_alloc && mk_hbw_preferred) {
+ kmp_mk_free(*mk_hbw_preferred, desc.ptr_alloc);
+ } else {
+ kmp_mk_free(*mk_default, desc.ptr_alloc);
+ }
+ } else {
+ if (al->pool_size > 0) { // custom allocator with pool size requested
+ kmp_uint64 used =
+ KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
+ (void)used; // to suppress compiler warning
+ KMP_DEBUG_ASSERT(used >= desc.size_a);
+ }
+ kmp_mk_free(*al->memkind, desc.ptr_alloc);
+ }
+ } else {
+ if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
+ kmp_uint64 used =
+ KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
+ (void)used; // to suppress compiler warning
+ KMP_DEBUG_ASSERT(used >= desc.size_a);
+ }
+ __kmp_thread_free(__kmp_thread_from_gtid(gtid), desc.ptr_alloc);
}
KE_TRACE(10, ("__kmpc_free: T#%d freed %p (%p)\n", gtid, desc.ptr_alloc,
allocator));
}
-#endif
-
/* If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes
memory leaks, but it may be useful for debugging memory corruptions, use of
freed pointers, etc. */
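
For context, the new __kmpc_init_allocator / __kmpc_alloc / __kmpc_free /
__kmpc_destroy_allocator entry points introduced above back the user-level
OpenMP 5.0 allocator API. Below is a minimal usage sketch against that API
(omp_init_allocator, omp_alloc, omp_free, omp_destroy_allocator); the trait
and memspace names follow the OpenMP 5.0 specification, and the exact
enumerator spellings in the omp.h shipped with this snapshot of the runtime
may differ from the internal OMP_ATK_*/OMP_ATV_* names used in the diff.

#include <omp.h>
#include <stdio.h>

int main(void) {
  // Ask for 64-byte alignment and fall back to default memory if
  // high-bandwidth memory (served through memkind above) is unavailable.
  omp_alloctrait_t traits[2] = {{omp_atk_alignment, 64},
                                {omp_atk_fallback, omp_atv_default_mem_fb}};
  omp_allocator_handle_t hbw =
      omp_init_allocator(omp_high_bw_mem_space, 2, traits);
  if (hbw == omp_null_allocator) // e.g. libmemkind.so could not be loaded
    hbw = omp_default_mem_alloc;

  double *buf = (double *)omp_alloc(1024 * sizeof(double), hbw);
  if (buf != NULL) {
    buf[0] = 42.0;
    printf("allocated %p\n", (void *)buf);
    omp_free(buf, hbw);
  }
  if (hbw != omp_default_mem_alloc)
    omp_destroy_allocator(hbw);
  return 0;
}

The omp_null_allocator check mirrors the code path above where a
high-bandwidth request without a usable memkind library makes
__kmpc_init_allocator return a NULL allocator.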