author | Jason Evans <jasone@FreeBSD.org> | 2008-08-27 02:00:53 +0000
committer | Jason Evans <jasone@FreeBSD.org> | 2008-08-27 02:00:53 +0000
commit | d6742bfbd3ecadd2bde38ccbb2351bd5c19ede1d (patch)
tree | 0e5399c773ca24c377f8a89207f8b7bf90716b12 /lib/libc/stdlib/malloc.c
parent | 55b418d32b6e12d722ca398b41e69320f809ed76 (diff)
Diffstat (limited to 'lib/libc/stdlib/malloc.c')
-rw-r--r-- | lib/libc/stdlib/malloc.c | 1311
1 file changed, 1080 insertions, 231 deletions
diff --git a/lib/libc/stdlib/malloc.c b/lib/libc/stdlib/malloc.c index b36dbc757d313..a32ffa78ca090 100644 --- a/lib/libc/stdlib/malloc.c +++ b/lib/libc/stdlib/malloc.c @@ -35,6 +35,9 @@ * + Multiple arenas are used if there are multiple CPUs, which reduces lock * contention and cache sloshing. * + * + Thread-specific caching is used if there are multiple threads, which + * reduces the amount of locking. + * * + Cache line sharing between arenas is avoided for internal data * structures. * @@ -48,37 +51,49 @@ * and a 16 byte quantum on a 32-bit system, the size classes in each category * are as follows: * - * |=====================================| - * | Category | Subcategory | Size | - * |=====================================| - * | Small | Tiny | 2 | - * | | | 4 | - * | | | 8 | - * | |----------------+---------| - * | | Quantum-spaced | 16 | - * | | | 32 | - * | | | 48 | - * | | | ... | - * | | | 480 | - * | | | 496 | - * | | | 512 | - * | |----------------+---------| - * | | Sub-page | 1 kB | - * | | | 2 kB | - * |=====================================| - * | Large | 4 kB | - * | | 8 kB | - * | | 12 kB | - * | | ... | - * | | 1012 kB | - * | | 1016 kB | - * | | 1020 kB | - * |=====================================| - * | Huge | 1 MB | - * | | 2 MB | - * | | 3 MB | - * | | ... | - * |=====================================| + * |=======================================| + * | Category | Subcategory | Size | + * |=======================================| + * | Small | Tiny | 2 | + * | | | 4 | + * | | | 8 | + * | |------------------+---------| + * | | Quantum-spaced | 16 | + * | | | 32 | + * | | | 48 | + * | | | ... | + * | | | 96 | + * | | | 112 | + * | | | 128 | + * | |------------------+---------| + * | | Cacheline-spaced | 192 | + * | | | 256 | + * | | | 320 | + * | | | 384 | + * | | | 448 | + * | | | 512 | + * | |------------------+---------| + * | | Sub-page | 760 | + * | | | 1024 | + * | | | 1280 | + * | | | ... | + * | | | 3328 | + * | | | 3584 | + * | | | 3840 | + * |=======================================| + * | Large | 4 kB | + * | | 8 kB | + * | | 12 kB | + * | | ... | + * | | 1012 kB | + * | | 1016 kB | + * | | 1020 kB | + * |=======================================| + * | Huge | 1 MB | + * | | 2 MB | + * | | 3 MB | + * | | ... | + * |=======================================| * * A different mechanism is used for each category: * @@ -113,6 +128,19 @@ #endif /* + * MALLOC_TINY enables support for tiny objects, which are smaller than one + * quantum. + */ +#define MALLOC_TINY + +/* + * MALLOC_MAG enables a magazine-based thread-specific caching layer for small + * objects. This makes it possible to allocate/deallocate objects without any + * locking when the cache is in the steady state. + */ +#define MALLOC_MAG + +/* * MALLOC_BALANCE enables monitoring of arena lock contention and dynamically * re-balances arena load if exponentially averaged contention exceeds a * certain threshold. @@ -184,46 +212,63 @@ __FBSDID("$FreeBSD$"); /* Size of stack-allocated buffer passed to strerror_r(). */ #define STRERROR_BUF 64 -/* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */ +/* + * The const_size2bin table is sized according to PAGESIZE_2POW, but for + * correctness reasons, we never assume that + * (pagesize == (1U << * PAGESIZE_2POW)). + * + * Minimum alignment of allocations is 2^QUANTUM_2POW bytes. 
+ */ #ifdef __i386__ -# define QUANTUM_2POW_MIN 4 +# define PAGESIZE_2POW 12 +# define QUANTUM_2POW 4 # define SIZEOF_PTR_2POW 2 # define CPU_SPINWAIT __asm__ volatile("pause") #endif #ifdef __ia64__ -# define QUANTUM_2POW_MIN 4 +# define PAGESIZE_2POW 12 +# define QUANTUM_2POW 4 # define SIZEOF_PTR_2POW 3 #endif #ifdef __alpha__ -# define QUANTUM_2POW_MIN 4 +# define PAGESIZE_2POW 13 +# define QUANTUM_2POW 4 # define SIZEOF_PTR_2POW 3 # define NO_TLS #endif #ifdef __sparc64__ -# define QUANTUM_2POW_MIN 4 +# define PAGESIZE_2POW 13 +# define QUANTUM_2POW 4 # define SIZEOF_PTR_2POW 3 # define NO_TLS #endif #ifdef __amd64__ -# define QUANTUM_2POW_MIN 4 +# define PAGESIZE_2POW 12 +# define QUANTUM_2POW 4 # define SIZEOF_PTR_2POW 3 # define CPU_SPINWAIT __asm__ volatile("pause") #endif #ifdef __arm__ -# define QUANTUM_2POW_MIN 3 +# define PAGESIZE_2POW 12 +# define QUANTUM_2POW 3 # define SIZEOF_PTR_2POW 2 # define NO_TLS #endif #ifdef __mips__ -# define QUANTUM_2POW_MIN 3 +# define PAGESIZE_2POW 12 +# define QUANTUM_2POW 3 # define SIZEOF_PTR_2POW 2 # define NO_TLS #endif #ifdef __powerpc__ -# define QUANTUM_2POW_MIN 4 +# define PAGESIZE_2POW 12 +# define QUANTUM_2POW 4 # define SIZEOF_PTR_2POW 2 #endif +#define QUANTUM ((size_t)(1U << QUANTUM_2POW)) +#define QUANTUM_MASK (QUANTUM - 1) + #define SIZEOF_PTR (1U << SIZEOF_PTR_2POW) /* sizeof(int) == (1U << SIZEOF_INT_2POW). */ @@ -237,6 +282,10 @@ __FBSDID("$FreeBSD$"); #endif #ifdef NO_TLS + /* MALLOC_MAG requires TLS. */ +# ifdef MALLOC_MAG +# undef MALLOC_MAG +# endif /* MALLOC_BALANCE requires TLS. */ # ifdef MALLOC_BALANCE # undef MALLOC_BALANCE @@ -253,23 +302,42 @@ __FBSDID("$FreeBSD$"); #define DIRTY_MAX_DEFAULT (1U << 9) /* - * Maximum size of L1 cache line. This is used to avoid cache line aliasing, - * so over-estimates are okay (up to a point), but under-estimates will - * negatively affect performance. + * Maximum size of L1 cache line. This is used to avoid cache line aliasing. + * In addition, this controls the spacing of cacheline-spaced size classes. */ #define CACHELINE_2POW 6 #define CACHELINE ((size_t)(1U << CACHELINE_2POW)) +#define CACHELINE_MASK (CACHELINE - 1) -/* Smallest size class to support. */ -#define TINY_MIN_2POW 1 +/* + * Subpages are an artificially designated partitioning of pages. Their only + * purpose is to support subpage-spaced size classes. + * + * There must be at least 4 subpages per page, due to the way size classes are + * handled. + */ +#define SUBPAGE_2POW 8 +#define SUBPAGE ((size_t)(1U << SUBPAGE_2POW)) +#define SUBPAGE_MASK (SUBPAGE - 1) + +#ifdef MALLOC_TINY + /* Smallest size class to support. */ +# define TINY_MIN_2POW 1 +#endif /* * Maximum size class that is a multiple of the quantum, but not (necessarily) * a power of 2. Above this size, allocations are rounded up to the nearest * power of 2. */ -#define SMALL_MAX_2POW_DEFAULT 9 -#define SMALL_MAX_DEFAULT (1U << SMALL_MAX_2POW_DEFAULT) +#define QSPACE_MAX_2POW_DEFAULT 7 + +/* + * Maximum size class that is a multiple of the cacheline, but not (necessarily) + * a power of 2. Above this size, allocations are rounded up to the nearest + * power of 2. + */ +#define CSPACE_MAX_2POW_DEFAULT 9 /* * RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized @@ -293,8 +361,7 @@ __FBSDID("$FreeBSD$"); #define RUN_MAX_OVRHD_RELAX 0x00001800U /* Put a cap on small object run size. This overrides RUN_MAX_OVRHD. 
*/ -#define RUN_MAX_SMALL_2POW 15 -#define RUN_MAX_SMALL (1U << RUN_MAX_SMALL_2POW) +#define RUN_MAX_SMALL (12 * pagesize) /* * Hyper-threaded CPUs may need a special instruction inside spin loops in @@ -319,6 +386,15 @@ __FBSDID("$FreeBSD$"); */ #define BLOCK_COST_2POW 4 +#ifdef MALLOC_MAG + /* + * Default magazine size, in bytes. max_rounds is calculated to make + * optimal use of the space, leaving just enough room for the magazine + * header. + */ +# define MAG_SIZE_2POW_DEFAULT 9 +#endif + #ifdef MALLOC_BALANCE /* * We use an exponential moving average to track recent lock contention, @@ -369,6 +445,11 @@ struct malloc_bin_stats_s { */ uint64_t nrequests; +#ifdef MALLOC_MAG + /* Number of magazine reloads from this bin. */ + uint64_t nmags; +#endif + /* Total number of runs created for this bin's size class. */ uint64_t nruns; @@ -678,6 +759,35 @@ struct arena_s { /******************************************************************************/ /* + * Magazine data structures. + */ + +#ifdef MALLOC_MAG +typedef struct mag_s mag_t; +struct mag_s { + size_t binind; /* Index of associated bin. */ + size_t nrounds; + void *rounds[1]; /* Dynamically sized. */ +}; + +/* + * Magazines are lazily allocated, but once created, they remain until the + * associated mag_rack is destroyed. + */ +typedef struct bin_mags_s bin_mags_t; +struct bin_mags_s { + mag_t *curmag; + mag_t *sparemag; +}; + +typedef struct mag_rack_s mag_rack_t; +struct mag_rack_s { + bin_mags_t bin_mags[1]; /* Dynamically sized. */ +}; +#endif + +/******************************************************************************/ +/* * Data. */ @@ -690,16 +800,147 @@ static size_t pagesize_mask; static size_t pagesize_2pow; /* Various bin-related settings. */ -static size_t bin_maxclass; /* Max size class for bins. */ -static unsigned ntbins; /* Number of (2^n)-spaced tiny bins. */ +#ifdef MALLOC_TINY /* Number of (2^n)-spaced tiny bins. */ +# define ntbins ((unsigned)(QUANTUM_2POW - TINY_MIN_2POW)) +#else +# define ntbins 0 +#endif static unsigned nqbins; /* Number of quantum-spaced bins. */ -static unsigned nsbins; /* Number of (2^n)-spaced sub-page bins. */ -static size_t small_min; -static size_t small_max; - -/* Various quantum-related settings. */ -static size_t quantum; -static size_t quantum_mask; /* (quantum - 1). */ +static unsigned ncbins; /* Number of cacheline-spaced bins. */ +static unsigned nsbins; /* Number of subpage-spaced bins. */ +static unsigned nbins; +#ifdef MALLOC_TINY +# define tspace_max ((size_t)(QUANTUM >> 1)) +#endif +#define qspace_min QUANTUM +static size_t qspace_max; +static size_t cspace_min; +static size_t cspace_max; +static size_t sspace_min; +static size_t sspace_max; +#define bin_maxclass sspace_max + +static uint8_t const *size2bin; +/* + * const_size2bin is a static constant lookup table that in the common case can + * be used as-is for size2bin. For dynamically linked programs, this avoids + * a page of memory overhead per process. 
+ */ +#define S2B_1(i) i, +#define S2B_2(i) S2B_1(i) S2B_1(i) +#define S2B_4(i) S2B_2(i) S2B_2(i) +#define S2B_8(i) S2B_4(i) S2B_4(i) +#define S2B_16(i) S2B_8(i) S2B_8(i) +#define S2B_32(i) S2B_16(i) S2B_16(i) +#define S2B_64(i) S2B_32(i) S2B_32(i) +#define S2B_128(i) S2B_64(i) S2B_64(i) +#define S2B_256(i) S2B_128(i) S2B_128(i) +static const uint8_t const_size2bin[(1U << PAGESIZE_2POW) - 255] = { + S2B_1(0xffU) /* 0 */ +#if (QUANTUM_2POW == 4) +/* 64-bit system ************************/ +# ifdef MALLOC_TINY + S2B_2(0) /* 2 */ + S2B_2(1) /* 4 */ + S2B_4(2) /* 8 */ + S2B_8(3) /* 16 */ +# define S2B_QMIN 3 +# else + S2B_16(0) /* 16 */ +# define S2B_QMIN 0 +# endif + S2B_16(S2B_QMIN + 1) /* 32 */ + S2B_16(S2B_QMIN + 2) /* 48 */ + S2B_16(S2B_QMIN + 3) /* 64 */ + S2B_16(S2B_QMIN + 4) /* 80 */ + S2B_16(S2B_QMIN + 5) /* 96 */ + S2B_16(S2B_QMIN + 6) /* 112 */ + S2B_16(S2B_QMIN + 7) /* 128 */ +# define S2B_CMIN (S2B_QMIN + 8) +#else +/* 32-bit system ************************/ +# ifdef MALLOC_TINY + S2B_2(0) /* 2 */ + S2B_2(1) /* 4 */ + S2B_4(2) /* 8 */ +# define S2B_QMIN 2 +# else + S2B_8(0) /* 8 */ +# define S2B_QMIN 0 +# endif + S2B_8(S2B_QMIN + 1) /* 16 */ + S2B_8(S2B_QMIN + 2) /* 24 */ + S2B_8(S2B_QMIN + 3) /* 32 */ + S2B_8(S2B_QMIN + 4) /* 40 */ + S2B_8(S2B_QMIN + 5) /* 48 */ + S2B_8(S2B_QMIN + 6) /* 56 */ + S2B_8(S2B_QMIN + 7) /* 64 */ + S2B_8(S2B_QMIN + 8) /* 72 */ + S2B_8(S2B_QMIN + 9) /* 80 */ + S2B_8(S2B_QMIN + 10) /* 88 */ + S2B_8(S2B_QMIN + 11) /* 96 */ + S2B_8(S2B_QMIN + 12) /* 104 */ + S2B_8(S2B_QMIN + 13) /* 112 */ + S2B_8(S2B_QMIN + 14) /* 120 */ + S2B_8(S2B_QMIN + 15) /* 128 */ +# define S2B_CMIN (S2B_QMIN + 16) +#endif +/****************************************/ + S2B_64(S2B_CMIN + 0) /* 192 */ + S2B_64(S2B_CMIN + 1) /* 256 */ + S2B_64(S2B_CMIN + 2) /* 320 */ + S2B_64(S2B_CMIN + 3) /* 384 */ + S2B_64(S2B_CMIN + 4) /* 448 */ + S2B_64(S2B_CMIN + 5) /* 512 */ +# define S2B_SMIN (S2B_CMIN + 6) + S2B_256(S2B_SMIN + 0) /* 768 */ + S2B_256(S2B_SMIN + 1) /* 1024 */ + S2B_256(S2B_SMIN + 2) /* 1280 */ + S2B_256(S2B_SMIN + 3) /* 1536 */ + S2B_256(S2B_SMIN + 4) /* 1792 */ + S2B_256(S2B_SMIN + 5) /* 2048 */ + S2B_256(S2B_SMIN + 6) /* 2304 */ + S2B_256(S2B_SMIN + 7) /* 2560 */ + S2B_256(S2B_SMIN + 8) /* 2816 */ + S2B_256(S2B_SMIN + 9) /* 3072 */ + S2B_256(S2B_SMIN + 10) /* 3328 */ + S2B_256(S2B_SMIN + 11) /* 3584 */ + S2B_256(S2B_SMIN + 12) /* 3840 */ +#if (PAGESIZE_2POW == 13) + S2B_256(S2B_SMIN + 13) /* 4096 */ + S2B_256(S2B_SMIN + 14) /* 4352 */ + S2B_256(S2B_SMIN + 15) /* 4608 */ + S2B_256(S2B_SMIN + 16) /* 4864 */ + S2B_256(S2B_SMIN + 17) /* 5120 */ + S2B_256(S2B_SMIN + 18) /* 5376 */ + S2B_256(S2B_SMIN + 19) /* 5632 */ + S2B_256(S2B_SMIN + 20) /* 5888 */ + S2B_256(S2B_SMIN + 21) /* 6144 */ + S2B_256(S2B_SMIN + 22) /* 6400 */ + S2B_256(S2B_SMIN + 23) /* 6656 */ + S2B_256(S2B_SMIN + 24) /* 6912 */ + S2B_256(S2B_SMIN + 25) /* 7168 */ + S2B_256(S2B_SMIN + 26) /* 7424 */ + S2B_256(S2B_SMIN + 27) /* 7680 */ + S2B_256(S2B_SMIN + 28) /* 7936 */ +#endif +}; +#undef S2B_1 +#undef S2B_2 +#undef S2B_4 +#undef S2B_8 +#undef S2B_16 +#undef S2B_32 +#undef S2B_64 +#undef S2B_128 +#undef S2B_256 +#undef S2B_QMIN +#undef S2B_CMIN +#undef S2B_SMIN + +#ifdef MALLOC_MAG +static size_t max_rounds; +#endif /* Various chunk-related settings. */ static size_t chunksize; @@ -796,6 +1037,14 @@ static pthread_mutex_t arenas_lock; /* Protects arenas initialization. 
*/ static __thread arena_t *arenas_map; #endif +#ifdef MALLOC_MAG +/* + * Map of thread-specific magazine racks, used for thread-specific object + * caching. + */ +static __thread mag_rack_t *mag_rack; +#endif + #ifdef MALLOC_STATS /* Chunk statistics. */ static chunk_stats_t stats_chunks; @@ -818,13 +1067,17 @@ static bool opt_junk = false; static bool opt_dss = true; static bool opt_mmap = true; #endif +#ifdef MALLOC_MAG +static bool opt_mag = true; +static size_t opt_mag_size_2pow = MAG_SIZE_2POW_DEFAULT; +#endif static size_t opt_dirty_max = DIRTY_MAX_DEFAULT; #ifdef MALLOC_BALANCE static uint64_t opt_balance_threshold = BALANCE_THRESHOLD_DEFAULT; #endif static bool opt_print_stats = false; -static size_t opt_quantum_2pow = QUANTUM_2POW_MIN; -static size_t opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT; +static size_t opt_qspace_max_2pow = QSPACE_MAX_2POW_DEFAULT; +static size_t opt_cspace_max_2pow = CSPACE_MAX_2POW_DEFAULT; static size_t opt_chunk_2pow = CHUNK_2POW_DEFAULT; static bool opt_utrace = false; static bool opt_sysv = false; @@ -902,15 +1155,21 @@ static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t oldsize, size_t newsize, bool dirty); static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin); -static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin); -static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size); +static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin); +static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size); #ifdef MALLOC_BALANCE static void arena_lock_balance_hard(arena_t *arena); #endif +#ifdef MALLOC_MAG +static void mag_load(mag_t *mag); +#endif static void *arena_malloc_large(arena_t *arena, size_t size, bool zero); static void *arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size); static size_t arena_salloc(const void *ptr); +#ifdef MALLOC_MAG +static void mag_unload(mag_t *mag); +#endif static void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr); static void arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, @@ -921,11 +1180,22 @@ static bool arena_ralloc_large(void *ptr, size_t size, size_t oldsize); static void *arena_ralloc(void *ptr, size_t size, size_t oldsize); static bool arena_new(arena_t *arena); static arena_t *arenas_extend(unsigned ind); +#ifdef MALLOC_MAG +static mag_t *mag_create(arena_t *arena, size_t binind); +static void mag_destroy(mag_t *mag); +static mag_rack_t *mag_rack_create(arena_t *arena); +static void mag_rack_destroy(mag_rack_t *rack); +#endif static void *huge_malloc(size_t size, bool zero); static void *huge_palloc(size_t alignment, size_t size); static void *huge_ralloc(void *ptr, size_t size, size_t oldsize); static void huge_dalloc(void *ptr); static void malloc_print_stats(void); +#ifdef MALLOC_DEBUG +static void size2bin_validate(void); +#endif +static bool size2bin_init(void); +static bool size2bin_init_hard(void); static bool malloc_init_hard(void); /* @@ -1063,18 +1333,23 @@ malloc_spin_unlock(pthread_mutex_t *lock) #define CHUNK_CEILING(s) \ (((s) + chunksize_mask) & ~chunksize_mask) +/* Return the smallest quantum multiple that is >= a. */ +#define QUANTUM_CEILING(a) \ + (((a) + QUANTUM_MASK) & ~QUANTUM_MASK) + /* Return the smallest cacheline multiple that is >= s. 
*/ #define CACHELINE_CEILING(s) \ - (((s) + (CACHELINE - 1)) & ~(CACHELINE - 1)) + (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) -/* Return the smallest quantum multiple that is >= a. */ -#define QUANTUM_CEILING(a) \ - (((a) + quantum_mask) & ~quantum_mask) +/* Return the smallest subpage multiple that is >= s. */ +#define SUBPAGE_CEILING(s) \ + (((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK) /* Return the smallest pagesize multiple that is >= s. */ #define PAGE_CEILING(s) \ (((s) + pagesize_mask) & ~pagesize_mask) +#ifdef MALLOC_TINY /* Compute the smallest power of 2 that is >= x. */ static inline size_t pow2_ceil(size_t x) @@ -1092,6 +1367,7 @@ pow2_ceil(size_t x) x++; return (x); } +#endif #ifdef MALLOC_BALANCE /* @@ -1382,10 +1658,19 @@ stats_print(arena_t *arena) arena->stats.ndalloc_small + arena->stats.ndalloc_large); malloc_printf("mapped: %12zu\n", arena->stats.mapped); - malloc_printf("bins: bin size regs pgs requests newruns" - " reruns maxruns curruns\n"); - for (i = 0, gap_start = UINT_MAX; i < ntbins + nqbins + nsbins; i++) { - if (arena->bins[i].stats.nrequests == 0) { +#ifdef MALLOC_MAG + if (__isthreaded && opt_mag) { + malloc_printf("bins: bin size regs pgs mags " + "newruns reruns maxruns curruns\n"); + } else { +#endif + malloc_printf("bins: bin size regs pgs requests " + "newruns reruns maxruns curruns\n"); +#ifdef MALLOC_MAG + } +#endif + for (i = 0, gap_start = UINT_MAX; i < nbins; i++) { + if (arena->bins[i].stats.nruns == 0) { if (gap_start == UINT_MAX) gap_start = i; } else { @@ -1404,10 +1689,15 @@ stats_print(arena_t *arena) "%13u %1s %4u %4u %3u %9llu %9llu" " %9llu %7lu %7lu\n", i, - i < ntbins ? "T" : i < ntbins + nqbins ? "Q" : "S", + i < ntbins ? "T" : i < ntbins + nqbins ? "Q" : + i < ntbins + nqbins + ncbins ? "C" : "S", arena->bins[i].reg_size, arena->bins[i].nregs, arena->bins[i].run_size >> pagesize_2pow, +#ifdef MALLOC_MAG + (__isthreaded && opt_mag) ? + arena->bins[i].stats.nmags : +#endif arena->bins[i].stats.nrequests, arena->bins[i].stats.nruns, arena->bins[i].stats.reruns, @@ -2137,44 +2427,9 @@ arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin) static inline void arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size) { - /* - * To divide by a number D that is not a power of two we multiply - * by (2^21 / D) and then right shift by 21 positions. 
- * - * X / D - * - * becomes - * - * (X * size_invs[(D >> QUANTUM_2POW_MIN) - 3]) >> SIZE_INV_SHIFT - */ -#define SIZE_INV_SHIFT 21 -#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s << QUANTUM_2POW_MIN)) + 1) - static const unsigned size_invs[] = { - SIZE_INV(3), - SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7), - SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11), - SIZE_INV(12),SIZE_INV(13), SIZE_INV(14), SIZE_INV(15), - SIZE_INV(16),SIZE_INV(17), SIZE_INV(18), SIZE_INV(19), - SIZE_INV(20),SIZE_INV(21), SIZE_INV(22), SIZE_INV(23), - SIZE_INV(24),SIZE_INV(25), SIZE_INV(26), SIZE_INV(27), - SIZE_INV(28),SIZE_INV(29), SIZE_INV(30), SIZE_INV(31) -#if (QUANTUM_2POW_MIN < 4) - , - SIZE_INV(32), SIZE_INV(33), SIZE_INV(34), SIZE_INV(35), - SIZE_INV(36), SIZE_INV(37), SIZE_INV(38), SIZE_INV(39), - SIZE_INV(40), SIZE_INV(41), SIZE_INV(42), SIZE_INV(43), - SIZE_INV(44), SIZE_INV(45), SIZE_INV(46), SIZE_INV(47), - SIZE_INV(48), SIZE_INV(49), SIZE_INV(50), SIZE_INV(51), - SIZE_INV(52), SIZE_INV(53), SIZE_INV(54), SIZE_INV(55), - SIZE_INV(56), SIZE_INV(57), SIZE_INV(58), SIZE_INV(59), - SIZE_INV(60), SIZE_INV(61), SIZE_INV(62), SIZE_INV(63) -#endif - }; unsigned diff, regind, elm, bit; assert(run->magic == ARENA_RUN_MAGIC); - assert(((sizeof(size_invs)) / sizeof(unsigned)) + 3 - >= (SMALL_MAX_DEFAULT >> QUANTUM_2POW_MIN)); /* * Avoid doing division with a variable divisor if possible. Using @@ -2203,26 +2458,89 @@ arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size) regind = (diff >> log2_table[size - 1]); else if (size <= 32768) regind = diff >> (8 + log2_table[(size >> 8) - 1]); - else { - /* - * The run size is too large for us to use the lookup - * table. Use real division. - */ + else regind = diff / size; - } - } else if (size <= (((sizeof(size_invs) / sizeof(unsigned)) + 2) - << QUANTUM_2POW_MIN)) { - regind = size_invs[(size >> QUANTUM_2POW_MIN) - 3] * diff; - regind >>= SIZE_INV_SHIFT; - } else { + } else if (size < qspace_max) { /* - * size_invs isn't large enough to handle this size class, so - * calculate regind using actual division. This only happens - * if the user increases small_max via the 'S' runtime - * configuration option. + * To divide by a number D that is not a power of two we + * multiply by (2^21 / D) and then right shift by 21 positions. + * + * X / D + * + * becomes + * + * (X * qsize_invs[(D >> QUANTUM_2POW) - 3]) + * >> SIZE_INV_SHIFT + * + * We can omit the first three elements, because we never + * divide by 0, and QUANTUM and 2*QUANTUM are both powers of + * two, which are handled above. 
*/ - regind = diff / size; - }; +#define SIZE_INV_SHIFT 21 +#define QSIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s << QUANTUM_2POW)) + 1) + static const unsigned qsize_invs[] = { + QSIZE_INV(3), + QSIZE_INV(4), QSIZE_INV(5), QSIZE_INV(6), QSIZE_INV(7) +#if (QUANTUM_2POW < 4) + , + QSIZE_INV(8), QSIZE_INV(9), QSIZE_INV(10), QSIZE_INV(11), + QSIZE_INV(12),QSIZE_INV(13), QSIZE_INV(14), QSIZE_INV(15) +#endif + }; + assert(QUANTUM * (((sizeof(qsize_invs)) / sizeof(unsigned)) + 3) + >= (1U << QSPACE_MAX_2POW_DEFAULT)); + + if (size <= (((sizeof(qsize_invs) / sizeof(unsigned)) + 2) << + QUANTUM_2POW)) { + regind = qsize_invs[(size >> QUANTUM_2POW) - 3] * diff; + regind >>= SIZE_INV_SHIFT; + } else + regind = diff / size; +#undef QSIZE_INV + } else if (size < cspace_max) { +#define CSIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s << CACHELINE_2POW)) + 1) + static const unsigned csize_invs[] = { + CSIZE_INV(3), + CSIZE_INV(4), CSIZE_INV(5), CSIZE_INV(6), CSIZE_INV(7) + }; + assert(CACHELINE * (((sizeof(csize_invs)) / sizeof(unsigned)) + + 3) >= (1U << CSPACE_MAX_2POW_DEFAULT)); + + if (size <= (((sizeof(csize_invs) / sizeof(unsigned)) + 2) << + CACHELINE_2POW)) { + regind = csize_invs[(size >> CACHELINE_2POW) - 3] * + diff; + regind >>= SIZE_INV_SHIFT; + } else + regind = diff / size; +#undef CSIZE_INV + } else { +#define SSIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s << SUBPAGE_2POW)) + 1) + static const unsigned ssize_invs[] = { + SSIZE_INV(3), + SSIZE_INV(4), SSIZE_INV(5), SSIZE_INV(6), SSIZE_INV(7), + SSIZE_INV(8), SSIZE_INV(9), SSIZE_INV(10), SSIZE_INV(11), + SSIZE_INV(12), SSIZE_INV(13), SSIZE_INV(14), SSIZE_INV(15) +#if (PAGESIZE_2POW == 13) + , + SSIZE_INV(16), SSIZE_INV(17), SSIZE_INV(18), SSIZE_INV(19), + SSIZE_INV(20), SSIZE_INV(21), SSIZE_INV(22), SSIZE_INV(23), + SSIZE_INV(24), SSIZE_INV(25), SSIZE_INV(26), SSIZE_INV(27), + SSIZE_INV(28), SSIZE_INV(29), SSIZE_INV(29), SSIZE_INV(30) +#endif + }; + assert(SUBPAGE * (((sizeof(ssize_invs)) / sizeof(unsigned)) + 3) + >= (1U << PAGESIZE_2POW)); + + if (size < (((sizeof(ssize_invs) / sizeof(unsigned)) + 2) << + SUBPAGE_2POW)) { + regind = ssize_invs[(size >> SUBPAGE_2POW) - 3] * diff; + regind >>= SIZE_INV_SHIFT; + } else + regind = diff / size; +#undef SSIZE_INV + } +#undef SIZE_INV_SHIFT assert(diff == regind * size); assert(regind < bin->nregs); @@ -2232,8 +2550,6 @@ arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size) bit = regind - (elm << (SIZEOF_INT_2POW + 3)); assert((run->regs_mask[elm] & (1U << bit)) == 0); run->regs_mask[elm] |= (1U << bit); -#undef SIZE_INV -#undef SIZE_INV_SHIFT } static void @@ -2813,7 +3129,7 @@ arena_lock_balance(arena_t *arena) /* * Calculate the exponentially averaged contention for this * arena. Due to integer math always rounding down, this value - * decays somewhat faster then normal. + * decays somewhat faster than normal. 
*/ arena->contention = (((uint64_t)arena->contention * (uint64_t)((1U << BALANCE_ALPHA_INV_2POW)-1)) @@ -2846,39 +3162,125 @@ arena_lock_balance_hard(arena_t *arena) } #endif +#ifdef MALLOC_MAG +static inline void * +mag_alloc(mag_t *mag) +{ + + if (mag->nrounds == 0) + return (NULL); + mag->nrounds--; + + return (mag->rounds[mag->nrounds]); +} + +static void +mag_load(mag_t *mag) +{ + arena_t *arena; + arena_bin_t *bin; + arena_run_t *run; + void *round; + size_t i; + + arena = choose_arena(); + bin = &arena->bins[mag->binind]; +#ifdef MALLOC_BALANCE + arena_lock_balance(arena); +#else + malloc_spin_lock(&arena->lock); +#endif + for (i = mag->nrounds; i < max_rounds; i++) { + if ((run = bin->runcur) != NULL && run->nfree > 0) + round = arena_bin_malloc_easy(arena, bin, run); + else + round = arena_bin_malloc_hard(arena, bin); + if (round == NULL) + break; + mag->rounds[i] = round; + } +#ifdef MALLOC_STATS + bin->stats.nmags++; + arena->stats.nmalloc_small += (i - mag->nrounds); + arena->stats.allocated_small += (i - mag->nrounds) * bin->reg_size; +#endif + malloc_spin_unlock(&arena->lock); + mag->nrounds = i; +} + +static inline void * +mag_rack_alloc(mag_rack_t *rack, size_t size, bool zero) +{ + void *ret; + bin_mags_t *bin_mags; + mag_t *mag; + size_t binind; + + binind = size2bin[size]; + assert(binind < nbins); + bin_mags = &rack->bin_mags[binind]; + + mag = bin_mags->curmag; + if (mag == NULL) { + /* Create an initial magazine for this size class. */ + assert(bin_mags->sparemag == NULL); + mag = mag_create(choose_arena(), binind); + if (mag == NULL) + return (NULL); + bin_mags->curmag = mag; + mag_load(mag); + } + + ret = mag_alloc(mag); + if (ret == NULL) { + if (bin_mags->sparemag != NULL) { + if (bin_mags->sparemag->nrounds > 0) { + /* Swap magazines. */ + bin_mags->curmag = bin_mags->sparemag; + bin_mags->sparemag = mag; + mag = bin_mags->curmag; + } else { + /* Reload the current magazine. */ + mag_load(mag); + } + } else { + /* Create a second magazine. */ + mag = mag_create(choose_arena(), binind); + if (mag == NULL) + return (NULL); + mag_load(mag); + bin_mags->sparemag = bin_mags->curmag; + bin_mags->curmag = mag; + } + ret = mag_alloc(mag); + if (ret == NULL) + return (NULL); + } + + if (zero == false) { + if (opt_junk) + memset(ret, 0xa5, size); + else if (opt_zero) + memset(ret, 0, size); + } else + memset(ret, 0, size); + + return (ret); +} +#endif + static inline void * arena_malloc_small(arena_t *arena, size_t size, bool zero) { void *ret; arena_bin_t *bin; arena_run_t *run; + size_t binind; - if (size < small_min) { - /* Tiny. */ - size = pow2_ceil(size); - bin = &arena->bins[ffs((int)(size >> (TINY_MIN_2POW + - 1)))]; -#if (!defined(NDEBUG) || defined(MALLOC_STATS)) - /* - * Bin calculation is always correct, but we may need - * to fix size for the purposes of assertions and/or - * stats accuracy. - */ - if (size < (1U << TINY_MIN_2POW)) - size = (1U << TINY_MIN_2POW); -#endif - } else if (size <= small_max) { - /* Quantum-spaced. */ - size = QUANTUM_CEILING(size); - bin = &arena->bins[ntbins + (size >> opt_quantum_2pow) - - 1]; - } else { - /* Sub-page. 
*/ - size = pow2_ceil(size); - bin = &arena->bins[ntbins + nqbins - + (ffs((int)(size >> opt_small_max_2pow)) - 2)]; - } - assert(size == bin->reg_size); + binind = size2bin[size]; + assert(binind < nbins); + bin = &arena->bins[binind]; + size = bin->reg_size; #ifdef MALLOC_BALANCE arena_lock_balance(arena); @@ -2956,7 +3358,19 @@ arena_malloc(arena_t *arena, size_t size, bool zero) assert(QUANTUM_CEILING(size) <= arena_maxclass); if (size <= bin_maxclass) { - return (arena_malloc_small(arena, size, zero)); +#ifdef MALLOC_MAG + if (__isthreaded && opt_mag) { + mag_rack_t *rack = mag_rack; + if (rack == NULL) { + rack = mag_rack_create(arena); + if (rack == NULL) + return (NULL); + mag_rack = rack; + } + return (mag_rack_alloc(rack, size, zero)); + } else +#endif + return (arena_malloc_small(arena, size, zero)); } else return (arena_malloc_large(arena, size, zero)); } @@ -3085,7 +3499,7 @@ ipalloc(size_t alignment, size_t size) size_t run_size; /* - * We can't achieve sub-page alignment, so round up alignment + * We can't achieve subpage alignment, so round up alignment * permanently; it makes later calculations simpler. */ alignment = PAGE_CEILING(alignment); @@ -3282,6 +3696,122 @@ arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, #endif } +#ifdef MALLOC_MAG +static void +mag_unload(mag_t *mag) +{ + arena_chunk_t *chunk; + arena_t *arena; + void *round; + size_t i, ndeferred, nrounds; + + for (ndeferred = mag->nrounds; ndeferred > 0;) { + nrounds = ndeferred; + /* Lock the arena associated with the first round. */ + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mag->rounds[0]); + arena = chunk->arena; +#ifdef MALLOC_BALANCE + arena_lock_balance(arena); +#else + malloc_spin_lock(&arena->lock); +#endif + /* Deallocate every round that belongs to the locked arena. */ + for (i = ndeferred = 0; i < nrounds; i++) { + round = mag->rounds[i]; + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(round); + if (chunk->arena == arena) { + size_t pageind = (((uintptr_t)round - + (uintptr_t)chunk) >> pagesize_2pow); + arena_chunk_map_t *mapelm = + &chunk->map[pageind]; + arena_dalloc_small(arena, chunk, round, mapelm); + } else { + /* + * This round was allocated via a different + * arena than the one that is currently locked. + * Stash the round, so that it can be handled + * in a future pass. + */ + mag->rounds[ndeferred] = round; + ndeferred++; + } + } + malloc_spin_unlock(&arena->lock); + } + + mag->nrounds = 0; +} + +static inline void +mag_rack_dalloc(mag_rack_t *rack, void *ptr) +{ + arena_t *arena; + arena_chunk_t *chunk; + arena_run_t *run; + arena_bin_t *bin; + bin_mags_t *bin_mags; + mag_t *mag; + size_t pageind, binind; + arena_chunk_map_t *mapelm; + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + arena = chunk->arena; + pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow); + mapelm = &chunk->map[pageind]; + run = (arena_run_t *)(mapelm->bits & ~pagesize_mask); + assert(run->magic == ARENA_RUN_MAGIC); + bin = run->bin; + binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) / + sizeof(arena_bin_t); + assert(binind < nbins); + + if (opt_junk) + memset(ptr, 0x5a, arena->bins[binind].reg_size); + + bin_mags = &rack->bin_mags[binind]; + mag = bin_mags->curmag; + if (mag == NULL) { + /* Create an initial magazine for this size class. 
*/ + assert(bin_mags->sparemag == NULL); + mag = mag_create(choose_arena(), binind); + if (mag == NULL) { + malloc_spin_lock(&arena->lock); + arena_dalloc_small(arena, chunk, ptr, mapelm); + malloc_spin_unlock(&arena->lock); + return; + } + bin_mags->curmag = mag; + } + + if (mag->nrounds == max_rounds) { + if (bin_mags->sparemag != NULL) { + if (bin_mags->sparemag->nrounds < max_rounds) { + /* Swap magazines. */ + bin_mags->curmag = bin_mags->sparemag; + bin_mags->sparemag = mag; + mag = bin_mags->curmag; + } else { + /* Unload the current magazine. */ + mag_unload(mag); + } + } else { + /* Create a second magazine. */ + mag = mag_create(choose_arena(), binind); + if (mag == NULL) { + mag = rack->bin_mags[binind].curmag; + mag_unload(mag); + } else { + bin_mags->sparemag = bin_mags->curmag; + bin_mags->curmag = mag; + } + } + assert(mag->nrounds < max_rounds); + } + mag->rounds[mag->nrounds] = ptr; + mag->nrounds++; +} +#endif + static void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) { @@ -3329,9 +3859,28 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0); if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) { /* Small allocation. */ - malloc_spin_lock(&arena->lock); - arena_dalloc_small(arena, chunk, ptr, mapelm); - malloc_spin_unlock(&arena->lock); +#ifdef MALLOC_MAG + if (__isthreaded && opt_mag) { + mag_rack_t *rack = mag_rack; + if (rack == NULL) { + rack = mag_rack_create(arena); + if (rack == NULL) { + malloc_spin_lock(&arena->lock); + arena_dalloc_small(arena, chunk, ptr, + mapelm); + malloc_spin_unlock(&arena->lock); + } + mag_rack = rack; + } + mag_rack_dalloc(rack, ptr); + } else { +#endif + malloc_spin_lock(&arena->lock); + arena_dalloc_small(arena, chunk, ptr, mapelm); + malloc_spin_unlock(&arena->lock); +#ifdef MALLOC_MAG + } +#endif } else arena_dalloc_large(arena, chunk, ptr); } @@ -3471,24 +4020,16 @@ arena_ralloc(void *ptr, size_t size, size_t oldsize) size_t copysize; /* Try to avoid moving the allocation. */ - if (size < small_min) { - if (oldsize < small_min && - ffs((int)(pow2_ceil(size) >> (TINY_MIN_2POW + 1))) - == ffs((int)(pow2_ceil(oldsize) >> (TINY_MIN_2POW + 1)))) - goto IN_PLACE; /* Same size class. */ - } else if (size <= small_max) { - if (oldsize >= small_min && oldsize <= small_max && - (QUANTUM_CEILING(size) >> opt_quantum_2pow) - == (QUANTUM_CEILING(oldsize) >> opt_quantum_2pow)) - goto IN_PLACE; /* Same size class. */ - } else if (size <= bin_maxclass) { - if (oldsize > small_max && oldsize <= bin_maxclass && - pow2_ceil(size) == pow2_ceil(oldsize)) - goto IN_PLACE; /* Same size class. */ - } else if (oldsize > bin_maxclass && oldsize <= arena_maxclass) { - assert(size > bin_maxclass); - if (arena_ralloc_large(ptr, size, oldsize) == false) - return (ptr); + if (size <= bin_maxclass) { + if (oldsize <= bin_maxclass && size2bin[size] == + size2bin[oldsize]) + goto IN_PLACE; + } else { + if (oldsize > bin_maxclass && oldsize <= arena_maxclass) { + assert(size > bin_maxclass); + if (arena_ralloc_large(ptr, size, oldsize) == false) + return (ptr); + } } /* @@ -3558,8 +4099,10 @@ arena_new(arena_t *arena) /* Initialize bins. */ prev_run_size = pagesize; + i = 0; +#ifdef MALLOC_TINY /* (2^n)-spaced tiny bins. 
*/ - for (i = 0; i < ntbins; i++) { + for (; i < ntbins; i++) { bin = &arena->bins[i]; bin->runcur = NULL; arena_run_tree_new(&bin->runs); @@ -3572,6 +4115,7 @@ arena_new(arena_t *arena) memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); #endif } +#endif /* Quantum-spaced bins. */ for (; i < ntbins + nqbins; i++) { @@ -3579,7 +4123,7 @@ arena_new(arena_t *arena) bin->runcur = NULL; arena_run_tree_new(&bin->runs); - bin->reg_size = quantum * (i - ntbins + 1); + bin->reg_size = (i - ntbins + 1) << QUANTUM_2POW; prev_run_size = arena_bin_run_size_calc(bin, prev_run_size); @@ -3588,13 +4132,30 @@ arena_new(arena_t *arena) #endif } - /* (2^n)-spaced sub-page bins. */ - for (; i < ntbins + nqbins + nsbins; i++) { + /* Cacheline-spaced bins. */ + for (; i < ntbins + nqbins + ncbins; i++) { bin = &arena->bins[i]; bin->runcur = NULL; arena_run_tree_new(&bin->runs); - bin->reg_size = (small_max << (i - (ntbins + nqbins) + 1)); + bin->reg_size = cspace_min + ((i - (ntbins + nqbins)) << + CACHELINE_2POW); + + prev_run_size = arena_bin_run_size_calc(bin, prev_run_size); + +#ifdef MALLOC_STATS + memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); +#endif + } + + /* Subpage-spaced bins. */ + for (; i < nbins; i++) { + bin = &arena->bins[i]; + bin->runcur = NULL; + arena_run_tree_new(&bin->runs); + + bin->reg_size = sspace_min + ((i - (ntbins + nqbins + ncbins)) + << SUBPAGE_2POW); prev_run_size = arena_bin_run_size_calc(bin, prev_run_size); @@ -3618,7 +4179,7 @@ arenas_extend(unsigned ind) /* Allocate enough space for trailing bins. */ ret = (arena_t *)base_alloc(sizeof(arena_t) - + (sizeof(arena_bin_t) * (ntbins + nqbins + nsbins - 1))); + + (sizeof(arena_bin_t) * (nbins - 1))); if (ret != NULL && arena_new(ret) == false) { arenas[ind] = ret; return (ret); @@ -3639,6 +4200,95 @@ arenas_extend(unsigned ind) return (arenas[0]); } +#ifdef MALLOC_MAG +static mag_t * +mag_create(arena_t *arena, size_t binind) +{ + mag_t *ret; + + if (sizeof(mag_t) + (sizeof(void *) * (max_rounds - 1)) <= + bin_maxclass) { + ret = arena_malloc_small(arena, sizeof(mag_t) + (sizeof(void *) + * (max_rounds - 1)), false); + } else { + ret = imalloc(sizeof(mag_t) + (sizeof(void *) * (max_rounds - + 1))); + } + if (ret == NULL) + return (NULL); + ret->binind = binind; + ret->nrounds = 0; + + return (ret); +} + +static void +mag_destroy(mag_t *mag) +{ + arena_t *arena; + arena_chunk_t *chunk; + size_t pageind; + arena_chunk_map_t *mapelm; + + chunk = CHUNK_ADDR2BASE(mag); + arena = chunk->arena; + pageind = (((uintptr_t)mag - (uintptr_t)chunk) >> pagesize_2pow); + mapelm = &chunk->map[pageind]; + + assert(mag->nrounds == 0); + if (sizeof(mag_t) + (sizeof(void *) * (max_rounds - 1)) <= + bin_maxclass) { + malloc_spin_lock(&arena->lock); + arena_dalloc_small(arena, chunk, mag, mapelm); + malloc_spin_unlock(&arena->lock); + } else + idalloc(mag); +} + +static mag_rack_t * +mag_rack_create(arena_t *arena) +{ + + assert(sizeof(mag_rack_t) + (sizeof(bin_mags_t *) * (nbins - 1)) <= + bin_maxclass); + return (arena_malloc_small(arena, sizeof(mag_rack_t) + + (sizeof(bin_mags_t) * (nbins - 1)), true)); +} + +static void +mag_rack_destroy(mag_rack_t *rack) +{ + arena_t *arena; + arena_chunk_t *chunk; + bin_mags_t *bin_mags; + size_t i, pageind; + arena_chunk_map_t *mapelm; + + for (i = 0; i < nbins; i++) { + bin_mags = &rack->bin_mags[i]; + if (bin_mags->curmag != NULL) { + assert(bin_mags->curmag->binind == i); + mag_unload(bin_mags->curmag); + mag_destroy(bin_mags->curmag); + } + if (bin_mags->sparemag != NULL) { + 
assert(bin_mags->sparemag->binind == i); + mag_unload(bin_mags->sparemag); + mag_destroy(bin_mags->sparemag); + } + } + + chunk = CHUNK_ADDR2BASE(rack); + arena = chunk->arena; + pageind = (((uintptr_t)rack - (uintptr_t)chunk) >> pagesize_2pow); + mapelm = &chunk->map[pageind]; + + malloc_spin_lock(&arena->lock); + arena_dalloc_small(arena, chunk, rack, mapelm); + malloc_spin_unlock(&arena->lock); +} +#endif + /* * End arena. */ @@ -3860,6 +4510,9 @@ malloc_print_stats(void) #ifdef MALLOC_DSS _malloc_message(opt_dss ? "D" : "d", "", "", ""); #endif +#ifdef MALLOC_MAG + _malloc_message(opt_mag ? "G" : "g", "", "", ""); +#endif _malloc_message(opt_junk ? "J" : "j", "", "", ""); #ifdef MALLOC_DSS _malloc_message(opt_mmap ? "M" : "m", "", "", ""); @@ -3877,9 +4530,27 @@ malloc_print_stats(void) #endif _malloc_message("Pointer size: ", umax2s(sizeof(void *), s), "\n", ""); - _malloc_message("Quantum size: ", umax2s(quantum, s), "\n", ""); - _malloc_message("Max small size: ", umax2s(small_max, s), "\n", - ""); + _malloc_message("Quantum size: ", umax2s(QUANTUM, s), "\n", ""); + _malloc_message("Cacheline size (assumed): ", umax2s(CACHELINE, + s), "\n", ""); +#ifdef MALLOC_TINY + _malloc_message("Tiny 2^n-spaced sizes: [", umax2s((1U << + TINY_MIN_2POW), s), "..", ""); + _malloc_message(umax2s((qspace_min >> 1), s), "]\n", "", ""); +#endif + _malloc_message("Quantum-spaced sizes: [", umax2s(qspace_min, + s), "..", ""); + _malloc_message(umax2s(qspace_max, s), "]\n", "", ""); + _malloc_message("Cacheline-spaced sizes: [", umax2s(cspace_min, + s), "..", ""); + _malloc_message(umax2s(cspace_max, s), "]\n", "", ""); + _malloc_message("Subpage-spaced sizes: [", umax2s(sspace_min, + s), "..", ""); + _malloc_message(umax2s(sspace_max, s), "]\n", "", ""); +#ifdef MALLOC_MAG + _malloc_message("Rounds per magazine: ", umax2s(max_rounds, s), + "\n", ""); +#endif _malloc_message("Max dirty pages per arena: ", umax2s(opt_dirty_max, s), "\n", ""); @@ -3969,6 +4640,122 @@ malloc_print_stats(void) } } +#ifdef MALLOC_DEBUG +static void +size2bin_validate(void) +{ + size_t i, size, binind; + + assert(size2bin[0] == 0xffU); + i = 1; +# ifdef MALLOC_TINY + /* Tiny. */ + for (; i < (1U << TINY_MIN_2POW); i++) { + size = pow2_ceil(1U << TINY_MIN_2POW); + binind = ffs((int)(size >> (TINY_MIN_2POW + 1))); + assert(size2bin[i] == binind); + } + for (; i < qspace_min; i++) { + size = pow2_ceil(i); + binind = ffs((int)(size >> (TINY_MIN_2POW + 1))); + assert(size2bin[i] == binind); + } +# endif + /* Quantum-spaced. */ + for (; i <= qspace_max; i++) { + size = QUANTUM_CEILING(i); + binind = ntbins + (size >> QUANTUM_2POW) - 1; + assert(size2bin[i] == binind); + } + /* Cacheline-spaced. */ + for (; i <= cspace_max; i++) { + size = CACHELINE_CEILING(i); + binind = ntbins + nqbins + ((size - cspace_min) >> + CACHELINE_2POW); + assert(size2bin[i] == binind); + } + /* Sub-page. 
*/ + for (; i <= sspace_max; i++) { + size = SUBPAGE_CEILING(i); + binind = ntbins + nqbins + ncbins + ((size - sspace_min) + >> SUBPAGE_2POW); + assert(size2bin[i] == binind); + } +} +#endif + +static bool +size2bin_init(void) +{ + + if (opt_qspace_max_2pow != QSPACE_MAX_2POW_DEFAULT + || opt_cspace_max_2pow != CSPACE_MAX_2POW_DEFAULT) + return (size2bin_init_hard()); + + size2bin = const_size2bin; +#ifdef MALLOC_DEBUG + assert(sizeof(const_size2bin) == bin_maxclass + 1); + size2bin_validate(); +#endif + return (false); +} + +static bool +size2bin_init_hard(void) +{ + size_t i, size, binind; + uint8_t *custom_size2bin; + + assert(opt_qspace_max_2pow != QSPACE_MAX_2POW_DEFAULT + || opt_cspace_max_2pow != CSPACE_MAX_2POW_DEFAULT); + + custom_size2bin = (uint8_t *)base_alloc(bin_maxclass + 1); + if (custom_size2bin == NULL) + return (true); + + custom_size2bin[0] = 0xffU; + i = 1; +#ifdef MALLOC_TINY + /* Tiny. */ + for (; i < (1U << TINY_MIN_2POW); i++) { + size = pow2_ceil(1U << TINY_MIN_2POW); + binind = ffs((int)(size >> (TINY_MIN_2POW + 1))); + custom_size2bin[i] = binind; + } + for (; i < qspace_min; i++) { + size = pow2_ceil(i); + binind = ffs((int)(size >> (TINY_MIN_2POW + 1))); + custom_size2bin[i] = binind; + } +#endif + /* Quantum-spaced. */ + for (; i <= qspace_max; i++) { + size = QUANTUM_CEILING(i); + binind = ntbins + (size >> QUANTUM_2POW) - 1; + custom_size2bin[i] = binind; + } + /* Cacheline-spaced. */ + for (; i <= cspace_max; i++) { + size = CACHELINE_CEILING(i); + binind = ntbins + nqbins + ((size - cspace_min) >> + CACHELINE_2POW); + custom_size2bin[i] = binind; + } + /* Sub-page. */ + for (; i <= sspace_max; i++) { + size = SUBPAGE_CEILING(i); + binind = ntbins + nqbins + ncbins + ((size - sspace_min) >> + SUBPAGE_2POW); + custom_size2bin[i] = binind; + } + + size2bin = custom_size2bin; +#ifdef MALLOC_DEBUG + size2bin_validate(); +#endif + return (false); +} + /* * FreeBSD's pthreads implementation calls malloc(3), so the malloc * implementation has to take pains to avoid infinite recursion during @@ -4128,6 +4915,18 @@ MALLOC_OUT: opt_balance_threshold <<= 1; #endif break; + case 'c': + if (opt_cspace_max_2pow - 1 > + opt_qspace_max_2pow && + opt_cspace_max_2pow > + CACHELINE_2POW) + opt_cspace_max_2pow--; + break; + case 'C': + if (opt_cspace_max_2pow < pagesize_2pow + - 1) + opt_cspace_max_2pow++; + break; case 'd': #ifdef MALLOC_DSS opt_dss = false; @@ -4147,6 +4946,14 @@ MALLOC_OUT: else if ((opt_dirty_max << 1) != 0) opt_dirty_max <<= 1; break; +#ifdef MALLOC_MAG + case 'g': + opt_mag = false; + break; + case 'G': + opt_mag = true; + break; +#endif case 'j': opt_junk = false; break; @@ -4190,24 +4997,30 @@ MALLOC_OUT: opt_print_stats = true; break; case 'q': - if (opt_quantum_2pow > QUANTUM_2POW_MIN) - opt_quantum_2pow--; + if (opt_qspace_max_2pow > QUANTUM_2POW) + opt_qspace_max_2pow--; break; case 'Q': - if (opt_quantum_2pow < pagesize_2pow - - 1) - opt_quantum_2pow++; + if (opt_qspace_max_2pow + 1 < + opt_cspace_max_2pow) + opt_qspace_max_2pow++; break; - case 's': - if (opt_small_max_2pow > - QUANTUM_2POW_MIN) - opt_small_max_2pow--; +#ifdef MALLOC_MAG + case 'R': + if (opt_mag_size_2pow + 1 < (8U << + SIZEOF_PTR_2POW)) + opt_mag_size_2pow++; break; - case 'S': - if (opt_small_max_2pow < pagesize_2pow - - 1) - opt_small_max_2pow++; + case 'r': + /* + * Make sure there's always at least + * one round per magazine. 
+ */ + if ((1U << (opt_mag_size_2pow-1)) >= + sizeof(mag_t)) + opt_mag_size_2pow--; break; +#endif case 'u': opt_utrace = false; break; @@ -4259,27 +5072,40 @@ MALLOC_OUT: atexit(malloc_print_stats); } - /* Set variables according to the value of opt_small_max_2pow. */ - if (opt_small_max_2pow < opt_quantum_2pow) - opt_small_max_2pow = opt_quantum_2pow; - small_max = (1U << opt_small_max_2pow); - - /* Set bin-related variables. */ - bin_maxclass = (pagesize >> 1); - assert(opt_quantum_2pow >= TINY_MIN_2POW); - ntbins = opt_quantum_2pow - TINY_MIN_2POW; - assert(ntbins <= opt_quantum_2pow); - nqbins = (small_max >> opt_quantum_2pow); - nsbins = pagesize_2pow - opt_small_max_2pow - 1; - - /* Set variables according to the value of opt_quantum_2pow. */ - quantum = (1U << opt_quantum_2pow); - quantum_mask = quantum - 1; - if (ntbins > 0) - small_min = (quantum >> 1) + 1; - else - small_min = 1; - assert(small_min <= quantum); +#ifdef MALLOC_MAG + /* + * Calculate the actual number of rounds per magazine, taking into + * account header overhead. + */ + max_rounds = (1LLU << (opt_mag_size_2pow - SIZEOF_PTR_2POW)) - + (sizeof(mag_t) >> SIZEOF_PTR_2POW) + 1; +#endif + + /* Set variables according to the value of opt_[qc]space_max_2pow. */ + qspace_max = (1U << opt_qspace_max_2pow); + cspace_min = CACHELINE_CEILING(qspace_max); + if (cspace_min == qspace_max) + cspace_min += CACHELINE; + cspace_max = (1U << opt_cspace_max_2pow); + sspace_min = SUBPAGE_CEILING(cspace_max); + if (sspace_min == cspace_max) + sspace_min += SUBPAGE; + assert(sspace_min < pagesize); + sspace_max = pagesize - SUBPAGE; + +#ifdef MALLOC_TINY + assert(QUANTUM_2POW >= TINY_MIN_2POW); +#endif + assert(ntbins <= QUANTUM_2POW); + nqbins = qspace_max >> QUANTUM_2POW; + ncbins = ((cspace_max - cspace_min) >> CACHELINE_2POW) + 1; + nsbins = ((sspace_max - sspace_min) >> SUBPAGE_2POW) + 1; + nbins = ntbins + nqbins + ncbins + nsbins; + + if (size2bin_init()) { + malloc_mutex_unlock(&init_lock); + return (true); + } /* Set variables according to the value of opt_chunk_2pow. */ chunksize = (1LU << opt_chunk_2pow); @@ -4289,11 +5115,8 @@ MALLOC_OUT: size_t header_size; /* - * Compute the header size such that it is large - * enough to contain the page map and enough nodes for the - * worst case: one node per non-header page plus one extra for - * situations where we briefly have one more node allocated - * than we will need. + * Compute the header size such that it is large enough to + * contain the page map. */ header_size = sizeof(arena_chunk_t) + (sizeof(arena_chunk_map_t) * (chunk_npages - 1)); @@ -4310,10 +5133,7 @@ MALLOC_OUT: #endif /* Various sanity checks that regard configuration. */ - assert(quantum >= sizeof(void *)); - assert(quantum <= pagesize); assert(chunksize >= pagesize); - assert(quantum * 4 <= chunksize); /* Initialize chunks data. */ malloc_mutex_init(&huge_mtx); @@ -4350,10 +5170,10 @@ MALLOC_OUT: if (ncpus > 1) { /* - * For SMP systems, create four times as many arenas as there - * are CPUs by default. + * For SMP systems, create twice as many arenas as there are + * CPUs by default. */ - opt_narenas_lshift += 2; + opt_narenas_lshift++; } /* Determine how many arenas to use. */ @@ -4682,8 +5502,37 @@ malloc_usable_size(const void *ptr) */ /******************************************************************************/ /* - * Begin library-private functions, used by threading libraries for protection - * of malloc during fork(). 
These functions are only called if the program is + * Begin library-private functions. + */ + +/******************************************************************************/ +/* + * Begin thread cache. + */ + +/* + * We provide an unpublished interface in order to receive notifications from + * the pthreads library whenever a thread exits. This allows us to clean up + * thread caches. + */ +void +_malloc_thread_cleanup(void) +{ + +#ifdef MALLOC_MAG + if (mag_rack != NULL) { + assert(mag_rack != (void *)-1); + mag_rack_destroy(mag_rack); +#ifdef MALLOC_DEBUG + mag_rack = (void *)-1; +#endif + } +#endif +} + +/* + * The following functions are used by threading libraries for protection of + * malloc during fork(). These functions are only called if the program is * running in threaded mode, so there is no need to check whether the program * is threaded here. */ |
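
The sketches below are not part of the committed diff; each is a small, self-contained illustration of a mechanism the patch introduces. This first one mirrors the size-class scheme from the updated header comment and from size2bin_validate(): a small request is rounded up within the tiny, quantum-spaced, cacheline-spaced, or subpage-spaced range. The constants assume the 64-bit defaults used in the patch (QUANTUM_2POW == 4, CACHELINE_2POW == 6, SUBPAGE_2POW == 8, QSPACE_MAX_2POW_DEFAULT == 7, CSPACE_MAX_2POW_DEFAULT == 9, 4 kB pages); small_class() is an illustrative name, not a function from the patch.

#include <stddef.h>
#include <stdio.h>

#define QUANTUM         16U     /* 1U << QUANTUM_2POW */
#define CACHELINE       64U     /* 1U << CACHELINE_2POW */
#define SUBPAGE         256U    /* 1U << SUBPAGE_2POW */
#define QSPACE_MAX      128U    /* 1U << QSPACE_MAX_2POW_DEFAULT */
#define CSPACE_MAX      512U    /* 1U << CSPACE_MAX_2POW_DEFAULT */
#define PAGESIZE        4096U   /* 1U << PAGESIZE_2POW */

/* Round s up to the nearest multiple of a power-of-two unit. */
#define CEILING(s, unit) (((s) + ((unit) - 1)) & ~((size_t)(unit) - 1))

/* Return the size class a small request is rounded to, or 0 if not small. */
static size_t
small_class(size_t size)
{
        size_t tiny;

        if (size <= QUANTUM / 2) {              /* Tiny: 2, 4, 8. */
                for (tiny = 2; tiny < size; tiny <<= 1)
                        ;
                return (tiny);
        }
        if (size <= QSPACE_MAX)                 /* Quantum-spaced: 16..128. */
                return (CEILING(size, QUANTUM));
        if (size <= CSPACE_MAX)                 /* Cacheline-spaced: 192..512. */
                return (CEILING(size, CACHELINE));
        if (size <= PAGESIZE - SUBPAGE)         /* Subpage-spaced: 768..3840. */
                return (CEILING(size, SUBPAGE));
        return (0);                             /* Handled as a large allocation. */
}

int
main(void)
{
        size_t sizes[] = { 1, 8, 9, 100, 129, 500, 513, 3000, 3841 };
        size_t i;

        for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
                printf("%4zu -> %4zu\n", sizes[i], small_class(sizes[i]));
        return (0);
}

Requests above 3840 bytes fall through to the large allocator, matching sspace_max = pagesize - SUBPAGE in the initialization code.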
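
arena_run_reg_dalloc() now selects among three reciprocal tables (qsize_invs, csize_invs, ssize_invs) so that region indices can be computed without a hardware divide. As the in-tree comment explains, X / D is replaced by (X * ((2^21 / D) + 1)) >> 21, which is exact for the multiples of D that occur as region offsets, provided they stay below 2^21. A standalone check of that identity for the quantum-spaced divisors, over the offsets possible inside a small run (RUN_MAX_SMALL is 12 pages in the patch), might look like this:

#include <assert.h>
#include <stdio.h>

#define QUANTUM_2POW    4               /* 64-bit default from the patch. */
#define PAGESIZE        4096U           /* Assumed 4 kB pages. */
#define RUN_MAX_SMALL   (12 * PAGESIZE) /* Cap on small-object run size. */

#define SIZE_INV_SHIFT  21
#define QSIZE_INV(s)    (((1U << SIZE_INV_SHIFT) / ((s) << QUANTUM_2POW)) + 1)

int
main(void)
{
        unsigned s, size, diff, regind;

        /* Quantum-spaced divisors of 3..7 quanta (48..112), as in qsize_invs[]. */
        for (s = 3; s <= 7; s++) {
                size = s << QUANTUM_2POW;
                /* Region offsets within a run are multiples of the region size. */
                for (diff = 0; diff < RUN_MAX_SMALL; diff += size) {
                        regind = (QSIZE_INV(s) * diff) >> SIZE_INV_SHIFT;
                        assert(regind == diff / size);
                }
        }
        printf("reciprocal multiply matches integer division\n");
        return (0);
}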
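
Finally, with MALLOC_MAG each magazine is sized so that the mag_t header plus its rounds[] array fills the 2^opt_mag_size_2pow bytes selected by the new 'R'/'r' options (512 bytes by default). The max_rounds formula below is the one from the patch's initialization path; deriving the pointer-size shift from sizeof(void *) is a simplification for this standalone check.

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

/* Magazine layout copied from the patch. */
typedef struct mag_s {
        size_t  binind;         /* Index of associated bin. */
        size_t  nrounds;        /* Number of cached rounds currently held. */
        void    *rounds[1];     /* Dynamically sized. */
} mag_t;

#define MAG_SIZE_2POW_DEFAULT   9       /* 512-byte magazines by default. */

int
main(void)
{
        size_t sizeof_ptr_2pow = (sizeof(void *) == 8) ? 3 : 2;
        size_t max_rounds, mag_bytes;

        /* Same formula the patch uses to compute max_rounds at startup. */
        max_rounds = ((size_t)1 << (MAG_SIZE_2POW_DEFAULT - sizeof_ptr_2pow)) -
            (sizeof(mag_t) >> sizeof_ptr_2pow) + 1;

        /* rounds[0] is already part of sizeof(mag_t), hence the "- 1". */
        mag_bytes = sizeof(mag_t) + sizeof(void *) * (max_rounds - 1);

        printf("max_rounds = %zu, magazine occupies %zu bytes\n",
            max_rounds, mag_bytes);
        assert(mag_bytes == (size_t)1 << MAG_SIZE_2POW_DEFAULT);
        return (0);
}

On an LP64 system this yields 62 rounds per 512-byte magazine.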