Diffstat (limited to 'lib/libc/stdlib/malloc.c')
-rw-r--r-- | lib/libc/stdlib/malloc.c | 3471 |
1 file changed, 1018 insertions, 2453 deletions
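
The patch below replaces the region-based allocator with one that serves tiny, quantum-spaced, and power-of-2 size classes out of runs carved from chunks. The following stand-alone sketch (illustrative only, not code from the patch; the QUANTUM, TINY_MIN, and SMALL_MAX constants assume the i386/amd64 defaults noted in the diff) shows how a small request might be rounded to a bin's region size under that scheme; pow2_ceil() mirrors the helper the patch adds.

#include <stddef.h>
#include <stdio.h>

#define QUANTUM		16	/* 1 << QUANTUM_2POW_MIN on i386/amd64 (assumed). */
#define TINY_MIN	2	/* 1 << TINY_MIN_2POW. */
#define SMALL_MAX	512	/* 1 << SMALL_MAX_2POW_DEFAULT. */

/* Round up to the next power of 2, using the same bit-smearing approach as pow2_ceil(). */
static size_t
pow2_ceil(size_t x)
{

	x--;
	x |= x >> 1;
	x |= x >> 2;
	x |= x >> 4;
	x |= x >> 8;
	x |= x >> 16;
	x++;
	return (x);
}

/* Region size that a sub-page ("small") request would be rounded to. */
static size_t
small_size_class(size_t size)
{

	if (size < QUANTUM) {
		/* Tiny: power-of-2 classes 2, 4, 8 (then the first quantum bin). */
		return (size <= TINY_MIN ? TINY_MIN : pow2_ceil(size));
	} else if (size <= SMALL_MAX) {
		/* Quantum-spaced classes: 16, 32, 48, ..., 512. */
		return ((size + QUANTUM - 1) & ~((size_t)QUANTUM - 1));
	}
	/* Sub-page power-of-2 classes: 1024, 2048. */
	return (pow2_ceil(size));
}

int
main(void)
{

	/* Prints "4 112 1024". */
	printf("%zu %zu %zu\n", small_size_class(3), small_size_class(100),
	    small_size_class(600));
	return (0);
}
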
diff --git a/lib/libc/stdlib/malloc.c b/lib/libc/stdlib/malloc.c index 0c7c9cc2f1c6..5ebb51df00dd 100644 --- a/lib/libc/stdlib/malloc.c +++ b/lib/libc/stdlib/malloc.c @@ -37,18 +37,7 @@ * + Cache line sharing between arenas is avoided for internal data * structures. * - * + Memory is managed in chunks, rather than as individual pages. - * - * + Allocations are region-based; internal region size is a discrete - * multiple of a quantum that is appropriate for alignment constraints. - * This applies to allocations that are up to half the chunk size. - * - * + Coalescence of regions is delayed in order to reduce overhead and - * fragmentation. - * - * + realloc() always moves data, in order to reduce fragmentation. - * - * + Red-black trees are used to sort large regions. + * + Memory is managed in chunks and runs, rather than as individual pages. * * + Data structures for huge allocations are stored separately from * allocations, which reduces thrashing during low memory conditions. @@ -189,17 +178,6 @@ __FBSDID("$FreeBSD$"); # define MALLOC_STATS #endif -/* - * Include redzones before/after every region, and check for buffer overflows. - */ -#ifndef NO_MALLOC_EXTRAS -# define MALLOC_REDZONES -#endif -#ifdef MALLOC_REDZONES -# define MALLOC_RED_2POW 4 -# define MALLOC_RED ((size_t)(1 << MALLOC_RED_2POW)) -#endif - #ifndef MALLOC_DEBUG # ifndef NDEBUG # define NDEBUG @@ -209,15 +187,12 @@ __FBSDID("$FreeBSD$"); #ifdef MALLOC_DEBUG /* Disable inlining to make debugging easier. */ -# define __inline +# define inline #endif /* Size of stack-allocated buffer passed to strerror_r(). */ #define STRERROR_BUF 64 -/* Number of quantum-spaced bins to store free regions in. */ -#define NBINS 128 - /* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */ #ifdef __i386__ # define QUANTUM_2POW_MIN 4 @@ -266,10 +241,10 @@ __FBSDID("$FreeBSD$"); * * chunksize limits: * - * pagesize <= chunk_size <= 2^29 + * 2^(pagesize_2pow - 1 + RUN_MIN_REGS_2POW) <= chunk_size <= 2^28 */ -#define CHUNK_2POW_DEFAULT 24 -#define CHUNK_2POW_MAX 29 +#define CHUNK_2POW_DEFAULT 21 +#define CHUNK_2POW_MAX 28 /* * Maximum size of L1 cache line. This is used to avoid cache line aliasing, @@ -279,8 +254,39 @@ __FBSDID("$FreeBSD$"); #define CACHELINE_2POW 6 #define CACHELINE ((size_t)(1 << CACHELINE_2POW)) -/* Default number of regions to delay coalescence for. */ -#define NDELAY 256 +/* Minimum size class that is a power of 2, and smaller than the quantum. */ +#define TINY_MIN_2POW 1 +#define TINY_MIN (1 << TINY_MIN_2POW) + +/* + * Maximum size class that is a multiple of the quantum, but not (necessarily) + * a power of 2. Above this size, allocations are rounded up to the nearest + * power of 2. + */ +#define SMALL_MAX_2POW_DEFAULT 9 +#define SMALL_MAX_DEFAULT (1 << SMALL_MAX_2POW_DEFAULT) + +/* + * Minimum number of regions that must fit into a run that serves quantum-size + * bin allocations. + * + * Note that if this is set too low, space will be wasted if there are size + * classes that are small enough that RUN_MIN_REGS regions don't fill a page. + * If this is set too high, then the overhead of searching through the bitmap + * that tracks region usage will become excessive. + */ +#define RUN_MIN_REGS_2POW 10 +#define RUN_MIN_REGS (1 << RUN_MIN_REGS_2POW) + +/* + * Maximum number of pages for a run that is used for bin allocations. + * + * Note that if this is set too low, then fragmentation for the largest bin + * size classes will be high. 
If this is set too high, then even small + * programs will often have to allocate more than two chunks early on. + */ +#define RUN_MAX_PAGES_2POW 4 +#define RUN_MAX_PAGES (1 << RUN_MAX_PAGES_2POW) /******************************************************************************/ @@ -309,21 +315,20 @@ struct malloc_bin_stats_s { */ uint64_t nrequests; + /* Total number of runs created for this bin's size class. */ + uint64_t nruns; + /* - * Number of exact-fit allocations that were successfully serviced by - * this bin. + * Total number of run promotions/demotions for this bin's size class. */ - uint64_t nserviced; + uint64_t npromote; + uint64_t ndemote; - /* High-water marks for this bin. */ - unsigned long highcached; + /* High-water mark for this bin. */ + unsigned long highruns; - /* - * Current number of regions in this bin. This number isn't needed - * during normal operation, so is maintained here in order to allow - * calculating the high water mark. - */ - unsigned long curcached; + /* Current number of runs in this bin. */ + unsigned long curruns; }; typedef struct arena_stats_s arena_stats_t; @@ -334,80 +339,7 @@ struct arena_stats_s { uint64_t ncalloc; uint64_t ndalloc; uint64_t nralloc; - - /* Number of region splits. */ - uint64_t nsplit; - - /* Number of region coalescences. */ - uint64_t ncoalesce; - - /* Bin statistics. */ - malloc_bin_stats_t bins[NBINS]; - - /* Split statistics. */ - struct { - /* - * Number of times a region is requested from the "split" field - * of the arena. - */ - uint64_t nrequests; - - /* - * Number of times the "split" field of the arena successfully - * services requests. - */ - uint64_t nserviced; - } split; - - /* Frag statistics. */ - struct { - /* Number of times the "frag" field of the arena is refilled. */ - uint64_t nrefills; - - /* - * Number of times a region is requested from the "frag" field - * of the arena. - */ - uint64_t nrequests; - - /* - * Number of times the "frag" field of the arena successfully - * services requests. - */ - uint64_t nserviced; - } frag; - - /* large and large_regions statistics. */ - struct { - /* - * Number of allocation requests that were too large for a bin, - * but not large enough for a hugh allocation. - */ - uint64_t nrequests; - - /* - * Number of allocation requests that were successfully - * serviced by large_regions. - */ - uint64_t nserviced; - - /* - * High-water mark for large_regions (number of nodes in tree). - */ - unsigned long highcached; - - /* - * Used only to store the current number of nodes, since this - * number isn't maintained anywhere else. - */ - unsigned long curcached; - } large; - - /* Huge allocation statistics. */ - struct { - /* Number of huge allocation requests. */ - uint64_t nrequests; - } huge; + uint64_t nmadvise; }; typedef struct chunk_stats_s chunk_stats_t; @@ -433,21 +365,9 @@ struct chunk_stats_s { * Chunk data structures. */ -/* Needed by chunk data structures. */ -typedef struct arena_s arena_t; - /* Tree of chunks. */ typedef struct chunk_node_s chunk_node_t; struct chunk_node_s { - /* - * For an active chunk that is currently carved into regions by an - * arena allocator, this points to the arena that owns the chunk. We - * set this pointer even for huge allocations, so that it is possible - * to tell whether a huge allocation was done on behalf of a user - * allocation request, or on behalf of an internal allocation request. - */ - arena_t *arena; - /* Linkage for the chunk tree. 
*/ RB_ENTRY(chunk_node_s) link; @@ -461,209 +381,210 @@ struct chunk_node_s { /* Total chunk size. */ size_t size; - - /* Number of trailing bytes that are not used. */ - size_t extra; }; typedef struct chunk_tree_s chunk_tree_t; RB_HEAD(chunk_tree_s, chunk_node_s); /******************************************************************************/ /* - * Region data structures. + * Arena data structures. */ -typedef struct region_s region_t; +typedef struct arena_s arena_t; +typedef struct arena_bin_s arena_bin_t; -/* - * Tree of region headers, used for free regions that don't fit in the arena - * bins. - */ -typedef struct region_node_s region_node_t; -struct region_node_s { - RB_ENTRY(region_node_s) link; - region_t *reg; +typedef struct arena_chunk_map_s arena_chunk_map_t; +struct arena_chunk_map_s { + bool free:1; + bool large:1; + unsigned npages:15; /* Limiting factor for CHUNK_2POW_MAX. */ + unsigned pos:15; }; -typedef struct region_tree_s region_tree_t; -RB_HEAD(region_tree_s, region_node_s); -typedef struct region_prev_s region_prev_t; -struct region_prev_s { - uint32_t size; -}; +/* Arena chunk header. */ +typedef struct arena_chunk_s arena_chunk_t; +struct arena_chunk_s { + /* Arena that owns the chunk. */ + arena_t *arena; + + /* Linkage for the arena's chunk tree. */ + RB_ENTRY(arena_chunk_s) link; -#define NEXT_SIZE_MASK 0x1fffffffU -typedef struct { -#ifdef MALLOC_REDZONES - char prev_red[MALLOC_RED]; -#endif /* - * Typical bit pattern for bits: - * - * pncsssss ssssssss ssssssss ssssssss + * Number of pages in use. This is maintained in order to make + * detection of empty chunks fast. + */ + uint32_t pages_used; + + /* + * Array of counters that keeps track of how many free runs of each + * size are available in this chunk. This table is sized at compile + * time, which is wasteful. However, due to unrelated rounding, this + * doesn't actually waste any otherwise useful space. * - * p : Previous free? - * n : Next free? - * c : Part of a range of contiguous allocations? - * s : Next size (number of quanta). + * index == 2^n pages * - * It's tempting to use structure bitfields here, but the compiler has - * to make assumptions that make the code slower than direct bit - * manipulations, and these fields are manipulated a lot. + * index | npages + * ------+------- + * 0 | 1 + * 1 | 2 + * 2 | 4 + * 3 | 8 + * : */ - uint32_t bits; + uint32_t nfree_runs[CHUNK_2POW_MAX/* - PAGE_SHIFT */]; -#ifdef MALLOC_REDZONES - size_t next_exact_size; - char next_red[MALLOC_RED]; -#endif -} region_sep_t; - -typedef struct region_next_small_sizer_s region_next_small_sizer_t; -struct region_next_small_sizer_s -{ - qr(region_t) link; + /* Map of pages within chunk that keeps track of free/large/small. */ + arena_chunk_map_t map[1]; /* Dynamically sized. */ }; +typedef struct arena_chunk_tree_s arena_chunk_tree_t; +RB_HEAD(arena_chunk_tree_s, arena_chunk_s); -typedef struct region_next_small_s region_next_small_t; -struct region_next_small_s -{ - qr(region_t) link; +typedef struct arena_run_s arena_run_t; +struct arena_run_s { +#ifdef MALLOC_DEBUG + uint32_t magic; +# define ARENA_RUN_MAGIC 0x384adf93 +#endif - /* Only accessed for delayed regions & footer invalid. */ - uint32_t slot; -}; + /* Linkage for run rings. */ + qr(arena_run_t) link; -typedef struct region_next_large_s region_next_large_t; -struct region_next_large_s -{ - region_node_t node; + /* Bin this run is associated with. */ + arena_bin_t *bin; - /* Use for LRU vs MRU tree ordering. 
*/ - bool lru; -}; + /* Bitmask of in-use regions (0: in use, 1: free). */ +#define REGS_MASK_NELMS \ + ((1 << (RUN_MIN_REGS_2POW + 1)) / (sizeof(unsigned) << 3)) + unsigned regs_mask[REGS_MASK_NELMS]; -typedef struct region_next_s region_next_t; -struct region_next_s { - union { - region_next_small_t s; - region_next_large_t l; - } u; -}; + /* Index of first element that might have a free region. */ + unsigned regs_minelm; -/* - * Region separator, including prev/next fields that are only accessible when - * the neighboring regions are free. - */ -struct region_s { - /* This field must only be accessed if sep.prev_free is true. */ - region_prev_t prev; - - /* Actual region separator that is always present between regions. */ - region_sep_t sep; + /* Number of free regions in run. */ + unsigned nfree:(RUN_MIN_REGS_2POW + 1); /* - * These fields must only be accessed if sep.next_free or - * sep.next_contig is true. + * Current quartile for this run, one of: {RUN_Q0, RUN_25, RUN_Q50, + * RUN_Q75, RUN_Q100}. */ - region_next_t next; -}; +#define RUN_Q0 0 +#define RUN_Q25 1 +#define RUN_Q50 2 +#define RUN_Q75 3 +#define RUN_Q100 4 + unsigned quartile:3; -/* Small variant of region separator, only used for size calculations. */ -typedef struct region_small_sizer_s region_small_sizer_t; -struct region_small_sizer_s { - region_prev_t prev; - region_sep_t sep; - region_next_small_sizer_t next; + /* + * Limits on the number of free regions for the fullness quartile this + * run is currently in. If nfree goes outside these limits, the run + * is moved to a different fullness quartile. + */ + unsigned free_max:(RUN_MIN_REGS_2POW + 1); + unsigned free_min:(RUN_MIN_REGS_2POW + 1); }; -/******************************************************************************/ -/* - * Arena data structures. - */ - -typedef struct arena_bin_s arena_bin_t; struct arena_bin_s { /* - * Link into ring before the oldest free region and just after the - * newest free region. + * Current run being used to service allocations of this bin's size + * class. */ - region_t regions; + arena_run_t *runcur; + + /* + * Links into rings of runs, of various fullnesses. A new run + * conceptually starts off in runs0, and when it reaches 25% full, it + * is moved to the runs25 ring. For the run to be moved again, it must + * become either empty or 50% full. Thus, each ring contains runs that + * are within 25% of the advertised fullness for the ring. This + * provides a low-overhead mechanism for segregating runs into + * approximate fullness classes. + * + * These rings are useful when looking for an existing run + * to use when runcur is no longer usable. We look for usable runs in + * the following order: + * + * 1) runs75 + * 2) runs50 + * 3) runs25 + * 4) runs100 + * + * We never look in runs0 because it never has more than one run in it, + * and in such cases runcur already points to that run. + * + * runs100 isn't a good place to look, because it contains runs that + * may be completely full. Still, we look there as a last resort in + * order to avoid allocating a new run if at all possible. + */ + /* arena_run_t runs0; 0% <= fullness < 25% */ + arena_run_t runs25; /* 0% < fullness < 50% */ + arena_run_t runs50; /* 25% < fullness < 75% */ + arena_run_t runs75; /* 50% < fullness < 100% */ + arena_run_t runs100; /* 75% < fullness <= 100% */ + + /* Size of regions in a run for this bin's size class. */ + size_t reg_size; + + /* Total size of a run for this bin's size class. 
*/ + size_t run_size; + + /* Total number of regions in a run for this bin's size class. */ + uint32_t nregs; + + /* Offset of first region in a run for this bin's size class. */ + uint32_t reg0_offset; + +#ifdef MALLOC_STATS + /* Bin statistics. */ + malloc_bin_stats_t stats; +#endif }; struct arena_s { #ifdef MALLOC_DEBUG - uint32_t magic; + uint32_t magic; # define ARENA_MAGIC 0x947d3d24 #endif /* All operations on this arena require that mtx be locked. */ - malloc_mutex_t mtx; + malloc_mutex_t mtx; + +#ifdef MALLOC_STATS + /* Total byte count of allocated memory, not including overhead. */ + size_t allocated; + + arena_stats_t stats; +#endif + + /* + * Tree of chunks this arena manages. + */ + arena_chunk_tree_t chunks; /* * bins is used to store rings of free regions of the following sizes, - * assuming a 16-byte quantum (sizes include region separators): + * assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS. * * bins[i] | size | * --------+------+ - * 0 | 32 | - * 1 | 48 | - * 2 | 64 | + * 0 | 2 | + * 1 | 4 | + * 2 | 8 | + * --------+------+ + * 3 | 16 | + * 4 | 32 | + * 5 | 48 | + * 6 | 64 | * : : * : : + * 33 | 496 | + * 34 | 512 | + * --------+------+ + * 35 | 1024 | + * 36 | 2048 | * --------+------+ */ - arena_bin_t bins[NBINS]; - - /* - * A bitmask that corresponds to which bins have elements in them. - * This is used when searching for the first bin that contains a free - * region that is large enough to service an allocation request. - */ -#define BINMASK_NELMS (NBINS / (sizeof(int) << 3)) - int bins_mask[BINMASK_NELMS]; - - /* - * Tree of free regions of the size range [bin_maxsize..~chunk). These - * are sorted primarily by size, and secondarily by LRU. - */ - region_tree_t large_regions; - - /* - * If not NULL, a region that is not stored in bins or large_regions. - * If large enough, this region is used instead of any region stored in - * bins or large_regions, in order to reduce the number of insert/remove - * operations, and in order to increase locality of allocation in - * common cases. - */ - region_t *split; - - /* - * If not NULL, a region that is not stored in bins or large_regions. - * If large enough, this region is preferentially used for small - * allocations over any region in large_regions, split, or over-fit - * small bins. - */ - region_t *frag; - - /* Tree of chunks that this arena currenly owns. */ - chunk_tree_t chunks; - unsigned nchunks; - - /* - * FIFO ring of free regions for which coalescence is delayed. A slot - * that contains NULL is considered empty. opt_ndelay stores how many - * elements there are in the FIFO. - */ - region_t **delayed; - uint32_t next_delayed; /* Next slot in delayed to use. */ - -#ifdef MALLOC_STATS - /* Total byte count of allocated memory, not including overhead. */ - size_t allocated; - - arena_stats_t stats; -#endif + arena_bin_t bins[1]; /* Dynamically sized. */ }; /******************************************************************************/ @@ -679,16 +600,25 @@ static unsigned ncpus; /* VM page size. */ static unsigned pagesize; +static unsigned pagesize_2pow; + +/* Various bin-related settings. */ +static size_t bin_maxclass; /* Max size class for bins. */ +static unsigned ntbins; /* Number of (2^n)-spaced tiny bins. */ +static unsigned nqbins; /* Number of quantum-spaced bins. */ +static unsigned npbins; /* Number of (2^n)-spaced bins. */ +static size_t small_min; +static size_t small_max; /* Various quantum-related settings. 
*/ static size_t quantum; static size_t quantum_mask; /* (quantum - 1). */ -static size_t bin_shift; -static size_t bin_maxsize; /* Various chunk-related settings. */ static size_t chunk_size; static size_t chunk_size_mask; /* (chunk_size - 1). */ +static size_t arena_maxclass; /* Max size class for arenas. */ +static unsigned arena_chunk_maplen; /********/ /* @@ -718,8 +648,9 @@ static void *brk_max; * Byte counters for allocated/total space used by the chunks in the huge * allocations tree. */ +static uint64_t huge_nmalloc; +static uint64_t huge_ndalloc; static size_t huge_allocated; -static size_t huge_total; #endif /* @@ -736,7 +667,7 @@ static chunk_tree_t old_chunks; /* * Current chunk that is being used for internal memory allocations. This * chunk is carved up in cacheline-size quanta, so that there is no chance of - * false cach sharing. + * false cache line sharing. * */ static void *base_chunk; static void *base_next_addr; @@ -782,16 +713,22 @@ static chunk_stats_t stats_chunks; */ const char *_malloc_options; +#ifndef NO_MALLOC_EXTRAS static bool opt_abort = true; static bool opt_junk = true; +#else +static bool opt_abort = false; +static bool opt_junk = false; +#endif +static bool opt_hint = false; static bool opt_print_stats = false; static size_t opt_quantum_2pow = QUANTUM_2POW_MIN; +static size_t opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT; static size_t opt_chunk_2pow = CHUNK_2POW_DEFAULT; static bool opt_utrace = false; static bool opt_sysv = false; static bool opt_xmalloc = false; static bool opt_zero = false; -static uint32_t opt_ndelay = NDELAY; static int32_t opt_narenas_lshift = 0; typedef struct { @@ -819,40 +756,29 @@ static void *base_alloc(size_t size); static chunk_node_t *base_chunk_node_alloc(void); static void base_chunk_node_dealloc(chunk_node_t *node); #ifdef MALLOC_STATS -static void stats_merge(arena_t *arena, arena_stats_t *stats_arenas); -static void stats_print(arena_stats_t *stats_arenas); +static void stats_print(arena_t *arena); #endif static void *pages_map(void *addr, size_t size); static void pages_unmap(void *addr, size_t size); static void *chunk_alloc(size_t size); static void chunk_dealloc(void *chunk, size_t size); -static unsigned arena_bins_search(arena_t *arena, size_t size); -static bool arena_coalesce(arena_t *arena, region_t **reg, size_t size); -static void arena_coalesce_hard(arena_t *arena, region_t *reg, - region_t *next, size_t size, bool split_adjacent); -static void arena_large_insert(arena_t *arena, region_t *reg, bool lru); -static void arena_large_cache(arena_t *arena, region_t *reg, bool lru); -static void arena_lru_cache(arena_t *arena, region_t *reg); -static void arena_delay_cache(arena_t *arena, region_t *reg); -static region_t *arena_frag_reg_alloc(arena_t *arena, size_t size, bool fit); -static region_t *arena_split_reg_alloc(arena_t *arena, size_t size, bool fit); -static void arena_reg_fit(arena_t *arena, size_t size, region_t *reg, - bool restore_split); -static region_t *arena_large_reg_alloc(arena_t *arena, size_t size, bool fit); -static region_t *arena_chunk_reg_alloc(arena_t *arena, size_t size, bool fit); +static void arena_run_split(arena_t *arena, arena_run_t *run, bool large, + size_t size); +static arena_chunk_t *arena_chunk_alloc(arena_t *arena); +static void arena_chunk_dealloc(arena_chunk_t *chunk); +static void arena_bin_run_refile(arena_t *arena, arena_bin_t *bin, + arena_run_t *run, size_t size, bool promote); +static arena_run_t *arena_run_alloc(arena_t *arena, bool large, size_t size); +static void 
arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size); +static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin, + size_t size); +static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin, + size_t size); static void *arena_malloc(arena_t *arena, size_t size); -static void *arena_palloc(arena_t *arena, size_t alignment, size_t size); -static void *arena_calloc(arena_t *arena, size_t num, size_t size); static size_t arena_salloc(arena_t *arena, void *ptr); -#ifdef MALLOC_REDZONES -static void redzone_check(void *ptr); -#endif static void arena_dalloc(arena_t *arena, void *ptr); -#ifdef NOT_YET -static void *arena_ralloc(arena_t *arena, void *ptr, size_t size); -#endif #ifdef MALLOC_STATS -static bool arena_stats(arena_t *arena, size_t *allocated, size_t *total); +static size_t arena_allocated(arena_t *arena); #endif static bool arena_new(arena_t *arena); static arena_t *arenas_extend(unsigned ind); @@ -863,7 +789,7 @@ static void *huge_malloc(arena_t *arena, size_t size); static void huge_dalloc(void *ptr); static void *imalloc(arena_t *arena, size_t size); static void *ipalloc(arena_t *arena, size_t alignment, size_t size); -static void *icalloc(arena_t *arena, size_t num, size_t size); +static void *icalloc(arena_t *arena, size_t size); static size_t isalloc(void *ptr); static void idalloc(void *ptr); static void *iralloc(arena_t *arena, void *ptr, size_t size); @@ -889,7 +815,7 @@ malloc_mutex_init(malloc_mutex_t *a_mutex) a_mutex->lock = lock; } -static __inline void +static inline void malloc_mutex_lock(malloc_mutex_t *a_mutex) { @@ -897,7 +823,7 @@ malloc_mutex_lock(malloc_mutex_t *a_mutex) _SPINLOCK(&a_mutex->lock); } -static __inline void +static inline void malloc_mutex_unlock(malloc_mutex_t *a_mutex) { @@ -933,27 +859,21 @@ malloc_mutex_unlock(malloc_mutex_t *a_mutex) #define QUANTUM_CEILING(a) \ (((a) + quantum_mask) & ~quantum_mask) -/* Return the offset within a chunk to the first region separator. */ -#define CHUNK_REG_OFFSET \ - (QUANTUM_CEILING(sizeof(chunk_node_t) + \ - sizeof(region_sep_t)) - offsetof(region_t, next)) - -/* - * Return how many bytes of usable space are needed for an allocation of size - * bytes. This value is not a multiple of quantum, since it doesn't include - * the region separator. - */ -static __inline size_t -region_ceiling(size_t size) +/* Compute the smallest power of 2 that is >= x. */ +static inline size_t +pow2_ceil(size_t x) { - size_t quantum_size, min_reg_quantum; - - quantum_size = QUANTUM_CEILING(size + sizeof(region_sep_t)); - min_reg_quantum = QUANTUM_CEILING(sizeof(region_small_sizer_t)); - if (quantum_size >= min_reg_quantum) - return (quantum_size); - else - return (min_reg_quantum); + x--; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; +#if (SIZEOF_PTR == 8) + x |= x >> 32; +#endif + x++; + return (x); } static void @@ -1066,199 +986,65 @@ base_chunk_node_dealloc(chunk_node_t *node) /******************************************************************************/ -/* - * Note that no bitshifting is done for booleans in any of the bitmask-based - * flag manipulation functions that follow; test for non-zero versus zero. 
- */ - -/**********************/ -static __inline uint32_t -region_prev_free_get(region_sep_t *sep) -{ - - return ((sep->bits) & 0x80000000U); -} - -static __inline void -region_prev_free_set(region_sep_t *sep) -{ - - sep->bits = ((sep->bits) | 0x80000000U); -} - -static __inline void -region_prev_free_unset(region_sep_t *sep) -{ - - sep->bits = ((sep->bits) & 0x7fffffffU); -} - -/**********************/ -static __inline uint32_t -region_next_free_get(region_sep_t *sep) -{ - - return ((sep->bits) & 0x40000000U); -} - -static __inline void -region_next_free_set(region_sep_t *sep) -{ - - sep->bits = ((sep->bits) | 0x40000000U); -} - -static __inline void -region_next_free_unset(region_sep_t *sep) -{ - - sep->bits = ((sep->bits) & 0xbfffffffU); -} - -/**********************/ -static __inline uint32_t -region_next_contig_get(region_sep_t *sep) -{ - - return ((sep->bits) & 0x20000000U); -} - -static __inline void -region_next_contig_set(region_sep_t *sep) -{ - - sep->bits = ((sep->bits) | 0x20000000U); -} - -static __inline void -region_next_contig_unset(region_sep_t *sep) -{ - - sep->bits = ((sep->bits) & 0xdfffffffU); -} - -/********************/ -static __inline size_t -region_next_size_get(region_sep_t *sep) -{ - - return ((size_t)(((sep->bits) & NEXT_SIZE_MASK) << opt_quantum_2pow)); -} - -static __inline void -region_next_size_set(region_sep_t *sep, size_t size) -{ - uint32_t bits; - - assert(size % quantum == 0); - - bits = sep->bits; - bits &= ~NEXT_SIZE_MASK; - bits |= (((uint32_t)size) >> opt_quantum_2pow); - - sep->bits = bits; -} - #ifdef MALLOC_STATS static void -stats_merge(arena_t *arena, arena_stats_t *stats_arenas) -{ - unsigned i; - - stats_arenas->nmalloc += arena->stats.nmalloc; - stats_arenas->npalloc += arena->stats.npalloc; - stats_arenas->ncalloc += arena->stats.ncalloc; - stats_arenas->ndalloc += arena->stats.ndalloc; - stats_arenas->nralloc += arena->stats.nralloc; - - stats_arenas->nsplit += arena->stats.nsplit; - stats_arenas->ncoalesce += arena->stats.ncoalesce; - - /* Split. */ - stats_arenas->split.nrequests += arena->stats.split.nrequests; - stats_arenas->split.nserviced += arena->stats.split.nserviced; - - /* Frag. */ - stats_arenas->frag.nrefills += arena->stats.frag.nrefills; - stats_arenas->frag.nrequests += arena->stats.frag.nrequests; - stats_arenas->frag.nserviced += arena->stats.frag.nserviced; - - /* Bins. */ - for (i = 0; i < NBINS; i++) { - stats_arenas->bins[i].nrequests += - arena->stats.bins[i].nrequests; - stats_arenas->bins[i].nserviced += - arena->stats.bins[i].nserviced; - if (arena->stats.bins[i].highcached - > stats_arenas->bins[i].highcached) { - stats_arenas->bins[i].highcached - = arena->stats.bins[i].highcached; - } - stats_arenas->bins[i].curcached - += arena->stats.bins[i].curcached; - } - - /* large and large_regions. */ - stats_arenas->large.nrequests += arena->stats.large.nrequests; - stats_arenas->large.nserviced += arena->stats.large.nserviced; - if (arena->stats.large.highcached > stats_arenas->large.highcached) - stats_arenas->large.highcached = arena->stats.large.highcached; - stats_arenas->large.curcached += arena->stats.large.curcached; - - /* Huge allocations. 
*/ - stats_arenas->huge.nrequests += arena->stats.huge.nrequests; -} - -static void -stats_print(arena_stats_t *stats_arenas) +stats_print(arena_t *arena) { unsigned i; + int gap_start; malloc_printf("calls:\n"); - malloc_printf(" %13s%13s%13s%13s%13s\n", "nmalloc", "npalloc", - "ncalloc", "ndalloc", "nralloc"); - malloc_printf(" %13llu%13llu%13llu%13llu%13llu\n", - stats_arenas->nmalloc, stats_arenas->npalloc, stats_arenas->ncalloc, - stats_arenas->ndalloc, stats_arenas->nralloc); - - malloc_printf("region events:\n"); - malloc_printf(" %13s%13s\n", "nsplit", "ncoalesce"); - malloc_printf(" %13llu%13llu\n", stats_arenas->nsplit, - stats_arenas->ncoalesce); - - malloc_printf("cached split usage:\n"); - malloc_printf(" %13s%13s\n", "nrequests", "nserviced"); - malloc_printf(" %13llu%13llu\n", stats_arenas->split.nrequests, - stats_arenas->split.nserviced); - - malloc_printf("cached frag usage:\n"); - malloc_printf(" %13s%13s%13s\n", "nrefills", "nrequests", "nserviced"); - malloc_printf(" %13llu%13llu%13llu\n", stats_arenas->frag.nrefills, - stats_arenas->frag.nrequests, stats_arenas->frag.nserviced); + malloc_printf(" %12s %12s %12s %12s %12s %12s\n", "nmalloc", "npalloc", + "ncalloc", "ndalloc", "nralloc", "nmadvise"); + malloc_printf(" %12llu %12llu %12llu %12llu %12llu %12llu\n", + arena->stats.nmalloc, arena->stats.npalloc, arena->stats.ncalloc, + arena->stats.ndalloc, arena->stats.nralloc, arena->stats.nmadvise); malloc_printf("bins:\n"); - malloc_printf(" %4s%7s%13s%13s%11s%11s\n", "bin", - "size", "nrequests", "nserviced", "highcached", "curcached"); - for (i = 0; i < NBINS; i++) { - malloc_printf( - " %4u%7u%13llu%13llu%11lu%11lu\n", - i, ((i + bin_shift) << opt_quantum_2pow), - stats_arenas->bins[i].nrequests, - stats_arenas->bins[i].nserviced, - stats_arenas->bins[i].highcached, - stats_arenas->bins[i].curcached); + malloc_printf("%13s %1s %4s %5s %8s %9s %5s %6s %7s %6s %6s\n", + "bin", "", "size", "nregs", "run_size", "nrequests", "nruns", + "hiruns", "curruns", "npromo", "ndemo"); + for (i = 0, gap_start = -1; i < ntbins + nqbins + npbins; i++) { + if (arena->bins[i].stats.nrequests == 0) { + if (gap_start == -1) + gap_start = i; + } else { + if (gap_start != -1) { + if (i > gap_start + 1) { + /* Gap of more than one size class. */ + malloc_printf("[%u..%u]\n", + gap_start, i - 1); + } else { + /* Gap of one size class. */ + malloc_printf("[%u]\n", gap_start); + } + gap_start = -1; + } + malloc_printf( + "%13u %1s %4u %5u %8u %9llu %5llu" + " %6lu %7lu %6llu %6llu\n", + i, + i < ntbins ? "T" : i < ntbins + nqbins ? "Q" : "P", + arena->bins[i].reg_size, + arena->bins[i].nregs, + arena->bins[i].run_size, + arena->bins[i].stats.nrequests, + arena->bins[i].stats.nruns, + arena->bins[i].stats.highruns, + arena->bins[i].stats.curruns, + arena->bins[i].stats.npromote, + arena->bins[i].stats.ndemote); + } + } + if (gap_start != -1) { + if (i > gap_start + 1) { + /* Gap of more than one size class. */ + malloc_printf("[%u..%u]\n", gap_start, i - 1); + } else { + /* Gap of one size class. 
*/ + malloc_printf("[%u]\n", gap_start); + } } - - malloc_printf("large:\n"); - malloc_printf(" %13s%13s%13s%13s\n", "nrequests", "nserviced", - "highcached", "curcached"); - malloc_printf(" %13llu%13llu%13lu%13lu\n", - stats_arenas->large.nrequests, stats_arenas->large.nserviced, - stats_arenas->large.highcached, stats_arenas->large.curcached); - - malloc_printf("huge\n"); - malloc_printf(" %13s\n", "nrequests"); - malloc_printf(" %13llu\n", stats_arenas->huge.nrequests); } #endif @@ -1267,75 +1053,27 @@ stats_print(arena_stats_t *stats_arenas) */ /******************************************************************************/ /* - * Begin Mem. + * Begin chunk management functions. */ -static __inline int +static inline int chunk_comp(chunk_node_t *a, chunk_node_t *b) { - int ret; assert(a != NULL); assert(b != NULL); if ((uintptr_t)a->chunk < (uintptr_t)b->chunk) - ret = -1; + return (-1); else if (a->chunk == b->chunk) - ret = 0; + return (0); else - ret = 1; - - return (ret); + return (1); } /* Generate red-black tree code for chunks. */ RB_GENERATE_STATIC(chunk_tree_s, chunk_node_s, link, chunk_comp); -static __inline int -region_comp(region_node_t *a, region_node_t *b) -{ - int ret; - size_t size_a, size_b; - - assert(a != NULL); - assert(b != NULL); - - size_a = region_next_size_get(&a->reg->sep); - size_b = region_next_size_get(&b->reg->sep); - if (size_a < size_b) - ret = -1; - else if (size_a == size_b) { - if (a == b) { - /* Regions are equal with themselves. */ - ret = 0; - } else { - if (a->reg->next.u.l.lru) { - /* - * Oldest region comes first (secondary LRU - * ordering). a is guaranteed to be the search - * key, which is how we can enforce this - * secondary ordering. - */ - ret = 1; - } else { - /* - * Oldest region comes last (secondary MRU - * ordering). a is guaranteed to be the search - * key, which is how we can enforce this - * secondary ordering. - */ - ret = -1; - } - } - } else - ret = 1; - - return (ret); -} - -/* Generate red-black tree code for regions. */ -RB_GENERATE_STATIC(region_tree_s, region_node_s, link, region_comp); - static void * pages_map(void *addr, size_t size) { @@ -1407,13 +1145,10 @@ chunk_alloc(size_t size) { void *ret, *chunk; chunk_node_t *tchunk, *delchunk; - chunk_tree_t delchunks; assert(size != 0); assert(size % chunk_size == 0); - RB_INIT(&delchunks); - malloc_mutex_lock(&chunks_mtx); if (size == chunk_size) { @@ -1430,17 +1165,9 @@ chunk_alloc(size_t size) delchunk = tchunk; tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk); - /* - * Remove delchunk from the tree, but keep track of the - * address. - */ + /* Remove delchunk from the tree. */ RB_REMOVE(chunk_tree_s, &old_chunks, delchunk); - - /* - * Keep track of the node so that it can be deallocated - * after chunks_mtx is released. - */ - RB_INSERT(chunk_tree_s, &delchunks, delchunk); + base_chunk_node_dealloc(delchunk); #ifdef USE_BRK if ((uintptr_t)chunk >= (uintptr_t)brk_base @@ -1540,18 +1267,6 @@ RETURN: #endif malloc_mutex_unlock(&chunks_mtx); - /* - * Deallocation of the chunk nodes must be done after releasing - * chunks_mtx, in case deallocation causes a chunk to be unmapped. 
- */ - tchunk = RB_MIN(chunk_tree_s, &delchunks); - while (tchunk != NULL) { - delchunk = tchunk; - tchunk = RB_NEXT(chunk_tree_s, &delchunks, delchunk); - RB_REMOVE(chunk_tree_s, &delchunks, delchunk); - base_chunk_node_dealloc(delchunk); - } - assert(CHUNK_ADDR2BASE(ret) == ret); return (ret); } @@ -1577,10 +1292,8 @@ chunk_dealloc(void *chunk, size_t size) * it, so that the address range can be recycled if * memory usage increases later on. */ - node->arena = NULL; node->chunk = chunk; node->size = size; - node->extra = 0; RB_INSERT(chunk_tree_s, &old_chunks, node); } @@ -1602,1751 +1315,594 @@ chunk_dealloc(void *chunk, size_t size) #endif } +/* + * End chunk management functions. + */ /******************************************************************************/ /* - * arena. + * Begin arena. */ -static __inline void -arena_mask_set(arena_t *arena, unsigned bin) +static inline int +arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b) { - unsigned elm, bit; - assert(bin < NBINS); + assert(a != NULL); + assert(b != NULL); - elm = bin / (sizeof(int) << 3); - bit = bin - (elm * (sizeof(int) << 3)); - assert((arena->bins_mask[elm] & (1 << bit)) == 0); - arena->bins_mask[elm] |= (1 << bit); + if ((uintptr_t)a < (uintptr_t)b) + return (-1); + else if (a == b) + return (0); + else + return (1); } -static __inline void -arena_mask_unset(arena_t *arena, unsigned bin) +/* Generate red-black tree code for arena chunks. */ +RB_GENERATE_STATIC(arena_chunk_tree_s, arena_chunk_s, link, arena_chunk_comp); + +static inline void +arena_run_mask_free_set(arena_run_t *run, unsigned reg) { unsigned elm, bit; - assert(bin < NBINS); + assert(run->magic == ARENA_RUN_MAGIC); + assert(reg < run->bin->nregs); - elm = bin / (sizeof(int) << 3); - bit = bin - (elm * (sizeof(int) << 3)); - assert((arena->bins_mask[elm] & (1 << bit)) != 0); - arena->bins_mask[elm] ^= (1 << bit); + elm = reg / (sizeof(unsigned) << 3); + if (elm < run->regs_minelm) + run->regs_minelm = elm; + bit = reg - (elm * (sizeof(unsigned) << 3)); + assert((run->regs_mask[elm] & (1 << bit)) == 0); + run->regs_mask[elm] |= (1 << bit); } -static unsigned -arena_bins_search(arena_t *arena, size_t size) +static inline void +arena_run_mask_free_unset(arena_run_t *run, unsigned reg) { - unsigned minbin, i; - int bit; - - assert(QUANTUM_CEILING(size) == size); - assert((size >> opt_quantum_2pow) >= bin_shift); - - if (size > bin_maxsize) - return (UINT_MAX); - - minbin = (size >> opt_quantum_2pow) - bin_shift; - assert(minbin < NBINS); - for (i = minbin / (sizeof(int) << 3); i < BINMASK_NELMS; i++) { - bit = ffs(arena->bins_mask[i] - & (UINT_MAX << (minbin % (sizeof(int) << 3)))); - if (bit != 0) { - /* Usable allocation found. */ - return ((i * (sizeof(int) << 3)) + bit - 1); - } - } - - return (UINT_MAX); -} - -static __inline void -arena_delayed_extract(arena_t *arena, region_t *reg) -{ - - if (region_next_contig_get(®->sep)) { - uint32_t slot; - - /* Extract this region from the delayed FIFO. 
*/ - assert(region_next_free_get(®->sep) == false); - - slot = reg->next.u.s.slot; - assert(arena->delayed[slot] == reg); - arena->delayed[slot] = NULL; - } -#ifdef MALLOC_DEBUG - else { - region_t *next; + unsigned elm, bit; - assert(region_next_free_get(®->sep)); + assert(run->magic == ARENA_RUN_MAGIC); + assert(reg < run->bin->nregs); - next = (region_t *) &((char *) reg) - [region_next_size_get(®->sep)]; - assert(region_prev_free_get(&next->sep)); - } -#endif + elm = reg / (sizeof(unsigned) << 3); + bit = reg - (elm * (sizeof(unsigned) << 3)); + assert((run->regs_mask[elm] & (1 << bit)) != 0); + run->regs_mask[elm] ^= (1 << bit); } -static __inline void -arena_bin_extract(arena_t *arena, unsigned bin, region_t *reg) +static inline unsigned +arena_run_search(arena_run_t *run) { - arena_bin_t *tbin; + unsigned i, mask, bit; - assert(bin < NBINS); + assert(run->magic == ARENA_RUN_MAGIC); - tbin = &arena->bins[bin]; - - assert(qr_next(&tbin->regions, next.u.s.link) != &tbin->regions); -#ifdef MALLOC_DEBUG - { - region_t *next; - - next = (region_t *) &((char *) reg) - [region_next_size_get(®->sep)]; - if (region_next_free_get(®->sep)) { - assert(region_prev_free_get(&next->sep)); - assert(region_next_size_get(®->sep) - == next->prev.size); + for (i = run->regs_minelm; i < REGS_MASK_NELMS; i++) { + mask = run->regs_mask[i]; + if (mask != 0) { + bit = ffs(mask); + if (bit != 0) { + /* Usable allocation found. */ + return ((i * (sizeof(unsigned) << 3)) + + bit - 1); + } } else { - assert(region_prev_free_get(&next->sep) == false); + /* + * Make a note that nothing before this element + * contains a free region. + */ + run->regs_minelm = i + 1; } } -#endif - assert(region_next_size_get(®->sep) - == ((bin + bin_shift) << opt_quantum_2pow)); - qr_remove(reg, next.u.s.link); -#ifdef MALLOC_STATS - arena->stats.bins[bin].curcached--; -#endif - if (qr_next(&tbin->regions, next.u.s.link) == &tbin->regions) - arena_mask_unset(arena, bin); - - arena_delayed_extract(arena, reg); + return (UINT_MAX); } -static __inline void -arena_extract(arena_t *arena, region_t *reg) +static void +arena_run_split(arena_t *arena, arena_run_t *run, bool large, size_t size) { - size_t size; + arena_chunk_t *chunk; + unsigned run_ind, map_offset, total_pages, need_pages; + unsigned i, log2_run_pages, run_pages; + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) + >> pagesize_2pow); + assert(chunk->map[run_ind].free); + total_pages = chunk->map[run_ind].npages; + need_pages = (size >> pagesize_2pow); - assert(region_next_free_get(®->sep)); #ifdef MALLOC_DEBUG - { - region_t *next; + for (i = 0; i < total_pages; i++) { + assert(chunk->map[run_ind + i].free); + assert(chunk->map[run_ind + i].large == false); + assert(chunk->map[run_ind + i].npages == total_pages); + assert(chunk->map[run_ind + i].pos == i); + } +#endif + + /* Split enough pages from the front of run to fit allocation size. */ + map_offset = run_ind; + for (i = 0; i < need_pages; i++) { + chunk->map[map_offset + i].free = false; + chunk->map[map_offset + i].large = large; + chunk->map[map_offset + i].npages = need_pages; + chunk->map[map_offset + i].pos = i; + } + + /* Update map for trailing pages. 
*/ + map_offset += need_pages; + while (map_offset < run_ind + total_pages) { + log2_run_pages = ffs(map_offset) - 1; + run_pages = (1 << log2_run_pages); + for (i = 0; i < run_pages; i++) { + chunk->map[map_offset + i].free = true; + chunk->map[map_offset + i].large = false; + chunk->map[map_offset + i].npages = run_pages; + chunk->map[map_offset + i].pos = i; + } - next = (region_t *)&((char *)reg) - [region_next_size_get(®->sep)]; - } -#endif + chunk->nfree_runs[log2_run_pages]++; - assert(reg != arena->split); - assert(reg != arena->frag); - if ((size = region_next_size_get(®->sep)) <= bin_maxsize) { - arena_bin_extract(arena, (size >> opt_quantum_2pow) - bin_shift, - reg); - } else { - RB_REMOVE(region_tree_s, &arena->large_regions, - ®->next.u.l.node); -#ifdef MALLOC_STATS - arena->stats.large.curcached--; -#endif + map_offset += run_pages; } + + chunk->pages_used += (size >> pagesize_2pow); } -/* Try to coalesce reg with its neighbors. Return false if coalescing fails. */ -static bool -arena_coalesce(arena_t *arena, region_t **reg, size_t size) +static arena_chunk_t * +arena_chunk_alloc(arena_t *arena) { - bool ret; - region_t *prev, *treg, *next, *nextnext; - size_t tsize, prev_size, next_size; + arena_chunk_t *chunk; + unsigned i, j, header_npages, pow2_header_npages, map_offset; + unsigned log2_run_pages, run_pages; + size_t header_size; + + chunk = (arena_chunk_t *)chunk_alloc(chunk_size); + if (chunk == NULL) + return (NULL); - ret = false; + chunk->arena = arena; - treg = *reg; + RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk); /* - * Keep track of the size while coalescing, then just set the size in - * the header/footer once at the end of coalescing. + * Claim that no pages are in use, since the header is merely overhead. */ - assert(size == region_next_size_get(&(*reg)->sep)); - tsize = size; - - next = (region_t *)&((char *)treg)[tsize]; - assert(region_next_free_get(&treg->sep)); - assert(region_prev_free_get(&next->sep)); - assert(region_next_size_get(&treg->sep) == next->prev.size); - - if (region_prev_free_get(&treg->sep)) { - prev_size = treg->prev.size; - prev = (region_t *)&((char *)treg)[-prev_size]; - assert(region_next_free_get(&prev->sep)); - - arena_extract(arena, prev); + chunk->pages_used = 0; - tsize += prev_size; + memset(&chunk->nfree_runs, 0, sizeof(chunk->nfree_runs)); - treg = prev; - -#ifdef MALLOC_STATS - arena->stats.ncoalesce++; -#endif - ret = true; + header_size = (size_t)((uintptr_t)&chunk->map[arena_chunk_maplen] + - (uintptr_t)chunk); + if (header_size % pagesize != 0) { + /* Round up to the nearest page boundary. */ + header_size += pagesize - (header_size % pagesize); } - assert(region_prev_free_get(&treg->sep) == false); - - if (region_next_free_get(&next->sep)) { - next_size = region_next_size_get(&next->sep); - nextnext = (region_t *)&((char *)next)[next_size]; - assert(region_prev_free_get(&nextnext->sep)); - assert(region_next_size_get(&next->sep) == nextnext->prev.size); - - arena_extract(arena, next); - - assert(region_next_size_get(&next->sep) == nextnext->prev.size); - - tsize += next_size; - -#ifdef MALLOC_STATS - if (ret == false) - arena->stats.ncoalesce++; -#endif - ret = true; - - next = (region_t *)&((char *)treg)[tsize]; - } - assert(region_next_free_get(&next->sep) == false); - - /* Update header/footer. 
*/ - if (ret) { - region_next_size_set(&treg->sep, tsize); - next->prev.size = tsize; - } + header_npages = header_size / pagesize; + pow2_header_npages = pow2_ceil(header_npages); /* - * Now that coalescing with adjacent free regions is done, we need to - * try to coalesce with "split" and "frag". Those two regions are - * marked as allocated, which is why this takes special effort. There - * are seven possible cases, but we want to make the (hopefully) common - * case of no coalescence fast, so the checks are optimized for that - * case. The seven cases are: - * - * /------\ - * 0 | treg | No coalescence needed. Make this case fast. - * \------/ - * - * /------+------\ - * 1 | frag | treg | - * \------+------/ - * - * /------+------\ - * 2 | treg | frag | - * \------+------/ - * - * /-------+------\ - * 3 | split | treg | - * \-------+------/ - * - * /------+-------\ - * 4 | treg | split | - * \------+-------/ - * - * /------+------+-------\ - * 5 | frag | treg | split | - * \------+------+-------/ - * - * /-------+------+------\ - * 6 | split | treg | frag | - * \-------+------+------/ + * Iteratively mark runs as in use, until we've spoken for the entire + * header. */ - - if (arena->split == NULL) { - /* Cases 3-6 ruled out. */ - } else if ((uintptr_t)next < (uintptr_t)arena->split) { - /* Cases 3-6 ruled out. */ - } else { - region_t *split_next; - size_t split_size; - - split_size = region_next_size_get(&arena->split->sep); - split_next = (region_t *)&((char *)arena->split)[split_size]; - - if ((uintptr_t)split_next < (uintptr_t)treg) { - /* Cases 3-6 ruled out. */ - } else { - /* - * Split is adjacent to treg. Take the slow path and - * coalesce. - */ - - arena_coalesce_hard(arena, treg, next, tsize, true); - - treg = NULL; -#ifdef MALLOC_STATS - if (ret == false) - arena->stats.ncoalesce++; -#endif - ret = true; - goto RETURN; + map_offset = 0; + for (i = 0; header_npages > 0; i++) { + if ((pow2_header_npages >> i) <= header_npages) { + for (j = 0; j < (pow2_header_npages >> i); j++) { + chunk->map[map_offset + j].free = false; + chunk->map[map_offset + j].large = false; + chunk->map[map_offset + j].npages = + (pow2_header_npages >> i); + chunk->map[map_offset + j].pos = j; + } + header_npages -= (pow2_header_npages >> i); + map_offset += (pow2_header_npages >> i); } } - /* If we get here, then cases 3-6 have been ruled out. */ - if (arena->frag == NULL) { - /* Cases 1-6 ruled out. */ - } else if ((uintptr_t)next < (uintptr_t)arena->frag) { - /* Cases 1-6 ruled out. */ - } else { - region_t *frag_next; - size_t frag_size; - - frag_size = region_next_size_get(&arena->frag->sep); - frag_next = (region_t *)&((char *)arena->frag)[frag_size]; - - if ((uintptr_t)frag_next < (uintptr_t)treg) { - /* Cases 1-6 ruled out. */ - } else { - /* - * Frag is adjacent to treg. Take the slow path and - * coalesce. - */ - - arena_coalesce_hard(arena, treg, next, tsize, false); - - treg = NULL; -#ifdef MALLOC_STATS - if (ret == false) - arena->stats.ncoalesce++; -#endif - ret = true; - goto RETURN; + /* + * Finish initializing map. The chunk header takes up some space at + * the beginning of the chunk, which we just took care of by + * "allocating" the leading pages. 
+ */ + while (map_offset < (chunk_size / pagesize)) { + log2_run_pages = ffs(map_offset) - 1; + run_pages = (1 << log2_run_pages); + for (i = 0; i < run_pages; i++) { + chunk->map[map_offset + i].free = true; + chunk->map[map_offset + i].large = false; + chunk->map[map_offset + i].npages = run_pages; + chunk->map[map_offset + i].pos = i; } - } - /* If we get here, no coalescence with "split" or "frag" was needed. */ + chunk->nfree_runs[log2_run_pages]++; - /* Finish updating header. */ - region_next_contig_unset(&treg->sep); - - assert(region_next_free_get(&treg->sep)); - assert(region_prev_free_get(&next->sep)); - assert(region_prev_free_get(&treg->sep) == false); - assert(region_next_free_get(&next->sep) == false); + map_offset += run_pages; + } -RETURN: - if (ret) - *reg = treg; - return (ret); + return (chunk); } -/* - * arena_coalesce() calls this function if it determines that a region needs to - * be coalesced with "split" and/or "frag". - */ static void -arena_coalesce_hard(arena_t *arena, region_t *reg, region_t *next, size_t size, - bool split_adjacent) +arena_chunk_dealloc(arena_chunk_t *chunk) { - bool frag_adjacent; - assert(next == (region_t *)&((char *)reg)[size]); - assert(region_next_free_get(®->sep)); - assert(region_next_size_get(®->sep) == size); - assert(region_prev_free_get(&next->sep)); - assert(next->prev.size == size); - - if (split_adjacent == false) - frag_adjacent = true; - else if (arena->frag != NULL) { - /* Determine whether frag will be coalesced with. */ - - if ((uintptr_t)next < (uintptr_t)arena->frag) - frag_adjacent = false; - else { - region_t *frag_next; - size_t frag_size; + RB_REMOVE(arena_chunk_tree_s, &chunk->arena->chunks, chunk); - frag_size = region_next_size_get(&arena->frag->sep); - frag_next = (region_t *)&((char *)arena->frag) - [frag_size]; - - if ((uintptr_t)frag_next < (uintptr_t)reg) - frag_adjacent = false; - else - frag_adjacent = true; - } - } else - frag_adjacent = false; - - if (split_adjacent && frag_adjacent) { - region_t *a; - size_t a_size, b_size; - - /* Coalesce all three regions. */ - - if (arena->frag == next) - a = arena->split; - else { - a = arena->frag; - arena->split = a; - } - arena->frag = NULL; - - a_size = region_next_size_get(&a->sep); - assert(a_size == (uintptr_t)reg - (uintptr_t)a); - - b_size = region_next_size_get(&next->sep); - - region_next_size_set(&a->sep, a_size + size + b_size); - assert(region_next_free_get(&a->sep) == false); - } else { - /* Coalesce two regions. */ - - if (split_adjacent) { - size += region_next_size_get(&arena->split->sep); - if (arena->split == next) { - /* reg comes before split. */ - region_next_size_set(®->sep, size); - - assert(region_next_free_get(®->sep)); - region_next_free_unset(®->sep); - - arena->split = reg; - } else { - /* reg comes after split. */ - region_next_size_set(&arena->split->sep, size); - - assert(region_next_free_get(&arena->split->sep) - == false); - - assert(region_prev_free_get(&next->sep)); - region_prev_free_unset(&next->sep); - } - } else { - assert(frag_adjacent); - size += region_next_size_get(&arena->frag->sep); - if (arena->frag == next) { - /* reg comes before frag. */ - region_next_size_set(®->sep, size); - - assert(region_next_free_get(®->sep)); - region_next_free_unset(®->sep); - - arena->frag = reg; - } else { - /* reg comes after frag. 
*/ - region_next_size_set(&arena->frag->sep, size); - - assert(region_next_free_get(&arena->frag->sep) - == false); - - assert(region_prev_free_get(&next->sep)); - region_prev_free_unset(&next->sep); - } - } - } + chunk_dealloc((void *)chunk, chunk_size); } -static __inline void -arena_bin_append(arena_t *arena, unsigned bin, region_t *reg) -{ - arena_bin_t *tbin; - - assert(bin < NBINS); - assert((region_next_size_get(®->sep) >> opt_quantum_2pow) - >= bin_shift); - assert(region_next_size_get(®->sep) - == ((bin + bin_shift) << opt_quantum_2pow)); - - tbin = &arena->bins[bin]; - - if (qr_next(&tbin->regions, next.u.s.link) == &tbin->regions) - arena_mask_set(arena, bin); - - qr_new(reg, next.u.s.link); - qr_before_insert(&tbin->regions, reg, next.u.s.link); -#ifdef MALLOC_STATS - arena->stats.bins[bin].curcached++; - - if (arena->stats.bins[bin].curcached - > arena->stats.bins[bin].highcached) { - arena->stats.bins[bin].highcached - = arena->stats.bins[bin].curcached; - } -#endif -} - -static __inline void -arena_bin_push(arena_t *arena, unsigned bin, region_t *reg) +static void +arena_bin_run_refile(arena_t *arena, arena_bin_t *bin, arena_run_t *run, + size_t size, bool promote) { - arena_bin_t *tbin; - - assert(bin < NBINS); - assert((region_next_size_get(®->sep) >> opt_quantum_2pow) - >= bin_shift); - assert(region_next_size_get(®->sep) - == ((bin + bin_shift) << opt_quantum_2pow)); - tbin = &arena->bins[bin]; + assert(bin == run->bin); - if (qr_next(&tbin->regions, next.u.s.link) == &tbin->regions) - arena_mask_set(arena, bin); - - region_next_contig_unset(®->sep); - qr_new(reg, next.u.s.link); - qr_after_insert(&tbin->regions, reg, next.u.s.link); + /* Determine whether to promote or demote run. */ + if (promote) { + /* Promote. */ + assert(run->free_min > run->nfree); + assert(run->quartile < RUN_Q100); + run->quartile++; #ifdef MALLOC_STATS - arena->stats.bins[bin].curcached++; - - if (arena->stats.bins[bin].curcached - > arena->stats.bins[bin].highcached) { - arena->stats.bins[bin].highcached - = arena->stats.bins[bin].curcached; - } + bin->stats.npromote++; #endif -} - -static __inline region_t * -arena_bin_pop(arena_t *arena, unsigned bin) -{ - region_t *ret; - arena_bin_t *tbin; - - assert(bin < NBINS); - - tbin = &arena->bins[bin]; - - assert(qr_next(&tbin->regions, next.u.s.link) != &tbin->regions); - - ret = qr_next(&tbin->regions, next.u.s.link); - assert(region_next_size_get(&ret->sep) - == ((bin + bin_shift) << opt_quantum_2pow)); - qr_remove(ret, next.u.s.link); + } else { + /* Demote. */ + assert(run->free_max < run->nfree); + assert(run->quartile > RUN_Q0); + run->quartile--; #ifdef MALLOC_STATS - arena->stats.bins[bin].curcached--; + bin->stats.ndemote++; #endif - if (qr_next(&tbin->regions, next.u.s.link) == &tbin->regions) - arena_mask_unset(arena, bin); - - if (region_next_free_get(&ret->sep) == false) { - uint32_t slot; - - assert(region_next_contig_get(&ret->sep)); - - /* Extract this region from the delayed FIFO. */ - slot = ret->next.u.s.slot; - assert(arena->delayed[slot] == ret); - arena->delayed[slot] = NULL; - } else { - region_t *next; - - /* Non-delayed region. 
*/ - region_next_free_unset(&ret->sep); - - next = (region_t *)&((char *)ret) - [(bin + bin_shift) << opt_quantum_2pow]; - assert(next->prev.size == region_next_size_get(&ret->sep)); - assert(region_prev_free_get(&next->sep)); - region_prev_free_unset(&next->sep); } - return (ret); -} - -static void -arena_large_insert(arena_t *arena, region_t *reg, bool lru) -{ - - assert(region_next_free_get(®->sep)); -#ifdef MALLOC_DEBUG - { - region_t *next; - - next = (region_t *)&((char *)reg) - [region_next_size_get(®->sep)]; - assert(region_prev_free_get(&next->sep)); - assert(next->prev.size == region_next_size_get(®->sep)); - } -#endif - - /* Coalescing should have already been done. */ - assert(arena_coalesce(arena, ®, region_next_size_get(®->sep)) - == false); - - if (region_next_size_get(®->sep) < chunk_size - - (CHUNK_REG_OFFSET + offsetof(region_t, next))) { - /* - * Make sure not to cache a large region with the nextContig - * flag set, in order to simplify the logic that determines - * whether a region needs to be extracted from "delayed". - */ - region_next_contig_unset(®->sep); - - /* Store the region in the large_regions tree. */ - reg->next.u.l.node.reg = reg; - reg->next.u.l.lru = lru; - - RB_INSERT(region_tree_s, &arena->large_regions, - ®->next.u.l.node); + /* Re-file run. */ + qr_remove(run, link); + switch (run->quartile) { + case RUN_Q0: #ifdef MALLOC_STATS - arena->stats.large.curcached++; - if (arena->stats.large.curcached - > arena->stats.large.highcached) { - arena->stats.large.highcached - = arena->stats.large.curcached; - } + bin->stats.curruns--; #endif - } else { - chunk_node_t *node; - - /* - * This region now spans an entire chunk. Deallocate the chunk. - * - * Note that it is possible for allocation of a large region - * from a pristine chunk, followed by deallocation of the - * region, can cause the chunk to immediately be unmapped. - * This isn't ideal, but 1) such scenarios seem unlikely, and - * 2) delaying coalescence for large regions could cause - * excessive fragmentation for common scenarios. - */ - - node = (chunk_node_t *)CHUNK_ADDR2BASE(reg); - RB_REMOVE(chunk_tree_s, &arena->chunks, node); - arena->nchunks--; - assert(node->chunk == (chunk_node_t *)node); - chunk_dealloc(node->chunk, chunk_size); - } -} - -static void -arena_large_cache(arena_t *arena, region_t *reg, bool lru) -{ - size_t size; - - /* Try to coalesce before storing this region anywhere. */ - size = region_next_size_get(®->sep); - if (arena_coalesce(arena, ®, size)) { - if (reg == NULL) { - /* Region no longer needs cached. 
*/ - return; - } - size = region_next_size_get(®->sep); - } - - arena_large_insert(arena, reg, lru); -} - -static void -arena_lru_cache(arena_t *arena, region_t *reg) -{ - size_t size; - - assert(region_next_free_get(®->sep)); + if (bin->runcur == run) + bin->runcur = NULL; #ifdef MALLOC_DEBUG - { - region_t *next; - - next = (region_t *)&((char *)reg) - [region_next_size_get(®->sep)]; - assert(region_prev_free_get(&next->sep)); - assert(next->prev.size == region_next_size_get(®->sep)); - } + run->magic = 0; #endif - assert(region_next_size_get(®->sep) % quantum == 0); - assert(region_next_size_get(®->sep) - >= QUANTUM_CEILING(sizeof(region_small_sizer_t))); - - size = region_next_size_get(®->sep); - assert(arena_coalesce(arena, ®, size) == false); - if (size <= bin_maxsize) { - arena_bin_append(arena, (size >> opt_quantum_2pow) - bin_shift, - reg); - } else - arena_large_cache(arena, reg, true); -} - -static __inline void -arena_mru_cache(arena_t *arena, region_t *reg, size_t size) -{ - - assert(region_next_free_get(®->sep)); -#ifdef MALLOC_DEBUG - { - region_t *next; - - next = (region_t *)&((char *)reg) - [region_next_size_get(®->sep)]; - assert(region_prev_free_get(&next->sep)); - assert(next->prev.size == region_next_size_get(®->sep)); + arena_run_dalloc(arena, run, bin->run_size); + break; + case RUN_Q25: + qr_before_insert(&bin->runs25, run, link); + run->free_max = run->bin->nregs - 1; + run->free_min = (run->bin->nregs >> 1) + 1; + break; + case RUN_Q50: + qr_before_insert(&bin->runs50, run, link); + run->free_max = ((run->bin->nregs >> 2) * 3) - 1; + run->free_min = (run->bin->nregs >> 2) + 1; + break; + case RUN_Q75: + qr_before_insert(&bin->runs75, run, link); + run->free_max = (run->bin->nregs >> 1) - 1; + run->free_min = 1; + break; + case RUN_Q100: + assert(bin->runcur == run); + bin->runcur = NULL; + qr_before_insert(&bin->runs100, run, link); + run->free_max = (run->bin->nregs >> 2) - 1; + run->free_min = 0; + break; + default: + assert(0); + break; } -#endif - assert(region_next_size_get(®->sep) % quantum == 0); - assert(region_next_size_get(®->sep) - >= QUANTUM_CEILING(sizeof(region_small_sizer_t))); - assert(size == region_next_size_get(®->sep)); - assert(arena_coalesce(arena, ®, size) == false); - - if (size <= bin_maxsize) { - arena_bin_push(arena, (size >> opt_quantum_2pow) - bin_shift, - reg); - } else - arena_large_cache(arena, reg, false); } -static __inline void -arena_undelay(arena_t *arena, uint32_t slot) +static arena_run_t * +arena_run_alloc(arena_t *arena, bool large, size_t size) { - region_t *reg, *next; - size_t size; - - assert(slot == arena->next_delayed); - assert(arena->delayed[slot] != NULL); - - /* Try to coalesce reg. */ - reg = arena->delayed[slot]; - - size = region_next_size_get(®->sep); - - assert(region_next_contig_get(®->sep)); - assert(reg->next.u.s.slot == slot); - - arena_bin_extract(arena, (size >> opt_quantum_2pow) - bin_shift, reg); - - arena->delayed[slot] = NULL; - - next = (region_t *) &((char *) reg)[size]; - - region_next_free_set(®->sep); - region_prev_free_set(&next->sep); - next->prev.size = size; + arena_run_t *run; + unsigned min_ind, i, j; + arena_chunk_t *chunk; +#ifndef NDEBUG + int rep = 0; +#endif - if (arena_coalesce(arena, ®, size) == false) { - /* Coalescing failed. Cache this region. */ - arena_mru_cache(arena, reg, size); - } else { - /* Coalescing succeeded. */ + assert(size <= arena_maxclass); - if (reg == NULL) { - /* Region no longer needs undelayed. 
*/ - return; - } +AGAIN: +#ifndef NDEBUG + rep++; + assert(rep <= 2); +#endif + + min_ind = ffs(size / pagesize) - 1; + RB_FOREACH(chunk, arena_chunk_tree_s, &arena->chunks) { + for (i = min_ind; + i < (opt_chunk_2pow - pagesize_2pow); + i++) { + if (chunk->nfree_runs[i] > 0) { + arena_chunk_map_t *map = chunk->map; + + /* Scan chunk's map for free run. */ + for (j = 0; + j < arena_chunk_maplen; + j += map[j].npages) { + if (map[j].free + && map[j].npages == (1 << i)) + { + run = (arena_run_t *)&((char *)chunk)[j + << pagesize_2pow]; - if (region_next_size_get(®->sep) < chunk_size - - (CHUNK_REG_OFFSET + offsetof(region_t, next))) { - /* - * Insert coalesced region into appropriate bin (or - * large_regions). - */ - arena_lru_cache(arena, reg); - } else { - chunk_node_t *node; + assert(chunk->nfree_runs[i] > 0); + chunk->nfree_runs[i]--; - /* - * This region now spans an entire chunk. Deallocate - * the chunk. - */ + /* Update page map. */ + arena_run_split(arena, run, large, size); - node = (chunk_node_t *) CHUNK_ADDR2BASE(reg); - RB_REMOVE(chunk_tree_s, &arena->chunks, node); - arena->nchunks--; - assert(node->chunk == (chunk_node_t *) node); - chunk_dealloc(node->chunk, chunk_size); + return (run); } - } -} - -static void -arena_delay_cache(arena_t *arena, region_t *reg) -{ - region_t *next; - size_t size; - - assert(region_next_free_get(®->sep) == false); - assert(region_next_size_get(®->sep) % quantum == 0); - assert(region_next_size_get(®->sep) - >= QUANTUM_CEILING(sizeof(region_small_sizer_t))); - - size = region_next_size_get(®->sep); - next = (region_t *)&((char *)reg)[size]; - assert(region_prev_free_get(&next->sep) == false); - - if (size <= bin_maxsize) { - if (region_next_contig_get(®->sep)) { - uint32_t slot; - - /* Insert into delayed. */ - - /* Clear a slot, then put reg in it. */ - slot = arena->next_delayed; - if (arena->delayed[slot] != NULL) - arena_undelay(arena, slot); - assert(slot == arena->next_delayed); - assert(arena->delayed[slot] == NULL); - - reg->next.u.s.slot = slot; - - arena->delayed[slot] = reg; - - /* Update next_delayed. */ - slot++; - slot &= (opt_ndelay - 1); /* Handle wrap-around. */ - arena->next_delayed = slot; - - arena_bin_append(arena, (size >> opt_quantum_2pow) - - bin_shift, reg); - } else { - /* - * This region was a fragment when it was allocated, so - * don't delay coalescence for it. - */ - - region_next_free_set(®->sep); - region_prev_free_set(&next->sep); - next->prev.size = size; - - if (arena_coalesce(arena, ®, size)) { - /* Coalescing succeeded. */ - - if (reg == NULL) { - /* Region no longer needs cached. */ - return; } - - size = region_next_size_get(®->sep); + /* Not reached. */ + assert(0); } - - arena_mru_cache(arena, reg, size); } - } else { - region_next_free_set(®->sep); - region_prev_free_set(&next->sep); - region_next_contig_unset(®->sep); - next->prev.size = size; - - arena_large_cache(arena, reg, true); } + + /* No usable runs. Allocate a new chunk, then try again. */ + if (arena_chunk_alloc(arena) == NULL) + return (NULL); + goto AGAIN; } -static region_t * -arena_frag_reg_alloc(arena_t *arena, size_t size, bool fit) +static void +arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size) { - region_t *ret; - size_t total_size -#ifdef MALLOC_DEBUG - = 0 /* for assert() below. */ -#endif - ; - bool refill; + arena_chunk_t *chunk; + unsigned i, run_ind, buddy_ind, base_run_ind, run_pages, log2_run_pages; - /* - * Clear frag if it is too small to carve a maximally sized small - * region from. 
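A small worked example of the index computation at the top of arena_run_alloc() above: the run sizes handed to it are power-of-two multiples of the page size, so ffs(size / pagesize) - 1 is simply log2 of the run's page count, i.e. the slot in nfree_runs[] to start searching from. Standalone sketch with an assumed 4 kB page size:

#include <assert.h>
#include <stdio.h>
#include <strings.h>	/* ffs() */

int
main(void)
{
	size_t pagesize = 4096;	/* example; the allocator reads this via sysconf() */
	size_t sizes[] = { 4096, 8192, 65536 };
	unsigned i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		/* Run sizes are power-of-two multiples of the page size, so
		 * this is just log2 of the page count. */
		unsigned min_ind = ffs((int)(sizes[i] / pagesize)) - 1;

		printf("%zu-byte run -> %zu pages -> nfree_runs[%u]\n",
		    sizes[i], sizes[i] / pagesize, min_ind);
		assert((sizes[i] >> min_ind) == pagesize);
	}
	return (0);
}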
- */ - if (arena->frag != NULL) { - if ((total_size = region_next_size_get(&arena->frag->sep)) - < size && size <= bin_maxsize) { - region_t *reg; + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) + >> pagesize_2pow); + run_pages = (size >> pagesize_2pow); + log2_run_pages = ffs(run_pages) - 1; + assert(run_pages > 0); - reg = arena->frag; - region_next_contig_set(®->sep); + /* Subtract pages from count of pages used in chunk. */ + chunk->pages_used -= run_pages; - arena->frag = NULL; - - arena_delay_cache(arena, reg); - refill = true; - } else { - /* - * No need to refill. Note that total_size was set - * above. - */ - refill = false; - } - } else - refill = true; + /* Mark run as deallocated. */ + for (i = 0; i < run_pages; i++) { + chunk->map[run_ind + i].free = true; + chunk->map[run_ind + i].large = false; + chunk->map[run_ind + i].npages = run_pages; + chunk->map[run_ind + i].pos = i; + } /* - * Try to fill frag if it's empty. Frag needs to be marked as - * allocated. + * Tell the kernel that we don't need the data in this run, but only + * if requested via runtime configuration. */ - if (refill) { - region_node_t *node; - - node = RB_MIN(region_tree_s, &arena->large_regions); - if (node != NULL) { - region_t *frag, *next; - - RB_REMOVE(region_tree_s, &arena->large_regions, node); - - frag = node->reg; -#ifdef MALLOC_STATS - arena->stats.frag.nrefills++; -#endif - assert(region_next_free_get(&frag->sep)); - region_next_free_unset(&frag->sep); - - total_size = region_next_size_get(&frag->sep); - next = (region_t *)&((char *)frag)[total_size]; - assert(region_prev_free_get(&next->sep)); - region_prev_free_unset(&next->sep); - - arena->frag = frag; - } else { - unsigned bin; - - /* Look in bins for a large enough region. */ - if ((bin = arena_bins_search(arena, size)) - != UINT_MAX) { - /* Use the smallest available region. */ - arena->frag = arena_bin_pop(arena, bin); -#ifdef MALLOC_STATS - arena->stats.frag.nrefills++; -#endif - total_size = - region_next_size_get(&arena->frag->sep); - assert(total_size >= size); - } else { - /* Unable to fill frag. */ - return (NULL); - } - } - } - assert(arena->frag != NULL); - /* total_size has been set in all possible paths that lead to here. */ - assert(total_size != 0); - + if (opt_hint) { + madvise(run, size, MADV_FREE); #ifdef MALLOC_STATS - arena->stats.frag.nrequests++; + arena->stats.nmadvise += (size >> pagesize_2pow); #endif - - if (total_size < size) { - /* - * Frag isn't large enough to service this request. Note that - * this is only possible for a large request, since the refill - * code always makes sure to refill frag if it's too small to - * service a current small request. - */ - assert(size > bin_maxsize); - return (NULL); } - if (fit) { - /* - * Use frag, but try to use the beginning for smaller regions, - * and the end for larger regions. This reduces fragmentation - * in some pathological use cases. It tends to group - * short-lived (smaller) regions, which increases the - * effectiveness of coalescing. - */ - - assert(size % quantum == 0); - - if (total_size - size >= - QUANTUM_CEILING(sizeof(region_small_sizer_t))) { - if (size <= bin_maxsize) { - region_t *next; - - /* Carve space from the beginning of frag. */ - - /* ret. */ - ret = arena->frag; - region_next_size_set(&ret->sep, size); - assert(region_next_free_get(&ret->sep) - == false); - - /* next. 
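The loop in arena_run_dalloc() above stamps every page of the freed run with (free, large, npages, pos). The sketch below uses a simplified stand-in for arena_chunk_map_t (the field layout is illustrative, not the commit's exact type) to show why pos is recorded: any interior page can be walked back to the run's first page, which is exactly what arena_salloc() and arena_dalloc() rely on later.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-in for one page-map entry. */
struct map_entry {
	bool		free;
	bool		large;
	unsigned	npages;	/* pages in the run this page belongs to */
	unsigned	pos;	/* this page's position within that run */
};

int
main(void)
{
	static struct map_entry map[8];	/* zero-initialized page map */
	unsigned run_ind = 4, run_pages = 4, i;

	/* Stamp a 4-page run starting at page 4, the way arena_run_dalloc()
	 * marks every page the freed run covers. */
	for (i = 0; i < run_pages; i++) {
		map[run_ind + i].free = true;
		map[run_ind + i].large = false;
		map[run_ind + i].npages = run_pages;
		map[run_ind + i].pos = i;
	}

	/* Any interior page can be walked back to the run's first page. */
	assert(run_ind + 2 - map[run_ind + 2].pos == run_ind);
	printf("page %u is page %u of a %u-page run starting at page %u\n",
	    run_ind + 2, map[run_ind + 2].pos, map[run_ind + 2].npages,
	    run_ind + 2 - map[run_ind + 2].pos);
	return (0);
}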
*/ - next = (region_t *)&((char *)ret)[size]; - region_next_size_set(&next->sep, - total_size - size); - assert(size >= QUANTUM_CEILING(sizeof( - region_small_sizer_t))); - region_prev_free_unset(&next->sep); - region_next_free_unset(&next->sep); - - /* Update frag. */ - arena->frag = next; - } else { - region_t *prev; - size_t prev_size; - - /* Carve space from the end of frag. */ - - /* prev. */ - prev_size = total_size - size; - prev = arena->frag; - region_next_size_set(&prev->sep, prev_size); - assert(prev_size >= QUANTUM_CEILING(sizeof( - region_small_sizer_t))); - assert(region_next_free_get(&prev->sep) - == false); - - /* ret. */ - ret = (region_t *)&((char *)prev)[prev_size]; - region_next_size_set(&ret->sep, size); - region_prev_free_unset(&ret->sep); - region_next_free_unset(&ret->sep); - -#ifdef MALLOC_DEBUG - { - region_t *next; - - /* next. */ - next = (region_t *)&((char *) ret) - [region_next_size_get(&ret->sep)]; - assert(region_prev_free_get(&next->sep) - == false); - } -#endif - } -#ifdef MALLOC_STATS - arena->stats.nsplit++; -#endif + /* + * Iteratively coalesce with buddies. Conceptually, the buddy scheme + * induces a tree on the set of pages. If we know the number of pages + * in the subtree rooted at the current node, we can quickly determine + * whether a run is the left or right buddy, and then calculate the + * buddy's index. + */ + for (; + (run_pages = (1 << log2_run_pages)) < arena_chunk_maplen; + log2_run_pages++) { + if (((run_ind >> log2_run_pages) & 1) == 0) { + /* Current run precedes its buddy. */ + buddy_ind = run_ind + run_pages; + base_run_ind = run_ind; } else { - /* - * Frag is close enough to the right size that there - * isn't enough room to create a neighboring region. - */ - - /* ret. */ - ret = arena->frag; - arena->frag = NULL; - assert(region_next_free_get(&ret->sep) == false); - -#ifdef MALLOC_DEBUG - { - region_t *next; - - /* next. */ - next = (region_t *)&((char *)ret)[total_size]; - assert(region_prev_free_get(&next->sep) - == false); - } -#endif - } -#ifdef MALLOC_STATS - arena->stats.frag.nserviced++; -#endif - } else { - /* Don't fit to the allocation size. */ - - /* ret. */ - ret = arena->frag; - arena->frag = NULL; - assert(region_next_free_get(&ret->sep) == false); - -#ifdef MALLOC_DEBUG - { - region_t *next; - - /* next. */ - next = (region_t *) &((char *) ret)[total_size]; - assert(region_prev_free_get(&next->sep) == false); + /* Current run follows its buddy. */ + buddy_ind = run_ind - run_pages; + base_run_ind = buddy_ind; } -#endif - } - region_next_contig_set(&ret->sep); - - return (ret); -} - -static region_t * -arena_split_reg_alloc(arena_t *arena, size_t size, bool fit) -{ - - if (arena->split == NULL) - return (NULL); -#ifdef MALLOC_STATS - arena->stats.split.nrequests++; -#endif - if (region_next_size_get(&arena->split->sep) >= size) { - region_t *ret; - - if (fit) { - size_t total_size; - - /* - * Use split, but try to use the beginning for smaller - * regions, and the end for larger regions. This - * reduces fragmentation in some pathological use - * cases. It tends to group short-lived (smaller) - * regions, which increases the effectiveness of - * coalescing. - */ - - total_size = region_next_size_get(&arena->split->sep); - assert(size % quantum == 0); - - if (total_size - size >= - QUANTUM_CEILING(sizeof(region_small_sizer_t))) { - if (size <= bin_maxsize) { - region_t *next; - - /* - * Carve space from the beginning of - * split. - */ - - /* ret. 
*/ - ret = arena->split; - region_next_size_set(&ret->sep, size); - assert(region_next_free_get(&ret->sep) - == false); - - /* next. */ - next = (region_t *)&((char *)ret)[size]; - region_next_size_set(&next->sep, - total_size - size); - assert(size >= QUANTUM_CEILING(sizeof( - region_small_sizer_t))); - region_prev_free_unset(&next->sep); - region_next_free_unset(&next->sep); - - /* Update split. */ - arena->split = next; - } else { - region_t *prev; - size_t prev_size; - - /* Carve space from the end of split. */ - - /* prev. */ - prev_size = total_size - size; - prev = arena->split; - region_next_size_set(&prev->sep, - prev_size); - assert(prev_size >= - QUANTUM_CEILING(sizeof( - region_small_sizer_t))); - assert(region_next_free_get( - &prev->sep) == false); - - /* ret. */ - ret = (region_t *)&((char *) - prev)[prev_size]; - region_next_size_set(&ret->sep, size); - region_prev_free_unset(&ret->sep); - region_next_free_unset(&ret->sep); - -#ifdef MALLOC_DEBUG - { - region_t *next; - - /* next. */ - next = (region_t *)&((char *)ret) - [region_next_size_get(&ret->sep)]; - assert(region_prev_free_get(&next->sep) - == false); - } -#endif - } -#ifdef MALLOC_STATS - arena->stats.nsplit++; -#endif - } else { - /* - * Split is close enough to the right size that - * there isn't enough room to create a - * neighboring region. - */ - - /* ret. */ - ret = arena->split; - arena->split = NULL; - assert(region_next_free_get(&ret->sep) - == false); - -#ifdef MALLOC_DEBUG - { - region_t *next; - - /* next. */ - next = (region_t *)&((char *) - ret)[region_next_size_get( - &ret->sep)]; - assert(region_prev_free_get(&next->sep) - == false); - } -#endif - } -#ifdef MALLOC_STATS - arena->stats.split.nserviced++; -#endif - } else { - /* Don't fit to the allocation size. */ + if (chunk->map[buddy_ind].free == false + || chunk->map[buddy_ind].npages != run_pages) + break; - /* ret. */ - ret = arena->split; - arena->split = NULL; - assert(region_next_free_get(&ret->sep) == false); + assert(chunk->nfree_runs[log2_run_pages] > 0); + chunk->nfree_runs[log2_run_pages]--; -#ifdef MALLOC_DEBUG - { - region_t *next; - - /* next. */ - next = (region_t *) &((char *) ret) - [region_next_size_get(&ret->sep)]; - assert(region_prev_free_get(&next->sep) - == false); - } -#endif + /* Coalesce. */ + for (i = 0; i < (run_pages << 1); i++) { + chunk->map[base_run_ind + i].npages = (run_pages << 1); + chunk->map[base_run_ind + i].pos = i; } - region_next_contig_set(&ret->sep); - return (ret); - } - /* If we get here, split has failed to service the request. */ - - if (size <= bin_maxsize) { - region_t *reg; - - /* - * The split region is too small to service a small request. - * Clear split. - */ - - reg = arena->split; - region_next_contig_set(®->sep); - - arena->split = NULL; - arena_delay_cache(arena, reg); + /* Update run_ind to be the begginning of the coalesced run. */ + run_ind = base_run_ind; } - return (NULL); -} + /* Insert coalesced run into ring of free runs. */ + chunk->nfree_runs[log2_run_pages]++; -/* - * Split reg if necessary. The region must be overly large enough to be able - * to contain a trailing region. 
- */ -static void -arena_reg_fit(arena_t *arena, size_t size, region_t *reg, bool restore_split) -{ - assert(QUANTUM_CEILING(size) == size); - assert(region_next_free_get(®->sep) == 0); - - if (region_next_size_get(®->sep) - >= size + QUANTUM_CEILING(sizeof(region_small_sizer_t))) { - size_t total_size; - region_t *next; - - total_size = region_next_size_get(®->sep); - - region_next_size_set(®->sep, size); - - next = (region_t *) &((char *) reg)[size]; - region_next_size_set(&next->sep, total_size - size); - assert(region_next_size_get(&next->sep) - >= QUANTUM_CEILING(sizeof(region_small_sizer_t))); - region_prev_free_unset(&next->sep); - - if (restore_split) { - /* Restore what's left to "split". */ - region_next_free_unset(&next->sep); - arena->split = next; - } else if (arena->frag == NULL && total_size - size - > bin_maxsize) { - /* This region is large enough to use for "frag". */ - region_next_free_unset(&next->sep); - arena->frag = next; - } else { - region_t *nextnext; - size_t next_size; - - region_next_free_set(&next->sep); - - assert(region_next_size_get(&next->sep) == total_size - - size); - next_size = total_size - size; - nextnext = (region_t *) &((char *) next)[next_size]; - nextnext->prev.size = next_size; - assert(region_prev_free_get(&nextnext->sep) == false); - region_prev_free_set(&nextnext->sep); - - if (arena_coalesce(arena, &next, next_size)) { - if (next != NULL) { - next_size = region_next_size_get( - &next->sep); - arena_mru_cache(arena, next, next_size); - } - } else - arena_mru_cache(arena, next, next_size); - } -#ifdef MALLOC_STATS - arena->stats.nsplit++; -#endif + /* Free pages, to the extent possible. */ + if (chunk->pages_used == 0) { + /* This chunk is completely unused now, so deallocate it. */ + arena_chunk_dealloc(chunk); } } -static __inline region_t * -arena_bin_reg_alloc(arena_t *arena, size_t size, bool fit) +static arena_run_t * +arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin, size_t size) { - region_t *ret, *header; - unsigned bin; + arena_run_t *run; + unsigned i, remainder; - /* - * Look for an exact fit in bins (region cached in smallest possible - * bin). - */ - bin = (size >> opt_quantum_2pow) - bin_shift; -#ifdef MALLOC_STATS - arena->stats.bins[bin].nrequests++; -#endif - header = &arena->bins[bin].regions; - if (qr_next(header, next.u.s.link) != header) { - /* Exact fit. */ - ret = arena_bin_pop(arena, bin); - assert(region_next_size_get(&ret->sep) >= size); -#ifdef MALLOC_STATS - arena->stats.bins[bin].nserviced++; -#endif - return (ret); + /* Look for a usable run. */ + if ((run = qr_next(&bin->runs75, link)) != &bin->runs75 + || (run = qr_next(&bin->runs50, link)) != &bin->runs50 + || (run = qr_next(&bin->runs25, link)) != &bin->runs25) { + /* Use a run that is guaranteed to have available space. */ + qr_remove(run, link); + return (run); } - /* Look at frag to see whether it's large enough. */ - ret = arena_frag_reg_alloc(arena, size, fit); - if (ret != NULL) - return (ret); - - return (NULL); -} - -/* Look in large_regions for a large enough region. */ -static region_t * -arena_large_reg_alloc(arena_t *arena, size_t size, bool fit) -{ - region_t *ret, *next; - region_node_t *node; - region_t key; + /* Look for a run in runs100 that has available space. 
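The coalescing loop in arena_run_dalloc() above decides whether the freed run is the left or right buddy from bit log2_run_pages of its page index, then derives the buddy's index. A minimal sketch of that arithmetic (the page indices are hypothetical):

#include <assert.h>
#include <stdio.h>

/*
 * Buddy index arithmetic, assuming run_ind is a page index within the chunk
 * and the run spans (1 << log2_run_pages) pages.
 */
static unsigned
buddy_index(unsigned run_ind, unsigned log2_run_pages)
{
	unsigned run_pages = 1U << log2_run_pages;

	if (((run_ind >> log2_run_pages) & 1) == 0)
		return (run_ind + run_pages);	/* run precedes its buddy */
	return (run_ind - run_pages);		/* run follows its buddy */
}

int
main(void)
{
	/* A 1-page run at page 5 pairs with page 4; the coalesced 2-page run
	 * at page 4 then pairs with the 2-page run at page 6, and so on. */
	assert(buddy_index(5, 0) == 4);
	assert(buddy_index(4, 1) == 6);
	assert(buddy_index(4, 2) == 0);
	printf("buddy of page 5 (1-page run): page %u\n", buddy_index(5, 0));
	return (0);
}

The same value could be written as run_ind ^ run_pages; the diff's form keeps the left/right distinction explicit because it also needs base_run_ind, the lower of the two indices, as the start of the coalesced run.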
*/ + if ((run = qr_next(&bin->runs100, link)) != &bin->runs100) { + do { + if (run->nfree > 0) { + qr_remove(run, link); + return (run); + } -#ifdef MALLOC_STATS - arena->stats.large.nrequests++; -#endif + run = qr_next(run, link); + } while (run != &bin->runs100); + } - key.next.u.l.node.reg = &key; - key.next.u.l.lru = true; - region_next_size_set(&key.sep, size); - node = RB_NFIND(region_tree_s, &arena->large_regions, - &key.next.u.l.node); - if (node == NULL) + /* Allocate a new run. */ + run = arena_run_alloc(arena, false, bin->run_size); + if (run == NULL) return (NULL); - /* Cached large region found. */ - ret = node->reg; - assert(region_next_free_get(&ret->sep)); + /* Initialize run internals. */ + qr_new(run, link); + run->bin = bin; - RB_REMOVE(region_tree_s, &arena->large_regions, node); -#ifdef MALLOC_STATS - arena->stats.large.curcached--; -#endif - - region_next_free_unset(&ret->sep); + for (i = 0; i < bin->nregs / (sizeof(unsigned) << 3); i++) + run->regs_mask[i] = UINT_MAX; + remainder = bin->nregs % (sizeof(unsigned) << 3); + if (remainder != 0) { + run->regs_mask[i] = (UINT_MAX >> ((sizeof(unsigned) << 3) + - remainder)); + i++; + } + for (; i < REGS_MASK_NELMS; i++) + run->regs_mask[i] = 0; - next = (region_t *)&((char *)ret)[region_next_size_get(&ret->sep)]; - assert(region_prev_free_get(&next->sep)); - region_prev_free_unset(&next->sep); + run->regs_minelm = 0; - if (fit) - arena_reg_fit(arena, size, ret, false); + run->nfree = bin->nregs; + run->quartile = RUN_Q0; + run->free_max = bin->nregs; + run->free_min = ((bin->nregs >> 2) * 3) + 1; +#ifdef MALLOC_DEBUG + run->magic = ARENA_RUN_MAGIC; +#endif #ifdef MALLOC_STATS - arena->stats.large.nserviced++; + bin->stats.nruns++; + bin->stats.curruns++; + if (bin->stats.curruns > bin->stats.highruns) + bin->stats.highruns = bin->stats.curruns; #endif - - return (ret); + return (run); } -/* Allocate a new chunk and create a single region from it. */ -static region_t * -arena_chunk_reg_alloc(arena_t *arena, size_t size, bool fit) +static inline void * +arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run, + size_t size) { - region_t *ret; - chunk_node_t *chunk; - - chunk = chunk_alloc(chunk_size); - if (chunk == NULL) - return (NULL); - -#ifdef MALLOC_DEBUG - { - chunk_node_t *tchunk; - chunk_node_t key; - - key.chunk = chunk; - tchunk = RB_FIND(chunk_tree_s, &arena->chunks, &key); - assert(tchunk == NULL); - } -#endif - chunk->arena = arena; - chunk->chunk = chunk; - chunk->size = chunk_size; - chunk->extra = 0; - - RB_INSERT(chunk_tree_s, &arena->chunks, chunk); - arena->nchunks++; + void *ret; + unsigned regind; - /* Carve a region from the new chunk. */ - ret = (region_t *) &((char *) chunk)[CHUNK_REG_OFFSET]; - region_next_size_set(&ret->sep, chunk_size - (CHUNK_REG_OFFSET - + offsetof(region_t, next))); - region_prev_free_unset(&ret->sep); - region_next_free_unset(&ret->sep); + assert(run->magic == ARENA_RUN_MAGIC); - /* - * Avoiding the following when possible is worthwhile, because it - * avoids touching a page that for many applications would never be - * touched otherwise. - */ -#ifdef USE_BRK - if ((uintptr_t)ret >= (uintptr_t)brk_base - && (uintptr_t)ret < (uintptr_t)brk_max) { - region_t *next; + regind = arena_run_search(run); + assert(regind != UINT_MAX); + assert(regind < bin->nregs); - /* - * This may be a re-used brk chunk, so we have no guarantee - * that the memory is zero-filled. Therefore manually create a - * separator at the end of this new region (all zero bits). 
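arena_bin_nonfull_run_get() above initializes the free-region bitmap by filling whole words with UINT_MAX and then one partial word for the leftover regions. A standalone sketch of that initialization for a hypothetical 72-region run (the asserted constants assume a 32-bit unsigned):

#include <assert.h>
#include <limits.h>
#include <stdio.h>

#define WORD_BITS	(sizeof(unsigned) << 3)

int
main(void)
{
	/* Hypothetical run with 72 regions: two full mask words plus 8
	 * trailing bits; the rest of the fixed-size mask stays zero. */
	unsigned nregs = 72;
	unsigned mask[4];	/* stands in for run->regs_mask[] */
	unsigned i, remainder;

	for (i = 0; i < nregs / WORD_BITS; i++)
		mask[i] = UINT_MAX;			/* all regions free */
	remainder = nregs % WORD_BITS;
	if (remainder != 0) {
		mask[i] = UINT_MAX >> (WORD_BITS - remainder);
		i++;
	}
	for (; i < sizeof(mask) / sizeof(mask[0]); i++)
		mask[i] = 0;				/* unused tail */

	assert(mask[0] == UINT_MAX && mask[1] == UINT_MAX);
	assert(mask[2] == 0xffU && mask[3] == 0);
	printf("partial mask word: 0x%x (%u regions tracked)\n", mask[2], nregs);
	return (0);
}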
- */ - next = (region_t *)&((char *)ret)[region_next_size_get( - &ret->sep)]; - region_next_size_set(&next->sep, 0); - region_prev_free_unset(&next->sep); - region_next_free_unset(&next->sep); - region_next_contig_unset(&next->sep); + ret = (void *)&((char *)run)[bin->reg0_offset + (bin->reg_size + * regind)]; + arena_run_mask_free_unset(run, regind); + run->nfree--; + if (run->nfree < run->free_min) { + /* Promote run to higher fullness quartile. */ + arena_bin_run_refile(arena, bin, run, size, true); } -#endif - - if (fit) - arena_reg_fit(arena, size, ret, (arena->split == NULL)); return (ret); } -/* - * Find a region that is at least as large as aSize, and return a pointer to - * the separator that precedes the region. The return value is ready for use, - * though it may be larger than is necessary if fit is false. - */ -static __inline region_t * -arena_reg_alloc(arena_t *arena, size_t size, bool fit) +static void * +arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin, size_t size) { - region_t *ret; - - assert(QUANTUM_CEILING(size) == size); - assert(size >= QUANTUM_CEILING(sizeof(region_small_sizer_t))); - assert(size <= (chunk_size >> 1)); - - if (size <= bin_maxsize) { - ret = arena_bin_reg_alloc(arena, size, fit); - if (ret != NULL) - return (ret); - } - - ret = arena_large_reg_alloc(arena, size, fit); - if (ret != NULL) - return (ret); - - ret = arena_split_reg_alloc(arena, size, fit); - if (ret != NULL) - return (ret); - /* - * Only try allocating from frag here if size is large, since - * arena_bin_reg_alloc() already falls back to allocating from frag for - * small regions. - */ - if (size > bin_maxsize) { - ret = arena_frag_reg_alloc(arena, size, fit); - if (ret != NULL) - return (ret); - } + assert(bin->runcur == NULL || bin->runcur->quartile == RUN_Q100); - ret = arena_chunk_reg_alloc(arena, size, fit); - if (ret != NULL) - return (ret); + bin->runcur = arena_bin_nonfull_run_get(arena, bin, size); + if (bin->runcur == NULL) + return (NULL); + assert(bin->runcur->magic == ARENA_RUN_MAGIC); - return (NULL); + return (arena_bin_malloc_easy(arena, bin, bin->runcur, size)); } static void * arena_malloc(arena_t *arena, size_t size) { void *ret; - region_t *reg; - size_t quantum_size; assert(arena != NULL); assert(arena->magic == ARENA_MAGIC); assert(size != 0); - assert(region_ceiling(size) <= (chunk_size >> 1)); - - quantum_size = region_ceiling(size); - if (quantum_size < size) { - /* size is large enough to cause size_t wrap-around. */ - return (NULL); - } - assert(quantum_size >= QUANTUM_CEILING(sizeof(region_small_sizer_t))); + assert(QUANTUM_CEILING(size) <= arena_maxclass); malloc_mutex_lock(&arena->mtx); - reg = arena_reg_alloc(arena, quantum_size, true); - if (reg == NULL) { - malloc_mutex_unlock(&arena->mtx); - return (NULL); - } + if (size <= bin_maxclass) { + arena_bin_t *bin; + arena_run_t *run; + if (size < small_min) { + size = pow2_ceil(size); + bin = &arena->bins[ffs(size >> (TINY_MIN_2POW + 1))]; #ifdef MALLOC_STATS - arena->allocated += quantum_size; -#endif - - malloc_mutex_unlock(&arena->mtx); - - ret = (void *)®->next; -#ifdef MALLOC_REDZONES - { - region_t *next; - size_t total_size; - - memset(reg->sep.next_red, 0xa5, MALLOC_RED); - - /* - * Unused trailing space in the region is considered part of the - * trailing redzone. 
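arena_bin_malloc_easy() above converts a region index into an address as run + reg0_offset + reg_size * regind; arena_dalloc() later inverts that mapping. A self-contained sketch with made-up layout numbers:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* Hypothetical run: header first, regions starting at reg0_offset.
	 * These layout numbers are invented for the example. */
	static char run[4096];
	size_t reg0_offset = 256, reg_size = 48;
	unsigned regind = 7;

	/* Forward mapping, as in arena_bin_malloc_easy()... */
	void *ptr = &run[reg0_offset + reg_size * regind];

	/* ...and the inverse used at deallocation time. */
	unsigned back = (unsigned)(((uintptr_t)ptr
	    - (uintptr_t)&run[reg0_offset]) / reg_size);

	assert(back == regind);
	printf("region %u sits %zu bytes into its run\n",
	    regind, (size_t)((char *)ptr - run));
	return (0);
}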
- */ - total_size = region_next_size_get(®->sep); - assert(total_size >= size); - memset(&((char *)ret)[size], 0xa5, - total_size - size - sizeof(region_sep_t)); - - reg->sep.next_exact_size = size; - - next = (region_t *)&((char *)reg)[total_size]; - memset(next->sep.prev_red, 0xa5, MALLOC_RED); - } -#endif - return (ret); -} - -static void * -arena_palloc(arena_t *arena, size_t alignment, size_t size) -{ - void *ret; - - assert(arena != NULL); - assert(arena->magic == ARENA_MAGIC); - - if (alignment <= quantum) { - /* - * The requested alignment is always guaranteed, so use the - * normal allocation function. - */ - ret = arena_malloc(arena, size); - } else { - region_t *reg, *old_split; - size_t quantum_size, alloc_size, offset, total_size; - - /* - * Allocate more space than necessary, then carve an aligned - * region out of it. The smallest allowable region is - * potentially a multiple of the quantum size, so care must be - * taken to carve things up such that all resulting regions are - * large enough. - */ - - quantum_size = region_ceiling(size); - if (quantum_size < size) { - /* size is large enough to cause size_t wrap-around. */ - return (NULL); - } - - /* - * Calculate how large of a region to allocate. There must be - * enough space to advance far enough to have at least - * sizeof(region_small_sizer_t) leading bytes, yet also land at - * an alignment boundary. - */ - if (alignment >= sizeof(region_small_sizer_t)) { - alloc_size = - QUANTUM_CEILING(sizeof(region_small_sizer_t)) - + alignment + quantum_size; - } else { - alloc_size = - (QUANTUM_CEILING(sizeof(region_small_sizer_t)) << 1) - + quantum_size; - } - - if (alloc_size < quantum_size) { - /* size_t wrap-around occurred. */ - return (NULL); - } - - malloc_mutex_lock(&arena->mtx); - old_split = arena->split; - reg = arena_reg_alloc(arena, alloc_size, false); - if (reg == NULL) { - malloc_mutex_unlock(&arena->mtx); - return (NULL); - } - if (reg == old_split) { - /* - * We requested a non-fit allocation that was serviced - * by split, which means that we need to take care to - * restore split in the arena_reg_fit() call later on. - * - * Do nothing; a non-NULL old_split will be used as the - * signal to restore split. - */ - } else - old_split = NULL; - - total_size = region_next_size_get(®->sep); - - if (alignment > bin_maxsize || size > bin_maxsize) { - size_t split_size; - uintptr_t p; - - /* - * Put this allocation toward the end of reg, since - * it is large, and we try to put all large regions at - * the end of split regions. + /* + * Bin calculation is always correct, but we may need to + * fix size for the purposes of stats accuracy. */ - split_size = region_next_size_get(®->sep); - p = (uintptr_t)&((char *)®->next)[split_size]; - p -= offsetof(region_t, next); - p -= size; - p &= ~(alignment - 1); - p -= offsetof(region_t, next); - - offset = p - (uintptr_t)reg; + if (size < (1 << TINY_MIN_2POW)) + size = (1 << TINY_MIN_2POW); +#endif + } else if (size <= small_max) { + size = QUANTUM_CEILING(size); + bin = &arena->bins[ntbins + (size >> opt_quantum_2pow) + - 1]; } else { - if ((((uintptr_t)®->next) & (alignment - 1)) != 0) { - uintptr_t p; - - /* - * reg is unaligned. Calculate the offset into - * reg to actually base the allocation at. 
- */ - p = ((uintptr_t)®->next + alignment) - & ~(alignment - 1); - while (p - (uintptr_t)®->next - < QUANTUM_CEILING(sizeof( - region_small_sizer_t))) - p += alignment; - p -= offsetof(region_t, next); - - offset = p - (uintptr_t)reg; - } else - offset = 0; + size = pow2_ceil(size); + bin = &arena->bins[ntbins + nqbins + + (ffs(size >> opt_small_max_2pow) - 2)]; } - assert(offset % quantum == 0); - assert(offset < total_size); - - if (offset != 0) { - region_t *prev; - - /* - * Move ret to an alignment boundary that is far enough - * past the beginning of the allocation to leave a - * leading free region, then trim the leading space. - */ - - assert(offset >= QUANTUM_CEILING( - sizeof(region_small_sizer_t))); - assert(offset + size <= total_size); + assert(size == bin->reg_size); - prev = reg; - reg = (region_t *)&((char *)prev)[offset]; - assert(((uintptr_t)®->next & (alignment - 1)) == 0); - - /* prev. */ - region_next_size_set(&prev->sep, offset); - reg->prev.size = offset; - - /* reg. */ - region_next_size_set(®->sep, total_size - offset); - region_next_free_unset(®->sep); - if (region_next_contig_get(&prev->sep)) - region_next_contig_set(®->sep); - else - region_next_contig_unset(®->sep); - - if (old_split != NULL && (alignment > bin_maxsize - || size > bin_maxsize)) { - /* Restore to split. */ - region_prev_free_unset(®->sep); + if ((run = bin->runcur) != NULL && run->nfree > 0) + ret = arena_bin_malloc_easy(arena, bin, run, size); + else + ret = arena_bin_malloc_hard(arena, bin, size); - arena->split = prev; - old_split = NULL; - } else { - region_next_free_set(&prev->sep); - region_prev_free_set(®->sep); - - if (arena_coalesce(arena, &prev, offset)) { - if (prev != NULL) { - offset = region_next_size_get( - &prev->sep); - arena_mru_cache(arena, prev, - offset); - } - } else - arena_mru_cache(arena, prev, offset); - } #ifdef MALLOC_STATS - arena->stats.nsplit++; + bin->stats.nrequests++; #endif - } - - arena_reg_fit(arena, quantum_size, reg, (old_split != NULL)); + } else { + size = pow2_ceil(size); + ret = (void *)arena_run_alloc(arena, true, size); + } #ifdef MALLOC_STATS - arena->allocated += quantum_size; -#endif - - malloc_mutex_unlock(&arena->mtx); - - ret = (void *)®->next; -#ifdef MALLOC_REDZONES - { - region_t *next; - size_t total_size; - - memset(reg->sep.next_red, 0xa5, MALLOC_RED); - - /* - * Unused trailing space in the region is considered - * part of the trailing redzone. 
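The three branches of arena_malloc() above map a request size onto a bin index: tiny sizes by power of two, sizes up to small_max by quantum multiples, and the remainder by power of two up to bin_maxclass. The sketch below repeats that arithmetic outside the allocator, using the default i386-style parameters (quantum 16, small_max 512, 4 kB pages); pow2_ceil() here is a naive stand-in for the allocator's helper.

#include <assert.h>
#include <stdio.h>
#include <strings.h>

#define QUANTUM_2POW	4	/* example defaults */
#define SMALL_MAX_2POW	9
#define TINY_MIN_2POW	1

static size_t
pow2_ceil(size_t x)	/* naive stand-in for the allocator's helper */
{
	size_t p = 1;

	while (p < x)
		p <<= 1;
	return (p);
}

static unsigned
size2bin(size_t size)
{
	unsigned ntbins = QUANTUM_2POW - TINY_MIN_2POW;		/* 3 */
	unsigned nqbins = (1U << SMALL_MAX_2POW) >> QUANTUM_2POW;	/* 32 */
	size_t quantum = (size_t)1 << QUANTUM_2POW;
	size_t small_min = (quantum >> 1) + 1;				/* 9 */
	size_t small_max = (size_t)1 << SMALL_MAX_2POW;			/* 512 */

	if (size < small_min)		/* tiny: 2, 4, 8 */
		return (ffs((int)(pow2_ceil(size) >> (TINY_MIN_2POW + 1))));
	if (size <= small_max) {	/* quantum-spaced: 16 .. 512 */
		size = (size + quantum - 1) & ~(quantum - 1);
		return (ntbins + (unsigned)(size >> QUANTUM_2POW) - 1);
	}
	/* sub-page power-of-two: 1024, 2048 */
	return (ntbins + nqbins
	    + (ffs((int)(pow2_ceil(size) >> SMALL_MAX_2POW)) - 2));
}

int
main(void)
{
	assert(size2bin(1) == 0);	/* 2-byte bin */
	assert(size2bin(9) == 3);	/* first quantum bin (16 bytes) */
	assert(size2bin(512) == 34);	/* last quantum bin */
	assert(size2bin(513) == 35);	/* 1024-byte bin */
	printf("malloc(100) -> bin %u\n", size2bin(100));
	return (0);
}

With these defaults an arena ends up with 3 tiny, 32 quantum-spaced and 2 power-of-two bins, matching the ntbins/nqbins/npbins values derived later in malloc_init_hard().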
- */ - total_size = region_next_size_get(®->sep); - assert(total_size >= size); - memset(&((char *)ret)[size], 0xa5, - total_size - size - sizeof(region_sep_t)); - - reg->sep.next_exact_size = size; - - next = (region_t *)&((char *)reg)[total_size]; - memset(next->sep.prev_red, 0xa5, MALLOC_RED); - } + if (ret != NULL) + arena->allocated += size; #endif - } - assert(((uintptr_t)ret & (alignment - 1)) == 0); - return (ret); -} - -static void * -arena_calloc(arena_t *arena, size_t num, size_t size) -{ - void *ret; - - assert(arena != NULL); - assert(arena->magic == ARENA_MAGIC); - assert(num * size != 0); - - ret = arena_malloc(arena, num * size); - if (ret == NULL) - return (NULL); - - memset(ret, 0, num * size); + malloc_mutex_unlock(&arena->mtx); return (ret); } @@ -3355,7 +1911,9 @@ static size_t arena_salloc(arena_t *arena, void *ptr) { size_t ret; - region_t *reg; + arena_chunk_t *chunk; + uint32_t pageind; + arena_chunk_map_t *mapelm; assert(arena != NULL); assert(arena->magic == ARENA_MAGIC); @@ -3363,163 +1921,102 @@ arena_salloc(arena_t *arena, void *ptr) assert(ptr != &nil); assert(CHUNK_ADDR2BASE(ptr) != ptr); - reg = (region_t *)&((char *)ptr)[-offsetof(region_t, next)]; - - ret = region_next_size_get(®->sep); + malloc_mutex_lock(&arena->mtx); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow); + mapelm = &chunk->map[pageind]; + assert(mapelm->free == false); + if (mapelm->large == false) { + arena_run_t *run; + + pageind -= mapelm->pos; + mapelm = &chunk->map[pageind]; + + run = (arena_run_t *)&((char *)chunk)[pageind << pagesize_2pow]; + assert(run->magic == ARENA_RUN_MAGIC); + ret = run->bin->reg_size; + } else + ret = mapelm->npages << pagesize_2pow; + + malloc_mutex_unlock(&arena->mtx); return (ret); } -#ifdef MALLOC_REDZONES -static void -redzone_check(void *ptr) -{ - region_t *reg, *next; - size_t size; - unsigned i, ncorruptions; - - ncorruptions = 0; - - reg = (region_t *)&((char *)ptr)[-offsetof(region_t, next)]; - size = region_next_size_get(®->sep); - next = (region_t *)&((char *)reg)[size]; - - /* Leading redzone. */ - for (i = 0; i < MALLOC_RED; i++) { - if ((unsigned char)reg->sep.next_red[i] != 0xa5) { - size_t offset = (size_t)MALLOC_RED - i; - - ncorruptions++; - malloc_printf("%s: (malloc) Corrupted redzone %zu " - "byte%s before %p (0x%x)\n", _getprogname(), - offset, offset > 1 ? "s" : "", ptr, - (unsigned char)reg->sep.next_red[i]); - } - } - memset(®->sep.next_red, 0x5a, MALLOC_RED); - - /* Bytes immediately trailing allocation. */ - for (i = 0; i < size - reg->sep.next_exact_size - sizeof(region_sep_t); - i++) { - if ((unsigned char)((char *)ptr)[reg->sep.next_exact_size + i] - != 0xa5) { - size_t offset = (size_t)(i + 1); - - ncorruptions++; - malloc_printf("%s: (malloc) Corrupted redzone %zu " - "byte%s after %p (size %zu) (0x%x)\n", - _getprogname(), offset, offset > 1 ? "s" : "", ptr, - reg->sep.next_exact_size, (unsigned char)((char *) - ptr)[reg->sep.next_exact_size + i]); - } - } - memset(&((char *)ptr)[reg->sep.next_exact_size], 0x5a, - size - reg->sep.next_exact_size - sizeof(region_sep_t)); - - /* Trailing redzone. */ - for (i = 0; i < MALLOC_RED; i++) { - if ((unsigned char)next->sep.prev_red[i] != 0xa5) { - size_t offset = (size_t)(size - reg->sep.next_exact_size - - sizeof(region_sep_t) + i + 1); - - ncorruptions++; - malloc_printf("%s: (malloc) Corrupted redzone %zu " - "byte%s after %p (size %zu) (0x%x)\n", - _getprogname(), offset, offset > 1 ? 
"s" : "", ptr, - reg->sep.next_exact_size, - (unsigned char)next->sep.prev_red[i]); - } - } - memset(&next->sep.prev_red, 0x5a, MALLOC_RED); - - if (opt_abort && ncorruptions != 0) - abort(); - - reg->sep.next_exact_size = 0; -} -#endif - static void arena_dalloc(arena_t *arena, void *ptr) { - region_t *reg; + arena_chunk_t *chunk; + unsigned pageind; + arena_chunk_map_t *mapelm; + size_t size; assert(arena != NULL); + assert(arena->magic == ARENA_MAGIC); assert(ptr != NULL); assert(ptr != &nil); - - assert(arena != NULL); - assert(arena->magic == ARENA_MAGIC); assert(CHUNK_ADDR2BASE(ptr) != ptr); - reg = (region_t *)&((char *)ptr)[-offsetof(region_t, next)]; - malloc_mutex_lock(&arena->mtx); -#ifdef MALLOC_DEBUG - { - chunk_node_t *chunk, *node; - chunk_node_t key; + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow); + mapelm = &chunk->map[pageind]; + assert(mapelm->free == false); + if (mapelm->large == false) { + arena_run_t *run; + unsigned regind; - chunk = CHUNK_ADDR2BASE(ptr); - assert(chunk->arena == arena); + pageind -= mapelm->pos; + mapelm = &chunk->map[pageind]; - key.chunk = chunk; - node = RB_FIND(chunk_tree_s, &arena->chunks, &key); - assert(node == chunk); - } -#endif -#ifdef MALLOC_REDZONES - redzone_check(ptr); -#endif + run = (arena_run_t *)&((char *)chunk)[pageind << pagesize_2pow]; + assert(run->magic == ARENA_RUN_MAGIC); + size = run->bin->reg_size; -#ifdef MALLOC_STATS - arena->allocated -= region_next_size_get(®->sep); -#endif + if (opt_junk) + memset(ptr, 0x5a, size); + + regind = (unsigned)(((uintptr_t)ptr + - (uintptr_t)&((char *)run)[run->bin->reg0_offset]) + / run->bin->reg_size); + arena_run_mask_free_set(run, regind); + run->nfree++; + if (run->nfree > run->free_max) { + /* Demote run to lower fullness quartile. */ + arena_bin_run_refile(arena, run->bin, run, size, false); + } + } else { + size = mapelm->npages << pagesize_2pow; - if (opt_junk) { - memset(®->next, 0x5a, - region_next_size_get(®->sep) - sizeof(region_sep_t)); + if (opt_junk) { + memset(ptr, 0x5a, size); + } + + arena_run_dalloc(arena, (arena_run_t *)ptr, size); } - arena_delay_cache(arena, reg); +#ifdef MALLOC_STATS + arena->allocated -= size; +#endif malloc_mutex_unlock(&arena->mtx); } -#ifdef NOT_YET -static void * -arena_ralloc(arena_t *arena, void *ptr, size_t size) -{ - - /* - * Arenas don't need to support ralloc, since all reallocation is done - * by allocating new space and copying. This function should never be - * called. 
- */ - /* NOTREACHED */ - assert(false); - - return (NULL); -} -#endif - #ifdef MALLOC_STATS -static bool -arena_stats(arena_t *arena, size_t *allocated, size_t *total) +static size_t +arena_allocated(arena_t *arena) { + size_t ret; assert(arena != NULL); assert(arena->magic == ARENA_MAGIC); - assert(allocated != NULL); - assert(total != NULL); malloc_mutex_lock(&arena->mtx); - *allocated = arena->allocated; - *total = arena->nchunks * chunk_size; + ret = arena->allocated; malloc_mutex_unlock(&arena->mtx); - return (false); + return (ret); } #endif @@ -3527,35 +2024,128 @@ static bool arena_new(arena_t *arena) { unsigned i; + arena_bin_t *bin; + size_t pow2_size, run_size; malloc_mutex_init(&arena->mtx); - for (i = 0; i < NBINS; i++) - qr_new(&arena->bins[i].regions, next.u.s.link); - - for (i = 0; i < BINMASK_NELMS; i++) - arena->bins_mask[i] = 0; +#ifdef MALLOC_STATS + arena->allocated = 0; - arena->split = NULL; - arena->frag = NULL; - RB_INIT(&arena->large_regions); + memset(&arena->stats, 0, sizeof(arena_stats_t)); +#endif + /* Initialize chunks. */ RB_INIT(&arena->chunks); - arena->nchunks = 0; - assert(opt_ndelay > 0); - arena->delayed = (region_t **)base_alloc(opt_ndelay - * sizeof(region_t *)); - if (arena->delayed == NULL) - return (true); - memset(arena->delayed, 0, opt_ndelay * sizeof(region_t *)); - arena->next_delayed = 0; + /* Initialize bins. */ + + /* (2^n)-spaced tiny bins. */ + for (i = 0; i < ntbins; i++) { + bin = &arena->bins[i]; + bin->runcur = NULL; + qr_new(&bin->runs25, link); + qr_new(&bin->runs50, link); + qr_new(&bin->runs75, link); + qr_new(&bin->runs100, link); + + bin->reg_size = (1 << (TINY_MIN_2POW + i)); + + /* + * Calculate how large of a run to allocate. Make sure that at + * least RUN_MIN_REGS regions fit in the run. + */ + run_size = bin->reg_size << RUN_MIN_REGS_2POW; + if (run_size < pagesize) + run_size = pagesize; + else if (run_size > (pagesize << RUN_MAX_PAGES_2POW)) { + run_size = (pagesize << RUN_MAX_PAGES_2POW); + if (run_size > arena_maxclass) + run_size = arena_maxclass; + } + bin->run_size = run_size; + + assert(run_size >= sizeof(arena_run_t)); + bin->nregs = (run_size - sizeof(arena_run_t)) / bin->reg_size; + if (bin->nregs > REGS_MASK_NELMS * (sizeof(unsigned) << 3)) { + /* Take care not to overflow regs_mask. */ + bin->nregs = REGS_MASK_NELMS * (sizeof(unsigned) << 3); + } + bin->reg0_offset = run_size - (bin->nregs * bin->reg_size); #ifdef MALLOC_STATS - arena->allocated = 0; + memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); +#endif + } - memset(&arena->stats, 0, sizeof(arena_stats_t)); + /* Quantum-spaced bins. */ + for (; i < ntbins + nqbins; i++) { + bin = &arena->bins[i]; + bin->runcur = NULL; + qr_new(&bin->runs25, link); + qr_new(&bin->runs50, link); + qr_new(&bin->runs75, link); + qr_new(&bin->runs100, link); + + bin->reg_size = quantum * (i - ntbins + 1); + + /* + * Calculate how large of a run to allocate. Make sure that at + * least RUN_MIN_REGS regions fit in the run. 
+ */ + pow2_size = pow2_ceil(quantum * (i - ntbins + 1)); + run_size = (pow2_size << RUN_MIN_REGS_2POW); + if (run_size < pagesize) + run_size = pagesize; + else if (run_size > (pagesize << RUN_MAX_PAGES_2POW)) { + run_size = (pagesize << RUN_MAX_PAGES_2POW); + if (run_size > arena_maxclass) + run_size = arena_maxclass; + } + bin->run_size = run_size; + + bin->nregs = (run_size - sizeof(arena_run_t)) / bin->reg_size; + assert(bin->nregs <= REGS_MASK_NELMS * (sizeof(unsigned) << 3)); + bin->reg0_offset = run_size - (bin->nregs * bin->reg_size); + +#ifdef MALLOC_STATS + memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); #endif + } + + /* (2^n)-spaced bins. */ + for (; i < ntbins + nqbins + npbins; i++) { + bin = &arena->bins[i]; + bin->runcur = NULL; + qr_new(&bin->runs25, link); + qr_new(&bin->runs50, link); + qr_new(&bin->runs75, link); + qr_new(&bin->runs100, link); + + bin->reg_size = (small_max << (i - (ntbins + nqbins) + 1)); + + /* + * Calculate how large of a run to allocate. Make sure that at + * least RUN_MIN_REGS regions fit in the run. + */ + run_size = bin->reg_size << RUN_MIN_REGS_2POW; + if (run_size < pagesize) + run_size = pagesize; + else if (run_size > (pagesize << RUN_MAX_PAGES_2POW)) { + run_size = (pagesize << RUN_MAX_PAGES_2POW); + if (run_size > arena_maxclass) + run_size = arena_maxclass; + } + bin->run_size = run_size; + + bin->nregs = (run_size - sizeof(arena_run_t)) / bin->reg_size; + assert(bin->nregs <= REGS_MASK_NELMS * (sizeof(unsigned) << 3)); + bin->reg0_offset = run_size - (bin->nregs * bin->reg_size); + +#ifdef MALLOC_STATS + memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); +#endif + } #ifdef MALLOC_DEBUG arena->magic = ARENA_MAGIC; @@ -3570,7 +2160,9 @@ arenas_extend(unsigned ind) { arena_t *ret; - ret = (arena_t *)base_alloc(sizeof(arena_t)); + /* Allocate enough space for trailing bins. */ + ret = (arena_t *)base_alloc(sizeof(arena_t) + + (sizeof(arena_bin_t) * (ntbins + nqbins + npbins - 1))); if (ret != NULL && arena_new(ret) == false) { arenas[ind] = ret; return (ret); @@ -3596,14 +2188,14 @@ arenas_extend(unsigned ind) */ /******************************************************************************/ /* - * Begin general internal functions. + * Begin general internal functions. */ /* * Choose an arena based on a per-thread value (fast-path code, calls slow-path * code if necessary. */ -static __inline arena_t * +static inline arena_t * choose_arena(void) { arena_t *ret; @@ -3699,10 +2291,6 @@ huge_malloc(arena_t *arena, size_t size) /* Allocate a chunk for this request. */ -#ifdef MALLOC_STATS - arena->stats.huge.nrequests++; -#endif - chunk_size = CHUNK_CEILING(size); if (chunk_size == 0) { /* size is large enough to cause size_t wrap-around. */ @@ -3721,16 +2309,14 @@ huge_malloc(arena_t *arena, size_t size) } /* Insert node into chunks. */ - node->arena = arena; node->chunk = ret; node->size = chunk_size; - node->extra = chunk_size - size; malloc_mutex_lock(&chunks_mtx); RB_INSERT(chunk_tree_s, &huge, node); #ifdef MALLOC_STATS - huge_allocated += size; - huge_total += chunk_size; + huge_nmalloc++; + huge_allocated += chunk_size; #endif malloc_mutex_unlock(&chunks_mtx); @@ -3753,13 +2339,9 @@ huge_dalloc(void *ptr) RB_REMOVE(chunk_tree_s, &huge, node); #ifdef MALLOC_STATS - malloc_mutex_lock(&node->arena->mtx); - node->arena->stats.ndalloc++; - malloc_mutex_unlock(&node->arena->mtx); - /* Update counters. 
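Each bin-setup loop in arena_new() above sizes its runs so that at least RUN_MIN_REGS regions fit, then derives nregs and reg0_offset from what is left after the run header. A worked example for one quantum-spaced bin, using the commit's RUN_MIN_REGS of 2^10 and a made-up 200-byte header (the real figure is sizeof(arena_run_t), which depends on REGS_MASK_NELMS); the RUN_MAX_PAGES cap is omitted for brevity.

#include <assert.h>
#include <stdio.h>

int
main(void)
{
	/* One quantum-spaced bin, worked by hand with example numbers. */
	size_t pagesize = 4096, reg_size = 16, header = 200;
	size_t run_size, nregs, reg0_offset;

	run_size = reg_size << 10;	/* room for at least 2^10 regions */
	if (run_size < pagesize)
		run_size = pagesize;

	nregs = (run_size - header) / reg_size;
	reg0_offset = run_size - (nregs * reg_size);

	assert(reg0_offset >= header);	/* the run header always fits up front */
	printf("run_size=%zu nregs=%zu reg0_offset=%zu\n",
	    run_size, nregs, reg0_offset);
	return (0);
}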
*/ - huge_allocated -= (node->size - node->extra); - huge_total -= node->size; + huge_ndalloc++; + huge_allocated -= node->size; #endif malloc_mutex_unlock(&chunks_mtx); @@ -3779,7 +2361,7 @@ imalloc(arena_t *arena, size_t size) assert(arena->magic == ARENA_MAGIC); assert(size != 0); - if (region_ceiling(size) <= (chunk_size >> 1)) + if (size <= arena_maxclass) ret = arena_malloc(arena, size); else ret = huge_malloc(arena, size); @@ -3801,41 +2383,27 @@ static void * ipalloc(arena_t *arena, size_t alignment, size_t size) { void *ret; + size_t pow2_size; assert(arena != NULL); assert(arena->magic == ARENA_MAGIC); - /* - * The conditions that disallow calling arena_palloc() are quite - * tricky. - * - * The first main clause of the conditional mirrors that in imalloc(), - * and is necesary because arena_palloc() may in turn call - * arena_malloc(). - * - * The second and third clauses are necessary because we want to be - * sure that it will not be necessary to allocate more than a - * half-chunk region at any point during the creation of the aligned - * allocation. These checks closely mirror the calculation of - * alloc_size in arena_palloc(). - * - * Finally, the fourth clause makes explicit the constraint on what - * alignments will be attempted via regions. At first glance, this - * appears unnecessary, but in actuality, it protects against otherwise - * difficult-to-detect size_t wrap-around cases. + /* + * Round up to the nearest power of two that is >= alignment and + * >= size. */ - if (region_ceiling(size) <= (chunk_size >> 1) - - && (alignment < sizeof(region_small_sizer_t) - || (QUANTUM_CEILING(sizeof(region_small_sizer_t)) + alignment - + (region_ceiling(size))) <= (chunk_size >> 1)) - - && (alignment >= sizeof(region_small_sizer_t) - || ((QUANTUM_CEILING(sizeof(region_small_sizer_t)) << 1) - + (region_ceiling(size))) <= (chunk_size >> 1)) + if (size > alignment) + pow2_size = pow2_ceil(size); + else + pow2_size = alignment; + pow2_size = QUANTUM_CEILING(pow2_size); + if (pow2_size < size) { + /* size_t overflow. */ + return (NULL); + } - && alignment <= (chunk_size >> 2)) - ret = arena_palloc(arena, alignment, size); + if (pow2_size <= arena_maxclass) + ret = arena_malloc(arena, pow2_size); else { if (alignment <= chunk_size) ret = huge_malloc(arena, size); @@ -3901,16 +2469,13 @@ ipalloc(arena_t *arena, size_t alignment, size_t size) } /* Insert node into chunks. */ - node->arena = arena; node->chunk = ret; node->size = chunksize; - node->extra = node->size - size; malloc_mutex_lock(&chunks_mtx); RB_INSERT(chunk_tree_s, &huge, node); #ifdef MALLOC_STATS huge_allocated += size; - huge_total += chunksize; #endif malloc_mutex_unlock(&chunks_mtx); } @@ -3928,30 +2493,32 @@ ipalloc(arena_t *arena, size_t alignment, size_t size) } static void * -icalloc(arena_t *arena, size_t num, size_t size) +icalloc(arena_t *arena, size_t size) { void *ret; assert(arena != NULL); assert(arena->magic == ARENA_MAGIC); - assert(num * size != 0); - if (region_ceiling(num * size) <= (chunk_size >> 1)) - ret = arena_calloc(arena, num, size); - else { + if (size <= arena_maxclass) { + ret = arena_malloc(arena, size); + if (ret == NULL) + return (NULL); + memset(ret, 0, size); + } else { /* * The virtual memory system provides zero-filled pages, so * there is no need to do so manually. 
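The new ipalloc() above satisfies sub-chunk alignment requests by rounding the request up to a power of two that covers both the size and the alignment; because such a size class is handed out on a boundary that is a multiple of its own size, the alignment follows without any trimming. A small sketch of the rounding, with pow2_ceil() again a naive stand-in for the allocator's helper:

#include <assert.h>
#include <stdio.h>

static size_t
pow2_ceil(size_t x)	/* naive stand-in */
{
	size_t p = 1;

	while (p < x)
		p <<= 1;
	return (p);
}

int
main(void)
{
	/* e.g. posix_memalign()-style request: 64-byte alignment, 100 bytes. */
	size_t alignment = 64, size = 100;
	size_t pow2_size = (size > alignment) ? pow2_ceil(size) : alignment;

	assert(pow2_size == 128);
	/* A power-of-two size class >= alignment lands on a boundary that is
	 * a multiple of its own size, hence of the requested alignment. */
	assert(pow2_size % alignment == 0);
	printf("align=%zu size=%zu -> served as a %zu-byte allocation\n",
	    alignment, size, pow2_size);
	return (0);
}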
*/ - ret = huge_malloc(arena, num * size); + ret = huge_malloc(arena, size); #ifdef USE_BRK if ((uintptr_t)ret >= (uintptr_t)brk_base - && (uintptr_t)ret < (uintptr_t)brk_max) { + && (uintptr_t)ret < (uintptr_t)brk_max) { /* * This may be a re-used brk chunk. Therefore, zero * the memory. */ - memset(ret, 0, num * size); + memset(ret, 0, size); } #endif } @@ -3969,19 +2536,19 @@ static size_t isalloc(void *ptr) { size_t ret; - chunk_node_t *node; + arena_chunk_t *chunk; assert(ptr != NULL); assert(ptr != &nil); - node = CHUNK_ADDR2BASE(ptr); - if (node != ptr) { + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk != ptr) { /* Region. */ - assert(node->arena->magic == ARENA_MAGIC); + assert(chunk->arena->magic == ARENA_MAGIC); - ret = arena_salloc(node->arena, ptr); + ret = arena_salloc(chunk->arena, ptr); } else { - chunk_node_t key; + chunk_node_t *node, key; /* Chunk (huge allocation). */ @@ -3992,7 +2559,7 @@ isalloc(void *ptr) node = RB_FIND(chunk_tree_s, &huge, &key); assert(node != NULL); - ret = node->size - node->extra; + ret = node->size; malloc_mutex_unlock(&chunks_mtx); } @@ -4003,20 +2570,20 @@ isalloc(void *ptr) static void idalloc(void *ptr) { - chunk_node_t *node; + arena_chunk_t *chunk; assert(ptr != NULL); assert(ptr != &nil); - node = CHUNK_ADDR2BASE(ptr); - if (node != ptr) { + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk != ptr) { /* Region. */ #ifdef MALLOC_STATS - malloc_mutex_lock(&node->arena->mtx); - node->arena->stats.ndalloc++; - malloc_mutex_unlock(&node->arena->mtx); + malloc_mutex_lock(&chunk->arena->mtx); + chunk->arena->stats.ndalloc++; + malloc_mutex_unlock(&chunk->arena->mtx); #endif - arena_dalloc(node->arena, ptr); + arena_dalloc(chunk->arena, ptr); } else huge_dalloc(ptr); } @@ -4035,7 +2602,7 @@ iralloc(arena_t *arena, void *ptr, size_t size) oldsize = isalloc(ptr); - if (region_ceiling(size) <= (chunk_size >> 1)) { + if (size <= arena_maxclass) { ret = arena_malloc(arena, size); if (ret == NULL) return (NULL); @@ -4081,25 +2648,20 @@ static void istats(size_t *allocated, size_t *total) { size_t tallocated, ttotal; - size_t rallocated, rtotal; unsigned i; tallocated = 0; - ttotal = base_total; /* arenas. */ for (i = 0; i < narenas; i++) { - if (arenas[i] != NULL) { - arena_stats(arenas[i], &rallocated, &rtotal); - tallocated += rallocated; - ttotal += rtotal; - } + if (arenas[i] != NULL) + tallocated += arena_allocated(arenas[i]); } /* huge. */ malloc_mutex_lock(&chunks_mtx); tallocated += huge_allocated; - ttotal += huge_total; + ttotal = stats_chunks.curchunks * chunk_size; malloc_mutex_unlock(&chunks_mtx); /* Return results. 
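The huge path in isalloc() above looks the pointer up in the global red-black tree of chunk nodes, using a stack-allocated key whose only meaningful field is the chunk address. The sketch below shows that pattern with the BSD <sys/tree.h> macros; the node type and field names here are illustrative, not the commit's chunk_node_t.

#include <sys/tree.h>
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative node; the commit's chunk_node_t carries the same idea. */
struct hnode {
	RB_ENTRY(hnode)	link;
	void		*chunk;	/* chunk base address (the tree key) */
	size_t		size;
};

static int
hnode_cmp(struct hnode *a, struct hnode *b)
{
	uintptr_t pa = (uintptr_t)a->chunk, pb = (uintptr_t)b->chunk;

	return ((pa > pb) - (pa < pb));
}

RB_HEAD(htree, hnode);
RB_GENERATE(htree, hnode, link, hnode_cmp);

int
main(void)
{
	struct htree huge = RB_INITIALIZER(&huge);
	static char chunk_base[1];	/* stands in for a real chunk address */
	struct hnode n, key, *found;

	n.chunk = chunk_base;
	n.size = (size_t)1 << 21;
	RB_INSERT(htree, &huge, &n);

	/* Lookup by address only, with a throwaway key node on the stack. */
	key.chunk = chunk_base;
	found = RB_FIND(htree, &huge, &key);
	assert(found != NULL && found->size == ((size_t)1 << 21));
	printf("huge allocation at %p: %zu bytes\n", found->chunk, found->size);
	return (0);
}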
*/ @@ -4116,14 +2678,12 @@ malloc_print_stats(void) malloc_printf("___ Begin malloc statistics ___\n"); malloc_printf("Number of CPUs: %u\n", ncpus); malloc_printf("Number of arenas: %u\n", narenas); - malloc_printf("Cache slots: %u\n", opt_ndelay); malloc_printf("Chunk size: %zu (2^%zu)\n", chunk_size, opt_chunk_2pow); malloc_printf("Quantum size: %zu (2^%zu)\n", quantum, opt_quantum_2pow); + malloc_printf("Max small size: %zu\n", small_max); malloc_printf("Pointer size: %u\n", sizeof(void *)); - malloc_printf("Number of bins: %u\n", NBINS); - malloc_printf("Maximum bin size: %u\n", bin_maxsize); malloc_printf("Assertions %s\n", #ifdef NDEBUG "disabled" @@ -4131,13 +2691,6 @@ malloc_print_stats(void) "enabled" #endif ); - malloc_printf("Redzone size: %u\n", -#ifdef MALLOC_REDZONES - MALLOC_RED -#else - 0 -#endif - ); #ifdef MALLOC_STATS { @@ -4149,9 +2702,8 @@ malloc_print_stats(void) } { - arena_stats_t stats_arenas; arena_t *arena; - unsigned i, narenas_used; + unsigned i; /* Print chunk stats. */ { @@ -4170,44 +2722,27 @@ malloc_print_stats(void) chunks_stats.curchunks); } + /* Print chunk stats. */ + malloc_printf("\nhuge:\n"); + malloc_printf("%12s %12s %12s\n", + "nmalloc", "ndalloc", "allocated"); + malloc_printf("%12llu %12llu %12zu\n", + huge_nmalloc, huge_ndalloc, huge_allocated); + /* Print stats for each arena. */ - for (i = 0, narenas_used = 0; i < narenas; i++) { + for (i = 0; i < narenas; i++) { arena = arenas[i]; if (arena != NULL) { - narenas_used++; malloc_printf( "\narenas[%u] statistics:\n", i); malloc_mutex_lock(&arena->mtx); - stats_print(&arena->stats); + stats_print(arena); malloc_mutex_unlock(&arena->mtx); } else { malloc_printf("\narenas[%u] statistics:" " unused arena\n", i); } } - - /* - * Only print merged stats if multiple arenas were - * used. - */ - if (narenas_used > 1) { - /* Merge arena stats from arenas. */ - memset(&stats_arenas, 0, sizeof(arena_stats_t)); - for (i = 0; i < narenas; i++) { - arena = arenas[i]; - if (arena != NULL) { - malloc_mutex_lock(&arena->mtx); - stats_merge(arena, - &stats_arenas); - malloc_mutex_unlock( - &arena->mtx); - } - } - - /* Print arena stats. */ - malloc_printf("\nMerged arena statistics:\n"); - stats_print(&stats_arenas); - } } #endif /* #ifdef MALLOC_STATS */ malloc_printf("--- End malloc statistics ---\n"); @@ -4225,7 +2760,7 @@ malloc_print_stats(void) * since otherwise choose_arena() has no way to know whether it's safe * to call _pthread_self(). */ -static __inline bool +static inline bool malloc_init(void) { @@ -4270,6 +2805,13 @@ malloc_init_hard(void) result = sysconf(_SC_PAGESIZE); assert(result != -1); pagesize = (unsigned) result; + + /* + * We assume that pagesize is a power of 2 when calculating + * pagesize_2pow. + */ + assert(((result - 1) & result) == 0); + pagesize_2pow = ffs(result) - 1; } for (i = 0; i < 3; i++) { @@ -4329,15 +2871,11 @@ malloc_init_hard(void) case 'A': opt_abort = true; break; - case 'c': - opt_ndelay >>= 1; - if (opt_ndelay == 0) - opt_ndelay = 1; + case 'h': + opt_hint = false; break; - case 'C': - opt_ndelay <<= 1; - if (opt_ndelay == 0) - opt_ndelay = 1; + case 'H': + opt_hint = true; break; case 'j': opt_junk = false; @@ -4346,7 +2884,12 @@ malloc_init_hard(void) opt_junk = true; break; case 'k': - if ((1 << opt_chunk_2pow) > pagesize) + /* + * Run fullness quartile limits don't have + * enough resolution if there are too few + * regions for the largest bin size classes. 
+ */ + if (opt_chunk_2pow > pagesize_2pow + 3) opt_chunk_2pow--; break; case 'K': @@ -4370,9 +2913,17 @@ malloc_init_hard(void) opt_quantum_2pow--; break; case 'Q': - if ((1 << opt_quantum_2pow) < pagesize) + if (opt_quantum_2pow < pagesize_2pow - 1) opt_quantum_2pow++; break; + case 's': + if (opt_small_max_2pow > QUANTUM_2POW_MIN) + opt_small_max_2pow--; + break; + case 'S': + if (opt_small_max_2pow < pagesize_2pow - 1) + opt_small_max_2pow++; + break; case 'u': opt_utrace = false; break; @@ -4411,16 +2962,29 @@ malloc_init_hard(void) atexit(malloc_print_stats); } + /* Set variables according to the value of opt_small_max_2pow. */ + if (opt_small_max_2pow < opt_quantum_2pow) + opt_small_max_2pow = opt_quantum_2pow; + small_max = (1 << opt_small_max_2pow); + + /* Set bin-related variables. */ + bin_maxclass = (pagesize >> 1); + ntbins = opt_quantum_2pow - TINY_MIN_2POW; + assert(ntbins <= opt_quantum_2pow); + nqbins = (small_max >> opt_quantum_2pow); + npbins = pagesize_2pow - opt_small_max_2pow - 1; + /* Set variables according to the value of opt_quantum_2pow. */ quantum = (1 << opt_quantum_2pow); quantum_mask = quantum - 1; - bin_shift = ((QUANTUM_CEILING(sizeof(region_small_sizer_t)) - >> opt_quantum_2pow)); - bin_maxsize = ((NBINS + bin_shift - 1) * quantum); + small_min = (quantum >> 1) + 1; + assert(small_min <= quantum); /* Set variables according to the value of opt_chunk_2pow. */ chunk_size = (1 << opt_chunk_2pow); chunk_size_mask = chunk_size - 1; + arena_chunk_maplen = (1 << (opt_chunk_2pow - pagesize_2pow)); + arena_maxclass = (chunk_size >> 1); UTRACE(0, 0, 0); @@ -4429,7 +2993,7 @@ malloc_init_hard(void) #endif /* Various sanity checks that regard configuration. */ - assert(quantum >= 2 * sizeof(void *)); + assert(quantum >= sizeof(void *)); assert(quantum <= pagesize); assert(chunk_size >= pagesize); assert(quantum * 4 <= chunk_size); @@ -4443,8 +3007,9 @@ malloc_init_hard(void) brk_max = (void *)((uintptr_t)brk_base + MAXDSIZ); #endif #ifdef MALLOC_STATS + huge_nmalloc = 0; + huge_ndalloc = 0; huge_allocated = 0; - huge_total = 0; #endif RB_INIT(&old_chunks); @@ -4460,10 +3025,10 @@ malloc_init_hard(void) if (ncpus > 1) { /* - * For SMP systems, create twice as many arenas as there are - * CPUs by default. + * For SMP systems, create four times as many arenas as there + * are CPUs by default. */ - opt_narenas_lshift++; + opt_narenas_lshift += 2; } /* Determine how many arenas to use. */ @@ -4527,7 +3092,7 @@ malloc_init_hard(void) } /* - * End library-internal functions. + * End general internal functions. */ /******************************************************************************/ /* @@ -4651,7 +3216,7 @@ calloc(size_t num, size_t size) arena = choose_arena(); if (arena != NULL) - ret = icalloc(arena, num, size); + ret = icalloc(arena, num * size); else ret = NULL; |
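For reference, plugging the default option values into the assignments near the end of malloc_init_hard() above gives the concrete size-class layout. A standalone arithmetic check (the numbers assume the i386-style quantum of 2^4 together with the commit's 2^21 chunks and 4 kB pages):

#include <assert.h>
#include <stdio.h>

int
main(void)
{
	/* Default knobs: quantum 2^4, small_max 2^9, 4 kB pages, 2 MB chunks. */
	unsigned quantum_2pow = 4, small_max_2pow = 9;
	unsigned pagesize_2pow = 12, chunk_2pow = 21, tiny_min_2pow = 1;

	unsigned ntbins = quantum_2pow - tiny_min_2pow;
	unsigned nqbins = (1U << small_max_2pow) >> quantum_2pow;
	unsigned npbins = pagesize_2pow - small_max_2pow - 1;
	size_t small_min = ((size_t)1 << (quantum_2pow - 1)) + 1;
	size_t bin_maxclass = ((size_t)1 << pagesize_2pow) >> 1;
	size_t arena_maxclass = ((size_t)1 << chunk_2pow) >> 1;
	size_t maplen = (size_t)1 << (chunk_2pow - pagesize_2pow);

	assert(ntbins == 3 && nqbins == 32 && npbins == 2);
	printf("bins: %u tiny + %u quantum + %u pow2 = %u per arena\n",
	    ntbins, nqbins, npbins, ntbins + nqbins + npbins);
	printf("small_min=%zu bin_maxclass=%zu arena_maxclass=%zu map entries=%zu\n",
	    small_min, bin_maxclass, arena_maxclass, maplen);
	return (0);
}

The bin total (3 + 32 + 2 = 37 with these defaults) is also the count that arenas_extend() uses when sizing its base_alloc() request for the trailing bin array.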