| author | Jason Evans <jasone@FreeBSD.org> | 2006-01-13 18:38:56 +0000 |
|---|---|---|
| committer | Jason Evans <jasone@FreeBSD.org> | 2006-01-13 18:38:56 +0000 |
| commit | 24b6d11c346ec90729405e7de40276af50e02aa7 | |
| tree | 4d39c8468f3fe88dc332e2b9976aaed6613fb688 /lib/libc/stdlib/malloc.c | |
| parent | 97efeca38d641a8ebacc424656835f998341465d | |
Diffstat (limited to 'lib/libc/stdlib/malloc.c')
| -rw-r--r-- | lib/libc/stdlib/malloc.c | 5408 | 
1 file changed, 4481 insertions, 927 deletions
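
The new allocator shown in the diff below opens with a set of intrusive ring ("qr") macros, which the arena bins later use to chain free regions. As a rough usage sketch — not part of the commit; the `elem_t` type, `link` field, and `main()` harness are invented for illustration, while the macro bodies are copied verbatim from the diff:

```c
#include <stdio.h>

/* Ring macros copied from the diff (subset). */
#define	qr(a_type) struct {						\
	a_type *qre_next;						\
	a_type *qre_prev;						\
}

#define	qr_new(a_qr, a_field) do {					\
	(a_qr)->a_field.qre_next = (a_qr);				\
	(a_qr)->a_field.qre_prev = (a_qr);				\
} while (0)

#define	qr_after_insert(a_qrelm, a_qr, a_field) do {			\
	(a_qr)->a_field.qre_next = (a_qrelm)->a_field.qre_next;	\
	(a_qr)->a_field.qre_prev = (a_qrelm);				\
	(a_qr)->a_field.qre_next->a_field.qre_prev = (a_qr);		\
	(a_qrelm)->a_field.qre_next = (a_qr);				\
} while (0)

#define	qr_foreach(var, a_qr, a_field)					\
	for ((var) = (a_qr);						\
	    (var) != NULL;						\
	    (var) = (((var)->a_field.qre_next != (a_qr))		\
	    ? (var)->a_field.qre_next : NULL))

/* Hypothetical element type; the linkage lives inside the element itself. */
typedef struct elem_s elem_t;
struct elem_s {
	int		val;
	qr(elem_t)	link;
};

int
main(void)
{
	elem_t a, b, c, *iter;

	a.val = 1; b.val = 2; c.val = 3;

	qr_new(&a, link);		/* a is a one-element ring. */
	qr_after_insert(&a, &b, link);	/* a -> b */
	qr_after_insert(&b, &c, link);	/* a -> b -> c */

	qr_foreach(iter, &a, link)
		printf("%d\n", iter->val);	/* Prints 1, 2, 3. */

	return (0);
}
```

Because the linkage is embedded in the element, insertion and removal never allocate, which matters for code that cannot call malloc() on itself.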
| diff --git a/lib/libc/stdlib/malloc.c b/lib/libc/stdlib/malloc.c index d279db6f8323..a3bc33511512 100644 --- a/lib/libc/stdlib/malloc.c +++ b/lib/libc/stdlib/malloc.c @@ -1,1214 +1,4739 @@ +/*- + * Copyright (C) 2006 Jason Evans <jasone@FreeBSD.org>. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice(s), this list of conditions and the following disclaimer as + *    the first lines of this file unmodified other than the possible + *    addition of one or more copyright notices. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice(s), this list of conditions and the following disclaimer in + *    the documentation and/or other materials provided with the + *    distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************* + * + * Following is a brief list of features that distinguish this malloc + * implementation: + * + *   + Multiple arenas are used if there are multiple CPUs, which reduces lock + *     contention and cache sloshing. + * + *   + Cache line sharing between arenas is avoided for internal data + *     structures. + * + *   + Memory is managed in chunks, rather than as individual pages. + * + *   + Allocations are region-based; internal region size is a discrete + *     multiple of a quantum that is appropriate for alignment constraints. + *     This applies to allocations that are up to half the chunk size. + * + *   + Coalescence of regions is delayed in order to reduce overhead and + *     fragmentation. + * + *   + realloc() always moves data, in order to reduce fragmentation. + * + *   + Red-black trees are used to sort large regions. + * + *   + Data structures for huge allocations are stored separately from + *     allocations, which reduces thrashing during low memory conditions. + * + ******************************************************************************* + */ +  /* - * ---------------------------------------------------------------------------- - * "THE BEER-WARE LICENSE" (Revision 42): - * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you - * can do whatever you want with this stuff. If we meet some day, and you think - * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp - * ---------------------------------------------------------------------------- + ******************************************************************************* + * + * Ring macros.   
* + *******************************************************************************   */ +/* Ring definitions. */ +#define	qr(a_type) struct {						\ +	a_type *qre_next;						\ +	a_type *qre_prev;						\ +} + +#define	qr_initializer {NULL, NULL} + +/* Ring functions. */ +#define	qr_new(a_qr, a_field) do {					\ +	(a_qr)->a_field.qre_next = (a_qr);				\ +	(a_qr)->a_field.qre_prev = (a_qr);				\ +} while (0) + +#define	qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next) + +#define	qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev) + +#define	qr_before_insert(a_qrelm, a_qr, a_field) do {			\ +	(a_qr)->a_field.qre_prev = (a_qrelm)->a_field.qre_prev;		\ +	(a_qr)->a_field.qre_next = (a_qrelm);				\ +	(a_qr)->a_field.qre_prev->a_field.qre_next = (a_qr);		\ +	(a_qrelm)->a_field.qre_prev = (a_qr);				\ +} while (0) + +#define	qr_after_insert(a_qrelm, a_qr, a_field) do {			\ +	(a_qr)->a_field.qre_next = (a_qrelm)->a_field.qre_next;		\ +	(a_qr)->a_field.qre_prev = (a_qrelm);				\ +	(a_qr)->a_field.qre_next->a_field.qre_prev = (a_qr);		\ +	(a_qrelm)->a_field.qre_next = (a_qr);				\ +} while (0) + +#define	qr_meld(a_qr_a, a_qr_b, a_type, a_field) do {			\ +	a_type *t;							\ +	(a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_b);	\ +	(a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_a);	\ +	t = (a_qr_a)->a_field.qre_prev;					\ +	(a_qr_a)->a_field.qre_prev = (a_qr_b)->a_field.qre_prev;	\ +	(a_qr_b)->a_field.qre_prev = t;					\ +} while (0) + +/* qr_meld() and qr_split() are functionally equivalent, so there's no need to + * have two copies of the code. */ +#define	qr_split(a_qr_a, a_qr_b, a_type, a_field)			\ +	qr_meld((a_qr_a), (a_qr_b), a_type, a_field) + +#define	qr_remove(a_qr, a_field) do {					\ +	(a_qr)->a_field.qre_prev->a_field.qre_next			\ +	    = (a_qr)->a_field.qre_next;					\ +	(a_qr)->a_field.qre_next->a_field.qre_prev			\ +	    = (a_qr)->a_field.qre_prev;					\ +	(a_qr)->a_field.qre_next = (a_qr);				\ +	(a_qr)->a_field.qre_prev = (a_qr);				\ +} while (0) + +#define	qr_foreach(var, a_qr, a_field)					\ +	for ((var) = (a_qr);						\ +	    (var) != NULL;						\ +	    (var) = (((var)->a_field.qre_next != (a_qr))		\ +	    ? (var)->a_field.qre_next : NULL)) + +#define	qr_reverse_foreach(var, a_qr, a_field)				\ +	for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL;	\ +	    (var) != NULL;						\ +	    (var) = (((var) != (a_qr))					\ +	    ? (var)->a_field.qre_prev : NULL)) + +/******************************************************************************/ + +#define	MALLOC_DEBUG +  #include <sys/cdefs.h>  __FBSDID("$FreeBSD$"); +#include "libc_private.h" +#ifdef MALLOC_DEBUG +#  define _LOCK_DEBUG +#endif +#include "spinlock.h" +#include "namespace.h" +#include <sys/mman.h> +#include <sys/param.h> +#include <sys/stddef.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/sysctl.h> +#include <sys/tree.h> +#include <sys/uio.h> +#include <sys/ktrace.h> /* Must come after several other sys/ includes. */ + +#include <machine/atomic.h> +#include <machine/cpufunc.h> +#include <machine/vmparam.h> + +#include <errno.h> +#include <limits.h> +#include <pthread.h> +#include <sched.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <unistd.h> + +#include "un-namespace.h" +  /* - * Defining MALLOC_EXTRA_SANITY will enable extra checks which are related - * to internal conditions and consistency in malloc.c. 
This has a - * noticeable runtime performance hit, and generally will not do you - * any good unless you fiddle with the internals of malloc or want - * to catch random pointer corruption as early as possible. + * Calculate statistics that can be used to get an idea of how well caching is + * working.   */ -#undef MALLOC_EXTRA_SANITY +#define	MALLOC_STATS +/* #define	MALLOC_STATS_BASE */ +#define	MALLOC_STATS_ARENAS  /* - * What to use for Junk.  This is the byte value we use to fill with - * when the 'J' option is enabled. + * Include redzones before/after every region, and check for buffer overflows.   */ -#define SOME_JUNK	0xd0		/* as in "Duh" :-) */ +#define MALLOC_REDZONES +#ifdef MALLOC_REDZONES +#  define MALLOC_RED_2POW	4 +#  define MALLOC_RED		((size_t)(1 << MALLOC_RED_2POW)) +#endif + +#ifndef MALLOC_DEBUG +#  ifndef NDEBUG +#    define NDEBUG +#  endif +#endif +#include <assert.h> + +#ifdef MALLOC_DEBUG +   /* Disable inlining to make debugging easier. */ +#  define __inline +#endif + +/* Size of stack-allocated buffer passed to strerror_r(). */ +#define	STRERROR_BUF 64 + +/* Number of quantum-spaced bins to store free regions in. */ +#define	NBINS 128 + +/* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */ +#ifdef __i386__ +#  define QUANTUM_2POW_MIN	4 +#  define SIZEOF_PTR		4 +#  define USE_BRK +#endif +#ifdef __ia64__ +#  define QUANTUM_2POW_MIN	4 +#  define SIZEOF_PTR		8 +#endif +#ifdef __alpha__ +#  define QUANTUM_2POW_MIN	4 +#  define SIZEOF_PTR		8 +#  define NO_TLS +#endif +#ifdef __sparc64__ +#  define QUANTUM_2POW_MIN	4 +#  define SIZEOF_PTR		8 +#  define NO_TLS +#endif +#ifdef __amd64__ +#  define QUANTUM_2POW_MIN	4 +#  define SIZEOF_PTR		8 +#endif +#ifdef __arm__ +#  define QUANTUM_2POW_MIN	3 +#  define SIZEOF_PTR		4 +#  define USE_BRK +#  define NO_TLS +#endif +#ifdef __powerpc__ +#  define QUANTUM_2POW_MIN	4 +#  define SIZEOF_PTR		4 +#  define USE_BRK +#endif + +/* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */ +#if (!defined(PIC) && !defined(NO_TLS)) +#  define NO_TLS +#endif  /* - * The basic parameters you can tweak. + * Size and alignment of memory chunks that are allocated by the OS's virtual + * memory system.   * - * malloc_pageshift	pagesize = 1 << malloc_pageshift - *			It's probably best if this is the native - *			page size, but it doesn't have to be. + * chunk_size must be at least as large as the system page size.  In practice, + * it actually needs to be quite a bit larger (64 KB or so), because calling + * _pthread_self() can trigger a ~16 KB allocation due to libpthread + * initialization, and infinite recursion will occur if libpthread + * initialization requires a chunk allocation.   * - * malloc_minsize	minimum size of an allocation in bytes. - *			If this is too small it's too much work - *			to manage them.  This is also the smallest - *			unit of alignment used for the storage - *			returned by malloc/realloc. + * chunk_size must be <= 2^29. + */ +#define	CHUNK_2POW_MIN		16 +#define	CHUNK_2POW_DEFAULT	24 +#define	CHUNK_2POW_MAX		29 + +/* + * Maximum size of L1 cache line.  This is used to avoid cache line aliasing, + * so over-estimates are okay (up to a point), but under-estimates will + * negatively affect performance.   * + * Most allocations from base_arena use this, in order to avoid any false + * sharing.   
*/ +#define	CACHELINE_2POW 6 +#define	CACHELINE ((size_t) (1 << CACHELINE_2POW)) -#include "namespace.h" -#if defined(__FreeBSD__) -#   if defined(__i386__) -#       define malloc_pageshift		12U -#       define malloc_minsize		16U -#   endif -#   if defined(__ia64__) -#	define malloc_pageshift		13U -#	define malloc_minsize		16U -#   endif -#   if defined(__alpha__) -#       define malloc_pageshift		13U -#       define malloc_minsize		16U -#   endif -#   if defined(__sparc64__) -#       define malloc_pageshift		13U -#       define malloc_minsize		16U -#   endif -#   if defined(__amd64__) -#       define malloc_pageshift		12U -#       define malloc_minsize		16U -#   endif -#   if defined(__arm__) -#       define malloc_pageshift         12U -#       define malloc_minsize           16U -#   endif -#   define HAS_UTRACE -    /* -     * Make malloc/free/realloc thread-safe in libc for use with -     * kernel threads. -     */ -#   include "libc_private.h" -#   include "spinlock.h" -    static spinlock_t thread_lock	= _SPINLOCK_INITIALIZER; -    spinlock_t *__malloc_lock		= &thread_lock; -#   define _MALLOC_LOCK()		if (__isthreaded) _SPINLOCK(&thread_lock); -#   define _MALLOC_UNLOCK()		if (__isthreaded) _SPINUNLOCK(&thread_lock); -#endif /* __FreeBSD__ */ - -#if defined(__sparc__) && defined(sun) -#   define malloc_pageshift		12U -#   define malloc_minsize		16U -#   define MAP_ANON			(0) -    static int fdzero; -#   define MMAP_FD	fdzero -#   define INIT_MMAP() \ -	{ if ((fdzero = _open(_PATH_DEVZERO, O_RDWR, 0000)) == -1) \ -	    wrterror("open of /dev/zero"); } -#   define MADV_FREE			MADV_DONTNEED -#endif /* __sparc__ */ - -/* Insert your combination here... */ -#if defined(__FOOCPU__) && defined(__BAROS__) -#   define malloc_pageshift		12U -#   define malloc_minsize		16U -#endif /* __FOOCPU__ && __BAROS__ */ - -#ifndef ZEROSIZEPTR -#define ZEROSIZEPTR	((void *)(uintptr_t)(1 << (malloc_pageshift - 1))) -#endif +/* Default number of regions to delay coalescence for. */ +#define NDELAY 256 + +/******************************************************************************/  /* - * No user serviceable parts behind this point. + * Mutexes based on spinlocks.  We can't use normal pthread mutexes, because + * they require malloc()ed memory.   */ -#include <sys/types.h> -#include <sys/mman.h> -#include <errno.h> -#include <fcntl.h> -#include <paths.h> -#include <stddef.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include "un-namespace.h" +typedef struct { +	spinlock_t	lock; +	bool		recursive; +	volatile int	val; +} malloc_mutex_t; + +static bool malloc_initialized = false; +/******************************************************************************/  /* - * This structure describes a page worth of chunks. + * Statistics data structures.   */ -struct pginfo { -    struct pginfo	*next;	/* next on the free list */ -    void		*page;	/* Pointer to the page */ -    u_short		size;	/* size of this page's chunks */ -    u_short		shift;	/* How far to shift for this size chunks */ -    u_short		free;	/* How many free chunks */ -    u_short		total;	/* How many chunk */ -    u_int		bits[1]; /* Which chunks are free */ +#ifdef MALLOC_STATS + +typedef struct malloc_bin_stats_s malloc_bin_stats_t; +struct malloc_bin_stats_s { +	/* +	 * Number of allocation requests that corresponded to the size of this +	 * bin. +	 */ +	uint64_t	nrequests; + +	/* +	 * Number of best-fit allocations that were successfully serviced by +	 * this bin. 
+	 */ +	uint64_t	nfit; + +	/* +	 * Number of allocation requests that were successfully serviced by this +	 * bin, but that a smaller bin could have serviced. +	 */ +	uint64_t	noverfit; + +	/* High-water marks for this bin. */ +	unsigned long	highcached; + +	/* +	 * Current number of regions in this bin.  This number isn't needed +	 * during normal operation, so is maintained here in order to allow +	 * calculating the high water mark. +	 */ +	unsigned	nregions; +}; + +typedef struct arena_stats_s arena_stats_t; +struct arena_stats_s { +	/* Number of times each function was called. */ +	uint64_t	nmalloc; +	uint64_t	npalloc; +	uint64_t	ncalloc; +	uint64_t	ndalloc; +	uint64_t	nralloc; + +	/* Number of region splits. */ +	uint64_t	nsplit; + +	/* Number of region coalescences. */ +	uint64_t	ncoalesce; + +	/* Bin statistics. */ +	malloc_bin_stats_t bins[NBINS]; + +	/* Split statistics. */ +	struct { +		/* +		 * Number of times a region is requested from the "split" field +		 * of the arena. +		 */ +		uint64_t	nrequests; + +		/* +		 * Number of times the "split" field of the arena successfully +		 * services requests. +		 */ +		uint64_t	nserviced; +	}	split; + +	/* Frag statistics. */ +	struct { +		/* +		 * Number of times a region is cached in the "frag" field of +		 * the arena. +		 */ +		uint64_t	ncached; + +		/* +		 * Number of times a region is requested from the "frag" field +		 * of the arena. +		 */ +		uint64_t	nrequests; + +		/* +		 * Number of times the "frag" field of the arena successfully +		 * services requests. +		 */ +		uint64_t	nserviced; +	}	frag; + +	/* large and large_regions statistics. */ +	struct { +		/* +		 * Number of allocation requests that were too large for a bin, +		 * but not large enough for a hugh allocation. +		 */ +		uint64_t	nrequests; + +		/* +		 * Number of best-fit allocations that were successfully +		 * serviced by large_regions. +		 */ +		uint64_t	nfit; + +		/* +		 * Number of allocation requests that were successfully serviced +		 * large_regions, but that a bin could have serviced. +		 */ +		uint64_t	noverfit; + +		/* +		 * High-water mark for large_regions (number of nodes in tree). +		 */ +		unsigned long	highcached; + +		/* +		 * Used only to store the current number of nodes, since this +		 * number isn't maintained anywhere else. +		 */ +		unsigned long	curcached; +	}	large; + +	/* Huge allocation statistics. */ +	struct { +		/* Number of huge allocation requests. */ +		uint64_t	nrequests; +	}	huge; +}; + +typedef struct chunk_stats_s chunk_stats_t; +struct chunk_stats_s { +	/* Number of chunks that were allocated. */ +	uint64_t	nchunks; + +	/* High-water mark for number of chunks allocated. */ +	unsigned long	highchunks; + +	/* +	 * Current number of chunks allocated.  This value isn't maintained for +	 * any other purpose, so keep track of it in order to be able to set +	 * highchunks. +	 */ +	unsigned long	curchunks;  }; +#endif /* #ifdef MALLOC_STATS */ + +/******************************************************************************/  /* - * This structure describes a number of free pages. + * Chunk data structures.   */ -struct pgfree { -    struct pgfree	*next;	/* next run of free pages */ -    struct pgfree	*prev;	/* prev run of free pages */ -    void		*page;	/* pointer to free pages */ -    void		*end;	/* pointer to end of free pages */ -    size_t		size;	/* number of bytes free */ +/* Needed by chunk data structures. */ +typedef struct	arena_s arena_t; + +/* Tree of chunks. 
*/ +typedef struct chunk_node_s chunk_node_t; +struct chunk_node_s { +	/* +	 * For an active chunk that is currently carved into regions by an +	 * arena allocator, this points to the arena that owns the chunk.  We +	 * set this pointer even for huge allocations, so that it is possible +	 * to tell whether a huge allocation was done on behalf of a user +	 * allocation request, or on behalf of an internal allocation request. +	 */ +	arena_t *arena; + +	/* Linkage for the chunk tree. */ +	RB_ENTRY(chunk_node_s) link; + +	/* +	 * Pointer to the chunk that this tree node is responsible for.  In some +	 * (but certainly not all) cases, this data structure is placed at the +	 * beginning of the corresponding chunk, so this field may point to this +	 * node. +	 */ +	void	*chunk; + +	/* Total chunk size. */ +	size_t	size; + +	/* Number of trailing bytes that are not used. */ +	size_t	extra;  }; +typedef struct chunk_tree_s chunk_tree_t; +RB_HEAD(chunk_tree_s, chunk_node_s); +/******************************************************************************/  /* - * How many bits per u_int in the bitmap. - * Change only if not 8 bits/byte + * Region data structures.   */ -#define	MALLOC_BITS	(8*sizeof(u_int)) + +typedef struct region_s region_t;  /* - * Magic values to put in the page_directory + * Tree of region headers, used for free regions that don't fit in the arena + * bins.   */ -#define MALLOC_NOT_MINE	((struct pginfo*) 0) -#define MALLOC_FREE 	((struct pginfo*) 1) -#define MALLOC_FIRST	((struct pginfo*) 2) -#define MALLOC_FOLLOW	((struct pginfo*) 3) -#define MALLOC_MAGIC	((struct pginfo*) 4) +typedef struct region_node_s region_node_t; +struct region_node_s { +	RB_ENTRY(region_node_s)	link; +	region_t		*reg; +}; +typedef struct region_tree_s region_tree_t; +RB_HEAD(region_tree_s, region_node_s); -#ifndef malloc_pageshift -#define malloc_pageshift		12U -#endif +typedef struct region_prev_s region_prev_t; +struct region_prev_s { +	uint32_t	size; +}; -#ifndef malloc_minsize -#define malloc_minsize			16U +#define NEXT_SIZE_MASK 0x1fffffffU +typedef struct { +#ifdef MALLOC_REDZONES +	char		prev_red[MALLOC_RED];  #endif +	/* +	 * Typical bit pattern for bits: +	 * +	 *   pncsssss ssssssss ssssssss ssssssss +	 * +	 *   p : Previous free? +	 *   n : Next free? +	 *   c : Part of a range of contiguous allocations? +	 *   s : Next size (number of quanta). +	 * +	 * It's tempting to use structure bitfields here, but the compiler has +	 * to make assumptions that make the code slower than direct bit +	 * manipulations, and these fields are manipulated a lot. +	 */ +	uint32_t	bits; -#if !defined(malloc_pagesize) -#define malloc_pagesize			(1UL<<malloc_pageshift) +#ifdef MALLOC_REDZONES +	size_t		next_exact_size; +	char		next_red[MALLOC_RED];  #endif +} region_sep_t; -#if ((1<<malloc_pageshift) != malloc_pagesize) -#error	"(1<<malloc_pageshift) != malloc_pagesize" -#endif +typedef struct region_next_small_sizer_s region_next_small_sizer_t; +struct region_next_small_sizer_s +{ +	qr(region_t)	link; +}; -#ifndef malloc_maxsize -#define malloc_maxsize			((malloc_pagesize)>>1) -#endif +typedef struct region_next_small_s region_next_small_t; +struct region_next_small_s +{ +	qr(region_t)	link; -/* A mask for the offset inside a page.  */ -#define malloc_pagemask	((malloc_pagesize)-1) +	/* Only accessed for delayed regions & footer invalid. 
*/ +	uint32_t	slot; +}; -#define pageround(foo) (((foo) + (malloc_pagemask))&(~(malloc_pagemask))) -#define ptr2index(foo) (((u_long)(foo) >> malloc_pageshift)-malloc_origo) +typedef struct region_next_large_s region_next_large_t; +struct region_next_large_s +{ +	region_node_t	node; -#ifndef _MALLOC_LOCK -#define _MALLOC_LOCK() -#endif +	/* Use for LRU vs MRU tree ordering. */ +	bool		lru; +}; -#ifndef _MALLOC_UNLOCK -#define _MALLOC_UNLOCK() +typedef struct region_next_s region_next_t; +struct region_next_s { +	union { +		region_next_small_t	s; +		region_next_large_t	l; +	}	u; +}; + +/* + * Region separator, including prev/next fields that are only accessible when + * the neighboring regions are free. + */ +struct region_s { +	/* This field must only be accessed if sep.prev_free is true. */ +	region_prev_t   prev; + +	/* Actual region separator that is always present between regions. */ +	region_sep_t    sep; + +	/* +	 * These fields must only be accessed if sep.next_free or +	 * sep.next_contig is true.  +	 */ +	region_next_t   next; +}; + +/* Small variant of region separator, only used for size calculations. */ +typedef struct region_small_sizer_s region_small_sizer_t; +struct region_small_sizer_s { +	region_prev_t   prev; +	region_sep_t    sep; +	region_next_small_sizer_t   next; +}; + +/******************************************************************************/ +/* + * Arena data structures. + */ + +typedef struct arena_bin_s arena_bin_t; +struct arena_bin_s { +	/* +	 * Link into ring before the oldest free region and just after the +	 * newest free region. +	 */ +	region_t	regions; +}; + +struct arena_s { +#ifdef MALLOC_DEBUG +	uint32_t	magic; +#  define ARENA_MAGIC 0x947d3d24  #endif -#ifndef MMAP_FD -#define MMAP_FD (-1) +	/* All operations on this arena require that mtx be locked. */ +	malloc_mutex_t	mtx; + +	/* True if this arena is allocated from base_arena. */ +	bool		malloced; + +	/* +	 * bins is used to store rings of free regions of the following sizes, +	 * assuming a 16-byte quantum (sizes include region separators): +	 * +	 *   bins[i] | size | +	 *   --------+------+ +	 *        0  |   32 | +	 *        1  |   48 | +	 *        2  |   64 | +	 *           :      : +	 *           :      : +	 *   --------+------+ +	 */ +	arena_bin_t	bins[NBINS]; + +	/* +	 * A bitmask that corresponds to which bins have elements in them. +	 * This is used when searching for the first bin that contains a free +	 * region that is large enough to service an allocation request. +	 */ +#define	BINMASK_NELMS (NBINS / (sizeof(int) << 3)) +	int		bins_mask[BINMASK_NELMS]; + +	/* +	 * Tree of free regions of the size range [bin_maxsize..~chunk).  These +	 * are sorted primarily by size, and secondarily by LRU. +	 */ +	region_tree_t	large_regions; + +	/* +	 * If not NULL, a region that is not stored in bins or large_regions. +	 * If large enough, this region is used instead of any region stored in +	 * bins or large_regions, in order to reduce the number of insert/remove +	 * operations, and in order to increase locality of allocation in +	 * common cases. +	 */ +	region_t	*split; + +	/* +	 * If not NULL, a region that is not stored in bins or large_regions. +	 * If large enough, this region is preferentially used for small +	 * allocations over any region in large_regions, split, or over-fit +	 * small bins. +	 */ +	region_t	*frag; + +	/* Tree of chunks that this arena currenly owns. 
*/ +	chunk_tree_t	chunks; +	unsigned	nchunks; + +	/* +	 * FIFO ring of free regions for which coalescence is delayed.  A slot +	 * that contains NULL is considered empty.  opt_ndelay stores how many +	 * elements there are in the FIFO. +	 */ +	region_t	**delayed; +	uint32_t	next_delayed; /* Next slot in delayed to use. */ + +#ifdef MALLOC_STATS +	/* Total byte count of allocated memory, not including overhead. */ +	size_t		allocated; + +	arena_stats_t stats;  #endif +}; + +/******************************************************************************/ +/* + * Data. + */ + +/* Used as a special "nil" return value for malloc(0). */ +static int		nil; + +/* Number of CPUs. */ +static unsigned		ncpus; + +/* VM page size. */ +static unsigned		pagesize; + +/* Various quantum-related settings. */ +static size_t		quantum; +static size_t		quantum_mask; /* (quantum - 1). */ +static size_t		bin_shift; +static size_t		bin_maxsize; + +/* Various chunk-related settings. */ +static size_t		chunk_size; +static size_t		chunk_size_mask; /* (chunk_size - 1). */ -#ifndef INIT_MMAP -#define INIT_MMAP() +/********/ +/* + * Chunks. + */ + +/* Protects chunk-related data structures. */ +static malloc_mutex_t	chunks_mtx; + +/* Tree of chunks that are stand-alone huge allocations. */ +static chunk_tree_t	huge; + +#ifdef USE_BRK +/* + * Try to use brk for chunk-size allocations, due to address space constraints. + */ +void *brk_base; /* Result of first sbrk(0) call. */ +void *brk_prev; /* Current end of brk, or ((void *)-1) if brk is exhausted. */ +void *brk_max; /* Upper limit on brk addresses (may be an over-estimate). */  #endif -/* Number of free pages we cache */ -static unsigned malloc_cache = 16; +#ifdef MALLOC_STATS +/* + * Byte counters for allocated/total space used by the chunks in the huge + * allocations tree. + */ +static size_t		huge_allocated; +static size_t		huge_total; +#endif -/* The offset from pagenumber to index into the page directory */ -static u_long malloc_origo; +/* + * Tree of chunks that were previously allocated.  This is used when allocating + * chunks, in an attempt to re-use address space. + */ +static chunk_tree_t	old_chunks; -/* The last index in the page directory we care about */ -static u_long last_index; +/********/ +/* + * Arenas. + */ -/* Pointer to page directory. Allocated "as if with" malloc */ -static struct	pginfo **page_dir; +/* Arena that is used for internal memory allocations. */ +static arena_t		base_arena; -/* How many slots in the page directory */ -static unsigned	malloc_ninfo; +/*  + * Arenas that are used to service external requests.  Not all elements of the + * arenas array are necessarily used; arenas are created lazily as needed. + */ +static arena_t		**arenas; +static unsigned		narenas; +#ifndef NO_TLS +static unsigned		next_arena; +#endif +static malloc_mutex_t	arenas_mtx; /* Protects arenas initialization. */ -/* Free pages line up here */ -static struct pgfree free_list; +#ifndef NO_TLS +/* + * Map of pthread_self() --> arenas[???], used for selecting an arena to use + * for allocations. + */ +static __thread arena_t *arenas_map; +#endif -/* Abort(), user doesn't handle problems.  */ -static int malloc_abort = 1; +#ifdef MALLOC_STATS +/* Chunk statistics. */ +chunk_stats_t		stats_chunks; +#endif -/* Are we trying to die ?  */ -static int suicide; +/*******************************/ +/* + * Runtime configuration options. 
+ */ +const char	*_malloc_options; + +static bool	opt_abort = true; +static bool	opt_junk = true; +static bool	opt_print_stats = false; +static size_t	opt_quantum_2pow = QUANTUM_2POW_MIN; +static size_t	opt_chunk_2pow = CHUNK_2POW_DEFAULT; +static bool	opt_utrace = false; +static bool	opt_sysv = false; +static bool	opt_xmalloc = false; +static bool	opt_zero = false; +static uint32_t	opt_ndelay = NDELAY; +static int32_t	opt_narenas_lshift = 0; + +typedef struct { +	void	*p; +	size_t	s; +	void	*r; +} malloc_utrace_t; + +#define	UTRACE(a, b, c)							\ +	if (opt_utrace) {						\ +		malloc_utrace_t ut = {a, b, c};				\ +		utrace(&ut, sizeof(ut));				\ +	} -/* always realloc ?  */ -static int malloc_realloc; +/******************************************************************************/ +/* + * Begin function prototypes for non-inline static functions. + */ -#if defined(MADV_FREE) -/* pass the kernel a hint on free pages ?  */ -static int malloc_hint = 0; +static void	malloc_mutex_init(malloc_mutex_t *a_mutex); +static void	malloc_mutex_recursive_init(malloc_mutex_t *a_mutex); +static void	wrtmessage(const char *p1, const char *p2, const char *p3, +		const char *p4); +static void	malloc_printf(const char *format, ...); +#ifdef MALLOC_STATS +static void	stats_merge(arena_t *arena, arena_stats_t *stats_arenas); +static void	stats_print(arena_stats_t *stats_arenas); +#endif +static void	*pages_map(void *addr, size_t size); +static void	pages_unmap(void *addr, size_t size); +static void	*chunk_alloc(size_t size); +static void	chunk_dealloc(void *chunk, size_t size); +static unsigned	arena_bins_search(arena_t *arena, size_t size); +static bool	arena_coalesce(arena_t *arena, region_t **reg, size_t size); +static void	arena_coalesce_hard(arena_t *arena, region_t *reg, +		region_t *next, size_t size, bool split_adjacent); +static void	arena_large_insert(arena_t *arena, region_t *reg, bool lru); +static void	arena_large_cache(arena_t *arena, region_t *reg, bool lru); +static void	arena_lru_cache(arena_t *arena, region_t *reg); +static void	arena_delay_cache(arena_t *arena, region_t *reg); +static region_t	*arena_split_reg_alloc(arena_t *arena, size_t size, bool fit); +static void	arena_reg_fit(arena_t *arena, size_t size, region_t *reg, +		bool restore_split); +static region_t	*arena_large_reg_alloc(arena_t *arena, size_t size, bool fit); +static region_t	*arena_chunk_reg_alloc(arena_t *arena, size_t size, bool fit); +static void	*arena_malloc(arena_t *arena, size_t size); +static void	*arena_palloc(arena_t *arena, size_t alignment, size_t size); +static void	*arena_calloc(arena_t *arena, size_t num, size_t size); +static size_t	arena_salloc(arena_t *arena, void *ptr); +#ifdef MALLOC_REDZONES +static void	redzone_check(void *ptr); +#endif +static void	arena_dalloc(arena_t *arena, void *ptr); +#ifdef NOT_YET +static void	*arena_ralloc(arena_t *arena, void *ptr, size_t size); +#endif +#ifdef MALLOC_STATS +static bool	arena_stats(arena_t *arena, size_t *allocated, size_t *total);  #endif +static void	arena_new(arena_t *arena, bool malloced, bool recursive); +static arena_t	*arenas_extend(unsigned ind); +#ifndef NO_TLS +static arena_t	*choose_arena_hard(void); +#endif +static void	*huge_malloc(arena_t *arena, size_t size); +static void	huge_dalloc(void *ptr); +static void	*imalloc(arena_t *arena, size_t size); +static void	*ipalloc(arena_t *arena, size_t alignment, size_t size); +static void	*icalloc(arena_t *arena, size_t num, size_t size); +static size_t	isalloc(void *ptr); +static void	idalloc(void 
*ptr); +static void	*iralloc(arena_t *arena, void *ptr, size_t size); +#ifdef MALLOC_STATS +static void	istats(size_t *allocated, size_t *total); +#endif +static void	malloc_print_stats(void); +static void	malloc_init_hard(void); -/* xmalloc behaviour ?  */ -static int malloc_xmalloc; +/* + * End function prototypes. + */ +/******************************************************************************/ +/* + * Begin mutex. + */ -/* sysv behaviour for malloc(0) ?  */ -static int malloc_sysv; +static void +malloc_mutex_init(malloc_mutex_t *a_mutex) +{ +	static const spinlock_t lock = _SPINLOCK_INITIALIZER; -/* zero fill ?  */ -static int malloc_zero; +	a_mutex->lock = lock; +	a_mutex->recursive = false; +	a_mutex->val = 0; +} -/* junk fill ?  */ -static int malloc_junk = 1; +static void +malloc_mutex_recursive_init(malloc_mutex_t *a_mutex) +{ +	static const spinlock_t lock = _SPINLOCK_INITIALIZER; -#ifdef HAS_UTRACE +	a_mutex->lock = lock; +	a_mutex->recursive = true; +	a_mutex->val = 0; +} -/* utrace ?  */ -static int malloc_utrace; +static __inline void +malloc_mutex_lock(malloc_mutex_t *a_mutex) +{ +#define	MAX_SPIN 1024 -struct ut { void *p; size_t s; void *r; }; +	if (__isthreaded) { +		if (a_mutex->recursive == false) +			_SPINLOCK(&a_mutex->lock); +		else { +			volatile long *owner = &a_mutex->lock.lock_owner; +			long self; -void utrace(struct ut *, int); +			self = (long)_pthread_self(); -#define UTRACE(a, b, c) \ -	if (malloc_utrace) \ -		{struct ut u; u.p=a; u.s = b; u.r=c; utrace(&u, sizeof u);} -#else /* !HAS_UTRACE */ -#define UTRACE(a,b,c) -#endif /* HAS_UTRACE */ +			if (atomic_cmpset_acq_long(owner, self, self) == 0) +				_SPINLOCK(&a_mutex->lock); +			a_mutex->val++; +		} +	} +} -/* my last break. */ -static void *malloc_brk; +static __inline void +malloc_mutex_unlock(malloc_mutex_t *a_mutex) +{ -/* one location cache for free-list holders */ -static struct pgfree *px; +	if (__isthreaded) { +		if (a_mutex->recursive == false) { +			_SPINUNLOCK(&a_mutex->lock); +		} else { +			a_mutex->val--; +			if (a_mutex->val == 0) { +				_SPINUNLOCK(&a_mutex->lock); +			} +		} +	} +} -/* compile-time options */ -const char *_malloc_options; +/* + * End mutex. + */ +/******************************************************************************/ +/* + * Begin Utility functions/macros. + */ + +/* Return the chunk address for allocation address a. */ +#define	CHUNK_ADDR2BASE(a)						\ +	((void *) ((size_t) (a) & ~chunk_size_mask)) -/* Name of the current public function */ -static const char *malloc_func; +/* Return the chunk offset of address a. */ +#define	CHUNK_ADDR2OFFSET(a)						\ +	((size_t) (a) & chunk_size_mask) -/* Macro for mmap */ -#define MMAP(size) \ -	mmap(NULL, (size), PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, \ -	    MMAP_FD, (off_t)0); +/* Return the smallest chunk multiple that is >= s. */ +#define	CHUNK_CEILING(s)						\ +	(((s) + chunk_size_mask) & ~chunk_size_mask) + +/* Return the smallest quantum multiple that is >= a. */ +#define	QUANTUM_CEILING(a)						\ +	(((a) + quantum_mask) & ~quantum_mask) + +/* Return the offset within a chunk to the first region separator. */ +#define	CHUNK_REG_OFFSET						\ +	(QUANTUM_CEILING(sizeof(chunk_node_t) +				\ +	    sizeof(region_sep_t)) - offsetof(region_t, next))  /* - * Necessary function declarations + * Return how many bytes of usable space are needed for an allocation of size + * bytes.  This value is not a multiple of quantum, since it doesn't include + * the region separator.   
*/ -static int extend_pgdir(u_long index); -static void *imalloc(size_t size); -static void ifree(void *ptr); -static void *irealloc(void *ptr, size_t size); +static __inline size_t +region_ceiling(size_t size) +{ +	size_t quantum_size, min_reg_quantum; + +	quantum_size = QUANTUM_CEILING(size + sizeof(region_sep_t)); +	min_reg_quantum = QUANTUM_CEILING(sizeof(region_small_sizer_t)); +	if (quantum_size >= min_reg_quantum) +		return (quantum_size); +	else +		return (min_reg_quantum); +}  static void  wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)  { -    _write(STDERR_FILENO, p1, strlen(p1)); -    _write(STDERR_FILENO, p2, strlen(p2)); -    _write(STDERR_FILENO, p3, strlen(p3)); -    _write(STDERR_FILENO, p4, strlen(p4)); +	_write(STDERR_FILENO, p1, strlen(p1)); +	_write(STDERR_FILENO, p2, strlen(p2)); +	_write(STDERR_FILENO, p3, strlen(p3)); +	_write(STDERR_FILENO, p4, strlen(p4));  } -void (*_malloc_message)(const char *p1, const char *p2, const char *p3, +void	(*_malloc_message)(const char *p1, const char *p2, const char *p3,  	    const char *p4) = wrtmessage; +/* + * Print to stderr in such a way as to (hopefully) avoid memory allocation. + */ +static void +malloc_printf(const char *format, ...) +{ +	char buf[4096]; +	va_list ap; + +	va_start(ap, format); +	vsnprintf(buf, sizeof(buf), format, ap); +	va_end(ap); +	_malloc_message(buf, "", "", ""); +} + +/* + * Note that no bitshifting is done for booleans in any of the bitmask-based + * flag manipulation functions that follow; test for non-zero versus zero. + */ + +/**********************/ +static __inline uint32_t +region_prev_free_get(region_sep_t *sep) +{ + +	return ((sep->bits) & 0x80000000U); +} + +static __inline void +region_prev_free_set(region_sep_t *sep) +{ + +	sep->bits = ((sep->bits) | 0x80000000U); +} + +static __inline void +region_prev_free_unset(region_sep_t *sep) +{ + +	sep->bits = ((sep->bits) & 0x7fffffffU); +} + +/**********************/ +static __inline uint32_t +region_next_free_get(region_sep_t *sep) +{ + +	return ((sep->bits) & 0x40000000U); +} + +static __inline void +region_next_free_set(region_sep_t *sep) +{ + +	sep->bits = ((sep->bits) | 0x40000000U); +} + +static __inline void +region_next_free_unset(region_sep_t *sep) +{ + +	sep->bits = ((sep->bits) & 0xbfffffffU); +} + +/**********************/ +static __inline uint32_t +region_next_contig_get(region_sep_t *sep) +{ + +	return ((sep->bits) & 0x20000000U); +} + +static __inline void +region_next_contig_set(region_sep_t *sep) +{ + +	sep->bits = ((sep->bits) | 0x20000000U); +} + +static __inline void +region_next_contig_unset(region_sep_t *sep) +{ + +	sep->bits = ((sep->bits) & 0xdfffffffU); +} + +/********************/ +static __inline size_t +region_next_size_get(region_sep_t *sep) +{ + +	return ((size_t) (((sep->bits) & NEXT_SIZE_MASK) << opt_quantum_2pow)); +} + +static __inline void +region_next_size_set(region_sep_t *sep, size_t size) +{ +	uint32_t bits; + +	assert(size % quantum == 0); + +	bits = sep->bits; +	bits &= ~NEXT_SIZE_MASK; +	bits |= (((uint32_t) size) >> opt_quantum_2pow); + +	sep->bits = bits; +} + +#ifdef MALLOC_STATS  static void -wrterror(char const *p) +stats_merge(arena_t *arena, arena_stats_t *stats_arenas)  { +	unsigned i; + +	stats_arenas->nmalloc += arena->stats.nmalloc; +	stats_arenas->npalloc += arena->stats.npalloc; +	stats_arenas->ncalloc += arena->stats.ncalloc; +	stats_arenas->ndalloc += arena->stats.ndalloc; +	stats_arenas->nralloc += arena->stats.nralloc; + +	stats_arenas->nsplit += 
arena->stats.nsplit; +	stats_arenas->ncoalesce += arena->stats.ncoalesce; + +	/* Split. */ +	stats_arenas->split.nrequests += arena->stats.split.nrequests; +	stats_arenas->split.nserviced += arena->stats.split.nserviced; + +	/* Frag. */ +	stats_arenas->frag.ncached += arena->stats.frag.ncached; +	stats_arenas->frag.nrequests += arena->stats.frag.nrequests; +	stats_arenas->frag.nserviced += arena->stats.frag.nserviced; + +	/* Bins. */ +	for (i = 0; i < NBINS; i++) { +		stats_arenas->bins[i].nrequests += +		    arena->stats.bins[i].nrequests; +		stats_arenas->bins[i].nfit += arena->stats.bins[i].nfit; +		stats_arenas->bins[i].noverfit += arena->stats.bins[i].noverfit; +		if (arena->stats.bins[i].highcached +		    > stats_arenas->bins[i].highcached) { +		    stats_arenas->bins[i].highcached +			= arena->stats.bins[i].highcached; +		} +	} + +	/* large and large_regions. */ +	stats_arenas->large.nrequests += arena->stats.large.nrequests; +	stats_arenas->large.nfit += arena->stats.large.nfit; +	stats_arenas->large.noverfit += arena->stats.large.noverfit; +	if (arena->stats.large.highcached > stats_arenas->large.highcached) +		stats_arenas->large.highcached = arena->stats.large.highcached; +	stats_arenas->large.curcached += arena->stats.large.curcached; -    suicide = 1; -    _malloc_message(_getprogname(), malloc_func, " error: ", p); -    abort(); +	/* Huge allocations. */ +	stats_arenas->huge.nrequests += arena->stats.huge.nrequests;  }  static void -wrtwarning(char *p) +stats_print(arena_stats_t *stats_arenas)  { +	unsigned i; + +	malloc_printf("calls:\n"); +	malloc_printf(" %13s%13s%13s%13s%13s\n", "nmalloc", "npalloc", +	    "ncalloc", "ndalloc", "nralloc"); +	malloc_printf(" %13llu%13llu%13llu%13llu%13llu\n", +	    stats_arenas->nmalloc, stats_arenas->npalloc, stats_arenas->ncalloc, +	    stats_arenas->ndalloc, stats_arenas->nralloc); + +	malloc_printf("region events:\n"); +	malloc_printf(" %13s%13s\n", "nsplit", "ncoalesce"); +	malloc_printf(" %13llu%13llu\n", stats_arenas->nsplit, +	    stats_arenas->ncoalesce); + +	malloc_printf("cached split usage:\n"); +	malloc_printf(" %13s%13s\n", "nrequests", "nserviced"); +	malloc_printf(" %13llu%13llu\n", stats_arenas->split.nrequests, +	    stats_arenas->split.nserviced); + +	malloc_printf("cached frag usage:\n"); +	malloc_printf(" %13s%13s%13s\n", "ncached", "nrequests", "nserviced"); +	malloc_printf(" %13llu%13llu%13llu\n", stats_arenas->frag.ncached, +	    stats_arenas->frag.nrequests, stats_arenas->frag.nserviced); + +	malloc_printf("bins:\n"); +	malloc_printf(" %4s%7s%13s%13s%13s%11s\n", "bin",  +	    "size", "nrequests", "nfit", "noverfit", "highcached"); +	for (i = 0; i < NBINS; i++) { +		malloc_printf( +		    " %4u%7u%13llu%13llu%13llu%11lu\n", +		    i, ((i + bin_shift) << opt_quantum_2pow), +		    stats_arenas->bins[i].nrequests, stats_arenas->bins[i].nfit, +		    stats_arenas->bins[i].noverfit, +		    stats_arenas->bins[i].highcached); +	} -    /* -     * Sensitive processes, somewhat arbitrarily defined here as setuid, -     * setgid, root and wheel cannot afford to have malloc mistakes. 
-     */ -    if (malloc_abort || issetugid() || getuid() == 0 || getgid() == 0) -	wrterror(p); -    _malloc_message(_getprogname(), malloc_func, " warning: ", p); +	malloc_printf("large:\n"); +	malloc_printf(" %13s%13s%13s%13s%13s\n", "nrequests", "nfit", +	    "noverfit", "highcached", "curcached"); +	malloc_printf(" %13llu%13llu%13llu%13lu%13lu\n", +	    stats_arenas->large.nrequests, stats_arenas->large.nfit, +	    stats_arenas->large.noverfit, stats_arenas->large.highcached, +	    stats_arenas->large.curcached); + +	malloc_printf("huge\n"); +	malloc_printf(" %13s\n", "nrequests"); +	malloc_printf(" %13llu\n", stats_arenas->huge.nrequests);  } +#endif  /* - * Allocate a number of pages from the OS + * End Utility functions/macros. + */ +/******************************************************************************/ +/* + * Begin Mem.   */ + +static __inline int +chunk_comp(chunk_node_t *a, chunk_node_t *b) +{ +	int ret; + +	assert(a != NULL); +	assert(b != NULL); + +	if ((size_t) a->chunk < (size_t) b->chunk) +		ret = -1; +	else if (a->chunk == b->chunk) +		ret = 0; +	else +		ret = 1; + +	return (ret); +} + +/* Generate red-black tree code for chunks. */ +RB_GENERATE(chunk_tree_s, chunk_node_s, link, chunk_comp); + +static __inline int +region_comp(region_node_t *a, region_node_t *b) +{ +	int ret; +	size_t size_a, size_b; + +	assert(a != NULL); +	assert(b != NULL); + +	size_a = region_next_size_get(&a->reg->sep); +	size_b = region_next_size_get(&b->reg->sep); +	if (size_a < size_b) +		ret = -1; +	else if (size_a == size_b) { +		if (a == b) { +			/* Regions are equal with themselves. */ +			ret = 0; +		} else { +			if (a->reg->next.u.l.lru) { +				/* +				 * Oldest region comes first (secondary LRU +				 * ordering).  a is guaranteed to be the search +				 * key, which is how we can enforce this +				 * secondary ordering. +				 */ +				ret = 1; +			} else { +				/* +				 * Oldest region comes last (secondary MRU +				 * ordering).  a is guaranteed to be the search +				 * key, which is how we can enforce this +				 * secondary ordering. +				 */ +				ret = -1; +			} +		} +	} else +		ret = 1; + +	return (ret); +} + +/* Generate red-black tree code for regions. */ +RB_GENERATE(region_tree_s, region_node_s, link, region_comp); +  static void * -map_pages(size_t pages) +pages_map(void *addr, size_t size)  { -    caddr_t result, tail; +	void *ret; -    result = (caddr_t)pageround((u_long)sbrk(0)); -    tail = result + (pages << malloc_pageshift); -    if (tail < result) -	return (NULL); +#ifdef USE_BRK +AGAIN: +#endif +	/* +	 * We don't use MAP_FIXED here, because it can cause the *replacement* +	 * of existing mappings, and we only want to create new mappings. +	 */ +	ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, +	    -1, 0); +	assert(ret != NULL); + +	if (ret == MAP_FAILED) +		ret = NULL; +	else if (addr != NULL && ret != addr) { +		/* +		 * We succeeded in mapping memory, but not in the right place. +		 */ +		if (munmap(ret, size) == -1) { +			char buf[STRERROR_BUF]; + +			strerror_r(errno, buf, sizeof(buf)); +			malloc_printf("%s: (malloc) Error in munmap(): %s\n", +			    _getprogname(), buf); +			if (opt_abort) +				abort(); +		} +		ret = NULL; +	} +#ifdef USE_BRK +	else if ((size_t)ret >= (size_t)brk_base +	    && (size_t)ret < (size_t)brk_max) { +		/* +		 * We succeeded in mapping memory, but at a location that could +		 * be confused with brk.  Leave the mapping intact so that this +		 * won't ever happen again, then try again. 
+		 */ +		assert(addr == NULL); +		goto AGAIN; +	} +#endif -    if (brk(tail)) { -#ifdef MALLOC_EXTRA_SANITY -	wrterror("(ES): map_pages fails\n"); -#endif /* MALLOC_EXTRA_SANITY */ -	return (NULL); -    } +	assert(ret == NULL || (addr == NULL && ret != addr) +	    || (addr != NULL && ret == addr)); +	return (ret); +} -    last_index = ptr2index(tail) - 1; -    malloc_brk = tail; +static void +pages_unmap(void *addr, size_t size) +{ -    if ((last_index+1) >= malloc_ninfo && !extend_pgdir(last_index)) -	return (NULL); +	if (munmap(addr, size) == -1) { +		char buf[STRERROR_BUF]; -    return (result); +		strerror_r(errno, buf, sizeof(buf)); +		malloc_printf("%s: (malloc) Error in munmap(): %s\n", +		    _getprogname(), buf); +		if (opt_abort) +			abort(); +	}  } +static void * +chunk_alloc(size_t size) +{ +	void *ret, *chunk; +	chunk_node_t *tchunk, *delchunk; +	chunk_tree_t delchunks; + +	assert(size != 0); +	assert(size % chunk_size == 0); + +	RB_INIT(&delchunks); + +	malloc_mutex_lock(&chunks_mtx); + +	if (size == chunk_size) { +		/* +		 * Check for address ranges that were previously chunks and try +		 * to use them. +		 */ + +		tchunk = RB_MIN(chunk_tree_s, &old_chunks); +		while (tchunk != NULL) { +			/* Found an address range.  Try to recycle it. */ + +			chunk = tchunk->chunk; +			delchunk = tchunk; +			tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk); + +			/* +			 * Remove delchunk from the tree, but keep track of the +			 * address. +			 */ +			RB_REMOVE(chunk_tree_s, &old_chunks, delchunk); + +			/* +			 * Keep track of the node so that it can be deallocated +			 * after chunks_mtx is released. +			 */ +			RB_INSERT(chunk_tree_s, &delchunks, delchunk); + +#ifdef USE_BRK +			if ((size_t)chunk >= (size_t)brk_base +			    && (size_t)chunk < (size_t)brk_max) { +				/* Re-use a previously freed brk chunk. */ +				ret = chunk; +				goto RETURN; +			} +#endif +			if ((ret = pages_map(chunk, size)) != NULL) { +				/* Success. */ +				goto RETURN; +			} +		} + +#ifdef USE_BRK +		/* +		 * Try to create chunk-size allocations in brk, in order to +		 * make full use of limited address space. +		 */ +		if (brk_prev != (void *)-1) { +			void *brk_cur; +			intptr_t incr; + +			/* +			 * The loop is necessary to recover from races with +			 * other threads that are using brk for something other +			 * than malloc. +			 */ +			do { +				/* Get the current end of brk. */ +				brk_cur = sbrk(0); + +				/* +				 * Calculate how much padding is necessary to +				 * chunk-align the end of brk. +				 */ +				incr = (char *)chunk_size +				    - (char *)CHUNK_ADDR2OFFSET(brk_cur); +				if (incr == chunk_size) { +					ret = brk_cur; +				} else { +					ret = (char *)brk_cur + incr; +					incr += chunk_size; +				} + +				brk_prev = sbrk(incr); +				if (brk_prev == brk_cur) { +					/* Success. */ +					goto RETURN; +				} +			} while (brk_prev != (void *)-1); +		} +#endif +	} + +	/* +	 * Try to over-allocate, but allow the OS to place the allocation +	 * anywhere.  Beware of size_t wrap-around. +	 */ +	if (size + chunk_size > size) { +		if ((ret = pages_map(NULL, size + chunk_size)) != NULL) { +			size_t offset = CHUNK_ADDR2OFFSET(ret); + +			/* +			 * Success.  Clean up unneeded leading/trailing space. +			 */ +			if (offset != 0) { +				/* Leading space. */ +				pages_unmap(ret, chunk_size - offset); + +				ret = (void *) ((size_t) ret + (chunk_size - +				    offset)); + +				/* Trailing space. 
*/ +				pages_unmap((void *) ((size_t) ret + size), +				    offset); +			} else { +				/* Trailing space only. */ +				pages_unmap((void *) ((size_t) ret + size), +				    chunk_size); +			} +			goto RETURN; +		} +	} + +	/* All strategies for allocation failed. */ +	ret = NULL; +RETURN: +#ifdef MALLOC_STATS +	if (ret != NULL) { +		stats_chunks.nchunks += (size / chunk_size); +		stats_chunks.curchunks += (size / chunk_size); +	} +	if (stats_chunks.curchunks > stats_chunks.highchunks) +		stats_chunks.highchunks = stats_chunks.curchunks; +#endif +	malloc_mutex_unlock(&chunks_mtx); + +	/* +	 * Deallocation of the chunk nodes must be done after releasing +	 * chunks_mtx, in case deallocation causes a chunk to be unmapped. +	 */ +	tchunk = RB_MIN(chunk_tree_s, &delchunks); +	while (tchunk != NULL) { +		delchunk = tchunk; +		tchunk = RB_NEXT(chunk_tree_s, &delchunks, delchunk); +		RB_REMOVE(chunk_tree_s, &delchunks, delchunk); +		idalloc(delchunk); +	} + +	assert(CHUNK_ADDR2BASE(ret) == ret); +	return (ret); +} + +static void +chunk_dealloc(void *chunk, size_t size) +{ + +	assert(chunk != NULL); +	assert(CHUNK_ADDR2BASE(chunk) == chunk); +	assert(size != 0); +	assert(size % chunk_size == 0); + +	if (size == chunk_size) { +		chunk_node_t *node; + +		node = ipalloc(&base_arena, CACHELINE, +		    sizeof(chunk_node_t) + CACHELINE); + +		malloc_mutex_lock(&chunks_mtx); +		if (node != NULL) { +			/* +			 * Create a record of this chunk before deallocating +			 * it, so that the address range can be recycled if +			 * memory usage increases later on. +			 */ +			node->arena = NULL; +			node->chunk = chunk; +			node->size = size; +			node->extra = 0; + +			RB_INSERT(chunk_tree_s, &old_chunks, node); +		} +		malloc_mutex_unlock(&chunks_mtx); +	} + +#ifdef USE_BRK +	if ((size_t)chunk >= (size_t)brk_base +	    && (size_t)chunk < (size_t)brk_max) +		madvise(chunk, size, MADV_FREE); +	else +#endif +		pages_unmap(chunk, size); + +#ifdef MALLOC_STATS +	malloc_mutex_lock(&chunks_mtx); +	stats_chunks.curchunks -= (size / chunk_size); +	malloc_mutex_unlock(&chunks_mtx); +#endif +} + +/******************************************************************************/  /* - * Extend page directory + * arena.   */ -static int -extend_pgdir(u_long index) -{ -    struct  pginfo **new, **old; -    u_long i, oldlen; - -    /* Make it this many pages */ -    i = index * sizeof *page_dir; -    i /= malloc_pagesize; -    i += 2; - -    /* remember the old mapping size */ -    oldlen = malloc_ninfo * sizeof *page_dir; - -    /* -     * NOTE: we allocate new pages and copy the directory rather than tempt -     * fate by trying to "grow" the region.. There is nothing to prevent -     * us from accidently re-mapping space that's been allocated by our caller -     * via dlopen() or other mmap(). -     * -     * The copy problem is not too bad, as there is 4K of page index per -     * 4MB of malloc arena. -     * -     * We can totally avoid the copy if we open a file descriptor to associate -     * the anon mappings with.  Then, when we remap the pages at the new -     * address, the old pages will be "magically" remapped..  But this means -     * keeping open a "secret" file descriptor..... 
-     */ - -    /* Get new pages */ -    new = (struct pginfo**) MMAP(i * malloc_pagesize); -    if (new == MAP_FAILED) -	return (0); - -    /* Copy the old stuff */ -    memcpy(new, page_dir, -	    malloc_ninfo * sizeof *page_dir); - -    /* register the new size */ -    malloc_ninfo = i * malloc_pagesize / sizeof *page_dir; - -    /* swap the pointers */ -    old = page_dir; -    page_dir = new; - -    /* Now free the old stuff */ -    munmap(old, oldlen); -    return (1); + +static __inline void +arena_mask_set(arena_t *arena, unsigned bin) +{ +	unsigned elm, bit; + +	assert(bin < NBINS); + +	elm = bin / (sizeof(int) << 3); +	bit = bin - (elm * (sizeof(int) << 3)); +	assert((arena->bins_mask[elm] & (1 << bit)) == 0); +	arena->bins_mask[elm] |= (1 << bit); +} + +static __inline void +arena_mask_unset(arena_t *arena, unsigned bin) +{ +	unsigned elm, bit; + +	assert(bin < NBINS); + +	elm = bin / (sizeof(int) << 3); +	bit = bin - (elm * (sizeof(int) << 3)); +	assert((arena->bins_mask[elm] & (1 << bit)) != 0); +	arena->bins_mask[elm] ^= (1 << bit); +} + +static unsigned +arena_bins_search(arena_t *arena, size_t size) +{ +	unsigned ret, minbin, i; +	int bit; + +	assert(QUANTUM_CEILING(size) == size); +	assert((size >> opt_quantum_2pow) >= bin_shift); + +	if (size > bin_maxsize) { +		ret = UINT_MAX; +		goto RETURN; +	} + +	minbin = (size >> opt_quantum_2pow) - bin_shift; +	assert(minbin < NBINS); +	for (i = minbin / (sizeof(int) << 3); i < BINMASK_NELMS; i++) { +		bit = ffs(arena->bins_mask[i] +		    & (UINT_MAX << (minbin % (sizeof(int) << 3)))); +		if (bit != 0) { +			/* Usable allocation found. */ +			ret = (i * (sizeof(int) << 3)) + bit - 1; +#ifdef MALLOC_STATS +			if (ret == minbin) +				arena->stats.bins[minbin].nfit++; +			else +				arena->stats.bins[ret].noverfit++; +#endif +			goto RETURN; +		} +	} + +	ret = UINT_MAX; +RETURN: +	return (ret); +} + +static __inline void +arena_delayed_extract(arena_t *arena, region_t *reg) +{ + +	if (region_next_contig_get(®->sep)) { +		uint32_t slot; + +		/* Extract this region from the delayed FIFO. 
*/ +		assert(region_next_free_get(®->sep) == false); + +		slot = reg->next.u.s.slot; +		assert(arena->delayed[slot] == reg); +		arena->delayed[slot] = NULL; +	} +#ifdef MALLOC_DEBUG +	else { +		region_t *next; + +		assert(region_next_free_get(®->sep)); + +		next = (region_t *) &((char *) reg) +		    [region_next_size_get(®->sep)]; +		assert(region_prev_free_get(&next->sep)); +	} +#endif +} + +static __inline void +arena_bin_extract(arena_t *arena, unsigned bin, region_t *reg) +{ +	arena_bin_t *tbin; + +	assert(bin < NBINS); + +	tbin = &arena->bins[bin]; + +	assert(qr_next(&tbin->regions, next.u.s.link) != &tbin->regions); +#ifdef MALLOC_DEBUG +	{ +		region_t *next; + +		next = (region_t *) &((char *) reg) +		    [region_next_size_get(®->sep)]; +		if (region_next_free_get(®->sep)) { +			assert(region_prev_free_get(&next->sep)); +			assert(region_next_size_get(®->sep) +			    == next->prev.size); +		} else { +			assert(region_prev_free_get(&next->sep) == false); +		} +	} +#endif +	assert(region_next_size_get(®->sep) +	    == ((bin + bin_shift) << opt_quantum_2pow)); + +	qr_remove(reg, next.u.s.link); +#ifdef MALLOC_STATS +	arena->stats.bins[bin].nregions--; +#endif +	if (qr_next(&tbin->regions, next.u.s.link) == &tbin->regions) +		arena_mask_unset(arena, bin); + +	arena_delayed_extract(arena, reg); +} + +static __inline void +arena_extract(arena_t *arena, region_t *reg) +{ +	size_t size; + +	assert(region_next_free_get(®->sep)); +#ifdef MALLOC_DEBUG +	{ +		region_t *next; + +		next = (region_t *)&((char *)reg) +		    [region_next_size_get(®->sep)]; +	} +#endif + +	assert(reg != arena->split); +	assert(reg != arena->frag); +	if ((size = region_next_size_get(®->sep)) <= bin_maxsize) { +		arena_bin_extract(arena, (size >> opt_quantum_2pow) - bin_shift, +		    reg); +	} else { +		RB_REMOVE(region_tree_s, &arena->large_regions, +		    ®->next.u.l.node); +#ifdef MALLOC_STATS +		arena->stats.large.curcached--; +#endif +	} +} + +/* Try to coalesce reg with its neighbors.  Return NULL if coalescing fails. */ +static bool +arena_coalesce(arena_t *arena, region_t **reg, size_t size) +{ +	bool ret; +	region_t *prev, *treg, *next, *nextnext; +	size_t tsize, prev_size, next_size; + +	ret = false; + +	treg = *reg; + +	/* +	 * Keep track of the size while coalescing, then just set the size in +	 * the header/footer once at the end of coalescing. 
+	 */ +	assert(size == region_next_size_get(&(*reg)->sep)); +	tsize = size; + +	next = (region_t *)&((char *)treg)[tsize]; +	assert(region_next_free_get(&treg->sep)); +	assert(region_prev_free_get(&next->sep)); +	assert(region_next_size_get(&treg->sep) == next->prev.size); + +	while (region_prev_free_get(&treg->sep)) { +		prev_size = treg->prev.size; +		prev = (region_t *)&((char *)treg)[-prev_size]; +		assert(region_next_free_get(&prev->sep)); + +		arena_extract(arena, prev); + +		tsize += prev_size; + +		treg = prev; + +#ifdef MALLOC_STATS +		if (ret == false) +			arena->stats.ncoalesce++; +#endif +		ret = true; +	} + +	while (region_next_free_get(&next->sep)) { +		next_size = region_next_size_get(&next->sep); +		nextnext = (region_t *)&((char *)next)[next_size]; +		assert(region_prev_free_get(&nextnext->sep)); + +		assert(region_next_size_get(&next->sep) == nextnext->prev.size); + +		arena_extract(arena, next); + +		assert(region_next_size_get(&next->sep) == nextnext->prev.size); + +		tsize += next_size; + +#ifdef MALLOC_STATS +		if (ret == false) +			arena->stats.ncoalesce++; +#endif +		ret = true; + +		next = (region_t *)&((char *)treg)[tsize]; +	} + +	/* Update header/footer. */ +	if (ret) { +		region_next_size_set(&treg->sep, tsize); +		next->prev.size = tsize; +	} + +	/* +	 * Now that coalescing with adjacent free regions is done, we need to +	 * try to coalesce with "split" and "frag".  Those two regions are +	 * marked as allocated, which is why this takes special effort.  There +	 * are seven possible cases, but we want to make the (hopefully) common +	 * case of no coalescence fast, so the checks are optimized for that +	 * case.  The seven cases are: +	 * +	 *   /------\ +	 * 0 | treg | No coalescence needed.  Make this case fast. +	 *   \------/ +	 * +	 *   /------+------\ +	 * 1 | frag | treg | +	 *   \------+------/ +	 * +	 *   /------+------\ +	 * 2 | treg | frag | +	 *   \------+------/ +	 * +	 *   /-------+------\ +	 * 3 | split | treg | +	 *   \-------+------/ +	 * +	 *   /------+-------\ +	 * 4 | treg | split | +	 *   \------+-------/ +	 * +	 *   /------+------+-------\ +	 * 5 | frag | treg | split | +	 *   \------+------+-------/ +	 * +	 *   /-------+------+------\ +	 * 6 | split | treg | frag | +	 *   \-------+------+------/ +	 */ + +	if (arena->split == NULL) { +		/* Cases 3-6 ruled out. */ +	} else if ((size_t)next < (size_t)arena->split) { +		/* Cases 3-6 ruled out. */ +	} else { +		region_t *split_next; +		size_t split_size; + +		split_size = region_next_size_get(&arena->split->sep); +		split_next = (region_t *)&((char *)arena->split)[split_size]; + +		if ((size_t)split_next < (size_t)treg) { +			/* Cases 3-6 ruled out. */ +		} else { +			/* +			 * Split is adjacent to treg.  Take the slow path and +			 * coalesce. +			 */ + +			arena_coalesce_hard(arena, treg, next, tsize, true); + +			treg = NULL; +#ifdef MALLOC_STATS +			if (ret == false) +				arena->stats.ncoalesce++; +#endif +			ret = true; +			goto RETURN; +		} +	} + +	/* If we get here, then cases 3-6 have been ruled out. */ +	if (arena->frag == NULL) { +		/* Cases 1-6 ruled out. */ +	} else if ((size_t)next < (size_t)arena->frag) { +		/* Cases 1-6 ruled out. */ +	} else { +		region_t *frag_next; +		size_t frag_size; + +		frag_size = region_next_size_get(&arena->frag->sep); +		frag_next = (region_t *)&((char *)arena->frag)[frag_size]; + +		if ((size_t)frag_next < (size_t)treg) { +			/* Cases 1-6 ruled out. */ +		} else { +			/* +			 * Frag is adjacent to treg.  Take the slow path and +			 * coalesce. 
+			 */ + +			arena_coalesce_hard(arena, treg, next, tsize, false); + +			treg = NULL; +#ifdef MALLOC_STATS +			if (ret == false) +				arena->stats.ncoalesce++; +#endif +			ret = true; +			goto RETURN; +		} +	} + +	/* If we get here, no coalescence with "split" or "frag" was needed. */ + +	/* Finish updating header. */ +	region_next_contig_unset(&treg->sep); + +	assert(region_next_free_get(&treg->sep)); +	assert(region_prev_free_get(&next->sep)); +	assert(region_prev_free_get(&treg->sep) == false); +	assert(region_next_free_get(&next->sep) == false); + +RETURN: +	if (ret) +		*reg = treg; +	return (ret);  }  /* - * Initialize the world + * arena_coalesce() calls this function if it determines that a region needs to + * be coalesced with "split" and/or "frag".   */  static void -malloc_init(void) +arena_coalesce_hard(arena_t *arena, region_t *reg, region_t *next, size_t size, +    bool split_adjacent)  { -    const char *p; -    char b[64]; -    int i, j; -    int save_errno = errno; - -    INIT_MMAP(); - -#ifdef MALLOC_EXTRA_SANITY -    malloc_junk = 1; -#endif /* MALLOC_EXTRA_SANITY */ - -    for (i = 0; i < 3; i++) { -	if (i == 0) { -	    j = readlink("/etc/malloc.conf", b, sizeof b - 1); -	    if (j <= 0) -		continue; -	    b[j] = '\0'; -	    p = b; -	} else if (i == 1 && issetugid() == 0) { -	    p = getenv("MALLOC_OPTIONS"); -	} else if (i == 1) { -	    continue; +	bool frag_adjacent; + +	assert(next == (region_t *)&((char *)reg)[size]); +	assert(region_next_free_get(®->sep)); +	assert(region_next_size_get(®->sep) == size); +	assert(region_prev_free_get(&next->sep)); +	assert(next->prev.size == size); + +	if (split_adjacent == false) +		frag_adjacent = true; +	else if (arena->frag != NULL) { +		/* Determine whether frag will be coalesced with. */ + +		if ((size_t)next < (size_t)arena->frag) +			frag_adjacent = false; +		else { +			region_t *frag_next; +			size_t frag_size; + +			frag_size = region_next_size_get(&arena->frag->sep); +			frag_next = (region_t *)&((char *)arena->frag) +			    [frag_size]; + +			if ((size_t)frag_next < (size_t)reg) +				frag_adjacent = false; +			else +				frag_adjacent = true; +		} +	} else +		frag_adjacent = false; + +	if (split_adjacent && frag_adjacent) { +		region_t *a; +		size_t a_size, b_size; + +		/* Coalesce all three regions. 
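+		 * This is case 5 or 6 above: the leading region (frag or
+		 * split, respectively) absorbs reg and the trailing region,
+		 * stays marked as allocated, and serves as the arena's split
+		 * afterward, while frag is cleared.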
*/ + +		if (arena->frag == next) +			a = arena->split; +		else { +			a = arena->frag; +			arena->split = a; +		} +		arena->frag = NULL; + +		a_size = region_next_size_get(&a->sep); +		assert(a_size == (size_t)reg - (size_t)a); + +		b_size = region_next_size_get(&next->sep); + +		region_next_size_set(&a->sep, a_size + size + b_size); +		assert(region_next_free_get(&a->sep) == false);  	} else { -	    p = _malloc_options; -	} -	for (; p != NULL && *p != '\0'; p++) { -	    switch (*p) { -		case '>': malloc_cache   <<= 1; break; -		case '<': malloc_cache   >>= 1; break; -		case 'a': malloc_abort   = 0; break; -		case 'A': malloc_abort   = 1; break; -#if defined(MADV_FREE) -		case 'h': malloc_hint    = 0; break; -		case 'H': malloc_hint    = 1; break; -#endif -		case 'r': malloc_realloc = 0; break; -		case 'R': malloc_realloc = 1; break; -		case 'j': malloc_junk    = 0; break; -		case 'J': malloc_junk    = 1; break; -#ifdef HAS_UTRACE -		case 'u': malloc_utrace  = 0; break; -		case 'U': malloc_utrace  = 1; break; -#endif -		case 'v': malloc_sysv    = 0; break; -		case 'V': malloc_sysv    = 1; break; -		case 'x': malloc_xmalloc = 0; break; -		case 'X': malloc_xmalloc = 1; break; -		case 'z': malloc_zero    = 0; break; -		case 'Z': malloc_zero    = 1; break; -		default: -		    _malloc_message(_getprogname(), malloc_func, -			 " warning: ", "unknown char in MALLOC_OPTIONS\n"); -		    break; -	    } +		/* Coalesce two regions. */ + +		if (split_adjacent) { +			size += region_next_size_get(&arena->split->sep); +			if (arena->split == next) { +				/* reg comes before split. */ +				region_next_size_set(®->sep, size); +				 +				assert(region_next_free_get(®->sep)); +				region_next_free_unset(®->sep); + +				arena->split = reg; +			} else { +				/* reg comes after split. */ +				region_next_size_set(&arena->split->sep, size); + +				assert(region_next_free_get(&arena->split->sep) +				    == false); + +				assert(region_prev_free_get(&next->sep)); +				region_prev_free_unset(&next->sep); +			} +		} else { +			assert(frag_adjacent); +			size += region_next_size_get(&arena->frag->sep); +			if (arena->frag == next) { +				/* reg comes before frag. */ +				region_next_size_set(®->sep, size); + +				assert(region_next_free_get(®->sep)); +				region_next_free_unset(®->sep); + +				arena->frag = reg; +			} else { +				/* reg comes after frag. 
*/ +				region_next_size_set(&arena->frag->sep, size); + +				assert(region_next_free_get(&arena->frag->sep) +				    == false); + +				assert(region_prev_free_get(&next->sep)); +				region_prev_free_unset(&next->sep); +			} +		} +	} +} + +static __inline void +arena_bin_append(arena_t *arena, unsigned bin, region_t *reg) +{ +	arena_bin_t *tbin; + +	assert(bin < NBINS); +	assert((region_next_size_get(®->sep) >> opt_quantum_2pow) +	    >= bin_shift); +	assert(region_next_size_get(®->sep) +	    == ((bin + bin_shift) << opt_quantum_2pow)); + +	tbin = &arena->bins[bin]; + +	if (qr_next(&tbin->regions, next.u.s.link) == &tbin->regions) +		arena_mask_set(arena, bin); + +	qr_new(reg, next.u.s.link); +	qr_before_insert(&tbin->regions, reg, next.u.s.link); +#ifdef MALLOC_STATS +	arena->stats.bins[bin].nregions++; + +	if (arena->stats.bins[bin].nregions +	    > arena->stats.bins[bin].highcached) { +		arena->stats.bins[bin].highcached +		    = arena->stats.bins[bin].nregions;  	} -    } +#endif +} + +static __inline void +arena_bin_push(arena_t *arena, unsigned bin, region_t *reg) +{ +	arena_bin_t *tbin; +	assert(bin < NBINS); +	assert((region_next_size_get(®->sep) >> opt_quantum_2pow) +	    >= bin_shift); +	assert(region_next_size_get(®->sep) +	    == ((bin + bin_shift) << opt_quantum_2pow)); -    UTRACE(0, 0, 0); +	tbin = &arena->bins[bin]; -    /* -     * We want junk in the entire allocation, and zero only in the part -     * the user asked for. -     */ -    if (malloc_zero) -	malloc_junk=1; +	if (qr_next(&tbin->regions, next.u.s.link) == &tbin->regions) +		arena_mask_set(arena, bin); -    /* Allocate one page for the page directory */ -    page_dir = (struct pginfo **) MMAP(malloc_pagesize); +	region_next_contig_unset(®->sep); +	qr_new(reg, next.u.s.link); +	qr_after_insert(&tbin->regions, reg, next.u.s.link); +#ifdef MALLOC_STATS +	arena->stats.bins[bin].nregions++; -    if (page_dir == MAP_FAILED) -	wrterror("mmap(2) failed, check limits\n"); +	if (arena->stats.bins[bin].nregions +	    > arena->stats.bins[bin].highcached) { +		arena->stats.bins[bin].highcached +		    = arena->stats.bins[bin].nregions; +	} +#endif +} + +static __inline region_t * +arena_bin_pop(arena_t *arena, unsigned bin) +{ +	region_t *ret; +	arena_bin_t *tbin; + +	assert(bin < NBINS); + +	tbin = &arena->bins[bin]; -    /* -     * We need a maximum of malloc_pageshift buckets, steal these from the -     * front of the page_directory; -     */ -    malloc_origo = ((u_long)pageround((u_long)sbrk(0))) >> malloc_pageshift; -    malloc_origo -= malloc_pageshift; +	assert(qr_next(&tbin->regions, next.u.s.link) != &tbin->regions); -    malloc_ninfo = malloc_pagesize / sizeof *page_dir; +	ret = qr_next(&tbin->regions, next.u.s.link); +	assert(region_next_size_get(&ret->sep) +	    == ((bin + bin_shift) << opt_quantum_2pow)); +	qr_remove(ret, next.u.s.link); +#ifdef MALLOC_STATS +	arena->stats.bins[bin].nregions--; +#endif +	if (qr_next(&tbin->regions, next.u.s.link) == &tbin->regions) +		arena_mask_unset(arena, bin); + +	arena_delayed_extract(arena, ret); -    /* Recalculate the cache size in bytes, and make sure it's nonzero */ +	if (region_next_free_get(&ret->sep)) { +		region_t *next; -    if (!malloc_cache) -	malloc_cache++; +		/* Non-delayed region. 
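+		 * The region was cached with its free flags set (it did not
+		 * arrive via the delayed-coalescence path), so clear its
+		 * next_free flag and the following separator's prev_free flag.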
*/ +		region_next_free_unset(&ret->sep); -    malloc_cache <<= malloc_pageshift; +		next = (region_t *)&((char *)ret) +		    [(bin + bin_shift) << opt_quantum_2pow]; +		assert(next->prev.size == region_next_size_get(&ret->sep)); +		assert(region_prev_free_get(&next->sep)); +		region_prev_free_unset(&next->sep); +	} -    /* -     * This is a nice hack from Kaleb Keithly (kaleb@x.org). -     * We can sbrk(2) further back when we keep this on a low address. -     */ -    px = (struct pgfree *) imalloc (sizeof *px); -    errno = save_errno; +	return (ret);  } -/* - * Allocate a number of complete pages - */ -static void * -malloc_pages(size_t size) -{ -    void *p, *delay_free = NULL; -    size_t i; -    struct pgfree *pf; -    u_long index; - -    size = pageround(size); - -    p = NULL; - -    /* Look for free pages before asking for more */ -    for(pf = free_list.next; pf; pf = pf->next) { - -#ifdef MALLOC_EXTRA_SANITY -	if (pf->size & malloc_pagemask) -	    wrterror("(ES): junk length entry on free_list\n"); -	if (!pf->size) -	    wrterror("(ES): zero length entry on free_list\n"); -	if (pf->page == pf->end) -	    wrterror("(ES): zero entry on free_list\n"); -	if (pf->page > pf->end) -	    wrterror("(ES): sick entry on free_list\n"); -	if ((void*)pf->page >= (void*)sbrk(0)) -	    wrterror("(ES): entry on free_list past brk\n"); -	if (page_dir[ptr2index(pf->page)] != MALLOC_FREE) -	    wrterror("(ES): non-free first page on free-list\n"); -	if (page_dir[ptr2index(pf->end)-1] != MALLOC_FREE) -	    wrterror("(ES): non-free last page on free-list\n"); -#endif /* MALLOC_EXTRA_SANITY */ - -	if (pf->size < size) -	    continue; - -	if (pf->size == size) { -	    p = pf->page; -	    if (pf->next != NULL) -		    pf->next->prev = pf->prev; -	    pf->prev->next = pf->next; -	    delay_free = pf; -	    break; -	} - -	p = pf->page; -	pf->page = (char *)pf->page + size; -	pf->size -= size; -	break; -    } - -#ifdef MALLOC_EXTRA_SANITY -    if (p != NULL && page_dir[ptr2index(p)] != MALLOC_FREE) -	wrterror("(ES): allocated non-free page on free-list\n"); -#endif /* MALLOC_EXTRA_SANITY */ - -    size >>= malloc_pageshift; - -    /* Map new pages */ -    if (p == NULL) -	p = map_pages(size); - -    if (p != NULL) { - -	index = ptr2index(p); -	page_dir[index] = MALLOC_FIRST; -	for (i=1;i<size;i++) -	    page_dir[index+i] = MALLOC_FOLLOW; - -	if (malloc_junk) -	    memset(p, SOME_JUNK, size << malloc_pageshift); -    } - -    if (delay_free) { -	if (px == NULL) -	    px = delay_free; -	else -	    ifree(delay_free); -    } +static void +arena_large_insert(arena_t *arena, region_t *reg, bool lru) +{ -    return (p); +	assert(region_next_free_get(®->sep)); +#ifdef MALLOC_DEBUG +	{ +		region_t *next; + +		next = (region_t *)&((char *)reg) +		    [region_next_size_get(®->sep)]; +		assert(region_prev_free_get(&next->sep)); +		assert(next->prev.size == region_next_size_get(®->sep)); +	} +#endif + +	/* Coalescing should have already been done. */ +	assert(arena_coalesce(arena, ®, region_next_size_get(®->sep)) +	    == false); + +	if (region_next_size_get(®->sep) < chunk_size +	    - (CHUNK_REG_OFFSET + offsetof(region_t, next))) { +		/* +		 * Make sure not to cache a large region with the nextContig +		 * flag set, in order to simplify the logic that determines +		 * whether a region needs to be extracted from "delayed". +		 */ +		region_next_contig_unset(®->sep); + +		/* Store the region in the large_regions tree. 
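+		 * Cached large regions are found by size via RB_NFIND() in
+		 * arena_large_reg_alloc(), so inserting here makes the region
+		 * available for best-fit reuse.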
*/ +		reg->next.u.l.node.reg = reg; +		reg->next.u.l.lru = lru; + +		RB_INSERT(region_tree_s, &arena->large_regions, +		    ®->next.u.l.node); +#ifdef MALLOC_STATS +		arena->stats.large.curcached++; +		if (arena->stats.large.curcached +		    > arena->stats.large.highcached) { +			arena->stats.large.highcached +			    = arena->stats.large.curcached; +		} +#endif +	} else { +		chunk_node_t *node; + +		/* +		 * This region now spans an entire chunk. Deallocate the chunk. +		 * +		 * Note that it is possible for allocation of a large region +		 * from a pristine chunk, followed by deallocation of the +		 * region, can cause the chunk to immediately be unmapped. +		 * This isn't ideal, but 1) such scenarios seem unlikely, and +		 * 2) delaying coalescence for large regions could cause +		 * excessive fragmentation for common scenarios. +		 */ + +		node = (chunk_node_t *)CHUNK_ADDR2BASE(reg); +		RB_REMOVE(chunk_tree_s, &arena->chunks, node); +		arena->nchunks--; +		assert(node->chunk == (chunk_node_t *)node); +		chunk_dealloc(node->chunk, chunk_size); +	}  } -/* - * Allocate a page of fragments - */ +static void +arena_large_cache(arena_t *arena, region_t *reg, bool lru) +{ +	size_t size; + +	/* Try to coalesce before storing this region anywhere. */ +	size = region_next_size_get(®->sep); +	if (arena_coalesce(arena, ®, size)) { +		if (reg == NULL) { +			/* Region no longer needs cached. */ +			return; +		} +		size = region_next_size_get(®->sep); +	} -static __inline int -malloc_make_chunks(int bits) +	arena_large_insert(arena, reg, lru); +} + +static void +arena_lru_cache(arena_t *arena, region_t *reg) +{ +	size_t size; + +	assert(region_next_free_get(®->sep)); +#ifdef MALLOC_DEBUG +	{ +		region_t *next; + +		next = (region_t *)&((char *)reg) +		    [region_next_size_get(®->sep)]; +		assert(region_prev_free_get(&next->sep)); +		assert(next->prev.size == region_next_size_get(®->sep)); +	} +#endif +	assert(region_next_size_get(®->sep) % quantum == 0); +	assert(region_next_size_get(®->sep) +	    >= QUANTUM_CEILING(sizeof(region_small_sizer_t))); + +	size = region_next_size_get(®->sep); +	if (size <= bin_maxsize) { +		arena_bin_append(arena, (size >> opt_quantum_2pow) - bin_shift, +		    reg); +	} else +		arena_large_cache(arena, reg, true); +} + +static __inline void +arena_mru_cache(arena_t *arena, region_t *reg, size_t size) +{ + +	assert(region_next_free_get(®->sep)); +#ifdef MALLOC_DEBUG +	{ +		region_t *next; + +		next = (region_t *)&((char *)reg) +		    [region_next_size_get(®->sep)]; +		assert(region_prev_free_get(&next->sep)); +		assert(next->prev.size == region_next_size_get(®->sep)); +	} +#endif +	assert(region_next_size_get(®->sep) % quantum == 0); +	assert(region_next_size_get(®->sep) +	    >= QUANTUM_CEILING(sizeof(region_small_sizer_t))); +	assert(size == region_next_size_get(®->sep)); + +	if (size <= bin_maxsize) { +		arena_bin_push(arena, (size >> opt_quantum_2pow) - bin_shift, +		    reg); +	} else +		arena_large_cache(arena, reg, false); +} + +static __inline void +arena_undelay(arena_t *arena, uint32_t slot)  { -    struct  pginfo *bp; -    void *pp; -    int i, k, l; +	region_t *reg, *next; +	size_t size; + +	assert(slot == arena->next_delayed); +	assert(arena->delayed[slot] != NULL); + +	/* Try to coalesce reg. 
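+	 * First extract it from its bin and release its delayed slot, then
+	 * mark it (and the following separator) free so that arena_coalesce()
+	 * can merge it with any free neighbors.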
*/ +	reg = arena->delayed[slot]; + +	size = region_next_size_get(®->sep); + +	assert(region_next_contig_get(®->sep)); +	assert(reg->next.u.s.slot == slot); + +	arena_bin_extract(arena, (size >> opt_quantum_2pow) - bin_shift, reg); + +	arena->delayed[slot] = NULL; + +	next = (region_t *) &((char *) reg)[size]; + +	region_next_free_set(®->sep); +	region_prev_free_set(&next->sep); +	next->prev.size = size; -    /* Allocate a new bucket */ -    pp = malloc_pages(malloc_pagesize); -    if (pp == NULL) -	return (0); +	if (arena_coalesce(arena, ®, size) == false) { +		/* Coalescing failed.  Cache this region. */ +		arena_mru_cache(arena, reg, size); +	} else { +		/* Coalescing succeeded. */ + +		if (reg == NULL) { +			/* Region no longer needs undelayed. */ +			return; +		} + +		if (region_next_size_get(®->sep) < chunk_size +		    - (CHUNK_REG_OFFSET + offsetof(region_t, next))) { +			/* +			 * Insert coalesced region into appropriate bin (or +			 * largeRegions). +			 */ +			arena_lru_cache(arena, reg); +		} else { +			chunk_node_t *node; + +			/* +			 * This region now spans an entire chunk.  Deallocate +			 * the chunk. +			 */ + +			node = (chunk_node_t *) CHUNK_ADDR2BASE(reg); +			RB_REMOVE(chunk_tree_s, &arena->chunks, node); +			arena->nchunks--; +			assert(node->chunk == (chunk_node_t *) node); +			chunk_dealloc(node->chunk, chunk_size); +		} +	} +} -    /* Find length of admin structure */ -    l = offsetof(struct pginfo, bits[0]); -    l += sizeof bp->bits[0] * -	(((malloc_pagesize >> bits)+MALLOC_BITS-1) / MALLOC_BITS); +static void +arena_delay_cache(arena_t *arena, region_t *reg) +{ +	region_t *next; +	size_t size; + +	assert(region_next_free_get(®->sep) == false); +	assert(region_next_size_get(®->sep) % quantum == 0); +	assert(region_next_size_get(®->sep) +	    >= QUANTUM_CEILING(sizeof(region_small_sizer_t))); + +	size = region_next_size_get(®->sep); +	next = (region_t *)&((char *)reg)[size]; +	assert(region_prev_free_get(&next->sep) == false); + +	if (size <= bin_maxsize) { +		if (region_next_contig_get(®->sep)) { +			uint32_t slot; + +			/* Insert into delayed. */ + +			/* +			 * Clear a slot, then put reg in it.  We have to loop +			 * here, because in the case of base_arena, it's +			 * possible for this loop to cause deallocation if a +			 * chunk is discarded. +			 */ +			for (slot = arena->next_delayed; +			    arena->delayed[slot] != NULL; +			    slot = arena->next_delayed) +				arena_undelay(arena, slot); + +			reg->next.u.s.slot = slot; + +			arena->delayed[slot] = reg; + +			/* Update next_delayed. */ +			slot++; +			slot &= (opt_ndelay - 1); /* Handle wrap-around. */ +			arena->next_delayed = slot; + +			arena_bin_append(arena, (size >> opt_quantum_2pow) +			    - bin_shift, reg); +		} else { +			/* +			 * This region was a fragment when it was allocated, so +			 * don't delay coalescence for it. +			 */ + +			region_next_free_set(®->sep); +			region_prev_free_set(&next->sep); +			next->prev.size = size; + +			if (arena_coalesce(arena, ®, size)) { +				/* Coalescing succeeded. */ + +				if (reg == NULL) { +					/* Region no longer needs cached. 
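+					 * It was absorbed into split or
+					 * frag by arena_coalesce_hard().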
*/ +					return; +				} + +				size = region_next_size_get(®->sep); +			} + +			arena_mru_cache(arena, reg, size); +		} +	} else { +		region_next_free_set(®->sep); +		region_prev_free_set(&next->sep); +		region_next_contig_unset(®->sep); +		next->prev.size = size; -    /* Don't waste more than two chunks on this */ -    if ((1<<(bits)) <= l+l) { -	bp = (struct  pginfo *)pp; -    } else { -	bp = (struct  pginfo *)imalloc(l); -	if (bp == NULL) { -	    ifree(pp); -	    return (0); +		arena_large_cache(arena, reg, true);  	} -    } +} -    bp->size = (1<<bits); -    bp->shift = bits; -    bp->total = bp->free = malloc_pagesize >> bits; -    bp->page = pp; +static __inline region_t * +arena_frag_reg_alloc(arena_t *arena, size_t size, bool fit) +{ +	region_t *ret; -    /* set all valid bits in the bitmap */ -    k = bp->total; -    i = 0; +	/* +	 * Try to fill frag if it's empty.  Frag needs to be marked as +	 * allocated. +	 */ +	if (arena->frag == NULL) { +		region_node_t *node; -    /* Do a bunch at a time */ -    for(;k-i >= MALLOC_BITS; i += MALLOC_BITS) -	bp->bits[i / MALLOC_BITS] = ~0; +		node = RB_MIN(region_tree_s, &arena->large_regions); +		if (node != NULL) { +			region_t *frag, *next; -    for(; i < k; i++) -        bp->bits[i/MALLOC_BITS] |= 1<<(i%MALLOC_BITS); +			RB_REMOVE(region_tree_s, &arena->large_regions, node); -    if (bp == bp->page) { -	/* Mark the ones we stole for ourselves */ -	for(i=0;l > 0;i++) { -	    bp->bits[i/MALLOC_BITS] &= ~(1<<(i%MALLOC_BITS)); -	    bp->free--; -	    bp->total--; -	    l -= (1 << bits); +			frag = node->reg; +#ifdef MALLOC_STATS +			arena->stats.frag.ncached++; +#endif +			assert(region_next_free_get(&frag->sep)); +			region_next_free_unset(&frag->sep); + +			next = (region_t *)&((char *)frag)[region_next_size_get( +			    &frag->sep)]; +			assert(region_prev_free_get(&next->sep)); +			region_prev_free_unset(&next->sep); + +			arena->frag = frag; +		}  	} -    } -    /* MALLOC_LOCK */ +	if (arena->frag != NULL) { +#ifdef MALLOC_STATS +		arena->stats.frag.nrequests++; +#endif + +		if (region_next_size_get(&arena->frag->sep) >= size) { +			if (fit) { +				size_t total_size; + +				/* +				 * Use frag, but try to use the beginning for +				 * smaller regions, and the end for larger +				 * regions.  This reduces fragmentation in some +				 * pathological use cases.  It tends to group +				 * short-lived (smaller) regions, which +				 * increases the effectiveness of coalescing. +				 */ + +				total_size = +				    region_next_size_get(&arena->frag->sep); +				assert(size % quantum == 0); + +				if (total_size - size >= QUANTUM_CEILING( +				    sizeof(region_small_sizer_t))) { +					if (size <= bin_maxsize) { +						region_t *next; + +						/* +						 * Carve space from the +						 * beginning of frag. +						 */ + +						/* ret. */ +						ret = arena->frag; +						region_next_size_set(&ret->sep, +						    size); +						assert(region_next_free_get( +						    &ret->sep) == false); + +						/* next. */ +						next = (region_t *)&((char *) +						    ret)[size]; +						region_next_size_set(&next->sep, +						    total_size - size); +						assert(size >= +						    QUANTUM_CEILING(sizeof( +						    region_small_sizer_t))); +						region_prev_free_unset( +						    &next->sep); +						region_next_free_unset( +						    &next->sep); + +						/* Update frag. */ +						arena->frag = next; +					} else { +						region_t *prev; +						size_t prev_size; + +						/* +						 * Carve space from the end of +						 * frag. +						 */ + +						/* prev. 
*/ +						prev_size = total_size - size; +						prev = arena->frag; +						region_next_size_set(&prev->sep, +						    prev_size); +						assert(prev_size >= +						    QUANTUM_CEILING(sizeof( +						    region_small_sizer_t))); +						assert(region_next_free_get( +						    &prev->sep) == false); + +						/* ret. */ +						ret = (region_t *)&((char *) +						    prev)[prev_size]; +						region_next_size_set(&ret->sep, +						    size); +						region_prev_free_unset( +						    &ret->sep); +						region_next_free_unset( +						    &ret->sep); + +#ifdef MALLOC_DEBUG +						{ +					region_t *next; + +					/* next. */ +					next = (region_t *)&((char *) ret) +					    [region_next_size_get(&ret->sep)]; +					assert(region_prev_free_get(&next->sep) +					    == false); +						} +#endif +					} +#ifdef MALLOC_STATS +					arena->stats.nsplit++; +#endif +				} else { +					/* +					 * frag is close enough to the right +					 * size that there isn't enough room to +					 * create a neighboring region. +					 */ + +					/* ret. */ +					ret = arena->frag; +					arena->frag = NULL; +					assert(region_next_free_get(&ret->sep) +					    == false); + +#ifdef MALLOC_DEBUG +					{ +						region_t *next; + +						/* next. */ +						next = (region_t *)&((char *) +						    ret)[region_next_size_get( +						    &ret->sep)]; +						assert(region_prev_free_get( +						    &next->sep) == false); +					} +#endif +				} +#ifdef MALLOC_STATS +				arena->stats.frag.nserviced++; +#endif +			} else { +				/* Don't fit to the allocation size. */ + +				/* ret. */ +				ret = arena->frag; +				arena->frag = NULL; +				assert(region_next_free_get(&ret->sep) +				    == false); + +#ifdef MALLOC_DEBUG +				{ +					region_t *next; + +					/* next. */ +					next = (region_t *) &((char *) ret) +					   [region_next_size_get(&ret->sep)]; +					assert(region_prev_free_get(&next->sep) +					    == false); +				} +#endif +			} +			region_next_contig_set(&ret->sep); +			goto RETURN; +		} else if (size <= bin_maxsize) { +			region_t *reg; + +			/* +			 * The frag region is too small to service a small +			 * request.  Clear frag. +			 */ -    page_dir[ptr2index(pp)] = bp; +			reg = arena->frag; +			region_next_contig_set(®->sep); -    bp->next = page_dir[bits]; -    page_dir[bits] = bp; +			arena->frag = NULL; -    /* MALLOC_UNLOCK */ +			arena_delay_cache(arena, reg); +		} +	} -    return (1); +	ret = NULL; +RETURN: +	return (ret);  } -/* - * Allocate a fragment - */ -static void * -malloc_bytes(size_t size) -{ -    int i,j; -    u_int u; -    struct  pginfo *bp; -    int k; -    u_int *lp; - -    /* Don't bother with anything less than this */ -    if (size < malloc_minsize) -	size = malloc_minsize; - -    /* Find the right bucket */ -    j = 1; -    i = size-1; -    while (i >>= 1) -	j++; - -    /* If it's empty, make a page more of that size chunks */ -    if (page_dir[j] == NULL && !malloc_make_chunks(j)) -	return (NULL); +static region_t * +arena_split_reg_alloc(arena_t *arena, size_t size, bool fit) +{ +	region_t *ret; + +	if (arena->split != NULL) { +#ifdef MALLOC_STATS +		arena->stats.split.nrequests++; +#endif -    bp = page_dir[j]; +		if (region_next_size_get(&arena->split->sep) >= size) { +			if (fit) { +				size_t total_size; + +				/* +				 * Use split, but try to use the beginning for +				 * smaller regions, and the end for larger +				 * regions.  This reduces fragmentation in some +				 * pathological use cases.  
It tends to group +				 * short-lived (smaller) regions, which +				 * increases the effectiveness of coalescing. +				 */ + +				total_size = +				    region_next_size_get(&arena->split->sep); +				assert(size % quantum == 0); + +				if (total_size - size >= QUANTUM_CEILING( +				    sizeof(region_small_sizer_t))) { +					if (size <= bin_maxsize) { +						region_t *next; + +						/* +						 * Carve space from the +						 * beginning of split. +						 */ + +						/* ret. */ +						ret = arena->split; +						region_next_size_set(&ret->sep, +						    size); +						assert(region_next_free_get( +						    &ret->sep) == false); + +						/* next. */ +						next = (region_t *)&((char *) +						    ret)[size]; +						region_next_size_set(&next->sep, +						    total_size - size); +						assert(size >= +						    QUANTUM_CEILING(sizeof( +						    region_small_sizer_t))); +						region_prev_free_unset( +						    &next->sep); +						region_next_free_unset( +						    &next->sep); + +						/* Update split. */ +						arena->split = next; +					} else { +						region_t *prev; +						size_t prev_size; + +						/* +						 * Carve space from the end of +						 * split. +						 */ + +						/* prev. */ +						prev_size = total_size - size; +						prev = arena->split; +						region_next_size_set(&prev->sep, +						    prev_size); +						assert(prev_size >= +						    QUANTUM_CEILING(sizeof( +						    region_small_sizer_t))); +						assert(region_next_free_get( +						    &prev->sep) == false); + +						/* ret. */ +						ret = (region_t *)&((char *) +						    prev)[prev_size]; +						region_next_size_set(&ret->sep, +						    size); +						region_prev_free_unset( +						    &ret->sep); +						region_next_free_unset( +						    &ret->sep); + +#ifdef MALLOC_DEBUG +						{ +					region_t *next; + +					/* next. */ +					next = (region_t *)&((char *) ret) +					    [region_next_size_get(&ret->sep)]; +					assert(region_prev_free_get(&next->sep) +					    == false); +						} +#endif +					} +#ifdef MALLOC_STATS +					arena->stats.nsplit++; +#endif +				} else { +					/* +					 * split is close enough to the right +					 * size that there isn't enough room to +					 * create a neighboring region. +					 */ + +					/* ret. */ +					ret = arena->split; +					arena->split = NULL; +					assert(region_next_free_get(&ret->sep) +					    == false); + +#ifdef MALLOC_DEBUG +					{ +						region_t *next; + +						/* next. */ +						next = (region_t *)&((char *) +						    ret)[region_next_size_get( +						    &ret->sep)]; +						assert(region_prev_free_get( +						    &next->sep) == false); +					} +#endif +				} -    /* Find first word of bitmap which isn't empty */ -    for (lp = bp->bits; !*lp; lp++) -	; +#ifdef MALLOC_STATS +				arena->stats.split.nserviced++; +#endif +			} else { +				/* Don't fit to the allocation size. */ + +				/* ret. */ +				ret = arena->split; +				arena->split = NULL; +				assert(region_next_free_get(&ret->sep) +				    == false); + +#ifdef MALLOC_DEBUG +				{ +					region_t *next; + +					/* next. */ +					next = (region_t *) &((char *) ret) +					   [region_next_size_get(&ret->sep)]; +					assert(region_prev_free_get(&next->sep) +					    == false); +				} +#endif +			} +			region_next_contig_set(&ret->sep); +			goto RETURN; +		} else if (size <= bin_maxsize) { +			region_t *reg; -    /* Find that bit, and tweak it */ -    u = 1; -    k = 0; -    while (!(*lp & u)) { -	u += u; -	k++; -    } -    *lp ^= u; +			/* +			 * The split region is too small to service a small +			 * request.  
Clear split. +			 */ -    /* If there are no more free, remove from free-list */ -    if (!--bp->free) { -	page_dir[j] = bp->next; -	bp->next = NULL; -    } +			reg = arena->split; +			region_next_contig_set(®->sep); -    /* Adjust to the real offset of that chunk */ -    k += (lp-bp->bits)*MALLOC_BITS; -    k <<= bp->shift; +			arena->split = NULL; -    if (malloc_junk) -	memset((u_char *)bp->page + k, SOME_JUNK, bp->size); +			arena_delay_cache(arena, reg); +		} +	} -    return ((u_char *)bp->page + k); +	ret = NULL; +RETURN: +	return (ret);  }  /* - * Allocate a piece of memory + * Split reg if necessary.  The region must be overly large enough to be able + * to contain a trailing region.   */ -static void * -imalloc(size_t size) +static void +arena_reg_fit(arena_t *arena, size_t size, region_t *reg, bool restore_split)  { -    void *result; +	assert(QUANTUM_CEILING(size) == size); +	assert(region_next_free_get(®->sep) == 0); + +	if (region_next_size_get(®->sep) +	    >= size + QUANTUM_CEILING(sizeof(region_small_sizer_t))) { +		size_t total_size; +		region_t *next; + +		total_size = region_next_size_get(®->sep); + +		region_next_size_set(®->sep, size); + +		next = (region_t *) &((char *) reg)[size]; +		region_next_size_set(&next->sep, total_size - size); +		assert(region_next_size_get(&next->sep) +		    >= QUANTUM_CEILING(sizeof(region_small_sizer_t))); +		region_prev_free_unset(&next->sep); + +		if (restore_split) { +			/* Restore what's left to "split". */ +			region_next_free_unset(&next->sep); +			arena->split = next; +		} else if (arena->frag == NULL && total_size - size +		    > bin_maxsize) { +			/* This region is large enough to use for "frag". */ +			region_next_free_unset(&next->sep); +			arena->frag = next; +		} else { +			region_t *nextnext; +			size_t next_size; + +			region_next_free_set(&next->sep); + +			assert(region_next_size_get(&next->sep) == total_size +			    - size); +			next_size = total_size - size; +			nextnext = (region_t *) &((char *) next)[next_size]; +			nextnext->prev.size = next_size; +			assert(region_prev_free_get(&nextnext->sep) == false); +			region_prev_free_set(&nextnext->sep); + +			arena_mru_cache(arena, next, next_size); +		} + +#ifdef MALLOC_STATS +		arena->stats.nsplit++; +#endif +	} +} -    if (suicide) -	abort(); +static __inline region_t * +arena_bin_reg_alloc(arena_t *arena, size_t size, bool fit) +{ +	region_t *ret, *header; +	unsigned bin; -    if ((size + malloc_pagesize) < size)	/* Check for overflow */ -	result = NULL; -    else if ((size + malloc_pagesize) >= (uintptr_t)page_dir) -	result = NULL; -    else if (size <= malloc_maxsize) -	result = malloc_bytes(size); -    else -	result = malloc_pages(size); +	/* +	 * Look for an exact fit in bins (region cached in smallest possible +	 * bin). +	 */ +	bin = (size >> opt_quantum_2pow) - bin_shift; +#ifdef MALLOC_STATS +	arena->stats.bins[bin].nrequests++; +#endif +	header = &arena->bins[bin].regions; +	if (qr_next(header, next.u.s.link) != header) { +		/* Exact fit. */ +		ret = arena_bin_pop(arena, bin); +		assert(region_next_size_get(&ret->sep) >= size); +#ifdef MALLOC_STATS +		arena->stats.bins[bin].nfit++; +#endif +		goto RETURN; +	} + +	/* Look at frag to see whether it's large enough. */ +	ret = arena_frag_reg_alloc(arena, size, fit); +	if (ret != NULL) +		goto RETURN; + +	/* Look in all bins for a large enough region. */ +	if ((bin = arena_bins_search(arena, size)) == (size >> opt_quantum_2pow) +	    - bin_shift) { +		/* Over-fit. 
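+		 * A cached region at least as large as the request was found;
+		 * pop it and, when fit is requested, trim it down to size
+		 * with arena_reg_fit().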
*/ +		ret = arena_bin_pop(arena, bin); +		assert(region_next_size_get(&ret->sep) >= size); + +		if (fit) +			arena_reg_fit(arena, size, ret, false); + +#ifdef MALLOC_STATS +		arena->stats.bins[bin].noverfit++; +#endif +		goto RETURN; +	} + +	ret = NULL; +RETURN: +	return (ret); +} + +/* Look in large_regions for a large enough region. */ +static region_t * +arena_large_reg_alloc(arena_t *arena, size_t size, bool fit) +{ +	region_t *ret, *next; +	region_node_t *node; +	region_t key; + +#ifdef MALLOC_STATS +	arena->stats.large.nrequests++; +#endif + +	key.next.u.l.node.reg = &key; +	key.next.u.l.lru = true; +	region_next_size_set(&key.sep, size); +	node = RB_NFIND(region_tree_s, &arena->large_regions, +	    &key.next.u.l.node); +	if (node == NULL) { +		ret = NULL; +		goto RETURN; +	} + +	/* Cached large region found. */ +	ret = node->reg; +	assert(region_next_free_get(&ret->sep)); + +	RB_REMOVE(region_tree_s, &arena->large_regions, node); +#ifdef MALLOC_STATS +	arena->stats.large.curcached--; +#endif + +	region_next_free_unset(&ret->sep); + +	next = (region_t *)&((char *)ret)[region_next_size_get(&ret->sep)]; +	assert(region_prev_free_get(&next->sep)); +	region_prev_free_unset(&next->sep); + +	if (fit) +		arena_reg_fit(arena, size, ret, false); + +#ifdef MALLOC_STATS +	if (size > bin_maxsize) +		arena->stats.large.nfit++; +	else +		arena->stats.large.noverfit++; +#endif + +RETURN: +	return (ret); +} + +/* Allocate a new chunk and create a single region from it. */ +static region_t * +arena_chunk_reg_alloc(arena_t *arena, size_t size, bool fit) +{ +	region_t *ret, *next; +	chunk_node_t *chunk; -    if (malloc_zero && result != NULL) -	memset(result, 0, size); +	chunk = chunk_alloc(chunk_size); +	if (chunk == NULL) { +		ret = NULL; +		goto RETURN; +	} -    return (result); +#ifdef MALLOC_DEBUG +	{ +		chunk_node_t *tchunk; +		chunk_node_t key; + +		key.chunk = chunk; +		tchunk = RB_FIND(chunk_tree_s, &arena->chunks, &key); +		assert(tchunk == NULL); +	} +#endif +	chunk->arena = arena; +	chunk->chunk = chunk; +	chunk->size = chunk_size; +	chunk->extra = 0; + +	RB_INSERT(chunk_tree_s, &arena->chunks, chunk); +	arena->nchunks++; + +	/* Carve a region from the new chunk. */ +	ret = (region_t *) &((char *) chunk)[CHUNK_REG_OFFSET]; +	region_next_size_set(&ret->sep, chunk_size - (CHUNK_REG_OFFSET +	    + offsetof(region_t, next))); +	region_prev_free_unset(&ret->sep); +	region_next_free_unset(&ret->sep); + +	/* Create a separator at the end of this new region. */ +	next = (region_t *)&((char *)ret)[region_next_size_get(&ret->sep)]; +	region_next_size_set(&next->sep, 0); +	region_prev_free_unset(&next->sep); +	region_next_free_unset(&next->sep); +	region_next_contig_unset(&next->sep); + +	if (fit) +		arena_reg_fit(arena, size, ret, (arena->split == NULL)); + +RETURN: +	return (ret);  }  /* - * Change the size of an allocation. + * Find a region that is at least as large as aSize, and return a pointer to + * the separator that precedes the region.  The return value is ready for use, + * though it may be larger than is necessary if fit is false.   
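+ *
+ * The search order is: the size class bins (small requests only), the cache
+ * of large regions, the "split" region, the "frag" region (large requests
+ * only, since arena_bin_reg_alloc() already falls back to frag for small
+ * requests), and finally a freshly allocated chunk.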
*/ +static __inline region_t * +arena_reg_alloc(arena_t *arena, size_t size, bool fit) +{ +	region_t *ret; + +	assert(QUANTUM_CEILING(size) == size); +	assert(size >= QUANTUM_CEILING(sizeof(region_small_sizer_t))); +	assert(size <= (chunk_size >> 1)); + +	if (size <= bin_maxsize) { +		ret = arena_bin_reg_alloc(arena, size, fit); +		if (ret != NULL) +			goto RETURN; +	} + +	ret = arena_large_reg_alloc(arena, size, fit); +	if (ret != NULL) +		goto RETURN; + +	ret = arena_split_reg_alloc(arena, size, fit); +	if (ret != NULL) +		goto RETURN; + +	/* +	 * Only try allocating from frag here if size is large, since +	 * arena_bin_reg_alloc() already falls back to allocating from frag for +	 * small regions. +	 */ +	if (size > bin_maxsize) { +		ret = arena_frag_reg_alloc(arena, size, fit); +		if (ret != NULL) +			goto RETURN; +	} + +	ret = arena_chunk_reg_alloc(arena, size, fit); +	if (ret != NULL) +		goto RETURN; + +	ret = NULL; +RETURN: +	return (ret); +} +  static void * -irealloc(void *ptr, size_t size) +arena_malloc(arena_t *arena, size_t size)  { -    void *p; -    u_long osize, index; -    struct pginfo **mp; -    int i; +	void *ret; +	region_t *reg; +	size_t quantum_size; + +	assert(arena != NULL); +	assert(arena->magic == ARENA_MAGIC); +	assert(size != 0); +	assert(region_ceiling(size) <= (chunk_size >> 1)); + +	quantum_size = region_ceiling(size); +	if (quantum_size < size) { +		/* size is large enough to cause size_t wrap-around. */ +		ret = NULL; +		goto RETURN; +	} +	assert(quantum_size >= QUANTUM_CEILING(sizeof(region_small_sizer_t))); + +	malloc_mutex_lock(&arena->mtx); +	reg = arena_reg_alloc(arena, quantum_size, true); +	if (reg == NULL) { +		malloc_mutex_unlock(&arena->mtx); +		ret = NULL; +		goto RETURN; +	} -    if (suicide) -	abort(); +#ifdef MALLOC_STATS +	arena->allocated += quantum_size; +#endif -    index = ptr2index(ptr); +	malloc_mutex_unlock(&arena->mtx); -    if (index < malloc_pageshift) { -	wrtwarning("junk pointer, too low to make sense\n"); -	return (NULL); -    } +	ret = (void *)®->next; +#ifdef MALLOC_REDZONES +	{ +		region_t *next; +		size_t total_size; -    if (index > last_index) { -	wrtwarning("junk pointer, too high to make sense\n"); -	return (NULL); -    } +		memset(reg->sep.next_red, 0xa5, MALLOC_RED); -    mp = &page_dir[index]; +		/* +		 * Unused trailing space in the region is considered part of the +		 * trailing redzone. +		 */ +		total_size = region_next_size_get(®->sep); +		assert(total_size >= size); +		memset(&((char *)ret)[size], 0xa5, +		    total_size - size - sizeof(region_sep_t)); -    if (*mp == MALLOC_FIRST) {			/* Page allocation */ +		reg->sep.next_exact_size = size; -	/* Check the pointer */ -	if ((u_long)ptr & malloc_pagemask) { -	    wrtwarning("modified (page-) pointer\n"); -	    return (NULL); +		next = (region_t *)&((char *)reg)[total_size]; +		memset(next->sep.prev_red, 0xa5, MALLOC_RED);  	} +#endif +RETURN: +	return (ret); +} -	/* Find the size in bytes */ -	for (osize = malloc_pagesize; *(++mp) == MALLOC_FOLLOW;) -	    osize += malloc_pagesize; +static void * +arena_palloc(arena_t *arena, size_t alignment, size_t size) +{ +	void *ret; + +	assert(arena != NULL); +	assert(arena->magic == ARENA_MAGIC); + +	if (alignment <= quantum) { +		/* +		 * The requested alignment is always guaranteed, so use the +		 * normal allocation function. 
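+		 * (Region sizes are always a multiple of the quantum and
+		 * payloads start on quantum boundaries, so any alignment up
+		 * to the quantum holds automatically.)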
+		 */ +		ret = arena_malloc(arena, size); +	} else { +		region_t *reg, *old_split; +		size_t quantum_size, alloc_size, offset, total_size; + +		/* +		 * Allocate more space than necessary, then carve an aligned +		 * region out of it.  The smallest allowable region is +		 * potentially a multiple of the quantum size, so care must be +		 * taken to carve things up such that all resulting regions are +		 * large enough. +		 */ + +		quantum_size = region_ceiling(size); +		if (quantum_size < size) { +			/* size is large enough to cause size_t wrap-around. */ +			ret = NULL; +			goto RETURN; +		} + +		/* +		 * Calculate how large of a region to allocate.  There must be +		 * enough space to advance far enough to have at least +		 * sizeof(region_small_sizer_t) leading bytes, yet also land at +		 * an alignment boundary. +		 */ +		if (alignment >= sizeof(region_small_sizer_t)) { +			alloc_size = +			    QUANTUM_CEILING(sizeof(region_small_sizer_t)) +			    + alignment + quantum_size; +		} else { +			alloc_size = +			    (QUANTUM_CEILING(sizeof(region_small_sizer_t)) << 1) +			    + quantum_size; +		} + +		if (alloc_size < quantum_size) { +			/* size_t wrap-around occurred. */ +			ret = NULL; +			goto RETURN; +		} + +		malloc_mutex_lock(&arena->mtx); +		old_split = arena->split; +		reg = arena_reg_alloc(arena, alloc_size, false); +		if (reg == NULL) { +			malloc_mutex_unlock(&arena->mtx); +			ret = NULL; +			goto RETURN; +		} +		if (reg == old_split) { +			/* +			 * We requested a non-fit allocation that was serviced +			 * by split, which means that we need to take care to +			 * restore split in the arena_reg_fit() call later on. +			 * +			 * Do nothing; a non-NULL old_split will be used as the +			 * signal to restore split. +			 */ +		} else +			old_split = NULL; + +		total_size = region_next_size_get(®->sep); + +		if (alignment > bin_maxsize || size > bin_maxsize) { +			size_t split_size, p; + +			/* +			 * Put this allocation toward the end of reg, since +			 * it is large, and we try to put all large regions at +			 * the end of split regions. +			 */ +			split_size = region_next_size_get(®->sep); +			p = (size_t)&((char *)®->next)[split_size]; +			p -= offsetof(region_t, next); +			p -= size; +			p &= ~(alignment - 1); +			p -= offsetof(region_t, next); + +			offset = p - (size_t)reg; +		} else { +			if ((((size_t)®->next) & (alignment - 1)) != 0) { +				size_t p; + +				/* +				 * reg is unaligned.  Calculate the offset into +				 * reg to actually base the allocation at. +				 */ +				p = ((size_t)®->next + alignment) +				    & ~(alignment - 1); +				while (p - (size_t)®->next +				    < QUANTUM_CEILING(sizeof( +				    region_small_sizer_t))) +					p += alignment; +				p -= offsetof(region_t, next); + +				offset = p - (size_t)reg; +			} else +				offset = 0; +		} +		assert(offset % quantum == 0); +		assert(offset < total_size); + +		if (offset != 0) { +			region_t *prev; + +			/* +			 * Move ret to an alignment boundary that is far enough +			 * past the beginning of the allocation to leave a +			 * leading free region, then trim the leading space. +			 */ + +			assert(offset >= QUANTUM_CEILING( +			    sizeof(region_small_sizer_t))); +			assert(offset + size <= total_size); + +			prev = reg; +			reg = (region_t *)&((char *)prev)[offset]; +			assert(((size_t)®->next & (alignment - 1)) == 0); + +			/* prev. */ +			region_next_size_set(&prev->sep, offset); +			reg->prev.size = offset; + +			/* reg. 
*/ +			region_next_size_set(®->sep, total_size - offset); +			region_next_free_unset(®->sep); +			if (region_next_contig_get(&prev->sep)) +				region_next_contig_set(®->sep); +			else +				region_next_contig_unset(®->sep); + +			if (old_split != NULL && (alignment > bin_maxsize +			    || size > bin_maxsize)) { +				/* Restore to split. */ +				region_prev_free_unset(®->sep); + +				arena->split = prev; +				old_split = NULL; +			} else { +				region_next_free_set(&prev->sep); +				region_prev_free_set(®->sep); + +				arena_mru_cache(arena, prev, offset); +			} +#ifdef MALLOC_STATS +			arena->stats.nsplit++; +#endif +		} + +		arena_reg_fit(arena, quantum_size, reg, (old_split != NULL)); + +#ifdef MALLOC_STATS +		arena->allocated += quantum_size; +#endif + +		malloc_mutex_unlock(&arena->mtx); + +		ret = (void *)®->next; +#ifdef MALLOC_REDZONES +		{ +			region_t *next; +			size_t total_size; + +			memset(reg->sep.next_red, 0xa5, MALLOC_RED); + +			/* +			 * Unused trailing space in the region is considered +			 * part of the trailing redzone. +			 */ +			total_size = region_next_size_get(®->sep); +			assert(total_size >= size); +			memset(&((char *)ret)[size], 0xa5, +			    total_size - size - sizeof(region_sep_t)); + +			reg->sep.next_exact_size = size; -        if (!malloc_realloc && 			/* Unless we have to, */ -	  size <= osize && 			/* .. or are too small, */ -	  size > (osize - malloc_pagesize)) {	/* .. or can free a page, */ -	    if (malloc_junk) -		memset((u_char *)ptr + size, SOME_JUNK, osize-size); -	    return (ptr);			/* ..don't do anything else. */ +			next = (region_t *)&((char *)reg)[total_size]; +			memset(next->sep.prev_red, 0xa5, MALLOC_RED); +		} +#endif  	} -    } else if (*mp >= MALLOC_MAGIC) {		/* Chunk allocation */ +RETURN: +	assert(((size_t)ret & (alignment - 1)) == 0); +	return (ret); +} + +static void * +arena_calloc(arena_t *arena, size_t num, size_t size) +{ +	void *ret; + +	assert(arena != NULL); +	assert(arena->magic == ARENA_MAGIC); +	assert(num * size != 0); + +	ret = arena_malloc(arena, num * size); +	if (ret == NULL) +		goto RETURN; + +	memset(ret, 0, num * size); + +RETURN: +	return (ret); +} + +static size_t +arena_salloc(arena_t *arena, void *ptr) +{ +	size_t ret; +	region_t *reg; + +	assert(arena != NULL); +	assert(arena->magic == ARENA_MAGIC); +	assert(ptr != NULL); +	assert(ptr != &nil); +	assert(CHUNK_ADDR2BASE(ptr) != ptr); -	/* Check the pointer for sane values */ -	if (((u_long)ptr & ((*mp)->size-1))) { -	    wrtwarning("modified (chunk-) pointer\n"); -	    return (NULL); +	reg = (region_t *)&((char *)ptr)[-offsetof(region_t, next)]; + +	ret = region_next_size_get(®->sep); + +	return (ret); +} + +#ifdef MALLOC_REDZONES +static void +redzone_check(void *ptr) +{ +	region_t *reg, *next; +	size_t size; +	unsigned i, ncorruptions; + +	ncorruptions = 0; + +	reg = (region_t *)&((char *)ptr)[-offsetof(region_t, next)]; +	size = region_next_size_get(®->sep); +	next = (region_t *)&((char *)reg)[size]; + +	/* Leading redzone. */ +	for (i = 0; i < MALLOC_RED; i++) { +		if ((unsigned char)reg->sep.next_red[i] != 0xa5) { +			size_t offset = (size_t)MALLOC_RED - i; + +			ncorruptions++; +			malloc_printf("%s: (malloc) Corrupted redzone %zu " +			    "byte%s before %p (0x%x)\n", _getprogname(),  +			    offset, offset > 1 ? "s" : "", ptr, +			    (unsigned char)reg->sep.next_red[i]); +		} +	} +	memset(®->sep.next_red, 0x5a, MALLOC_RED); + +	/* Bytes immediately trailing allocation. 
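+	 * The slack between the requested size (next_exact_size) and the
+	 * quantum-rounded region size was filled with 0xa5 at allocation
+	 * time, so it is checked as part of the trailing redzone.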
*/ +	for (i = 0; i < size - reg->sep.next_exact_size - sizeof(region_sep_t); +	    i++) { +		if ((unsigned char)((char *)ptr)[reg->sep.next_exact_size + i] +		    != 0xa5) { +			size_t offset = (size_t)(i + 1); + +			ncorruptions++; +			malloc_printf("%s: (malloc) Corrupted redzone %zu " +			    "byte%s after %p (size %zu) (0x%x)\n", +			    _getprogname(), offset, offset > 1 ? "s" : "", ptr, +			    reg->sep.next_exact_size, (unsigned char)((char *) +			    ptr)[reg->sep.next_exact_size + i]); +		}  	} +	memset(&((char *)ptr)[reg->sep.next_exact_size], 0x5a, +	    size - reg->sep.next_exact_size - sizeof(region_sep_t)); + +	/* Trailing redzone. */ +	for (i = 0; i < MALLOC_RED; i++) { +		if ((unsigned char)next->sep.prev_red[i] != 0xa5) { +			size_t offset = (size_t)(size - reg->sep.next_exact_size +			    - sizeof(region_sep_t) + i + 1); + +			ncorruptions++; +			malloc_printf("%s: (malloc) Corrupted redzone %zu " +			    "byte%s after %p (size %zu) (0x%x)\n", +			    _getprogname(), offset, offset > 1 ? "s" : "", ptr, +			    reg->sep.next_exact_size, +			    (unsigned char)next->sep.prev_red[i]); +		} +	} +	memset(&next->sep.prev_red, 0x5a, MALLOC_RED); + +	if (opt_abort && ncorruptions != 0) +		abort(); -	/* Find the chunk index in the page */ -	i = ((u_long)ptr & malloc_pagemask) >> (*mp)->shift; +	reg->sep.next_exact_size = 0; +} +#endif + +static void +arena_dalloc(arena_t *arena, void *ptr) +{ +	region_t *reg; -	/* Verify that it isn't a free chunk already */ -        if ((*mp)->bits[i/MALLOC_BITS] & (1<<(i%MALLOC_BITS))) { -	    wrtwarning("chunk is already free\n"); -	    return (NULL); +	assert(arena != NULL); +	assert(ptr != NULL); +	assert(ptr != &nil); + +	assert(arena != NULL); +	assert(arena->magic == ARENA_MAGIC); +	assert(CHUNK_ADDR2BASE(ptr) != ptr); + +	reg = (region_t *)&((char *)ptr)[-offsetof(region_t, next)]; + +	malloc_mutex_lock(&arena->mtx); + +#ifdef MALLOC_DEBUG +	{ +		chunk_node_t *chunk, *node; +		chunk_node_t key; + +		chunk = CHUNK_ADDR2BASE(ptr); +		assert(chunk->arena == arena); + +		key.chunk = chunk; +		node = RB_FIND(chunk_tree_s, &arena->chunks, &key); +		assert(node == chunk);  	} +#endif +#ifdef MALLOC_REDZONES +	redzone_check(ptr); +#endif -	osize = (*mp)->size; +#ifdef MALLOC_STATS +	arena->allocated -= region_next_size_get(®->sep); +#endif -	if (!malloc_realloc &&		/* Unless we have to, */ -	  size <= osize && 		/* ..or are too small, */ -	  (size > osize/2 ||	 	/* ..or could use a smaller size, */ -	  osize == malloc_minsize)) {	/* ..(if there is one) */ -	    if (malloc_junk) -		memset((u_char *)ptr + size, SOME_JUNK, osize-size); -	    return (ptr);		/* ..don't do anything else. */ +	if (opt_junk) { +		memset(®->next, 0x5a, +		    region_next_size_get(®->sep) - sizeof(region_sep_t));  	} -    } else { -	wrtwarning("pointer to wrong page\n"); +	arena_delay_cache(arena, reg); + +	malloc_mutex_unlock(&arena->mtx); +} + +#ifdef NOT_YET +static void * +arena_ralloc(arena_t *arena, void *ptr, size_t size) +{ + +	/* +	 * Arenas don't need to support ralloc, since all reallocation is done +	 * by allocating new space and copying.  This function should never be +	 * called. 
+	 */ +	/* NOTREACHED */ +	assert(false); +  	return (NULL); -    } +} +#endif + +#ifdef MALLOC_STATS +static bool +arena_stats(arena_t *arena, size_t *allocated, size_t *total) +{ + +	assert(arena != NULL); +	assert(arena->magic == ARENA_MAGIC); +	assert(allocated != NULL); +	assert(total != NULL); + +	malloc_mutex_lock(&arena->mtx); +	*allocated = arena->allocated; +	*total = arena->nchunks * chunk_size; +	malloc_mutex_unlock(&arena->mtx); + +	return (false); +} +#endif -    p = imalloc(size); +static void +arena_new(arena_t *arena, bool malloced, bool recursive) +{ +	unsigned i; -    if (p != NULL) { -	/* copy the lesser of the two sizes, and free the old one */ -	if (!size || !osize) -	    ; -	else if (osize < size) -	    memcpy(p, ptr, osize); +	arena->malloced = malloced; + +	if (recursive) +		malloc_mutex_recursive_init(&arena->mtx);  	else -	    memcpy(p, ptr, size); -	ifree(ptr); -    } -    return (p); +		malloc_mutex_init(&arena->mtx); + +	for (i = 0; i < NBINS; i++) +		qr_new(&arena->bins[i].regions, next.u.s.link); + +	for (i = 0; i < BINMASK_NELMS; i++) +		arena->bins_mask[i] = 0; + +	arena->split = NULL; +	arena->frag = NULL; +	RB_INIT(&arena->large_regions); + +	RB_INIT(&arena->chunks); +	arena->nchunks = 0; + +#ifdef MALLOC_STATS +	arena->allocated = 0; + +	memset(&arena->stats, 0, sizeof(arena_stats_t)); +#endif + +#ifdef MALLOC_DEBUG +	arena->magic = ARENA_MAGIC; +#endif + +	/* +	 * Do this last, since it involved recursing in the case of base_arena. +	 * +	 * Use page alignment for delayed, so that only one page must be kept +	 * in RAM, rather than two (assuming a small enough array).  This seems +	 * prudent, given that all of delayed is touched often. +	 */ +	assert(opt_ndelay > 0); +	arena->delayed = (region_t **)ipalloc(&base_arena, pagesize, +	    (opt_ndelay * sizeof(region_t *)) + CACHELINE); +	memset(arena->delayed, 0, opt_ndelay * sizeof(region_t *)); +	arena->next_delayed = 0;  } -/* - * Free a sequence of pages - */ +/* Create a new arena and insert it into the arenas array at index ind. */ +static arena_t * +arenas_extend(unsigned ind) +{ +	arena_t *ret; -static __inline void -free_pages(void *ptr, u_long index, struct pginfo const *info) -{ -    u_long i; -    struct pgfree *pf, *pt=NULL; -    u_long l; -    void *tail; - -    if (info == MALLOC_FREE) { -	wrtwarning("page is already free\n"); -	return; -    } - -    if (info != MALLOC_FIRST) { -	wrtwarning("pointer to wrong page\n"); -	return; -    } - -    if ((u_long)ptr & malloc_pagemask) { -	wrtwarning("modified (page-) pointer\n"); -	return; -    } - -    /* Count how many pages and mark them free at the same time */ -    page_dir[index] = MALLOC_FREE; -    for (i = 1; page_dir[index+i] == MALLOC_FOLLOW; i++) -	page_dir[index + i] = MALLOC_FREE; - -    l = i << malloc_pageshift; - -    if (malloc_junk) -	memset(ptr, SOME_JUNK, l); - -#if defined(MADV_FREE) -    if (malloc_hint) -	madvise(ptr, l, MADV_FREE); -#endif - -    tail = (char *)ptr+l; - -    /* add to free-list */ -    if (px == NULL) -	px = imalloc(sizeof *px);	/* This cannot fail... */ -    px->page = ptr; -    px->end =  tail; -    px->size = l; -    if (free_list.next == NULL) { - -	/* Nothing on free list, put this at head */ -	px->next = free_list.next; -	px->prev = &free_list; -	free_list.next = px; -	pf = px; -	px = NULL; - -    } else { - -	/* Find the right spot, leave pf pointing to the modified entry. 
*/ -	tail = (char *)ptr+l; - -	for(pf = free_list.next; pf->end < ptr && pf->next != NULL; -	    pf = pf->next) -	    ; /* Race ahead here */ - -	if (pf->page > tail) { -	    /* Insert before entry */ -	    px->next = pf; -	    px->prev = pf->prev; -	    pf->prev = px; -	    px->prev->next = px; -	    pf = px; -	    px = NULL; -	} else if (pf->end == ptr ) { -	    /* Append to the previous entry */ -	    pf->end = (char *)pf->end + l; -	    pf->size += l; -	    if (pf->next != NULL && pf->end == pf->next->page ) { -		/* And collapse the next too. */ -		pt = pf->next; -		pf->end = pt->end; -		pf->size += pt->size; -		pf->next = pt->next; -		if (pf->next != NULL) -		    pf->next->prev = pf; -	    } -	} else if (pf->page == tail) { -	    /* Prepend to entry */ -	    pf->size += l; -	    pf->page = ptr; -	} else if (pf->next == NULL) { -	    /* Append at tail of chain */ -	    px->next = NULL; -	    px->prev = pf; -	    pf->next = px; -	    pf = px; -	    px = NULL; +	ret = (arena_t *)ipalloc(&base_arena, CACHELINE, +	    sizeof(arena_t) + CACHELINE); +	if (ret != NULL) { +		arena_new(ret, true, false); +		arenas[ind] = ret;  	} else { -	    wrterror("freelist is destroyed\n"); +		/* +		 * OOM here is quite inconvenient to propagate, since dealing +		 * with it would require a check for failure in the fast path. +		 * Instead, punt by using arenas[0].  In practice, this is an +		 * extremely unlikely failure. +		 */ +		malloc_printf("%s: (malloc) Error initializing arena\n", +		    _getprogname()); +		if (opt_abort) +			abort(); + +		ret = arenas[0];  	} -    } -    /* Return something to OS ? */ -    if (pf->next == NULL &&			/* If we're the last one, */ -      pf->size > malloc_cache &&		/* ..and the cache is full, */ -      pf->end == malloc_brk &&			/* ..and none behind us, */ -      malloc_brk == sbrk(0)) {			/* ..and it's OK to do... */ +	return (ret); +} + +/* + * End arena. + */ +/******************************************************************************/ +/* + * Begin  general internal functions. + */ + +/* + * Choose an arena based on a per-thread value (fast-path code, calls slow-path + * code if necessary. + */ +static __inline arena_t * +choose_arena(void) +{ +	arena_t *ret;  	/* -	 * Keep the cache intact.  Notice that the '>' above guarantees that -	 * the pf will always have at least one page afterwards. +	 * We can only use TLS if this is a PIC library, since for the static +	 * library version, libc's malloc is used by TLS allocation, which +	 * introduces a bootstrapping issue.  	 */ -	pf->end = (char *)pf->page + malloc_cache; -	pf->size = malloc_cache; +#ifndef NO_TLS +	ret = arenas_map; +	if (ret == NULL) +		ret = choose_arena_hard(); +#else +	if (__isthreaded) { +		unsigned long ind; +		 +		/* +		 * Hash _pthread_self() to one of the arenas.  There is a prime +		 * number of arenas, so this has a reasonable chance of +		 * working.  Even so, the hashing can be easily thwarted by +		 * inconvenient _pthread_self() values.  Without specific +		 * knowledge of how _pthread_self() calculates values, we can't +		 * do much better than this. +		 */ +		ind = (unsigned long) _pthread_self() % narenas; + +		/* +		 * Optimistially assume that arenas[ind] has been initialized. +		 * At worst, we find out that some other thread has already +		 * done so, after acquiring the lock in preparation.  
Note that +		 * this lazy locking also has the effect of lazily forcing +		 * cache coherency; without the lock acquisition, there's no +		 * guarantee that modification of arenas[ind] by another thread +		 * would be seen on this CPU for an arbitrary amount of time. +		 * +		 * In general, this approach to modifying a synchronized value +		 * isn't a good idea, but in this case we only ever modify the +		 * value once, so things work out well. +		 */ +		ret = arenas[ind]; +		if (ret == NULL) { +			/* +			 * Avoid races with another thread that may have already +			 * initialized arenas[ind]. +			 */ +			malloc_mutex_lock(&arenas_mtx); +			if (arenas[ind] == NULL) +				ret = arenas_extend((unsigned)ind); +			else +				ret = arenas[ind]; +			malloc_mutex_unlock(&arenas_mtx); +		} +	} else +		ret = arenas[0]; +#endif + +	return (ret); +} + +#ifndef NO_TLS +/* + * Choose an arena based on a per-thread value (slow-path code only, called + * only by choose_arena()). + */ +static arena_t * +choose_arena_hard(void) +{ +	arena_t *ret; + +	/* Assign one of the arenas to this thread, in a round-robin fashion. */ +	if (__isthreaded) { +		malloc_mutex_lock(&arenas_mtx); +		ret = arenas[next_arena]; +		if (ret == NULL) +			ret = arenas_extend(next_arena); +		next_arena = (next_arena + 1) % narenas; +		malloc_mutex_unlock(&arenas_mtx); +	} else +		ret = arenas[0]; +	arenas_map = ret; + +	return (ret); +} +#endif + +static void * +huge_malloc(arena_t *arena, size_t size) +{ +	void *ret; +	size_t chunk_size; +	chunk_node_t *node; + +	/* Allocate a chunk for this request. */ -	brk(pf->end); -	malloc_brk = pf->end; +#ifdef MALLOC_STATS +	arena->stats.huge.nrequests++; +#endif -	index = ptr2index(pf->end); +	chunk_size = CHUNK_CEILING(size); +	if (chunk_size == 0) { +		/* size is large enough to cause size_t wrap-around. */ +		ret = NULL; +		goto RETURN; +	} -	for(i=index;i <= last_index;) -	    page_dir[i++] = MALLOC_NOT_MINE; +	/* Allocate a chunk node with which to track the chunk. */ +	node = ipalloc(&base_arena, CACHELINE, +	    sizeof(chunk_node_t) + CACHELINE); +	if (node == NULL) { +		ret = NULL; +		goto RETURN; +	} -	last_index = index - 1; +	ret = chunk_alloc(chunk_size); +	if (ret == NULL) { +		idalloc(node); +		ret = NULL; +		goto RETURN; +	} -	/* XXX: We could realloc/shrink the pagedir here I guess. */ -    } -    if (pt != NULL) -	ifree(pt); +	/* Insert node into chunks. */ +	node->arena = arena; +	node->chunk = ret; +	node->size = chunk_size; +	node->extra = chunk_size - size; + +	malloc_mutex_lock(&chunks_mtx); +	RB_INSERT(chunk_tree_s, &huge, node); +#ifdef MALLOC_STATS +	huge_allocated += size; +	huge_total += chunk_size; +#endif +	malloc_mutex_unlock(&chunks_mtx); + +RETURN: +	return (ret);  } -/* - * Free a chunk, and possibly the page it's on, if the page becomes empty. - */ +static void +huge_dalloc(void *ptr) +{ +	chunk_node_t key; +	chunk_node_t *node; + +	malloc_mutex_lock(&chunks_mtx); + +	/* Extract from tree of huge allocations. */ +	key.chunk = ptr; +	node = RB_FIND(chunk_tree_s, &huge, &key); +	assert(node != NULL); +	assert(node->chunk == ptr); +	RB_REMOVE(chunk_tree_s, &huge, node); + +#ifdef MALLOC_STATS +	malloc_mutex_lock(&node->arena->mtx); +	node->arena->stats.ndalloc++; +	malloc_mutex_unlock(&node->arena->mtx); + +	/* Update counters. */ +	huge_allocated -= (node->size - node->extra); +	huge_total -= node->size; +#endif -static __inline void -free_bytes(void *ptr, u_long index, struct pginfo *info) +	malloc_mutex_unlock(&chunks_mtx); + +	/* Unmap chunk. 
*/ +	chunk_dealloc(node->chunk, node->size); + +	idalloc(node); +} + +static void * +imalloc(arena_t *arena, size_t size)  { -    int i; -    struct pginfo **mp; -    void *vp; +	void *ret; -    /* Find the chunk number on the page */ -    i = ((u_long)ptr & malloc_pagemask) >> info->shift; +	assert(arena != NULL); +	assert(arena->magic == ARENA_MAGIC); +	assert(size != 0); -    if (((u_long)ptr & (info->size-1))) { -	wrtwarning("modified (chunk-) pointer\n"); -	return; -    } +	if (region_ceiling(size) <= (chunk_size >> 1)) +		ret = arena_malloc(arena, size); +	else +		ret = huge_malloc(arena, size); -    if (info->bits[i/MALLOC_BITS] & (1<<(i%MALLOC_BITS))) { -	wrtwarning("chunk is already free\n"); -	return; -    } +#ifdef MALLOC_STATS +	malloc_mutex_lock(&arena->mtx); +	arena->stats.nmalloc++; +	malloc_mutex_unlock(&arena->mtx); +#endif -    if (malloc_junk) -	memset(ptr, SOME_JUNK, info->size); +	if (opt_junk) { +		if (ret != NULL) +			memset(ret, 0xa5, size); +	} +	return (ret); +} -    info->bits[i/MALLOC_BITS] |= 1<<(i%MALLOC_BITS); -    info->free++; +static void * +ipalloc(arena_t *arena, size_t alignment, size_t size) +{ +	void *ret; -    mp = page_dir + info->shift; +	assert(arena != NULL); +	assert(arena->magic == ARENA_MAGIC); -    if (info->free == 1) { +	/* +	 * The conditions that disallow calling arena_palloc() are quite +	 * tricky. +	 * +	 * The first main clause of the conditional mirrors that in imalloc(), +	 * and is necesary because arena_palloc() may in turn call +	 * arena_malloc(). +	 * +	 * The second and third clauses are necessary because we want to be +	 * sure that it will not be necessary to allocate more than a +	 * half-chunk region at any point during the creation of the aligned +	 * allocation.  These checks closely mirror the calculation of +	 * alloc_size in arena_palloc(). +	 * +	 * Finally, the fourth clause makes explicit the constraint on what +	 * alignments will be attempted via regions.  At first glance, this +	 * appears unnecessary, but in actuality, it protects against otherwise +	 * difficult-to-detect size_t wrap-around cases. +	 */ +	if (region_ceiling(size) <= (chunk_size >> 1) + +	    && (alignment < sizeof(region_small_sizer_t) +	    || (QUANTUM_CEILING(sizeof(region_small_sizer_t)) + alignment +	    + (region_ceiling(size))) <= (chunk_size >> 1)) + +	    && (alignment >= sizeof(region_small_sizer_t) +	    || ((QUANTUM_CEILING(sizeof(region_small_sizer_t)) << 1) +	    + (region_ceiling(size))) <= (chunk_size >> 1)) + +	    && alignment <= (chunk_size >> 2)) +		ret = arena_palloc(arena, alignment, size); +	else { +		if (alignment <= chunk_size) +			ret = huge_malloc(arena, size); +		else { +			size_t chunksize, alloc_size, offset; +			chunk_node_t *node; + +			/* +			 * This allocation requires alignment that is even +			 * larger than chunk alignment.  This means that +			 * huge_malloc() isn't good enough. +			 * +			 * Allocate almost twice as many chunks as are demanded +			 * by the size or alignment, in order to assure the +			 * alignment can be achieved, then unmap leading and +			 * trailing chunks. +			 */ + +			chunksize = CHUNK_CEILING(size); + +			if (size >= alignment) +				alloc_size = chunksize + alignment - chunk_size; +			else +				alloc_size = (alignment << 1) - chunk_size; + +			/* +			 * Allocate a chunk node with which to track the chunk. 
+			 */ +			node = ipalloc(&base_arena, CACHELINE, +			    sizeof(chunk_node_t) + CACHELINE); +			if (node == NULL) { +				ret = NULL; +				goto RETURN; +			} + +			ret = chunk_alloc(alloc_size); +			if (ret == NULL) { +				idalloc(node); +				ret = NULL; +				goto RETURN; +			} + +			offset = (size_t)ret & (alignment - 1); +			assert(offset % chunk_size == 0); +			assert(offset < alloc_size); +			if (offset == 0) { +				/* Trim trailing space. */ +				chunk_dealloc((void *) ((size_t) ret +				    + chunksize), alloc_size - chunksize); +			} else { +				size_t trailsize; + +				/* Trim leading space. */ +				chunk_dealloc(ret, alignment - offset); + +				ret = (void *) ((size_t) ret + (alignment +				    - offset)); + +				trailsize = alloc_size - (alignment - offset) +				    - chunksize; +				if (trailsize != 0) { +				    /* Trim trailing space. */ +				    assert(trailsize < alloc_size); +				    chunk_dealloc((void *) ((size_t) ret +				        + chunksize), trailsize); +				} +			} + +			/* Insert node into chunks. */ +			node->arena = arena; +			node->chunk = ret; +			node->size = chunksize; +			node->extra = node->size - size; + +			malloc_mutex_lock(&chunks_mtx); +			RB_INSERT(chunk_tree_s, &huge, node); +#ifdef MALLOC_STATS +			huge_allocated += size; +			huge_total += chunksize; +#endif +			malloc_mutex_unlock(&chunks_mtx); +		} +	} -	/* Page became non-full */ +RETURN: +#ifdef MALLOC_STATS +	malloc_mutex_lock(&arena->mtx); +	arena->stats.npalloc++; +	malloc_mutex_unlock(&arena->mtx); +#endif -	mp = page_dir + info->shift; -	/* Insert in address order */ -	while (*mp && (*mp)->next && (*mp)->next->page < info->page) -	    mp = &(*mp)->next; -	info->next = *mp; -	*mp = info; -	return; -    } +	if (opt_junk) { +		if (ret != NULL) +			memset(ret, 0xa5, size); +	} +	assert(((size_t)ret & (alignment - 1)) == 0); +	return (ret); +} -    if (info->free != info->total) -	return; +static void * +icalloc(arena_t *arena, size_t num, size_t size) +{ +	void *ret; + +	assert(arena != NULL); +	assert(arena->magic == ARENA_MAGIC); +	assert(num * size != 0); + +	if (region_ceiling(num * size) <= (chunk_size >> 1)) +		ret = arena_calloc(arena, num, size); +	else { +		/* +		 * The virtual memory system provides zero-filled pages, so +		 * there is no need to do so manually. +		 */ +		ret = huge_malloc(arena, num * size); +#ifdef USE_BRK +		if ((size_t)ret >= (size_t)brk_base +		    && (size_t)ret < (size_t)brk_max) { +			/*  +			 * This may be a re-used brk chunk.  Therefore, zero +			 * the memory. +			 */ +			memset(ret, 0, num * size); +		} +#endif +	} -    /* Find & remove this page in the queue */ -    while (*mp != info) { -	mp = &((*mp)->next); -#ifdef MALLOC_EXTRA_SANITY -	if (!*mp) -		wrterror("(ES): Not on queue\n"); -#endif /* MALLOC_EXTRA_SANITY */ -    } -    *mp = info->next; +#ifdef MALLOC_STATS +	malloc_mutex_lock(&arena->mtx); +	arena->stats.ncalloc++; +	malloc_mutex_unlock(&arena->mtx); +#endif -    /* Free the page & the info structure if need be */ -    page_dir[ptr2index(info->page)] = MALLOC_FIRST; -    vp = info->page;		/* Order is important ! */ -    if(vp != (void*)info) -	ifree(info); -    ifree(vp); +	return (ret);  } -static void -ifree(void *ptr) +static size_t +isalloc(void *ptr)  { -    struct pginfo *info; -    u_long index; +	size_t ret; +	chunk_node_t *node; + +	assert(ptr != NULL); +	assert(ptr != &nil); -    /* This is legal */ -    if (ptr == NULL) -	return; +	node = CHUNK_ADDR2BASE(ptr); +	if (node != ptr) { +		/* Region. 
*/ +		assert(node->arena->magic == ARENA_MAGIC); + +		ret = arena_salloc(node->arena, ptr); +	} else { +		chunk_node_t key; -    /* If we're already sinking, don't make matters any worse. */ -    if (suicide) -	return; +		/* Chunk (huge allocation). */ -    index = ptr2index(ptr); +		malloc_mutex_lock(&chunks_mtx); -    if (index < malloc_pageshift) { -	wrtwarning("junk pointer, too low to make sense\n"); -	return; -    } +		/* Extract from tree of huge allocations. */ +		key.chunk = ptr; +		node = RB_FIND(chunk_tree_s, &huge, &key); +		assert(node != NULL); -    if (index > last_index) { -	wrtwarning("junk pointer, too high to make sense\n"); -	return; -    } +		ret = node->size - node->extra; -    info = page_dir[index]; +		malloc_mutex_unlock(&chunks_mtx); +	} + +	return (ret); +} -    if (info < MALLOC_MAGIC) -        free_pages(ptr, index, info); -    else -	free_bytes(ptr, index, info); -    return; +static void +idalloc(void *ptr) +{ +	chunk_node_t *node; + +	assert(ptr != NULL); +	assert(ptr != &nil); + +	node = CHUNK_ADDR2BASE(ptr); +	if (node != ptr) { +		/* Region. */ +#ifdef MALLOC_STATS +		malloc_mutex_lock(&node->arena->mtx); +		node->arena->stats.ndalloc++; +		malloc_mutex_unlock(&node->arena->mtx); +#endif +		arena_dalloc(node->arena, ptr); +	} else +		huge_dalloc(ptr);  }  static void * -pubrealloc(void *ptr, size_t size, const char *func) -{ -    void *r; -    int err = 0; -    static int malloc_active; /* Recusion flag for public interface. */ -    static unsigned malloc_started; /* Set when initialization has been done */ - -    /* -     * If a thread is inside our code with a functional lock held, and then -     * catches a signal which calls us again, we would get a deadlock if the -     * lock is not of a recursive type. -     */ -    _MALLOC_LOCK(); -    malloc_func = func; -    if (malloc_active > 0) { -	if (malloc_active == 1) { -	    wrtwarning("recursive call\n"); -	    malloc_active = 2; -	} -        _MALLOC_UNLOCK(); -	errno = EDOOFUS; -	return (NULL); -    }  -    malloc_active = 1; +iralloc(arena_t *arena, void *ptr, size_t size) +{ +	void *ret; +	size_t oldsize; + +	assert(arena != NULL); +	assert(arena->magic == ARENA_MAGIC); +	assert(ptr != NULL); +	assert(ptr != &nil); +	assert(size != 0); + +	oldsize = isalloc(ptr); + +	if (region_ceiling(size) <= (chunk_size >> 1)) { +		ret = arena_malloc(arena, size); +		if (ret == NULL) +			goto RETURN; +		if (opt_junk) +			memset(ret, 0xa5, size); + +		if (size < oldsize) +			memcpy(ret, ptr, size); +		else +			memcpy(ret, ptr, oldsize); +	} else { +		ret = huge_malloc(arena, size); +		if (ret == NULL) +			goto RETURN; +		if (opt_junk) +			memset(ret, 0xa5, size); + +		if (CHUNK_ADDR2BASE(ptr) == ptr) { +			/* The old allocation is a chunk. */ +			if (size < oldsize) +				memcpy(ret, ptr, size); +			else +				memcpy(ret, ptr, oldsize); +		} else { +			/* The old allocation is a region. 
*/ +			assert(oldsize < size); +			memcpy(ret, ptr, oldsize); +		} +	} + +	idalloc(ptr); + +RETURN: +#ifdef MALLOC_STATS +	malloc_mutex_lock(&arena->mtx); +	arena->stats.nralloc++; +	malloc_mutex_unlock(&arena->mtx); +#endif +	return (ret); +} + +#ifdef MALLOC_STATS +static void +istats(size_t *allocated, size_t *total) +{ +	size_t tallocated, ttotal; +	size_t rallocated, rtotal; +	unsigned i; -    if (!malloc_started) { -        if (ptr != NULL) { -	    wrtwarning("malloc() has never been called\n"); -	    malloc_active = 0; -            _MALLOC_UNLOCK(); -	    errno = EDOOFUS; -	    return (NULL); +	tallocated = 0; +	ttotal = 0; + +	/* base_arena. */ +	arena_stats(&base_arena, &rallocated, &rtotal); +	/* +	 * base_arena is all overhead, so pay no attention to to rallocated +	 * here. +	 */ +	tallocated = 0; +	ttotal = rtotal; + +	/* arenas. */ +	for (i = 0; i < narenas; i++) { +		if (arenas[i] != NULL) { +			arena_stats(arenas[i], &rallocated, &rtotal); +			tallocated += rallocated; +			ttotal += rtotal; +		} +	} + +	/* huge. */ +	malloc_mutex_lock(&chunks_mtx); +	tallocated += huge_allocated; +	ttotal += huge_total; +	malloc_mutex_unlock(&chunks_mtx); + +	/* Return results. */ +	*allocated = tallocated; +	*total = ttotal; +} +#endif + +static void +malloc_print_stats(void) +{ + +	if (opt_print_stats) { +		malloc_printf("___ Begin malloc statistics ___\n"); +		malloc_printf("Number of CPUs: %u\n", ncpus); +		malloc_printf("Number of arenas: %u\n", narenas); +		malloc_printf("Cache slots: %u\n", opt_ndelay); +		malloc_printf("Chunk size: %zu (2^%zu)\n", chunk_size, +		    opt_chunk_2pow); +		malloc_printf("Quantum size: %zu (2^%zu)\n", quantum,  +		    opt_quantum_2pow); +		malloc_printf("Pointer size: %u\n", sizeof(size_t)); +		malloc_printf("Number of bins: %u\n", NBINS); +		malloc_printf("Maximum bin size: %u\n", bin_maxsize); +		malloc_printf("Assertions %s\n", +#ifdef NDEBUG +		    "disabled" +#else +		    "enabled" +#endif +		    ); +		malloc_printf("Redzone size: %u\n",  +#ifdef MALLOC_REDZONES +				MALLOC_RED +#else +				0 +#endif +				); + +#ifdef MALLOC_STATS +		{ +			size_t a, b; + +			istats(&a, &b); +			malloc_printf("Allocated: %zu, space used: %zu\n", a, +			    b); +		} + +		{ +			arena_stats_t stats_arenas; +			arena_t *arena; +			unsigned i; + +			/* Print chunk stats. */ +			{ +				chunk_stats_t chunks_stats; + +				malloc_mutex_lock(&chunks_mtx); +				chunks_stats = stats_chunks; +				malloc_mutex_unlock(&chunks_mtx); + +				malloc_printf("\nchunks:\n"); +				malloc_printf(" %13s%13s%13s\n", "nchunks", +				    "highchunks", "curchunks"); +				malloc_printf(" %13llu%13lu%13lu\n", +				    chunks_stats.nchunks,  +				    chunks_stats.highchunks, +				    chunks_stats.curchunks); +			} + +#ifdef MALLOC_STATS_BASE +			/* +			 * Copy base_arena's stats out, so that they are +			 * self-consistent, even if the process of printing +			 * stats is causing additional allocation. +			 */ +			memset(&stats_arenas, 0, sizeof(arena_stats_t)); +			malloc_mutex_lock(&base_arena.mtx); +			stats_merge(&base_arena, &stats_arenas); +			malloc_mutex_unlock(&base_arena.mtx); + +			/* Print base_arena stats. */ +			malloc_printf("\nbase_arena statistics:\n"); +			stats_print(&stats_arenas); +#endif + +#ifdef MALLOC_STATS_ARENAS +			/* Print stats for each arena. 
*/ +			for (i = 0; i < narenas; i++) { +				arena = arenas[i]; +				if (arena != NULL) { +					malloc_printf( +					    "\narenas[%u] statistics:\n", i); +					malloc_mutex_lock(&arena->mtx); +					stats_print(&arena->stats); +					malloc_mutex_unlock(&arena->mtx); +				} else { +					malloc_printf("\narenas[%u] statistics:" +					    " unused arena\n", i); +				} +			} +#endif + +			/*  +			 * Merge arena stats from arenas (leave out base_arena). +			 */ +			memset(&stats_arenas, 0, sizeof(arena_stats_t)); +			for (i = 0; i < narenas; i++) { +				arena = arenas[i]; +				if (arena != NULL) { +					malloc_mutex_lock(&arena->mtx); +					stats_merge(arena, &stats_arenas); +					malloc_mutex_unlock(&arena->mtx); +				} +			} + +			/* Print arena stats. */ +			malloc_printf("\nMerged arena statistics:\n"); +			stats_print(&stats_arenas); +		} +#endif /* #ifdef MALLOC_STATS */ +		malloc_printf("--- End malloc statistics ---\n");  	} -	malloc_init(); -	malloc_started = 1; -    } -    -    if (ptr == ZEROSIZEPTR) -	ptr = NULL; -    if (malloc_sysv && !size) { -	if (ptr != NULL) -	    ifree(ptr); -	r = NULL; -    } else if (!size) { -	if (ptr != NULL) -	    ifree(ptr); -	r = ZEROSIZEPTR; -    } else if (ptr == NULL) { -	r = imalloc(size); -	err = (r == NULL); -    } else { -        r = irealloc(ptr, size); -	err = (r == NULL); -    } -    UTRACE(ptr, size, r); -    malloc_active = 0; -    _MALLOC_UNLOCK(); -    if (malloc_xmalloc && err) -	wrterror("out of memory\n"); -    if (err) -	errno = ENOMEM; -    return (r);  }  /* - * These are the public exported interface routines. + * FreeBSD's pthreads implementation calls malloc(3), so the malloc + * implementation has to take pains to avoid infinite recursion during + * initialization. + * + * atomic_init_start() returns true if it started initializing.  In that case, + * the caller must also call atomic_init_finish(), just before returning + * to its caller.  This delayed finalization of initialization is critical, + * since otherwise choose_arena() has no way to know whether it's safe + * to call _pthread_self(). + */ +static __inline void +malloc_init(void) +{ + +	/* +	 * We always initialize before threads are created, since any thread +	 * creation first triggers allocations. +	 */ +	assert(__isthreaded == 0 || malloc_initialized); + +	if (malloc_initialized == false) +		malloc_init_hard(); +} + +static void +malloc_init_hard(void) +{ +	unsigned i, j; +	int linklen; +	char buf[PATH_MAX + 1]; +	const char *opts; + +	/* Get number of CPUs. */ +	{ +		int mib[2]; +		size_t len; + +		mib[0] = CTL_HW; +		mib[1] = HW_NCPU; +		len = sizeof(ncpus); +		if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) { +			/* Error. */ +			ncpus = 1; +		} +	} + +	/* Get page size. */ +	{ +		long result; + +		result = sysconf(_SC_PAGESIZE); +		assert(result != -1); +		pagesize = (unsigned) result; +	} + +	for (i = 0; i < 3; i++) { +		/* Get runtime configuration. */ +		switch (i) { +		case 0: +			if ((linklen = readlink("/etc/malloc.conf", buf, +						sizeof(buf) - 1)) != -1) { +				/* +				 * Use the contents of the "/etc/malloc.conf" +				 * symbolic link's name. +				 */ +				buf[linklen] = '\0'; +				opts = buf; +			} else { +				/* No configuration specified. */ +				buf[0] = '\0'; +				opts = buf; +			} +			break; +		case 1: +			if (issetugid() == 0 && (opts = +			    getenv("MALLOC_OPTIONS")) != NULL) { +				/* +				 * Do nothing; opts is already initialized to +				 * the value of the MALLOC_OPTIONS environment +				 * variable. 
+				 */ +			} else { +				/* No configuration specified. */ +				buf[0] = '\0'; +				opts = buf; +			} +			break; +		case 2: +			if (_malloc_options != NULL) { +			    /* +			     * Use options that were compiled into the program. +			     */ +			    opts = _malloc_options; +			} else { +				/* No configuration specified. */ +				buf[0] = '\0'; +				opts = buf; +			} +			break; +		default: +			/* NOTREACHED */ +			assert(false); +		} + +		for (j = 0; opts[j] != '\0'; j++) { +			switch (opts[j]) { +			case 'a': +				opt_abort = false; +				break; +			case 'A': +				opt_abort = true; +				break; +			case 'c': +				opt_ndelay <<= 1; +				if (opt_ndelay == 0) +					opt_ndelay = 1; +				break; +			case 'C': +				opt_ndelay >>= 1; +				if (opt_ndelay == 0) +					opt_ndelay = 1; +				break; +			case 'j': +				opt_junk = false; +				break; +			case 'J': +				opt_junk = true; +				break; +			case 'k': +				if (opt_chunk_2pow > CHUNK_2POW_MIN) +					opt_chunk_2pow--; +				break; +			case 'K': +				if (opt_chunk_2pow < CHUNK_2POW_MAX) +					opt_chunk_2pow++; +				break; +			case 'n': +				opt_narenas_lshift--; +				break; +			case 'N': +				opt_narenas_lshift++; +				break; +			case 'p': +				opt_print_stats = false; +				break; +			case 'P': +				opt_print_stats = true; +				break; +			case 'q': +				if (opt_quantum_2pow > QUANTUM_2POW_MIN) +					opt_quantum_2pow--; +				break; +			case 'Q': +				if ((1 << opt_quantum_2pow) < pagesize) +					opt_quantum_2pow++; +				break; +			case 'u': +				opt_utrace = false; +				break; +			case 'U': +				opt_utrace = true; +				break; +			case 'v': +				opt_sysv = false; +				break; +			case 'V': +				opt_sysv = true; +				break; +			case 'x': +				opt_xmalloc = false; +				break; +			case 'X': +				opt_xmalloc = true; +				break; +			case 'z': +				opt_zero = false; +				break; +			case 'Z': +				opt_zero = true; +				break; +			default: +				malloc_printf("%s: (malloc) Unsupported" +				    " character in malloc options: '%c'\n", +				    _getprogname(), opts[j]); +			} +		} +	} + +	/* Take care to call atexit() only once. */ +	if (opt_print_stats) { +		/* Print statistics at exit. */ +		atexit(malloc_print_stats); +	} + +	/* Set variables according to the value of opt_quantum_2pow. */ +	quantum = (1 << opt_quantum_2pow); +	quantum_mask = quantum - 1; +	bin_shift = ((QUANTUM_CEILING(sizeof(region_small_sizer_t)) +	    >> opt_quantum_2pow)); +	bin_maxsize = ((NBINS + bin_shift - 1) * quantum); + +	/* Set variables according to the value of opt_chunk_2pow. */ +	chunk_size = (1 << opt_chunk_2pow); +	chunk_size_mask = chunk_size - 1; + +	UTRACE(0, 0, 0); + +#ifdef MALLOC_STATS +	memset(&stats_chunks, 0, sizeof(chunk_stats_t)); +#endif + +	/* Various sanity checks that regard configuration. */ +	assert(quantum >= 2 * sizeof(void *)); +	assert(quantum <= pagesize); +	assert(chunk_size >= pagesize); +	assert(quantum * 4 <= chunk_size); + +	/* Initialize chunks data. */ +	malloc_mutex_init(&chunks_mtx); +	RB_INIT(&huge); +#ifdef USE_BRK +	brk_base = sbrk(0); +	brk_prev = brk_base; +	brk_max = (void *)((size_t)brk_base + MAXDSIZ); +#endif +#ifdef MALLOC_STATS +	huge_allocated = 0; +	huge_total = 0; +#endif +	RB_INIT(&old_chunks); + +	/* Create base arena. */ +	arena_new(&base_arena, false, true); + +	if (ncpus > 1) { +		/*  +		 * For SMP systems, create twice as many arenas as there are +		 * CPUs by default. +		 */ +		opt_narenas_lshift++; +	} + +	/* Determine how many arenas to use. 
*/ +	narenas = 1; +	if (opt_narenas_lshift > 0) +		narenas <<= opt_narenas_lshift; + +#ifdef NO_TLS +	if (narenas > 1) { +		static const unsigned primes[] = {1, 3, 5, 7, 11, 13, 17, 19, +		    23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, +		    89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, +		    151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, +		    223, 227, 229, 233, 239, 241, 251, 257, 263}; +		unsigned i, nprimes, parenas; + +		/* +		 * Pick a prime number of hash arenas that is more than narenas +		 * so that direct hashing of pthread_self() pointers tends to +		 * spread allocations evenly among the arenas. +		 */ +		assert((narenas & 1) == 0); /* narenas must be even. */ +		nprimes = sizeof(primes) / sizeof(unsigned); +		parenas = primes[nprimes - 1]; /* In case not enough primes. */ +		for (i = 1; i < nprimes; i++) { +			if (primes[i] > narenas) { +				parenas = primes[i]; +				break; +			} +		} +		narenas = parenas; +	} +#endif + +#ifndef NO_TLS +	next_arena = 0; +#endif + +	/* Allocate and initialize arenas. */ +	arenas = (arena_t **)ipalloc(&base_arena, CACHELINE, +	    (sizeof(arena_t *) * narenas) + CACHELINE); +	/* OOM here is fatal. */ +	assert(arenas != NULL); +	/* +	 * Zero the array.  In practice, this should always be pre-zeroed, +	 * since it was just mmap()ed, but let's be sure. +	 */ +	memset(arenas, 0, sizeof(arena_t *) * narenas); + +	/* +	 * Initialize one arena here.  The rest are lazily created in +	 * arena_choose_hard(). +	 */ +	arenas_extend(0); +	/* OOM here is fatal. */ +	assert(arenas[0] != NULL); + +	malloc_mutex_init(&arenas_mtx); + +	malloc_initialized = true; +} + +/* + * End library-internal functions. + */ +/******************************************************************************/ +/* + * Begin malloc(3)-compatible functions.   */  void *  malloc(size_t size)  { +	void *ret; +	arena_t *arena; -    return (pubrealloc(NULL, size, " in malloc():")); +	malloc_init(); + +	if (size == 0) { +		if (opt_sysv == false) +			ret = &nil; +		else +			ret = NULL; +		goto RETURN; +	} + +	arena = choose_arena(); +	if (arena != NULL) +		ret = imalloc(arena, size); +	else +		ret = NULL; + +	if (ret == NULL) { +		if (opt_xmalloc) { +			malloc_printf("%s: (malloc) Error in malloc(%zu):" +			    " out of memory\n", _getprogname(), size); +			abort(); +		} +		errno = ENOMEM; +	} else if (opt_zero) +		memset(ret, 0, size); + +RETURN: +	UTRACE(0, size, ret); +	return (ret);  }  int  posix_memalign(void **memptr, size_t alignment, size_t size)  { -    int err; -    void *result; - -    /* Make sure that alignment is a large enough power of 2. */ -    if (((alignment - 1) & alignment) != 0 || alignment < sizeof(void *)) -	    return (EINVAL); +	int ret; +	arena_t *arena; +	void *result; -    /*  -     * (size | alignment) is enough to assure the requested alignment, since -     * the allocator always allocates power-of-two blocks. -     */ -    err = errno; /* Protect errno against changes in pubrealloc(). */ -    result = pubrealloc(NULL, (size | alignment), " in posix_memalign()"); -    errno = err; +	malloc_init(); -    if (result == NULL) -	return (ENOMEM); +	/* Make sure that alignment is a large enough power of 2. 
*/ +	if (((alignment - 1) & alignment) != 0 || alignment < sizeof(void *)) { +		if (opt_xmalloc) { +			malloc_printf("%s: (malloc) Error in" +			    " posix_memalign(%zu, %zu): invalid alignment\n", +			    _getprogname(), alignment, size); +			abort(); +		} +		result = NULL; +		ret = EINVAL; +		goto RETURN; +	} -    *memptr = result; -    return (0); +	arena = choose_arena(); +	if (arena != NULL) +		result = ipalloc(arena, alignment, size); +	else +		result = NULL; + +	if (result == NULL) { +		if (opt_xmalloc) { +			malloc_printf("%s: (malloc) Error in" +			    " posix_memalign(%zu, %zu): out of memory\n", +			    _getprogname(), alignment, size); +			abort(); +		} +		ret = ENOMEM; +		goto RETURN; +	} else if (opt_zero) +		memset(result, 0, size); + +	*memptr = result; +	ret = 0; + +RETURN: +	UTRACE(0, size, result); +	return (ret);  }  void *  calloc(size_t num, size_t size)  { -    void *ret; +	void *ret; +	arena_t *arena; -    if (size != 0 && (num * size) / size != num) { -	/* size_t overflow. */ -	errno = ENOMEM; -	return (NULL); -    } +	malloc_init(); -    ret = pubrealloc(NULL, num * size, " in calloc():"); +	if (num * size == 0) { +		if (opt_sysv == false) +			ret = &nil; +		else +			ret = NULL; +		goto RETURN; +	} else if ((num * size) / size != num) { +		/* size_t overflow. */ +		ret = NULL; +		goto RETURN; +	} -    if (ret != NULL) -	memset(ret, 0, num * size); +	arena = choose_arena(); +	if (arena != NULL) +		ret = icalloc(arena, num, size); +	else +		ret = NULL; + +RETURN: +	if (ret == NULL) { +		if (opt_xmalloc) { +			malloc_printf("%s: (malloc) Error in" +			    " calloc(%zu, %zu): out of memory\n",  +			    _getprogname(), num, size); +			abort(); +		} +		errno = ENOMEM; +	} else if (opt_zero) { +		/* +		 * This has the side effect of faulting pages in, even if the +		 * pages are pre-zeroed by the kernel. 
+		 */ +		memset(ret, 0, num * size); +	} -    return (ret); +	UTRACE(0, num * size, ret); +	return (ret);  } -void -free(void *ptr) +void * +realloc(void *ptr, size_t size)  { +	void *ret; + +	if (size != 0) { +		arena_t *arena; + +		if (ptr != &nil && ptr != NULL) { +			assert(malloc_initialized); + +			arena = choose_arena(); +			if (arena != NULL) +				ret = iralloc(arena, ptr, size); +			else +				ret = NULL; + +			if (ret == NULL) { +				if (opt_xmalloc) { +					malloc_printf("%s: (malloc) Error in" +					    " ralloc(%p, %zu): out of memory\n", +					    _getprogname(), ptr, size); +					abort(); +				} +				errno = ENOMEM; +			} else if (opt_zero) { +				size_t old_size; + +				old_size = isalloc(ptr); + +				if (old_size < size) { +				    memset(&((char *)ret)[old_size], 0, +					    size - old_size); +				} +			} +		} else { +			malloc_init(); + +			arena = choose_arena(); +			if (arena != NULL) +				ret = imalloc(arena, size); +			else +				ret = NULL; + +			if (ret == NULL) { +				if (opt_xmalloc) { +					malloc_printf("%s: (malloc) Error in" +					    " ralloc(%p, %zu): out of memory\n", +					    _getprogname(), ptr, size); +					abort(); +				} +				errno = ENOMEM; +			} else if (opt_zero) +				memset(ret, 0, size); +		} +	} else { +		if (ptr != &nil && ptr != NULL) +			idalloc(ptr); + +		ret = &nil; +	} -    pubrealloc(ptr, 0, " in free():"); +	UTRACE(ptr, size, ret); +	return (ret);  } -void * -realloc(void *ptr, size_t size) +void +free(void *ptr)  { -    return (pubrealloc(ptr, size, " in realloc():")); +	UTRACE(ptr, 0, 0); +	if (ptr != &nil && ptr != NULL) { +		assert(malloc_initialized); + +		idalloc(ptr); +	}  }  /* + * End malloc(3)-compatible functions. + */ +/******************************************************************************/ +/*   * Begin library-private functions, used by threading libraries for protection   * of malloc during fork().  These functions are only called if the program is   * running in threaded mode, so there is no need to check whether the program @@ -1218,13 +4743,42 @@ realloc(void *ptr, size_t size)  void  _malloc_prefork(void)  { +	unsigned i; -	_spinlock(__malloc_lock); +	/* Acquire all mutexes in a safe order. */ + +	malloc_mutex_lock(&arenas_mtx); +	for (i = 0; i < narenas; i++) { +		if (arenas[i] != NULL) +			malloc_mutex_lock(&arenas[i]->mtx); +	} +	malloc_mutex_unlock(&arenas_mtx); + +	malloc_mutex_lock(&base_arena.mtx); + +	malloc_mutex_lock(&chunks_mtx);  }  void  _malloc_postfork(void)  { +	unsigned i; + +	/* Release all mutexes, now that fork() has completed. */ -	_spinunlock(__malloc_lock); +	malloc_mutex_unlock(&chunks_mtx); + +	malloc_mutex_unlock(&base_arena.mtx); + +	malloc_mutex_lock(&arenas_mtx); +	for (i = 0; i < narenas; i++) { +		if (arenas[i] != NULL) +			malloc_mutex_unlock(&arenas[i]->mtx); +	} +	malloc_mutex_unlock(&arenas_mtx);  } + +/* + * End library-private functions. + */ +/******************************************************************************/ | 
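
The choose_arena() fast path introduced in this patch hashes the calling thread to one of a prime number of arenas and initializes the chosen slot lazily, re-checking under arenas_mtx before creating it. The standalone C sketch below illustrates that pattern under simplified assumptions: toy_arena_t, toy_arenas, and toy_arena_create() are hypothetical stand-ins for the patch's arena_t, arenas[], and arenas_extend(), and the pthread_t-to-integer cast mirrors the patch's use of _pthread_self(). It is an illustration of the technique, not the libc code.

    #include <assert.h>
    #include <pthread.h>
    #include <stdio.h>

    typedef struct {
        unsigned index;
    } toy_arena_t;

    #define TOY_NARENAS 7    /* Prime, so the modulo hash spreads threads. */

    static toy_arena_t     toy_arena_storage[TOY_NARENAS];
    static toy_arena_t     *toy_arenas[TOY_NARENAS];
    static pthread_mutex_t toy_arenas_mtx = PTHREAD_MUTEX_INITIALIZER;

    static toy_arena_t *
    toy_arena_create(unsigned ind)
    {

        toy_arena_storage[ind].index = ind;
        toy_arenas[ind] = &toy_arena_storage[ind];
        return (toy_arenas[ind]);
    }

    static toy_arena_t *
    toy_choose_arena(void)
    {
        toy_arena_t *ret;
        unsigned ind;

        /*
         * Hash the thread's identity to a slot.  This assumes pthread_t
         * converts to an integer, as it does on FreeBSD and Linux.
         */
        ind = (unsigned)((unsigned long)pthread_self() % TOY_NARENAS);

        /*
         * Optimistically read without the lock, then re-check under the
         * lock before initializing.  The unlocked read is safe only
         * because each slot is written exactly once.
         */
        ret = toy_arenas[ind];
        if (ret == NULL) {
            pthread_mutex_lock(&toy_arenas_mtx);
            if (toy_arenas[ind] == NULL)
                ret = toy_arena_create(ind);
            else
                ret = toy_arenas[ind];
            pthread_mutex_unlock(&toy_arenas_mtx);
        }
        return (ret);
    }

    int
    main(void)
    {
        toy_arena_t *arena = toy_choose_arena();

        printf("thread mapped to arena %u\n", arena->index);
        return (0);
    }

Compile with -pthread. The same double-checked pattern appears in both the NO_TLS hash path and the locked re-check inside choose_arena(); the TLS path simply caches the result per thread so the hash and lock are skipped entirely after the first call.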

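For alignments larger than the chunk size, ipalloc() allocates nearly twice as much address space as requested and then unmaps the leading and trailing excess so that a suitably aligned block remains. The sketch below shows the same trim arithmetic with mmap()/munmap() standing in for the patch's chunk_alloc()/chunk_dealloc(); it over-allocates by a full alignment (the patch shaves one chunk off that bound) and assumes size and alignment are multiples of the page size, as chunk sizes are in the patch. The function name aligned_map() is hypothetical.

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/mman.h>

    static void *
    aligned_map(size_t alignment, size_t size)
    {
        void *base, *ret;
        size_t alloc_size, offset, lead, trail;

        assert((alignment & (alignment - 1)) == 0);    /* Power of two. */

        /* Enough room to find an aligned block of `size` bytes inside. */
        alloc_size = size + alignment;
        base = mmap(NULL, alloc_size, PROT_READ | PROT_WRITE,
            MAP_PRIVATE | MAP_ANON, -1, 0);
        if (base == MAP_FAILED)
            return (NULL);

        offset = (uintptr_t)base & (alignment - 1);
        lead = (offset == 0) ? 0 : alignment - offset;
        ret = (void *)((uintptr_t)base + lead);
        trail = alloc_size - lead - size;

        /* Trim leading space. */
        if (lead != 0)
            munmap(base, lead);
        /* Trim trailing space. */
        if (trail != 0)
            munmap((void *)((uintptr_t)ret + size), trail);

        assert(((uintptr_t)ret & (alignment - 1)) == 0);
        return (ret);
    }

    int
    main(void)
    {
        void *p = aligned_map(1 << 20, 1 << 16);    /* 1 MB alignment, 64 KB size. */

        printf("%p\n", p);
        return (p == NULL);
    }

The patch performs the same steps with chunk-granular bookkeeping: it records the retained run in a chunk_node_t, inserts it into the huge tree, and accounts for the unused tail via the node's extra field.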