Diffstat (limited to 'cvmx-malloc/malloc.c')
-rw-r--r--  cvmx-malloc/malloc.c | 4106
1 file changed, 4106 insertions, 0 deletions
| diff --git a/cvmx-malloc/malloc.c b/cvmx-malloc/malloc.c new file mode 100644 index 0000000000000..222ad5def124c --- /dev/null +++ b/cvmx-malloc/malloc.c @@ -0,0 +1,4106 @@ +/* +Copyright (c) 2001 Wolfram Gloger +Copyright (c) 2006 Cavium networks + +Permission to use, copy, modify, distribute, and sell this software +and its documentation for any purpose is hereby granted without fee, +provided that (i) the above copyright notices and this permission +notice appear in all copies of the software and related documentation, +and (ii) the name of Wolfram Gloger may not be used in any advertising +or publicity relating to the software. + +THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, +EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY +WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +IN NO EVENT SHALL WOLFRAM GLOGER BE LIABLE FOR ANY SPECIAL, +INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY +DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY +OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THIS SOFTWARE. +*/ + +/* +  This is a version (aka ptmalloc2) of malloc/free/realloc written by +  Doug Lea and adapted to multiple threads/arenas by Wolfram Gloger. + +* Version ptmalloc2-20011215 +  $Id: malloc.c 30481 2007-12-05 21:46:59Z rfranz $ +  based on: +  VERSION 2.7.1pre1 Sat May 12 07:41:21 2001  Doug Lea  (dl at gee) + +   Note: There may be an updated version of this malloc obtainable at +           http://www.malloc.de/malloc/ptmalloc2.tar.gz +         Check before installing! + +* Quickstart + +  In order to compile this implementation, a Makefile is provided with +  the ptmalloc2 distribution, which has pre-defined targets for some +  popular systems (e.g. "make posix" for Posix threads).  All that is +  typically required with regard to compiler flags is the selection of +  the thread package via defining one out of USE_PTHREADS, USE_THR or +  USE_SPROC.  Check the thread-m.h file for what effects this has. +  Many/most systems will additionally require USE_TSD_DATA_HACK to be +  defined, so this is the default for "make posix". + +* Why use this malloc? + +  This is not the fastest, most space-conserving, most portable, or +  most tunable malloc ever written. However it is among the fastest +  while also being among the most space-conserving, portable and tunable. +  Consistent balance across these factors results in a good general-purpose +  allocator for malloc-intensive programs. + +  The main properties of the algorithms are: +  * For large (>= 512 bytes) requests, it is a pure best-fit allocator, +    with ties normally decided via FIFO (i.e. least recently used). +  * For small (<= 64 bytes by default) requests, it is a caching +    allocator, that maintains pools of quickly recycled chunks. +  * In between, and for combinations of large and small requests, it does +    the best it can trying to meet both goals at once. +  * For very large requests (>= 128KB by default), it relies on system +    memory mapping facilities, if supported. + +  For a longer but slightly out of date high-level description, see +     http://gee.cs.oswego.edu/dl/html/malloc.html + +  You may already by default be using a C library containing a malloc +  that is  based on some version of this malloc (for example in +  linux). 
You might still want to use the one in this file in order to +  customize settings or to avoid overheads associated with library +  versions. + +* Contents, described in more detail in "description of public routines" below. + +  Standard (ANSI/SVID/...)  functions: +    malloc(size_t n); +    calloc(size_t n_elements, size_t element_size); +    free(Void_t* p); +    realloc(Void_t* p, size_t n); +    memalign(size_t alignment, size_t n); +    valloc(size_t n); +    mallinfo() +    mallopt(int parameter_number, int parameter_value) + +  Additional functions: +    independent_calloc(size_t n_elements, size_t size, Void_t* chunks[]); +    independent_comalloc(size_t n_elements, size_t sizes[], Void_t* chunks[]); +    pvalloc(size_t n); +    cfree(Void_t* p); +    malloc_trim(size_t pad); +    malloc_usable_size(Void_t* p); +    malloc_stats(); + +* Vital statistics: + +  Supported pointer representation:       4 or 8 bytes +  Supported size_t  representation:       4 or 8 bytes +       Note that size_t is allowed to be 4 bytes even if pointers are 8. +       You can adjust this by defining INTERNAL_SIZE_T + +  Alignment:                              2 * sizeof(size_t) (default) +       (i.e., 8 byte alignment with 4byte size_t). This suffices for +       nearly all current machines and C compilers. However, you can +       define MALLOC_ALIGNMENT to be wider than this if necessary. + +  Minimum overhead per allocated chunk:   4 or 8 bytes +       Each malloced chunk has a hidden word of overhead holding size +       and status information. + +  Minimum allocated size: 4-byte ptrs:  16 bytes    (including 4 overhead) +                          8-byte ptrs:  24/32 bytes (including, 4/8 overhead) + +       When a chunk is freed, 12 (for 4byte ptrs) or 20 (for 8 byte +       ptrs but 4 byte size) or 24 (for 8/8) additional bytes are +       needed; 4 (8) for a trailing size field and 8 (16) bytes for +       free list pointers. Thus, the minimum allocatable size is +       16/24/32 bytes. + +       Even a request for zero bytes (i.e., malloc(0)) returns a +       pointer to something of the minimum allocatable size. + +       The maximum overhead wastage (i.e., number of extra bytes +       allocated than were requested in malloc) is less than or equal +       to the minimum size, except for requests >= mmap_threshold that +       are serviced via mmap(), where the worst case wastage is 2 * +       sizeof(size_t) bytes plus the remainder from a system page (the +       minimal mmap unit); typically 4096 or 8192 bytes. + +  Maximum allocated size:  4-byte size_t: 2^32 minus about two pages +                           8-byte size_t: 2^64 minus about two pages + +       It is assumed that (possibly signed) size_t values suffice to +       represent chunk sizes. `Possibly signed' is due to the fact +       that `size_t' may be defined on a system as either a signed or +       an unsigned type. The ISO C standard says that it must be +       unsigned, but a few systems are known not to adhere to this. +       Additionally, even when size_t is unsigned, sbrk (which is by +       default used to obtain memory from system) accepts signed +       arguments, and may not be able to handle size_t-wide arguments +       with negative sign bit.  Generally, values that would +       appear as negative after accounting for overhead and alignment +       are supported only via mmap(), which does not have this +       limitation. 
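The overhead and minimum-size figures above follow from one rounding rule: add a single size word, round up to the 2 * sizeof(size_t) alignment, and never go below the minimum chunk size. Below is a minimal standalone sketch of that arithmetic, mirroring the request2size/MINSIZE logic of stock ptmalloc2; the SK_* names are illustrative, the sketch assumes pointers and size_t have the same width, and this Cavium port also stores an arena pointer in each chunk (see the malloc_chunk struct further down), so the exact constants in this file may differ.

    #include <stdio.h>
    #include <stddef.h>

    /* One hidden size word per chunk, 2*sizeof(size_t) alignment, and a
       floor equal to the smallest free chunk (size word, two list
       pointers, trailing size word). */
    #define SK_SIZE_SZ     (sizeof(size_t))
    #define SK_ALIGNMENT   (2 * SK_SIZE_SZ)
    #define SK_ALIGN_MASK  (SK_ALIGNMENT - 1)
    #define SK_MINSIZE     (4 * SK_SIZE_SZ)

    static size_t sk_request2size(size_t req)
    {
        size_t sz = (req + SK_SIZE_SZ + SK_ALIGN_MASK) & ~SK_ALIGN_MASK;
        return sz < SK_MINSIZE ? SK_MINSIZE : sz;
    }

    int main(void)
    {
        size_t reqs[] = { 0, 1, 8, 13, 100 };
        size_t i;
        for (i = 0; i < sizeof(reqs) / sizeof(reqs[0]); i++)
            printf("request %3zu -> chunk of %3zu bytes\n",
                   reqs[i], sk_request2size(reqs[i]));
        return 0;
    }

On a 64-bit build this prints 32 bytes for a request of 0, matching the 8-byte-pointer, 8-byte-size_t figure quoted above.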
+ +       Requests for sizes outside the allowed range will perform an optional +       failure action and then return null. (Requests may also +       also fail because a system is out of memory.) + +  Thread-safety: thread-safe unless NO_THREADS is defined + +  Compliance: I believe it is compliant with the 1997 Single Unix Specification +       (See http://www.opennc.org). Also SVID/XPG, ANSI C, and probably +       others as well. + +* Synopsis of compile-time options: + +    People have reported using previous versions of this malloc on all +    versions of Unix, sometimes by tweaking some of the defines +    below. It has been tested most extensively on Solaris and +    Linux. It is also reported to work on WIN32 platforms. +    People also report using it in stand-alone embedded systems. + +    The implementation is in straight, hand-tuned ANSI C.  It is not +    at all modular. (Sorry!)  It uses a lot of macros.  To be at all +    usable, this code should be compiled using an optimizing compiler +    (for example gcc -O3) that can simplify expressions and control +    paths. (FAQ: some macros import variables as arguments rather than +    declare locals because people reported that some debuggers +    otherwise get confused.) + +    OPTION                     DEFAULT VALUE + +    Compilation Environment options: + +    __STD_C                    derived from C compiler defines +    WIN32                      NOT defined +    HAVE_MEMCPY                defined +    USE_MEMCPY                 1 if HAVE_MEMCPY is defined +    HAVE_MMAP                  defined as 1 +    MMAP_CLEARS                1 +    HAVE_MREMAP                0 unless linux defined +    USE_ARENAS                 the same as HAVE_MMAP +    malloc_getpagesize         derived from system #includes, or 4096 if not +    HAVE_USR_INCLUDE_MALLOC_H  NOT defined +    LACKS_UNISTD_H             NOT defined unless WIN32 +    LACKS_SYS_PARAM_H          NOT defined unless WIN32 +    LACKS_SYS_MMAN_H           NOT defined unless WIN32 + +    Changing default word sizes: + +    INTERNAL_SIZE_T            size_t +    MALLOC_ALIGNMENT           2 * sizeof(INTERNAL_SIZE_T) + +    Configuration and functionality options: + +    USE_DL_PREFIX              NOT defined +    USE_PUBLIC_MALLOC_WRAPPERS NOT defined +    USE_MALLOC_LOCK            NOT defined +    MALLOC_DEBUG               NOT defined +    REALLOC_ZERO_BYTES_FREES   1 +    MALLOC_FAILURE_ACTION      errno = ENOMEM, if __STD_C defined, else no-op +    TRIM_FASTBINS              0 +    FIRST_SORTED_BIN_SIZE      512 + +    Options for customizing MORECORE: + +    MORECORE                   sbrk +    MORECORE_FAILURE           -1 +    MORECORE_CONTIGUOUS        1 +    MORECORE_CANNOT_TRIM       NOT defined +    MORECORE_CLEARS            1 +    MMAP_AS_MORECORE_SIZE      (1024 * 1024) + +    Tuning options that are also dynamically changeable via mallopt: + +    DEFAULT_MXFAST             64 +    DEFAULT_TRIM_THRESHOLD     128 * 1024 +    DEFAULT_TOP_PAD            0 +    DEFAULT_MMAP_THRESHOLD     128 * 1024 +    DEFAULT_MMAP_MAX           65536 + +    There are several other #defined constants and macros that you +    probably don't want to touch unless you are extending or adapting malloc.  */ + +/* +  __STD_C should be nonzero if using ANSI-standard C compiler, a C++ +  compiler, or a C compiler sufficiently close to ANSI to get away +  with it. 
+*/ + +#include "cvmx-config.h" +#include "cvmx.h" +#include "cvmx-spinlock.h" +#include "cvmx-malloc.h" + + +#ifndef __STD_C +#if defined(__STDC__) || defined(__cplusplus) +#define __STD_C     1 +#else +#define __STD_C     0 +#endif +#endif /*__STD_C*/ + + +/* +  Void_t* is the pointer type that malloc should say it returns +*/ + +#ifndef Void_t +#if 1 +#define Void_t      void +#else +#define Void_t      char +#endif +#endif /*Void_t*/ + + +#ifdef __cplusplus +extern "C" { +#endif + +/* define LACKS_UNISTD_H if your system does not have a <unistd.h>. */ + +/* #define  LACKS_UNISTD_H */ + +#ifndef LACKS_UNISTD_H +#include <unistd.h> +#endif + +/* define LACKS_SYS_PARAM_H if your system does not have a <sys/param.h>. */ + +/* #define  LACKS_SYS_PARAM_H */ + + +#include <stdio.h>    /* needed for malloc_stats */ +#include <errno.h>    /* needed for optional MALLOC_FAILURE_ACTION */ + + +/* +  Debugging: + +  Because freed chunks may be overwritten with bookkeeping fields, this +  malloc will often die when freed memory is overwritten by user +  programs.  This can be very effective (albeit in an annoying way) +  in helping track down dangling pointers. + +  If you compile with -DMALLOC_DEBUG, a number of assertion checks are +  enabled that will catch more memory errors. You probably won't be +  able to make much sense of the actual assertion errors, but they +  should help you locate incorrectly overwritten memory.  The checking +  is fairly extensive, and will slow down execution +  noticeably. Calling malloc_stats or mallinfo with MALLOC_DEBUG set +  will attempt to check every non-mmapped allocated and free chunk in +  the course of computing the summmaries. (By nature, mmapped regions +  cannot be checked very much automatically.) + +  Setting MALLOC_DEBUG may also be helpful if you are trying to modify +  this code. The assertions in the check routines spell out in more +  detail the assumptions and invariants underlying the algorithms. + +  Setting MALLOC_DEBUG does NOT provide an automated mechanism for +  checking that all accesses to malloced memory stay within their +  bounds. However, there are several add-ons and adaptations of this +  or other mallocs available that do this. +*/ + +#define MALLOC_DEBUG 1 +#if MALLOC_DEBUG +#include <assert.h> +#else +#define assert(x) ((void)0) +#endif + + +/* +  INTERNAL_SIZE_T is the word-size used for internal bookkeeping +  of chunk sizes. + +  The default version is the same as size_t. + +  While not strictly necessary, it is best to define this as an +  unsigned type, even if size_t is a signed type. This may avoid some +  artificial size limitations on some systems. + +  On a 64-bit machine, you may be able to reduce malloc overhead by +  defining INTERNAL_SIZE_T to be a 32 bit `unsigned int' at the +  expense of not being able to handle more than 2^32 of malloced +  space. If this limitation is acceptable, you are encouraged to set +  this unless you are on a platform requiring 16byte alignments. In +  this case the alignment requirements turn out to negate any +  potential advantages of decreasing size_t word size. 
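The casting advice above is easy to get wrong in practice; the point is that a 32-bit unsigned INTERNAL_SIZE_T never becomes negative when widened to unsigned long, so range checks must compare against large unsigned thresholds rather than test for a value below zero. A purely illustrative demonstration, not code from this file:

    #include <stdio.h>

    typedef unsigned int sk_internal_size_t;  /* hypothetical 32-bit INTERNAL_SIZE_T */

    int main(void)
    {
        sk_internal_size_t sz = (sk_internal_size_t)-8;  /* wrapped, "negative looking" size */
        unsigned long wide    = (unsigned long)sz;       /* zero-extends, e.g. 0xfffffff8 */

        if ((long)wide < 0)
            printf("reached only where long is 32 bits\n");
        else
            printf("0x%lx stayed a huge positive value; compare against a limit instead\n",
                   wide);
        return 0;
    }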
+ +  Implementors: Beware of the possible combinations of: +     - INTERNAL_SIZE_T might be signed or unsigned, might be 32 or 64 bits, +       and might be the same width as int or as long +     - size_t might have different width and signedness as INTERNAL_SIZE_T +     - int and long might be 32 or 64 bits, and might be the same width +  To deal with this, most comparisons and difference computations +  among INTERNAL_SIZE_Ts should cast them to unsigned long, being +  aware of the fact that casting an unsigned int to a wider long does +  not sign-extend. (This also makes checking for negative numbers +  awkward.) Some of these casts result in harmless compiler warnings +  on some systems. +*/ + +#ifndef INTERNAL_SIZE_T +#define INTERNAL_SIZE_T size_t +#endif + +/* The corresponding word size */ +#define SIZE_SZ                (sizeof(INTERNAL_SIZE_T)) + + +/* +  MALLOC_ALIGNMENT is the minimum alignment for malloc'ed chunks. +  It must be a power of two at least 2 * SIZE_SZ, even on machines +  for which smaller alignments would suffice. It may be defined as +  larger than this though. Note however that code and data structures +  are optimized for the case of 8-byte alignment. +*/ + + +#ifndef MALLOC_ALIGNMENT +#define MALLOC_ALIGNMENT       (2 * SIZE_SZ) +#endif + +/* The corresponding bit mask value */ +#define MALLOC_ALIGN_MASK      (MALLOC_ALIGNMENT - 1) + + + +/* +  REALLOC_ZERO_BYTES_FREES should be set if a call to +  realloc with zero bytes should be the same as a call to free. +  This is required by the C standard. Otherwise, since this malloc +  returns a unique pointer for malloc(0), so does realloc(p, 0). +*/ + +#ifndef REALLOC_ZERO_BYTES_FREES +#define REALLOC_ZERO_BYTES_FREES 1 +#endif + +/* +  TRIM_FASTBINS controls whether free() of a very small chunk can +  immediately lead to trimming. Setting to true (1) can reduce memory +  footprint, but will almost always slow down programs that use a lot +  of small chunks. + +  Define this only if you are willing to give up some speed to more +  aggressively reduce system-level memory footprint when releasing +  memory in programs that use many small chunks.  You can get +  essentially the same effect by setting MXFAST to 0, but this can +  lead to even greater slowdowns in programs using many small chunks. +  TRIM_FASTBINS is an in-between compile-time option, that disables +  only those chunks bordering topmost memory from being placed in +  fastbins. +*/ + +#ifndef TRIM_FASTBINS +#define TRIM_FASTBINS  0 +#endif + + +/* +  USE_DL_PREFIX will prefix all public routines with the string 'dl'. +  This is necessary when you only want to use this malloc in one part +  of a program, using your regular system malloc elsewhere. +*/ + +#define USE_DL_PREFIX + + +/* +   Two-phase name translation. +   All of the actual routines are given mangled names. +   When wrappers are used, they become the public callable versions. +   When DL_PREFIX is used, the callable names are prefixed. 
+*/ + +#ifdef USE_DL_PREFIX +#define public_cALLOc    cvmx_calloc +#define public_fREe      cvmx_free +#define public_cFREe     dlcfree +#define public_mALLOc    cvmx_malloc +#define public_mEMALIGn  cvmx_memalign +#define public_rEALLOc   cvmx_realloc +#define public_vALLOc    dlvalloc +#define public_pVALLOc   dlpvalloc +#define public_mALLINFo  dlmallinfo +#define public_mALLOPt   dlmallopt +#define public_mTRIm     dlmalloc_trim +#define public_mSTATs    dlmalloc_stats +#define public_mUSABLe   dlmalloc_usable_size +#define public_iCALLOc   dlindependent_calloc +#define public_iCOMALLOc dlindependent_comalloc +#define public_gET_STATe dlget_state +#define public_sET_STATe dlset_state +#else /* USE_DL_PREFIX */ +#ifdef _LIBC +#error _LIBC defined and should not be +/* Special defines for the GNU C library.  */ +#define public_cALLOc    __libc_calloc +#define public_fREe      __libc_free +#define public_cFREe     __libc_cfree +#define public_mALLOc    __libc_malloc +#define public_mEMALIGn  __libc_memalign +#define public_rEALLOc   __libc_realloc +#define public_vALLOc    __libc_valloc +#define public_pVALLOc   __libc_pvalloc +#define public_mALLINFo  __libc_mallinfo +#define public_mALLOPt   __libc_mallopt +#define public_mTRIm     __malloc_trim +#define public_mSTATs    __malloc_stats +#define public_mUSABLe   __malloc_usable_size +#define public_iCALLOc   __libc_independent_calloc +#define public_iCOMALLOc __libc_independent_comalloc +#define public_gET_STATe __malloc_get_state +#define public_sET_STATe __malloc_set_state +#define malloc_getpagesize __getpagesize() +#define open             __open +#define mmap             __mmap +#define munmap           __munmap +#define mremap           __mremap +#define mprotect         __mprotect +#define MORECORE         (*__morecore) +#define MORECORE_FAILURE 0 + +Void_t * __default_morecore (ptrdiff_t); +Void_t *(*__morecore)(ptrdiff_t) = __default_morecore; + +#else /* !_LIBC */ +#define public_cALLOc    calloc +#define public_fREe      free +#define public_cFREe     cfree +#define public_mALLOc    malloc +#define public_mEMALIGn  memalign +#define public_rEALLOc   realloc +#define public_vALLOc    valloc +#define public_pVALLOc   pvalloc +#define public_mALLINFo  mallinfo +#define public_mALLOPt   mallopt +#define public_mTRIm     malloc_trim +#define public_mSTATs    malloc_stats +#define public_mUSABLe   malloc_usable_size +#define public_iCALLOc   independent_calloc +#define public_iCOMALLOc independent_comalloc +#define public_gET_STATe malloc_get_state +#define public_sET_STATe malloc_set_state +#endif /* _LIBC */ +#endif /* USE_DL_PREFIX */ + + +/* +  HAVE_MEMCPY should be defined if you are not otherwise using +  ANSI STD C, but still have memcpy and memset in your C library +  and want to use them in calloc and realloc. Otherwise simple +  macro versions are defined below. + +  USE_MEMCPY should be defined as 1 if you actually want to +  have memset and memcpy called. People report that the macro +  versions are faster than libc versions on some systems. + +  Even if USE_MEMCPY is set to 1, loops to copy/clear small chunks +  (of <= 36 bytes) are manually unrolled in realloc and calloc. 
+*/ + +#define HAVE_MEMCPY + +#ifndef USE_MEMCPY +#ifdef HAVE_MEMCPY +#define USE_MEMCPY 1 +#else +#define USE_MEMCPY 0 +#endif +#endif + + +#if (__STD_C || defined(HAVE_MEMCPY)) + +#ifdef WIN32 +/* On Win32 memset and memcpy are already declared in windows.h */ +#else +#if __STD_C +void* memset(void*, int, size_t); +void* memcpy(void*, const void*, size_t); +#else +Void_t* memset(); +Void_t* memcpy(); +#endif +#endif +#endif + +/* +  MALLOC_FAILURE_ACTION is the action to take before "return 0" when +  malloc fails to be able to return memory, either because memory is +  exhausted or because of illegal arguments. + +  By default, sets errno if running on STD_C platform, else does nothing. +*/ + +#ifndef MALLOC_FAILURE_ACTION +#if __STD_C +#define MALLOC_FAILURE_ACTION \ +   errno = ENOMEM; + +#else +#define MALLOC_FAILURE_ACTION +#endif +#endif + +/* +  MORECORE-related declarations. By default, rely on sbrk +*/ + + +#ifdef LACKS_UNISTD_H +#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) +#if __STD_C +extern Void_t*     sbrk(ptrdiff_t); +#else +extern Void_t*     sbrk(); +#endif +#endif +#endif + +/* +  MORECORE is the name of the routine to call to obtain more memory +  from the system.  See below for general guidance on writing +  alternative MORECORE functions, as well as a version for WIN32 and a +  sample version for pre-OSX macos. +*/ +#undef MORECORE  // not supported +#ifndef MORECORE +#define MORECORE notsupported +#endif + +/* +  MORECORE_FAILURE is the value returned upon failure of MORECORE +  as well as mmap. Since it cannot be an otherwise valid memory address, +  and must reflect values of standard sys calls, you probably ought not +  try to redefine it. +*/ + +#ifndef MORECORE_FAILURE +#define MORECORE_FAILURE (-1) +#endif + +/* +  If MORECORE_CONTIGUOUS is true, take advantage of fact that +  consecutive calls to MORECORE with positive arguments always return +  contiguous increasing addresses.  This is true of unix sbrk.  Even +  if not defined, when regions happen to be contiguous, malloc will +  permit allocations spanning regions obtained from different +  calls. But defining this when applicable enables some stronger +  consistency checks and space efficiencies. +*/ + +#ifndef MORECORE_CONTIGUOUS +#define MORECORE_CONTIGUOUS 0 +#endif + +/* +  Define MORECORE_CANNOT_TRIM if your version of MORECORE +  cannot release space back to the system when given negative +  arguments. This is generally necessary only if you are using +  a hand-crafted MORECORE function that cannot handle negative arguments. +*/ + +#define MORECORE_CANNOT_TRIM 1 + +/*  MORECORE_CLEARS           (default 1) +     The degree to which the routine mapped to MORECORE zeroes out +     memory: never (0), only for newly allocated space (1) or always +     (2).  The distinction between (1) and (2) is necessary because on +     some systems, if the application first decrements and then +     increments the break value, the contents of the reallocated space +     are unspecified. +*/ + +#ifndef MORECORE_CLEARS +#define MORECORE_CLEARS 0 +#endif + + +/* +  Define HAVE_MMAP as true to optionally make malloc() use mmap() to +  allocate very large blocks.  These will be returned to the +  operating system immediately after a free(). Also, if mmap +  is available, it is used as a backup strategy in cases where +  MORECORE fails to provide space from system. + +  This malloc is best tuned to work with mmap for large requests. 
+  If you do not have mmap, operations involving very large chunks (1MB +  or so) may be slower than you'd like. +*/ + +#undef HAVE_MMAP +#ifndef HAVE_MMAP +#define HAVE_MMAP 0 + +/* +   Standard unix mmap using /dev/zero clears memory so calloc doesn't +   need to. +*/ + +#ifndef MMAP_CLEARS +#define MMAP_CLEARS 0 +#endif + +#else /* no mmap */ +#ifndef MMAP_CLEARS +#define MMAP_CLEARS 0 +#endif +#endif + + +/* +   MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if +   sbrk fails, and mmap is used as a backup (which is done only if +   HAVE_MMAP).  The value must be a multiple of page size.  This +   backup strategy generally applies only when systems have "holes" in +   address space, so sbrk cannot perform contiguous expansion, but +   there is still space available on system.  On systems for which +   this is known to be useful (i.e. most linux kernels), this occurs +   only when programs allocate huge amounts of memory.  Between this, +   and the fact that mmap regions tend to be limited, the size should +   be large, to avoid too many mmap calls and thus avoid running out +   of kernel resources. +*/ + +#ifndef MMAP_AS_MORECORE_SIZE +#define MMAP_AS_MORECORE_SIZE (1024 * 1024) +#endif + +/* +  Define HAVE_MREMAP to make realloc() use mremap() to re-allocate +  large blocks.  This is currently only possible on Linux with +  kernel versions newer than 1.3.77. +*/ +#undef linux +#ifndef HAVE_MREMAP +#ifdef linux +#define HAVE_MREMAP 1 +#else +#define HAVE_MREMAP 0 +#endif + +#endif /* HAVE_MMAP */ + +/* Define USE_ARENAS to enable support for multiple `arenas'.  These +   are allocated using mmap(), are necessary for threads and +   occasionally useful to overcome address space limitations affecting +   sbrk(). */ + +#ifndef USE_ARENAS +#define USE_ARENAS 1  // we 'manually' mmap the arenas..... +#endif + + +/* +  The system page size. To the extent possible, this malloc manages +  memory from the system in page-size units.  Note that this value is +  cached during initialization into a field of malloc_state. So even +  if malloc_getpagesize is a function, it is only called once. + +  The following mechanics for getpagesize were adapted from bsd/gnu +  getpagesize.h. If none of the system-probes here apply, a value of +  4096 is used, which should be OK: If they don't apply, then using +  the actual value probably doesn't impact performance. 
+*/ + + +#define malloc_getpagesize (4096) +#ifndef malloc_getpagesize + +#ifndef LACKS_UNISTD_H +#  include <unistd.h> +#endif + +#  ifdef _SC_PAGESIZE         /* some SVR4 systems omit an underscore */ +#    ifndef _SC_PAGE_SIZE +#      define _SC_PAGE_SIZE _SC_PAGESIZE +#    endif +#  endif + +#  ifdef _SC_PAGE_SIZE +#    define malloc_getpagesize sysconf(_SC_PAGE_SIZE) +#  else +#    if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE) +       extern size_t getpagesize(); +#      define malloc_getpagesize getpagesize() +#    else +#      ifdef WIN32 /* use supplied emulation of getpagesize */ +#        define malloc_getpagesize getpagesize() +#      else +#        ifndef LACKS_SYS_PARAM_H +#          include <sys/param.h> +#        endif +#        ifdef EXEC_PAGESIZE +#          define malloc_getpagesize EXEC_PAGESIZE +#        else +#          ifdef NBPG +#            ifndef CLSIZE +#              define malloc_getpagesize NBPG +#            else +#              define malloc_getpagesize (NBPG * CLSIZE) +#            endif +#          else +#            ifdef NBPC +#              define malloc_getpagesize NBPC +#            else +#              ifdef PAGESIZE +#                define malloc_getpagesize PAGESIZE +#              else /* just guess */ +#                define malloc_getpagesize (4096) +#              endif +#            endif +#          endif +#        endif +#      endif +#    endif +#  endif +#endif + +/* +  This version of malloc supports the standard SVID/XPG mallinfo +  routine that returns a struct containing usage properties and +  statistics. It should work on any SVID/XPG compliant system that has +  a /usr/include/malloc.h defining struct mallinfo. (If you'd like to +  install such a thing yourself, cut out the preliminary declarations +  as described above and below and save them in a malloc.h file. But +  there's no compelling reason to bother to do this.) + +  The main declaration needed is the mallinfo struct that is returned +  (by-copy) by mallinfo().  The SVID/XPG malloinfo struct contains a +  bunch of fields that are not even meaningful in this version of +  malloc.  These fields are are instead filled by mallinfo() with +  other numbers that might be of interest. + +  HAVE_USR_INCLUDE_MALLOC_H should be set if you have a +  /usr/include/malloc.h file that includes a declaration of struct +  mallinfo.  If so, it is included; else an SVID2/XPG2 compliant +  version is declared below.  These must be precisely the same for +  mallinfo() to work.  The original SVID version of this struct, +  defined on most systems with mallinfo, declares all fields as +  ints. But some others define as unsigned long. If your system +  defines the fields using a type of different width than listed here, +  you must #include your system version and #define +  HAVE_USR_INCLUDE_MALLOC_H. +*/ + +/* #define HAVE_USR_INCLUDE_MALLOC_H */ + +#ifdef HAVE_USR_INCLUDE_MALLOC_H +#include "/usr/include/malloc.h" +#endif + + +/* ---------- description of public routines ------------ */ + +/* +  malloc(size_t n) +  Returns a pointer to a newly allocated chunk of at least n bytes, or null +  if no space is available. Additionally, on failure, errno is +  set to ENOMEM on ANSI C systems. + +  If n is zero, malloc returns a minumum-sized chunk. (The minimum +  size is 16 bytes on most 32bit systems, and 24 or 32 bytes on 64bit +  systems.)  
On most systems, size_t is an unsigned type, so calls +  with negative arguments are interpreted as requests for huge amounts +  of space, which will often fail. The maximum supported value of n +  differs across systems, but is in all cases less than the maximum +  representable value of a size_t. +*/ +#if __STD_C +Void_t*  public_mALLOc(cvmx_arena_list_t arena_list, size_t); +#else +Void_t*  public_mALLOc(); +#endif + +/* +  free(Void_t* p) +  Releases the chunk of memory pointed to by p, that had been previously +  allocated using malloc or a related routine such as realloc. +  It has no effect if p is null. It can have arbitrary (i.e., bad!) +  effects if p has already been freed. + +  Unless disabled (using mallopt), freeing very large spaces will +  when possible, automatically trigger operations that give +  back unused memory to the system, thus reducing program footprint. +*/ +#if __STD_C +void     public_fREe(Void_t*); +#else +void     public_fREe(); +#endif + +/* +  calloc(size_t n_elements, size_t element_size); +  Returns a pointer to n_elements * element_size bytes, with all locations +  set to zero. +*/ +#if __STD_C +Void_t*  public_cALLOc(cvmx_arena_list_t arena_list, size_t, size_t); +#else +Void_t*  public_cALLOc(); +#endif + +/* +  realloc(Void_t* p, size_t n) +  Returns a pointer to a chunk of size n that contains the same data +  as does chunk p up to the minimum of (n, p's size) bytes, or null +  if no space is available. + +  The returned pointer may or may not be the same as p. The algorithm +  prefers extending p when possible, otherwise it employs the +  equivalent of a malloc-copy-free sequence. + +  If p is null, realloc is equivalent to malloc. + +  If space is not available, realloc returns null, errno is set (if on +  ANSI) and p is NOT freed. + +  if n is for fewer bytes than already held by p, the newly unused +  space is lopped off and freed if possible.  Unless the #define +  REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of +  zero (re)allocates a minimum-sized chunk. + +  Large chunks that were internally obtained via mmap will always +  be reallocated using malloc-copy-free sequences unless +  the system supports MREMAP (currently only linux). + +  The old unix realloc convention of allowing the last-free'd chunk +  to be used as an argument to realloc is not supported. +*/ +#if __STD_C +Void_t*  public_rEALLOc(cvmx_arena_list_t arena_list, Void_t*, size_t); +#else +Void_t*  public_rEALLOc(); +#endif + +/* +  memalign(size_t alignment, size_t n); +  Returns a pointer to a newly allocated chunk of n bytes, aligned +  in accord with the alignment argument. + +  The alignment argument should be a power of two. If the argument is +  not a power of two, the nearest greater power is used. +  8-byte alignment is guaranteed by normal malloc calls, so don't +  bother calling memalign with an argument of 8 or less. + +  Overreliance on memalign is a sure way to fragment space. +*/ +#if __STD_C +Void_t*  public_mEMALIGn(cvmx_arena_list_t arena_list, size_t, size_t); +#else +Void_t*  public_mEMALIGn(); +#endif + +/* +  valloc(size_t n); +  Equivalent to memalign(pagesize, n), where pagesize is the page +  size of the system. If the pagesize is unknown, 4096 is used. +*/ +#if __STD_C +Void_t*  public_vALLOc(size_t); +#else +Void_t*  public_vALLOc(); +#endif + + + +/* +  mallopt(int parameter_number, int parameter_value) +  Sets tunable parameters The format is to provide a +  (parameter-number, parameter-value) pair.  
mallopt then sets the +  corresponding parameter to the argument value if it can (i.e., so +  long as the value is meaningful), and returns 1 if successful else +  0.  SVID/XPG/ANSI defines four standard param numbers for mallopt, +  normally defined in malloc.h.  Only one of these (M_MXFAST) is used +  in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply, +  so setting them has no effect. But this malloc also supports four +  other options in mallopt. See below for details.  Briefly, supported +  parameters are as follows (listed defaults are for "typical" +  configurations). + +  Symbol            param #   default    allowed param values +  M_MXFAST          1         64         0-80  (0 disables fastbins) +  M_TRIM_THRESHOLD -1         128*1024   any   (-1U disables trimming) +  M_TOP_PAD        -2         0          any +  M_MMAP_THRESHOLD -3         128*1024   any   (or 0 if no MMAP support) +  M_MMAP_MAX       -4         65536      any   (0 disables use of mmap) +*/ +#if __STD_C +int      public_mALLOPt(int, int); +#else +int      public_mALLOPt(); +#endif + + +/* +  mallinfo() +  Returns (by copy) a struct containing various summary statistics: + +  arena:     current total non-mmapped bytes allocated from system +  ordblks:   the number of free chunks +  smblks:    the number of fastbin blocks (i.e., small chunks that +               have been freed but not use resused or consolidated) +  hblks:     current number of mmapped regions +  hblkhd:    total bytes held in mmapped regions +  usmblks:   the maximum total allocated space. This will be greater +                than current total if trimming has occurred. +  fsmblks:   total bytes held in fastbin blocks +  uordblks:  current total allocated space (normal or mmapped) +  fordblks:  total free space +  keepcost:  the maximum number of bytes that could ideally be released +               back to system via malloc_trim. ("ideally" means that +               it ignores page restrictions etc.) + +  Because these fields are ints, but internal bookkeeping may +  be kept as longs, the reported values may wrap around zero and +  thus be inaccurate. +*/ +#if __STD_C +struct mallinfo public_mALLINFo(void); +#else +struct mallinfo public_mALLINFo(); +#endif + +/* +  independent_calloc(size_t n_elements, size_t element_size, Void_t* chunks[]); + +  independent_calloc is similar to calloc, but instead of returning a +  single cleared space, it returns an array of pointers to n_elements +  independent elements that can hold contents of size elem_size, each +  of which starts out cleared, and can be independently freed, +  realloc'ed etc. The elements are guaranteed to be adjacently +  allocated (this is not guaranteed to occur with multiple callocs or +  mallocs), which may also improve cache locality in some +  applications. + +  The "chunks" argument is optional (i.e., may be null, which is +  probably the most typical usage). If it is null, the returned array +  is itself dynamically allocated and should also be freed when it is +  no longer needed. Otherwise, the chunks array must be of at least +  n_elements in length. It is filled in with the pointers to the +  chunks. + +  In either case, independent_calloc returns this pointer array, or +  null if the allocation failed.  If n_elements is zero and "chunks" +  is null, it returns a chunk representing an array with zero elements +  (which should be freed if not wanted). + +  Each element must be individually freed when it is no longer +  needed. 
If you'd like to instead be able to free all at once, you +  should instead use regular calloc and assign pointers into this +  space to represent elements.  (In this case though, you cannot +  independently free elements.) + +  independent_calloc simplifies and speeds up implementations of many +  kinds of pools.  It may also be useful when constructing large data +  structures that initially have a fixed number of fixed-sized nodes, +  but the number is not known at compile time, and some of the nodes +  may later need to be freed. For example: + +  struct Node { int item; struct Node* next; }; + +  struct Node* build_list() { +    struct Node** pool; +    int n = read_number_of_nodes_needed(); +    if (n <= 0) return 0; +    pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0); +    if (pool == 0) die(); +    // organize into a linked list... +    struct Node* first = pool[0]; +    for (i = 0; i < n-1; ++i) +      pool[i]->next = pool[i+1]; +    free(pool);     // Can now free the array (or not, if it is needed later) +    return first; +  } +*/ +#if __STD_C +Void_t** public_iCALLOc(size_t, size_t, Void_t**); +#else +Void_t** public_iCALLOc(); +#endif + +/* +  independent_comalloc(size_t n_elements, size_t sizes[], Void_t* chunks[]); + +  independent_comalloc allocates, all at once, a set of n_elements +  chunks with sizes indicated in the "sizes" array.    It returns +  an array of pointers to these elements, each of which can be +  independently freed, realloc'ed etc. The elements are guaranteed to +  be adjacently allocated (this is not guaranteed to occur with +  multiple callocs or mallocs), which may also improve cache locality +  in some applications. + +  The "chunks" argument is optional (i.e., may be null). If it is null +  the returned array is itself dynamically allocated and should also +  be freed when it is no longer needed. Otherwise, the chunks array +  must be of at least n_elements in length. It is filled in with the +  pointers to the chunks. + +  In either case, independent_comalloc returns this pointer array, or +  null if the allocation failed.  If n_elements is zero and chunks is +  null, it returns a chunk representing an array with zero elements +  (which should be freed if not wanted). + +  Each element must be individually freed when it is no longer +  needed. If you'd like to instead be able to free all at once, you +  should instead use a single regular malloc, and assign pointers at +  particular offsets in the aggregate space. (In this case though, you +  cannot independently free elements.) + +  independent_comallac differs from independent_calloc in that each +  element may have a different size, and also that it does not +  automatically clear elements. + +  independent_comalloc can be used to speed up allocation in cases +  where several structs or objects must always be allocated at the +  same time.  For example: + +  struct Head { ... } +  struct Foot { ... } + +  void send_message(char* msg) { +    int msglen = strlen(msg); +    size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) }; +    void* chunks[3]; +    if (independent_comalloc(3, sizes, chunks) == 0) +      die(); +    struct Head* head = (struct Head*)(chunks[0]); +    char*        body = (char*)(chunks[1]); +    struct Foot* foot = (struct Foot*)(chunks[2]); +    // ... +  } + +  In general though, independent_comalloc is worth using only for +  larger values of n_elements. 
For small values, you probably won't +  detect enough difference from series of malloc calls to bother. + +  Overuse of independent_comalloc can increase overall memory usage, +  since it cannot reuse existing noncontiguous small chunks that +  might be available for some of the elements. +*/ +#if __STD_C +Void_t** public_iCOMALLOc(size_t, size_t*, Void_t**); +#else +Void_t** public_iCOMALLOc(); +#endif + + +/* +  pvalloc(size_t n); +  Equivalent to valloc(minimum-page-that-holds(n)), that is, +  round up n to nearest pagesize. + */ +#if __STD_C +Void_t*  public_pVALLOc(size_t); +#else +Void_t*  public_pVALLOc(); +#endif + +/* +  cfree(Void_t* p); +  Equivalent to free(p). + +  cfree is needed/defined on some systems that pair it with calloc, +  for odd historical reasons (such as: cfree is used in example +  code in the first edition of K&R). +*/ +#if __STD_C +void     public_cFREe(Void_t*); +#else +void     public_cFREe(); +#endif + +/* +  malloc_trim(size_t pad); + +  If possible, gives memory back to the system (via negative +  arguments to sbrk) if there is unused memory at the `high' end of +  the malloc pool. You can call this after freeing large blocks of +  memory to potentially reduce the system-level memory requirements +  of a program. However, it cannot guarantee to reduce memory. Under +  some allocation patterns, some large free blocks of memory will be +  locked between two used chunks, so they cannot be given back to +  the system. + +  The `pad' argument to malloc_trim represents the amount of free +  trailing space to leave untrimmed. If this argument is zero, +  only the minimum amount of memory to maintain internal data +  structures will be left (one page or less). Non-zero arguments +  can be supplied to maintain enough trailing space to service +  future expected allocations without having to re-obtain memory +  from the system. + +  Malloc_trim returns 1 if it actually released any memory, else 0. +  On systems that do not support "negative sbrks", it will always +  rreturn 0. +*/ +#if __STD_C +int      public_mTRIm(size_t); +#else +int      public_mTRIm(); +#endif + +/* +  malloc_usable_size(Void_t* p); + +  Returns the number of bytes you can actually use in +  an allocated chunk, which may be more than you requested (although +  often not) due to alignment and minimum size constraints. +  You can use this many bytes without worrying about +  overwriting other allocated objects. This is not a particularly great +  programming practice. malloc_usable_size can be more useful in +  debugging and assertions, for example: + +  p = malloc(n); +  assert(malloc_usable_size(p) >= 256); + +*/ +#if __STD_C +size_t   public_mUSABLe(Void_t*); +#else +size_t   public_mUSABLe(); +#endif + +/* +  malloc_stats(); +  Prints on stderr the amount of space obtained from the system (both +  via sbrk and mmap), the maximum amount (which may be more than +  current if malloc_trim and/or munmap got called), and the current +  number of bytes allocated via malloc (or realloc, etc) but not yet +  freed. Note that this is the number of bytes allocated, not the +  number requested. It will be larger than the number requested +  because of alignment and bookkeeping overhead. Because it includes +  alignment wastage as being in use, this figure may be greater than +  zero even when no user-level chunks are allocated. + +  The reported current and maximum system memory can be inaccurate if +  a program makes other calls to system memory allocation functions +  (normally sbrk) outside of malloc. 
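A short usage sketch for the two routines documented above, using the public names this port maps them to (cvmx_malloc and cvmx_free take a cvmx_arena_list_t in this file, while the remaining entry points keep their dl prefixes). The arena list is assumed to have been initialized elsewhere, and the dl-prefixed prototypes are assumed to be visible to the caller:

    #include "cvmx-malloc.h"   /* cvmx_arena_list_t, cvmx_malloc, cvmx_free */
    #include <assert.h>

    void sk_usable_size_demo(cvmx_arena_list_t arena_list)
    {
        char *p = cvmx_malloc(arena_list, 100);
        if (p == NULL)
            return;
        /* Usable space is at least what was requested, and often a little
           more because of alignment and minimum-size rounding. */
        assert(dlmalloc_usable_size(p) >= 100);
        dlmalloc_stats();      /* prints allocation totals on stderr */
        cvmx_free(p);
    }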
+ +  malloc_stats prints only the most commonly interesting statistics. +  More information can be obtained by calling mallinfo. + +*/ +#if __STD_C +void     public_mSTATs(void); +#else +void     public_mSTATs(); +#endif + +/* +  malloc_get_state(void); + +  Returns the state of all malloc variables in an opaque data +  structure. +*/ +#if __STD_C +Void_t*  public_gET_STATe(void); +#else +Void_t*  public_gET_STATe(); +#endif + +/* +  malloc_set_state(Void_t* state); + +  Restore the state of all malloc variables from data obtained with +  malloc_get_state(). +*/ +#if __STD_C +int      public_sET_STATe(Void_t*); +#else +int      public_sET_STATe(); +#endif + +#ifdef _LIBC +/* +  posix_memalign(void **memptr, size_t alignment, size_t size); + +  POSIX wrapper like memalign(), checking for validity of size. +*/ +int      __posix_memalign(void **, size_t, size_t); +#endif + +/* mallopt tuning options */ + +/* +  M_MXFAST is the maximum request size used for "fastbins", special bins +  that hold returned chunks without consolidating their spaces. This +  enables future requests for chunks of the same size to be handled +  very quickly, but can increase fragmentation, and thus increase the +  overall memory footprint of a program. + +  This malloc manages fastbins very conservatively yet still +  efficiently, so fragmentation is rarely a problem for values less +  than or equal to the default.  The maximum supported value of MXFAST +  is 80. You wouldn't want it any higher than this anyway.  Fastbins +  are designed especially for use with many small structs, objects or +  strings -- the default handles structs/objects/arrays with sizes up +  to 8 4byte fields, or small strings representing words, tokens, +  etc. Using fastbins for larger objects normally worsens +  fragmentation without improving speed. + +  M_MXFAST is set in REQUEST size units. It is internally used in +  chunksize units, which adds padding and alignment.  You can reduce +  M_MXFAST to 0 to disable all use of fastbins.  This causes the malloc +  algorithm to be a closer approximation of fifo-best-fit in all cases, +  not just for larger requests, but will generally cause it to be +  slower. +*/ + + +/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */ +#ifndef M_MXFAST +#define M_MXFAST            1 +#endif + +#ifndef DEFAULT_MXFAST +#define DEFAULT_MXFAST     64 +#endif + + +/* +  M_TRIM_THRESHOLD is the maximum amount of unused top-most memory +  to keep before releasing via malloc_trim in free(). + +  Automatic trimming is mainly useful in long-lived programs. +  Because trimming via sbrk can be slow on some systems, and can +  sometimes be wasteful (in cases where programs immediately +  afterward allocate more large chunks) the value should be high +  enough so that your overall system performance would improve by +  releasing this much memory. + +  The trim threshold and the mmap control parameters (see below) +  can be traded off with one another. Trimming and mmapping are +  two different ways of releasing unused memory back to the +  system. Between these two, it is often possible to keep +  system-level demands of a long-lived program down to a bare +  minimum. For example, in one test suite of sessions measuring +  the XF86 X server on Linux, using a trim threshold of 128K and a +  mmap threshold of 192K led to near-minimal long term resource +  consumption. + +  If you are using this malloc in a long-lived program, it should +  pay to experiment with these values.  
As a rough guide, you +  might set to a value close to the average size of a process +  (program) running on your system.  Releasing this much memory +  would allow such a process to run in memory.  Generally, it's +  worth it to tune for trimming rather tham memory mapping when a +  program undergoes phases where several large chunks are +  allocated and released in ways that can reuse each other's +  storage, perhaps mixed with phases where there are no such +  chunks at all.  And in well-behaved long-lived programs, +  controlling release of large blocks via trimming versus mapping +  is usually faster. + +  However, in most programs, these parameters serve mainly as +  protection against the system-level effects of carrying around +  massive amounts of unneeded memory. Since frequent calls to +  sbrk, mmap, and munmap otherwise degrade performance, the default +  parameters are set to relatively high values that serve only as +  safeguards. + +  The trim value It must be greater than page size to have any useful +  effect.  To disable trimming completely, you can set to +  (unsigned long)(-1) + +  Trim settings interact with fastbin (MXFAST) settings: Unless +  TRIM_FASTBINS is defined, automatic trimming never takes place upon +  freeing a chunk with size less than or equal to MXFAST. Trimming is +  instead delayed until subsequent freeing of larger chunks. However, +  you can still force an attempted trim by calling malloc_trim. + +  Also, trimming is not generally possible in cases where +  the main arena is obtained via mmap. + +  Note that the trick some people use of mallocing a huge space and +  then freeing it at program startup, in an attempt to reserve system +  memory, doesn't have the intended effect under automatic trimming, +  since that memory will immediately be returned to the system. +*/ + +#define M_TRIM_THRESHOLD       -1 + +#ifndef DEFAULT_TRIM_THRESHOLD +#define DEFAULT_TRIM_THRESHOLD (128 * 1024) +#endif + +/* +  M_TOP_PAD is the amount of extra `padding' space to allocate or +  retain whenever sbrk is called. It is used in two ways internally: + +  * When sbrk is called to extend the top of the arena to satisfy +  a new malloc request, this much padding is added to the sbrk +  request. + +  * When malloc_trim is called automatically from free(), +  it is used as the `pad' argument. + +  In both cases, the actual amount of padding is rounded +  so that the end of the arena is always a system page boundary. + +  The main reason for using padding is to avoid calling sbrk so +  often. Having even a small pad greatly reduces the likelihood +  that nearly every malloc request during program start-up (or +  after trimming) will invoke sbrk, which needlessly wastes +  time. + +  Automatic rounding-up to page-size units is normally sufficient +  to avoid measurable overhead, so the default is 0.  However, in +  systems where sbrk is relatively slow, it can pay to increase +  this value, at the expense of carrying around more memory than +  the program needs. +*/ + +#define M_TOP_PAD              -2 + +#ifndef DEFAULT_TOP_PAD +#define DEFAULT_TOP_PAD        (0) +#endif + +/* +  M_MMAP_THRESHOLD is the request size threshold for using mmap() +  to service a request. Requests of at least this size that cannot +  be allocated using already-existing space will be serviced via mmap. +  (If enough normal freed space already exists it is used instead.) 
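Each of these tunables (the trim and pad values above, and the mmap parameters described next) is set at run time through mallopt, which this port exposes as dlmallopt. A hedged sketch of the call pattern follows; note that in this Cavium build the trim- and mmap-related options are effectively inert, since MORECORE is unsupported and HAVE_MMAP is forced to 0 earlier in the file.

    /* Parameter numbers mirror the #defines in this file:
       M_MXFAST 1, M_TRIM_THRESHOLD -1, M_TOP_PAD -2, M_MMAP_THRESHOLD -3. */
    static void sk_tune_allocator(void)
    {
        dlmallopt(M_MXFAST, 0);                   /* 0 disables fastbins entirely        */
        dlmallopt(M_TRIM_THRESHOLD, 256 * 1024);  /* keep up to 256K of free top memory  */
        dlmallopt(M_TOP_PAD, 64 * 1024);          /* 64K extra padding when growing core */
    }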
+ +  Using mmap segregates relatively large chunks of memory so that +  they can be individually obtained and released from the host +  system. A request serviced through mmap is never reused by any +  other request (at least not directly; the system may just so +  happen to remap successive requests to the same locations). + +  Segregating space in this way has the benefits that: + +   1. Mmapped space can ALWAYS be individually released back +      to the system, which helps keep the system level memory +      demands of a long-lived program low. +   2. Mapped memory can never become `locked' between +      other chunks, as can happen with normally allocated chunks, which +      means that even trimming via malloc_trim would not release them. +   3. On some systems with "holes" in address spaces, mmap can obtain +      memory that sbrk cannot. + +  However, it has the disadvantages that: + +   1. The space cannot be reclaimed, consolidated, and then +      used to service later requests, as happens with normal chunks. +   2. It can lead to more wastage because of mmap page alignment +      requirements +   3. It causes malloc performance to be more dependent on host +      system memory management support routines which may vary in +      implementation quality and may impose arbitrary +      limitations. Generally, servicing a request via normal +      malloc steps is faster than going through a system's mmap. + +  The advantages of mmap nearly always outweigh disadvantages for +  "large" chunks, but the value of "large" varies across systems.  The +  default is an empirically derived value that works well in most +  systems. +*/ + +#define M_MMAP_THRESHOLD      -3 + +#ifndef DEFAULT_MMAP_THRESHOLD +#define DEFAULT_MMAP_THRESHOLD (128 * 1024) +#endif + +/* +  M_MMAP_MAX is the maximum number of requests to simultaneously +  service using mmap. This parameter exists because +  some systems have a limited number of internal tables for +  use by mmap, and using more than a few of them may degrade +  performance. + +  The default is set to a value that serves only as a safeguard. +  Setting to 0 disables use of mmap for servicing large requests.  If +  HAVE_MMAP is not set, the default value is 0, and attempts to set it +  to non-zero values in mallopt will fail. +*/ + +#define M_MMAP_MAX             -4 + +#ifndef DEFAULT_MMAP_MAX +#if HAVE_MMAP +#define DEFAULT_MMAP_MAX       (65536) +#else +#define DEFAULT_MMAP_MAX       (0) +#endif +#endif + +#ifdef __cplusplus +};  /* end of extern "C" */ +#endif + +#include <cvmx-spinlock.h> +#include "malloc.h" +#include "thread-m.h" + +#ifdef DEBUG_PRINTS +#define debug_printf    printf +#else +#define debug_printf(format, args...)   +#endif + +#ifndef BOUNDED_N +#define BOUNDED_N(ptr, sz) (ptr) +#endif +#ifndef RETURN_ADDRESS +#define RETURN_ADDRESS(X_) (NULL) +#endif + +/* On some platforms we can compile internal, not exported functions better. +   Let the environment provide a macro and define it to be empty if it +   is not available.  */ +#ifndef internal_function +# define internal_function +#endif + +/* Forward declarations.  */ +struct malloc_chunk; +typedef struct malloc_chunk* mchunkptr; + +/* Internal routines.  
*/ + +#if __STD_C + +static Void_t*         _int_malloc(mstate, size_t); +static void            _int_free(mstate, Void_t*); +static Void_t*         _int_realloc(mstate, Void_t*, size_t); +static Void_t*         _int_memalign(mstate, size_t, size_t); +static Void_t*         _int_valloc(mstate, size_t); +static Void_t*  _int_pvalloc(mstate, size_t); +static Void_t*  cALLOc(cvmx_arena_list_t arena_list, size_t, size_t); +static Void_t** _int_icalloc(mstate, size_t, size_t, Void_t**); +static Void_t** _int_icomalloc(mstate, size_t, size_t*, Void_t**); +static int      mTRIm(size_t); +static size_t   mUSABLe(Void_t*); +static void     mSTATs(void); +static int      mALLOPt(int, int); +static struct mallinfo mALLINFo(mstate); + +static Void_t* internal_function mem2mem_check(Void_t *p, size_t sz); +static int internal_function top_check(void); +static void internal_function munmap_chunk(mchunkptr p); +#if HAVE_MREMAP +static mchunkptr internal_function mremap_chunk(mchunkptr p, size_t new_size); +#endif + +static Void_t*   malloc_check(size_t sz, const Void_t *caller); +static void      free_check(Void_t* mem, const Void_t *caller); +static Void_t*   realloc_check(Void_t* oldmem, size_t bytes, +			       const Void_t *caller); +static Void_t*   memalign_check(size_t alignment, size_t bytes, +				const Void_t *caller); +#ifndef NO_THREADS +static Void_t*   malloc_starter(size_t sz, const Void_t *caller); +static void      free_starter(Void_t* mem, const Void_t *caller); +static Void_t*   malloc_atfork(size_t sz, const Void_t *caller); +static void      free_atfork(Void_t* mem, const Void_t *caller); +#endif + +#else + +Void_t*         _int_malloc(); +void            _int_free(); +Void_t*         _int_realloc(); +Void_t*         _int_memalign(); +Void_t*         _int_valloc(); +Void_t*         _int_pvalloc(); +/*static Void_t*  cALLOc();*/ +static Void_t** _int_icalloc(); +static Void_t** _int_icomalloc(); +static int      mTRIm(); +static size_t   mUSABLe(); +static void     mSTATs(); +static int      mALLOPt(); +static struct mallinfo mALLINFo(); + +#endif + + + + +/* ------------- Optional versions of memcopy ---------------- */ + + +#if USE_MEMCPY + +/* +  Note: memcpy is ONLY invoked with non-overlapping regions, +  so the (usually slower) memmove is not needed. +*/ + +#define MALLOC_COPY(dest, src, nbytes)  memcpy(dest, src, nbytes) +#define MALLOC_ZERO(dest, nbytes)       memset(dest, 0,   nbytes) + +#else /* !USE_MEMCPY */ + +/* Use Duff's device for good zeroing/copying performance. 
*/ + +#define MALLOC_ZERO(charp, nbytes)                                            \ +do {                                                                          \ +  INTERNAL_SIZE_T* mzp = (INTERNAL_SIZE_T*)(charp);                           \ +  unsigned long mctmp = (nbytes)/sizeof(INTERNAL_SIZE_T);                     \ +  long mcn;                                                                   \ +  if (mctmp < 8) mcn = 0; else { mcn = (mctmp-1)/8; mctmp %= 8; }             \ +  switch (mctmp) {                                                            \ +    case 0: for(;;) { *mzp++ = 0;                                             \ +    case 7:           *mzp++ = 0;                                             \ +    case 6:           *mzp++ = 0;                                             \ +    case 5:           *mzp++ = 0;                                             \ +    case 4:           *mzp++ = 0;                                             \ +    case 3:           *mzp++ = 0;                                             \ +    case 2:           *mzp++ = 0;                                             \ +    case 1:           *mzp++ = 0; if(mcn <= 0) break; mcn--; }                \ +  }                                                                           \ +} while(0) + +#define MALLOC_COPY(dest,src,nbytes)                                          \ +do {                                                                          \ +  INTERNAL_SIZE_T* mcsrc = (INTERNAL_SIZE_T*) src;                            \ +  INTERNAL_SIZE_T* mcdst = (INTERNAL_SIZE_T*) dest;                           \ +  unsigned long mctmp = (nbytes)/sizeof(INTERNAL_SIZE_T);                     \ +  long mcn;                                                                   \ +  if (mctmp < 8) mcn = 0; else { mcn = (mctmp-1)/8; mctmp %= 8; }             \ +  switch (mctmp) {                                                            \ +    case 0: for(;;) { *mcdst++ = *mcsrc++;                                    \ +    case 7:           *mcdst++ = *mcsrc++;                                    \ +    case 6:           *mcdst++ = *mcsrc++;                                    \ +    case 5:           *mcdst++ = *mcsrc++;                                    \ +    case 4:           *mcdst++ = *mcsrc++;                                    \ +    case 3:           *mcdst++ = *mcsrc++;                                    \ +    case 2:           *mcdst++ = *mcsrc++;                                    \ +    case 1:           *mcdst++ = *mcsrc++; if(mcn <= 0) break; mcn--; }       \ +  }                                                                           \ +} while(0) + +#endif + +/* ------------------ MMAP support ------------------  */ + + +#if HAVE_MMAP + +#include <fcntl.h> +#ifndef LACKS_SYS_MMAN_H +#include <sys/mman.h> +#endif + +#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) +# define MAP_ANONYMOUS MAP_ANON +#endif +#if !defined(MAP_FAILED) +# define MAP_FAILED ((char*)-1) +#endif + +#ifndef MAP_NORESERVE +# ifdef MAP_AUTORESRV +#  define MAP_NORESERVE MAP_AUTORESRV +# else +#  define MAP_NORESERVE 0 +# endif +#endif + +/* +   Nearly all versions of mmap support MAP_ANONYMOUS, +   so the following is unlikely to be needed, but is +   supplied just in case. +*/ + +#ifndef MAP_ANONYMOUS + +static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */ + +#define MMAP(addr, size, prot, flags) ((dev_zero_fd < 0) ? 
\ + (dev_zero_fd = open("/dev/zero", O_RDWR), \ +  mmap((addr), (size), (prot), (flags), dev_zero_fd, 0)) : \ +   mmap((addr), (size), (prot), (flags), dev_zero_fd, 0)) + +#else + +#define MMAP(addr, size, prot, flags) \ + (mmap((addr), (size), (prot), (flags)|MAP_ANONYMOUS, -1, 0)) + +#endif + + +#endif /* HAVE_MMAP */ + + +/* +  -----------------------  Chunk representations ----------------------- +*/ + + +/* +  This struct declaration is misleading (but accurate and necessary). +  It declares a "view" into memory allowing access to necessary +  fields at known offsets from a given base. See explanation below. +*/ +struct malloc_chunk { + +  INTERNAL_SIZE_T      prev_size;  /* Size of previous chunk (if free).  */ +  INTERNAL_SIZE_T      size;       /* Size in bytes, including overhead. */ +  mstate               arena_ptr;  /* ptr to arena chunk belongs to */ + +  struct malloc_chunk* fd;         /* double links -- used only if free. */ +  struct malloc_chunk* bk; +}; + + +/* +   malloc_chunk details: + +    (The following includes lightly edited explanations by Colin Plumb.) + +    Chunks of memory are maintained using a `boundary tag' method as +    described in e.g., Knuth or Standish.  (See the paper by Paul +    Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a +    survey of such techniques.)  Sizes of free chunks are stored both +    in the front of each chunk and at the end.  This makes +    consolidating fragmented chunks into bigger chunks very fast.  The +    size fields also hold bits representing whether chunks are free or +    in use. + +    An allocated chunk looks like this: + + +    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +            |             Size of previous chunk, if allocated            | | +            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +            |             Size of chunk, in bytes                         |P| +      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +            |             User data starts here...                          . +            .                                                               . +            .             (malloc_usable_space() bytes)                     . +            .                                                               | +nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +            |             Size of chunk                                     | +            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + +    Where "chunk" is the front of the chunk for the purpose of most of +    the malloc code, but "mem" is the pointer that is returned to the +    user.  "Nextchunk" is the beginning of the next contiguous chunk. + +    Chunks always begin on even word boundries, so the mem portion +    (which is returned to the user) is also on an even word boundary, and +    thus at least double-word aligned. 
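+
+    (Editor's illustrative sketch, not part of the original source: with the
+    extra arena_ptr word this port adds to the header, the chunk/mem
+    relationship drawn above is plain pointer arithmetic, e.g.
+
+        void      *mem = (char *)p + 2*SIZE_SZ + sizeof(void *);
+        mchunkptr  q   = (mchunkptr)((char *)mem - 2*SIZE_SZ - sizeof(void *));
+
+    so q == p again; this is exactly what the chunk2mem() and mem2chunk()
+    macros below expand to.)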
+ +    Free chunks are stored in circular doubly-linked lists, and look like this: + +    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +            |             Size of previous chunk                            | +            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +    `head:' |             Size of chunk, in bytes                         |P| +      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +            |             Forward pointer to next chunk in list             | +            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +            |             Back pointer to previous chunk in list            | +            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +            |             Unused space (may be 0 bytes long)                . +            .                                                               . +            .                                                               | +nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +    `foot:' |             Size of chunk, in bytes                           | +            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +    The P (PREV_INUSE) bit, stored in the unused low-order bit of the +    chunk size (which is always a multiple of two words), is an in-use +    bit for the *previous* chunk.  If that bit is *clear*, then the +    word before the current chunk size contains the previous chunk +    size, and can be used to find the front of the previous chunk. +    The very first chunk allocated always has this bit set, +    preventing access to non-existent (or non-owned) memory. If +    prev_inuse is set for any given chunk, then you CANNOT determine +    the size of the previous chunk, and might even get a memory +    addressing fault when trying to do so. + +    Note that the `foot' of the current chunk is actually represented +    as the prev_size of the NEXT chunk. This makes it easier to +    deal with alignments etc but can be very confusing when trying +    to extend or adapt this code. + +    The two exceptions to all this are + +     1. The special chunk `top' doesn't bother using the +        trailing size field since there is no next contiguous chunk +        that would have to index off it. After initialization, `top' +        is forced to always exist.  If it would become less than +        MINSIZE bytes long, it is replenished. + +     2. Chunks allocated via mmap, which have the second-lowest-order +        bit (IS_MMAPPED) set in their size fields.  Because they are +        allocated one-by-one, each must contain its own trailing size field. 
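+
+    (Editor's illustrative sketch, not part of the original source: in code,
+    the boundary-tag rules above come down to a few bit and pointer
+    operations, roughly
+
+        size_t    sz   = p->size & ~(PREV_INUSE | IS_MMAPPED);
+        mchunkptr next = (mchunkptr)((char *)p + sz);
+        mchunkptr prev;
+        if (!(p->size & PREV_INUSE))        // only then is prev_size valid
+            prev = (mchunkptr)((char *)p - p->prev_size);
+
+    which is what the chunksize(), next_chunk(), prev_inuse() and
+    prev_chunk() macros below implement.)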
+ +*/ + +/* +  ---------- Size and alignment checks and conversions ---------- +*/ + +/* conversion from malloc headers to user pointers, and back */ +/* Added size for pointer to make room for arena_ptr */ +#define chunk2mem(p)   ((Void_t*)((char*)(p) + 2*SIZE_SZ + sizeof(void *))) +#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ - sizeof(void *))) + +/* The smallest possible chunk */ +#define MIN_CHUNK_SIZE        (sizeof(struct malloc_chunk)) + +/* The smallest size we can malloc is an aligned minimal chunk */ + +#define MINSIZE  \ +  (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)) + +/* Check if m has acceptable alignment */ + +#define aligned_OK(m)  (((unsigned long)((m)) & (MALLOC_ALIGN_MASK)) == 0) + + +/* +   Check if a request is so large that it would wrap around zero when +   padded and aligned. To simplify some other code, the bound is made +   low enough so that adding MINSIZE will also not wrap around zero. +*/ + +#define REQUEST_OUT_OF_RANGE(req)                                 \ +  ((unsigned long)(req) >=                                        \ +   (unsigned long)(INTERNAL_SIZE_T)(-2 * MINSIZE)) + +/* pad request bytes into a usable size -- internal version */ + + +/* prev_size field of next chunk is overwritten with data +** when in use.  NOTE - last SIZE_SZ of arena must be left +** unused for last chunk to use +*/ +/* Added sizeof(void *) to make room for arena_ptr */ +#define request2size(req)                                         \ +  (((req) + sizeof(void *) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE)  ?             \ +   MINSIZE :                                                      \ +   ((req) + sizeof(void *) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK) + +/*  Same, except also perform argument check */ + +#define checked_request2size(req, sz)                             \ +  if (REQUEST_OUT_OF_RANGE(req)) {                                \ +    MALLOC_FAILURE_ACTION;                                        \ +    return 0;                                                     \ +  }                                                               \ +  (sz) = request2size(req); + +/* +  --------------- Physical chunk operations --------------- +*/ + + +/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */ +#define PREV_INUSE 0x1 + +/* extract inuse bit of previous chunk */ +#define prev_inuse(p)       ((p)->size & PREV_INUSE) + + +/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */ +#define IS_MMAPPED 0x2 + +/* check for mmap()'ed chunk */ +#define chunk_is_mmapped(p) ((p)->size & IS_MMAPPED) + + + +/* +  Bits to mask off when extracting size + +  Note: IS_MMAPPED is intentionally not masked off from size field in +  macros for which mmapped chunks should never be seen. This should +  cause helpful core dumps to occur if it is tried by accident by +  people extending or adapting this malloc. +*/ +#define SIZE_BITS (PREV_INUSE|IS_MMAPPED) + +/* Get size, ignoring use bits */ +#define chunksize(p)         ((p)->size & ~(SIZE_BITS)) + + +/* Ptr to next physical malloc_chunk. 
*/ +#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->size & ~SIZE_BITS) )) + +/* Ptr to previous physical malloc_chunk */ +#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_size) )) + +/* Treat space at ptr + offset as a chunk */ +#define chunk_at_offset(p, s)  ((mchunkptr)(((char*)(p)) + (s))) + +/* extract p's inuse bit */ +#define inuse(p)\ +((((mchunkptr)(((char*)(p))+((p)->size & ~SIZE_BITS)))->size) & PREV_INUSE) + +/* set/clear chunk as being inuse without otherwise disturbing */ +#define set_inuse(p)\ +((mchunkptr)(((char*)(p)) + ((p)->size & ~SIZE_BITS)))->size |= PREV_INUSE + +#define clear_inuse(p)\ +((mchunkptr)(((char*)(p)) + ((p)->size & ~SIZE_BITS)))->size &= ~(PREV_INUSE) + + +/* check/set/clear inuse bits in known places */ +#define inuse_bit_at_offset(p, s)\ + (((mchunkptr)(((char*)(p)) + (s)))->size & PREV_INUSE) + +#define set_inuse_bit_at_offset(p, s)\ + (((mchunkptr)(((char*)(p)) + (s)))->size |= PREV_INUSE) + +#define clear_inuse_bit_at_offset(p, s)\ + (((mchunkptr)(((char*)(p)) + (s)))->size &= ~(PREV_INUSE)) + + +/* Set size at head, without disturbing its use bit */ +#define set_head_size(p, s)  ((p)->size = (((p)->size & SIZE_BITS) | (s))) + +/* Set size/use field */ +#define set_head(p, s)       ((p)->size = (s)) + +/* Set size at footer (only when chunk is not in use) */ +#define set_foot(p, s)       (((mchunkptr)((char*)(p) + (s)))->prev_size = (s)) + + +/* +  -------------------- Internal data structures -------------------- + +   All internal state is held in an instance of malloc_state defined +   below. There are no other static variables, except in two optional +   cases: +   * If USE_MALLOC_LOCK is defined, the mALLOC_MUTEx declared above. +   * If HAVE_MMAP is true, but mmap doesn't support +     MAP_ANONYMOUS, a dummy file descriptor for mmap. + +   Beware of lots of tricks that minimize the total bookkeeping space +   requirements. The result is a little over 1K bytes (for 4byte +   pointers and size_t.) +*/ + +/* +  Bins + +    An array of bin headers for free chunks. Each bin is doubly +    linked.  The bins are approximately proportionally (log) spaced. +    There are a lot of these bins (128). This may look excessive, but +    works very well in practice.  Most bins hold sizes that are +    unusual as malloc request sizes, but are more usual for fragments +    and consolidated sets of chunks, which is what these bins hold, so +    they can be found quickly.  All procedures maintain the invariant +    that no consolidated chunk physically borders another one, so each +    chunk in a list is known to be preceeded and followed by either +    inuse chunks or the ends of memory. + +    Chunks in bins are kept in size order, with ties going to the +    approximately least recently used chunk. Ordering isn't needed +    for the small bins, which all contain the same-sized chunks, but +    facilitates best-fit allocation for larger chunks. These lists +    are just sequential. Keeping them in order almost never requires +    enough traversal to warrant using fancier ordered data +    structures. + +    Chunks of the same size are linked with the most +    recently freed at the front, and allocations are taken from the +    back.  This results in LRU (FIFO) allocation order, which tends +    to give each chunk an equal opportunity to be consolidated with +    adjacent freed chunks, resulting in larger free chunks and less +    fragmentation. + +    To simplify use in double-linked lists, each bin header acts +    as a malloc_chunk. 
This avoids special-casing for headers. +    But to conserve space and improve locality, we allocate +    only the fd/bk pointers of bins, and then use repositioning tricks +    to treat these as the fields of a malloc_chunk*. +*/ + +typedef struct malloc_chunk* mbinptr; + +/* addressing -- note that bin_at(0) does not exist */ +#define bin_at(m, i) ((mbinptr)((char*)&((m)->bins[(i)<<1]) - (SIZE_SZ<<1))) + +/* analog of ++bin */ +#define next_bin(b)  ((mbinptr)((char*)(b) + (sizeof(mchunkptr)<<1))) + +/* Reminders about list directionality within bins */ +#define first(b)     ((b)->fd) +#define last(b)      ((b)->bk) + +/* Take a chunk off a bin list */ +#define unlink(P, BK, FD) {                                            \ +  FD = P->fd;                                                          \ +  BK = P->bk;                                                          \ +  FD->bk = BK;                                                         \ +  BK->fd = FD;                                                         \ +} + +/* +  Indexing + +    Bins for sizes < 512 bytes contain chunks of all the same size, spaced +    8 bytes apart. Larger bins are approximately logarithmically spaced: + +    64 bins of size       8 +    32 bins of size      64 +    16 bins of size     512 +     8 bins of size    4096 +     4 bins of size   32768 +     2 bins of size  262144 +     1 bin  of size what's left + +    There is actually a little bit of slop in the numbers in bin_index +    for the sake of speed. This makes no difference elsewhere. + +    The bins top out around 1MB because we expect to service large +    requests via mmap. +*/ + +#define NBINS             128 +#define NSMALLBINS         64 +#define SMALLBIN_WIDTH      8 +#define MIN_LARGE_SIZE    512 + +#define in_smallbin_range(sz)  \ +  ((unsigned long)(sz) < (unsigned long)MIN_LARGE_SIZE) + +#define smallbin_index(sz)     (((unsigned)(sz)) >> 3) + +#define largebin_index(sz)                                                   \ +(((((unsigned long)(sz)) >>  6) <= 32)?  56 + (((unsigned long)(sz)) >>  6): \ + ((((unsigned long)(sz)) >>  9) <= 20)?  91 + (((unsigned long)(sz)) >>  9): \ + ((((unsigned long)(sz)) >> 12) <= 10)? 110 + (((unsigned long)(sz)) >> 12): \ + ((((unsigned long)(sz)) >> 15) <=  4)? 119 + (((unsigned long)(sz)) >> 15): \ + ((((unsigned long)(sz)) >> 18) <=  2)? 124 + (((unsigned long)(sz)) >> 18): \ +                                        126) + +#define bin_index(sz) \ + ((in_smallbin_range(sz)) ? smallbin_index(sz) : largebin_index(sz)) + +/* +  FIRST_SORTED_BIN_SIZE is the chunk size corresponding to the +  first bin that is maintained in sorted order. This must +  be the smallest size corresponding to a given bin. + +  Normally, this should be MIN_LARGE_SIZE. But you can weaken +  best fit guarantees to sometimes speed up malloc by increasing value. +  Doing this means that malloc may choose a chunk that is  +  non-best-fitting by up to the width of the bin. + +  Some useful cutoff values: +      512 - all bins sorted +     2560 - leaves bins <=     64 bytes wide unsorted   +    12288 - leaves bins <=    512 bytes wide unsorted +    65536 - leaves bins <=   4096 bytes wide unsorted +   262144 - leaves bins <=  32768 bytes wide unsorted +       -1 - no bins sorted (not recommended!) +*/ + +#define FIRST_SORTED_BIN_SIZE MIN_LARGE_SIZE  +/* #define FIRST_SORTED_BIN_SIZE 65536 */ + +/* +  Unsorted chunks + +    All remainders from chunk splits, as well as all returned chunks, +    are first placed in the "unsorted" bin. 
They are then placed +    in regular bins after malloc gives them ONE chance to be used before +    binning. So, basically, the unsorted_chunks list acts as a queue, +    with chunks being placed on it in free (and malloc_consolidate), +    and taken off (to be either used or placed in bins) in malloc. + +    The NON_MAIN_ARENA flag is never set for unsorted chunks, so it +    does not have to be taken into account in size comparisons. +*/ + +/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */ +#define unsorted_chunks(M)          (bin_at(M, 1)) + +/* +  Top + +    The top-most available chunk (i.e., the one bordering the end of +    available memory) is treated specially. It is never included in +    any bin, is used only if no other chunk is available, and is +    released back to the system if it is very large (see +    M_TRIM_THRESHOLD).  Because top initially +    points to its own bin with initial zero size, thus forcing +    extension on the first malloc request, we avoid having any special +    code in malloc to check whether it even exists yet. But we still +    need to do so when getting memory from system, so we make +    initial_top treat the bin as a legal but unusable chunk during the +    interval between initialization and the first call to +    sYSMALLOc. (This is somewhat delicate, since it relies on +    the 2 preceding words to be zero during this interval as well.) +*/ + +/* Conveniently, the unsorted bin can be used as dummy top on first call */ +#define initial_top(M)              (unsorted_chunks(M)) + +/* +  Binmap + +    To help compensate for the large number of bins, a one-level index +    structure is used for bin-by-bin searching.  `binmap' is a +    bitvector recording whether bins are definitely empty so they can +    be skipped over during during traversals.  The bits are NOT always +    cleared as soon as bins are empty, but instead only +    when they are noticed to be empty during traversal in malloc. +*/ + +/* Conservatively use 32 bits per map word, even if on 64bit system */ +#define BINMAPSHIFT      5 +#define BITSPERMAP       (1U << BINMAPSHIFT) +#define BINMAPSIZE       (NBINS / BITSPERMAP) + +#define idx2block(i)     ((i) >> BINMAPSHIFT) +#define idx2bit(i)       ((1U << ((i) & ((1U << BINMAPSHIFT)-1)))) + +#define mark_bin(m,i)    ((m)->binmap[idx2block(i)] |=  idx2bit(i)) +#define unmark_bin(m,i)  ((m)->binmap[idx2block(i)] &= ~(idx2bit(i))) +#define get_binmap(m,i)  ((m)->binmap[idx2block(i)] &   idx2bit(i)) + +/* +  Fastbins + +    An array of lists holding recently freed small chunks.  Fastbins +    are not doubly linked.  It is faster to single-link them, and +    since chunks are never removed from the middles of these lists, +    double linking is not necessary. Also, unlike regular bins, they +    are not even processed in FIFO order (they use faster LIFO) since +    ordering doesn't much matter in the transient contexts in which +    fastbins are normally used. + +    Chunks in fastbins keep their inuse bit set, so they cannot +    be consolidated with other free chunks. malloc_consolidate +    releases all chunks in fastbins and consolidates them with +    other free chunks. 
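+
+    (Editor's illustrative sketch, not part of the original source: the
+    fastbin slot for a chunk is derived directly from its size, and freeing
+    into it is a single-linked LIFO push, roughly
+
+        int idx = (int)(((unsigned int)chunksize(p) >> 3) - 2);
+        p->fd = av->fastbins[idx];
+        av->fastbins[idx] = p;
+
+    mirroring fastbin_index() and the fast path in _int_free() further down.)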
+*/ + +typedef struct malloc_chunk* mfastbinptr; + +/* offset 2 to use otherwise unindexable first 2 bins */ +#define fastbin_index(sz)        ((int)((((unsigned int)(sz)) >> 3) - 2)) + +/* The maximum fastbin request size we support */ +#define MAX_FAST_SIZE     80 + +#define NFASTBINS  (fastbin_index(request2size(MAX_FAST_SIZE))+1) + +/* +  FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free() +  that triggers automatic consolidation of possibly-surrounding +  fastbin chunks. This is a heuristic, so the exact value should not +  matter too much. It is defined at half the default trim threshold as a +  compromise heuristic to only attempt consolidation if it is likely +  to lead to trimming. However, it is not dynamically tunable, since +  consolidation reduces fragmentation surrounding large chunks even +  if trimming is not used. +*/ + +#define FASTBIN_CONSOLIDATION_THRESHOLD  (65536UL) + +/* +  Since the lowest 2 bits in max_fast don't matter in size comparisons, +  they are used as flags. +*/ + +/* +  FASTCHUNKS_BIT held in max_fast indicates that there are probably +  some fastbin chunks. It is set true on entering a chunk into any +  fastbin, and cleared only in malloc_consolidate. + +  The truth value is inverted so that have_fastchunks will be true +  upon startup (since statics are zero-filled), simplifying +  initialization checks. +*/ + +#define FASTCHUNKS_BIT        (1U) + +#define have_fastchunks(M)     (((M)->max_fast &  FASTCHUNKS_BIT) == 0) +#define clear_fastchunks(M)    ((M)->max_fast |=  FASTCHUNKS_BIT) +#define set_fastchunks(M)      ((M)->max_fast &= ~FASTCHUNKS_BIT) + +/* +  NONCONTIGUOUS_BIT indicates that MORECORE does not return contiguous +  regions.  Otherwise, contiguity is exploited in merging together, +  when possible, results from consecutive MORECORE calls. + +  The initial value comes from MORECORE_CONTIGUOUS, but is +  changed dynamically if mmap is ever used as an sbrk substitute. +*/ + +#define NONCONTIGUOUS_BIT     (2U) + +#define contiguous(M)          (((M)->max_fast &  NONCONTIGUOUS_BIT) == 0) +#define noncontiguous(M)       (((M)->max_fast &  NONCONTIGUOUS_BIT) != 0) +#define set_noncontiguous(M)   ((M)->max_fast |=  NONCONTIGUOUS_BIT) +#define set_contiguous(M)      ((M)->max_fast &= ~NONCONTIGUOUS_BIT) + +/* +   Set value of max_fast. +   Use impossibly small value if 0. +   Precondition: there are no existing fastbin chunks. +   Setting the value clears fastchunk bit but preserves noncontiguous bit. +*/ + +#define set_max_fast(M, s) \ +  (M)->max_fast = (((s) == 0)? SMALLBIN_WIDTH: request2size(s)) | \ +  FASTCHUNKS_BIT | \ +  ((M)->max_fast &  NONCONTIGUOUS_BIT) + + +/* +   ----------- Internal state representation and initialization ----------- +*/ + +struct malloc_state { +  /* Serialize access.  */ +  mutex_t mutex; + +  /* Statistics for locking.  Only used if THREAD_STATS is defined.  
*/ +  long stat_lock_direct, stat_lock_loop, stat_lock_wait; +  long pad0_[1]; /* try to give the mutex its own cacheline */ + +  /* The maximum chunk size to be eligible for fastbin */ +  INTERNAL_SIZE_T  max_fast;   /* low 2 bits used as flags */ + +  /* Fastbins */ +  mfastbinptr      fastbins[NFASTBINS]; + +  /* Base of the topmost chunk -- not otherwise kept in a bin */ +  mchunkptr        top; + +  /* The remainder from the most recent split of a small request */ +  mchunkptr        last_remainder; + +  /* Normal bins packed as described above */ +  mchunkptr        bins[NBINS * 2]; + +  /* Bitmap of bins */ +  unsigned int     binmap[BINMAPSIZE]; + +  /* Linked list */ +  struct malloc_state *next; + +  /* Memory allocated from the system in this arena.  */ +  INTERNAL_SIZE_T system_mem; +  INTERNAL_SIZE_T max_system_mem; +}; + +struct malloc_par { +  /* Tunable parameters */ +  unsigned long    trim_threshold; +  INTERNAL_SIZE_T  top_pad; +  INTERNAL_SIZE_T  mmap_threshold; + +  /* Memory map support */ +  int              n_mmaps; +  int              n_mmaps_max; +  int              max_n_mmaps; + +  /* Cache malloc_getpagesize */ +  unsigned int     pagesize; + +  /* Statistics */ +  INTERNAL_SIZE_T  mmapped_mem; +  /*INTERNAL_SIZE_T  sbrked_mem;*/ +  /*INTERNAL_SIZE_T  max_sbrked_mem;*/ +  INTERNAL_SIZE_T  max_mmapped_mem; +  INTERNAL_SIZE_T  max_total_mem; /* only kept for NO_THREADS */ + +  /* First address handed out by MORECORE/sbrk.  */ +  char*            sbrk_base; +}; + +/* There are several instances of this struct ("arenas") in this +   malloc.  If you are adapting this malloc in a way that does NOT use +   a static or mmapped malloc_state, you MUST explicitly zero-fill it +   before using. This malloc relies on the property that malloc_state +   is initialized to all zeroes (as is true of C statics).  */ + + + +/* +  Initialize a malloc_state struct. + +  This is called only from within malloc_consolidate, which needs +  be called in the same contexts anyway.  It is never called directly +  outside of malloc_consolidate because some optimizing compilers try +  to inline it at all call points, which turns out not to be an +  optimization at all. (Inlining it in malloc_consolidate is fine though.) +*/ + +#if __STD_C +static void malloc_init_state(mstate av) +#else +static void malloc_init_state(av) mstate av; +#endif +{ +  int     i; +  mbinptr bin; + +  /* Establish circular links for normal bins */ +  for (i = 1; i < NBINS; ++i) { +    bin = bin_at(av,i); +    bin->fd = bin->bk = bin; +  } + +  set_noncontiguous(av); + +  set_max_fast(av, DEFAULT_MXFAST); + +  av->top            = initial_top(av); +} + +/* +   Other internal utilities operating on mstates +*/ + +#if __STD_C +static Void_t*  sYSMALLOc(INTERNAL_SIZE_T, mstate); +static void     malloc_consolidate(mstate); +//static Void_t** iALLOc(mstate, size_t, size_t*, int, Void_t**); +#else +static Void_t*  sYSMALLOc(); +static void     malloc_consolidate(); +static Void_t** iALLOc(); +#endif + +/* ------------------- Support for multiple arenas -------------------- */ +#include "arena.c" + +/* +  Debugging support + +  These routines make a number of assertions about the states +  of data structures that should be true at all times. If any +  are not true, it's very likely that a user program has somehow +  trashed memory. (It's also possible that there is a coding error +  in malloc. In which case, please report it!) +*/ + +#if ! 
MALLOC_DEBUG + +#define check_chunk(A,P) +#define check_free_chunk(A,P) +#define check_inuse_chunk(A,P) +#define check_remalloced_chunk(A,P,N) +#define check_malloced_chunk(A,P,N) +#define check_malloc_state(A) + +#else + +#define check_chunk(A,P)              do_check_chunk(A,P) +#define check_free_chunk(A,P)         do_check_free_chunk(A,P) +#define check_inuse_chunk(A,P)        do_check_inuse_chunk(A,P) +#define check_remalloced_chunk(A,P,N) do_check_remalloced_chunk(A,P,N) +#define check_malloced_chunk(A,P,N)   do_check_malloced_chunk(A,P,N) +#define check_malloc_state(A)         do_check_malloc_state(A) + +/* +  Properties of all chunks +*/ + +#if __STD_C +static void do_check_chunk(mstate av, mchunkptr p) +#else +static void do_check_chunk(av, p) mstate av; mchunkptr p; +#endif +{ +  unsigned long sz = chunksize(p); +  /* min and max possible addresses assuming contiguous allocation */ +  char* max_address = (char*)(av->top) + chunksize(av->top); +  char* min_address = max_address - av->system_mem; + +  if (!chunk_is_mmapped(p)) { + +    /* Has legal address ... */ +    if (p != av->top) { +      if (contiguous(av)) { +        assert(((char*)p) >= min_address); +        assert(((char*)p + sz) <= ((char*)(av->top))); +      } +    } +    else { +      /* top size is always at least MINSIZE */ +      assert((unsigned long)(sz) >= MINSIZE); +      /* top predecessor always marked inuse */ +      assert(prev_inuse(p)); +    } + +  } +  else { +#if HAVE_MMAP +    /* address is outside main heap  */ +    if (contiguous(av) && av->top != initial_top(av)) { +      assert(((char*)p) < min_address || ((char*)p) > max_address); +    } +    /* chunk is page-aligned */ +    assert(((p->prev_size + sz) & (mp_.pagesize-1)) == 0); +    /* mem is aligned */ +    assert(aligned_OK(chunk2mem(p))); +#else +    /* force an appropriate assert violation if debug set */ +    assert(!chunk_is_mmapped(p)); +#endif +  } +} + +/* +  Properties of free chunks +*/ + +#if __STD_C +static void do_check_free_chunk(mstate av, mchunkptr p) +#else +static void do_check_free_chunk(av, p) mstate av; mchunkptr p; +#endif +{ +  INTERNAL_SIZE_T sz = p->size & ~(PREV_INUSE); +  mchunkptr next = chunk_at_offset(p, sz); + +  do_check_chunk(av, p); + +  /* Chunk must claim to be free ... */ +  assert(!inuse(p)); +  assert (!chunk_is_mmapped(p)); + +  /* Unless a special marker, must have OK fields */ +  if ((unsigned long)(sz) >= MINSIZE) +  { +    assert((sz & MALLOC_ALIGN_MASK) == 0); +    assert(aligned_OK(chunk2mem(p))); +    /* ... matching footer field */ +    assert(next->prev_size == sz); +    /* ... and is fully consolidated */ +    assert(prev_inuse(p)); +    assert (next == av->top || inuse(next)); + +    /* ... and has minimally sane links */ +    assert(p->fd->bk == p); +    assert(p->bk->fd == p); +  } +  else /* markers are always of size SIZE_SZ */ +    assert(sz == SIZE_SZ); +} + +/* +  Properties of inuse chunks +*/ + +#if __STD_C +static void do_check_inuse_chunk(mstate av, mchunkptr p) +#else +static void do_check_inuse_chunk(av, p) mstate av; mchunkptr p; +#endif +{ +  mchunkptr next; + +  do_check_chunk(av, p); + +  assert(av == arena_for_chunk(p)); +  if (chunk_is_mmapped(p)) +    return; /* mmapped chunks have no next/prev */ + +  /* Check whether it claims to be in use ... */ +  assert(inuse(p)); + +  next = next_chunk(p); + +  /* ... and is surrounded by OK chunks. 
+    Since more things can be checked with free chunks than inuse ones, +    if an inuse chunk borders them and debug is on, it's worth doing them. +  */ +  if (!prev_inuse(p))  { +    /* Note that we cannot even look at prev unless it is not inuse */ +    mchunkptr prv = prev_chunk(p); +    assert(next_chunk(prv) == p); +    do_check_free_chunk(av, prv); +  } + +  if (next == av->top) { +    assert(prev_inuse(next)); +    assert(chunksize(next) >= MINSIZE); +  } +  else if (!inuse(next)) +    do_check_free_chunk(av, next); +} + +/* +  Properties of chunks recycled from fastbins +*/ + +#if __STD_C +static void do_check_remalloced_chunk(mstate av, mchunkptr p, INTERNAL_SIZE_T s) +#else +static void do_check_remalloced_chunk(av, p, s) +mstate av; mchunkptr p; INTERNAL_SIZE_T s; +#endif +{ +  INTERNAL_SIZE_T sz = p->size & ~(PREV_INUSE); + +  if (!chunk_is_mmapped(p)) { +    assert(av == arena_for_chunk(p)); +  } + +  do_check_inuse_chunk(av, p); + +  /* Legal size ... */ +  assert((sz & MALLOC_ALIGN_MASK) == 0); +  assert((unsigned long)(sz) >= MINSIZE); +  /* ... and alignment */ +  assert(aligned_OK(chunk2mem(p))); +  /* chunk is less than MINSIZE more than request */ +  assert((long)(sz) - (long)(s) >= 0); +  assert((long)(sz) - (long)(s + MINSIZE) < 0); +} + +/* +  Properties of nonrecycled chunks at the point they are malloced +*/ + +#if __STD_C +static void do_check_malloced_chunk(mstate av, mchunkptr p, INTERNAL_SIZE_T s) +#else +static void do_check_malloced_chunk(av, p, s) +mstate av; mchunkptr p; INTERNAL_SIZE_T s; +#endif +{ +  /* same as recycled case ... */ +  do_check_remalloced_chunk(av, p, s); + +  /* +    ... plus,  must obey implementation invariant that prev_inuse is +    always true of any allocated chunk; i.e., that each allocated +    chunk borders either a previously allocated and still in-use +    chunk, or the base of its memory arena. This is ensured +    by making all allocations from the the `lowest' part of any found +    chunk.  This does not necessarily hold however for chunks +    recycled via fastbins. +  */ + +  assert(prev_inuse(p)); +} + + +/* +  Properties of malloc_state. + +  This may be useful for debugging malloc, as well as detecting user +  programmer errors that somehow write into malloc_state. + +  If you are extending or experimenting with this malloc, you can +  probably figure out how to hack this routine to print out or +  display chunk addresses, sizes, bins, and other instrumentation. 
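+
+  (Editor's note, illustrative only: building with MALLOC_DEBUG defined to a
+  nonzero value turns the check_*() wrappers above into calls to these
+  do_check_*() routines, so a statement such as
+
+      check_malloc_state(av);
+
+  in malloc_consolidate() becomes a full consistency sweep of the arena.)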
+*/ + +static void do_check_malloc_state(mstate av) +{ +  int i; +  mchunkptr p; +  mchunkptr q; +  mbinptr b; +  unsigned int binbit; +  int empty; +  unsigned int idx; +  INTERNAL_SIZE_T size; +  unsigned long total = 0; +  int max_fast_bin; + +  /* internal size_t must be no wider than pointer type */ +  assert(sizeof(INTERNAL_SIZE_T) <= sizeof(char*)); + +  /* alignment is a power of 2 */ +  assert((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT-1)) == 0); + +  /* cannot run remaining checks until fully initialized */ +  if (av->top == 0 || av->top == initial_top(av)) +    return; + + +  /* properties of fastbins */ + +  /* max_fast is in allowed range */ +  assert((av->max_fast & ~1) <= request2size(MAX_FAST_SIZE)); + +  max_fast_bin = fastbin_index(av->max_fast); + +  for (i = 0; i < NFASTBINS; ++i) { +    p = av->fastbins[i]; + +    /* all bins past max_fast are empty */ +    if (i > max_fast_bin) +      assert(p == 0); + +    while (p != 0) { +      /* each chunk claims to be inuse */ +      do_check_inuse_chunk(av, p); +      total += chunksize(p); +      /* chunk belongs in this bin */ +      assert(fastbin_index(chunksize(p)) == i); +      p = p->fd; +    } +  } + +  if (total != 0) +    assert(have_fastchunks(av)); +  else if (!have_fastchunks(av)) +    assert(total == 0); + +  /* check normal bins */ +  for (i = 1; i < NBINS; ++i) { +    b = bin_at(av,i); + +    /* binmap is accurate (except for bin 1 == unsorted_chunks) */ +    if (i >= 2) { +      binbit = get_binmap(av,i); +      empty = last(b) == b; +      if (!binbit) +        assert(empty); +      else if (!empty) +        assert(binbit); +    } + +    for (p = last(b); p != b; p = p->bk) { +      /* each chunk claims to be free */ +      do_check_free_chunk(av, p); +      size = chunksize(p); +      total += size; +      if (i >= 2) { +        /* chunk belongs in bin */ +        idx = bin_index(size); +        assert(idx == (unsigned int)i); +        /* lists are sorted */ +        if ((unsigned long) size >= (unsigned long)(FIRST_SORTED_BIN_SIZE)) { +	  assert(p->bk == b || +		 (unsigned long)chunksize(p->bk) >= +		 (unsigned long)chunksize(p)); +	} +      } +      /* chunk is followed by a legal chain of inuse chunks */ +      for (q = next_chunk(p); +           (q != av->top && inuse(q) && +             (unsigned long)(chunksize(q)) >= MINSIZE); +           q = next_chunk(q)) +        do_check_inuse_chunk(av, q); +    } +  } + +  /* top chunk is OK */ +  check_chunk(av, av->top); + +  /* sanity checks for statistics */ + + +  assert((unsigned long)(av->system_mem) <= +         (unsigned long)(av->max_system_mem)); + + +} +#endif + + + +/* ----------- Routines dealing with system allocation -------------- */ + +/* No system allocation routines supported */ + + +/*------------------------ Public wrappers. --------------------------------*/ + + + +#undef DEBUG_MALLOC +Void_t* +public_mALLOc(cvmx_arena_list_t arena_list, size_t bytes) +{ +  mstate ar_ptr, orig_ar_ptr; +  Void_t *victim = NULL; +  static mstate debug_prev_ar;  // debug only! 
+#ifdef DEBUG_MALLOC +  int arena_cnt=0; +#endif +   +  ar_ptr = arena_list; + +  if (!ar_ptr) +  { +     return(NULL); +  } + +  if (debug_prev_ar != ar_ptr) +  { +      debug_printf("New arena: %p\n", ar_ptr); +#ifdef CVMX_SPINLOCK_DEBUG +      cvmx_dprintf("lock wait count for arena: %p is %ld\n", ar_ptr, ar_ptr->mutex.wait_cnt); +#endif +      debug_prev_ar = ar_ptr; +  } +  orig_ar_ptr = ar_ptr; + +  // try to get an arena without contention +  do +  { +#ifdef DEBUG_MALLOC +  arena_cnt++; +#endif +      if (!mutex_trylock(&ar_ptr->mutex)) +      { +          // we locked it +          victim = _int_malloc(ar_ptr, bytes); +          (void)mutex_unlock(&ar_ptr->mutex); +          if(victim) +          { +              break; +          } +      } +      ar_ptr = ar_ptr->next; +  } while (ar_ptr != orig_ar_ptr); + +  // we couldn't get the memory without contention, so try all +  // arenas.  SLOW! +  if (!victim) +  { +      ar_ptr = orig_ar_ptr; +      do +      { +#ifdef DEBUG_MALLOC +  arena_cnt++; +#endif +          mutex_lock(&ar_ptr->mutex); +          victim = _int_malloc(ar_ptr, bytes); +          (void)mutex_unlock(&ar_ptr->mutex); +          if(victim) +          { +              break; +          } +          ar_ptr = ar_ptr->next; +      } while (ar_ptr != orig_ar_ptr); +  } + + +  assert(!victim || chunk_is_mmapped(mem2chunk(victim)) || +	 ar_ptr == arena_for_chunk(mem2chunk(victim))); + +#ifdef DEBUG_MALLOC +  if (!victim) +  { +     cvmx_dprintf("Malloc failed: size: %ld, arena_cnt: %d\n", bytes, arena_cnt); +  } +#endif + +  debug_printf("cvmx_malloc(%ld) = %p\n", bytes, victim); + +  // remember which arena we last used..... +  tsd_setspecific(arena_key, (Void_t *)ar_ptr); +  return victim; +} + + + +void +public_fREe(Void_t* mem) +{ +  mstate ar_ptr; +  mchunkptr p;                          /* chunk corresponding to mem */ + +  debug_printf("cvmx_free(%p)\n", mem); + + +  if (mem == 0)                              /* free(0) has no effect */ +    return; + +  p = mem2chunk(mem); + + +  ar_ptr = arena_for_chunk(p); +  assert(ar_ptr); +#if THREAD_STATS +  if(!mutex_trylock(&ar_ptr->mutex)) +    ++(ar_ptr->stat_lock_direct); +  else { +    (void)mutex_lock(&ar_ptr->mutex); +    ++(ar_ptr->stat_lock_wait); +  } +#else +  (void)mutex_lock(&ar_ptr->mutex); +#endif +  _int_free(ar_ptr, mem); +  (void)mutex_unlock(&ar_ptr->mutex); +} + +Void_t* +public_rEALLOc(cvmx_arena_list_t arena_list, Void_t* oldmem, size_t bytes) +{ +  mstate ar_ptr; +  INTERNAL_SIZE_T    nb;      /* padded request size */ + +  mchunkptr oldp;             /* chunk corresponding to oldmem */ +  INTERNAL_SIZE_T    oldsize; /* its size */ + +  Void_t* newp;             /* chunk to return */ + + +#if REALLOC_ZERO_BYTES_FREES +  if (bytes == 0 && oldmem != NULL) { public_fREe(oldmem); return 0; } +#endif + +  /* realloc of null is supposed to be same as malloc */ +  if (oldmem == 0) return public_mALLOc(arena_list, bytes); + +  oldp    = mem2chunk(oldmem); +  oldsize = chunksize(oldp); + +  checked_request2size(bytes, nb); + + +  ar_ptr = arena_for_chunk(oldp); +  (void)mutex_lock(&ar_ptr->mutex); + + +  newp = _int_realloc(ar_ptr, oldmem, bytes); + +  (void)mutex_unlock(&ar_ptr->mutex); +  assert(!newp || chunk_is_mmapped(mem2chunk(newp)) || +	 ar_ptr == arena_for_chunk(mem2chunk(newp))); +  return newp; +} + +#undef DEBUG_MEMALIGN +Void_t* +public_mEMALIGn(cvmx_arena_list_t arena_list, size_t alignment, size_t bytes) +{ +  mstate ar_ptr, orig_ar_ptr; +  Void_t *p = NULL; +#ifdef DEBUG_MEMALIGN +  int 
arena_cnt=0; +#endif + + +  /* If need less alignment than we give anyway, just relay to malloc */ +  if (alignment <= MALLOC_ALIGNMENT) return public_mALLOc(arena_list, bytes); + +  /* Otherwise, ensure that it is at least a minimum chunk size */ +  if (alignment <  MINSIZE) alignment = MINSIZE; + + +  ar_ptr = arena_list; + +  if (!ar_ptr) +  { +     return(NULL); +  } + +  orig_ar_ptr = ar_ptr; + + +  // try to get an arena without contention +  do +  { + +#ifdef DEBUG_MEMALIGN +   arena_cnt++; +#endif +      if (!mutex_trylock(&ar_ptr->mutex)) +      { +          // we locked it +          p = _int_memalign(ar_ptr, alignment, bytes); +          (void)mutex_unlock(&ar_ptr->mutex); +          if(p) +          { +              break; +          } +      } +      ar_ptr = ar_ptr->next; +  } while (ar_ptr != orig_ar_ptr); + + +  // we couldn't get the memory without contention, so try all +  // arenas.  SLOW! +  if (!p) +  { +#ifdef DEBUG_MEMALIGN +   arena_cnt++; +#endif +      ar_ptr = orig_ar_ptr; +      do +      { +          mutex_lock(&ar_ptr->mutex); +          p = _int_memalign(ar_ptr, alignment, bytes); +          (void)mutex_unlock(&ar_ptr->mutex); +          if(p) +          { +              break; +          } +          ar_ptr = ar_ptr->next; +      } while (ar_ptr != orig_ar_ptr); +  } + + +  if (p) +  { +     assert(ar_ptr == arena_for_chunk(mem2chunk(p))); +  } +  else +  { +#ifdef DEBUG_MEMALIGN +     cvmx_dprintf("Memalign failed: align: 0x%x, size: %ld, arena_cnt: %ld\n", alignment, bytes, arena_cnt); +#endif +  } + +  assert(!p || ar_ptr == arena_for_chunk(mem2chunk(p))); +  return p; +} + + + +Void_t* +public_cALLOc(cvmx_arena_list_t arena_list, size_t n, size_t elem_size) +{ +  mstate av; +  mchunkptr oldtop, p; +  INTERNAL_SIZE_T sz, csz, oldtopsize; +  Void_t* mem; +  unsigned long clearsize; +  unsigned long nclears; +  INTERNAL_SIZE_T* d; + + +  /* FIXME: check for overflow on multiplication.  */ +  sz = n * elem_size; + +  mem = public_mALLOc(arena_list, sz); +  if (mem) +  { +     memset(mem, 0, sz); +  } + +  return mem; +} + + +#ifndef _LIBC + +void +public_cFREe(Void_t* m) +{ +  public_fREe(m); +} + +#endif /* _LIBC */ + +/* +  ------------------------------ malloc ------------------------------ +*/ + +static Void_t* +_int_malloc(mstate av, size_t bytes) +{ +  INTERNAL_SIZE_T nb;               /* normalized request size */ +  unsigned int    idx;              /* associated bin index */ +  mbinptr         bin;              /* associated bin */ +  mfastbinptr*    fb;               /* associated fastbin */ + +  mchunkptr       victim;           /* inspected/selected chunk */ +  INTERNAL_SIZE_T size;             /* its size */ +  int             victim_index;     /* its bin index */ + +  mchunkptr       remainder;        /* remainder from a split */ +  unsigned long   remainder_size;   /* its size */ + +  unsigned int    block;            /* bit map traverser */ +  unsigned int    bit;              /* bit map traverser */ +  unsigned int    map;              /* current word of binmap */ + +  mchunkptr       fwd;              /* misc temp for linking */ +  mchunkptr       bck;              /* misc temp for linking */ + +  /* +    Convert request size to internal form by adding SIZE_SZ bytes +    overhead plus possibly more to obtain necessary alignment and/or +    to obtain a size of at least MINSIZE, the smallest allocatable +    size. 
Also, checked_request2size traps (returning 0) request sizes +    that are so large that they wrap around zero when padded and +    aligned. +  */ + + +  checked_request2size(bytes, nb); + +  /* +    If the size qualifies as a fastbin, first check corresponding bin. +    This code is safe to execute even if av is not yet initialized, so we +    can try it without checking, which saves some time on this fast path. +  */ + +  if ((unsigned long)(nb) <= (unsigned long)(av->max_fast)) { +    fb = &(av->fastbins[(fastbin_index(nb))]); +    if ( (victim = *fb) != 0) { +      *fb = victim->fd; +      check_remalloced_chunk(av, victim, nb); +      set_arena_for_chunk(victim, av); +      return chunk2mem(victim); +    } +  } + +  /* +    If a small request, check regular bin.  Since these "smallbins" +    hold one size each, no searching within bins is necessary. +    (For a large request, we need to wait until unsorted chunks are +    processed to find best fit. But for small ones, fits are exact +    anyway, so we can check now, which is faster.) +  */ + +  if (in_smallbin_range(nb)) { +    idx = smallbin_index(nb); +    bin = bin_at(av,idx); + +    if ( (victim = last(bin)) != bin) { +      if (victim == 0) /* initialization check */ +        malloc_consolidate(av); +      else { +        bck = victim->bk; +        set_inuse_bit_at_offset(victim, nb); +        bin->bk = bck; +        bck->fd = bin; + +        set_arena_for_chunk(victim, av); +        check_malloced_chunk(av, victim, nb); +        return chunk2mem(victim); +      } +    } +  } + +  /* +     If this is a large request, consolidate fastbins before continuing. +     While it might look excessive to kill all fastbins before +     even seeing if there is space available, this avoids +     fragmentation problems normally associated with fastbins. +     Also, in practice, programs tend to have runs of either small or +     large requests, but less often mixtures, so consolidation is not +     invoked all that often in most programs. And the programs that +     it is called frequently in otherwise tend to fragment. +  */ + +  else { +    idx = largebin_index(nb); +    if (have_fastchunks(av)) +      malloc_consolidate(av); +  } + +  /* +    Process recently freed or remaindered chunks, taking one only if +    it is exact fit, or, if this a small request, the chunk is remainder from +    the most recent non-exact fit.  Place other traversed chunks in +    bins.  Note that this step is the only place in any routine where +    chunks are placed in bins. + +    The outer loop here is needed because we might not realize until +    near the end of malloc that we should have consolidated, so must +    do so and retry. This happens at most once, and only when we would +    otherwise need to expand memory to service a "small" request. +  */ + +  for(;;) { + +    while ( (victim = unsorted_chunks(av)->bk) != unsorted_chunks(av)) { +      bck = victim->bk; +      size = chunksize(victim); + +      /* +         If a small request, try to use last remainder if it is the +         only chunk in unsorted bin.  This helps promote locality for +         runs of consecutive small requests. This is the only +         exception to best-fit, and applies only when there is +         no exact fit for a small chunk. 
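+
+         (Editor's illustrative example, not in the original source:
+
+             a = _int_malloc(av, 100);   // carves a piece off the remainder
+             b = _int_malloc(av, 100);   // next piece lands right after it
+
+         provided the remainder stays the only entry in the unsorted bin
+         and remains large enough between the two calls.)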
+      */ + +      if (in_smallbin_range(nb) && +          bck == unsorted_chunks(av) && +          victim == av->last_remainder && +          (unsigned long)(size) > (unsigned long)(nb + MINSIZE)) { + +        /* split and reattach remainder */ +        remainder_size = size - nb; +        remainder = chunk_at_offset(victim, nb); +        unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder; +        av->last_remainder = remainder; +        remainder->bk = remainder->fd = unsorted_chunks(av); + +        set_head(victim, nb | PREV_INUSE); +        set_head(remainder, remainder_size | PREV_INUSE); +        set_foot(remainder, remainder_size); + +        set_arena_for_chunk(victim, av); +        check_malloced_chunk(av, victim, nb); +        return chunk2mem(victim); +      } + +      /* remove from unsorted list */ +      unsorted_chunks(av)->bk = bck; +      bck->fd = unsorted_chunks(av); + +      /* Take now instead of binning if exact fit */ + +      if (size == nb) { +        set_inuse_bit_at_offset(victim, size); +        set_arena_for_chunk(victim, av); +        check_malloced_chunk(av, victim, nb); +        return chunk2mem(victim); +      } + +      /* place chunk in bin */ + +      if (in_smallbin_range(size)) { +        victim_index = smallbin_index(size); +        bck = bin_at(av, victim_index); +        fwd = bck->fd; +      } +      else { +        victim_index = largebin_index(size); +        bck = bin_at(av, victim_index); +        fwd = bck->fd; + +        if (fwd != bck) { +          /* if smaller than smallest, place first */ +          if ((unsigned long)(size) < (unsigned long)(bck->bk->size)) { +            fwd = bck; +            bck = bck->bk; +          } +          else if ((unsigned long)(size) >=  +                   (unsigned long)(FIRST_SORTED_BIN_SIZE)) { + +            /* maintain large bins in sorted order */ +            size |= PREV_INUSE; /* Or with inuse bit to speed comparisons */ +            while ((unsigned long)(size) < (unsigned long)(fwd->size)) { +              fwd = fwd->fd; +	    } +            bck = fwd->bk; +          } +        } +      } + +      mark_bin(av, victim_index); +      victim->bk = bck; +      victim->fd = fwd; +      fwd->bk = victim; +      bck->fd = victim; +    } + +    /* +      If a large request, scan through the chunks of current bin in +      sorted order to find smallest that fits.  This is the only step +      where an unbounded number of chunks might be scanned without doing +      anything useful with them. However the lists tend to be short. 
+    */ + +    if (!in_smallbin_range(nb)) { +      bin = bin_at(av, idx); + +      for (victim = last(bin); victim != bin; victim = victim->bk) { +	size = chunksize(victim); + +	if ((unsigned long)(size) >= (unsigned long)(nb)) { +	  remainder_size = size - nb; +	  unlink(victim, bck, fwd); + +	  /* Exhaust */ +	  if (remainder_size < MINSIZE)  { +	    set_inuse_bit_at_offset(victim, size); +        set_arena_for_chunk(victim, av); +	    check_malloced_chunk(av, victim, nb); +	    return chunk2mem(victim); +	  } +	  /* Split */ +	  else { +	    remainder = chunk_at_offset(victim, nb); +	    unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder; +	    remainder->bk = remainder->fd = unsorted_chunks(av); +	    set_head(victim, nb | PREV_INUSE); +	    set_head(remainder, remainder_size | PREV_INUSE); +	    set_foot(remainder, remainder_size); +        set_arena_for_chunk(victim, av); +	    check_malloced_chunk(av, victim, nb); +	    return chunk2mem(victim); +	  } +	} +      } +    } + +    /* +      Search for a chunk by scanning bins, starting with next largest +      bin. This search is strictly by best-fit; i.e., the smallest +      (with ties going to approximately the least recently used) chunk +      that fits is selected. + +      The bitmap avoids needing to check that most blocks are nonempty. +      The particular case of skipping all bins during warm-up phases +      when no chunks have been returned yet is faster than it might look. +    */ + +    ++idx; +    bin = bin_at(av,idx); +    block = idx2block(idx); +    map = av->binmap[block]; +    bit = idx2bit(idx); + +    for (;;) { + +      /* Skip rest of block if there are no more set bits in this block.  */ +      if (bit > map || bit == 0) { +        do { +          if (++block >= BINMAPSIZE)  /* out of bins */ +            goto use_top; +        } while ( (map = av->binmap[block]) == 0); + +        bin = bin_at(av, (block << BINMAPSHIFT)); +        bit = 1; +      } + +      /* Advance to bin with set bit. There must be one. */ +      while ((bit & map) == 0) { +        bin = next_bin(bin); +        bit <<= 1; +        assert(bit != 0); +      } + +      /* Inspect the bin. It is likely to be non-empty */ +      victim = last(bin); + +      /*  If a false alarm (empty bin), clear the bit. */ +      if (victim == bin) { +        av->binmap[block] = map &= ~bit; /* Write through */ +        bin = next_bin(bin); +        bit <<= 1; +      } + +      else { +        size = chunksize(victim); + +        /*  We know the first chunk in this bin is big enough to use. 
*/ +        assert((unsigned long)(size) >= (unsigned long)(nb)); + +        remainder_size = size - nb; + +        /* unlink */ +        bck = victim->bk; +        bin->bk = bck; +        bck->fd = bin; + +        /* Exhaust */ +        if (remainder_size < MINSIZE) { +          set_inuse_bit_at_offset(victim, size); +          set_arena_for_chunk(victim, av); +          check_malloced_chunk(av, victim, nb); +          return chunk2mem(victim); +        } + +        /* Split */ +        else { +          remainder = chunk_at_offset(victim, nb); + +          unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder; +          remainder->bk = remainder->fd = unsorted_chunks(av); +          /* advertise as last remainder */ +          if (in_smallbin_range(nb)) +            av->last_remainder = remainder; + +          set_head(victim, nb | PREV_INUSE); +          set_head(remainder, remainder_size | PREV_INUSE); +          set_foot(remainder, remainder_size); +          set_arena_for_chunk(victim, av); +          check_malloced_chunk(av, victim, nb); +          return chunk2mem(victim); +        } +      } +    } + +  use_top: +    /* +      If large enough, split off the chunk bordering the end of memory +      (held in av->top). Note that this is in accord with the best-fit +      search rule.  In effect, av->top is treated as larger (and thus +      less well fitting) than any other available chunk since it can +      be extended to be as large as necessary (up to system +      limitations). + +      We require that av->top always exists (i.e., has size >= +      MINSIZE) after initialization, so if it would otherwise be +      exhuasted by current request, it is replenished. (The main +      reason for ensuring it exists is that we may need MINSIZE space +      to put in fenceposts in sysmalloc.) +    */ + +    victim = av->top; +    size = chunksize(victim); + +    if ((unsigned long)(size) >= (unsigned long)(nb + MINSIZE)) { +      remainder_size = size - nb; +      remainder = chunk_at_offset(victim, nb); +      av->top = remainder; +      set_head(victim, nb | PREV_INUSE); +      set_head(remainder, remainder_size | PREV_INUSE); + +      set_arena_for_chunk(victim, av); +      check_malloced_chunk(av, victim, nb); +      return chunk2mem(victim); +    } + +    /* +      If there is space available in fastbins, consolidate and retry, +      to possibly avoid expanding memory. This can occur only if nb is +      in smallbin range so we didn't consolidate upon entry. 
+    */ + +    else if (have_fastchunks(av)) { +      assert(in_smallbin_range(nb)); +      malloc_consolidate(av); +      idx = smallbin_index(nb); /* restore original bin index */ +    } + +    /* +       Otherwise, relay to handle system-dependent cases +    */ +    else +      return(NULL); // sysmalloc not supported +  } +} + +/* +  ------------------------------ free ------------------------------ +*/ + +static void +_int_free(mstate av, Void_t* mem) +{ +  mchunkptr       p;           /* chunk corresponding to mem */ +  INTERNAL_SIZE_T size;        /* its size */ +  mfastbinptr*    fb;          /* associated fastbin */ +  mchunkptr       nextchunk;   /* next contiguous chunk */ +  INTERNAL_SIZE_T nextsize;    /* its size */ +  int             nextinuse;   /* true if nextchunk is used */ +  INTERNAL_SIZE_T prevsize;    /* size of previous contiguous chunk */ +  mchunkptr       bck;         /* misc temp for linking */ +  mchunkptr       fwd;         /* misc temp for linking */ + + +  /* free(0) has no effect */ +  if (mem != 0) { +    p = mem2chunk(mem); +    size = chunksize(p); + +    check_inuse_chunk(av, p); + +    /* +      If eligible, place chunk on a fastbin so it can be found +      and used quickly in malloc. +    */ + +    if ((unsigned long)(size) <= (unsigned long)(av->max_fast) + +#if TRIM_FASTBINS +        /* +           If TRIM_FASTBINS set, don't place chunks +           bordering top into fastbins +        */ +        && (chunk_at_offset(p, size) != av->top) +#endif +        ) { + +      set_fastchunks(av); +      fb = &(av->fastbins[fastbin_index(size)]); +      p->fd = *fb; +      *fb = p; +    } + +    /* +       Consolidate other non-mmapped chunks as they arrive. +    */ + +    else if (!chunk_is_mmapped(p)) { +      nextchunk = chunk_at_offset(p, size); +      nextsize = chunksize(nextchunk); +      assert(nextsize > 0); + +      /* consolidate backward */ +      if (!prev_inuse(p)) { +        prevsize = p->prev_size; +        size += prevsize; +        p = chunk_at_offset(p, -((long) prevsize)); +        unlink(p, bck, fwd); +      } + +      if (nextchunk != av->top) { +        /* get and clear inuse bit */ +        nextinuse = inuse_bit_at_offset(nextchunk, nextsize); + +        /* consolidate forward */ +        if (!nextinuse) { +          unlink(nextchunk, bck, fwd); +          size += nextsize; +        } else +	  clear_inuse_bit_at_offset(nextchunk, 0); + +        /* +          Place the chunk in unsorted chunk list. Chunks are +          not placed into regular bins until after they have +          been given one chance to be used in malloc. +        */ + +        bck = unsorted_chunks(av); +        fwd = bck->fd; +        p->bk = bck; +        p->fd = fwd; +        bck->fd = p; +        fwd->bk = p; + +        set_head(p, size | PREV_INUSE); +        set_foot(p, size); + +        check_free_chunk(av, p); +      } + +      /* +         If the chunk borders the current high end of memory, +         consolidate into top +      */ + +      else { +        size += nextsize; +        set_head(p, size | PREV_INUSE); +        av->top = p; +        check_chunk(av, p); +      } + +      /* +        If freeing a large space, consolidate possibly-surrounding +        chunks. Then, if the total unused topmost memory exceeds trim +        threshold, ask malloc_trim to reduce top. 
+ +        Unless max_fast is 0, we don't know if there are fastbins +        bordering top, so we cannot tell for sure whether threshold +        has been reached unless fastbins are consolidated.  But we +        don't want to consolidate on each free.  As a compromise, +        consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD +        is reached. +      */ + +      if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) { +        if (have_fastchunks(av)) +          malloc_consolidate(av); +      } +    } +  } +} + +/* +  ------------------------- malloc_consolidate ------------------------- + +  malloc_consolidate is a specialized version of free() that tears +  down chunks held in fastbins.  Free itself cannot be used for this +  purpose since, among other things, it might place chunks back onto +  fastbins.  So, instead, we need to use a minor variant of the same +  code. + +  Also, because this routine needs to be called the first time through +  malloc anyway, it turns out to be the perfect place to trigger +  initialization code. +*/ + +#if __STD_C +static void malloc_consolidate(mstate av) +#else +static void malloc_consolidate(av) mstate av; +#endif +{ +  mfastbinptr*    fb;                 /* current fastbin being consolidated */ +  mfastbinptr*    maxfb;              /* last fastbin (for loop control) */ +  mchunkptr       p;                  /* current chunk being consolidated */ +  mchunkptr       nextp;              /* next chunk to consolidate */ +  mchunkptr       unsorted_bin;       /* bin header */ +  mchunkptr       first_unsorted;     /* chunk to link to */ + +  /* These have same use as in free() */ +  mchunkptr       nextchunk; +  INTERNAL_SIZE_T size; +  INTERNAL_SIZE_T nextsize; +  INTERNAL_SIZE_T prevsize; +  int             nextinuse; +  mchunkptr       bck; +  mchunkptr       fwd; + +  /* +    If max_fast is 0, we know that av hasn't +    yet been initialized, in which case do so below +  */ + +  if (av->max_fast != 0) { +    clear_fastchunks(av); + +    unsorted_bin = unsorted_chunks(av); + +    /* +      Remove each chunk from fast bin and consolidate it, placing it +      then in unsorted bin. Among other reasons for doing this, +      placing in unsorted bin avoids needing to calculate actual bins +      until malloc is sure that chunks aren't immediately going to be +      reused anyway. 
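+
+      (Editor's note, illustrative only: the loop below walks every fastbin
+      from &fastbins[0] up to &fastbins[fastbin_index(max_fast)], pops each
+      chunk off its single-linked list, coalesces it with free neighbours
+      exactly as free() would, and then links it into the unsorted bin or
+      merges it into top.)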
+    */ + +    maxfb = &(av->fastbins[fastbin_index(av->max_fast)]); +    fb = &(av->fastbins[0]); +    do { +      if ( (p = *fb) != 0) { +        *fb = 0; + +        do { +          check_inuse_chunk(av, p); +          nextp = p->fd; + +          /* Slightly streamlined version of consolidation code in free() */ +          size = p->size & ~(PREV_INUSE); +          nextchunk = chunk_at_offset(p, size); +          nextsize = chunksize(nextchunk); + +          if (!prev_inuse(p)) { +            prevsize = p->prev_size; +            size += prevsize; +            p = chunk_at_offset(p, -((long) prevsize)); +            unlink(p, bck, fwd); +          } + +          if (nextchunk != av->top) { +            nextinuse = inuse_bit_at_offset(nextchunk, nextsize); + +            if (!nextinuse) { +              size += nextsize; +              unlink(nextchunk, bck, fwd); +            } else +	      clear_inuse_bit_at_offset(nextchunk, 0); + +            first_unsorted = unsorted_bin->fd; +            unsorted_bin->fd = p; +            first_unsorted->bk = p; + +            set_head(p, size | PREV_INUSE); +            p->bk = unsorted_bin; +            p->fd = first_unsorted; +            set_foot(p, size); +          } + +          else { +            size += nextsize; +            set_head(p, size | PREV_INUSE); +            av->top = p; +          } + +        } while ( (p = nextp) != 0); + +      } +    } while (fb++ != maxfb); +  } +  else { +    malloc_init_state(av); +    check_malloc_state(av); +  } +} + +/* +  ------------------------------ realloc ------------------------------ +*/ + +static Void_t* +_int_realloc(mstate av, Void_t* oldmem, size_t bytes) +{ +  INTERNAL_SIZE_T  nb;              /* padded request size */ + +  mchunkptr        oldp;            /* chunk corresponding to oldmem */ +  INTERNAL_SIZE_T  oldsize;         /* its size */ + +  mchunkptr        newp;            /* chunk to return */ +  INTERNAL_SIZE_T  newsize;         /* its size */ +  Void_t*          newmem;          /* corresponding user mem */ + +  mchunkptr        next;            /* next contiguous chunk after oldp */ + +  mchunkptr        remainder;       /* extra space at end of newp */ +  unsigned long    remainder_size;  /* its size */ + +  mchunkptr        bck;             /* misc temp for linking */ +  mchunkptr        fwd;             /* misc temp for linking */ + +  unsigned long    copysize;        /* bytes to copy */ +  unsigned int     ncopies;         /* INTERNAL_SIZE_T words to copy */ +  INTERNAL_SIZE_T* s;               /* copy source */ +  INTERNAL_SIZE_T* d;               /* copy destination */ + + +#if REALLOC_ZERO_BYTES_FREES +  if (bytes == 0) { +    _int_free(av, oldmem); +    return 0; +  } +#endif + +  /* realloc of null is supposed to be same as malloc */ +  if (oldmem == 0) return _int_malloc(av, bytes); + +  checked_request2size(bytes, nb); + +  oldp    = mem2chunk(oldmem); +  oldsize = chunksize(oldp); + +  check_inuse_chunk(av, oldp); + +  // force to act like not mmapped +  if (1) { + +    if ((unsigned long)(oldsize) >= (unsigned long)(nb)) { +      /* already big enough; split below */ +      newp = oldp; +      newsize = oldsize; +    } + +    else { +      next = chunk_at_offset(oldp, oldsize); + +      /* Try to expand forward into top */ +      if (next == av->top && +          (unsigned long)(newsize = oldsize + chunksize(next)) >= +          (unsigned long)(nb + MINSIZE)) { +        set_head_size(oldp, nb ); +        av->top = chunk_at_offset(oldp, nb); +        
set_head(av->top, (newsize - nb) | PREV_INUSE); +    	check_inuse_chunk(av, oldp); +        set_arena_for_chunk(oldp, av); +        return chunk2mem(oldp); +      } + +      /* Try to expand forward into next chunk;  split off remainder below */ +      else if (next != av->top && +               !inuse(next) && +               (unsigned long)(newsize = oldsize + chunksize(next)) >= +               (unsigned long)(nb)) { +        newp = oldp; +        unlink(next, bck, fwd); +      } + +      /* allocate, copy, free */ +      else { +        newmem = _int_malloc(av, nb - MALLOC_ALIGN_MASK); +        if (newmem == 0) +          return 0; /* propagate failure */ + +        newp = mem2chunk(newmem); +        newsize = chunksize(newp); + +        /* +          Avoid copy if newp is next chunk after oldp. +        */ +        if (newp == next) { +          newsize += oldsize; +          newp = oldp; +        } +        else { +          /* +            Unroll copy of <= 36 bytes (72 if 8byte sizes) +            We know that contents have an odd number of +            INTERNAL_SIZE_T-sized words; minimally 3. +          */ + +          copysize = oldsize - SIZE_SZ; +          s = (INTERNAL_SIZE_T*)(oldmem); +          d = (INTERNAL_SIZE_T*)(newmem); +          ncopies = copysize / sizeof(INTERNAL_SIZE_T); +          assert(ncopies >= 3); + +          if (ncopies > 9) +            MALLOC_COPY(d, s, copysize); + +          else { +            *(d+0) = *(s+0); +            *(d+1) = *(s+1); +            *(d+2) = *(s+2); +            if (ncopies > 4) { +              *(d+3) = *(s+3); +              *(d+4) = *(s+4); +              if (ncopies > 6) { +                *(d+5) = *(s+5); +                *(d+6) = *(s+6); +                if (ncopies > 8) { +                  *(d+7) = *(s+7); +                  *(d+8) = *(s+8); +                } +              } +            } +          } + +          _int_free(av, oldmem); +          set_arena_for_chunk(newp, av); +          check_inuse_chunk(av, newp); +          return chunk2mem(newp); +        } +      } +    } + +    /* If possible, free extra space in old or extended chunk */ + +    assert((unsigned long)(newsize) >= (unsigned long)(nb)); + +    remainder_size = newsize - nb; + +    if (remainder_size < MINSIZE) { /* not enough extra to split off */ +      set_head_size(newp, newsize); +      set_inuse_bit_at_offset(newp, newsize); +    } +    else { /* split remainder */ +      remainder = chunk_at_offset(newp, nb); +      set_head_size(newp, nb ); +      set_head(remainder, remainder_size | PREV_INUSE ); +      /* Mark remainder as inuse so free() won't complain */ +      set_inuse_bit_at_offset(remainder, remainder_size); +      set_arena_for_chunk(remainder, av); +      _int_free(av, chunk2mem(remainder)); +    } + +    set_arena_for_chunk(newp, av); +    check_inuse_chunk(av, newp); +    return chunk2mem(newp); +  } + +  /* +    Handle mmap cases +  */ + +  else { +    /* If !HAVE_MMAP, but chunk_is_mmapped, user must have overwritten mem */ +    check_malloc_state(av); +    MALLOC_FAILURE_ACTION; +    return 0; +  } +} + +/* +  ------------------------------ memalign ------------------------------ +*/ + +static Void_t* +_int_memalign(mstate av, size_t alignment, size_t bytes) +{ +  INTERNAL_SIZE_T nb;             /* padded  request size */ +  char*           m;              /* memory returned by malloc call */ +  mchunkptr       p;              /* corresponding chunk */ +  char*           brk;            /* alignment point within p */ +  
mchunkptr       newp;           /* chunk to return */ +  INTERNAL_SIZE_T newsize;        /* its size */ +  INTERNAL_SIZE_T leadsize;       /* leading space before alignment point */ +  mchunkptr       remainder;      /* spare room at end to split off */ +  unsigned long   remainder_size; /* its size */ +  INTERNAL_SIZE_T size; + +  /* If need less alignment than we give anyway, just relay to malloc */ + +  if (alignment <= MALLOC_ALIGNMENT) return _int_malloc(av, bytes); + +  /* Otherwise, ensure that it is at least a minimum chunk size */ + +  if (alignment <  MINSIZE) alignment = MINSIZE; + +  /* Make sure alignment is power of 2 (in case MINSIZE is not).  */ +  if ((alignment & (alignment - 1)) != 0) { +    size_t a = MALLOC_ALIGNMENT * 2; +    while ((unsigned long)a < (unsigned long)alignment) a <<= 1; +    alignment = a; +  } + +  checked_request2size(bytes, nb); + +  /* +    Strategy: find a spot within that chunk that meets the alignment +    request, and then possibly free the leading and trailing space. +  */ + + +  /* Call malloc with worst case padding to hit alignment. */ + +  m  = (char*)(_int_malloc(av, nb + alignment + MINSIZE)); + +  if (m == 0) return 0; /* propagate failure */ + +  p = mem2chunk(m); + +  if ((((unsigned long)(m)) % alignment) != 0) { /* misaligned */ + +    /* +      Find an aligned spot inside chunk.  Since we need to give back +      leading space in a chunk of at least MINSIZE, if the first +      calculation places us at a spot with less than MINSIZE leader, +      we can move to the next aligned spot -- we've allocated enough +      total room so that this is always possible. +    */ + +    brk = (char*)mem2chunk(((unsigned long)(m + alignment - 1)) & +                           -((signed long) alignment)); +    if ((unsigned long)(brk - (char*)(p)) < MINSIZE) +      brk += alignment; + +    newp = (mchunkptr)brk; +    leadsize = brk - (char*)(p); +    newsize = chunksize(p) - leadsize; + +    /* For mmapped chunks, just adjust offset */ +    if (chunk_is_mmapped(p)) { +      newp->prev_size = p->prev_size + leadsize; +      set_head(newp, newsize|IS_MMAPPED); +      set_arena_for_chunk(newp, av); +      return chunk2mem(newp); +    } + +    /* Otherwise, give back leader, use the rest */ +    set_head(newp, newsize | PREV_INUSE ); +    set_inuse_bit_at_offset(newp, newsize); +    set_head_size(p, leadsize); +    set_arena_for_chunk(p, av); +    _int_free(av, chunk2mem(p)); +    p = newp; + +    assert (newsize >= nb && +            (((unsigned long)(chunk2mem(p))) % alignment) == 0); +  } + +  /* Also give back spare room at the end */ +  if (!chunk_is_mmapped(p)) { +    size = chunksize(p); +    if ((unsigned long)(size) > (unsigned long)(nb + MINSIZE)) { +      remainder_size = size - nb; +      remainder = chunk_at_offset(p, nb); +      set_head(remainder, remainder_size | PREV_INUSE ); +      set_head_size(p, nb); +      set_arena_for_chunk(remainder, av); +      _int_free(av, chunk2mem(remainder)); +    } +  } + +  set_arena_for_chunk(p, av); +  check_inuse_chunk(av, p); +  return chunk2mem(p); +} + +#if 1 +/* +  ------------------------------ calloc ------------------------------ +*/ + +#if __STD_C +Void_t* cALLOc(cvmx_arena_list_t arena_list, size_t n_elements, size_t elem_size) +#else +Void_t* cALLOc(n_elements, elem_size) size_t n_elements; size_t elem_size; +#endif +{ +  mchunkptr p; +  unsigned long clearsize; +  unsigned long nclears; +  INTERNAL_SIZE_T* d; + +  Void_t* mem = public_mALLOc(arena_list, n_elements * elem_size); + +  if 
(mem != 0) { +    p = mem2chunk(mem); + +    { +      /* +        Unroll clear of <= 36 bytes (72 if 8byte sizes) +        We know that contents have an odd number of +        INTERNAL_SIZE_T-sized words; minimally 3. +      */ + +      d = (INTERNAL_SIZE_T*)mem; +      clearsize = chunksize(p) - SIZE_SZ; +      nclears = clearsize / sizeof(INTERNAL_SIZE_T); +      assert(nclears >= 3); + +      if (nclears > 9) +        MALLOC_ZERO(d, clearsize); + +      else { +        *(d+0) = 0; +        *(d+1) = 0; +        *(d+2) = 0; +        if (nclears > 4) { +          *(d+3) = 0; +          *(d+4) = 0; +          if (nclears > 6) { +            *(d+5) = 0; +            *(d+6) = 0; +            if (nclears > 8) { +              *(d+7) = 0; +              *(d+8) = 0; +            } +          } +        } +      } +    } +  } +  return mem; +} +#endif + + +/* +  ------------------------- malloc_usable_size ------------------------- +*/ + +#if __STD_C +size_t mUSABLe(Void_t* mem) +#else +size_t mUSABLe(mem) Void_t* mem; +#endif +{ +  mchunkptr p; +  if (mem != 0) { +    p = mem2chunk(mem); +    if (chunk_is_mmapped(p)) +      return chunksize(p) - 3*SIZE_SZ; /* updated size for adding arena_ptr */ +    else if (inuse(p)) +      return chunksize(p) - 2*SIZE_SZ; /* updated size for adding arena_ptr */ +  } +  return 0; +} + +/* +  ------------------------------ mallinfo ------------------------------ +*/ + +struct mallinfo mALLINFo(mstate av) +{ +  struct mallinfo mi; +  int i; +  mbinptr b; +  mchunkptr p; +  INTERNAL_SIZE_T avail; +  INTERNAL_SIZE_T fastavail; +  int nblocks; +  int nfastblocks; + +  /* Ensure initialization */ +  if (av->top == 0)  malloc_consolidate(av); + +  check_malloc_state(av); + +  /* Account for top */ +  avail = chunksize(av->top); +  nblocks = 1;  /* top always exists */ + +  /* traverse fastbins */ +  nfastblocks = 0; +  fastavail = 0; + +  for (i = 0; i < NFASTBINS; ++i) { +    for (p = av->fastbins[i]; p != 0; p = p->fd) { +      ++nfastblocks; +      fastavail += chunksize(p); +    } +  } + +  avail += fastavail; + +  /* traverse regular bins */ +  for (i = 1; i < NBINS; ++i) { +    b = bin_at(av, i); +    for (p = last(b); p != b; p = p->bk) { +      ++nblocks; +      avail += chunksize(p); +    } +  } + +  mi.smblks = nfastblocks; +  mi.ordblks = nblocks; +  mi.fordblks = avail; +  mi.uordblks = av->system_mem - avail; +  mi.arena = av->system_mem; +  mi.fsmblks = fastavail; +  mi.keepcost = chunksize(av->top); +  return mi; +} + +/* +  ------------------------------ malloc_stats ------------------------------ +*/ + +void mSTATs() +{ +} + + +/* +  ------------------------------ mallopt ------------------------------ +*/ + +#if 0 +#if __STD_C +int mALLOPt(int param_number, int value) +#else +int mALLOPt(param_number, value) int param_number; int value; +#endif +{ +} +#endif + + +/* +  -------------------- Alternative MORECORE functions -------------------- +*/ + + +/* +  General Requirements for MORECORE. + +  The MORECORE function must have the following properties: + +  If MORECORE_CONTIGUOUS is false: + +    * MORECORE must allocate in multiples of pagesize. It will +      only be called with arguments that are multiples of pagesize. + +    * MORECORE(0) must return an address that is at least +      MALLOC_ALIGNMENT aligned. (Page-aligning always suffices.) + +  else (i.e. 
If MORECORE_CONTIGUOUS is true): + +    * Consecutive calls to MORECORE with positive arguments +      return increasing addresses, indicating that space has been +      contiguously extended. + +    * MORECORE need not allocate in multiples of pagesize. +      Calls to MORECORE need not have args of multiples of pagesize. + +    * MORECORE need not page-align. + +  In either case: + +    * MORECORE may allocate more memory than requested. (Or even less, +      but this will generally result in a malloc failure.) + +    * MORECORE must not allocate memory when given argument zero, but +      instead return one past the end address of memory from previous +      nonzero call. This malloc does NOT call MORECORE(0) +      until at least one call with positive arguments is made, so +      the initial value returned is not important. + +    * Even though consecutive calls to MORECORE need not return contiguous +      addresses, it must be OK for malloc'ed chunks to span multiple +      regions in those cases where they do happen to be contiguous. + +    * MORECORE need not handle negative arguments -- it may instead +      just return MORECORE_FAILURE when given negative arguments. +      Negative arguments are always multiples of pagesize. MORECORE +      must not misinterpret negative args as large positive unsigned +      args. You can suppress all such calls from even occurring by defining +      MORECORE_CANNOT_TRIM, + +  There is some variation across systems about the type of the +  argument to sbrk/MORECORE. If size_t is unsigned, then it cannot +  actually be size_t, because sbrk supports negative args, so it is +  normally the signed type of the same width as size_t (sometimes +  declared as "intptr_t", and sometimes "ptrdiff_t").  It doesn't much +  matter though. Internally, we use "long" as arguments, which should +  work across all reasonable possibilities. + +  Additionally, if MORECORE ever returns failure for a positive +  request, and HAVE_MMAP is true, then mmap is used as a noncontiguous +  system allocator. This is a useful backup strategy for systems with +  holes in address spaces -- in this case sbrk cannot contiguously +  expand the heap, but mmap may be able to map noncontiguous space. + +  If you'd like mmap to ALWAYS be used, you can define MORECORE to be +  a function that always returns MORECORE_FAILURE. + +  If you are using this malloc with something other than sbrk (or its +  emulation) to supply memory regions, you probably want to set +  MORECORE_CONTIGUOUS as false.  As an example, here is a custom +  allocator kindly contributed for pre-OSX macOS.  It uses virtually +  but not necessarily physically contiguous non-paged memory (locked +  in, present and won't get swapped out).  You can use it by +  uncommenting this section, adding some #includes, and setting up the +  appropriate defines above: + +      #define MORECORE osMoreCore +      #define MORECORE_CONTIGUOUS 0 + +  There is also a shutdown routine that should somehow be called for +  cleanup upon program exit. 
+ +  #define MAX_POOL_ENTRIES 100 +  #define MINIMUM_MORECORE_SIZE  (64 * 1024) +  static int next_os_pool; +  void *our_os_pools[MAX_POOL_ENTRIES]; + +  void *osMoreCore(int size) +  { +    void *ptr = 0; +    static void *sbrk_top = 0; + +    if (size > 0) +    { +      if (size < MINIMUM_MORECORE_SIZE) +         size = MINIMUM_MORECORE_SIZE; +      if (CurrentExecutionLevel() == kTaskLevel) +         ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0); +      if (ptr == 0) +      { +        return (void *) MORECORE_FAILURE; +      } +      // save ptrs so they can be freed during cleanup +      our_os_pools[next_os_pool] = ptr; +      next_os_pool++; +      ptr = (void *) ((((unsigned long) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK); +      sbrk_top = (char *) ptr + size; +      return ptr; +    } +    else if (size < 0) +    { +      // we don't currently support shrink behavior +      return (void *) MORECORE_FAILURE; +    } +    else +    { +      return sbrk_top; +    } +  } + +  // cleanup any allocated memory pools +  // called as last thing before shutting down driver + +  void osCleanupMem(void) +  { +    void **ptr; + +    for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++) +      if (*ptr) +      { +         PoolDeallocate(*ptr); +         *ptr = 0; +      } +  } + +*/ + + + +/* ------------------------------------------------------------ +History: + +[see ftp://g.oswego.edu/pub/misc/malloc.c for the history of dlmalloc] + +*/
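The MORECORE contract described above (positive increments extend memory, zero reports one past the end of the last extension, negative trim requests may simply fail) can also be met by something much smaller than the Mac-specific osMoreCore pool allocator. The sketch below is not part of malloc.c; the names arena, ARENA_BYTES and static_morecore are made up for illustration, MORECORE_FAILURE is assumed to be the conventional (void *) -1, and it would be wired in with something like "#define MORECORE static_morecore" while leaving MORECORE_CONTIGUOUS at 1, since successive extensions really are contiguous here.

#include <stddef.h>

#define ARENA_BYTES (1 << 20)
#define MORECORE_FAILURE ((void *) -1)      /* assumed failure value */

/* Backing store; the union keeps the arena suitably aligned. */
static union { char bytes[ARENA_BYTES]; max_align_t align; } arena;
static size_t arena_used;                   /* bytes handed out so far */

void *static_morecore(long increment)
{
    if (increment > 0) {
        /* Positive request: hand out the next contiguous piece, if any is left. */
        if ((size_t) increment > ARENA_BYTES - arena_used)
            return MORECORE_FAILURE;
        void *old_end = arena.bytes + arena_used;
        arena_used += (size_t) increment;
        return old_end;
    }
    if (increment < 0)
        return MORECORE_FAILURE;            /* trimming not supported in this sketch */

    /* increment == 0: one past the end of memory handed out so far */
    return arena.bytes + arena_used;
}

Because consecutive positive calls return strictly increasing, adjacent addresses, malloc can merge the extensions into a single top chunk; that is the contiguity property the osMoreCore example cannot guarantee, which is why its suggested configuration sets MORECORE_CONTIGUOUS to 0.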
