summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile9
-rw-r--r--compat/nedmalloc/License.txt23
-rw-r--r--compat/nedmalloc/Readme.txt136
-rw-r--r--compat/nedmalloc/malloc.c.h5750
-rw-r--r--compat/nedmalloc/nedmalloc.c966
-rw-r--r--compat/nedmalloc/nedmalloc.h180
6 files changed, 7064 insertions, 0 deletions
diff --git a/Makefile b/Makefile
index 107015a598..c718ec7af2 100644
--- a/Makefile
+++ b/Makefile
@@ -178,6 +178,9 @@ all::
#
# Define NO_CROSS_DIRECTORY_HARDLINKS if you plan to distribute the installed
# programs as a tar, where bin/ and libexec/ might be on different file systems.
+#
+# Define USE_NED_ALLOCATOR if you want to replace the platforms default
+# memory allocators with the nedmalloc allocator written by Niall Douglas.
GIT-VERSION-FILE: .FORCE-GIT-VERSION-FILE
@$(SHELL_PATH) ./GIT-VERSION-GEN
@@ -844,6 +847,7 @@ ifneq (,$(findstring MINGW,$(uname_S)))
NO_ST_BLOCKS_IN_STRUCT_STAT = YesPlease
NO_NSEC = YesPlease
USE_WIN32_MMAP = YesPlease
+ USE_NED_ALLOCATOR = YesPlease
UNRELIABLE_FSTAT = UnfortunatelyYes
OBJECT_CREATION_USES_RENAMES = UnfortunatelyNeedsTo
COMPAT_CFLAGS += -D__USE_MINGW_ACCESS -DNOGDI -Icompat -Icompat/regex -Icompat/fnmatch
@@ -1130,6 +1134,11 @@ ifdef UNRELIABLE_FSTAT
BASIC_CFLAGS += -DUNRELIABLE_FSTAT
endif
+ifdef USE_NED_ALLOCATOR
+ COMPAT_CFLAGS += -DUSE_NED_ALLOCATOR -DOVERRIDE_STRDUP -DNDEBUG -DREPLACE_SYSTEM_ALLOCATOR -Icompat/nedmalloc
+ COMPAT_OBJS += compat/nedmalloc/nedmalloc.o
+endif
+
ifeq ($(TCLTK_PATH),)
NO_TCLTK=NoThanks
endif
diff --git a/compat/nedmalloc/License.txt b/compat/nedmalloc/License.txt
new file mode 100644
index 0000000000..36b7cd93cd
--- /dev/null
+++ b/compat/nedmalloc/License.txt
@@ -0,0 +1,23 @@
+Boost Software License - Version 1.0 - August 17th, 2003
+
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/compat/nedmalloc/Readme.txt b/compat/nedmalloc/Readme.txt
new file mode 100644
index 0000000000..876365646e
--- /dev/null
+++ b/compat/nedmalloc/Readme.txt
@@ -0,0 +1,136 @@
+nedalloc v1.05 15th June 2008:
+-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+
+by Niall Douglas (http://www.nedprod.com/programs/portable/nedmalloc/)
+
+Enclosed is nedalloc, an alternative malloc implementation for multiple
+threads without lock contention based on dlmalloc v2.8.4. It is more
+or less a newer implementation of ptmalloc2, the standard allocator in
+Linux (which is based on dlmalloc v2.7.0) but also contains a per-thread
+cache for maximum CPU scalability.
+
+It is licensed under the Boost Software License which basically means
+you can do anything you like with it. This does not apply to the malloc.c.h
+file which remains copyright to others.
+
+It has been tested on win32 (x86), win64 (x64), Linux (x64), FreeBSD (x64)
+and Apple MacOS X (x86). It works very well on all of these and is very
+significantly faster than the system allocator on all of these platforms.
+
+By literally dropping in this allocator as a replacement for your system
+allocator, you can see real world improvements of up to three times in normal
+code!
+
+To use:
+-=-=-=-
+Drop in nedmalloc.h, nedmalloc.c and malloc.c.h into your project.
+Configure using the instructions in nedmalloc.h. Run and enjoy.
+
+To test, compile test.c. It will run a comparison between your system
+allocator and nedalloc and tell you how much faster nedalloc is. It also
+serves as an example of usage.
+
+Notes:
+-=-=-=
+If you want the very latest version of this allocator, get it from the
+TnFOX SVN repository at svn://svn.berlios.de/viewcvs/tnfox/trunk/src/nedmalloc
+
+Because of how nedalloc allocates an mspace per thread, it can cause
+severe bloating of memory usage under certain allocation patterns.
+You can substantially reduce this wastage by setting MAXTHREADSINPOOL
+or the threads parameter to nedcreatepool() to a fraction of the number of
+threads which would normally be in a pool at once. This will reduce
+bloating at the cost of an increase in lock contention. If allocated size
+is less than THREADCACHEMAX, locking is avoided 90-99% of the time and
+if most of your allocations are below this value, you can safely set
+MAXTHREADSINPOOL to one.
+
+You will suffer memory leakage unless you call neddisablethreadcache()
+per pool for every thread which exits. This is because nedalloc cannot
+portably know when a thread exits and thus when its thread cache can
+be returned for use by other code. Don't forget pool zero, the system pool.
+
+For C++ type allocation patterns (where the same sizes of memory are
+regularly allocated and deallocated as objects are created and destroyed),
+the threadcache always benefits performance. If however your allocation
+patterns are different, searching the threadcache may significantly slow
+down your code - as a rule of thumb, if cache utilisation is below 80%
+(see the source for neddisablethreadcache() for how to enable debug
+printing in release mode) then you should disable the thread cache for
+that thread. You can compile out the threadcache code by setting
+THREADCACHEMAX to zero.
+
+Speed comparisons:
+-=-=-=-=-=-=-=-=-=
+See Benchmarks.xls for details.
+
+The enclosed test.c can do two things: it can be a torture test or a speed
+test. The speed test is designed to be a representative synthetic
+memory allocator test. It works by randomly mixing allocations with frees
+with half of the allocation sizes being a two power multiple less than
+512 bytes (to mimic C++ stack instantiated objects) and the other half
+being a simple random value less than 16Kb.
+
+The real world code results are from Tn's TestIO benchmark. This is a
+heavily multithreaded and memory intensive benchmark with a lot of branching
+and other stuff modern processors don't like so much. As you'll note, the
+test doesn't show the benefits of the threadcache mostly due to the saturation
+of the memory bus being the limiting factor.
+
+ChangeLog:
+-=-=-=-=-=
+v1.05 15th June 2008:
+ * { 1042 } Added error check for TLSSET() and TLSFREE() macros. Thanks to
+Markus Elfring for reporting this.
+ * { 1043 } Fixed a segfault when freeing memory allocated using
+nedindependent_comalloc(). Thanks to Pavel Vozenilek for reporting this.
+
+v1.04 14th July 2007:
+ * Fixed a bug with the new optimised implementation that failed to lock
+on a realloc under certain conditions.
+ * Fixed lack of thread synchronisation in InitPool() causing pool corruption
+ * Fixed a memory leak of thread cache contents on disabling. Thanks to Earl
+Chew for reporting this.
+ * Added a sanity check for freed blocks being valid.
+ * Reworked test.c into being a torture test.
+ * Fixed GCC assembler optimisation misspecification
+
+v1.04alpha_svn915 7th October 2006:
+ * Fixed failure to unlock thread cache list if allocating a new list failed.
+Thanks to Dmitry Chichkov for reporting this. Futher thanks to Aleksey Sanin.
+ * Fixed realloc(0, <size>) segfaulting. Thanks to Dmitry Chichkov for
+reporting this.
+ * Made config defines #ifndef so they can be overriden by the build system.
+Thanks to Aleksey Sanin for suggesting this.
+ * Fixed deadlock in nedprealloc() due to unnecessary locking of preferred
+thread mspace when mspace_realloc() always uses the original block's mspace
+anyway. Thanks to Aleksey Sanin for reporting this.
+ * Made some speed improvements by hacking mspace_malloc() to no longer lock
+its mspace, thus allowing the recursive mutex implementation to be removed
+with an associated speed increase. Thanks to Aleksey Sanin for suggesting this.
+ * Fixed a bug where allocating mspaces overran its max limit. Thanks to
+Aleksey Sanin for reporting this.
+
+v1.03 10th July 2006:
+ * Fixed memory corruption bug in threadcache code which only appeared with >4
+threads and in heavy use of the threadcache.
+
+v1.02 15th May 2006:
+ * Integrated dlmalloc v2.8.4, fixing the win32 memory release problem and
+improving performance still further. Speed is now up to twice the speed of v1.01
+(average is 67% faster).
+ * Fixed win32 critical section implementation. Thanks to Pavel Kuznetsov
+for reporting this.
+ * Wasn't locking mspace if all mspaces were locked. Thanks to Pavel Kuznetsov
+for reporting this.
+ * Added Apple Mac OS X support.
+
+v1.01 24th February 2006:
+ * Fixed multiprocessor scaling problems by removing sources of cache sloshing
+ * Earl Chew <earl_chew <at> agilent <dot> com> sent patches for the following:
+ 1. size2binidx() wasn't working for default code path (non x86)
+ 2. Fixed failure to release mspace lock under certain circumstances which
+ caused a deadlock
+
+v1.00 1st January 2006:
+ * First release
diff --git a/compat/nedmalloc/malloc.c.h b/compat/nedmalloc/malloc.c.h
new file mode 100644
index 0000000000..bb0f482d9f
--- /dev/null
+++ b/compat/nedmalloc/malloc.c.h
@@ -0,0 +1,5750 @@
+/*
+ This is a version (aka dlmalloc) of malloc/free/realloc written by
+ Doug Lea and released to the public domain, as explained at
+ http://creativecommons.org/licenses/publicdomain. Send questions,
+ comments, complaints, performance data, etc to dl@cs.oswego.edu
+
+* Version pre-2.8.4 Mon Nov 27 11:22:37 2006 (dl at gee)
+
+ Note: There may be an updated version of this malloc obtainable at
+ ftp://gee.cs.oswego.edu/pub/misc/malloc.c
+ Check before installing!
+
+* Quickstart
+
+ This library is all in one file to simplify the most common usage:
+ ftp it, compile it (-O3), and link it into another program. All of
+ the compile-time options default to reasonable values for use on
+ most platforms. You might later want to step through various
+ compile-time and dynamic tuning options.
+
+ For convenience, an include file for code using this malloc is at:
+ ftp://gee.cs.oswego.edu/pub/misc/malloc-2.8.4.h
+ You don't really need this .h file unless you call functions not
+ defined in your system include files. The .h file contains only the
+ excerpts from this file needed for using this malloc on ANSI C/C++
+ systems, so long as you haven't changed compile-time options about
+ naming and tuning parameters. If you do, then you can create your
+ own malloc.h that does include all settings by cutting at the point
+ indicated below. Note that you may already by default be using a C
+ library containing a malloc that is based on some version of this
+ malloc (for example in linux). You might still want to use the one
+ in this file to customize settings or to avoid overheads associated
+ with library versions.
+
+* Vital statistics:
+
+ Supported pointer/size_t representation: 4 or 8 bytes
+ size_t MUST be an unsigned type of the same width as
+ pointers. (If you are using an ancient system that declares
+ size_t as a signed type, or need it to be a different width
+ than pointers, you can use a previous release of this malloc
+ (e.g. 2.7.2) supporting these.)
+
+ Alignment: 8 bytes (default)
+ This suffices for nearly all current machines and C compilers.
+ However, you can define MALLOC_ALIGNMENT to be wider than this
+ if necessary (up to 128bytes), at the expense of using more space.
+
+ Minimum overhead per allocated chunk: 4 or 8 bytes (if 4byte sizes)
+ 8 or 16 bytes (if 8byte sizes)
+ Each malloced chunk has a hidden word of overhead holding size
+ and status information, and additional cross-check word
+ if FOOTERS is defined.
+
+ Minimum allocated size: 4-byte ptrs: 16 bytes (including overhead)
+ 8-byte ptrs: 32 bytes (including overhead)
+
+ Even a request for zero bytes (i.e., malloc(0)) returns a
+ pointer to something of the minimum allocatable size.
+ The maximum overhead wastage (i.e., number of extra bytes
+ allocated than were requested in malloc) is less than or equal
+ to the minimum size, except for requests >= mmap_threshold that
+ are serviced via mmap(), where the worst case wastage is about
+ 32 bytes plus the remainder from a system page (the minimal
+ mmap unit); typically 4096 or 8192 bytes.
+
+ Security: static-safe; optionally more or less
+ The "security" of malloc refers to the ability of malicious
+ code to accentuate the effects of errors (for example, freeing
+ space that is not currently malloc'ed or overwriting past the
+ ends of chunks) in code that calls malloc. This malloc
+ guarantees not to modify any memory locations below the base of
+ heap, i.e., static variables, even in the presence of usage
+ errors. The routines additionally detect most improper frees
+ and reallocs. All this holds as long as the static bookkeeping
+ for malloc itself is not corrupted by some other means. This
+ is only one aspect of security -- these checks do not, and
+ cannot, detect all possible programming errors.
+
+ If FOOTERS is defined nonzero, then each allocated chunk
+ carries an additional check word to verify that it was malloced
+ from its space. These check words are the same within each
+ execution of a program using malloc, but differ across
+ executions, so externally crafted fake chunks cannot be
+ freed. This improves security by rejecting frees/reallocs that
+ could corrupt heap memory, in addition to the checks preventing
+ writes to statics that are always on. This may further improve
+ security at the expense of time and space overhead. (Note that
+ FOOTERS may also be worth using with MSPACES.)
+
+ By default detected errors cause the program to abort (calling
+ "abort()"). You can override this to instead proceed past
+ errors by defining PROCEED_ON_ERROR. In this case, a bad free
+ has no effect, and a malloc that encounters a bad address
+ caused by user overwrites will ignore the bad address by
+ dropping pointers and indices to all known memory. This may
+ be appropriate for programs that should continue if at all
+ possible in the face of programming errors, although they may
+ run out of memory because dropped memory is never reclaimed.
+
+ If you don't like either of these options, you can define
+ CORRUPTION_ERROR_ACTION and USAGE_ERROR_ACTION to do anything
+ else. And if if you are sure that your program using malloc has
+ no errors or vulnerabilities, you can define INSECURE to 1,
+ which might (or might not) provide a small performance improvement.
+
+ Thread-safety: NOT thread-safe unless USE_LOCKS defined
+ When USE_LOCKS is defined, each public call to malloc, free,
+ etc is surrounded with either a pthread mutex or a win32
+ spinlock (depending on WIN32). This is not especially fast, and
+ can be a major bottleneck. It is designed only to provide
+ minimal protection in concurrent environments, and to provide a
+ basis for extensions. If you are using malloc in a concurrent
+ program, consider instead using nedmalloc
+ (http://www.nedprod.com/programs/portable/nedmalloc/) or
+ ptmalloc (See http://www.malloc.de), which are derived
+ from versions of this malloc.
+
+ System requirements: Any combination of MORECORE and/or MMAP/MUNMAP
+ This malloc can use unix sbrk or any emulation (invoked using
+ the CALL_MORECORE macro) and/or mmap/munmap or any emulation
+ (invoked using CALL_MMAP/CALL_MUNMAP) to get and release system
+ memory. On most unix systems, it tends to work best if both
+ MORECORE and MMAP are enabled. On Win32, it uses emulations
+ based on VirtualAlloc. It also uses common C library functions
+ like memset.
+
+ Compliance: I believe it is compliant with the Single Unix Specification
+ (See http://www.unix.org). Also SVID/XPG, ANSI C, and probably
+ others as well.
+
+* Overview of algorithms
+
+ This is not the fastest, most space-conserving, most portable, or
+ most tunable malloc ever written. However it is among the fastest
+ while also being among the most space-conserving, portable and
+ tunable. Consistent balance across these factors results in a good
+ general-purpose allocator for malloc-intensive programs.
+
+ In most ways, this malloc is a best-fit allocator. Generally, it
+ chooses the best-fitting existing chunk for a request, with ties
+ broken in approximately least-recently-used order. (This strategy
+ normally maintains low fragmentation.) However, for requests less
+ than 256bytes, it deviates from best-fit when there is not an
+ exactly fitting available chunk by preferring to use space adjacent
+ to that used for the previous small request, as well as by breaking
+ ties in approximately most-recently-used order. (These enhance
+ locality of series of small allocations.) And for very large requests
+ (>= 256Kb by default), it relies on system memory mapping
+ facilities, if supported. (This helps avoid carrying around and
+ possibly fragmenting memory used only for large chunks.)
+
+ All operations (except malloc_stats and mallinfo) have execution
+ times that are bounded by a constant factor of the number of bits in
+ a size_t, not counting any clearing in calloc or copying in realloc,
+ or actions surrounding MORECORE and MMAP that have times
+ proportional to the number of non-contiguous regions returned by
+ system allocation routines, which is often just 1. In real-time
+ applications, you can optionally suppress segment traversals using
+ NO_SEGMENT_TRAVERSAL, which assures bounded execution even when
+ system allocators return non-contiguous spaces, at the typical
+ expense of carrying around more memory and increased fragmentation.
+
+ The implementation is not very modular and seriously overuses
+ macros. Perhaps someday all C compilers will do as good a job
+ inlining modular code as can now be done by brute-force expansion,
+ but now, enough of them seem not to.
+
+ Some compilers issue a lot of warnings about code that is
+ dead/unreachable only on some platforms, and also about intentional
+ uses of negation on unsigned types. All known cases of each can be
+ ignored.
+
+ For a longer but out of date high-level description, see
+ http://gee.cs.oswego.edu/dl/html/malloc.html
+
+* MSPACES
+ If MSPACES is defined, then in addition to malloc, free, etc.,
+ this file also defines mspace_malloc, mspace_free, etc. These
+ are versions of malloc routines that take an "mspace" argument
+ obtained using create_mspace, to control all internal bookkeeping.
+ If ONLY_MSPACES is defined, only these versions are compiled.
+ So if you would like to use this allocator for only some allocations,
+ and your system malloc for others, you can compile with
+ ONLY_MSPACES and then do something like...
+ static mspace mymspace = create_mspace(0,0); // for example
+ #define mymalloc(bytes) mspace_malloc(mymspace, bytes)
+
+ (Note: If you only need one instance of an mspace, you can instead
+ use "USE_DL_PREFIX" to relabel the global malloc.)
+
+ You can similarly create thread-local allocators by storing
+ mspaces as thread-locals. For example:
+ static __thread mspace tlms = 0;
+ void* tlmalloc(size_t bytes) {
+ if (tlms == 0) tlms = create_mspace(0, 0);
+ return mspace_malloc(tlms, bytes);
+ }
+ void tlfree(void* mem) { mspace_free(tlms, mem); }
+
+ Unless FOOTERS is defined, each mspace is completely independent.
+ You cannot allocate from one and free to another (although
+ conformance is only weakly checked, so usage errors are not always
+ caught). If FOOTERS is defined, then each chunk carries around a tag
+ indicating its originating mspace, and frees are directed to their
+ originating spaces.
+
+ ------------------------- Compile-time options ---------------------------
+
+Be careful in setting #define values for numerical constants of type
+size_t. On some systems, literal values are not automatically extended
+to size_t precision unless they are explicitly casted. You can also
+use the symbolic values MAX_SIZE_T, SIZE_T_ONE, etc below.
+
+WIN32 default: defined if _WIN32 defined
+ Defining WIN32 sets up defaults for MS environment and compilers.
+ Otherwise defaults are for unix. Beware that there seem to be some
+ cases where this malloc might not be a pure drop-in replacement for
+ Win32 malloc: Random-looking failures from Win32 GDI API's (eg;
+ SetDIBits()) may be due to bugs in some video driver implementations
+ when pixel buffers are malloc()ed, and the region spans more than
+ one VirtualAlloc()ed region. Because dlmalloc uses a small (64Kb)
+ default granularity, pixel buffers may straddle virtual allocation
+ regions more often than when using the Microsoft allocator. You can
+ avoid this by using VirtualAlloc() and VirtualFree() for all pixel
+ buffers rather than using malloc(). If this is not possible,
+ recompile this malloc with a larger DEFAULT_GRANULARITY.
+
+MALLOC_ALIGNMENT default: (size_t)8
+ Controls the minimum alignment for malloc'ed chunks. It must be a
+ power of two and at least 8, even on machines for which smaller
+ alignments would suffice. It may be defined as larger than this
+ though. Note however that code and data structures are optimized for
+ the case of 8-byte alignment.
+
+MSPACES default: 0 (false)
+ If true, compile in support for independent allocation spaces.
+ This is only supported if HAVE_MMAP is true.
+
+ONLY_MSPACES default: 0 (false)
+ If true, only compile in mspace versions, not regular versions.
+
+USE_LOCKS default: 0 (false)
+ Causes each call to each public routine to be surrounded with
+ pthread or WIN32 mutex lock/unlock. (If set true, this can be
+ overridden on a per-mspace basis for mspace versions.) If set to a
+ non-zero value other than 1, locks are used, but their
+ implementation is left out, so lock functions must be supplied manually.
+
+USE_SPIN_LOCKS default: 1 iff USE_LOCKS and on x86 using gcc or MSC
+ If true, uses custom spin locks for locking. This is currently
+ supported only for x86 platforms using gcc or recent MS compilers.
+ Otherwise, posix locks or win32 critical sections are used.
+
+FOOTERS default: 0
+ If true, provide extra checking and dispatching by placing
+ information in the footers of allocated chunks. This adds
+ space and time overhead.
+
+INSECURE default: 0
+ If true, omit checks for usage errors and heap space overwrites.
+
+USE_DL_PREFIX default: NOT defined
+ Causes compiler to prefix all public routines with the string 'dl'.
+ This can be useful when you only want to use this malloc in one part
+ of a program, using your regular system malloc elsewhere.
+
+ABORT default: defined as abort()
+ Defines how to abort on failed checks. On most systems, a failed
+ check cannot die with an "assert" or even print an informative
+ message, because the underlying print routines in turn call malloc,
+ which will fail again. Generally, the best policy is to simply call
+ abort(). It's not very useful to do more than this because many
+ errors due to overwriting will show up as address faults (null, odd
+ addresses etc) rather than malloc-triggered checks, so will also
+ abort. Also, most compilers know that abort() does not return, so
+ can better optimize code conditionally calling it.
+
+PROCEED_ON_ERROR default: defined as 0 (false)
+ Controls whether detected bad addresses cause them to bypassed
+ rather than aborting. If set, detected bad arguments to free and
+ realloc are ignored. And all bookkeeping information is zeroed out
+ upon a detected overwrite of freed heap space, thus losing the
+ ability to ever return it from malloc again, but enabling the
+ application to proceed. If PROCEED_ON_ERROR is defined, the
+ static variable malloc_corruption_error_count is compiled in
+ and can be examined to see if errors have occurred. This option
+ generates slower code than the default abort policy.
+
+DEBUG default: NOT defined
+ The DEBUG setting is mainly intended for people trying to modify
+ this code or diagnose problems when porting to new platforms.
+ However, it may also be able to better isolate user errors than just
+ using runtime checks. The assertions in the check routines spell
+ out in more detail the assumptions and invariants underlying the
+ algorithms. The checking is fairly extensive, and will slow down
+ execution noticeably. Calling malloc_stats or mallinfo with DEBUG
+ set will attempt to check every non-mmapped allocated and free chunk
+ in the course of computing the summaries.
+
+ABORT_ON_ASSERT_FAILURE default: defined as 1 (true)
+ Debugging assertion failures can be nearly impossible if your
+ version of the assert macro causes malloc to be called, which will
+ lead to a cascade of further failures, blowing the runtime stack.
+ ABORT_ON_ASSERT_FAILURE cause assertions failures to call abort(),
+ which will usually make debugging easier.
+
+MALLOC_FAILURE_ACTION default: sets errno to ENOMEM, or no-op on win32
+ The action to take before "return 0" when malloc fails to be able to
+ return memory because there is none available.
+
+HAVE_MORECORE default: 1 (true) unless win32 or ONLY_MSPACES
+ True if this system supports sbrk or an emulation of it.
+
+MORECORE default: sbrk
+ The name of the sbrk-style system routine to call to obtain more
+ memory. See below for guidance on writing custom MORECORE
+ functions. The type of the argument to sbrk/MORECORE varies across
+ systems. It cannot be size_t, because it supports negative
+ arguments, so it is normally the signed type of the same width as
+ size_t (sometimes declared as "intptr_t"). It doesn't much matter
+ though. Internally, we only call it with arguments less than half
+ the max value of a size_t, which should work across all reasonable
+ possibilities, although sometimes generating compiler warnings.
+
+MORECORE_CONTIGUOUS default: 1 (true) if HAVE_MORECORE
+ If true, take advantage of fact that consecutive calls to MORECORE
+ with positive arguments always return contiguous increasing
+ addresses. This is true of unix sbrk. It does not hurt too much to
+ set it true anyway, since malloc copes with non-contiguities.
+ Setting it false when definitely non-contiguous saves time
+ and possibly wasted space it would take to discover this though.
+
+MORECORE_CANNOT_TRIM default: NOT defined
+ True if MORECORE cannot release space back to the system when given
+ negative arguments. This is generally necessary only if you are
+ using a hand-crafted MORECORE function that cannot handle negative
+ arguments.
+
+NO_SEGMENT_TRAVERSAL default: 0
+ If non-zero, suppresses traversals of memory segments
+ returned by either MORECORE or CALL_MMAP. This disables
+ merging of segments that are contiguous, and selectively
+ releasing them to the OS if unused, but bounds execution times.
+
+HAVE_MMAP default: 1 (true)
+ True if this system supports mmap or an emulation of it. If so, and
+ HAVE_MORECORE is not true, MMAP is used for all system
+ allocation. If set and HAVE_MORECORE is true as well, MMAP is
+ primarily used to directly allocate very large blocks. It is also
+ used as a backup strategy in cases where MORECORE fails to provide
+ space from system. Note: A single call to MUNMAP is assumed to be
+ able to unmap memory that may have be allocated using multiple calls
+ to MMAP, so long as they are adjacent.
+
+HAVE_MREMAP default: 1 on linux, else 0
+ If true realloc() uses mremap() to re-allocate large blocks and
+ extend or shrink allocation spaces.
+
+MMAP_CLEARS default: 1 except on WINCE.
+ True if mmap clears memory so calloc doesn't need to. This is true
+ for standard unix mmap using /dev/zero and on WIN32 except for WINCE.
+
+USE_BUILTIN_FFS default: 0 (i.e., not used)
+ Causes malloc to use the builtin ffs() function to compute indices.
+ Some compilers may recognize and intrinsify ffs to be faster than the
+ supplied C version. Also, the case of x86 using gcc is special-cased
+ to an asm instruction, so is already as fast as it can be, and so
+ this setting has no effect. Similarly for Win32 under recent MS compilers.
+ (On most x86s, the asm version is only slightly faster than the C version.)
+
+malloc_getpagesize default: derive from system includes, or 4096.
+ The system page size. To the extent possible, this malloc manages
+ memory from the system in page-size units. This may be (and
+ usually is) a function rather than a constant. This is ignored
+ if WIN32, where page size is determined using getSystemInfo during
+ initialization.
+
+USE_DEV_RANDOM default: 0 (i.e., not used)
+ Causes malloc to use /dev/random to initialize secure magic seed for
+ stamping footers. Otherwise, the current time is used.
+
+NO_MALLINFO default: 0
+ If defined, don't compile "mallinfo". This can be a simple way
+ of dealing with mismatches between system declarations and
+ those in this file.
+
+MALLINFO_FIELD_TYPE default: size_t
+ The type of the fields in the mallinfo struct. This was originally
+ defined as "int" in SVID etc, but is more usefully defined as
+ size_t. The value is used only if HAVE_USR_INCLUDE_MALLOC_H is not set
+
+REALLOC_ZERO_BYTES_FREES default: not defined
+ This should be set if a call to realloc with zero bytes should
+ be the same as a call to free. Some people think it should. Otherwise,
+ since this malloc returns a unique pointer for malloc(0), so does
+ realloc(p, 0).
+
+LACKS_UNISTD_H, LACKS_FCNTL_H, LACKS_SYS_PARAM_H, LACKS_SYS_MMAN_H
+LACKS_STRINGS_H, LACKS_STRING_H, LACKS_SYS_TYPES_H, LACKS_ERRNO_H
+LACKS_STDLIB_H default: NOT defined unless on WIN32
+ Define these if your system does not have these header files.
+ You might need to manually insert some of the declarations they provide.
+
+DEFAULT_GRANULARITY default: page size if MORECORE_CONTIGUOUS,
+ system_info.dwAllocationGranularity in WIN32,
+ otherwise 64K.
+ Also settable using mallopt(M_GRANULARITY, x)
+ The unit for allocating and deallocating memory from the system. On
+ most systems with contiguous MORECORE, there is no reason to
+ make this more than a page. However, systems with MMAP tend to
+ either require or encourage larger granularities. You can increase
+ this value to prevent system allocation functions to be called so
+ often, especially if they are slow. The value must be at least one
+ page and must be a power of two. Setting to 0 causes initialization
+ to either page size or win32 region size. (Note: In previous
+ versions of malloc, the equivalent of this option was called
+ "TOP_PAD")
+
+DEFAULT_TRIM_THRESHOLD default: 2MB
+ Also settable using mallopt(M_TRIM_THRESHOLD, x)
+ The maximum amount of unused top-most memory to keep before
+ releasing via malloc_trim in free(). Automatic trimming is mainly
+ useful in long-lived programs using contiguous MORECORE. Because
+ trimming via sbrk can be slow on some systems, and can sometimes be
+ wasteful (in cases where programs immediately afterward allocate
+ more large chunks) the value should be high enough so that your
+ overall system performance would improve by releasing this much
+ memory. As a rough guide, you might set to a value close to the
+ average size of a process (program) running on your system.
+ Releasing this much memory would allow such a process to run in
+ memory. Generally, it is worth tuning trim thresholds when a
+ program undergoes phases where several large chunks are allocated
+ and released in ways that can reuse each other's storage, perhaps
+ mixed with phases where there are no such chunks at all. The trim
+ value must be greater than page size to have any useful effect. To
+ disable trimming completely, you can set to MAX_SIZE_T. Note that the trick
+ some people use of mallocing a huge space and then freeing it at
+ program startup, in an attempt to reserve system memory, doesn't
+ have the intended effect under automatic trimming, since that memory
+ will immediately be returned to the system.
+
+DEFAULT_MMAP_THRESHOLD default: 256K
+ Also settable using mallopt(M_MMAP_THRESHOLD, x)
+ The request size threshold for using MMAP to directly service a
+ request. Requests of at least this size that cannot be allocated
+ using already-existing space will be serviced via mmap. (If enough
+ normal freed space already exists it is used instead.) Using mmap
+ segregates relatively large chunks of memory so that they can be
+ individually obtained and released from the host system. A request
+ serviced through mmap is never reused by any other request (at least
+ not directly; the system may just so happen to remap successive
+ requests to the same locations). Segregating space in this way has
+ the benefits that: Mmapped space can always be individually released
+ back to the system, which helps keep the system level memory demands
+ of a long-lived program low. Also, mapped memory doesn't become
+ `locked' between other chunks, as can happen with normally allocated
+ chunks, which means that even trimming via malloc_trim would not
+ release them. However, it has the disadvantage that the space
+ cannot be reclaimed, consolidated, and then used to service later
+ requests, as happens with normal chunks. The advantages of mmap
+ nearly always outweigh disadvantages for "large" chunks, but the
+ value of "large" may vary across systems. The default is an
+ empirically derived value that works well in most systems. You can
+ disable mmap by setting to MAX_SIZE_T.
+
+MAX_RELEASE_CHECK_RATE default: 4095 unless not HAVE_MMAP
+ The number of consolidated frees between checks to release
+ unused segments when freeing. When using non-contiguous segments,
+ especially with multiple mspaces, checking only for topmost space
+ doesn't always suffice to trigger trimming. To compensate for this,
+ free() will, with a period of MAX_RELEASE_CHECK_RATE (or the
+ current number of segments, if greater) try to release unused
+ segments to the OS when freeing chunks that result in
+ consolidation. The best value for this parameter is a compromise
+ between slowing down frees with relatively costly checks that
+ rarely trigger versus holding on to unused memory. To effectively
+ disable, set to MAX_SIZE_T. This may lead to a very slight speed
+ improvement at the expense of carrying around more memory.
+*/
+
+/* Version identifier to allow people to support multiple versions */
+#ifndef DLMALLOC_VERSION
+#define DLMALLOC_VERSION 20804
+#endif /* DLMALLOC_VERSION */
+
+#ifndef WIN32
+#ifdef _WIN32
+#define WIN32 1
+#endif /* _WIN32 */
+#ifdef _WIN32_WCE
+#define LACKS_FCNTL_H
+#define WIN32 1
+#endif /* _WIN32_WCE */
+#endif /* WIN32 */
+#ifdef WIN32
+#define WIN32_LEAN_AND_MEAN
+#define _WIN32_WINNT 0x403
+#include <windows.h>
+#define HAVE_MMAP 1
+#define HAVE_MORECORE 0
+#define LACKS_UNISTD_H
+#define LACKS_SYS_PARAM_H
+#define LACKS_SYS_MMAN_H
+#define LACKS_STRING_H
+#define LACKS_STRINGS_H
+#define LACKS_SYS_TYPES_H
+#define LACKS_ERRNO_H
+#ifndef MALLOC_FAILURE_ACTION
+#define MALLOC_FAILURE_ACTION
+#endif /* MALLOC_FAILURE_ACTION */
+#ifdef _WIN32_WCE /* WINCE reportedly does not clear */
+#define MMAP_CLEARS 0
+#else
+#define MMAP_CLEARS 1
+#endif /* _WIN32_WCE */
+#endif /* WIN32 */
+
+#if defined(DARWIN) || defined(_DARWIN)
+/* Mac OSX docs advise not to use sbrk; it seems better to use mmap */
+#ifndef HAVE_MORECORE
+#define HAVE_MORECORE 0
+#define HAVE_MMAP 1
+/* OSX allocators provide 16 byte alignment */
+#ifndef MALLOC_ALIGNMENT
+#define MALLOC_ALIGNMENT ((size_t)16U)
+#endif
+#endif /* HAVE_MORECORE */
+#endif /* DARWIN */
+
+#ifndef LACKS_SYS_TYPES_H
+#include <sys/types.h> /* For size_t */
+#endif /* LACKS_SYS_TYPES_H */
+
+/* The maximum possible size_t value has all bits set */
+#define MAX_SIZE_T (~(size_t)0)
+
+#ifndef ONLY_MSPACES
+#define ONLY_MSPACES 0 /* define to a value */
+#else
+#define ONLY_MSPACES 1
+#endif /* ONLY_MSPACES */
+#ifndef MSPACES
+#if ONLY_MSPACES
+#define MSPACES 1
+#else /* ONLY_MSPACES */
+#define MSPACES 0
+#endif /* ONLY_MSPACES */
+#endif /* MSPACES */
+#ifndef MALLOC_ALIGNMENT
+#define MALLOC_ALIGNMENT ((size_t)8U)
+#endif /* MALLOC_ALIGNMENT */
+#ifndef FOOTERS
+#define FOOTERS 0
+#endif /* FOOTERS */
+#ifndef ABORT
+#define ABORT abort()
+#endif /* ABORT */
+#ifndef ABORT_ON_ASSERT_FAILURE
+#define ABORT_ON_ASSERT_FAILURE 1
+#endif /* ABORT_ON_ASSERT_FAILURE */
+#ifndef PROCEED_ON_ERROR
+#define PROCEED_ON_ERROR 0
+#endif /* PROCEED_ON_ERROR */
+#ifndef USE_LOCKS
+#define USE_LOCKS 0
+#endif /* USE_LOCKS */
+#ifndef USE_SPIN_LOCKS
+#if USE_LOCKS && (defined(__GNUC__) && ((defined(__i386__) || defined(__x86_64__)))) || (defined(_MSC_VER) && _MSC_VER>=1310)
+#define USE_SPIN_LOCKS 1
+#else
+#define USE_SPIN_LOCKS 0
+#endif /* USE_LOCKS && ... */
+#endif /* USE_SPIN_LOCKS */
+#ifndef INSECURE
+#define INSECURE 0
+#endif /* INSECURE */
+#ifndef HAVE_MMAP
+#define HAVE_MMAP 1
+#endif /* HAVE_MMAP */
+#ifndef MMAP_CLEARS
+#define MMAP_CLEARS 1
+#endif /* MMAP_CLEARS */
+#ifndef HAVE_MREMAP
+#ifdef linux
+#define HAVE_MREMAP 1
+#else /* linux */
+#define HAVE_MREMAP 0
+#endif /* linux */
+#endif /* HAVE_MREMAP */
+#ifndef MALLOC_FAILURE_ACTION
+#define MALLOC_FAILURE_ACTION errno = ENOMEM;
+#endif /* MALLOC_FAILURE_ACTION */
+#ifndef HAVE_MORECORE
+#if ONLY_MSPACES
+#define HAVE_MORECORE 0
+#else /* ONLY_MSPACES */
+#define HAVE_MORECORE 1
+#endif /* ONLY_MSPACES */
+#endif /* HAVE_MORECORE */
+#if !HAVE_MORECORE
+#define MORECORE_CONTIGUOUS 0
+#else /* !HAVE_MORECORE */
+#define MORECORE_DEFAULT sbrk
+#ifndef MORECORE_CONTIGUOUS
+#define MORECORE_CONTIGUOUS 1
+#endif /* MORECORE_CONTIGUOUS */
+#endif /* HAVE_MORECORE */
+#ifndef DEFAULT_GRANULARITY
+#if (MORECORE_CONTIGUOUS || defined(WIN32))
+#define DEFAULT_GRANULARITY (0) /* 0 means to compute in init_mparams */
+#else /* MORECORE_CONTIGUOUS */
+#define DEFAULT_GRANULARITY ((size_t)64U * (size_t)1024U)
+#endif /* MORECORE_CONTIGUOUS */
+#endif /* DEFAULT_GRANULARITY */
+#ifndef DEFAULT_TRIM_THRESHOLD
+#ifndef MORECORE_CANNOT_TRIM
+#define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U)
+#else /* MORECORE_CANNOT_TRIM */
+#define DEFAULT_TRIM_THRESHOLD MAX_SIZE_T
+#endif /* MORECORE_CANNOT_TRIM */
+#endif /* DEFAULT_TRIM_THRESHOLD */
+#ifndef DEFAULT_MMAP_THRESHOLD
+#if HAVE_MMAP
+#define DEFAULT_MMAP_THRESHOLD ((size_t)256U * (size_t)1024U)
+#else /* HAVE_MMAP */
+#define DEFAULT_MMAP_THR