Merge branch 'master' of https://github.com/azerothcore/azerothcore-wotlk into dir-restructure

2026-02-05 03:53:48 +00:00 · 2017-12-21 11:26:43 +01:00
parent acd60005e8 a0d17509a2
commit 403ed2600f
445 changed files with 49192 additions and 15431 deletions
--- a/deps/jemalloc/src/arena.c
+++ b/deps/jemalloc/src/arena.c
--- a/deps/jemalloc/src/base.c
+++ b/deps/jemalloc/src/base.c
@@ -1,142 +1,402 @@
-#define	JEMALLOC_BASE_C_
-#include "jemalloc/internal/jemalloc_internal.h"
+#define JEMALLOC_BASE_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/extent_mmap.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/sz.h"

 /******************************************************************************/
 /* Data. */

-static malloc_mutex_t	base_mtx;
+static base_t	*b0;
+
+/******************************************************************************/
+/*
+ * Obtain size bytes of backing memory for a base.  size must already be a
+ * multiple of the huge page size (asserted).  With the default extent hooks
+ * the memory comes from extent_alloc_mmap(); otherwise the custom alloc hook
+ * is called for arena index ind, bracketed by pre_reentrancy() and
+ * post_reentrancy().  Both paths request PAGE alignment and start with zero
+ * and commit set to true.  Returns the address produced by the allocator;
+ * base_block_alloc() treats NULL as failure.
+ */
+static void *
+base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) {
+	void *addr;
+	bool zero = true;
+	bool commit = true;
+
+	assert(size == HUGEPAGE_CEILING(size));
+
+	if (extent_hooks == &extent_hooks_default) {
+		addr = extent_alloc_mmap(NULL, size, PAGE, &zero, &commit);
+	} else {
+		/* No arena context as we are creating new arenas. */
+		tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn);
+		pre_reentrancy(tsd, NULL);
+		addr = extent_hooks->alloc(extent_hooks, NULL, size, PAGE,
+		    &zero, &commit, ind);
+		post_reentrancy(tsd);
+	}
+
+	return addr;
+}
+/*
+ * Release the range of size bytes starting at addr, using either the default
+ * page-level primitives or the custom extent hooks registered for arena
+ * index ind.  Each step is tried in order and the function stops as soon as
+ * one of them reports success, which every step signals by returning false.
+ * Custom hooks that are NULL are skipped, and the custom-hook path runs
+ * between pre_reentrancy() and post_reentrancy().
+ */
+static void
+base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr,
+    size_t size) {
+	/*
+	 * Cascade through dalloc, decommit, purge_forced, and purge_lazy,
+	 * stopping at first success.  This cascade is performed for consistency
+	 * with the cascade in extent_dalloc_wrapper() because an application's
+	 * custom hooks may not support e.g. dalloc.  This function is only ever
+	 * called as a side effect of arena destruction, so although it might
+	 * seem pointless to do anything besides dalloc here, the application
+	 * may in fact want the end state of all associated virtual memory to be
+	 * in some consistent-but-allocated state.
+	 */
+	if (extent_hooks == &extent_hooks_default) {
+		if (!extent_dalloc_mmap(addr, size)) {
+			return;
+		}
+		if (!pages_decommit(addr, size)) {
+			return;
+		}
+		if (!pages_purge_forced(addr, size)) {
+			return;
+		}
+		if (!pages_purge_lazy(addr, size)) {
+			return;
+		}
+		/* Nothing worked.  This should never happen. */
+		not_reached();
+	} else {
+		tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn);
+		pre_reentrancy(tsd, NULL);
+		if (extent_hooks->dalloc != NULL &&
+		    !extent_hooks->dalloc(extent_hooks, addr, size, true,
+		    ind)) {
+			goto label_done;
+		}
+		if (extent_hooks->decommit != NULL &&
+		    !extent_hooks->decommit(extent_hooks, addr, size, 0, size,
+		    ind)) {
+			goto label_done;
+		}
+		if (extent_hooks->purge_forced != NULL &&
+		    !extent_hooks->purge_forced(extent_hooks, addr, size, 0,
+		    size, ind)) {
+			goto label_done;
+		}
+		if (extent_hooks->purge_lazy != NULL &&
+		    !extent_hooks->purge_lazy(extent_hooks, addr, size, 0, size,
+		    ind)) {
+			goto label_done;
+		}
+		/* Nothing worked.  That's the application's problem. */
+	label_done:
+		post_reentrancy(tsd);
+		return;
+	}
+}
+/*
+ * Initialize extent through extent_binit(), passing addr, size and a serial
+ * number taken from *extent_sn_next.  The counter is incremented afterwards
+ * so that the next extent initialized this way gets a different number.
+ */
+static void
+base_extent_init(size_t *extent_sn_next, extent_t *extent, void *addr,
+    size_t size) {
+	size_t sn;
+
+	sn = *extent_sn_next;
+	(*extent_sn_next)++;
+
+	extent_binit(extent, addr, size, sn);
+}
+/*
+ * Carve size bytes, aligned to alignment, off the front of extent.
+ * alignment must be a multiple of QUANTUM and size a multiple of alignment
+ * (both asserted), and extent must be large enough (asserted).  *gap_size
+ * receives the number of padding bytes skipped to reach the aligned address.
+ * extent is re-initialized in place to cover only the space that follows the
+ * returned region, keeping its serial number.  Returns the aligned address.
+ */
+static void *
+base_extent_bump_alloc_helper(extent_t *extent, size_t *gap_size, size_t size,
+    size_t alignment) {
+	void *ret;
+
+	assert(alignment == ALIGNMENT_CEILING(alignment, QUANTUM));
+	assert(size == ALIGNMENT_CEILING(size, alignment));
+
+	*gap_size = ALIGNMENT_CEILING((uintptr_t)extent_addr_get(extent),
+	    alignment) - (uintptr_t)extent_addr_get(extent);
+	ret = (void *)((uintptr_t)extent_addr_get(extent) + *gap_size);
+	assert(extent_bsize_get(extent) >= *gap_size + size);
+	extent_binit(extent, (void *)((uintptr_t)extent_addr_get(extent) +
+	    *gap_size + size), extent_bsize_get(extent) - *gap_size - size,
+	    extent_sn_get(extent));
+	return ret;
+}
+/*
+ * Bookkeeping after size bytes were carved at addr, preceded by gap_size
+ * bytes of alignment padding.  If extent still has space left it is inserted
+ * into the base->avail heap selected by its remaining size; when
+ * config_stats is enabled the allocated and resident counters are updated.
+ * The tsdn parameter is not referenced in the body.
+ */
+static void
+base_extent_bump_alloc_post(tsdn_t *tsdn, base_t *base, extent_t *extent,
+    size_t gap_size, void *addr, size_t size) {
+	if (extent_bsize_get(extent) > 0) {
+		/*
+		 * Compute the index for the largest size class that does not
+		 * exceed extent's size.
+		 */
+		szind_t index_floor =
+		    sz_size2index(extent_bsize_get(extent) + 1) - 1;
+		extent_heap_insert(&base->avail[index_floor], extent);
+	}
+
+	if (config_stats) {
+		base->allocated += size;
+		/*
+		 * Add one PAGE to base->resident for every page boundary that
+		 * is crossed by the new allocation (padding included).
+		 */
+		base->resident += PAGE_CEILING((uintptr_t)addr + size) -
+		    PAGE_CEILING((uintptr_t)addr - gap_size);
+		assert(base->allocated <= base->resident);
+		assert(base->resident <= base->mapped);
+	}
+}
+/*
+ * Allocate size bytes with the given alignment from extent: the region is
+ * carved by base_extent_bump_alloc_helper() and the bookkeeping is then done
+ * by base_extent_bump_alloc_post().  Returns the carved region.
+ */
+static void *
+base_extent_bump_alloc(tsdn_t *tsdn, base_t *base, extent_t *extent,
+    size_t size, size_t alignment) {
+	void *ret;
+	size_t gap_size;
+
+	ret = base_extent_bump_alloc_helper(extent, &gap_size, size, alignment);
+	base_extent_bump_alloc_post(tsdn, base, extent, gap_size, ret, size);
+	return ret;
+}

 /*
- * Current pages that are being used for internal memory allocations.  These
- * pages are carved up in cacheline-size quanta, so that there is no chance of
- * false cache line sharing.
+ * Allocate a block of virtual memory that is large enough to start with a
+ * base_block_t header, followed by an object of specified size and alignment.
+ * On success a pointer to the initialized base_block_t header is returned.
 */
-static void		*base_pages;
-static void		*base_next_addr;
-static void		*base_past_addr; /* Addr immediately past base_pages. */
-static extent_node_t	*base_nodes;
-
-/******************************************************************************/
-/* Function prototypes for non-inline static functions. */
-
-static bool	base_pages_alloc(size_t minsize);
-
-/******************************************************************************/
-
-static bool
-base_pages_alloc(size_t minsize)
-{
-	size_t csize;
-	bool zero;
-
-	assert(minsize != 0);
-	csize = CHUNK_CEILING(minsize);
-	zero = false;
-	base_pages = chunk_alloc(csize, chunksize, true, &zero,
-	    chunk_dss_prec_get());
-	if (base_pages == NULL)
-		return (true);
-	base_next_addr = base_pages;
-	base_past_addr = (void *)((uintptr_t)base_pages + csize);
-
-	return (false);
-}
-
-void *
-base_alloc(size_t size)
-{
-	void *ret;
-	size_t csize;
-
-	/* Round size up to nearest multiple of the cacheline size. */
-	csize = CACHELINE_CEILING(size);
-
-	malloc_mutex_lock(&base_mtx);
-	/* Make sure there's enough space for the allocation. */
-	if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
-		if (base_pages_alloc(csize)) {
-			malloc_mutex_unlock(&base_mtx);
-			return (NULL);
-		}
+static base_block_t *
+base_block_alloc(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind,
+    pszind_t *pind_last, size_t *extent_sn_next, size_t size,
+    size_t alignment) {
+	alignment = ALIGNMENT_CEILING(alignment, QUANTUM);
+	size_t usize = ALIGNMENT_CEILING(size, alignment);
+	size_t header_size = sizeof(base_block_t);
+	size_t gap_size = ALIGNMENT_CEILING(header_size, alignment) -
+	    header_size;
+	/*
+	 * Create increasingly larger blocks in order to limit the total number
+	 * of disjoint virtual memory ranges.  Choose the next size in the page
+	 * size class series (skipping size classes that are not a multiple of
+	 * HUGEPAGE), or a size large enough to satisfy the requested size and
+	 * alignment, whichever is larger.
+	 *
+	 * On success *pind_last is overwritten with the page size class index
+	 * of the block that was mapped, so a later call considers the class
+	 * after it (the index is not advanced once *pind_last + 1 reaches
+	 * NPSIZES).  If base_map() fails, NULL is returned and *pind_last and
+	 * *extent_sn_next are left untouched.  The block's embedded extent is
+	 * initialized to cover everything that follows the header.
+	 */
+	size_t min_block_size = HUGEPAGE_CEILING(sz_psz2u(header_size + gap_size
+	    + usize));
+	pszind_t pind_next = (*pind_last + 1 < NPSIZES) ? *pind_last + 1 :
+	    *pind_last;
+	size_t next_block_size = HUGEPAGE_CEILING(sz_pind2sz(pind_next));
+	size_t block_size = (min_block_size > next_block_size) ? min_block_size
+	    : next_block_size;
+	base_block_t *block = (base_block_t *)base_map(tsdn, extent_hooks, ind,
+	    block_size);
+	if (block == NULL) {
+		return NULL;
 	}
-	/* Allocate. */
-	ret = base_next_addr;
-	base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
-	malloc_mutex_unlock(&base_mtx);
-	VALGRIND_MAKE_MEM_UNDEFINED(ret, csize);
-
-	return (ret);
+	*pind_last = sz_psz2ind(block_size);
+	block->size = block_size;
+	block->next = NULL;
+	assert(block_size >= header_size);
+	base_extent_init(extent_sn_next, &block->extent,
+	    (void *)((uintptr_t)block + header_size), block_size - header_size);
+	return block;
 }

-void *
-base_calloc(size_t number, size_t size)
-{
-	void *ret = base_alloc(number * size);
+/*
+ * Allocate an extent that is at least as large as specified size, with
+ * specified alignment.
+ *
+ * Must be called with base->mtx held (asserted).  The mutex is released
+ * around the call to base_block_alloc() and re-acquired before this function
+ * returns, on both the success and the failure path.  On success the new
+ * block is pushed onto the front of base->blocks, the statistics are updated
+ * when config_stats is enabled, and a pointer to the block's embedded extent
+ * is returned.  NULL is returned if no block could be allocated.
+ */
+static extent_t *
+base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) {
+	malloc_mutex_assert_owner(tsdn, &base->mtx);

-	if (ret != NULL)
-		memset(ret, 0, number * size);
-
-	return (ret);
+	extent_hooks_t *extent_hooks = base_extent_hooks_get(base);
+	/*
+	 * Drop mutex during base_block_alloc(), because an extent hook will be
+	 * called.
+	 */
+	malloc_mutex_unlock(tsdn, &base->mtx);
+	base_block_t *block = base_block_alloc(tsdn, extent_hooks,
+	    base_ind_get(base), &base->pind_last, &base->extent_sn_next, size,
+	    alignment);
+	malloc_mutex_lock(tsdn, &base->mtx);
+	if (block == NULL) {
+		return NULL;
+	}
+	block->next = base->blocks;
+	base->blocks = block;
+	if (config_stats) {
+		base->allocated += sizeof(base_block_t);
+		base->resident += PAGE_CEILING(sizeof(base_block_t));
+		base->mapped += block->size;
+		assert(base->allocated <= base->resident);
+		assert(base->resident <= base->mapped);
+	}
+	return &block->extent;
 }

-extent_node_t *
-base_node_alloc(void)
-{
-	extent_node_t *ret;
+base_t *
+b0get(void) {
+	return b0; /* File-scope base pointer; assigned in base_boot(). */
+}

-	malloc_mutex_lock(&base_mtx);
-	if (base_nodes != NULL) {
-		ret = base_nodes;
-		base_nodes = *(extent_node_t **)ret;
-		malloc_mutex_unlock(&base_mtx);
-		VALGRIND_MAKE_MEM_UNDEFINED(ret, sizeof(extent_node_t));
-	} else {
-		malloc_mutex_unlock(&base_mtx);
-		ret = (extent_node_t *)base_alloc(sizeof(extent_node_t));
+base_t *
+base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
+	pszind_t pind_last = 0;
+	size_t extent_sn_next = 0;
+	base_block_t *block = base_block_alloc(tsdn, extent_hooks, ind,
+	    &pind_last, &extent_sn_next, sizeof(base_t), QUANTUM);
+	if (block == NULL) {
+		return NULL; /* The first block could not be mapped. */
 	}

-	return (ret);
+	size_t gap_size;
+	size_t base_alignment = CACHELINE;
+	size_t base_size = ALIGNMENT_CEILING(sizeof(base_t), base_alignment);
+	base_t *base = (base_t *)base_extent_bump_alloc_helper(&block->extent,
+	    &gap_size, base_size, base_alignment);
+	base->ind = ind;
+	atomic_store_p(&base->extent_hooks, extent_hooks, ATOMIC_RELAXED);
+	if (malloc_mutex_init(&base->mtx, "base", WITNESS_RANK_BASE,
+	    malloc_mutex_rank_exclusive)) {
+		base_unmap(tsdn, extent_hooks, ind, block, block->size);
+		return NULL; /* Mutex init failed; the block was released. */
+	}
+	base->pind_last = pind_last;
+	base->extent_sn_next = extent_sn_next;
+	base->blocks = block;
+	for (szind_t i = 0; i < NSIZES; i++) {
+		extent_heap_new(&base->avail[i]);
+	}
+	if (config_stats) {
+		base->allocated = sizeof(base_block_t);
+		base->resident = PAGE_CEILING(sizeof(base_block_t));
+		base->mapped = block->size;
+		assert(base->allocated <= base->resident);
+		assert(base->resident <= base->mapped);
+	}
+	base_extent_bump_alloc_post(tsdn, base, &block->extent, gap_size, base,
+	    base_size);
+	/*
+	 * The base_t was carved out of its own first block, so the returned
+	 * pointer refers to memory inside block.  NULL was returned above if
+	 * either the mapping or the mutex initialization failed.
+	 */
+	return base;
 }

 void
-base_node_dealloc(extent_node_t *node)
-{
+base_delete(tsdn_t *tsdn, base_t *base) {
+	extent_hooks_t *extent_hooks = base_extent_hooks_get(base);
+	base_block_t *next = base->blocks;
+	do { /* Expects at least one block; base_new() installs the first. */
+		base_block_t *block = next;
+		next = block->next; /* Read the link before unmapping block. */
+		base_unmap(tsdn, extent_hooks, base_ind_get(base), block,
+		    block->size);
+	} while (next != NULL);
+}

-	VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t));
-	malloc_mutex_lock(&base_mtx);
-	*(extent_node_t **)node = base_nodes;
-	base_nodes = node;
-	malloc_mutex_unlock(&base_mtx);
+extent_hooks_t *
+base_extent_hooks_get(base_t *base) {
+	return (extent_hooks_t *)atomic_load_p(&base->extent_hooks,
+	    ATOMIC_ACQUIRE); /* base_extent_hooks_set() stores with release. */
+}
+/*
+ * Install extent_hooks as base's hook table and return the previous one.
+ * The old value is read with a separate load before the release store, i.e.
+ * this is a load followed by a store rather than a single atomic exchange.
+ */
+extent_hooks_t *
+base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks) {
+	extent_hooks_t *old_extent_hooks = base_extent_hooks_get(base);
+	atomic_store_p(&base->extent_hooks, extent_hooks, ATOMIC_RELEASE);
+	return old_extent_hooks;
+}
+/*
+ * Common implementation behind base_alloc() and base_alloc_extent().
+ * alignment is rounded up to a multiple of QUANTUM and size up to a multiple
+ * of that alignment.  With base->mtx held, the avail heaps are scanned from
+ * the size class of the padded request (usize + alignment - QUANTUM) upward
+ * for an existing extent; if none is found a new block is requested through
+ * base_extent_alloc().  The region is then carved from the chosen extent.
+ * If esn is non-NULL it receives the serial number of that extent.  Returns
+ * NULL if no space could be obtained; the mutex is released on every path.
+ */
+static void *
+base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment,
+    size_t *esn) {
+	alignment = QUANTUM_CEILING(alignment);
+	size_t usize = ALIGNMENT_CEILING(size, alignment);
+	size_t asize = usize + alignment - QUANTUM;
+
+	extent_t *extent = NULL;
+	malloc_mutex_lock(tsdn, &base->mtx);
+	for (szind_t i = sz_size2index(asize); i < NSIZES; i++) {
+		extent = extent_heap_remove_first(&base->avail[i]);
+		if (extent != NULL) {
+			/* Use existing space. */
+			break;
+		}
+	}
+	if (extent == NULL) {
+		/* Try to allocate more space. */
+		extent = base_extent_alloc(tsdn, base, usize, alignment);
+	}
+	void *ret;
+	if (extent == NULL) {
+		ret = NULL;
+		goto label_return;
+	}
+
+	ret = base_extent_bump_alloc(tsdn, base, extent, usize, alignment);
+	if (esn != NULL) {
+		*esn = extent_sn_get(extent);
+	}
+label_return:
+	malloc_mutex_unlock(tsdn, &base->mtx);
+	return ret;
+}
+
+/*
+ * base_alloc() returns zeroed memory, which is always demand-zeroed for the
+ * auto arenas, in order to make multi-page sparse data structures such as radix
+ * tree nodes efficient with respect to physical memory usage.  Upon success a
+ * pointer to at least size bytes with specified alignment is returned.  Note
+ * that size is rounded up to the nearest multiple of alignment to avoid false
+ * sharing.
+ *
+ * This is a thin wrapper around base_alloc_impl() that passes NULL for esn,
+ * i.e. the extent serial number is not reported.  NULL is returned when
+ * base_alloc_impl() fails.
+ */
+void *
+base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) {
+	return base_alloc_impl(tsdn, base, size, alignment, NULL);
+}
+/*
+ * Allocate CACHELINE-aligned space for one extent_t from base and store in
+ * it, via extent_esn_set(), the serial number reported by base_alloc_impl()
+ * for the base extent the memory was carved from.  Returns NULL if the
+ * allocation fails.
+ */
+extent_t *
+base_alloc_extent(tsdn_t *tsdn, base_t *base) {
+	size_t esn;
+	extent_t *extent = base_alloc_impl(tsdn, base, sizeof(extent_t),
+	    CACHELINE, &esn);
+	if (extent == NULL) {
+		return NULL;
+	}
+	extent_esn_set(extent, esn);
+	return extent;
+}
+/*
+ * Copy base's allocated, resident and mapped counters into the three output
+ * parameters while holding base->mtx.  Guarded by cassert(config_stats), so
+ * it is only meant to be used when statistics are compiled in.
+ */
+void
+base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident,
+    size_t *mapped) {
+	cassert(config_stats);
+
+	malloc_mutex_lock(tsdn, &base->mtx);
+	assert(base->allocated <= base->resident);
+	assert(base->resident <= base->mapped);
+	*allocated = base->allocated;
+	*resident = base->resident;
+	*mapped = base->mapped;
+	malloc_mutex_unlock(tsdn, &base->mtx);
+}
+/* Fork hook: forwards to malloc_mutex_prefork() for base->mtx. */
+void
+base_prefork(tsdn_t *tsdn, base_t *base) {
+	malloc_mutex_prefork(tsdn, &base->mtx);
+}
+/* Fork hook: forwards to malloc_mutex_postfork_parent() for base->mtx. */
+void
+base_postfork_parent(tsdn_t *tsdn, base_t *base) {
+	malloc_mutex_postfork_parent(tsdn, &base->mtx);
+}
+/* Fork hook: forwards to malloc_mutex_postfork_child() for base->mtx. */
+void
+base_postfork_child(tsdn_t *tsdn, base_t *base) {
+	malloc_mutex_postfork_child(tsdn, &base->mtx);
 }

 bool
-base_boot(void)
-{
-
-	base_nodes = NULL;
-	if (malloc_mutex_init(&base_mtx))
-		return (true);
-
-	return (false);
-}
-
-void
-base_prefork(void)
-{
-
-	malloc_mutex_prefork(&base_mtx);
-}
-
-void
-base_postfork_parent(void)
-{
-
-	malloc_mutex_postfork_parent(&base_mtx);
-}
-
-void
-base_postfork_child(void)
-{
-
-	malloc_mutex_postfork_child(&base_mtx);
+base_boot(tsdn_t *tsdn) {
+	b0 = base_new(tsdn, 0, (extent_hooks_t *)&extent_hooks_default);
+	return (b0 == NULL); /* true reports failure to create b0. */
 }
--- a/deps/jemalloc/src/bitmap.c
+++ b/deps/jemalloc/src/bitmap.c
@@ -1,24 +1,15 @@
-#define	JEMALLOC_BITMAP_C_
-#include "jemalloc/internal/jemalloc_internal.h"
+#define JEMALLOC_BITMAP_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"

-/******************************************************************************/
-/* Function prototypes for non-inline static functions. */
-
-static size_t	bits2groups(size_t nbits);
+#include "jemalloc/internal/assert.h"

 /******************************************************************************/

-static size_t
-bits2groups(size_t nbits)
-{
-
-	return ((nbits >> LG_BITMAP_GROUP_NBITS) +
-	    !!(nbits & BITMAP_GROUP_NBITS_MASK));
-}
+#ifdef BITMAP_USE_TREE

 void
-bitmap_info_init(bitmap_info_t *binfo, size_t nbits)
-{
+bitmap_info_init(bitmap_info_t *binfo, size_t nbits) {
 	unsigned i;
 	size_t group_count;

@@ -31,60 +22,100 @@ bitmap_info_init(bitmap_info_t *binfo, size_t nbits)
 	 * that requires only one group.
 	 */
 	binfo->levels[0].group_offset = 0;
-	group_count = bits2groups(nbits);
+	group_count = BITMAP_BITS2GROUPS(nbits);
 	for (i = 1; group_count > 1; i++) {
 		assert(i < BITMAP_MAX_LEVELS);
 		binfo->levels[i].group_offset = binfo->levels[i-1].group_offset
 		    + group_count;
-		group_count = bits2groups(group_count);
+		group_count = BITMAP_BITS2GROUPS(group_count);
 	}
 	binfo->levels[i].group_offset = binfo->levels[i-1].group_offset
 	    + group_count;
+	assert(binfo->levels[i].group_offset <= BITMAP_GROUPS_MAX);
 	binfo->nlevels = i;
 	binfo->nbits = nbits;
 }

-size_t
-bitmap_info_ngroups(const bitmap_info_t *binfo)
-{
-
-	return (binfo->levels[binfo->nlevels].group_offset << LG_SIZEOF_BITMAP);
-}
-
-size_t
-bitmap_size(size_t nbits)
-{
-	bitmap_info_t binfo;
-
-	bitmap_info_init(&binfo, nbits);
-	return (bitmap_info_ngroups(&binfo));
+static size_t
+bitmap_info_ngroups(const bitmap_info_t *binfo) {
+	return binfo->levels[binfo->nlevels].group_offset; /* All levels. */
 }

 void
-bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo)
-{
+bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo, bool fill) {
 	size_t extra;
 	unsigned i;

 	/*
 	 * Bits are actually inverted with regard to the external bitmap
-	 * interface, so the bitmap starts out with all 1 bits, except for
-	 * trailing unused bits (if any).  Note that each group uses bit 0 to
-	 * correspond to the first logical bit in the group, so extra bits
-	 * are the most significant bits of the last group.
+	 * interface.
 	 */
-	memset(bitmap, 0xffU, binfo->levels[binfo->nlevels].group_offset <<
-	    LG_SIZEOF_BITMAP);
+
+	if (fill) {
+		/* The "filled" bitmap starts out with all 0 bits. */
+		memset(bitmap, 0, bitmap_size(binfo));
+		return;
+	}
+
+	/*
+	 * The "empty" bitmap starts out with all 1 bits, except for trailing
+	 * unused bits (if any).  Note that each group uses bit 0 to correspond
+	 * to the first logical bit in the group, so extra bits are the most
+	 * significant bits of the last group.
+	 *
+	 * The loop further down repeats the same trimming for the last group
+	 * of every upper tree level, whose number of meaningful bits is the
+	 * group count of the level beneath it.
+	 */
+	memset(bitmap, 0xffU, bitmap_size(binfo));
 	extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK))
 	    & BITMAP_GROUP_NBITS_MASK;
-	if (extra != 0)
+	if (extra != 0) {
 		bitmap[binfo->levels[1].group_offset - 1] >>= extra;
+	}
 	for (i = 1; i < binfo->nlevels; i++) {
 		size_t group_count = binfo->levels[i].group_offset -
 		    binfo->levels[i-1].group_offset;
 		extra = (BITMAP_GROUP_NBITS - (group_count &
 		    BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK;
-		if (extra != 0)
+		if (extra != 0) {
 			bitmap[binfo->levels[i+1].group_offset - 1] >>= extra;
+		}
 	}
 }
+
+#else /* BITMAP_USE_TREE */
+/*
+ * Flat (non-tree) variant: record nbits and the number of groups needed to
+ * hold that many bits.  nbits must be greater than zero and no larger than
+ * 2^LG_BITMAP_MAXBITS (both asserted).
+ */
+void
+bitmap_info_init(bitmap_info_t *binfo, size_t nbits) {
+	assert(nbits > 0);
+	assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS));
+
+	binfo->ngroups = BITMAP_BITS2GROUPS(nbits);
+	binfo->nbits = nbits;
+}
+/* Flat variant: the group count was stored by bitmap_info_init(). */
+static size_t
+bitmap_info_ngroups(const bitmap_info_t *binfo) {
+	return binfo->ngroups;
+}
+/*
+ * Flat variant of bitmap_init().  With fill set, the whole bitmap is zeroed.
+ * Otherwise every byte is set to 0xff and the unused high-order bits of the
+ * last group (those beyond binfo->nbits) are cleared by shifting that group
+ * right by their count.
+ */
+void
+bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo, bool fill) {
+	size_t extra;
+
+	if (fill) {
+		memset(bitmap, 0, bitmap_size(binfo));
+		return;
+	}
+
+	memset(bitmap, 0xffU, bitmap_size(binfo));
+	extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK))
+	    & BITMAP_GROUP_NBITS_MASK;
+	if (extra != 0) {
+		bitmap[binfo->ngroups - 1] >>= extra;
+	}
+}
+
+#endif /* BITMAP_USE_TREE */
+/*
+ * Size of the bitmap described by binfo, computed as the group count shifted
+ * left by LG_SIZEOF_BITMAP.  bitmap_init() uses the result as a memset()
+ * length, i.e. it is a byte count.
+ */
+size_t
+bitmap_size(const bitmap_info_t *binfo) {
+	return (bitmap_info_ngroups(binfo) << LG_SIZEOF_BITMAP);
+}
--- a/deps/jemalloc/src/ckh.c
+++ b/deps/jemalloc/src/ckh.c
@@ -34,14 +34,24 @@
 * respectively.
 *
 ******************************************************************************/
-#define	JEMALLOC_CKH_C_
-#include "jemalloc/internal/jemalloc_internal.h"
+#define JEMALLOC_CKH_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+
+#include "jemalloc/internal/ckh.h"
+
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/hash.h"
+#include "jemalloc/internal/malloc_io.h"
+#include "jemalloc/internal/prng.h"
+#include "jemalloc/internal/util.h"

 /******************************************************************************/
 /* Function prototypes for non-inline static functions. */

-static bool	ckh_grow(ckh_t *ckh);
-static void	ckh_shrink(ckh_t *ckh);
+static bool	ckh_grow(tsd_t *tsd, ckh_t *ckh);
+static void	ckh_shrink(tsd_t *tsd, ckh_t *ckh);

 /******************************************************************************/

@@ -49,27 +59,26 @@ static void	ckh_shrink(ckh_t *ckh);
 * Search bucket for key and return the cell number if found; SIZE_T_MAX
 * otherwise.
 */
-JEMALLOC_INLINE_C size_t
-ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key)
-{
+static size_t
+ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key) {
 	ckhc_t *cell;
 	unsigned i;

 	for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) {
 		cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i];
-		if (cell->key != NULL && ckh->keycomp(key, cell->key))
-			return ((bucket << LG_CKH_BUCKET_CELLS) + i);
+		if (cell->key != NULL && ckh->keycomp(key, cell->key)) {
+			return (bucket << LG_CKH_BUCKET_CELLS) + i; /* tab index */
+		}
 	}

-	return (SIZE_T_MAX);
+	return SIZE_T_MAX; /* No cell in this bucket matched key. */
 }

 /*
 * Search table for key and return cell number if found; SIZE_T_MAX otherwise.
 */
-JEMALLOC_INLINE_C size_t
-ckh_isearch(ckh_t *ckh, const void *key)
-{
+static size_t
+ckh_isearch(ckh_t *ckh, const void *key) {
 	size_t hashes[2], bucket, cell;

 	assert(ckh != NULL);
@@ -79,19 +88,19 @@ ckh_isearch(ckh_t *ckh, const void *key)
 	/* Search primary bucket. */
 	bucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - 1);
 	cell = ckh_bucket_search(ckh, bucket, key);
-	if (cell != SIZE_T_MAX)
-		return (cell);
+	if (cell != SIZE_T_MAX) {
+		return cell;
+	}

 	/* Search secondary bucket. */
 	bucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1);
 	cell = ckh_bucket_search(ckh, bucket, key);
-	return (cell);
+	return cell;
 }

-JEMALLOC_INLINE_C bool
+static bool
 ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key,
-    const void *data)
-{
+    const void *data) {
 	ckhc_t *cell;
 	unsigned offset, i;

@@ -99,7 +108,8 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key,
 	 * Cycle through the cells in the bucket, starting at a random position.
 	 * The randomness avoids worst-case search overhead as buckets fill up.
 	 */
-	prng32(offset, LG_CKH_BUCKET_CELLS, ckh->prng_state, CKH_A, CKH_C);
+	offset = (unsigned)prng_lg_range_u64(&ckh->prng_state,
+	    LG_CKH_BUCKET_CELLS);
 	for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) {
 		cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) +
 		    ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))];
@@ -107,11 +117,11 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key,
 			cell->key = key;
 			cell->data = data;
 			ckh->count++;
-			return (false);
+			return false;
 		}
 	}

-	return (true);
+	return true;
 }

 /*
@@ -120,10 +130,9 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key,
 * eviction/relocation procedure until either success or detection of an
 * eviction/relocation bucket cycle.
 */
-JEMALLOC_INLINE_C bool
+static bool
 ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
-    void const **argdata)
-{
+    void const **argdata) {
 	const void *key, *data, *tkey, *tdata;
 	ckhc_t *cell;
 	size_t hashes[2], bucket, tbucket;
@@ -141,7 +150,8 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
 		 * were an item for which both hashes indicated the same
 		 * bucket.
 		 */
-		prng32(i, LG_CKH_BUCKET_CELLS, ckh->prng_state, CKH_A, CKH_C);
+		i = (unsigned)prng_lg_range_u64(&ckh->prng_state,
+		    LG_CKH_BUCKET_CELLS);
 		cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i];
 		assert(cell->key != NULL);

@@ -181,18 +191,18 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
 		if (tbucket == argbucket) {
 			*argkey = key;
 			*argdata = data;
-			return (true);
+			return true;
 		}

 		bucket = tbucket;
-		if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
-			return (false);
+		if (!ckh_try_bucket_insert(ckh, bucket, key, data)) {
+			return false;
+		}
 	}
 }

-JEMALLOC_INLINE_C bool
-ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata)
-{
+static bool
+ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata) {
 	size_t hashes[2], bucket;
 	const void *key = *argkey;
 	const void *data = *argdata;
@@ -201,27 +211,28 @@ ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata)

 	/* Try to insert in primary bucket. */
 	bucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - 1);
-	if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
-		return (false);
+	if (!ckh_try_bucket_insert(ckh, bucket, key, data)) {
+		return false;
+	}

 	/* Try to insert in secondary bucket. */
 	bucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1);
-	if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
-		return (false);
+	if (!ckh_try_bucket_insert(ckh, bucket, key, data)) {
+		return false;
+	}

 	/*
 	 * Try to find a place for this item via iterative eviction/relocation.
 	 */
-	return (ckh_evict_reloc_insert(ckh, bucket, argkey, argdata));
+	return ckh_evict_reloc_insert(ckh, bucket, argkey, argdata);
 }

 /*
 * Try to rebuild the hash table from scratch by inserting all items from the
 * old table into the new.
 */
-JEMALLOC_INLINE_C bool
-ckh_rebuild(ckh_t *ckh, ckhc_t *aTab)
-{
+static bool
+ckh_rebuild(ckh_t *ckh, ckhc_t *aTab) {
 	size_t count, i, nins;
 	const void *key, *data;

@@ -233,22 +244,20 @@ ckh_rebuild(ckh_t *ckh, ckhc_t *aTab)
 			data = aTab[i].data;
 			if (ckh_try_insert(ckh, &key, &data)) {
 				ckh->count = count;
-				return (true);
+				return true;
 			}
 			nins++;
 		}
 	}

-	return (false);
+	return false;
 }

 static bool
-ckh_grow(ckh_t *ckh)
-{
+ckh_grow(tsd_t *tsd, ckh_t *ckh) {
 	bool ret;
 	ckhc_t *tab, *ttab;
-	size_t lg_curcells;
-	unsigned lg_prevbuckets;
+	unsigned lg_prevbuckets, lg_curcells;

 #ifdef CKH_COUNT
 	ckh->ngrows++;
@@ -265,12 +274,13 @@ ckh_grow(ckh_t *ckh)
 		size_t usize;

 		lg_curcells++;
-		usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE);
-		if (usize == 0) {
+		usize = sz_sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE);
+		if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) {
 			ret = true;
 			goto label_return;
 		}
-		tab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
+		tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE,
+		    true, NULL, true, arena_ichoose(tsd, NULL));
 		if (tab == NULL) {
 			ret = true;
 			goto label_return;
@@ -281,28 +291,27 @@ ckh_grow(ckh_t *ckh)
 		tab = ttab;
 		ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS;

-		if (ckh_rebuild(ckh, tab) == false) {
-			idalloc(tab);
+		if (!ckh_rebuild(ckh, tab)) {
+			idalloctm(tsd_tsdn(tsd), tab, NULL, NULL, true, true);
 			break;
 		}

 		/* Rebuilding failed, so back out partially rebuilt table. */
-		idalloc(ckh->tab);
+		idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, NULL, true, true);
 		ckh->tab = tab;
 		ckh->lg_curbuckets = lg_prevbuckets;
 	}

 	ret = false;
 label_return:
-	return (ret);
+	return ret;
 }

 static void
-ckh_shrink(ckh_t *ckh)
-{
+ckh_shrink(tsd_t *tsd, ckh_t *ckh) {
 	ckhc_t *tab, *ttab;
-	size_t lg_curcells, usize;
-	unsigned lg_prevbuckets;
+	size_t usize;
+	unsigned lg_prevbuckets, lg_curcells;

 	/*
 	 * It is possible (though unlikely, given well behaved hashes) that the
@@ -310,10 +319,12 @@ ckh_shrink(ckh_t *ckh)
 	 */
 	lg_prevbuckets = ckh->lg_curbuckets;
 	lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1;
-	usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE);
-	if (usize == 0)
+	usize = sz_sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE);
+	if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) {
 		return;
-	tab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
+	}
+	tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, true, NULL,
+	    true, arena_ichoose(tsd, NULL));
 	if (tab == NULL) {
 		/*
 		 * An OOM error isn't worth propagating, since it doesn't
@@ -327,8 +338,8 @@ ckh_shrink(ckh_t *ckh)
 	tab = ttab;
 	ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS;

-	if (ckh_rebuild(ckh, tab) == false) {
-		idalloc(tab);
+	if (!ckh_rebuild(ckh, tab)) {
+		idalloctm(tsd_tsdn(tsd), tab, NULL, NULL, true, true);
 #ifdef CKH_COUNT
 		ckh->nshrinks++;
 #endif
@@ -336,7 +347,7 @@ ckh_shrink(ckh_t *ckh)
 	}

 	/* Rebuilding failed, so back out partially rebuilt table. */
-	idalloc(ckh->tab);
+	idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, NULL, true, true);
 	ckh->tab = tab;
 	ckh->lg_curbuckets = lg_prevbuckets;
 #ifdef CKH_COUNT
@@ -345,8 +356,8 @@ ckh_shrink(ckh_t *ckh)
 }

 bool
-ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp)
-{
+ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash,
+    ckh_keycomp_t *keycomp) {
 	bool ret;
 	size_t mincells, usize;
 	unsigned lg_mincells;
@@ -366,29 +377,31 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp)
 	ckh->count = 0;

 	/*
-	 * Find the minimum power of 2 that is large enough to fit aBaseCount
+	 * Find the minimum power of 2 that is large enough to fit minitems
 	 * entries.  We are using (2+,2) cuckoo hashing, which has an expected
 	 * maximum load factor of at least ~0.86, so 0.75 is a conservative load
-	 * factor that will typically allow 2^aLgMinItems to fit without ever
+	 * factor that will typically allow mincells items to fit without ever
 	 * growing the table.
 	 */
 	assert(LG_CKH_BUCKET_CELLS > 0);
 	mincells = ((minitems + (3 - (minitems % 3))) / 3) << 2;
 	for (lg_mincells = LG_CKH_BUCKET_CELLS;
 	    (ZU(1) << lg_mincells) < mincells;
-	    lg_mincells++)
-		; /* Do nothing. */
+	    lg_mincells++) {
+		/* Do nothing. */
+	}
 	ckh->lg_minbuckets = lg_mincells - LG_CKH_BUCKET_CELLS;
 	ckh->lg_curbuckets = lg_mincells - LG_CKH_BUCKET_CELLS;
 	ckh->hash = hash;
 	ckh->keycomp = keycomp;

-	usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE);
-	if (usize == 0) {
+	usize = sz_sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE);
+	if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) {
 		ret = true;
 		goto label_return;
 	}
-	ckh->tab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
+	ckh->tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, true,
+	    NULL, true, arena_ichoose(tsd, NULL));
 	if (ckh->tab == NULL) {
 		ret = true;
 		goto label_return;
@@ -396,20 +409,18 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp)

 	ret = false;
 label_return:
-	return (ret);
+	return ret;
 }

 void
-ckh_delete(ckh_t *ckh)
-{
-
+ckh_delete(tsd_t *tsd, ckh_t *ckh) {
 	assert(ckh != NULL);

 #ifdef CKH_VERBOSE
 	malloc_printf(
-	    "%s(%p): ngrows: %"PRIu64", nshrinks: %"PRIu64","
-	    " nshrinkfails: %"PRIu64", ninserts: %"PRIu64","
-	    " nrelocs: %"PRIu64"\n", __func__, ckh,
+	    "%s(%p): ngrows: %"FMTu64", nshrinks: %"FMTu64","
+	    " nshrinkfails: %"FMTu64", ninserts: %"FMTu64","
+	    " nrelocs: %"FMTu64"\n", __func__, ckh,
 	    (unsigned long long)ckh->ngrows,
 	    (unsigned long long)ckh->nshrinks,
 	    (unsigned long long)ckh->nshrinkfails,
@@ -417,43 +428,42 @@ ckh_delete(ckh_t *ckh)
 	    (unsigned long long)ckh->nrelocs);
 #endif

-	idalloc(ckh->tab);
-	if (config_debug)
-		memset(ckh, 0x5a, sizeof(ckh_t));
+	idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, NULL, true, true);
+	if (config_debug) {
+		memset(ckh, JEMALLOC_FREE_JUNK, sizeof(ckh_t));
+	}
 }

 size_t
-ckh_count(ckh_t *ckh)
-{
-
+ckh_count(ckh_t *ckh) {
 	assert(ckh != NULL);

-	return (ckh->count);
+	return ckh->count;
 }

 bool
-ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data)
-{
+ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data) {
 	size_t i, ncells;

 	for (i = *tabind, ncells = (ZU(1) << (ckh->lg_curbuckets +
 	    LG_CKH_BUCKET_CELLS)); i < ncells; i++) {
 		if (ckh->tab[i].key != NULL) {
-			if (key != NULL)
+			if (key != NULL) {
 				*key = (void *)ckh->tab[i].key;
-			if (data != NULL)
+			}
+			if (data != NULL) {
 				*data = (void *)ckh->tab[i].data;
+			}
 			*tabind = i + 1;
-			return (false);
+			return false;
 		}
 	}

-	return (true);
+	return true;
 }

 bool
-ckh_insert(ckh_t *ckh, const void *key, const void *data)
-{
+ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data) {
 	bool ret;

 	assert(ckh != NULL);
@@ -464,7 +474,7 @@ ckh_insert(ckh_t *ckh, const void *key, const void *data)
 #endif

 	while (ckh_try_insert(ckh, &key, &data)) {
-		if (ckh_grow(ckh)) {
+		if (ckh_grow(tsd, ckh)) {
 			ret = true;
 			goto label_return;
 		}
@@ -472,22 +482,24 @@ ckh_insert(ckh_t *ckh, const void *key, const void *data)

 	ret = false;
 label_return:
-	return (ret);
+	return ret;
 }

 bool
-ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data)
-{
+ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key,
+    void **data) {
 	size_t cell;

 	assert(ckh != NULL);

 	cell = ckh_isearch(ckh, searchkey);
 	if (cell != SIZE_T_MAX) {
-		if (key != NULL)
+		if (key != NULL) {
 			*key = (void *)ckh->tab[cell].key;
-		if (data != NULL)
+		}
+		if (data != NULL) {
 			*data = (void *)ckh->tab[cell].data;
+		}
 		ckh->tab[cell].key = NULL;
 		ckh->tab[cell].data = NULL; /* Not necessary. */

@@ -497,54 +509,50 @@ ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data)
 		    + LG_CKH_BUCKET_CELLS - 2)) && ckh->lg_curbuckets
 		    > ckh->lg_minbuckets) {
 			/* Ignore error due to OOM. */
-			ckh_shrink(ckh);
+			ckh_shrink(tsd, ckh);
 		}

-		return (false);
+		return false;
 	}

-	return (true);
+	return true;
 }

 bool
-ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data)
-{
+ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data) {
 	size_t cell;

 	assert(ckh != NULL);

 	cell = ckh_isearch(ckh, searchkey);
 	if (cell != SIZE_T_MAX) {
-		if (key != NULL)
+		if (key != NULL) {
 			*key = (void *)ckh->tab[cell].key;
-		if (data != NULL)
+		}
+		if (data != NULL) {
 			*data = (void *)ckh->tab[cell].data;
-		return (false);
+		}
+		return false;
 	}

-	return (true);
+	return true;
 }

 void
-ckh_string_hash(const void *key, size_t r_hash[2])
-{
-
+ckh_string_hash(const void *key, size_t r_hash[2]) {
 	hash(key, strlen((const char *)key), 0x94122f33U, r_hash);
 }

 bool
-ckh_string_keycomp(const void *k1, const void *k2)
-{
+ckh_string_keycomp(const void *k1, const void *k2) {
+	assert(k1 != NULL);
+	assert(k2 != NULL);

-    assert(k1 != NULL);
-    assert(k2 != NULL);
-
-    return (strcmp((char *)k1, (char *)k2) ? false : true);
+	return !strcmp((char *)k1, (char *)k2);
 }

 void
-ckh_pointer_hash(const void *key, size_t r_hash[2])
-{
+ckh_pointer_hash(const void *key, size_t r_hash[2]) {
 	union {
 		const void	*v;
 		size_t		i;
@@ -556,8 +564,6 @@ ckh_pointer_hash(const void *key, size_t r_hash[2])
 }

 bool
-ckh_pointer_keycomp(const void *k1, const void *k2)
-{
-
-	return ((k1 == k2) ? true : false);
+ckh_pointer_keycomp(const void *k1, const void *k2) {
+	return (k1 == k2);
 }
--- a/deps/jemalloc/src/ctl.c
+++ b/deps/jemalloc/src/ctl.c
--- a/deps/jemalloc/src/extent.c
+++ b/deps/jemalloc/src/extent.c
--- a/deps/jemalloc/src/hash.c
+++ b/deps/jemalloc/src/hash.c
@@ -1,2 +1,3 @@
-#define	JEMALLOC_HASH_C_
-#include "jemalloc/internal/jemalloc_internal.h"
+#define JEMALLOC_HASH_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
--- a/deps/jemalloc/src/jemalloc.c
+++ b/deps/jemalloc/src/jemalloc.c
--- a/deps/jemalloc/src/mutex.c
+++ b/deps/jemalloc/src/mutex.c
@@ -1,12 +1,12 @@
-#define	JEMALLOC_MUTEX_C_
-#include "jemalloc/internal/jemalloc_internal.h"
+#define JEMALLOC_MUTEX_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"

-#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32)
-#include <dlfcn.h>
-#endif
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/malloc_io.h"

 #ifndef _CRT_SPINCOUNT
-#define	_CRT_SPINCOUNT 4000
+#define _CRT_SPINCOUNT 4000
 #endif

 /******************************************************************************/
@@ -20,10 +20,6 @@ static bool		postpone_init = true;
 static malloc_mutex_t	*postponed_mutexes = NULL;
 #endif

-#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32)
-static void	pthread_create_once(void);
-#endif
-
 /******************************************************************************/
 /*
 * We intercept pthread_create() calls in order to toggle isthreaded if the
@@ -31,33 +27,11 @@ static void	pthread_create_once(void);
 */

 #if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32)
-static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *,
-    void *(*)(void *), void *__restrict);
-
-static void
-pthread_create_once(void)
-{
-
-	pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create");
-	if (pthread_create_fptr == NULL) {
-		malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, "
-		    "\"pthread_create\")\n");
-		abort();
-	}
-
-	isthreaded = true;
-}
-
 JEMALLOC_EXPORT int
 pthread_create(pthread_t *__restrict thread,
    const pthread_attr_t *__restrict attr, void *(*start_routine)(void *),
-    void *__restrict arg)
-{
-	static pthread_once_t once_control = PTHREAD_ONCE_INIT;
-
-	pthread_once(&once_control, pthread_create_once);
-
-	return (pthread_create_fptr(thread, attr, start_routine, arg));
+    void *__restrict arg) {
+	return pthread_create_wrapper(thread, attr, start_routine, arg);
 }
 #endif

@@ -68,14 +42,108 @@ JEMALLOC_EXPORT int	_pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
    void *(calloc_cb)(size_t, size_t));
 #endif

-bool
-malloc_mutex_init(malloc_mutex_t *mutex)
-{
+void
+malloc_mutex_lock_slow(malloc_mutex_t *mutex) {
+	mutex_prof_data_t *data = &mutex->prof_data;
+	UNUSED nstime_t before = NSTIME_ZERO_INITIALIZER;

+	if (ncpus == 1) {
+		goto label_spin_done;
+	}
+
+	int cnt = 0, max_cnt = MALLOC_MUTEX_MAX_SPIN;
+	do {
+		CPU_SPINWAIT;
+		if (!malloc_mutex_trylock_final(mutex)) {
+			data->n_spin_acquired++;
+			return;
+		}
+	} while (cnt++ < max_cnt);
+
+	if (!config_stats) {
+		/* Only spinning is useful when stats are off. */
+		malloc_mutex_lock_final(mutex);
+		return;
+	}
+label_spin_done:
+	nstime_update(&before);
+	/* Copy before to after to avoid clock skews. */
+	nstime_t after;
+	nstime_copy(&after, &before);
+	uint32_t n_thds = atomic_fetch_add_u32(&data->n_waiting_thds, 1,
+	    ATOMIC_RELAXED) + 1;
+	/* One last try, as the two calls above may take quite a few cycles. */
+	if (!malloc_mutex_trylock_final(mutex)) {
+		atomic_fetch_sub_u32(&data->n_waiting_thds, 1, ATOMIC_RELAXED);
+		data->n_spin_acquired++;
+		return;
+	}
+
+	/* True slow path. */
+	malloc_mutex_lock_final(mutex);
+	/* Update more slow-path only counters. */
+	atomic_fetch_sub_u32(&data->n_waiting_thds, 1, ATOMIC_RELAXED);
+	nstime_update(&after);
+
+	nstime_t delta;
+	nstime_copy(&delta, &after);
+	nstime_subtract(&delta, &before);
+
+	data->n_wait_times++;
+	nstime_add(&data->tot_wait_time, &delta);
+	if (nstime_compare(&data->max_wait_time, &delta) < 0) {
+		nstime_copy(&data->max_wait_time, &delta);
+	}
+	if (n_thds > data->max_n_thds) {
+		data->max_n_thds = n_thds;
+	}
+}
+
+static void
+mutex_prof_data_init(mutex_prof_data_t *data) {
+	memset(data, 0, sizeof(mutex_prof_data_t));
+	nstime_init(&data->max_wait_time, 0);
+	nstime_init(&data->tot_wait_time, 0);
+	data->prev_owner = NULL;
+}
+
+void
+malloc_mutex_prof_data_reset(tsdn_t *tsdn, malloc_mutex_t *mutex) {
+	malloc_mutex_assert_owner(tsdn, mutex);
+	mutex_prof_data_init(&mutex->prof_data);
+}
+
+static int
+mutex_addr_comp(const witness_t *witness1, void *mutex1,
+    const witness_t *witness2, void *mutex2) {
+	assert(mutex1 != NULL);
+	assert(mutex2 != NULL);
+	uintptr_t mu1int = (uintptr_t)mutex1;
+	uintptr_t mu2int = (uintptr_t)mutex2;
+	if (mu1int < mu2int) {
+		return -1;
+	} else if (mu1int == mu2int) {
+		return 0;
+	} else {
+		return 1;
+	}
+}
+
+bool
+malloc_mutex_init(malloc_mutex_t *mutex, const char *name,
+    witness_rank_t rank, malloc_mutex_lock_order_t lock_order) {
+	mutex_prof_data_init(&mutex->prof_data);
 #ifdef _WIN32
+#  if _WIN32_WINNT >= 0x0600
+	InitializeSRWLock(&mutex->lock);
+#  else
 	if (!InitializeCriticalSectionAndSpinCount(&mutex->lock,
-	    _CRT_SPINCOUNT))
-		return (true);
+	    _CRT_SPINCOUNT)) {
+		return true;
+	}
+#  endif
+#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
+	mutex->lock = OS_UNFAIR_LOCK_INIT;
 #elif (defined(JEMALLOC_OSSPIN))
 	mutex->lock = 0;
 #elif (defined(JEMALLOC_MUTEX_INIT_CB))
@@ -83,67 +151,73 @@ malloc_mutex_init(malloc_mutex_t *mutex)
 		mutex->postponed_next = postponed_mutexes;
 		postponed_mutexes = mutex;
 	} else {
-		if (_pthread_mutex_init_calloc_cb(&mutex->lock, base_calloc) !=
-		    0)
-			return (true);
+		if (_pthread_mutex_init_calloc_cb(&mutex->lock,
+		    bootstrap_calloc) != 0) {
+			return true;
+		}
 	}
 #else
 	pthread_mutexattr_t attr;

-	if (pthread_mutexattr_init(&attr) != 0)
-		return (true);
+	if (pthread_mutexattr_init(&attr) != 0) {
+		return true;
+	}
 	pthread_mutexattr_settype(&attr, MALLOC_MUTEX_TYPE);
 	if (pthread_mutex_init(&mutex->lock, &attr) != 0) {
 		pthread_mutexattr_destroy(&attr);
-		return (true);
+		return true;
 	}
 	pthread_mutexattr_destroy(&attr);
 #endif
-	return (false);
+	if (config_debug) {
+		mutex->lock_order = lock_order;
+		if (lock_order == malloc_mutex_address_ordered) {
+			witness_init(&mutex->witness, name, rank,
+			    mutex_addr_comp, mutex); /* Not &mutex (local). */
+		} else {
+			witness_init(&mutex->witness, name, rank, NULL, NULL);
+		}
+	}
+	return false;
 }

 void
-malloc_mutex_prefork(malloc_mutex_t *mutex)
-{
-
-	malloc_mutex_lock(mutex);
+malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex) {
+	malloc_mutex_lock(tsdn, mutex);
 }

 void
-malloc_mutex_postfork_parent(malloc_mutex_t *mutex)
-{
-
-	malloc_mutex_unlock(mutex);
+malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex) {
+	malloc_mutex_unlock(tsdn, mutex);
 }

 void
-malloc_mutex_postfork_child(malloc_mutex_t *mutex)
-{
-
+malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex) {
 #ifdef JEMALLOC_MUTEX_INIT_CB
-	malloc_mutex_unlock(mutex);
+	malloc_mutex_unlock(tsdn, mutex);
 #else
-	if (malloc_mutex_init(mutex)) {
+	if (malloc_mutex_init(mutex, mutex->witness.name,
+	    mutex->witness.rank, mutex->lock_order)) {
 		malloc_printf("<jemalloc>: Error re-initializing mutex in "
 		    "child\n");
-		if (opt_abort)
+		if (opt_abort) {
 			abort();
+		}
 	}
 #endif
 }

 bool
-mutex_boot(void)
-{
-
+malloc_mutex_boot(void) {
 #ifdef JEMALLOC_MUTEX_INIT_CB
 	postpone_init = false;
 	while (postponed_mutexes != NULL) {
 		if (_pthread_mutex_init_calloc_cb(&postponed_mutexes->lock,
-		    base_calloc) != 0)
-			return (true);
+		    bootstrap_calloc) != 0) {
+			return true;
+		}
 		postponed_mutexes = postponed_mutexes->postponed_next;
 	}
 #endif
-	return (false);
+	return false;
 }
--- a/deps/jemalloc/src/prof.c
+++ b/deps/jemalloc/src/prof.c
--- a/deps/jemalloc/src/rtree.c
+++ b/deps/jemalloc/src/rtree.c
@@ -1,105 +1,320 @@
-#define	JEMALLOC_RTREE_C_
-#include "jemalloc/internal/jemalloc_internal.h"
+#define JEMALLOC_RTREE_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"

-rtree_t *
-rtree_new(unsigned bits, rtree_alloc_t *alloc, rtree_dalloc_t *dalloc)
-{
-	rtree_t *ret;
-	unsigned bits_per_level, bits_in_leaf, height, i;
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/mutex.h"

-	assert(bits > 0 && bits <= (sizeof(uintptr_t) << 3));
-
-	bits_per_level = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(void *)))) - 1;
-	bits_in_leaf = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(uint8_t)))) - 1;
-	if (bits > bits_in_leaf) {
-		height = 1 + (bits - bits_in_leaf) / bits_per_level;
-		if ((height-1) * bits_per_level + bits_in_leaf != bits)
-			height++;
-	} else {
-		height = 1;
+/*
+ * Only the most significant bits of keys passed to rtree_{read,write}() are
+ * used.
+ */
+bool
+rtree_new(rtree_t *rtree, bool zeroed) {
+#ifdef JEMALLOC_JET
+	if (!zeroed) {
+		memset(rtree, 0, sizeof(rtree_t)); /* Clear root. */
 	}
-	assert((height-1) * bits_per_level + bits_in_leaf >= bits);
+#else
+	assert(zeroed);
+#endif

-	ret = (rtree_t*)alloc(offsetof(rtree_t, level2bits) +
-	    (sizeof(unsigned) * height));
-	if (ret == NULL)
-		return (NULL);
-	memset(ret, 0, offsetof(rtree_t, level2bits) + (sizeof(unsigned) *
-	    height));
-
-	ret->alloc = alloc;
-	ret->dalloc = dalloc;
-	if (malloc_mutex_init(&ret->mutex)) {
-		if (dalloc != NULL)
-			dalloc(ret);
-		return (NULL);
+	if (malloc_mutex_init(&rtree->init_lock, "rtree", WITNESS_RANK_RTREE,
+	    malloc_mutex_rank_exclusive)) {
+		return true;
 	}
-	ret->height = height;
-	if (height > 1) {
-		if ((height-1) * bits_per_level + bits_in_leaf > bits) {
-			ret->level2bits[0] = (bits - bits_in_leaf) %
-			    bits_per_level;
-		} else
-			ret->level2bits[0] = bits_per_level;
-		for (i = 1; i < height-1; i++)
-			ret->level2bits[i] = bits_per_level;
-		ret->level2bits[height-1] = bits_in_leaf;
-	} else
-		ret->level2bits[0] = bits;

-	ret->root = (void**)alloc(sizeof(void *) << ret->level2bits[0]);
-	if (ret->root == NULL) {
-		if (dalloc != NULL)
-			dalloc(ret);
-		return (NULL);
-	}
-	memset(ret->root, 0, sizeof(void *) << ret->level2bits[0]);
-
-	return (ret);
+	return false;
 }

+static rtree_node_elm_t *
+rtree_node_alloc_impl(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) {
+	return (rtree_node_elm_t *)base_alloc(tsdn, b0get(), nelms *
+	    sizeof(rtree_node_elm_t), CACHELINE);
+}
+rtree_node_alloc_t *JET_MUTABLE rtree_node_alloc = rtree_node_alloc_impl;
+
 static void
-rtree_delete_subtree(rtree_t *rtree, void **node, unsigned level)
-{
+rtree_node_dalloc_impl(tsdn_t *tsdn, rtree_t *rtree, rtree_node_elm_t *node) {
+	/* Nodes are never deleted during normal operation. */
+	not_reached();
+}
+UNUSED rtree_node_dalloc_t *JET_MUTABLE rtree_node_dalloc =
+    rtree_node_dalloc_impl;

-	if (level < rtree->height - 1) {
-		size_t nchildren, i;
+static rtree_leaf_elm_t *
+rtree_leaf_alloc_impl(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) {
+	return (rtree_leaf_elm_t *)base_alloc(tsdn, b0get(), nelms *
+	    sizeof(rtree_leaf_elm_t), CACHELINE);
+}
+rtree_leaf_alloc_t *JET_MUTABLE rtree_leaf_alloc = rtree_leaf_alloc_impl;

-		nchildren = ZU(1) << rtree->level2bits[level];
-		for (i = 0; i < nchildren; i++) {
-			void **child = (void **)node[i];
-			if (child != NULL)
-				rtree_delete_subtree(rtree, child, level + 1);
+static void
+rtree_leaf_dalloc_impl(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *leaf) {
+	/* Leaves are never deleted during normal operation. */
+	not_reached();
+}
+UNUSED rtree_leaf_dalloc_t *JET_MUTABLE rtree_leaf_dalloc =
+    rtree_leaf_dalloc_impl;
+
+#ifdef JEMALLOC_JET
+#  if RTREE_HEIGHT > 1
+static void
+rtree_delete_subtree(tsdn_t *tsdn, rtree_t *rtree, rtree_node_elm_t *subtree,
+    unsigned level) {
+	size_t nchildren = ZU(1) << rtree_levels[level].bits;
+	if (level + 2 < RTREE_HEIGHT) {
+		for (size_t i = 0; i < nchildren; i++) {
+			rtree_node_elm_t *node =
+			    (rtree_node_elm_t *)atomic_load_p(&subtree[i].child,
+			    ATOMIC_RELAXED);
+			if (node != NULL) {
+				rtree_delete_subtree(tsdn, rtree, node, level +
+				    1);
+			}
+		}
+	} else {
+		for (size_t i = 0; i < nchildren; i++) {
+			rtree_leaf_elm_t *leaf =
+			    (rtree_leaf_elm_t *)atomic_load_p(&subtree[i].child,
+			    ATOMIC_RELAXED);
+			if (leaf != NULL) {
+				rtree_leaf_dalloc(tsdn, rtree, leaf);
+			}
 		}
 	}
-	rtree->dalloc(node);
+
+	if (subtree != rtree->root) {
+		rtree_node_dalloc(tsdn, rtree, subtree);
+	}
+}
+#  endif
+
+void
+rtree_delete(tsdn_t *tsdn, rtree_t *rtree) {
+#  if RTREE_HEIGHT > 1
+	rtree_delete_subtree(tsdn, rtree, rtree->root, 0);
+#  endif
+}
+#endif
+
+static rtree_node_elm_t *
+rtree_node_init(tsdn_t *tsdn, rtree_t *rtree, unsigned level,
+    atomic_p_t *elmp) {
+	malloc_mutex_lock(tsdn, &rtree->init_lock);
+	/*
+	 * If *elmp is non-null, then it was initialized with the init lock
+	 * held, so we can get by with 'relaxed' here.
+	 */
+	rtree_node_elm_t *node = atomic_load_p(elmp, ATOMIC_RELAXED);
+	if (node == NULL) {
+		node = rtree_node_alloc(tsdn, rtree, ZU(1) <<
+		    rtree_levels[level].bits);
+		if (node == NULL) {
+			malloc_mutex_unlock(tsdn, &rtree->init_lock);
+			return NULL;
+		}
+		/*
+		 * Even though we hold the lock, a later reader might not; we
+		 * need release semantics.
+		 */
+		atomic_store_p(elmp, node, ATOMIC_RELEASE);
+	}
+	malloc_mutex_unlock(tsdn, &rtree->init_lock);
+
+	return node;
+}
+
+static rtree_leaf_elm_t *
+rtree_leaf_init(tsdn_t *tsdn, rtree_t *rtree, atomic_p_t *elmp) {
+	malloc_mutex_lock(tsdn, &rtree->init_lock);
+	/*
+	 * If *elmp is non-null, then it was initialized with the init lock
+	 * held, so we can get by with 'relaxed' here.
+	 */
+	rtree_leaf_elm_t *leaf = atomic_load_p(elmp, ATOMIC_RELAXED);
+	if (leaf == NULL) {
+		leaf = rtree_leaf_alloc(tsdn, rtree, ZU(1) <<
+		    rtree_levels[RTREE_HEIGHT-1].bits);
+		if (leaf == NULL) {
+			malloc_mutex_unlock(tsdn, &rtree->init_lock);
+			return NULL;
+		}
+		/*
+		 * Even though we hold the lock, a later reader might not; we
+		 * need release semantics.
+		 */
+		atomic_store_p(elmp, leaf, ATOMIC_RELEASE);
+	}
+	malloc_mutex_unlock(tsdn, &rtree->init_lock);
+
+	return leaf;
+}
+
+static bool
+rtree_node_valid(rtree_node_elm_t *node) {
+	return ((uintptr_t)node != (uintptr_t)0);
+}
+
+static bool
+rtree_leaf_valid(rtree_leaf_elm_t *leaf) {
+	return ((uintptr_t)leaf != (uintptr_t)0);
+}
+
+static rtree_node_elm_t *
+rtree_child_node_tryread(rtree_node_elm_t *elm, bool dependent) {
+	rtree_node_elm_t *node;
+
+	if (dependent) {
+		node = (rtree_node_elm_t *)atomic_load_p(&elm->child,
+		    ATOMIC_RELAXED);
+	} else {
+		node = (rtree_node_elm_t *)atomic_load_p(&elm->child,
+		    ATOMIC_ACQUIRE);
+	}
+
+	assert(!dependent || node != NULL);
+	return node;
+}
+
+static rtree_node_elm_t *
+rtree_child_node_read(tsdn_t *tsdn, rtree_t *rtree, rtree_node_elm_t *elm,
+    unsigned level, bool dependent) {
+	rtree_node_elm_t *node;
+
+	node = rtree_child_node_tryread(elm, dependent);
+	if (!dependent && unlikely(!rtree_node_valid(node))) {
+		node = rtree_node_init(tsdn, rtree, level + 1, &elm->child);
+	}
+	assert(!dependent || node != NULL);
+	return node;
+}
+
+static rtree_leaf_elm_t *
+rtree_child_leaf_tryread(rtree_node_elm_t *elm, bool dependent) {
+	rtree_leaf_elm_t *leaf;
+
+	if (dependent) {
+		leaf = (rtree_leaf_elm_t *)atomic_load_p(&elm->child,
+		    ATOMIC_RELAXED);
+	} else {
+		leaf = (rtree_leaf_elm_t *)atomic_load_p(&elm->child,
+		    ATOMIC_ACQUIRE);
+	}
+
+	assert(!dependent || leaf != NULL);
+	return leaf;
+}
+
+static rtree_leaf_elm_t *
+rtree_child_leaf_read(tsdn_t *tsdn, rtree_t *rtree, rtree_node_elm_t *elm,
+    unsigned level, bool dependent) {
+	rtree_leaf_elm_t *leaf;
+
+	leaf = rtree_child_leaf_tryread(elm, dependent);
+	if (!dependent && unlikely(!rtree_leaf_valid(leaf))) {
+		leaf = rtree_leaf_init(tsdn, rtree, &elm->child);
+	}
+	assert(!dependent || leaf != NULL);
+	return leaf;
+}
+
+rtree_leaf_elm_t *
+rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
+    uintptr_t key, bool dependent, bool init_missing) {
+	rtree_node_elm_t *node;
+	rtree_leaf_elm_t *leaf;
+#if RTREE_HEIGHT > 1
+	node = rtree->root;
+#else
+	leaf = rtree->root;
+#endif
+
+	if (config_debug) {
+		uintptr_t leafkey = rtree_leafkey(key);
+		for (unsigned i = 0; i < RTREE_CTX_NCACHE; i++) {
+			assert(rtree_ctx->cache[i].leafkey != leafkey);
+		}
+		for (unsigned i = 0; i < RTREE_CTX_NCACHE_L2; i++) {
+			assert(rtree_ctx->l2_cache[i].leafkey != leafkey);
+		}
+	}
+
+#define RTREE_GET_CHILD(level) {					\
+		assert(level < RTREE_HEIGHT-1);				\
+		if (level != 0 && !dependent &&				\
+		    unlikely(!rtree_node_valid(node))) {		\
+			return NULL;					\
+		}							\
+		uintptr_t subkey = rtree_subkey(key, level);		\
+		if (level + 2 < RTREE_HEIGHT) {				\
+			node = init_missing ?				\
+			    rtree_child_node_read(tsdn, rtree,		\
+			    &node[subkey], level, dependent) :		\
+			    rtree_child_node_tryread(&node[subkey],	\
+			    dependent);					\
+		} else {						\
+			leaf = init_missing ?				\
+			    rtree_child_leaf_read(tsdn, rtree,		\
+			    &node[subkey], level, dependent) :		\
+			    rtree_child_leaf_tryread(&node[subkey],	\
+			    dependent);					\
+		}							\
+	}
+	/*
+	 * Cache replacement upon hard lookup (i.e. L1 & L2 rtree cache miss):
+	 * (1) evict last entry in L2 cache; (2) move the collision slot from L1
+	 * cache down to L2; and (3) fill L1.
+	 */
+#define RTREE_GET_LEAF(level) {						\
+		assert(level == RTREE_HEIGHT-1);			\
+		if (!dependent && unlikely(!rtree_leaf_valid(leaf))) {	\
+			return NULL;					\
+		}							\
+		if (RTREE_CTX_NCACHE_L2 > 1) {				\
+			memmove(&rtree_ctx->l2_cache[1],		\
+			    &rtree_ctx->l2_cache[0],			\
+			    sizeof(rtree_ctx_cache_elm_t) *		\
+			    (RTREE_CTX_NCACHE_L2 - 1));			\
+		}							\
+		size_t slot = rtree_cache_direct_map(key);		\
+		rtree_ctx->l2_cache[0].leafkey =			\
+		    rtree_ctx->cache[slot].leafkey;			\
+		rtree_ctx->l2_cache[0].leaf =				\
+		    rtree_ctx->cache[slot].leaf;			\
+		uintptr_t leafkey = rtree_leafkey(key);			\
+		rtree_ctx->cache[slot].leafkey = leafkey;		\
+		rtree_ctx->cache[slot].leaf = leaf;			\
+		uintptr_t subkey = rtree_subkey(key, level);		\
+		return &leaf[subkey];					\
+	}
+	if (RTREE_HEIGHT > 1) {
+		RTREE_GET_CHILD(0)
+	}
+	if (RTREE_HEIGHT > 2) {
+		RTREE_GET_CHILD(1)
+	}
+	if (RTREE_HEIGHT > 3) {
+		for (unsigned i = 2; i < RTREE_HEIGHT-1; i++) {
+			RTREE_GET_CHILD(i)
+		}
+	}
+	RTREE_GET_LEAF(RTREE_HEIGHT-1)
+#undef RTREE_GET_CHILD
+#undef RTREE_GET_LEAF
+	not_reached();
 }

 void
-rtree_delete(rtree_t *rtree)
-{
-
-	rtree_delete_subtree(rtree, rtree->root, 0);
-	rtree->dalloc(rtree);
-}
-
-void
-rtree_prefork(rtree_t *rtree)
-{
-
-	malloc_mutex_prefork(&rtree->mutex);
-}
-
-void
-rtree_postfork_parent(rtree_t *rtree)
-{
-
-	malloc_mutex_postfork_parent(&rtree->mutex);
-}
-
-void
-rtree_postfork_child(rtree_t *rtree)
-{
-
-	malloc_mutex_postfork_child(&rtree->mutex);
+rtree_ctx_data_init(rtree_ctx_t *ctx) {
+	for (unsigned i = 0; i < RTREE_CTX_NCACHE; i++) {
+		rtree_ctx_cache_elm_t *cache = &ctx->cache[i];
+		cache->leafkey = RTREE_LEAFKEY_INVALID;
+		cache->leaf = NULL;
+	}
+	for (unsigned i = 0; i < RTREE_CTX_NCACHE_L2; i++) {
+		rtree_ctx_cache_elm_t *cache = &ctx->l2_cache[i];
+		cache->leafkey = RTREE_LEAFKEY_INVALID;
+		cache->leaf = NULL;
+	}
 }
--- a/deps/jemalloc/src/stats.c
+++ b/deps/jemalloc/src/stats.c
--- a/deps/jemalloc/src/tcache.c
+++ b/deps/jemalloc/src/tcache.c
@@ -1,131 +1,153 @@
-#define	JEMALLOC_TCACHE_C_
-#include "jemalloc/internal/jemalloc_internal.h"
+#define JEMALLOC_TCACHE_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/size_classes.h"

 /******************************************************************************/
 /* Data. */

-malloc_tsd_data(, tcache, tcache_t *, NULL)
-malloc_tsd_data(, tcache_enabled, tcache_enabled_t, tcache_enabled_default)
-
 bool	opt_tcache = true;
 ssize_t	opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;

 tcache_bin_info_t	*tcache_bin_info;
 static unsigned		stack_nelms; /* Total stack elms per tcache. */

-size_t			nhbins;
+unsigned		nhbins;
 size_t			tcache_maxclass;

+tcaches_t		*tcaches;
+
+/* Index of first element within tcaches that has never been used. */
+static unsigned		tcaches_past;
+
+/* Head of singly linked list tracking available tcaches elements. */
+static tcaches_t	*tcaches_avail;
+
+/* Protects tcaches{,_past,_avail}. */
+static malloc_mutex_t	tcaches_mtx;
+
 /******************************************************************************/

-size_t	tcache_salloc(const void *ptr)
-{
-
-	return (arena_salloc(ptr, false));
+size_t
+tcache_salloc(tsdn_t *tsdn, const void *ptr) {
+	return arena_salloc(tsdn, ptr);
 }

 void
-tcache_event_hard(tcache_t *tcache)
-{
-	size_t binind = tcache->next_gc_bin;
-	tcache_bin_t *tbin = &tcache->tbins[binind];
-	tcache_bin_info_t *tbin_info = &tcache_bin_info[binind];
+tcache_event_hard(tsd_t *tsd, tcache_t *tcache) {
+	szind_t binind = tcache->next_gc_bin;

+	tcache_bin_t *tbin;
+	if (binind < NBINS) {
+		tbin = tcache_small_bin_get(tcache, binind);
+	} else {
+		tbin = tcache_large_bin_get(tcache, binind);
+	}
 	if (tbin->low_water > 0) {
 		/*
 		 * Flush (ceiling) 3/4 of the objects below the low water mark.
 		 */
 		if (binind < NBINS) {
-			tcache_bin_flush_small(tbin, binind, tbin->ncached -
-			    tbin->low_water + (tbin->low_water >> 2), tcache);
+			tcache_bin_flush_small(tsd, tcache, tbin, binind,
+			    tbin->ncached - tbin->low_water + (tbin->low_water
+			    >> 2));
+			/*
+			 * Reduce fill count by 2X.  Limit lg_fill_div such that
+			 * the fill count is always at least 1.
+			 */
+			tcache_bin_info_t *tbin_info = &tcache_bin_info[binind];
+			if ((tbin_info->ncached_max >>
+			     (tcache->lg_fill_div[binind] + 1)) >= 1) {
+				tcache->lg_fill_div[binind]++;
+			}
 		} else {
-			tcache_bin_flush_large(tbin, binind, tbin->ncached -
-			    tbin->low_water + (tbin->low_water >> 2), tcache);
+			tcache_bin_flush_large(tsd, tbin, binind, tbin->ncached
+			    - tbin->low_water + (tbin->low_water >> 2), tcache);
 		}
-		/*
-		 * Reduce fill count by 2X.  Limit lg_fill_div such that the
-		 * fill count is always at least 1.
-		 */
-		if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1)) >= 1)
-			tbin->lg_fill_div++;
 	} else if (tbin->low_water < 0) {
 		/*
-		 * Increase fill count by 2X.  Make sure lg_fill_div stays
-		 * greater than 0.
+		 * Increase fill count by 2X for small bins.  Make sure
+		 * lg_fill_div stays greater than 0.
 		 */
-		if (tbin->lg_fill_div > 1)
-			tbin->lg_fill_div--;
+		if (binind < NBINS && tcache->lg_fill_div[binind] > 1) {
+			tcache->lg_fill_div[binind]--;
+		}
 	}
 	tbin->low_water = tbin->ncached;

 	tcache->next_gc_bin++;
-	if (tcache->next_gc_bin == nhbins)
+	if (tcache->next_gc_bin == nhbins) {
 		tcache->next_gc_bin = 0;
-	tcache->ev_cnt = 0;
+	}
 }

 void *
-tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
-{
+tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
+    tcache_bin_t *tbin, szind_t binind, bool *tcache_success) {
 	void *ret;

-	arena_tcache_fill_small(tcache->arena, tbin, binind,
+	assert(tcache->arena != NULL);
+	arena_tcache_fill_small(tsdn, arena, tcache, tbin, binind,
 	    config_prof ? tcache->prof_accumbytes : 0);
-	if (config_prof)
+	if (config_prof) {
 		tcache->prof_accumbytes = 0;
-	ret = tcache_alloc_easy(tbin);
+	}
+	ret = tcache_alloc_easy(tbin, tcache_success);

-	return (ret);
+	return ret;
 }

 void
-tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem,
-    tcache_t *tcache)
-{
-	void *ptr;
-	unsigned i, nflush, ndeferred;
+tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
+    szind_t binind, unsigned rem) {
 	bool merged_stats = false;

 	assert(binind < NBINS);
 	assert(rem <= tbin->ncached);

-	for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
-		/* Lock the arena bin associated with the first object. */
-		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
-		    tbin->avail[0]);
-		arena_t *arena = chunk->arena;
-		arena_bin_t *bin = &arena->bins[binind];
+	arena_t *arena = tcache->arena;
+	assert(arena != NULL);
+	unsigned nflush = tbin->ncached - rem;
+	VARIABLE_ARRAY(extent_t *, item_extent, nflush);
+	/* Look up extent once per item. */
+	for (unsigned i = 0 ; i < nflush; i++) {
+		item_extent[i] = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1 - i));
+	}

-		if (config_prof && arena == tcache->arena) {
-			if (arena_prof_accum(arena, tcache->prof_accumbytes))
-				prof_idump();
+	while (nflush > 0) {
+		/* Lock the arena bin associated with the first object. */
+		extent_t *extent = item_extent[0];
+		arena_t *bin_arena = extent_arena_get(extent);
+		arena_bin_t *bin = &bin_arena->bins[binind];
+
+		if (config_prof && bin_arena == arena) {
+			if (arena_prof_accum(tsd_tsdn(tsd), arena,
+			    tcache->prof_accumbytes)) {
+				prof_idump(tsd_tsdn(tsd));
+			}
 			tcache->prof_accumbytes = 0;
 		}

-		malloc_mutex_lock(&bin->lock);
-		if (config_stats && arena == tcache->arena) {
-			assert(merged_stats == false);
+		malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
+		if (config_stats && bin_arena == arena) {
+			assert(!merged_stats);
 			merged_stats = true;
 			bin->stats.nflushes++;
 			bin->stats.nrequests += tbin->tstats.nrequests;
 			tbin->tstats.nrequests = 0;
 		}
-		ndeferred = 0;
-		for (i = 0; i < nflush; i++) {
-			ptr = tbin->avail[i];
-			assert(ptr != NULL);
-			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
-			if (chunk->arena == arena) {
-				size_t pageind = ((uintptr_t)ptr -
-				    (uintptr_t)chunk) >> LG_PAGE;
-				arena_chunk_map_t *mapelm =
-				    arena_mapp_get(chunk, pageind);
-				if (config_fill && opt_junk) {
-					arena_alloc_junk_small(ptr,
-					    &arena_bin_info[binind], true);
-				}
-				arena_dalloc_bin_locked(arena, chunk, ptr,
-				    mapelm);
+		unsigned ndeferred = 0;
+		for (unsigned i = 0; i < nflush; i++) {
+			void *ptr = *(tbin->avail - 1 - i);
+			extent = item_extent[i];
+			assert(ptr != NULL && extent != NULL);
+
+			if (extent_arena_get(extent) == bin_arena) {
+				arena_dalloc_bin_junked_locked(tsd_tsdn(tsd),
+				    bin_arena, extent, ptr);
 			} else {
 				/*
 				 * This object was allocated via a different
@@ -133,276 +155,369 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem,
 				 * locked.  Stash the object, so that it can be
 				 * handled in a future pass.
 				 */
-				tbin->avail[ndeferred] = ptr;
+				*(tbin->avail - 1 - ndeferred) = ptr;
+				item_extent[ndeferred] = extent;
 				ndeferred++;
 			}
 		}
-		malloc_mutex_unlock(&bin->lock);
+		malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
+		arena_decay_ticks(tsd_tsdn(tsd), bin_arena, nflush - ndeferred);
+		nflush = ndeferred;
 	}
-	if (config_stats && merged_stats == false) {
+	if (config_stats && !merged_stats) {
 		/*
 		 * The flush loop didn't happen to flush to this thread's
 		 * arena, so the stats didn't get merged.  Manually do so now.
 		 */
-		arena_bin_t *bin = &tcache->arena->bins[binind];
-		malloc_mutex_lock(&bin->lock);
+		arena_bin_t *bin = &arena->bins[binind];
+		malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
 		bin->stats.nflushes++;
 		bin->stats.nrequests += tbin->tstats.nrequests;
 		tbin->tstats.nrequests = 0;
-		malloc_mutex_unlock(&bin->lock);
+		malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
 	}

-	memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
-	    rem * sizeof(void *));
+	memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem *
+	    sizeof(void *));
 	tbin->ncached = rem;
-	if ((int)tbin->ncached < tbin->low_water)
+	if ((low_water_t)tbin->ncached < tbin->low_water) {
 		tbin->low_water = tbin->ncached;
+	}
 }

 void
-tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem,
-    tcache_t *tcache)
-{
-	void *ptr;
-	unsigned i, nflush, ndeferred;
+tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
+    unsigned rem, tcache_t *tcache) {
 	bool merged_stats = false;

 	assert(binind < nhbins);
 	assert(rem <= tbin->ncached);

-	for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
+	arena_t *arena = tcache->arena;
+	assert(arena != NULL);
+	unsigned nflush = tbin->ncached - rem;
+	VARIABLE_ARRAY(extent_t *, item_extent, nflush + 1);
+	/* Look up extent once per item (+1 above avoids a zero-length VLA). */
+	for (unsigned i = 0 ; i < nflush; i++) {
+		item_extent[i] = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1 - i));
+	}
+
+	while (nflush > 0) {
 		/* Lock the arena associated with the first object. */
-		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
-		    tbin->avail[0]);
-		arena_t *arena = chunk->arena;
+		extent_t *extent = item_extent[0];
+		arena_t *locked_arena = extent_arena_get(extent);
 		UNUSED bool idump;

-		if (config_prof)
+		if (config_prof) {
 			idump = false;
-		malloc_mutex_lock(&arena->lock);
-		if ((config_prof || config_stats) && arena == tcache->arena) {
+		}
+
+		malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->large_mtx);
+		for (unsigned i = 0; i < nflush; i++) {
+			void *ptr = *(tbin->avail - 1 - i);
+			assert(ptr != NULL);
+			extent = item_extent[i];
+			if (extent_arena_get(extent) == locked_arena) {
+				large_dalloc_prep_junked_locked(tsd_tsdn(tsd),
+				    extent);
+			}
+		}
+		if ((config_prof || config_stats) && locked_arena == arena) {
 			if (config_prof) {
-				idump = arena_prof_accum_locked(arena,
+				idump = arena_prof_accum(tsd_tsdn(tsd), arena,
 				    tcache->prof_accumbytes);
 				tcache->prof_accumbytes = 0;
 			}
 			if (config_stats) {
 				merged_stats = true;
-				arena->stats.nrequests_large +=
-				    tbin->tstats.nrequests;
-				arena->stats.lstats[binind - NBINS].nrequests +=
-				    tbin->tstats.nrequests;
+				arena_stats_large_nrequests_add(tsd_tsdn(tsd),
+				    &arena->stats, binind,
+				    tbin->tstats.nrequests);
 				tbin->tstats.nrequests = 0;
 			}
 		}
-		ndeferred = 0;
-		for (i = 0; i < nflush; i++) {
-			ptr = tbin->avail[i];
-			assert(ptr != NULL);
-			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
-			if (chunk->arena == arena)
-				arena_dalloc_large_locked(arena, chunk, ptr);
-			else {
+		malloc_mutex_unlock(tsd_tsdn(tsd), &locked_arena->large_mtx);
+
+		unsigned ndeferred = 0;
+		for (unsigned i = 0; i < nflush; i++) {
+			void *ptr = *(tbin->avail - 1 - i);
+			extent = item_extent[i];
+			assert(ptr != NULL && extent != NULL);
+
+			if (extent_arena_get(extent) == locked_arena) {
+				large_dalloc_finish(tsd_tsdn(tsd), extent);
+			} else {
 				/*
 				 * This object was allocated via a different
 				 * arena than the one that is currently locked.
 				 * Stash the object, so that it can be handled
 				 * in a future pass.
 				 */
-				tbin->avail[ndeferred] = ptr;
+				*(tbin->avail - 1 - ndeferred) = ptr;
+				item_extent[ndeferred] = extent;
 				ndeferred++;
 			}
 		}
-		malloc_mutex_unlock(&arena->lock);
-		if (config_prof && idump)
-			prof_idump();
+		if (config_prof && idump) {
+			prof_idump(tsd_tsdn(tsd));
+		}
+		arena_decay_ticks(tsd_tsdn(tsd), locked_arena, nflush -
+		    ndeferred);
+		nflush = ndeferred;
 	}
-	if (config_stats && merged_stats == false) {
+	if (config_stats && !merged_stats) {
 		/*
 		 * The flush loop didn't happen to flush to this thread's
 		 * arena, so the stats didn't get merged.  Manually do so now.
 		 */
-		arena_t *arena = tcache->arena;
-		malloc_mutex_lock(&arena->lock);
-		arena->stats.nrequests_large += tbin->tstats.nrequests;
-		arena->stats.lstats[binind - NBINS].nrequests +=
-		    tbin->tstats.nrequests;
+		arena_stats_large_nrequests_add(tsd_tsdn(tsd), &arena->stats,
+		    binind, tbin->tstats.nrequests);
 		tbin->tstats.nrequests = 0;
-		malloc_mutex_unlock(&arena->lock);
 	}

-	memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
-	    rem * sizeof(void *));
+	memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem *
+	    sizeof(void *));
 	tbin->ncached = rem;
-	if ((int)tbin->ncached < tbin->low_water)
+	if ((low_water_t)tbin->ncached < tbin->low_water) {
 		tbin->low_water = tbin->ncached;
+	}
 }

 void
-tcache_arena_associate(tcache_t *tcache, arena_t *arena)
-{
+tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) {
+	assert(tcache->arena == NULL);
+	tcache->arena = arena;

 	if (config_stats) {
 		/* Link into list of extant tcaches. */
-		malloc_mutex_lock(&arena->lock);
+		malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx);
 		ql_elm_new(tcache, link);
 		ql_tail_insert(&arena->tcache_ql, tcache, link);
-		malloc_mutex_unlock(&arena->lock);
+		malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx);
 	}
-	tcache->arena = arena;
+}
+
+static void
+tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache) {
+	arena_t *arena = tcache->arena;
+	assert(arena != NULL);
+	if (config_stats) {
+		/* Unlink from list of extant tcaches. */
+		malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx);
+		if (config_debug) {
+			bool in_ql = false;
+			tcache_t *iter;
+			ql_foreach(iter, &arena->tcache_ql, link) {
+				if (iter == tcache) {
+					in_ql = true;
+					break;
+				}
+			}
+			assert(in_ql);
+		}
+		ql_remove(&arena->tcache_ql, tcache, link);
+		tcache_stats_merge(tsdn, tcache, arena);
+		malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx);
+	}
+	tcache->arena = NULL;
 }

 void
-tcache_arena_dissociate(tcache_t *tcache)
-{
-
-	if (config_stats) {
-		/* Unlink from list of extant tcaches. */
-		malloc_mutex_lock(&tcache->arena->lock);
-		ql_remove(&tcache->arena->tcache_ql, tcache, link);
-		tcache_stats_merge(tcache, tcache->arena);
-		malloc_mutex_unlock(&tcache->arena->lock);
-	}
+tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) {
+	tcache_arena_dissociate(tsdn, tcache);
+	tcache_arena_associate(tsdn, tcache, arena);
 }

+bool
+tsd_tcache_enabled_data_init(tsd_t *tsd) {
+	/* Called upon tsd initialization. */
+	tsd_tcache_enabled_set(tsd, opt_tcache);
+	tsd_slow_update(tsd);
+
+	if (opt_tcache) {
+		/* Trigger tcache init. */
+		tsd_tcache_data_init(tsd);
+	}
+
+	return false;
+}
+
+/* Initialize a tcache's bins over avail_stack (auto or manual tcache). */
+static void
+tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) {
+	memset(&tcache->link, 0, sizeof(ql_elm(tcache_t)));
+	tcache->prof_accumbytes = 0;
+	tcache->next_gc_bin = 0;
+	tcache->arena = NULL;
+
+	ticker_init(&tcache->gc_ticker, TCACHE_GC_INCR);
+
+	size_t stack_offset = 0;
+	assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
+	memset(tcache->tbins_small, 0, sizeof(tcache_bin_t) * NBINS);
+	memset(tcache->tbins_large, 0, sizeof(tcache_bin_t) * (nhbins - NBINS));
+	unsigned i = 0;
+	for (; i < NBINS; i++) {
+		tcache->lg_fill_div[i] = 1;
+		stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
+		/*
+		 * avail points past the available space.  Allocations will
+		 * access the slots toward higher addresses (for the benefit of
+		 * prefetch).
+		 */
+		tcache_small_bin_get(tcache, i)->avail =
+		    (void **)((uintptr_t)avail_stack + (uintptr_t)stack_offset);
+	}
+	for (; i < nhbins; i++) {
+		stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
+		tcache_large_bin_get(tcache, i)->avail =
+		    (void **)((uintptr_t)avail_stack + (uintptr_t)stack_offset);
+	}
+	assert(stack_offset == stack_nelms * sizeof(void *));
+}
+
+/* Initialize auto tcache (embedded in TSD). */
+bool
+tsd_tcache_data_init(tsd_t *tsd) {
+	tcache_t *tcache = tsd_tcachep_get_unsafe(tsd);
+	assert(tcache_small_bin_get(tcache, 0)->avail == NULL);
+	size_t size = stack_nelms * sizeof(void *);
+	/* Avoid false cacheline sharing. */
+	size = sz_sa2u(size, CACHELINE);
+
+	void *avail_array = ipallocztm(tsd_tsdn(tsd), size, CACHELINE, true,
+	    NULL, true, arena_get(TSDN_NULL, 0, true));
+	if (avail_array == NULL) {
+		return true;
+	}
+
+	tcache_init(tsd, tcache, avail_array);
+	/*
+	 * Initialization is a bit tricky here.  After malloc init is done, all
+	 * threads can rely on arena_choose and associate tcache accordingly.
+	 * However, the thread that does actual malloc bootstrapping relies on
+	 * functional tsd, and it can only rely on a0.  In that case, we
+	 * associate its tcache to a0 temporarily, and later on
+	 * arena_choose_hard() will re-associate properly.
+	 */
+	tcache->arena = NULL;
+	arena_t *arena;
+	if (!malloc_initialized()) {
+		/* If in initialization, assign to a0. */
+		arena = arena_get(tsd_tsdn(tsd), 0, false);
+		tcache_arena_associate(tsd_tsdn(tsd), tcache, arena);
+	} else {
+		arena = arena_choose(tsd, NULL);
+		/* This may happen if thread.tcache.enabled is used. */
+		if (tcache->arena == NULL) {
+			tcache_arena_associate(tsd_tsdn(tsd), tcache, arena);
+		}
+	}
+	assert(arena == tcache->arena);
+
+	return false;
+}
+
+/* Create a manual tcache for the tcache.create mallctl. */
 tcache_t *
-tcache_create(arena_t *arena)
-{
+tcache_create_explicit(tsd_t *tsd) {
 	tcache_t *tcache;
 	size_t size, stack_offset;
-	unsigned i;

-	size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins);
+	size = sizeof(tcache_t);
 	/* Naturally align the pointer stacks. */
 	size = PTR_CEILING(size);
 	stack_offset = size;
 	size += stack_nelms * sizeof(void *);
-	/*
-	 * Round up to the nearest multiple of the cacheline size, in order to
-	 * avoid the possibility of false cacheline sharing.
-	 *
-	 * That this works relies on the same logic as in ipalloc(), but we
-	 * cannot directly call ipalloc() here due to tcache bootstrapping
-	 * issues.
-	 */
-	size = (size + CACHELINE_MASK) & (-CACHELINE);
+	/* Avoid false cacheline sharing. */
+	size = sz_sa2u(size, CACHELINE);

-	if (size <= SMALL_MAXCLASS)
-		tcache = (tcache_t *)arena_malloc_small(arena, size, true);
-	else if (size <= tcache_maxclass)
-		tcache = (tcache_t *)arena_malloc_large(arena, size, true);
-	else
-		tcache = (tcache_t *)icalloct(size, false, arena);
-
-	if (tcache == NULL)
-		return (NULL);
-
-	tcache_arena_associate(tcache, arena);
-
-	assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
-	for (i = 0; i < nhbins; i++) {
-		tcache->tbins[i].lg_fill_div = 1;
-		tcache->tbins[i].avail = (void **)((uintptr_t)tcache +
-		    (uintptr_t)stack_offset);
-		stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
+	tcache = ipallocztm(tsd_tsdn(tsd), size, CACHELINE, true, NULL, true,
+	    arena_get(TSDN_NULL, 0, true));
+	if (tcache == NULL) {
+		return NULL;
 	}

-	tcache_tsd_set(&tcache);
+	tcache_init(tsd, tcache,
+	    (void *)((uintptr_t)tcache + (uintptr_t)stack_offset));
+	tcache_arena_associate(tsd_tsdn(tsd), tcache, arena_ichoose(tsd, NULL));

-	return (tcache);
+	return tcache;
 }

-void
-tcache_destroy(tcache_t *tcache)
-{
-	unsigned i;
-	size_t tcache_size;
+static void
+tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) {
+	assert(tcache->arena != NULL);

-	tcache_arena_dissociate(tcache);
+	for (unsigned i = 0; i < NBINS; i++) {
+		tcache_bin_t *tbin = tcache_small_bin_get(tcache, i);
+		tcache_bin_flush_small(tsd, tcache, tbin, i, 0);

-	for (i = 0; i < NBINS; i++) {
-		tcache_bin_t *tbin = &tcache->tbins[i];
-		tcache_bin_flush_small(tbin, i, 0, tcache);
-
-		if (config_stats && tbin->tstats.nrequests != 0) {
-			arena_t *arena = tcache->arena;
-			arena_bin_t *bin = &arena->bins[i];
-			malloc_mutex_lock(&bin->lock);
-			bin->stats.nrequests += tbin->tstats.nrequests;
-			malloc_mutex_unlock(&bin->lock);
+		if (config_stats) {
+			assert(tbin->tstats.nrequests == 0);
 		}
 	}
+	for (unsigned i = NBINS; i < nhbins; i++) {
+		tcache_bin_t *tbin = tcache_large_bin_get(tcache, i);
+		tcache_bin_flush_large(tsd, tbin, i, 0, tcache);

-	for (; i < nhbins; i++) {
-		tcache_bin_t *tbin = &tcache->tbins[i];
-		tcache_bin_flush_large(tbin, i, 0, tcache);
-
-		if (config_stats && tbin->tstats.nrequests != 0) {
-			arena_t *arena = tcache->arena;
-			malloc_mutex_lock(&arena->lock);
-			arena->stats.nrequests_large += tbin->tstats.nrequests;
-			arena->stats.lstats[i - NBINS].nrequests +=
-			    tbin->tstats.nrequests;
-			malloc_mutex_unlock(&arena->lock);
+		if (config_stats) {
+			assert(tbin->tstats.nrequests == 0);
 		}
 	}

 	if (config_prof && tcache->prof_accumbytes > 0 &&
-	    arena_prof_accum(tcache->arena, tcache->prof_accumbytes))
-		prof_idump();
-
-	tcache_size = arena_salloc(tcache, false);
-	if (tcache_size <= SMALL_MAXCLASS) {
-		arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
-		arena_t *arena = chunk->arena;
-		size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >>
-		    LG_PAGE;
-		arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind);
-
-		arena_dalloc_bin(arena, chunk, tcache, pageind, mapelm);
-	} else if (tcache_size <= tcache_maxclass) {
-		arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
-		arena_t *arena = chunk->arena;
-
-		arena_dalloc_large(arena, chunk, tcache);
-	} else
-		idalloct(tcache, false);
-}
-
-void
-tcache_thread_cleanup(void *arg)
-{
-	tcache_t *tcache = *(tcache_t **)arg;
-
-	if (tcache == TCACHE_STATE_DISABLED) {
-		/* Do nothing. */
-	} else if (tcache == TCACHE_STATE_REINCARNATED) {
-		/*
-		 * Another destructor called an allocator function after this
-		 * destructor was called.  Reset tcache to
-		 * TCACHE_STATE_PURGATORY in order to receive another callback.
-		 */
-		tcache = TCACHE_STATE_PURGATORY;
-		tcache_tsd_set(&tcache);
-	} else if (tcache == TCACHE_STATE_PURGATORY) {
-		/*
-		 * The previous time this destructor was called, we set the key
-		 * to TCACHE_STATE_PURGATORY so that other destructors wouldn't
-		 * cause re-creation of the tcache.  This time, do nothing, so
-		 * that the destructor will not be called again.
-		 */
-	} else if (tcache != NULL) {
-		assert(tcache != TCACHE_STATE_PURGATORY);
-		tcache_destroy(tcache);
-		tcache = TCACHE_STATE_PURGATORY;
-		tcache_tsd_set(&tcache);
+	    arena_prof_accum(tsd_tsdn(tsd), tcache->arena,
+	    tcache->prof_accumbytes)) {
+		prof_idump(tsd_tsdn(tsd));
 	}
 }

-/* Caller must own arena->lock. */
 void
-tcache_stats_merge(tcache_t *tcache, arena_t *arena)
-{
+tcache_flush(tsd_t *tsd) {
+	assert(tcache_available(tsd));
+	tcache_flush_cache(tsd, tsd_tcachep_get(tsd));
+}
+
+static void
+tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) {
+	tcache_flush_cache(tsd, tcache);
+	tcache_arena_dissociate(tsd_tsdn(tsd), tcache);
+
+	if (tsd_tcache) {
+		/* Release the avail array for the TSD embedded auto tcache. */
+		void *avail_array =
+		    (void *)((uintptr_t)tcache_small_bin_get(tcache, 0)->avail -
+		    (uintptr_t)tcache_bin_info[0].ncached_max * sizeof(void *));
+		idalloctm(tsd_tsdn(tsd), avail_array, NULL, NULL, true, true);
+	} else {
+		/* Release both the tcache struct and avail array. */
+		idalloctm(tsd_tsdn(tsd), tcache, NULL, NULL, true, true);
+	}
+}
+
+/* For auto tcache (embedded in TSD) only. */
+void
+tcache_cleanup(tsd_t *tsd) {
+	tcache_t *tcache = tsd_tcachep_get(tsd);
+	if (!tcache_available(tsd)) {
+		assert(tsd_tcache_enabled_get(tsd) == false);
+		if (config_debug) {
+			assert(tcache_small_bin_get(tcache, 0)->avail == NULL);
+		}
+		return;
+	}
+	assert(tsd_tcache_enabled_get(tsd));
+	assert(tcache_small_bin_get(tcache, 0)->avail != NULL);
+
+	tcache_destroy(tsd, tcache, true);
+	if (config_debug) {
+		tcache_small_bin_get(tcache, 0)->avail = NULL;
+	}
+}
+
+void
+tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) {
 	unsigned i;

 	cassert(config_stats);
@@ -410,48 +525,151 @@ tcache_stats_merge(tcache_t *tcache, arena_t *arena)
 	/* Merge and reset tcache stats. */
 	for (i = 0; i < NBINS; i++) {
 		arena_bin_t *bin = &arena->bins[i];
-		tcache_bin_t *tbin = &tcache->tbins[i];
-		malloc_mutex_lock(&bin->lock);
+		tcache_bin_t *tbin = tcache_small_bin_get(tcache, i);
+		malloc_mutex_lock(tsdn, &bin->lock);
 		bin->stats.nrequests += tbin->tstats.nrequests;
-		malloc_mutex_unlock(&bin->lock);
+		malloc_mutex_unlock(tsdn, &bin->lock);
 		tbin->tstats.nrequests = 0;
 	}

 	for (; i < nhbins; i++) {
-		malloc_large_stats_t *lstats = &arena->stats.lstats[i - NBINS];
-		tcache_bin_t *tbin = &tcache->tbins[i];
-		arena->stats.nrequests_large += tbin->tstats.nrequests;
-		lstats->nrequests += tbin->tstats.nrequests;
+		tcache_bin_t *tbin = tcache_large_bin_get(tcache, i);
+		arena_stats_large_nrequests_add(tsdn, &arena->stats, i,
+		    tbin->tstats.nrequests);
 		tbin->tstats.nrequests = 0;
 	}
 }

+static bool
+tcaches_create_prep(tsd_t *tsd) {
+	bool err;
+
+	malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
+
+	if (tcaches == NULL) {
+		tcaches = base_alloc(tsd_tsdn(tsd), b0get(), sizeof(tcache_t *)
+		    * (MALLOCX_TCACHE_MAX+1), CACHELINE);
+		if (tcaches == NULL) {
+			err = true;
+			goto label_return;
+		}
+	}
+
+	if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) {
+		err = true;
+		goto label_return;
+	}
+
+	err = false;
+label_return:
+	malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
+	return err;
+}
+
 bool
-tcache_boot0(void)
-{
-	unsigned i;
+tcaches_create(tsd_t *tsd, unsigned *r_ind) {
+	witness_assert_depth(tsdn_witness_tsdp_get(tsd_tsdn(tsd)), 0);

-	/*
-	 * If necessary, clamp opt_lg_tcache_max, now that arena_maxclass is
-	 * known.
-	 */
-	if (opt_lg_tcache_max < 0 || (1U << opt_lg_tcache_max) < SMALL_MAXCLASS)
+	bool err;
+
+	if (tcaches_create_prep(tsd)) {
+		err = true;
+		goto label_return;
+	}
+
+	tcache_t *tcache = tcache_create_explicit(tsd);
+	if (tcache == NULL) {
+		err = true;
+		goto label_return;
+	}
+
+	tcaches_t *elm;
+	malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
+	if (tcaches_avail != NULL) {
+		elm = tcaches_avail;
+		tcaches_avail = tcaches_avail->next;
+		elm->tcache = tcache;
+		*r_ind = (unsigned)(elm - tcaches);
+	} else {
+		elm = &tcaches[tcaches_past];
+		elm->tcache = tcache;
+		*r_ind = tcaches_past;
+		tcaches_past++;
+	}
+	malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
+
+	err = false;
+label_return:
+	witness_assert_depth(tsdn_witness_tsdp_get(tsd_tsdn(tsd)), 0);
+	return err;
+}
+
+static tcache_t *
+tcaches_elm_remove(tsd_t *tsd, tcaches_t *elm) {
+	malloc_mutex_assert_owner(tsd_tsdn(tsd), &tcaches_mtx);
+
+	if (elm->tcache == NULL) {
+		return NULL;
+	}
+	tcache_t *tcache = elm->tcache;
+	elm->tcache = NULL;
+	return tcache;
+}
+
+void
+tcaches_flush(tsd_t *tsd, unsigned ind) {
+	malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
+	tcache_t *tcache = tcaches_elm_remove(tsd, &tcaches[ind]);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
+	if (tcache != NULL) {
+		tcache_destroy(tsd, tcache, false);
+	}
+}
+
+void
+tcaches_destroy(tsd_t *tsd, unsigned ind) {
+	malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
+	tcaches_t *elm = &tcaches[ind];
+	tcache_t *tcache = tcaches_elm_remove(tsd, elm);
+	elm->next = tcaches_avail;
+	tcaches_avail = elm;
+	malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
+	if (tcache != NULL) {
+		tcache_destroy(tsd, tcache, false);
+	}
+}
+
+bool
+tcache_boot(tsdn_t *tsdn) {
+	/* If necessary, clamp opt_lg_tcache_max. */
+	if (opt_lg_tcache_max < 0 || (ZU(1) << opt_lg_tcache_max) <
+	    SMALL_MAXCLASS) {
 		tcache_maxclass = SMALL_MAXCLASS;
-	else if ((1U << opt_lg_tcache_max) > arena_maxclass)
-		tcache_maxclass = arena_maxclass;
-	else
-		tcache_maxclass = (1U << opt_lg_tcache_max);
+	} else {
+		tcache_maxclass = (ZU(1) << opt_lg_tcache_max);
+	}

-	nhbins = NBINS + (tcache_maxclass >> LG_PAGE);
+	if (malloc_mutex_init(&tcaches_mtx, "tcaches", WITNESS_RANK_TCACHES,
+	    malloc_mutex_rank_exclusive)) {
+		return true;
+	}
+
+	nhbins = sz_size2index(tcache_maxclass) + 1;

 	/* Initialize tcache_bin_info. */
-	tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins *
-	    sizeof(tcache_bin_info_t));
-	if (tcache_bin_info == NULL)
-		return (true);
+	tcache_bin_info = (tcache_bin_info_t *)base_alloc(tsdn, b0get(), nhbins
+	    * sizeof(tcache_bin_info_t), CACHELINE);
+	if (tcache_bin_info == NULL) {
+		return true;
+	}
 	stack_nelms = 0;
+	unsigned i;
 	for (i = 0; i < NBINS; i++) {
-		if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) {
+		if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) {
+			tcache_bin_info[i].ncached_max =
+			    TCACHE_NSLOTS_SMALL_MIN;
+		} else if ((arena_bin_info[i].nregs << 1) <=
+		    TCACHE_NSLOTS_SMALL_MAX) {
 			tcache_bin_info[i].ncached_max =
 			    (arena_bin_info[i].nregs << 1);
 		} else {
@@ -465,15 +683,26 @@ tcache_boot0(void)
 		stack_nelms += tcache_bin_info[i].ncached_max;
 	}

-	return (false);
+	return false;
 }

-bool
-tcache_boot1(void)
-{
-
-	if (tcache_tsd_boot() || tcache_enabled_tsd_boot())
-		return (true);
-
-	return (false);
+/* Fork hooks for tcaches_mtx; all three must agree on when they run. */
+void
+tcache_prefork(tsdn_t *tsdn) {
+	/* Unconditional: tcaches_create() locks it whatever opt_tcache is. */
+	malloc_mutex_prefork(tsdn, &tcaches_mtx);
+}
+
+/* Parent-side counterpart of tcache_prefork(). */
+void
+tcache_postfork_parent(tsdn_t *tsdn) {
+	/* Must mirror tcache_prefork() exactly, hence no condition here. */
+	malloc_mutex_postfork_parent(tsdn, &tcaches_mtx);
+}
+
+/* Child-side counterpart of tcache_prefork(). */
+void
+tcache_postfork_child(tsdn_t *tsdn) {
+	/* Must mirror tcache_prefork() exactly, hence no condition here. */
+	malloc_mutex_postfork_child(tsdn, &tcaches_mtx);
 }
--- a/deps/jemalloc/src/tsd.c
+++ b/deps/jemalloc/src/tsd.c
@@ -1,5 +1,10 @@
-#define	JEMALLOC_TSD_C_
-#include "jemalloc/internal/jemalloc_internal.h"
+#define JEMALLOC_TSD_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/rtree.h"

 /******************************************************************************/
 /* Data. */
@@ -7,28 +12,148 @@
 static unsigned ncleanups;
 static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX];

+#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
+__thread tsd_t JEMALLOC_TLS_MODEL tsd_tls = TSD_INITIALIZER;
+__thread bool JEMALLOC_TLS_MODEL tsd_initialized = false;
+bool tsd_booted = false;
+#elif (defined(JEMALLOC_TLS))
+__thread tsd_t JEMALLOC_TLS_MODEL tsd_tls = TSD_INITIALIZER;
+pthread_key_t tsd_tsd;
+bool tsd_booted = false;
+#elif (defined(_WIN32))
+DWORD tsd_tsd;
+tsd_wrapper_t tsd_boot_wrapper = {false, TSD_INITIALIZER};
+bool tsd_booted = false;
+#else
+
+/*
+ * This contains a mutex, but it's pretty convenient to allow the mutex code to
+ * have a dependency on tsd.  So we define the struct here, and only refer to it
+ * by pointer in the header.
+ */
+struct tsd_init_head_s {
+	ql_head(tsd_init_block_t) blocks;
+	malloc_mutex_t lock;
+};
+
+pthread_key_t tsd_tsd;
+tsd_init_head_t	tsd_init_head = {
+	ql_head_initializer(blocks),
+	MALLOC_MUTEX_INITIALIZER
+};
+tsd_wrapper_t tsd_boot_wrapper = {
+	false,
+	TSD_INITIALIZER
+};
+bool tsd_booted = false;
+#endif
+
+
 /******************************************************************************/

+void
+tsd_slow_update(tsd_t *tsd) {
+	if (tsd_nominal(tsd)) {
+		if (malloc_slow || !tsd_tcache_enabled_get(tsd) ||
+		    tsd_reentrancy_level_get(tsd) > 0) {
+			tsd->state = tsd_state_nominal_slow;
+		} else {
+			tsd->state = tsd_state_nominal;
+		}
+	}
+}
+
+static bool
+tsd_data_init(tsd_t *tsd) {
+	/*
+	 * We initialize the rtree context first (before the tcache), since the
+	 * tcache initialization depends on it.
+	 */
+	rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd));
+
+	return tsd_tcache_enabled_data_init(tsd);
+}
+
+static void
+assert_tsd_data_cleanup_done(tsd_t *tsd) {
+	assert(!tsd_nominal(tsd));
+	assert(*tsd_arenap_get_unsafe(tsd) == NULL);
+	assert(*tsd_iarenap_get_unsafe(tsd) == NULL);
+	assert(*tsd_arenas_tdata_bypassp_get_unsafe(tsd) == true);
+	assert(*tsd_arenas_tdatap_get_unsafe(tsd) == NULL);
+	assert(*tsd_tcache_enabledp_get_unsafe(tsd) == false);
+	assert(*tsd_prof_tdatap_get_unsafe(tsd) == NULL);
+}
+
+static bool
+tsd_data_init_nocleanup(tsd_t *tsd) {
+	assert(tsd->state == tsd_state_reincarnated ||
+	    tsd->state == tsd_state_minimal_initialized);
+	/*
+	 * During reincarnation, there is no guarantee that the cleanup function
+	 * will be called (deallocation may happen after all tsd destructors).
+	 * We set up tsd in a way that no cleanup is needed.
+	 */
+	rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd));
+	*tsd_arenas_tdata_bypassp_get(tsd) = true;
+	*tsd_tcache_enabledp_get_unsafe(tsd) = false;
+	*tsd_reentrancy_levelp_get(tsd) = 1;
+	assert_tsd_data_cleanup_done(tsd);
+
+	return false;
+}
+
+tsd_t *
+tsd_fetch_slow(tsd_t *tsd, bool minimal) {
+	assert(!tsd_fast(tsd));
+
+	if (tsd->state == tsd_state_nominal_slow) {
+		/* On slow path but no work needed. */
+		assert(malloc_slow || !tsd_tcache_enabled_get(tsd) ||
+		    tsd_reentrancy_level_get(tsd) > 0 ||
+		    *tsd_arenas_tdata_bypassp_get(tsd));
+	} else if (tsd->state == tsd_state_uninitialized) {
+		if (!minimal) {
+			tsd->state = tsd_state_nominal;
+			tsd_slow_update(tsd);
+			/* Trigger cleanup handler registration. */
+			tsd_set(tsd);
+			tsd_data_init(tsd);
+		} else {
+			tsd->state = tsd_state_minimal_initialized;
+			tsd_set(tsd);
+			tsd_data_init_nocleanup(tsd);
+		}
+	} else if (tsd->state == tsd_state_minimal_initialized) {
+		if (!minimal) {
+			/* Switch to fully initialized. */
+			tsd->state = tsd_state_nominal;
+			assert(*tsd_reentrancy_levelp_get(tsd) >= 1);
+			(*tsd_reentrancy_levelp_get(tsd))--;
+			tsd_slow_update(tsd);
+			tsd_data_init(tsd);
+		} else {
+			assert_tsd_data_cleanup_done(tsd);
+		}
+	} else if (tsd->state == tsd_state_purgatory) {
+		tsd->state = tsd_state_reincarnated;
+		tsd_set(tsd);
+		tsd_data_init_nocleanup(tsd);
+	} else {
+		assert(tsd->state == tsd_state_reincarnated);
+	}
+
+	return tsd;
+}
+
 void *
-malloc_tsd_malloc(size_t size)
-{
-
-	/* Avoid choose_arena() in order to dodge bootstrapping issues. */
-	return (arena_malloc(arenas[0], size, false, false));
+malloc_tsd_malloc(size_t size) {
+	return a0malloc(CACHELINE_CEILING(size));
 }

 void
-malloc_tsd_dalloc(void *wrapper)
-{
-
-	idalloct(wrapper, false);
-}
-
-void
-malloc_tsd_no_cleanup(void *arg)
-{
-
-	not_reached();
+malloc_tsd_dalloc(void *wrapper) {
+	a0dalloc(wrapper);
 }

 #if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32)
@@ -36,21 +161,22 @@ malloc_tsd_no_cleanup(void *arg)
 JEMALLOC_EXPORT
 #endif
 void
-_malloc_thread_cleanup(void)
-{
+_malloc_thread_cleanup(void) {
 	bool pending[MALLOC_TSD_CLEANUPS_MAX], again;
 	unsigned i;

-	for (i = 0; i < ncleanups; i++)
+	for (i = 0; i < ncleanups; i++) {
 		pending[i] = true;
+	}

 	do {
 		again = false;
 		for (i = 0; i < ncleanups; i++) {
 			if (pending[i]) {
 				pending[i] = cleanups[i]();
-				if (pending[i])
+				if (pending[i]) {
 					again = true;
+				}
 			}
 		}
 	} while (again);
@@ -58,26 +184,92 @@ _malloc_thread_cleanup(void)
 #endif

 void
-malloc_tsd_cleanup_register(bool (*f)(void))
-{
-
+malloc_tsd_cleanup_register(bool (*f)(void)) {
 	assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX);
 	cleanups[ncleanups] = f;
 	ncleanups++;
 }

+static void
+tsd_do_data_cleanup(tsd_t *tsd) {
+	prof_tdata_cleanup(tsd);
+	iarena_cleanup(tsd);
+	arena_cleanup(tsd);
+	arenas_tdata_cleanup(tsd);
+	tcache_cleanup(tsd);
+	witnesses_cleanup(tsd_witness_tsdp_get_unsafe(tsd));
+}
+
 void
-malloc_tsd_boot(void)
-{
+tsd_cleanup(void *arg) {
+	tsd_t *tsd = (tsd_t *)arg;
+
+	switch (tsd->state) {
+	case tsd_state_uninitialized:
+		/* Do nothing. */
+		break;
+	case tsd_state_minimal_initialized:
+		/* This implies the thread only did free() in its lifetime. */
+		/* Fall through. */
+	case tsd_state_reincarnated:
+		/*
+		 * Reincarnated means another destructor deallocated memory
+		 * after the destructor was called.  Cleanup isn't required but
+		 * is still called for testing and completeness.
+		 */
+		assert_tsd_data_cleanup_done(tsd);
+		/* Fall through. */
+	case tsd_state_nominal:
+	case tsd_state_nominal_slow:
+		tsd_do_data_cleanup(tsd);
+		tsd->state = tsd_state_purgatory;
+		tsd_set(tsd);
+		break;
+	case tsd_state_purgatory:
+		/*
+		 * The previous time this destructor was called, we set the
+		 * state to tsd_state_purgatory so that other destructors
+		 * wouldn't cause re-creation of the tsd.  This time, do
+		 * nothing, and do not request another callback.
+		 */
+		break;
+	default:
+		not_reached();
+	}
+#ifdef JEMALLOC_JET
+	test_callback_t test_callback = *tsd_test_callbackp_get_unsafe(tsd);
+	int *data = tsd_test_datap_get_unsafe(tsd);
+	if (test_callback != NULL) {
+		test_callback(data);
+	}
+#endif
+}
+
+tsd_t *
+malloc_tsd_boot0(void) {
+	tsd_t *tsd;

 	ncleanups = 0;
+	if (tsd_boot0()) {
+		return NULL;
+	}
+	tsd = tsd_fetch();
+	*tsd_arenas_tdata_bypassp_get(tsd) = true;
+	return tsd;
+}
+
+void
+malloc_tsd_boot1(void) {
+	tsd_boot1();
+	tsd_t *tsd = tsd_fetch();
+	/* malloc_slow has been set properly.  Update tsd_slow. */
+	tsd_slow_update(tsd);
+	*tsd_arenas_tdata_bypassp_get(tsd) = false;
 }

 #ifdef _WIN32
 static BOOL WINAPI
-_tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved)
-{
-
+_tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) {
 	switch (fdwReason) {
 #ifdef JEMALLOC_LAZY_LOCK
 	case DLL_THREAD_ATTACH:
@@ -90,52 +282,60 @@ _tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved)
 	default:
 		break;
 	}
-	return (true);
+	return true;
 }

+/*
+ * We need to be able to say "read" here (in the "pragma section"), but have
+ * hooked "read". We won't read for the rest of the file, so we can get away
+ * with unhooking.
+ */
+#ifdef read
+#  undef read
+#endif
+
 #ifdef _MSC_VER
 #  ifdef _M_IX86
 #    pragma comment(linker, "/INCLUDE:__tls_used")
+#    pragma comment(linker, "/INCLUDE:_tls_callback")
 #  else
 #    pragma comment(linker, "/INCLUDE:_tls_used")
+#    pragma comment(linker, "/INCLUDE:tls_callback")
 #  endif
 #  pragma section(".CRT$XLY",long,read)
 #endif
 JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used)
-static const BOOL	(WINAPI *tls_callback)(HINSTANCE hinstDLL,
+BOOL	(WINAPI *const tls_callback)(HINSTANCE hinstDLL,
    DWORD fdwReason, LPVOID lpvReserved) = _tls_callback;
 #endif

 #if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \
    !defined(_WIN32))
 void *
-tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block)
-{
+tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) {
 	pthread_t self = pthread_self();
 	tsd_init_block_t *iter;

 	/* Check whether this thread has already inserted into the list. */
-	malloc_mutex_lock(&head->lock);
+	malloc_mutex_lock(TSDN_NULL, &head->lock);
 	ql_foreach(iter, &head->blocks, link) {
 		if (iter->thread == self) {
-			malloc_mutex_unlock(&head->lock);
-			return (iter->data);
+			malloc_mutex_unlock(TSDN_NULL, &head->lock);
+			return iter->data;
 		}
 	}
 	/* Insert block into list. */
 	ql_elm_new(block, link);
 	block->thread = self;
 	ql_tail_insert(&head->blocks, block, link);
-	malloc_mutex_unlock(&head->lock);
-	return (NULL);
+	malloc_mutex_unlock(TSDN_NULL, &head->lock);
+	return NULL;
 }

 void
-tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block)
-{
-
-	malloc_mutex_lock(&head->lock);
+tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) {
+	malloc_mutex_lock(TSDN_NULL, &head->lock);
 	ql_remove(&head->blocks, block, link);
-	malloc_mutex_unlock(&head->lock);
+	malloc_mutex_unlock(TSDN_NULL, &head->lock);
 }
 #endif
--- a/deps/jemalloc/src/zone.c
+++ b/deps/jemalloc/src/zone.c
@@ -1,10 +1,83 @@
-#include "jemalloc/internal/jemalloc_internal.h"
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/assert.h"
+
 #ifndef JEMALLOC_ZONE
 #  error "This source file is for zones on Darwin (OS X)."
 #endif

+/* The definitions of the following structs in malloc/malloc.h might be too
+ * old for the built binary to run on newer versions of OSX, so declare the
+ * newest possible versions of those structs here instead.
+ */
+typedef struct _malloc_zone_t {
+	void *reserved1;
+	void *reserved2;
+	size_t (*size)(struct _malloc_zone_t *, const void *);
+	void *(*malloc)(struct _malloc_zone_t *, size_t);
+	void *(*calloc)(struct _malloc_zone_t *, size_t, size_t);
+	void *(*valloc)(struct _malloc_zone_t *, size_t);
+	void (*free)(struct _malloc_zone_t *, void *);
+	void *(*realloc)(struct _malloc_zone_t *, void *, size_t);
+	void (*destroy)(struct _malloc_zone_t *);
+	const char *zone_name;
+	unsigned (*batch_malloc)(struct _malloc_zone_t *, size_t, void **, unsigned);
+	void (*batch_free)(struct _malloc_zone_t *, void **, unsigned);
+	struct malloc_introspection_t *introspect;
+	unsigned version;
+	void *(*memalign)(struct _malloc_zone_t *, size_t, size_t);
+	void (*free_definite_size)(struct _malloc_zone_t *, void *, size_t);
+	size_t (*pressure_relief)(struct _malloc_zone_t *, size_t);
+} malloc_zone_t;
+
+typedef struct {
+	vm_address_t address;
+	vm_size_t size;
+} vm_range_t;
+
+typedef struct malloc_statistics_t {
+	unsigned blocks_in_use;
+	size_t size_in_use;
+	size_t max_size_in_use;
+	size_t size_allocated;
+} malloc_statistics_t;
+
+typedef kern_return_t memory_reader_t(task_t, vm_address_t, vm_size_t, void **);
+
+typedef void vm_range_recorder_t(task_t, void *, unsigned type, vm_range_t *, unsigned);
+
+typedef struct malloc_introspection_t {
+	kern_return_t (*enumerator)(task_t, void *, unsigned, vm_address_t, memory_reader_t, vm_range_recorder_t);
+	size_t (*good_size)(malloc_zone_t *, size_t);
+	boolean_t (*check)(malloc_zone_t *);
+	void (*print)(malloc_zone_t *, boolean_t);
+	void (*log)(malloc_zone_t *, void *);
+	void (*force_lock)(malloc_zone_t *);
+	void (*force_unlock)(malloc_zone_t *);
+	void (*statistics)(malloc_zone_t *, malloc_statistics_t *);
+	boolean_t (*zone_locked)(malloc_zone_t *);
+	boolean_t (*enable_discharge_checking)(malloc_zone_t *);
+	boolean_t (*disable_discharge_checking)(malloc_zone_t *);
+	void (*discharge)(malloc_zone_t *, void *);
+#ifdef __BLOCKS__
+	void (*enumerate_discharged_pointers)(malloc_zone_t *, void (^)(void *, void *));
+#else
+	void *enumerate_unavailable_without_blocks;
+#endif
+	void (*reinit_lock)(malloc_zone_t *);
+} malloc_introspection_t;
+
+extern kern_return_t malloc_get_all_zones(task_t, memory_reader_t, vm_address_t **, unsigned *);
+
+extern malloc_zone_t *malloc_default_zone(void);
+
+extern void malloc_zone_register(malloc_zone_t *zone);
+
+extern void malloc_zone_unregister(malloc_zone_t *zone);
+
 /*
- * The malloc_default_purgeable_zone function is only available on >= 10.6.
+ * The malloc_default_purgeable_zone() function is only available on >= 10.6.
 * We need to check whether it is present at runtime, thus the weak_import.
 */
 extern malloc_zone_t *malloc_default_purgeable_zone(void)
@@ -13,30 +86,42 @@ JEMALLOC_ATTR(weak_import);
 /******************************************************************************/
 /* Data. */

-static malloc_zone_t zone;
-static struct malloc_introspection_t zone_introspect;
+static malloc_zone_t *default_zone, *purgeable_zone;
+static malloc_zone_t jemalloc_zone;
+static struct malloc_introspection_t jemalloc_zone_introspect;

 /******************************************************************************/
 /* Function prototypes for non-inline static functions. */

-static size_t	zone_size(malloc_zone_t *zone, void *ptr);
+static size_t	zone_size(malloc_zone_t *zone, const void *ptr);
 static void	*zone_malloc(malloc_zone_t *zone, size_t size);
 static void	*zone_calloc(malloc_zone_t *zone, size_t num, size_t size);
 static void	*zone_valloc(malloc_zone_t *zone, size_t size);
 static void	zone_free(malloc_zone_t *zone, void *ptr);
 static void	*zone_realloc(malloc_zone_t *zone, void *ptr, size_t size);
-#if (JEMALLOC_ZONE_VERSION >= 5)
 static void	*zone_memalign(malloc_zone_t *zone, size_t alignment,
-#endif
-#if (JEMALLOC_ZONE_VERSION >= 6)
    size_t size);
 static void	zone_free_definite_size(malloc_zone_t *zone, void *ptr,
    size_t size);
-#endif
-static void	*zone_destroy(malloc_zone_t *zone);
+static void	zone_destroy(malloc_zone_t *zone);
+static unsigned	zone_batch_malloc(struct _malloc_zone_t *zone, size_t size,
+    void **results, unsigned num_requested);
+static void	zone_batch_free(struct _malloc_zone_t *zone,
+    void **to_be_freed, unsigned num_to_be_freed);
+static size_t	zone_pressure_relief(struct _malloc_zone_t *zone, size_t goal);
 static size_t	zone_good_size(malloc_zone_t *zone, size_t size);
+static kern_return_t	zone_enumerator(task_t task, void *data, unsigned type_mask,
+    vm_address_t zone_address, memory_reader_t reader,
+    vm_range_recorder_t recorder);
+static boolean_t	zone_check(malloc_zone_t *zone);
+static void	zone_print(malloc_zone_t *zone, boolean_t verbose);
+static void	zone_log(malloc_zone_t *zone, void *address);
 static void	zone_force_lock(malloc_zone_t *zone);
 static void	zone_force_unlock(malloc_zone_t *zone);
+static void	zone_statistics(malloc_zone_t *zone,
+    malloc_statistics_t *stats);
+static boolean_t	zone_locked(malloc_zone_t *zone);
+static void	zone_reinit_lock(malloc_zone_t *zone);

 /******************************************************************************/
 /*
@@ -44,9 +129,7 @@ static void	zone_force_unlock(malloc_zone_t *zone);
 */

 static size_t
-zone_size(malloc_zone_t *zone, void *ptr)
-{
-
+zone_size(malloc_zone_t *zone, const void *ptr) {
 	/*
 	 * There appear to be places within Darwin (such as setenv(3)) that
 	 * cause calls to this function with pointers that *no* zone owns.  If
@@ -54,40 +137,33 @@ zone_size(malloc_zone_t *zone, void *ptr)
 	 * our zone into two parts, and use one as the default allocator and
 	 * the other as the default deallocator/reallocator.  Since that will
 	 * not work in practice, we must check all pointers to assure that they
-	 * reside within a mapped chunk before determining size.
+	 * reside within a mapped extent before determining size.
 	 */
-	return (ivsalloc(ptr, config_prof));
+	return ivsalloc(tsdn_fetch(), ptr);
 }

 static void *
-zone_malloc(malloc_zone_t *zone, size_t size)
-{
-
-	return (je_malloc(size));
+zone_malloc(malloc_zone_t *zone, size_t size) {
+	return je_malloc(size);
 }

 static void *
-zone_calloc(malloc_zone_t *zone, size_t num, size_t size)
-{
-
-	return (je_calloc(num, size));
+zone_calloc(malloc_zone_t *zone, size_t num, size_t size) {
+	return je_calloc(num, size);
 }

 static void *
-zone_valloc(malloc_zone_t *zone, size_t size)
-{
+zone_valloc(malloc_zone_t *zone, size_t size) {
 	void *ret = NULL; /* Assignment avoids useless compiler warning. */

 	je_posix_memalign(&ret, PAGE, size);

-	return (ret);
+	return ret;
 }

 static void
-zone_free(malloc_zone_t *zone, void *ptr)
-{
-
-	if (ivsalloc(ptr, config_prof) != 0) {
+zone_free(malloc_zone_t *zone, void *ptr) {
+	if (ivsalloc(tsdn_fetch(), ptr) != 0) {
 		je_free(ptr);
 		return;
 	}
@@ -96,163 +172,280 @@ zone_free(malloc_zone_t *zone, void *ptr)
 }

 static void *
-zone_realloc(malloc_zone_t *zone, void *ptr, size_t size)
-{
+zone_realloc(malloc_zone_t *zone, void *ptr, size_t size) {
+	if (ivsalloc(tsdn_fetch(), ptr) != 0) {
+		return je_realloc(ptr, size);
+	}

-	if (ivsalloc(ptr, config_prof) != 0)
-		return (je_realloc(ptr, size));
-
-	return (realloc(ptr, size));
+	return realloc(ptr, size);
 }

-#if (JEMALLOC_ZONE_VERSION >= 5)
 static void *
-zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size)
-{
+zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size) {
 	void *ret = NULL; /* Assignment avoids useless compiler warning. */

 	je_posix_memalign(&ret, alignment, size);

-	return (ret);
+	return ret;
 }
-#endif

-#if (JEMALLOC_ZONE_VERSION >= 6)
 static void
-zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size)
-{
+zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) {
+	size_t alloc_size;

-	if (ivsalloc(ptr, config_prof) != 0) {
-		assert(ivsalloc(ptr, config_prof) == size);
+	alloc_size = ivsalloc(tsdn_fetch(), ptr);
+	if (alloc_size != 0) {
+		assert(alloc_size == size);
 		je_free(ptr);
 		return;
 	}

 	free(ptr);
 }
-#endif
-
-static void *
-zone_destroy(malloc_zone_t *zone)
-{

+static void
+zone_destroy(malloc_zone_t *zone) {
 	/* This function should never be called. */
 	not_reached();
-	return (NULL);
+}
+
+static unsigned
+zone_batch_malloc(struct _malloc_zone_t *zone, size_t size, void **results,
+    unsigned num_requested) {
+	unsigned i;
+
+	for (i = 0; i < num_requested; i++) {
+		results[i] = je_malloc(size);
+		if (!results[i])
+			break;
+	}
+
+	return i;
+}
+
+static void
+zone_batch_free(struct _malloc_zone_t *zone, void **to_be_freed,
+    unsigned num_to_be_freed) {
+	unsigned i;
+
+	for (i = 0; i < num_to_be_freed; i++) {
+		zone_free(zone, to_be_freed[i]);
+		to_be_freed[i] = NULL;
+	}
 }

 static size_t
-zone_good_size(malloc_zone_t *zone, size_t size)
-{
+zone_pressure_relief(struct _malloc_zone_t *zone, size_t goal) {
+	return 0;
+}

-	if (size == 0)
+static size_t
+zone_good_size(malloc_zone_t *zone, size_t size) {
+	if (size == 0) {
 		size = 1;
-	return (s2u(size));
+	}
+	return sz_s2u(size);
+}
+
+static kern_return_t
+zone_enumerator(task_t task, void *data, unsigned type_mask,
+    vm_address_t zone_address, memory_reader_t reader,
+    vm_range_recorder_t recorder) {
+	return KERN_SUCCESS;
+}
+
+static boolean_t
+zone_check(malloc_zone_t *zone) {
+	return true;
 }

 static void
-zone_force_lock(malloc_zone_t *zone)
-{
+zone_print(malloc_zone_t *zone, boolean_t verbose) {
+}

-	if (isthreaded)
+static void
+zone_log(malloc_zone_t *zone, void *address) {
+}
+
+static void
+zone_force_lock(malloc_zone_t *zone) {
+	if (isthreaded) {
 		jemalloc_prefork();
+	}
 }

 static void
-zone_force_unlock(malloc_zone_t *zone)
-{
+zone_force_unlock(malloc_zone_t *zone) {
+	/*
+	 * Call jemalloc_postfork_child() rather than
+	 * jemalloc_postfork_parent(), because this function is executed by both
+	 * parent and child.  The parent can tolerate having state
+	 * reinitialized, but the child cannot unlock mutexes that were locked
+	 * by the parent.
+	 */
+	if (isthreaded) {
+		jemalloc_postfork_child();
+	}
+}

-	if (isthreaded)
-		jemalloc_postfork_parent();
+static void
+zone_statistics(malloc_zone_t *zone, malloc_statistics_t *stats) {
+	/* We make no effort to actually fill in the values. */
+	stats->blocks_in_use = 0;
+	stats->size_in_use = 0;
+	stats->max_size_in_use = 0;
+	stats->size_allocated = 0;
+}
+
+static boolean_t
+zone_locked(malloc_zone_t *zone) {
+	/* Pretend no lock is being held. */
+	return false;
+}
+
+static void
+zone_reinit_lock(malloc_zone_t *zone) {
+	/* As of OSX 10.12, this function is only used when force_unlock would
+	 * be used if the zone version were < 9, so just use force_unlock. */
+	zone_force_unlock(zone);
+}
+
+static void
+zone_init(void) {
+	jemalloc_zone.size = zone_size;
+	jemalloc_zone.malloc = zone_malloc;
+	jemalloc_zone.calloc = zone_calloc;
+	jemalloc_zone.valloc = zone_valloc;
+	jemalloc_zone.free = zone_free;
+	jemalloc_zone.realloc = zone_realloc;
+	jemalloc_zone.destroy = zone_destroy;
+	jemalloc_zone.zone_name = "jemalloc_zone";
+	jemalloc_zone.batch_malloc = zone_batch_malloc;
+	jemalloc_zone.batch_free = zone_batch_free;
+	jemalloc_zone.introspect = &jemalloc_zone_introspect;
+	jemalloc_zone.version = 9;
+	jemalloc_zone.memalign = zone_memalign;
+	jemalloc_zone.free_definite_size = zone_free_definite_size;
+	jemalloc_zone.pressure_relief = zone_pressure_relief;
+
+	jemalloc_zone_introspect.enumerator = zone_enumerator;
+	jemalloc_zone_introspect.good_size = zone_good_size;
+	jemalloc_zone_introspect.check = zone_check;
+	jemalloc_zone_introspect.print = zone_print;
+	jemalloc_zone_introspect.log = zone_log;
+	jemalloc_zone_introspect.force_lock = zone_force_lock;
+	jemalloc_zone_introspect.force_unlock = zone_force_unlock;
+	jemalloc_zone_introspect.statistics = zone_statistics;
+	jemalloc_zone_introspect.zone_locked = zone_locked;
+	jemalloc_zone_introspect.enable_discharge_checking = NULL;
+	jemalloc_zone_introspect.disable_discharge_checking = NULL;
+	jemalloc_zone_introspect.discharge = NULL;
+#ifdef __BLOCKS__
+	jemalloc_zone_introspect.enumerate_discharged_pointers = NULL;
+#else
+	jemalloc_zone_introspect.enumerate_unavailable_without_blocks = NULL;
+#endif
+	jemalloc_zone_introspect.reinit_lock = zone_reinit_lock;
+}
+
+static malloc_zone_t *
+zone_default_get(void) {
+	malloc_zone_t **zones = NULL;
+	unsigned int num_zones = 0;
+
+	/*
+	 * On OSX 10.12, malloc_default_zone returns a special zone that is not
+	 * present in the list of registered zones. That zone uses a "lite zone"
+	 * if one is present (apparently enabled when malloc stack logging is
+	 * enabled), or the first registered zone otherwise. In practice this
+	 * means unless malloc stack logging is enabled, the first registered
+	 * zone is the default.  So get the list of zones to get the first one,
+	 * instead of relying on malloc_default_zone.
+	 */
+	if (KERN_SUCCESS != malloc_get_all_zones(0, NULL,
+	    (vm_address_t**)&zones, &num_zones)) {
+		/*
+		 * Reset the value in case the failure happened after it was
+		 * set.
+		 */
+		num_zones = 0;
+	}
+
+	if (num_zones) {
+		return zones[0];
+	}
+
+	return malloc_default_zone();
+}
+
+/* As written, this function can only promote jemalloc_zone. */
+static void
+zone_promote(void) {
+	malloc_zone_t *zone;
+
+	do {
+		/*
+		 * Unregister and reregister the default zone.  On OSX >= 10.6,
+		 * unregistering takes the last registered zone and places it
+		 * at the location of the specified zone.  Unregistering the
+		 * default zone thus makes the last registered one the default.
+		 * On OSX < 10.6, unregistering shifts all registered zones.
+		 * The first registered zone then becomes the default.
+		 */
+		malloc_zone_unregister(default_zone);
+		malloc_zone_register(default_zone);
+
+		/*
+		 * On OSX 10.6, having the default purgeable zone appear before
+		 * the default zone makes some things crash: the purgeable zone
+		 * thinks it owns the default zone's pointers.  We thus
+		 * unregister/re-register it in order to ensure it's always
+		 * after the default zone.  On OSX < 10.6, there is no purgeable
+		 * zone, so this does nothing.  On OSX >= 10.6, unregistering
+		 * replaces the purgeable zone with the last registered zone
+		 * above, i.e. the default zone.  Registering it again then puts
+		 * it at the end, obviously after the default zone.
+		 */
+		if (purgeable_zone != NULL) {
+			malloc_zone_unregister(purgeable_zone);
+			malloc_zone_register(purgeable_zone);
+		}
+
+		zone = zone_default_get();
+	} while (zone != &jemalloc_zone);
 }

 JEMALLOC_ATTR(constructor)
 void
-register_zone(void)
-{
-
+zone_register(void) {
 	/*
 	 * If something else replaced the system default zone allocator, don't
 	 * register jemalloc's.
 	 */
-	malloc_zone_t *default_zone = malloc_default_zone();
-	if (!default_zone->zone_name ||
-	    strcmp(default_zone->zone_name, "DefaultMallocZone") != 0) {
+	default_zone = zone_default_get();
+	if (!default_zone->zone_name || strcmp(default_zone->zone_name,
+	    "DefaultMallocZone") != 0) {
 		return;
 	}

-	zone.size = (void *)zone_size;
-	zone.malloc = (void *)zone_malloc;
-	zone.calloc = (void *)zone_calloc;
-	zone.valloc = (void *)zone_valloc;
-	zone.free = (void *)zone_free;
-	zone.realloc = (void *)zone_realloc;
-	zone.destroy = (void *)zone_destroy;
-	zone.zone_name = "jemalloc_zone";
-	zone.batch_malloc = NULL;
-	zone.batch_free = NULL;
-	zone.introspect = &zone_introspect;
-	zone.version = JEMALLOC_ZONE_VERSION;
-#if (JEMALLOC_ZONE_VERSION >= 5)
-	zone.memalign = zone_memalign;
-#endif
-#if (JEMALLOC_ZONE_VERSION >= 6)
-	zone.free_definite_size = zone_free_definite_size;
-#endif
-#if (JEMALLOC_ZONE_VERSION >= 8)
-	zone.pressure_relief = NULL;
-#endif
-
-	zone_introspect.enumerator = NULL;
-	zone_introspect.good_size = (void *)zone_good_size;
-	zone_introspect.check = NULL;
-	zone_introspect.print = NULL;
-	zone_introspect.log = NULL;
-	zone_introspect.force_lock = (void *)zone_force_lock;
-	zone_introspect.force_unlock = (void *)zone_force_unlock;
-	zone_introspect.statistics = NULL;
-#if (JEMALLOC_ZONE_VERSION >= 6)
-	zone_introspect.zone_locked = NULL;
-#endif
-#if (JEMALLOC_ZONE_VERSION >= 7)
-	zone_introspect.enable_discharge_checking = NULL;
-	zone_introspect.disable_discharge_checking = NULL;
-	zone_introspect.discharge = NULL;
-#ifdef __BLOCKS__
-	zone_introspect.enumerate_discharged_pointers = NULL;
-#else
-	zone_introspect.enumerate_unavailable_without_blocks = NULL;
-#endif
-#endif
-
 	/*
 	 * The default purgeable zone is created lazily by OSX's libc.  It uses
 	 * the default zone when it is created for "small" allocations
 	 * (< 15 KiB), but assumes the default zone is a scalable_zone.  This
 	 * obviously fails when the default zone is the jemalloc zone, so
-	 * malloc_default_purgeable_zone is called beforehand so that the
+	 * malloc_default_purgeable_zone() is called beforehand so that the
 	 * default purgeable zone is created when the default zone is still
 	 * a scalable_zone.  As purgeable zones only exist on >= 10.6, we need
 	 * to check for the existence of malloc_default_purgeable_zone() at
 	 * run time.
 	 */
-	if (malloc_default_purgeable_zone != NULL)
-		malloc_default_purgeable_zone();
+	purgeable_zone = (malloc_default_purgeable_zone == NULL) ? NULL :
+	    malloc_default_purgeable_zone();

 	/* Register the custom zone.  At this point it won't be the default. */
-	malloc_zone_register(&zone);
+	zone_init();
+	malloc_zone_register(&jemalloc_zone);

-	/*
-	 * Unregister and reregister the default zone.  On OSX >= 10.6,
-	 * unregistering takes the last registered zone and places it at the
-	 * location of the specified zone.  Unregistering the default zone thus
-	 * makes the last registered one the default.  On OSX < 10.6,
-	 * unregistering shifts all registered zones.  The first registered zone
-	 * then becomes the default.
-	 */
-	do {
-		default_zone = malloc_default_zone();
-		malloc_zone_unregister(default_zone);
-		malloc_zone_register(default_zone);
-	} while (malloc_default_zone() != &zone);
+	/* Promote the custom zone to be default. */
+	zone_promote();
 }