Moved files after merging

2026-01-13 01:08:35 +00:00 · 2017-12-21 11:29:24 +01:00
parent 403ed2600f
commit 8f7bc1c749
113 changed files with 0 additions and 0 deletions
--- a/deps/jemalloc/include/jemalloc/internal/arena_externs.h
+++ b/deps/jemalloc/include/jemalloc/internal/arena_externs.h
@@ -0,0 +1,97 @@
+#ifndef JEMALLOC_INTERNAL_ARENA_EXTERNS_H
+#define JEMALLOC_INTERNAL_ARENA_EXTERNS_H
+
+#include "jemalloc/internal/extent_dss.h"
+#include "jemalloc/internal/pages.h"
+#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/stats.h"
+/*
+ * Decay-time options, in milliseconds.  NOTE(review): presumably seeded from
+ * DIRTY_DECAY_MS_DEFAULT / MUZZY_DECAY_MS_DEFAULT -- confirm against the
+ * definitions.
+ */
+extern ssize_t opt_dirty_decay_ms;
+extern ssize_t opt_muzzy_decay_ms;
+/* Read-only per-bin parameters; a single shared table of NBINS entries. */
+extern const arena_bin_info_t arena_bin_info[NBINS];
+/*
+ * Selected per-CPU arena mode, plus a table of mode names.
+ * NOTE(review): the table is presumably indexed by percpu_arena_mode_t values
+ * in [percpu_arena_mode_names_base, percpu_arena_mode_names_limit) -- confirm.
+ */
+extern percpu_arena_mode_t opt_percpu_arena;
+extern const char *percpu_arena_mode_names[];
+/* h_steps has one entry per smoothstep step (SMOOTHSTEP_NSTEPS). */
+extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS];
+extern malloc_mutex_t arenas_lock;
+/*
+ * Out-of-line arena operations.  Only prototypes are given here; the
+ * definitions are outside this header.
+ */
+void arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats,
+    szind_t szind, uint64_t nrequests);
+void arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats,
+    size_t size);
+void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena,
+    unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms,
+    ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy);
+void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
+    const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms,
+    size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats,
+    malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats);
+void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena,
+    extent_hooks_t **r_extent_hooks, extent_t *extent);
+#ifdef JEMALLOC_JET
+size_t arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr);
+#endif /* JEMALLOC_JET */
+extent_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena,
+    size_t usize, size_t alignment, bool *zero);
+void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena,
+    extent_t *extent);
+void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena,
+    extent_t *extent, size_t oldsize);
+void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena,
+    extent_t *extent, size_t oldsize);
+ssize_t arena_dirty_decay_ms_get(arena_t *arena);
+bool arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms);
+ssize_t arena_muzzy_decay_ms_get(arena_t *arena);
+bool arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms);
+void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread,
+    bool all); /* arena_decay_ticks() passes (false, false). */
+void arena_reset(tsd_t *tsd, arena_t *arena);
+void arena_destroy(tsd_t *tsd, arena_t *arena);
+void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
+    tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes);
+void arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info,
+    bool zero);
+/*
+ * A function type taking (void *, const arena_bin_info_t *) and a global
+ * pointer to a function of that type.  The pointer is qualified with
+ * JET_MUTABLE.  NOTE(review): JET_MUTABLE is presumably const outside test
+ * builds -- confirm against its definition.
+ */
+typedef void (arena_dalloc_junk_small_t)(void *, const arena_bin_info_t *);
+extern arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small;
+/*
+ * Allocation, deallocation, reallocation, configuration and fork-handling
+ * entry points.  The trailing notes record how the inline fast paths in
+ * arena_inlines_b.h use them.
+ */
+void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size,
+    szind_t ind, bool zero); /* arena_malloc() fallback path. */
+void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize,
+    size_t alignment, bool zero, tcache_t *tcache);
+void arena_prof_promote(tsdn_t *tsdn, const void *ptr, size_t usize);
+void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
+    bool slow_path); /* Non-slab, szind < NBINS (config_prof). */
+void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena,
+    extent_t *extent, void *ptr);
+void arena_dalloc_small(tsdn_t *tsdn, void *ptr); /* Slab free, no tcache. */
+bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
+    size_t extra, bool zero);
+void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize,
+    size_t size, size_t alignment, bool zero, tcache_t *tcache);
+dss_prec_t arena_dss_prec_get(arena_t *arena);
+bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec);
+ssize_t arena_dirty_decay_ms_default_get(void);
+bool arena_dirty_decay_ms_default_set(ssize_t decay_ms);
+ssize_t arena_muzzy_decay_ms_default_get(void);
+bool arena_muzzy_decay_ms_default_set(ssize_t decay_ms);
+unsigned arena_nthreads_get(arena_t *arena, bool internal);
+void arena_nthreads_inc(arena_t *arena, bool internal);
+void arena_nthreads_dec(arena_t *arena, bool internal);
+size_t arena_extent_sn_next(arena_t *arena);
+arena_t *arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks);
+void arena_boot(void);
+void arena_prefork0(tsdn_t *tsdn, arena_t *arena);
+void arena_prefork1(tsdn_t *tsdn, arena_t *arena);
+void arena_prefork2(tsdn_t *tsdn, arena_t *arena);
+void arena_prefork3(tsdn_t *tsdn, arena_t *arena);
+void arena_prefork4(tsdn_t *tsdn, arena_t *arena);
+void arena_prefork5(tsdn_t *tsdn, arena_t *arena);
+void arena_prefork6(tsdn_t *tsdn, arena_t *arena);
+void arena_prefork7(tsdn_t *tsdn, arena_t *arena);
+void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena);
+void arena_postfork_child(tsdn_t *tsdn, arena_t *arena);
+
+#endif /* JEMALLOC_INTERNAL_ARENA_EXTERNS_H */
--- a/deps/jemalloc/include/jemalloc/internal/arena_inlines_a.h
+++ b/deps/jemalloc/include/jemalloc/internal/arena_inlines_a.h
@@ -0,0 +1,57 @@
+#ifndef JEMALLOC_INTERNAL_ARENA_INLINES_A_H
+#define JEMALLOC_INTERNAL_ARENA_INLINES_A_H
+
+/* Look up the arena's index through its base allocator. */
+static inline unsigned
+arena_ind_get(const arena_t *arena) {
+	const unsigned ind = base_ind_get(arena->base);
+
+	return ind;
+}
+
+/* Add size to arena->stats.internal using a relaxed atomic fetch-add. */
+static inline void
+arena_internal_add(arena_t *arena, size_t size) {
+	(void)atomic_fetch_add_zu(&arena->stats.internal, size,
+	    ATOMIC_RELAXED);
+}
+
+/* Subtract size from arena->stats.internal using a relaxed atomic fetch-sub. */
+static inline void
+arena_internal_sub(arena_t *arena, size_t size) {
+	(void)atomic_fetch_sub_zu(&arena->stats.internal, size,
+	    ATOMIC_RELAXED);
+}
+
+/* Read the current value of arena->stats.internal (relaxed atomic load). */
+static inline size_t
+arena_internal_get(arena_t *arena) {
+	const size_t internal = atomic_load_zu(&arena->stats.internal,
+	    ATOMIC_RELAXED);
+
+	return internal;
+}
+
+/*
+ * Feed accumbytes into the arena's profiling accumulator.  While
+ * prof_interval is 0 nothing is accumulated and false is returned; otherwise
+ * the result of prof_accum_add() is passed through.
+ */
+static inline bool
+arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) {
+	cassert(config_prof);
+
+	if (unlikely(prof_interval != 0)) {
+		return prof_accum_add(tsdn, &arena->prof_accum, accumbytes);
+	}
+
+	return false;
+}
+
+/*
+ * Rebind the calling thread to the arena whose index equals cpu.  Nothing
+ * happens when the thread's current arena already has that index; otherwise
+ * the thread is migrated and its tcache, if it has one, is reassociated with
+ * the new arena.
+ */
+static inline void
+percpu_arena_update(tsd_t *tsd, unsigned cpu) {
+	assert(have_percpu_arena);
+	arena_t *cur_arena = tsd_arena_get(tsd);
+	assert(cur_arena != NULL);
+	unsigned cur_ind = arena_ind_get(cur_arena);
+
+	if (cur_ind == cpu) {
+		return;
+	}
+
+	arena_t *target = arena_get(tsd_tsdn(tsd), cpu, true);
+	assert(target != NULL);
+
+	/* Switch the arena association first, then the tcache's. */
+	arena_migrate(tsd, cur_ind, cpu);
+	tcache_t *tcache = tcache_get(tsd);
+	if (tcache == NULL) {
+		return;
+	}
+	tcache_arena_reassociate(tsd_tsdn(tsd), tcache, target);
+}
+
+#endif /* JEMALLOC_INTERNAL_ARENA_INLINES_A_H */
--- a/deps/jemalloc/include/jemalloc/internal/arena_inlines_b.h
+++ b/deps/jemalloc/include/jemalloc/internal/arena_inlines_b.h
@@ -0,0 +1,361 @@
+#ifndef JEMALLOC_INTERNAL_ARENA_INLINES_B_H
+#define JEMALLOC_INTERNAL_ARENA_INLINES_B_H
+
+#include "jemalloc/internal/jemalloc_internal_types.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/rtree.h"
+#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/sz.h"
+#include "jemalloc/internal/ticker.h"
+
+/* Convert a pointer into arena->bins to its bin index (asserted < NBINS). */
+static inline szind_t
+arena_bin_index(arena_t *arena, arena_bin_t *bin) {
+	arena_bin_t *first = arena->bins;
+	szind_t ind = (szind_t)(bin - first);
+
+	assert(ind < NBINS);
+	return ind;
+}
+
+/*
+ * Fetch the profiling context for ptr.  Slab-backed objects yield the sentinel
+ * value (prof_tctx_t *)1; non-slab objects defer to large_prof_tctx_get().
+ * alloc_ctx may be NULL, in which case the slab bit is read from the extent.
+ */
+JEMALLOC_ALWAYS_INLINE prof_tctx_t *
+arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) {
+	cassert(config_prof);
+	assert(ptr != NULL);
+
+	/* Static check. */
+	if (alloc_ctx != NULL) {
+		if (unlikely(!alloc_ctx->slab)) {
+			return large_prof_tctx_get(tsdn, iealloc(tsdn, ptr));
+		}
+	} else {
+		const extent_t *extent = iealloc(tsdn, ptr);
+		if (unlikely(!extent_slab_get(extent))) {
+			return large_prof_tctx_get(tsdn, extent);
+		}
+	}
+
+	return (prof_tctx_t *)(uintptr_t)1U;
+}
+
+/*
+ * Record tctx for ptr.  Only non-slab objects store a context (through
+ * large_prof_tctx_set()); for slab-backed objects this is a no-op.  usize is
+ * accepted but not consulted here.  alloc_ctx may be NULL, in which case the
+ * slab bit is read from the extent.
+ */
+JEMALLOC_ALWAYS_INLINE void
+arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize,
+    alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) {
+	cassert(config_prof);
+	assert(ptr != NULL);
+
+	/* Static check. */
+	if (alloc_ctx != NULL) {
+		if (unlikely(!alloc_ctx->slab)) {
+			large_prof_tctx_set(tsdn, iealloc(tsdn, ptr), tctx);
+		}
+		return;
+	}
+
+	extent_t *extent = iealloc(tsdn, ptr);
+	if (unlikely(!extent_slab_get(extent))) {
+		large_prof_tctx_set(tsdn, extent, tctx);
+	}
+}
+
+/*
+ * Reset the profiling context of a non-slab object.  tctx is accepted but not
+ * consulted here.
+ */
+static inline void
+arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) {
+	cassert(config_prof);
+	assert(ptr != NULL);
+
+	extent_t *large_extent = iealloc(tsdn, ptr);
+
+	assert(!extent_slab_get(large_extent));
+	large_prof_tctx_reset(tsdn, large_extent);
+}
+
+/*
+ * Advance the calling thread's decay ticker for this arena by nticks and run
+ * arena_decay() when ticker_ticks() reports that the ticker fired.  Does
+ * nothing when tsdn is null or no ticker is available.
+ */
+JEMALLOC_ALWAYS_INLINE void
+arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) {
+	if (unlikely(tsdn_null(tsdn))) {
+		return;
+	}
+
+	tsd_t *tsd = tsdn_tsd(tsdn);
+	ticker_t *ticker = decay_ticker_get(tsd, arena_ind_get(arena));
+	if (unlikely(ticker == NULL)) {
+		return;
+	}
+
+	if (unlikely(ticker_ticks(ticker, nticks))) {
+		arena_decay(tsdn, arena, false, false);
+	}
+}
+
+/*
+ * Single-tick form of arena_decay_ticks().  The caller must not own either of
+ * the arena's decay mutexes.
+ */
+JEMALLOC_ALWAYS_INLINE void
+arena_decay_tick(tsdn_t *tsdn, arena_t *arena) {
+	arena_decay_t *dirty = &arena->decay_dirty;
+	arena_decay_t *muzzy = &arena->decay_muzzy;
+
+	malloc_mutex_assert_not_owner(tsdn, &dirty->mtx);
+	malloc_mutex_assert_not_owner(tsdn, &muzzy->mtx);
+
+	arena_decay_ticks(tsdn, arena, 1);
+}
+
+/*
+ * Allocation fast path.  With a tcache, requests up to SMALL_MAXCLASS go to
+ * tcache_alloc_small() and requests up to tcache_maxclass go to
+ * tcache_alloc_large().  Everything else, including every request made
+ * without a tcache, is handled by arena_malloc_hard().
+ */
+JEMALLOC_ALWAYS_INLINE void *
+arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero,
+    tcache_t *tcache, bool slow_path) {
+	assert(!tsdn_null(tsdn) || tcache == NULL);
+	assert(size != 0);
+
+	if (unlikely(tcache == NULL)) {
+		return arena_malloc_hard(tsdn, arena, size, ind, zero);
+	}
+
+	if (likely(size <= SMALL_MAXCLASS)) {
+		return tcache_alloc_small(tsdn_tsd(tsdn), arena, tcache, size,
+		    ind, zero, slow_path);
+	}
+	if (likely(size <= tcache_maxclass)) {
+		return tcache_alloc_large(tsdn_tsd(tsdn), arena, tcache, size,
+		    ind, zero, slow_path);
+	}
+
+	/* Too large for any tcache bin. */
+	assert(size > tcache_maxclass);
+	return arena_malloc_hard(tsdn, arena, size, ind, zero);
+}
+
+/* Return the arena recorded in the extent that contains ptr. */
+JEMALLOC_ALWAYS_INLINE arena_t *
+arena_aalloc(tsdn_t *tsdn, const void *ptr) {
+	extent_t *extent = iealloc(tsdn, ptr);
+
+	return extent_arena_get(extent);
+}
+
+/*
+ * Return the size corresponding to the size class index that the extents
+ * rtree records for ptr (dependent lookup).
+ */
+JEMALLOC_ALWAYS_INLINE size_t
+arena_salloc(tsdn_t *tsdn, const void *ptr) {
+	assert(ptr != NULL);
+
+	rtree_ctx_t fallback_ctx;
+	rtree_ctx_t *ctx = tsdn_rtree_ctx(tsdn, &fallback_ctx);
+	szind_t ind = rtree_szind_read(tsdn, &extents_rtree, ctx,
+	    (uintptr_t)ptr, true);
+
+	assert(ind != NSIZES);
+	return sz_index2size(ind);
+}
+
+/*
+ * Validating variant of arena_salloc(): yields 0 when ptr does not lie within
+ * an extent managed by jemalloc.  Compared with isalloc() this costs extra:
+ * - the rtree lookup cannot be flagged as dependent, which induces rtree
+ *   lookup load dependencies; and
+ * - the lookup may fail, so one more branch is needed to test for that.
+ */
+JEMALLOC_ALWAYS_INLINE size_t
+arena_vsalloc(tsdn_t *tsdn, const void *ptr) {
+	rtree_ctx_t fallback_ctx;
+	rtree_ctx_t *ctx = tsdn_rtree_ctx(tsdn, &fallback_ctx);
+
+	extent_t *extent;
+	szind_t ind;
+	bool lookup_failed = rtree_extent_szind_read(tsdn, &extents_rtree, ctx,
+	    (uintptr_t)ptr, false, &extent, &ind);
+
+	/* extent is only inspected when the lookup itself succeeded. */
+	if (lookup_failed || extent == NULL) {
+		return 0;
+	}
+
+	assert(extent_state_get(extent) == extent_state_active);
+	/* Only slab members should be looked up via interior pointers. */
+	assert(extent_addr_get(extent) == ptr || extent_slab_get(extent));
+	assert(ind != NSIZES);
+
+	return sz_index2size(ind);
+}
+
+/*
+ * Free ptr without going through a thread cache: slab-backed pointers go to
+ * arena_dalloc_small(), everything else to large_dalloc().
+ */
+static inline void
+arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) {
+	assert(ptr != NULL);
+
+	rtree_ctx_t fallback_ctx;
+	rtree_ctx_t *ctx = tsdn_rtree_ctx(tsdn, &fallback_ctx);
+
+	szind_t ind;
+	bool is_slab;
+	rtree_szind_slab_read(tsdn, &extents_rtree, ctx, (uintptr_t)ptr, true,
+	    &ind, &is_slab);
+
+	if (config_debug) {
+		/* Cross-check the rtree metadata against the extent itself. */
+		extent_t *checked = rtree_extent_read(tsdn, &extents_rtree,
+		    ctx, (uintptr_t)ptr, true);
+		assert(ind == extent_szind_get(checked));
+		assert(ind < NSIZES);
+		assert(is_slab == extent_slab_get(checked));
+	}
+
+	if (unlikely(!is_slab)) {
+		extent_t *extent = iealloc(tsdn, ptr);
+		large_dalloc(tsdn, extent);
+		return;
+	}
+
+	/* Small allocation. */
+	arena_dalloc_small(tsdn, ptr);
+}
+
+/*
+ * Deallocation fast path.  Without a tcache the work is delegated to
+ * arena_dalloc_no_tcache().  Otherwise szind/slab are taken from alloc_ctx
+ * when supplied, or read from the extents rtree when alloc_ctx is NULL, and
+ * the pointer is routed as follows:
+ * - slab-backed:                tcache_dalloc_small()
+ * - non-slab, szind >= nhbins:  large_dalloc()
+ * - non-slab, szind <  NBINS:   arena_dalloc_promoted() (config_prof only)
+ * - any other non-slab:         tcache_dalloc_large()
+ */
+JEMALLOC_ALWAYS_INLINE void
+arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
+    alloc_ctx_t *alloc_ctx, bool slow_path) {
+	assert(!tsdn_null(tsdn) || tcache == NULL);
+	assert(ptr != NULL);
+
+	if (unlikely(tcache == NULL)) {
+		arena_dalloc_no_tcache(tsdn, ptr);
+		return;
+	}
+
+	szind_t szind;
+	bool slab;
+	if (alloc_ctx == NULL) {
+		rtree_ctx_t *lookup_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn));
+		rtree_szind_slab_read(tsdn, &extents_rtree, lookup_ctx,
+		    (uintptr_t)ptr, true, &szind, &slab);
+	} else {
+		szind = alloc_ctx->szind;
+		slab = alloc_ctx->slab;
+		assert(szind != NSIZES);
+	}
+
+	if (config_debug) {
+		/* Cross-check szind/slab against the extent itself. */
+		rtree_ctx_t *debug_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn));
+		extent_t *checked = rtree_extent_read(tsdn, &extents_rtree,
+		    debug_ctx, (uintptr_t)ptr, true);
+		assert(szind == extent_szind_get(checked));
+		assert(szind < NSIZES);
+		assert(slab == extent_slab_get(checked));
+	}
+
+	if (likely(slab)) {
+		/* Small allocation. */
+		tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind,
+		    slow_path);
+		return;
+	}
+
+	if (szind >= nhbins) {
+		extent_t *extent = iealloc(tsdn, ptr);
+		large_dalloc(tsdn, extent);
+		return;
+	}
+
+	if (config_prof && unlikely(szind < NBINS)) {
+		arena_dalloc_promoted(tsdn, ptr, tcache, slow_path);
+	} else {
+		tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, szind,
+		    slow_path);
+	}
+}
+/*
+ * Sized deallocation without a thread cache.  size must not exceed
+ * LARGE_MAXCLASS.
+ *
+ * szind and slab are assigned on every path: the first conditional covers
+ * "profiling inactive", and the second covers "profiling active or
+ * config_debug".  When both conditionals run (debug build, profiling
+ * inactive) the rtree values overwrite the size-derived ones, and the
+ * assertions check that the two agree.
+ */
+static inline void
+arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) {
+	assert(ptr != NULL);
+	assert(size <= LARGE_MAXCLASS);
+
+	szind_t szind;
+	bool slab;
+	if (!config_prof || !opt_prof) {
+		/*
+		 * There is no risk of being confused by a promoted sampled
+		 * object, so base szind and slab on the given size.
+		 */
+		szind = sz_size2index(size);
+		slab = (szind < NBINS);
+	}
+
+	if ((config_prof && opt_prof) || config_debug) {
+		rtree_ctx_t rtree_ctx_fallback;
+		rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn,
+		    &rtree_ctx_fallback);
+
+		rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx,
+		    (uintptr_t)ptr, true, &szind, &slab);
+
+		assert(szind == sz_size2index(size));
+		assert((config_prof && opt_prof) || slab == (szind < NBINS));
+
+		if (config_debug) {
+			extent_t *extent = rtree_extent_read(tsdn,
+			    &extents_rtree, rtree_ctx, (uintptr_t)ptr, true);
+			assert(szind == extent_szind_get(extent));
+			assert(slab == extent_slab_get(extent));
+		}
+	}
+
+	if (likely(slab)) {
+		/* Small allocation: release the region back to its slab. */
+		arena_dalloc_small(tsdn, ptr);
+	} else {
+		extent_t *extent = iealloc(tsdn, ptr);
+		large_dalloc(tsdn, extent);
+	}
+}
+/*
+ * Sized deallocation fast path.  size must not exceed LARGE_MAXCLASS.
+ * Without a tcache the work is delegated to arena_sdalloc_no_tcache().
+ *
+ * With profiling active, szind/slab come from alloc_ctx, which is filled in
+ * from the extents rtree when the caller passed NULL.  With profiling
+ * inactive they are derived from size alone.  Note that under config_debug
+ * the verification block re-reads szind and slab from the rtree, replacing
+ * whatever was chosen above, before checking them against the extent.
+ *
+ * Routing afterwards matches arena_dalloc(): slab-backed pointers go to
+ * tcache_dalloc_small(); non-slab pointers go to large_dalloc() when
+ * szind >= nhbins, to arena_dalloc_promoted() when config_prof and
+ * szind < NBINS, and to tcache_dalloc_large() otherwise.
+ */
+JEMALLOC_ALWAYS_INLINE void
+arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
+    alloc_ctx_t *alloc_ctx, bool slow_path) {
+	assert(!tsdn_null(tsdn) || tcache == NULL);
+	assert(ptr != NULL);
+	assert(size <= LARGE_MAXCLASS);
+
+	if (unlikely(tcache == NULL)) {
+		arena_sdalloc_no_tcache(tsdn, ptr, size);
+		return;
+	}
+
+	szind_t szind;
+	bool slab;
+	UNUSED alloc_ctx_t local_ctx;
+	if (config_prof && opt_prof) {
+		if (alloc_ctx == NULL) {
+			/*
+			 * Uncommon case and should be a static check.  Build a
+			 * local context from the rtree and use it from here on.
+			 */
+			rtree_ctx_t rtree_ctx_fallback;
+			rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn,
+			    &rtree_ctx_fallback);
+			rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx,
+			    (uintptr_t)ptr, true, &local_ctx.szind,
+			    &local_ctx.slab);
+			assert(local_ctx.szind == sz_size2index(size));
+			alloc_ctx = &local_ctx;
+		}
+		slab = alloc_ctx->slab;
+		szind = alloc_ctx->szind;
+	} else {
+		/*
+		 * There is no risk of being confused by a promoted sampled
+		 * object, so base szind and slab on the given size.
+		 */
+		szind = sz_size2index(size);
+		slab = (szind < NBINS);
+	}
+
+	if (config_debug) {
+		rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn));
+		rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx,
+		    (uintptr_t)ptr, true, &szind, &slab);
+		extent_t *extent = rtree_extent_read(tsdn,
+		    &extents_rtree, rtree_ctx, (uintptr_t)ptr, true);
+		assert(szind == extent_szind_get(extent));
+		assert(slab == extent_slab_get(extent));
+	}
+
+	if (likely(slab)) {
+		/* Small allocation: hand the region to the thread cache. */
+		tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind,
+		    slow_path);
+	} else {
+		if (szind < nhbins) {
+			if (config_prof && unlikely(szind < NBINS)) {
+				arena_dalloc_promoted(tsdn, ptr, tcache,
+				    slow_path);
+			} else {
+				tcache_dalloc_large(tsdn_tsd(tsdn),
+				    tcache, ptr, szind, slow_path);
+			}
+		} else {
+			extent_t *extent = iealloc(tsdn, ptr);
+			large_dalloc(tsdn, extent);
+		}
+	}
+}
+
+#endif /* JEMALLOC_INTERNAL_ARENA_INLINES_B_H */
--- a/deps/jemalloc/include/jemalloc/internal/arena_structs_a.h
+++ b/deps/jemalloc/include/jemalloc/internal/arena_structs_a.h
@@ -0,0 +1,11 @@
+#ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_A_H
+#define JEMALLOC_INTERNAL_ARENA_STRUCTS_A_H
+
+#include "jemalloc/internal/bitmap.h"
+
+struct arena_slab_data_s {
+	/*
+	 * Per region allocated/deallocated bitmap, stored as
+	 * BITMAP_GROUPS_MAX bitmap_t groups.  NOTE(review): presumably
+	 * interpreted through the owning bin's arena_bin_info_t.bitmap_info --
+	 * confirm.
+	 */
+	bitmap_t	bitmap[BITMAP_GROUPS_MAX];
+};
+
+#endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_A_H */
--- a/deps/jemalloc/include/jemalloc/internal/arena_structs_b.h
+++ b/deps/jemalloc/include/jemalloc/internal/arena_structs_b.h
@@ -0,0 +1,284 @@
+#ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H
+#define JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H
+
+#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/bitmap.h"
+#include "jemalloc/internal/extent_dss.h"
+#include "jemalloc/internal/jemalloc_internal_types.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/nstime.h"
+#include "jemalloc/internal/ql.h"
+#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/smoothstep.h"
+#include "jemalloc/internal/stats.h"
+#include "jemalloc/internal/ticker.h"
+
+/*
+ * Read-only information associated with each element of arena_t's bins array
+ * is stored separately, partly to reduce memory usage (only one copy, rather
+ * than one per arena), but mainly to avoid false cacheline sharing.
+ *
+ * Each slab has the following layout:
+ *
+ *   /--------------------\
+ *   | region 0           |
+ *   |--------------------|
+ *   | region 1           |
+ *   |--------------------|
+ *   | ...                |
+ *   | ...                |
+ *   | ...                |
+ *   |--------------------|
+ *   | region nregs-1     |
+ *   \--------------------/
+ */
+struct arena_bin_info_s {
+	/* Size of regions in a slab for this bin's size class. */
+	size_t			reg_size;
+
+	/* Total size of a slab for this bin's size class. */
+	size_t			slab_size;
+
+	/*
+	 * Total number of regions in a slab for this bin's size class.
+	 * NOTE(review): the layout sketched above suggests
+	 * slab_size == nregs * reg_size -- confirm where the table is built.
+	 */
+	uint32_t		nregs;
+
+	/*
+	 * Metadata used to manipulate bitmaps for slabs associated with this
+	 * bin.
+	 */
+	bitmap_info_t		bitmap_info;
+};
+
+struct arena_decay_s {
+	/*
+	 * Synchronizes all non-atomic fields.  arena_decay_tick() asserts
+	 * that its caller does not already own this mutex.
+	 */
+	malloc_mutex_t		mtx;
+	/*
+	 * True if a thread is currently purging the extents associated with
+	 * this decay structure.
+	 */
+	bool			purging;
+	/*
+	 * Approximate time in milliseconds from the creation of a set of unused
+	 * dirty pages until an equivalent set of unused dirty pages is purged
+	 * and/or reused.
+	 */
+	atomic_zd_t		time_ms;
+	/* Decay time divided by SMOOTHSTEP_NSTEPS. */
+	nstime_t		interval;
+	/*
+	 * Time at which the current decay interval logically started.  We do
+	 * not actually advance to a new epoch until sometime after it starts
+	 * because of scheduling and computation delays, and it is even possible
+	 * to completely skip epochs.  In all cases, during epoch advancement we
+	 * merge all relevant activity into the most recently recorded epoch.
+	 */
+	nstime_t		epoch;
+	/* Deadline randomness generator. */
+	uint64_t		jitter_state;
+	/*
+	 * Deadline for current epoch.  This is the sum of interval and per
+	 * epoch jitter which is a uniform random variable in [0..interval).
+	 * Epochs always advance by precise multiples of interval, but we
+	 * randomize the deadline to reduce the likelihood of arenas purging in
+	 * lockstep.
+	 */
+	nstime_t		deadline;
+	/*
+	 * Number of unpurged pages at beginning of current epoch.  During epoch
+	 * advancement we use the delta between arena->decay_*.nunpurged and
+	 * extents_npages_get(&arena->extents_*) to determine how many dirty
+	 * pages, if any, were generated.
+	 */
+	size_t			nunpurged;
+	/*
+	 * Trailing log of how many unused dirty pages were generated during
+	 * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last
+	 * element is the most recent epoch.  Corresponding epoch times are
+	 * relative to epoch.
+	 */
+	size_t			backlog[SMOOTHSTEP_NSTEPS];
+
+	/*
+	 * Pointer to associated stats.  These stats are embedded directly in
+	 * the arena's stats due to how stats structures are shared between the
+	 * arena and ctl code.
+	 *
+	 * Synchronization: Same as associated arena's stats field.
+	 */
+	decay_stats_t		*stats;
+	/* Peak number of pages in associated extents.  Used for debug only. */
+	uint64_t		ceil_npages;
+};
+
+struct arena_bin_s {
+	/*
+	 * All operations on arena_bin_t fields require lock ownership.
+	 *
+	 * One arena_bin_s exists per entry of arena_s.bins[NBINS]; the
+	 * matching read-only parameters live in arena_bin_info[].
+	 */
+	malloc_mutex_t		lock;
+
+	/*
+	 * Current slab being used to service allocations of this bin's size
+	 * class.  slabcur is independent of slabs_{nonfull,full}; whenever
+	 * slabcur is reassigned, the previous slab must be deallocated or
+	 * inserted into slabs_{nonfull,full}.
+	 */
+	extent_t		*slabcur;
+
+	/*
+	 * Heap of non-full slabs.  This heap is used to assure that new
+	 * allocations come from the non-full slab that is oldest/lowest in
+	 * memory.
+	 */
+	extent_heap_t		slabs_nonfull;
+
+	/* List used to track full slabs. */
+	extent_list_t		slabs_full;
+
+	/* Bin statistics; covered by lock like every other field. */
+	malloc_bin_stats_t	stats;
+};
+
+struct arena_s {
+	/*
+	 * Number of threads currently assigned to this arena.  Each thread has
+	 * two distinct assignments, one for application-serving allocation, and
+	 * the other for internal metadata allocation.  Internal metadata must
+	 * not be allocated from arenas explicitly created via the arenas.create
+	 * mallctl, because the arena.<i>.reset mallctl indiscriminately
+	 * discards all allocations for the affected arena.
+	 *
+	 *   0: Application allocation.
+	 *   1: Internal metadata allocation.
+	 *
+	 * NOTE(review): the bool `internal` argument of
+	 * arena_nthreads_{get,inc,dec}() presumably selects the index --
+	 * confirm.
+	 *
+	 * Synchronization: atomic.
+	 */
+	atomic_u_t		nthreads[2];
+
+	/*
+	 * When percpu_arena is enabled, to amortize the cost of reading /
+	 * updating the current CPU id, track the most recent thread accessing
+	 * this arena, and only read CPU if there is a mismatch.
+	 */
+	tsdn_t		*last_thd;
+
+	/*
+	 * Synchronization: internal.  stats.internal is maintained with
+	 * relaxed atomics by arena_internal_{add,sub,get}().
+	 */
+	arena_stats_t		stats;
+
+	/*
+	 * List of tcaches for extant threads associated with this arena.
+	 * Stats from these are merged incrementally, and at exit if
+	 * opt_stats_print is enabled.
+	 *
+	 * Synchronization: tcache_ql_mtx.
+	 */
+	ql_head(tcache_t)	tcache_ql;
+	malloc_mutex_t		tcache_ql_mtx;
+
+	/*
+	 * Synchronization: internal.  prof_accum is the accumulator that
+	 * arena_prof_accum() passes to prof_accum_add().
+	 */
+	prof_accum_t		prof_accum;
+	uint64_t		prof_accumbytes;
+
+	/*
+	 * PRNG state for cache index randomization of large allocation base
+	 * pointers.
+	 *
+	 * Synchronization: atomic.
+	 */
+	atomic_zu_t		offset_state;
+
+	/*
+	 * Extent serial number generator state.
+	 *
+	 * Synchronization: atomic.
+	 */
+	atomic_zu_t		extent_sn_next;
+
+	/*
+	 * Represents a dss_prec_t, but atomically.
+	 *
+	 * Synchronization: atomic.
+	 */
+	atomic_u_t		dss_prec;
+
+	/*
+	 * Number of pages in active extents.
+	 *
+	 * Synchronization: atomic.
+	 */
+	atomic_zu_t		nactive;
+
+	/*
+	 * Extant large allocations.
+	 *
+	 * Synchronization: large_mtx.
+	 */
+	extent_list_t		large;
+	/* Synchronizes all large allocation/update/deallocation. */
+	malloc_mutex_t		large_mtx;
+
+	/*
+	 * Collections of extents that were previously allocated.  These are
+	 * used when allocating extents, in an attempt to re-use address space.
+	 *
+	 * Synchronization: internal.
+	 */
+	extents_t		extents_dirty;
+	extents_t		extents_muzzy;
+	extents_t		extents_retained;
+
+	/*
+	 * Decay-based purging state, responsible for scheduling extent state
+	 * transitions.  arena_decay_tick() asserts that the caller owns
+	 * neither decay_dirty.mtx nor decay_muzzy.mtx.
+	 *
+	 * Synchronization: internal.
+	 */
+	arena_decay_t		decay_dirty; /* dirty --> muzzy */
+	arena_decay_t		decay_muzzy; /* muzzy --> retained */
+
+	/*
+	 * Next extent size class in a growing series to use when satisfying a
+	 * request via the extent hooks (only if opt_retain).  This limits the
+	 * number of disjoint virtual memory ranges so that extent merging can
+	 * be effective even if multiple arenas' extent allocation requests are
+	 * highly interleaved.
+	 *
+	 * Synchronization: extent_grow_mtx
+	 */
+	pszind_t		extent_grow_next;
+	malloc_mutex_t		extent_grow_mtx;
+
+	/*
+	 * Available extent structures that were allocated via
+	 * base_alloc_extent().
+	 *
+	 * Synchronization: extent_avail_mtx.
+	 */
+	extent_tree_t		extent_avail;
+	malloc_mutex_t		extent_avail_mtx;
+
+	/*
+	 * bins is used to store heaps of free regions.  arena_bin_index()
+	 * recovers a bin's index by pointer subtraction from this array.
+	 *
+	 * Synchronization: internal.
+	 */
+	arena_bin_t		bins[NBINS];
+
+	/*
+	 * Base allocator, from which arena metadata are allocated.
+	 * arena_ind_get() derives the arena index from it via base_ind_get().
+	 *
+	 * Synchronization: internal.
+	 */
+	base_t			*base;
+	/* Used to determine uptime.  Read-only after initialization. */
+	nstime_t		create_time;
+};
+
+/*
+ * Used in conjunction with tsd for fast arena-related context lookup.
+ * NOTE(review): decay_ticker is presumably the ticker that
+ * decay_ticker_get() hands to arena_decay_ticks() -- confirm.
+ */
+struct arena_tdata_s {
+	ticker_t		decay_ticker;
+};
+
+/*
+ * Used to pass rtree lookup context down the path, so that callees such as
+ * arena_dalloc() can skip their own rtree_szind_slab_read().
+ */
+struct alloc_ctx_s {
+	szind_t szind; /* Size class index; arena_dalloc() asserts != NSIZES. */
+	bool slab; /* True when the allocation is slab-backed (small). */
+};
+
+#endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H */
--- a/deps/jemalloc/include/jemalloc/internal/arena_types.h
+++ b/deps/jemalloc/include/jemalloc/internal/arena_types.h
@@ -0,0 +1,45 @@
+#ifndef JEMALLOC_INTERNAL_ARENA_TYPES_H
+#define JEMALLOC_INTERNAL_ARENA_TYPES_H
+
+/*
+ * Maximum number of regions in one slab:
+ * SLAB_MAXREGS == 2^(LG_PAGE - LG_TINY_MIN).
+ */
+#define LG_SLAB_MAXREGS		(LG_PAGE - LG_TINY_MIN)
+#define SLAB_MAXREGS		(1U << LG_SLAB_MAXREGS)
+
+/* Default decay times in milliseconds (10 seconds for both). */
+#define DIRTY_DECAY_MS_DEFAULT	ZD(10 * 1000)
+#define MUZZY_DECAY_MS_DEFAULT	ZD(10 * 1000)
+/* Number of event ticks between time checks. */
+#define DECAY_NTICKS_PER_UPDATE	1000
+/*
+ * Forward typedefs.  arena_slab_data_s is defined in arena_structs_a.h; the
+ * remaining structs are defined in arena_structs_b.h.
+ */
+typedef struct arena_slab_data_s arena_slab_data_t;
+typedef struct arena_bin_info_s arena_bin_info_t;
+typedef struct arena_decay_s arena_decay_t;
+typedef struct arena_bin_s arena_bin_t;
+typedef struct arena_s arena_t;
+typedef struct arena_tdata_s arena_tdata_t;
+typedef struct alloc_ctx_s alloc_ctx_t;
+
+typedef enum {
+	percpu_arena_mode_names_base   = 0, /* Used for options processing. */
+
+	/*
+	 * *_uninit values are used only during bootstrapping.  Adding
+	 * percpu_arena_mode_enabled_base to one of them must give the
+	 * corresponding initialized mode: percpu_arena_uninit (0) + 3 ==
+	 * percpu_arena, per_phycpu_arena_uninit (1) + 3 == per_phycpu_arena.
+	 */
+	percpu_arena_uninit            = 0,
+	per_phycpu_arena_uninit        = 1,
+
+	/* All non-disabled modes must come after percpu_arena_disabled. */
+	percpu_arena_disabled          = 2,
+
+	percpu_arena_mode_names_limit  = 3, /* Used for options processing. */
+	percpu_arena_mode_enabled_base = 3,
+
+	percpu_arena                   = 3,
+	per_phycpu_arena               = 4  /* Hyper threads share arena. */
+} percpu_arena_mode_t;
+/*
+ * PERCPU_ARENA_ENABLED(m) holds only for percpu_arena and per_phycpu_arena,
+ * the two values >= percpu_arena_mode_enabled_base.  The default mode is
+ * disabled.
+ */
+#define PERCPU_ARENA_ENABLED(m)	((m) >= percpu_arena_mode_enabled_base)
+#define PERCPU_ARENA_DEFAULT	percpu_arena_disabled
+
+#endif /* JEMALLOC_INTERNAL_ARENA_TYPES_H */
--- a/deps/jemalloc/include/jemalloc/internal/assert.h
+++ b/deps/jemalloc/include/jemalloc/internal/assert.h
@@ -0,0 +1,56 @@
+#include "jemalloc/internal/malloc_io.h"
+#include "jemalloc/internal/util.h"
+
+/*
+ * Define a custom assert() in order to reduce the chances of deadlock during
+ * assertion failure.
+ *
+ * The check is active only when config_debug is true.  Because of the
+ * short-circuit in (config_debug && !(e)), e is not evaluated at all
+ * otherwise, so it must not carry side effects the program relies on.  On
+ * failure the file, line and expression text are printed with
+ * malloc_printf() and the process aborts.  The #ifndef guard leaves any
+ * assert() that is already defined in place.
+ */
+#ifndef assert
+#define assert(e) do {							\
+	if (unlikely(config_debug && !(e))) {				\
+		malloc_printf(						\
+		    "<jemalloc>: %s:%d: Failed assertion: \"%s\"\n",	\
+		    __FILE__, __LINE__, #e);				\
+		abort();						\
+	}								\
+} while (0)
+#endif
+/*
+ * Mark a code path that must never execute.  With config_debug the location
+ * is printed and the process aborts; otherwise control falls through to
+ * unreachable().
+ */
+#ifndef not_reached
+#define not_reached() do {						\
+	if (config_debug) {						\
+		malloc_printf(						\
+		    "<jemalloc>: %s:%d: Unreachable code reached\n",	\
+		    __FILE__, __LINE__);				\
+		abort();						\
+	}								\
+	unreachable();							\
+} while (0)
+#endif
+/*
+ * Mark functionality that has not been implemented.  With config_debug the
+ * location is printed and the process aborts; without it the macro does
+ * nothing and execution continues.
+ */
+#ifndef not_implemented
+#define not_implemented() do {						\
+	if (config_debug) {						\
+		malloc_printf("<jemalloc>: %s:%d: Not implemented\n",	\
+		    __FILE__, __LINE__);				\
+		abort();						\
+	}								\
+} while (0)
+#endif
+/*
+ * Invoke not_implemented() when e is false.  Like assert(), the condition is
+ * only evaluated when config_debug is true.
+ */
+#ifndef assert_not_implemented
+#define assert_not_implemented(e) do {					\
+	if (unlikely(config_debug && !(e))) {				\
+		not_implemented();					\
+	}								\
+} while (0)
+#endif
+
+/*
+ * Use to assert a particular configuration, e.g., cassert(config_debug).
+ * Unlike assert(), c is tested regardless of config_debug; a false c leads
+ * to not_reached().
+ */
+#ifndef cassert
+#define cassert(c) do {							\
+	if (unlikely(!(c))) {						\
+		not_reached();						\
+	}								\
+} while (0)
+#endif
--- a/deps/jemalloc/include/jemalloc/internal/atomic_c11.h
+++ b/deps/jemalloc/include/jemalloc/internal/atomic_c11.h
@@ -0,0 +1,97 @@
+#ifndef JEMALLOC_INTERNAL_ATOMIC_C11_H
+#define JEMALLOC_INTERNAL_ATOMIC_C11_H
+
+#include <stdatomic.h>
+/*
+ * C11 <stdatomic.h> backend: the jemalloc atomic names below are plain
+ * aliases for their standard counterparts.
+ */
+#define ATOMIC_INIT(...) ATOMIC_VAR_INIT(__VA_ARGS__)
+/* Memory-order type and constants map one-to-one onto memory_order. */
+#define atomic_memory_order_t memory_order
+#define atomic_memory_order_relaxed memory_order_relaxed
+#define atomic_memory_order_acquire memory_order_acquire
+#define atomic_memory_order_release memory_order_release
+#define atomic_memory_order_acq_rel memory_order_acq_rel
+#define atomic_memory_order_seq_cst memory_order_seq_cst
+/* atomic_fence() is atomic_thread_fence(). */
+#define atomic_fence atomic_thread_fence
+/*
+ * Generate, for one (type, short_type) pair, the typedef
+ * atomic_<short_type>_t (an _Atomic(type)) together with inline wrappers
+ * atomic_load_*, atomic_store_*, atomic_exchange_*,
+ * atomic_compare_exchange_weak_* and atomic_compare_exchange_strong_*, each
+ * forwarding to the matching C11 *_explicit function.  lg_size is unused by
+ * this backend.
+ */
+#define JEMALLOC_GENERATE_ATOMICS(type, short_type,			\
+    /* unused */ lg_size)						\
+typedef _Atomic(type) atomic_##short_type##_t;				\
+									\
+ATOMIC_INLINE type							\
+atomic_load_##short_type(const atomic_##short_type##_t *a,		\
+    atomic_memory_order_t mo) {						\
+	/*								\
+	 * A strict interpretation of the C standard prevents		\
+	 * atomic_load from taking a const argument, but it's		\
+	 * convenient for our purposes. This cast is a workaround.	\
+	 */								\
+	atomic_##short_type##_t* a_nonconst =				\
+	    (atomic_##short_type##_t*)a;				\
+	return atomic_load_explicit(a_nonconst, mo);			\
+}									\
+									\
+ATOMIC_INLINE void							\
+atomic_store_##short_type(atomic_##short_type##_t *a,			\
+    type val, atomic_memory_order_t mo) {				\
+	atomic_store_explicit(a, val, mo);				\
+}									\
+									\
+ATOMIC_INLINE type							\
+atomic_exchange_##short_type(atomic_##short_type##_t *a, type val,	\
+    atomic_memory_order_t mo) {						\
+	return atomic_exchange_explicit(a, val, mo);			\
+}									\
+									\
+ATOMIC_INLINE bool							\
+atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a,	\
+    type *expected, type desired, atomic_memory_order_t success_mo,	\
+    atomic_memory_order_t failure_mo) {					\
+	return atomic_compare_exchange_weak_explicit(a, expected,	\
+	    desired, success_mo, failure_mo);				\
+}									\
+									\
+ATOMIC_INLINE bool							\
+atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a,	\
+    type *expected, type desired, atomic_memory_order_t success_mo,	\
+    atomic_memory_order_t failure_mo) {					\
+	return atomic_compare_exchange_strong_explicit(a, expected,	\
+	    desired, success_mo, failure_mo);				\
+}
+
+/*
+ * Integral types have some special operations available that non-integral ones
+ * lack.
+ *
+ * This macro expands JEMALLOC_GENERATE_ATOMICS for the type and then adds
+ * atomic_fetch_{add,sub,and,or,xor}_<short_type>, each returning whatever the
+ * matching C11 atomic_fetch_*_explicit call returns.
+ */
+#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, 		\
+    /* unused */ lg_size)						\
+JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size)	\
+									\
+ATOMIC_INLINE type							\
+atomic_fetch_add_##short_type(atomic_##short_type##_t *a,		\
+    type val, atomic_memory_order_t mo) {				\
+	return atomic_fetch_add_explicit(a, val, mo);			\
+}									\
+									\
+ATOMIC_INLINE type							\
+atomic_fetch_sub_##short_type(atomic_##short_type##_t *a,		\
+    type val, atomic_memory_order_t mo) {				\
+	return atomic_fetch_sub_explicit(a, val, mo);			\
+}									\
+ATOMIC_INLINE type							\
+atomic_fetch_and_##short_type(atomic_##short_type##_t *a,		\
+    type val, atomic_memory_order_t mo) {				\
+	return atomic_fetch_and_explicit(a, val, mo);			\
+}									\
+ATOMIC_INLINE type							\
+atomic_fetch_or_##short_type(atomic_##short_type##_t *a,		\
+    type val, atomic_memory_order_t mo) {				\
+	return atomic_fetch_or_explicit(a, val, mo);			\
+}									\
+ATOMIC_INLINE type							\
+atomic_fetch_xor_##short_type(atomic_##short_type##_t *a,		\
+    type val, atomic_memory_order_t mo) {				\
+	return atomic_fetch_xor_explicit(a, val, mo);			\
+}
+
+#endif /* JEMALLOC_INTERNAL_ATOMIC_C11_H */
--- a/deps/jemalloc/include/jemalloc/internal/atomic_gcc_atomic.h
+++ b/deps/jemalloc/include/jemalloc/internal/atomic_gcc_atomic.h
@@ -0,0 +1,127 @@
+#ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H
+#define JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H
+
+#include "jemalloc/internal/assert.h"
+
+#define ATOMIC_INIT(...) {__VA_ARGS__}
+
+/*
+ * Memory orderings accepted by the atomic_* wrappers in this header.
+ * atomic_enum_to_builtin() translates each value to the corresponding
+ * __ATOMIC_* constant.
+ */
+typedef enum {
+	atomic_memory_order_relaxed,
+	atomic_memory_order_acquire,
+	atomic_memory_order_release,
+	atomic_memory_order_acq_rel,
+	atomic_memory_order_seq_cst
+} atomic_memory_order_t;
+
+/*
+ * Translate an atomic_memory_order_t into the matching __ATOMIC_* constant
+ * expected by the __atomic_* builtins.  Every enumerator has its own case and
+ * returns directly; there is deliberately no default label.
+ */
+ATOMIC_INLINE int
+atomic_enum_to_builtin(atomic_memory_order_t mo) {
+	switch (mo) {
+	case atomic_memory_order_relaxed:
+		return __ATOMIC_RELAXED;
+	case atomic_memory_order_acquire:
+		return __ATOMIC_ACQUIRE;
+	case atomic_memory_order_release:
+		return __ATOMIC_RELEASE;
+	case atomic_memory_order_acq_rel:
+		return __ATOMIC_ACQ_REL;
+	case atomic_memory_order_seq_cst:
+		return __ATOMIC_SEQ_CST;
+	}
+	/* Can't happen; the switch is exhaustive. */
+	not_reached();
+}
+
+/* Issue a thread fence whose ordering is the builtin equivalent of `mo`. */
+ATOMIC_INLINE void
+atomic_fence(atomic_memory_order_t mo) {
+	int builtin_mo = atomic_enum_to_builtin(mo);
+
+	__atomic_thread_fence(builtin_mo);
+}
+
+/*
+ * JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_size) defines the wrapper
+ * struct atomic_<short_type>_t (a single `repr` member of `type`) together
+ * with load, store, exchange and compare-exchange accessors built on the
+ * generic __atomic_* builtins.  Each accessor converts its memory-order
+ * argument(s) with atomic_enum_to_builtin().  The weak and strong
+ * compare-exchange variants differ only in the fourth builtin argument (true
+ * for weak, false for strong).  lg_size is accepted but unused here.
+ */
+#define JEMALLOC_GENERATE_ATOMICS(type, short_type,			\
+    /* unused */ lg_size)						\
+typedef struct {							\
+	type repr;							\
+} atomic_##short_type##_t;						\
+									\
+ATOMIC_INLINE type							\
+atomic_load_##short_type(const atomic_##short_type##_t *a,		\
+    atomic_memory_order_t mo) {						\
+	type result;							\
+	__atomic_load(&a->repr, &result, atomic_enum_to_builtin(mo));	\
+	return result;							\
+}									\
+									\
+ATOMIC_INLINE void							\
+atomic_store_##short_type(atomic_##short_type##_t *a, type val,		\
+    atomic_memory_order_t mo) {						\
+	__atomic_store(&a->repr, &val, atomic_enum_to_builtin(mo));	\
+}									\
+									\
+ATOMIC_INLINE type							\
+atomic_exchange_##short_type(atomic_##short_type##_t *a, type val,	\
+    atomic_memory_order_t mo) {						\
+	type result;							\
+	__atomic_exchange(&a->repr, &val, &result,			\
+	    atomic_enum_to_builtin(mo));				\
+	return result;							\
+}									\
+									\
+ATOMIC_INLINE bool							\
+atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a,	\
+    type *expected, type desired, atomic_memory_order_t success_mo,	\
+    atomic_memory_order_t failure_mo) {					\
+	return __atomic_compare_exchange(&a->repr, expected, &desired,	\
+	    true, atomic_enum_to_builtin(success_mo),			\
+	    atomic_enum_to_builtin(failure_mo));			\
+}									\
+									\
+ATOMIC_INLINE bool							\
+atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a,	\
+    type *expected, type desired, atomic_memory_order_t success_mo,	\
+    atomic_memory_order_t failure_mo) {					\
+	return __atomic_compare_exchange(&a->repr, expected, &desired,	\
+	    false,							\
+	    atomic_enum_to_builtin(success_mo),				\
+	    atomic_enum_to_builtin(failure_mo));			\
+}
+
+
+/*
+ * JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, lg_size) expands
+ * JEMALLOC_GENERATE_ATOMICS() and adds atomic_fetch_{add,sub,and,or,xor}_
+ * <short_type>().  Each one applies the matching __atomic_fetch_* builtin to
+ * a->repr with the translated memory order and returns the builtin's result.
+ * lg_size is accepted but unused here.
+ */
+#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type,			\
+    /* unused */ lg_size)						\
+JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size)	\
+									\
+ATOMIC_INLINE type							\
+atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val,	\
+    atomic_memory_order_t mo) {						\
+	return __atomic_fetch_add(&a->repr, val,			\
+	    atomic_enum_to_builtin(mo));				\
+}									\
+									\
+ATOMIC_INLINE type							\
+atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val,	\
+    atomic_memory_order_t mo) {						\
+	return __atomic_fetch_sub(&a->repr, val,			\
+	    atomic_enum_to_builtin(mo));				\
+}									\
+									\
+ATOMIC_INLINE type							\
+atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val,	\
+    atomic_memory_order_t mo) {						\
+	return __atomic_fetch_and(&a->repr, val,			\
+	    atomic_enum_to_builtin(mo));				\
+}									\
+									\
+ATOMIC_INLINE type							\
+atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val,	\
+    atomic_memory_order_t mo) {						\
+	return __atomic_fetch_or(&a->repr, val,				\
+	    atomic_enum_to_builtin(mo));				\
+}									\
+									\
+ATOMIC_INLINE type							\
+atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val,	\
+    atomic_memory_order_t mo) {						\
+	return __atomic_fetch_xor(&a->repr, val,			\
+	    atomic_enum_to_builtin(mo));				\
+}
+
+#endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H */
--- a/deps/jemalloc/include/jemalloc/internal/atomic_gcc_sync.h
+++ b/deps/jemalloc/include/jemalloc/internal/atomic_gcc_sync.h
@@ -0,0 +1,191 @@
+#ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H
+#define JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H
+
+#define ATOMIC_INIT(...) {__VA_ARGS__}
+
+/*
+ * Memory orderings accepted by atomic_fence() and by the generated atomic_*
+ * accessors in this header.
+ */
+typedef enum {
+	atomic_memory_order_relaxed,
+	atomic_memory_order_acquire,
+	atomic_memory_order_release,
+	atomic_memory_order_acq_rel,
+	atomic_memory_order_seq_cst
+} atomic_memory_order_t;
+
+/*
+ * Issue a memory fence of strength `mo`.  Every path is bracketed by compiler
+ * barriers (empty asm with a "memory" clobber); a hardware barrier is added
+ * where the ordering and the target architecture call for one.
+ */
+ATOMIC_INLINE void
+atomic_fence(atomic_memory_order_t mo) {
+	/* Easy cases first: no barrier, and full barrier. */
+	if (mo == atomic_memory_order_relaxed) {
+		asm volatile("" ::: "memory");
+		return;
+	}
+	if (mo == atomic_memory_order_seq_cst) {
+		asm volatile("" ::: "memory");
+		__sync_synchronize();
+		asm volatile("" ::: "memory");
+		return;
+	}
+	/* Remaining orders (acquire, release, acq_rel) are per-architecture. */
+	asm volatile("" ::: "memory");
+#  if defined(__i386__) || defined(__x86_64__)
+	/* This is implicit on x86. */
+#  elif defined(__ppc__)
+	asm volatile("lwsync");
+#  elif defined(__sparc__) && defined(__arch64__)
+	if (mo == atomic_memory_order_acquire) {
+		asm volatile("membar #LoadLoad | #LoadStore");
+	} else if (mo == atomic_memory_order_release) {
+		asm volatile("membar #LoadStore | #StoreStore");
+	} else {
+		asm volatile("membar #LoadLoad | #LoadStore | #StoreStore");
+	}
+#  else
+	/* No specialized sequence for this target: use the full barrier. */
+	__sync_synchronize();
+#  endif
+	asm volatile("" ::: "memory");
+}
+
+/*
+ * A correct implementation of seq_cst loads and stores on weakly ordered
+ * architectures could do either of the following:
+ *   1. store() is weak-fence -> store -> strong fence, load() is load ->
+ *      strong-fence.
+ *   2. store() is strong-fence -> store, load() is strong-fence -> load ->
+ *      weak-fence.
+ * The tricky thing is, load() and store() above can be the load or store
+ * portions of a gcc __sync builtin, so we have to follow GCC's lead, which
+ * means going with strategy 2.
+ * On strongly ordered architectures, the natural strategy is to stick a strong
+ * fence after seq_cst stores, and have naked loads.  So we want the strong
+ * fences in different places on different architectures.
+ * atomic_pre_sc_load_fence and atomic_post_sc_store_fence allow us to
+ * accomplish this.
+ */
+
+/*
+ * Fence issued before the load in a seq_cst atomic load.  On x86 and 64-bit
+ * SPARC it is only a relaxed fence (a compiler barrier in atomic_fence());
+ * on every other architecture it is a full seq_cst fence.
+ *
+ * Declared with an explicit (void) parameter list so that this is a proper
+ * prototype in C rather than a declarator with unspecified parameters.
+ */
+ATOMIC_INLINE void
+atomic_pre_sc_load_fence(void) {
+#  if defined(__i386__) || defined(__x86_64__) ||			\
+    (defined(__sparc__) && defined(__arch64__))
+	atomic_fence(atomic_memory_order_relaxed);
+#  else
+	atomic_fence(atomic_memory_order_seq_cst);
+#  endif
+}
+
+/*
+ * Fence issued after the store in a seq_cst atomic store.  On x86 and 64-bit
+ * SPARC it is a full seq_cst fence; on every other architecture it is only a
+ * relaxed fence (a compiler barrier in atomic_fence()).
+ *
+ * Declared with an explicit (void) parameter list so that this is a proper
+ * prototype in C rather than a declarator with unspecified parameters.
+ */
+ATOMIC_INLINE void
+atomic_post_sc_store_fence(void) {
+#  if defined(__i386__) || defined(__x86_64__) ||			\
+    (defined(__sparc__) && defined(__arch64__))
+	atomic_fence(atomic_memory_order_seq_cst);
+#  else
+	atomic_fence(atomic_memory_order_relaxed);
+#  endif
+}
+
+/*
+ * JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_size) for the legacy __sync
+ * builtins.  It defines atomic_<short_type>_t (a struct holding one volatile
+ * `repr`) and its accessors:
+ *   - load/store are plain accesses to `repr` combined with atomic_fence(),
+ *     atomic_pre_sc_load_fence() and atomic_post_sc_store_fence() calls
+ *     selected from `mo`.
+ *   - exchange is a retry loop around __sync_bool_compare_and_swap(); `mo` is
+ *     not consulted.
+ *   - compare_exchange_{weak,strong} have identical bodies built on
+ *     __sync_val_compare_and_swap(); on failure the observed value is written
+ *     to *expected.  success_mo and failure_mo are not consulted.
+ * lg_size is accepted but unused here.
+ */
+#define JEMALLOC_GENERATE_ATOMICS(type, short_type,			\
+    /* unused */ lg_size)						\
+typedef struct {							\
+	type volatile repr;						\
+} atomic_##short_type##_t;						\
+									\
+ATOMIC_INLINE type							\
+atomic_load_##short_type(const atomic_##short_type##_t *a,		\
+    atomic_memory_order_t mo) {						\
+	if (mo == atomic_memory_order_seq_cst) {			\
+		atomic_pre_sc_load_fence();				\
+	}								\
+	type result = a->repr;						\
+	if (mo != atomic_memory_order_relaxed) {			\
+		atomic_fence(atomic_memory_order_acquire);		\
+	}								\
+	return result;							\
+}									\
+									\
+ATOMIC_INLINE void							\
+atomic_store_##short_type(atomic_##short_type##_t *a,			\
+    type val, atomic_memory_order_t mo) {				\
+	if (mo != atomic_memory_order_relaxed) {			\
+		atomic_fence(atomic_memory_order_release);		\
+	}								\
+	a->repr = val;							\
+	if (mo == atomic_memory_order_seq_cst) {			\
+		atomic_post_sc_store_fence();				\
+	}								\
+}									\
+									\
+ATOMIC_INLINE type							\
+atomic_exchange_##short_type(atomic_##short_type##_t *a, type val,	\
+    atomic_memory_order_t mo) {						\
+	/*								\
+	 * Because of FreeBSD, we care about gcc 4.2, which doesn't have\
+	 * an atomic exchange builtin.  We fake it with a CAS loop.	\
+	 */								\
+	while (true) {							\
+		type old = a->repr;					\
+		if (__sync_bool_compare_and_swap(&a->repr, old, val)) {	\
+			return old;					\
+		}							\
+	}								\
+}									\
+									\
+ATOMIC_INLINE bool							\
+atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a,	\
+    type *expected, type desired, atomic_memory_order_t success_mo,	\
+    atomic_memory_order_t failure_mo) {					\
+	type prev = __sync_val_compare_and_swap(&a->repr, *expected,	\
+	    desired);							\
+	if (prev == *expected) {					\
+		return true;						\
+	} else {							\
+		*expected = prev;					\
+		return false;						\
+	}								\
+}									\
+ATOMIC_INLINE bool							\
+atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a,	\
+    type *expected, type desired, atomic_memory_order_t success_mo,	\
+    atomic_memory_order_t failure_mo) {					\
+	type prev = __sync_val_compare_and_swap(&a->repr, *expected,	\
+	    desired);							\
+	if (prev == *expected) {					\
+		return true;						\
+	} else {							\
+		*expected = prev;					\
+		return false;						\
+	}								\
+}
+
+/*
+ * JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, lg_size) expands
+ * JEMALLOC_GENERATE_ATOMICS() and adds atomic_fetch_{add,sub,and,or,xor}_
+ * <short_type>() on top of the __sync_fetch_and_* builtins.  The `mo`
+ * argument is accepted for interface compatibility but is not consulted;
+ * each wrapper returns the builtin's result.  lg_size is unused here.
+ */
+#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type,			\
+    /* unused */ lg_size)						\
+JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size)	\
+									\
+ATOMIC_INLINE type							\
+atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val,	\
+    atomic_memory_order_t mo) {						\
+	return __sync_fetch_and_add(&a->repr, val);			\
+}									\
+									\
+ATOMIC_INLINE type							\
+atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val,	\
+    atomic_memory_order_t mo) {						\
+	return __sync_fetch_and_sub(&a->repr, val);			\
+}									\
+									\
+ATOMIC_INLINE type							\
+atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val,	\
+    atomic_memory_order_t mo) {						\
+	return __sync_fetch_and_and(&a->repr, val);			\
+}									\
+									\
+ATOMIC_INLINE type							\
+atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val,	\
+    atomic_memory_order_t mo) {						\
+	return __sync_fetch_and_or(&a->repr, val);			\
+}									\
+									\
+ATOMIC_INLINE type							\
+atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val,	\
+    atomic_memory_order_t mo) {						\
+	return __sync_fetch_and_xor(&a->repr, val);			\
+}
+
+#endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H */
--- a/deps/jemalloc/include/jemalloc/internal/atomic_msvc.h
+++ b/deps/jemalloc/include/jemalloc/internal/atomic_msvc.h
@@ -0,0 +1,158 @@
+#ifndef JEMALLOC_INTERNAL_ATOMIC_MSVC_H
+#define JEMALLOC_INTERNAL_ATOMIC_MSVC_H
+
+#define ATOMIC_INIT(...) {__VA_ARGS__}
+
+/*
+ * Memory orderings accepted by atomic_fence() and by the generated atomic_*
+ * accessors in this header.
+ */
+typedef enum {
+	atomic_memory_order_relaxed,
+	atomic_memory_order_acquire,
+	atomic_memory_order_release,
+	atomic_memory_order_acq_rel,
+	atomic_memory_order_seq_cst
+} atomic_memory_order_t;
+
+/*
+ * Representation types selected by lg_size through
+ * ATOMIC_INTERLOCKED_REPR(); the numeric part of each name is the lg_size
+ * value it corresponds to.
+ */
+typedef char atomic_repr_0_t;
+typedef short atomic_repr_1_t;
+typedef long atomic_repr_2_t;
+typedef __int64 atomic_repr_3_t;
+
+/*
+ * Issue a memory fence of strength `mo`.  _ReadWriteBarrier() brackets the
+ * body on both sides; MemoryBarrier() is added for every non-relaxed order on
+ * ARM/ARM64 and only for seq_cst on x86/x64.  Any other target fails the
+ * build with #error.
+ */
+ATOMIC_INLINE void
+atomic_fence(atomic_memory_order_t mo) {
+	_ReadWriteBarrier();
+#  if defined(_M_ARM) || defined(_M_ARM64)
+	/* ARM needs a barrier for everything but relaxed. */
+	if (mo != atomic_memory_order_relaxed) {
+		MemoryBarrier();
+	}
+#  elif defined(_M_IX86) || defined (_M_X64)
+	/* x86 needs a barrier only for seq_cst. */
+	if (mo == atomic_memory_order_seq_cst) {
+		MemoryBarrier();
+	}
+#  else
+#  error "Don't know how to create atomics for this platform for MSVC."
+#  endif
+	_ReadWriteBarrier();
+}
+
+/* Representation type for a given lg_size: atomic_repr_<lg_size>_t. */
+#define ATOMIC_INTERLOCKED_REPR(lg_size) atomic_repr_ ## lg_size ## _t
+
+/*
+ * Two-level token pasting: ATOMIC_CONCAT() goes through ATOMIC_RAW_CONCAT()
+ * so that its arguments are macro-expanded before they are pasted.
+ */
+#define ATOMIC_CONCAT(a, b) ATOMIC_RAW_CONCAT(a, b)
+#define ATOMIC_RAW_CONCAT(a, b) a ## b
+
+/*
+ * Build the name of an interlocked intrinsic from its base name and lg_size
+ * by appending the suffix defined below, e.g. base _InterlockedExchange with
+ * lg_size 3 becomes _InterlockedExchange64.
+ */
+#define ATOMIC_INTERLOCKED_NAME(base_name, lg_size) ATOMIC_CONCAT(	\
+    base_name, ATOMIC_INTERLOCKED_SUFFIX(lg_size))
+
+#define ATOMIC_INTERLOCKED_SUFFIX(lg_size)				\
+    ATOMIC_CONCAT(ATOMIC_INTERLOCKED_SUFFIX_, lg_size)
+
+/* Suffix per lg_size; lg_size 2 intentionally expands to nothing. */
+#define ATOMIC_INTERLOCKED_SUFFIX_0 8
+#define ATOMIC_INTERLOCKED_SUFFIX_1 16
+#define ATOMIC_INTERLOCKED_SUFFIX_2
+#define ATOMIC_INTERLOCKED_SUFFIX_3 64
+
+/*
+ * JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_size) for MSVC.  Defines
+ * atomic_<short_type>_t, whose `repr` has the interlocked representation type
+ * selected by lg_size, and its accessors:
+ *   - load/store access `repr` directly, casting to/from `type`, with
+ *     atomic_fence() calls chosen from `mo`.
+ *   - exchange and compare_exchange_weak call the _InterlockedExchange* /
+ *     _InterlockedCompareExchange* intrinsic named for lg_size; the
+ *     memory-order arguments are not consulted.  On compare-exchange failure
+ *     the observed value is written to *expected.
+ *   - compare_exchange_strong forwards to the weak variant.
+ */
+#define JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_size)		\
+typedef struct {							\
+	ATOMIC_INTERLOCKED_REPR(lg_size) repr;				\
+} atomic_##short_type##_t;						\
+									\
+ATOMIC_INLINE type							\
+atomic_load_##short_type(const atomic_##short_type##_t *a,		\
+    atomic_memory_order_t mo) {						\
+	ATOMIC_INTERLOCKED_REPR(lg_size) ret = a->repr;			\
+	if (mo != atomic_memory_order_relaxed) {			\
+		atomic_fence(atomic_memory_order_acquire);		\
+	}								\
+	return (type) ret;						\
+}									\
+									\
+ATOMIC_INLINE void							\
+atomic_store_##short_type(atomic_##short_type##_t *a,			\
+    type val, atomic_memory_order_t mo) {				\
+	if (mo != atomic_memory_order_relaxed) {			\
+		atomic_fence(atomic_memory_order_release);		\
+	}								\
+	a->repr = (ATOMIC_INTERLOCKED_REPR(lg_size)) val;		\
+	if (mo == atomic_memory_order_seq_cst) {			\
+		atomic_fence(atomic_memory_order_seq_cst);		\
+	}								\
+}									\
+									\
+ATOMIC_INLINE type							\
+atomic_exchange_##short_type(atomic_##short_type##_t *a, type val,	\
+    atomic_memory_order_t mo) {						\
+	return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedExchange,	\
+	    lg_size)(&a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val);	\
+}									\
+									\
+ATOMIC_INLINE bool							\
+atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a,	\
+    type *expected, type desired, atomic_memory_order_t success_mo,	\
+    atomic_memory_order_t failure_mo) {					\
+	ATOMIC_INTERLOCKED_REPR(lg_size) e =				\
+	    (ATOMIC_INTERLOCKED_REPR(lg_size))*expected;		\
+	ATOMIC_INTERLOCKED_REPR(lg_size) d =				\
+	    (ATOMIC_INTERLOCKED_REPR(lg_size))desired;			\
+	ATOMIC_INTERLOCKED_REPR(lg_size) old =				\
+	    ATOMIC_INTERLOCKED_NAME(_InterlockedCompareExchange, 	\
+		lg_size)(&a->repr, d, e);				\
+	if (old == e) {							\
+		return true;						\
+	} else {							\
+		*expected = (type)old;					\
+		return false;						\
+	}								\
+}									\
+									\
+ATOMIC_INLINE bool							\
+atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a,	\
+    type *expected, type desired, atomic_memory_order_t success_mo,	\
+    atomic_memory_order_t failure_mo) {					\
+	/* We implement the weak version with strong semantics. */	\
+	return atomic_compare_exchange_weak_##short_type(a, expected,	\
+	    desired, success_mo, failure_mo);				\
+}
+
+
+/*
+ * JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, lg_size) for MSVC.  Expands
+ * JEMALLOC_GENERATE_ATOMICS() and adds atomic_fetch_{add,sub,and,or,xor}_
+ * <short_type>().  add/and/or/xor call the _InterlockedExchangeAdd*,
+ * _InterlockedAnd*, _InterlockedOr* and _InterlockedXor* intrinsic named for
+ * lg_size; sub is implemented as an add of the negated operand.  `mo` is only
+ * passed along by sub and is otherwise not consulted.
+ */
+#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, lg_size)	\
+JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_size)			\
+									\
+ATOMIC_INLINE type							\
+atomic_fetch_add_##short_type(atomic_##short_type##_t *a,		\
+    type val, atomic_memory_order_t mo) {				\
+	return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedExchangeAdd,	\
+	    lg_size)(&a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val);	\
+}									\
+									\
+ATOMIC_INLINE type							\
+atomic_fetch_sub_##short_type(atomic_##short_type##_t *a,		\
+    type val, atomic_memory_order_t mo) {				\
+	/*								\
+	 * MSVC warns on negation of unsigned operands, but for us it	\
+	 * gives exactly the right semantics (MAX_TYPE + 1 - operand).	\
+	 */								\
+	__pragma(warning(push))						\
+	__pragma(warning(disable: 4146))				\
+	return atomic_fetch_add_##short_type(a, -val, mo);		\
+	__pragma(warning(pop))						\
+}									\
+ATOMIC_INLINE type							\
+atomic_fetch_and_##short_type(atomic_##short_type##_t *a,		\
+    type val, atomic_memory_order_t mo) {				\
+	return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedAnd, lg_size)(	\
+	    &a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val);		\
+}									\
+ATOMIC_INLINE type							\
+atomic_fetch_or_##short_type(atomic_##short_type##_t *a,		\
+    type val, atomic_memory_order_t mo) {				\
+	return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedOr, lg_size)(	\
+	    &a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val);		\
+}									\
+ATOMIC_INLINE type							\
+atomic_fetch_xor_##short_type(atomic_##short_type##_t *a,		\
+    type val, atomic_memory_order_t mo) {				\
+	return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedXor, lg_size)(	\
+	    &a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val);		\
+}
+
+#endif /* JEMALLOC_INTERNAL_ATOMIC_MSVC_H */
--- a/deps/jemalloc/include/jemalloc/internal/background_thread_externs.h
+++ b/deps/jemalloc/include/jemalloc/internal/background_thread_externs.h
@@ -0,0 +1,31 @@
+#ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H
+#define JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H
+
+extern bool opt_background_thread;
+extern malloc_mutex_t background_thread_lock;
+extern atomic_b_t background_thread_enabled_state;
+extern size_t n_background_threads;
+extern background_thread_info_t *background_thread_info;
+extern bool can_enable_background_thread;
+
+bool background_thread_create(tsd_t *tsd, unsigned arena_ind);
+bool background_threads_enable(tsd_t *tsd);
+bool background_threads_disable(tsd_t *tsd);
+void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
+    arena_decay_t *decay, size_t npages_new);
+void background_thread_prefork0(tsdn_t *tsdn);
+void background_thread_prefork1(tsdn_t *tsdn);
+void background_thread_postfork_parent(tsdn_t *tsdn);
+void background_thread_postfork_child(tsdn_t *tsdn);
+bool background_thread_stats_read(tsdn_t *tsdn,
+    background_thread_stats_t *stats);
+void background_thread_ctl_init(tsdn_t *tsdn);
+
+#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
+extern int pthread_create_wrapper(pthread_t *__restrict, const pthread_attr_t *,
+    void *(*)(void *), void *__restrict);
+#endif
+bool background_thread_boot0(void);
+bool background_thread_boot1(tsdn_t *tsdn);
+
+#endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H */
--- a/deps/jemalloc/include/jemalloc/internal/background_thread_inlines.h
+++ b/deps/jemalloc/include/jemalloc/internal/background_thread_inlines.h
@@ -0,0 +1,57 @@
+#ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H
+#define JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H
+
+/* Relaxed read of the global background_thread_enabled_state flag. */
+JEMALLOC_ALWAYS_INLINE bool
+background_thread_enabled(void) {
+	bool enabled = atomic_load_b(&background_thread_enabled_state,
+	    ATOMIC_RELAXED);
+
+	return enabled;
+}
+
+/*
+ * Set the global background_thread_enabled_state flag to `state` with a
+ * relaxed store.  The caller must own background_thread_lock (asserted).
+ */
+JEMALLOC_ALWAYS_INLINE void
+background_thread_enabled_set(tsdn_t *tsdn, bool state) {
+	malloc_mutex_assert_owner(tsdn, &background_thread_lock);
+	atomic_store_b(&background_thread_enabled_state, state, ATOMIC_RELAXED);
+}
+
+/*
+ * Return the background_thread_info[] entry that serves `arena`: the arena's
+ * index reduced modulo ncpus.
+ */
+JEMALLOC_ALWAYS_INLINE background_thread_info_t *
+arena_background_thread_info_get(arena_t *arena) {
+	unsigned arena_ind = arena_ind_get(arena);
+
+	return background_thread_info + (arena_ind % ncpus);
+}
+
+/*
+ * Return info->next_wakeup in nanoseconds.  The assertion checks that the
+ * indefinite_sleep flag is set exactly when the wakeup time equals
+ * BACKGROUND_THREAD_INDEFINITE_SLEEP.
+ */
+JEMALLOC_ALWAYS_INLINE uint64_t
+background_thread_wakeup_time_get(background_thread_info_t *info) {
+	uint64_t next_wakeup = nstime_ns(&info->next_wakeup);
+	assert(atomic_load_b(&info->indefinite_sleep, ATOMIC_ACQUIRE) ==
+	    (next_wakeup == BACKGROUND_THREAD_INDEFINITE_SLEEP));
+	return next_wakeup;
+}
+
+/*
+ * Record `wakeup_time` as the next wakeup for `info`.  The caller must own
+ * info->mtx (asserted).  The indefinite_sleep flag is stored first, with
+ * release ordering, and next_wakeup is written afterwards.
+ */
+JEMALLOC_ALWAYS_INLINE void
+background_thread_wakeup_time_set(tsdn_t *tsdn, background_thread_info_t *info,
+    uint64_t wakeup_time) {
+	malloc_mutex_assert_owner(tsdn, &info->mtx);
+
+	bool sleep_forever =
+	    (wakeup_time == BACKGROUND_THREAD_INDEFINITE_SLEEP);
+	atomic_store_b(&info->indefinite_sleep, sleep_forever, ATOMIC_RELEASE);
+	nstime_init(&info->next_wakeup, wakeup_time);
+}
+
+/* Acquire-load of info->indefinite_sleep (true: no wakeup is scheduled). */
+JEMALLOC_ALWAYS_INLINE bool
+background_thread_indefinite_sleep(background_thread_info_t *info) {
+	bool sleeping = atomic_load_b(&info->indefinite_sleep, ATOMIC_ACQUIRE);
+
+	return sleeping;
+}
+
+/*
+ * If background threads are enabled, the caller is not itself a background
+ * thread, and the info entry serving `arena` is in indefinite sleep, call
+ * background_thread_interval_check() for the arena's dirty decay state with
+ * zero new pages.  Otherwise do nothing.
+ */
+JEMALLOC_ALWAYS_INLINE void
+arena_background_thread_inactivity_check(tsdn_t *tsdn, arena_t *arena,
+    bool is_background_thread) {
+	if (!background_thread_enabled()) {
+		return;
+	}
+	if (is_background_thread) {
+		return;
+	}
+
+	background_thread_info_t *bg_info =
+	    arena_background_thread_info_get(arena);
+	if (!background_thread_indefinite_sleep(bg_info)) {
+		return;
+	}
+	background_thread_interval_check(tsdn, arena, &arena->decay_dirty, 0);
+}
+
+#endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H */
--- a/deps/jemalloc/include/jemalloc/internal/background_thread_structs.h
+++ b/deps/jemalloc/include/jemalloc/internal/background_thread_structs.h
@@ -0,0 +1,52 @@
+#ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H
+#define JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H
+
+/* This file really combines "structs" and "types", but only transitionally. */
+
+#if defined(JEMALLOC_BACKGROUND_THREAD) || defined(JEMALLOC_LAZY_LOCK)
+#  define JEMALLOC_PTHREAD_CREATE_WRAPPER
+#endif
+
+#define BACKGROUND_THREAD_INDEFINITE_SLEEP UINT64_MAX
+
+/* Lifecycle state stored in background_thread_info_s.state. */
+typedef enum {
+	background_thread_stopped,
+	background_thread_started,
+	/* Thread waits on the global lock when paused (for arena_reset). */
+	background_thread_paused,
+} background_thread_state_t;
+
+/*
+ * Bookkeeping for one background thread slot.  Arenas are mapped to slots by
+ * arena_background_thread_info_get().
+ */
+struct background_thread_info_s {
+#ifdef JEMALLOC_BACKGROUND_THREAD
+	/* Background thread is pthread specific. */
+	pthread_t		thread;
+	pthread_cond_t		cond;
+#endif
+	/*
+	 * Must be owned when background_thread_wakeup_time_set() updates
+	 * indefinite_sleep and next_wakeup (that function asserts ownership).
+	 */
+	malloc_mutex_t		mtx;
+	/* Current lifecycle state of this slot's thread. */
+	background_thread_state_t	state;
+	/* When true, it means no wakeup scheduled. */
+	atomic_b_t		indefinite_sleep;
+	/* Next scheduled wakeup time (absolute time in ns). */
+	nstime_t		next_wakeup;
+	/*
+	 * Since the last background thread run, newly added number of pages
+	 * that need to be purged by the next wakeup.  This is adjusted on
+	 * epoch advance, and is used to determine whether we should signal the
+	 * background thread to wake up earlier.
+	 */
+	size_t			npages_to_purge_new;
+	/* Stats: total number of runs since started. */
+	uint64_t		tot_n_runs;
+	/* Stats: total sleep time since started. */
+	nstime_t		tot_sleep_time;
+};
+typedef struct background_thread_info_s background_thread_info_t;
+
+/*
+ * Statistics snapshot; background_thread_stats_read() takes a pointer to one.
+ * NOTE(review): the per-field meanings below are inferred from the names only;
+ * confirm against the implementation of background_thread_stats_read().
+ */
+struct background_thread_stats_s {
+	/* Presumably the number of background threads. */
+	size_t num_threads;
+	/* Presumably the total number of runs across those threads. */
+	uint64_t num_runs;
+	/* Presumably the (average) interval between runs. */
+	nstime_t run_interval;
+};
+typedef struct background_thread_stats_s background_thread_stats_t;
+
+#endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H */
--- a/deps/jemalloc/include/jemalloc/internal/base_externs.h
+++ b/deps/jemalloc/include/jemalloc/internal/base_externs.h
@@ -0,0 +1,19 @@
+#ifndef JEMALLOC_INTERNAL_BASE_EXTERNS_H
+#define JEMALLOC_INTERNAL_BASE_EXTERNS_H
+
+base_t *b0get(void);
+base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks);
+void base_delete(tsdn_t *tsdn, base_t *base);
+extent_hooks_t *base_extent_hooks_get(base_t *base);
+extent_hooks_t *base_extent_hooks_set(base_t *base,
+    extent_hooks_t *extent_hooks);
+void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment);
+extent_t *base_alloc_extent(tsdn_t *tsdn, base_t *base);
+void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated,
+    size_t *resident, size_t *mapped);
+void base_prefork(tsdn_t *tsdn, base_t *base);
+void base_postfork_parent(tsdn_t *tsdn, base_t *base);
+void base_postfork_child(tsdn_t *tsdn, base_t *base);
+bool base_boot(tsdn_t *tsdn);
+
+#endif /* JEMALLOC_INTERNAL_BASE_EXTERNS_H */
--- a/deps/jemalloc/include/jemalloc/internal/base_inlines.h
+++ b/deps/jemalloc/include/jemalloc/internal/base_inlines.h
@@ -0,0 +1,9 @@
+#ifndef JEMALLOC_INTERNAL_BASE_INLINES_H
+#define JEMALLOC_INTERNAL_BASE_INLINES_H
+
+/* Accessor for base->ind. */
+static inline unsigned
+base_ind_get(const base_t *base) {
+	unsigned ind = base->ind;
+
+	return ind;
+}
+
+#endif /* JEMALLOC_INTERNAL_BASE_INLINES_H */
--- a/deps/jemalloc/include/jemalloc/internal/base_structs.h
+++ b/deps/jemalloc/include/jemalloc/internal/base_structs.h
@@ -0,0 +1,55 @@
+#ifndef JEMALLOC_INTERNAL_BASE_STRUCTS_H
+#define JEMALLOC_INTERNAL_BASE_STRUCTS_H
+
+#include "jemalloc/internal/jemalloc_internal_types.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/size_classes.h"
+
+/*
+ * Embedded at the beginning of every block of base-managed virtual memory.
+ * Blocks are chained through `next`; base_s.blocks holds the chain of all
+ * blocks associated with a base.
+ */
+struct base_block_s {
+	/* Total size of block's virtual memory mapping. */
+	size_t		size;
+
+	/* Next block in list of base's blocks. */
+	base_block_t	*next;
+
+	/* Tracks unused trailing space. */
+	extent_t	extent;
+};
+
+/*
+ * State of one base allocator, as created by base_new() and released by
+ * base_delete().
+ */
+struct base_s {
+	/*
+	 * Associated arena's index within the arenas array.  Returned by
+	 * base_ind_get().
+	 */
+	unsigned	ind;
+
+	/*
+	 * User-configurable extent hook functions.  Points to an
+	 * extent_hooks_t.
+	 */
+	atomic_p_t	extent_hooks;
+
+	/* Protects base_alloc() and base_stats_get() operations. */
+	malloc_mutex_t	mtx;
+
+	/*
+	 * Most recent size class in the series of increasingly large base
+	 * extents.  Logarithmic spacing between subsequent allocations ensures
+	 * that the total number of distinct mappings remains small.
+	 */
+	pszind_t	pind_last;
+
+	/* Serial number generation state. */
+	size_t		extent_sn_next;
+
+	/* Chain of all blocks associated with base. */
+	base_block_t	*blocks;
+
+	/* Heap of extents that track unused trailing space within blocks. */
+	extent_heap_t	avail[NSIZES];
+
+	/* Stats, only maintained if config_stats. */
+	size_t		allocated;
+	size_t		resident;
+	size_t		mapped;
+};
+
+#endif /* JEMALLOC_INTERNAL_BASE_STRUCTS_H */
--- a/deps/jemalloc/include/jemalloc/internal/base_types.h
+++ b/deps/jemalloc/include/jemalloc/internal/base_types.h
@@ -0,0 +1,7 @@
+#ifndef JEMALLOC_INTERNAL_BASE_TYPES_H
+#define JEMALLOC_INTERNAL_BASE_TYPES_H
+
+typedef struct base_block_s base_block_t;
+typedef struct base_s base_t;
+
+#endif /* JEMALLOC_INTERNAL_BASE_TYPES_H */
--- a/deps/jemalloc/include/jemalloc/internal/bit_util.h
+++ b/deps/jemalloc/include/jemalloc/internal/bit_util.h
@@ -0,0 +1,165 @@
+#ifndef JEMALLOC_INTERNAL_BIT_UTIL_H
+#define JEMALLOC_INTERNAL_BIT_UTIL_H
+
+#include "jemalloc/internal/assert.h"
+
+#define BIT_UTIL_INLINE static inline
+
+/* Sanity check. */
+#if !defined(JEMALLOC_INTERNAL_FFSLL) || !defined(JEMALLOC_INTERNAL_FFSL) \
+    || !defined(JEMALLOC_INTERNAL_FFS)
+#  error JEMALLOC_INTERNAL_FFS{,L,LL} should have been defined by configure
+#endif
+
+
+/* Find-first-set for unsigned long long, via JEMALLOC_INTERNAL_FFSLL. */
+BIT_UTIL_INLINE unsigned
+ffs_llu(unsigned long long bitmap) {
+	unsigned first_set = JEMALLOC_INTERNAL_FFSLL(bitmap);
+
+	return first_set;
+}
+
+/* Find-first-set for unsigned long, via JEMALLOC_INTERNAL_FFSL. */
+BIT_UTIL_INLINE unsigned
+ffs_lu(unsigned long bitmap) {
+	unsigned first_set = JEMALLOC_INTERNAL_FFSL(bitmap);
+
+	return first_set;
+}
+
+/* Find-first-set for unsigned int, via JEMALLOC_INTERNAL_FFS. */
+BIT_UTIL_INLINE unsigned
+ffs_u(unsigned bitmap) {
+	unsigned first_set = JEMALLOC_INTERNAL_FFS(bitmap);
+
+	return first_set;
+}
+
+/*
+ * Find-first-set for size_t.  Dispatches at compile time to the ffs_u(),
+ * ffs_lu() or ffs_llu() variant whose operand has the same lg-size as a
+ * pointer; the build fails if none matches.
+ */
+BIT_UTIL_INLINE unsigned
+ffs_zu(size_t bitmap) {
+	unsigned first_set;
+
+#if LG_SIZEOF_PTR == LG_SIZEOF_INT
+	first_set = ffs_u(bitmap);
+#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG
+	first_set = ffs_lu(bitmap);
+#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG
+	first_set = ffs_llu(bitmap);
+#else
+#error No implementation for size_t ffs()
+#endif
+	return first_set;
+}
+
+/*
+ * Find-first-set for uint64_t.  Uses ffs_lu() when long is 8 bytes, otherwise
+ * ffs_llu() when long long is 8 bytes; the build fails if neither holds.
+ */
+BIT_UTIL_INLINE unsigned
+ffs_u64(uint64_t bitmap) {
+	unsigned first_set;
+
+#if LG_SIZEOF_LONG == 3
+	first_set = ffs_lu(bitmap);
+#elif LG_SIZEOF_LONG_LONG == 3
+	first_set = ffs_llu(bitmap);
+#else
+#error No implementation for 64-bit ffs()
+#endif
+	return first_set;
+}
+
+/*
+ * Find-first-set for uint32_t.  Requires a 4-byte int (LG_SIZEOF_INT == 2) and
+ * forwards to ffs_u(); any other configuration fails the build.
+ */
+BIT_UTIL_INLINE unsigned
+ffs_u32(uint32_t bitmap) {
+#if LG_SIZEOF_INT == 2
+	return ffs_u(bitmap);
+#else
+#error No implementation for 32-bit ffs()
+#endif
+}
+
+/*
+ * Round x up to a power of two: subtract one, smear the highest set bit into
+ * every lower position, then add one.  Because the arithmetic is modulo 2^64,
+ * an input of 0 and any input above 2^63 both yield 0.
+ */
+BIT_UTIL_INLINE uint64_t
+pow2_ceil_u64(uint64_t x) {
+	x--;
+	/* Shifts of 1, 2, 4, 8, 16 and 32 bits. */
+	for (unsigned shift = 1; shift < 64; shift <<= 1) {
+		x |= x >> shift;
+	}
+	return x + 1;
+}
+
+/*
+ * Round x up to a power of two: subtract one, smear the highest set bit into
+ * every lower position, then add one.  Because the arithmetic is modulo 2^32,
+ * an input of 0 and any input above 2^31 both yield 0.
+ */
+BIT_UTIL_INLINE uint32_t
+pow2_ceil_u32(uint32_t x) {
+	x--;
+	/* Shifts of 1, 2, 4, 8 and 16 bits. */
+	for (unsigned shift = 1; shift < 32; shift <<= 1) {
+		x |= x >> shift;
+	}
+	return x + 1;
+}
+
+/*
+ * Compute the smallest power of 2 that is >= x, for size_t operands.  Uses the
+ * 64-bit helper when pointers are 8 bytes and the 32-bit helper otherwise.
+ */
+BIT_UTIL_INLINE size_t
+pow2_ceil_zu(size_t x) {
+	size_t rounded;
+
+#if (LG_SIZEOF_PTR == 3)
+	rounded = pow2_ceil_u64(x);
+#else
+	rounded = pow2_ceil_u32(x);
+#endif
+	return rounded;
+}
+
+#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
+/*
+ * x86 variant of lg_floor(): applies the `bsr` instruction to x, which must be
+ * nonzero (asserted), and returns the resulting bit index as an unsigned.
+ */
+BIT_UTIL_INLINE unsigned
+lg_floor(size_t x) {
+	size_t ret;
+	assert(x != 0);
+
+	asm ("bsr %1, %0"
+	    : "=r"(ret) // Outputs.
+	    : "r"(x)    // Inputs.
+	    );
+	assert(ret < UINT_MAX);
+	return (unsigned)ret;
+}
+#elif (defined(_MSC_VER))
+/*
+ * MSVC variant of lg_floor(): uses _BitScanReverse64() for 8-byte pointers and
+ * _BitScanReverse() for 4-byte pointers; both store their result in `ret`.
+ * x must be nonzero (asserted).  Other pointer sizes fail the build.
+ */
+BIT_UTIL_INLINE unsigned
+lg_floor(size_t x) {
+	unsigned long ret;
+
+	assert(x != 0);
+
+#if (LG_SIZEOF_PTR == 3)
+	_BitScanReverse64(&ret, x);
+#elif (LG_SIZEOF_PTR == 2)
+	_BitScanReverse(&ret, x);
+#else
+#  error "Unsupported type size for lg_floor()"
+#endif
+	assert(ret < UINT_MAX);
+	return (unsigned)ret;
+}
+#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ))
+/*
+ * lg_floor() for compilers that provide the __builtin_clz*() family.  Returns
+ * ((8 << LG_SIZEOF_PTR) - 1) minus the number of leading zero bits in x, i.e.
+ * the index of the most significant set bit.  x must be nonzero (asserted).
+ *
+ * The builtin is chosen so that its operand has the same lg-size as a pointer.
+ * The long long branch covers targets where pointers are wider than long,
+ * which would otherwise hit the #error below.
+ */
+BIT_UTIL_INLINE unsigned
+lg_floor(size_t x) {
+	assert(x != 0);
+
+#if (LG_SIZEOF_PTR == LG_SIZEOF_INT)
+	return ((8 << LG_SIZEOF_PTR) - 1) - __builtin_clz(x);
+#elif (LG_SIZEOF_PTR == LG_SIZEOF_LONG)
+	return ((8 << LG_SIZEOF_PTR) - 1) - __builtin_clzl(x);
+#elif (LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG)
+	return ((8 << LG_SIZEOF_PTR) - 1) - __builtin_clzll(x);
+#else
+#  error "Unsupported type size for lg_floor()"
+#endif
+}
+#else
+/*
+ * Portable lg_floor(): smear the highest set bit of x (nonzero, asserted) into
+ * every lower position, then locate the single set bit of the incremented
+ * value with ffs_zu().  When every bit ends up set the increment would wrap,
+ * so that case returns the top bit index directly.
+ */
+BIT_UTIL_INLINE unsigned
+lg_floor(size_t x) {
+	assert(x != 0);
+
+	/* Shifts of 1, 2, 4, 8 and 16 bits. */
+	for (unsigned shift = 1; shift <= 16; shift <<= 1) {
+		x |= (x >> shift);
+	}
+#if (LG_SIZEOF_PTR == 3)
+	x |= (x >> 32);
+#endif
+	if (x == SIZE_T_MAX) {
+		return (8 << LG_SIZEOF_PTR) - 1;
+	}
+	return ffs_zu(x + 1) - 2;
+}
+#endif
+
+#undef BIT_UTIL_INLINE
+
+#endif /* JEMALLOC_INTERNAL_BIT_UTIL_H */
--- a/deps/jemalloc/include/jemalloc/internal/extent_dss.h
+++ b/deps/jemalloc/include/jemalloc/internal/extent_dss.h
@@ -0,0 +1,26 @@
+#ifndef JEMALLOC_INTERNAL_EXTENT_DSS_H
+#define JEMALLOC_INTERNAL_EXTENT_DSS_H
+
+/*
+ * DSS precedence setting, read and written through extent_dss_prec_get() and
+ * extent_dss_prec_set().  dss_prec_limit is one past the last real setting.
+ * NOTE(review): presumably dss_prec_limit doubles as the number of entries in
+ * dss_prec_names[] -- confirm against its definition.
+ */
+typedef enum {
+	dss_prec_disabled  = 0,
+	dss_prec_primary   = 1,
+	dss_prec_secondary = 2,
+
+	dss_prec_limit     = 3
+} dss_prec_t;
+#define DSS_PREC_DEFAULT dss_prec_secondary
+#define DSS_DEFAULT "secondary"
+
+extern const char *dss_prec_names[];
+
+extern const char *opt_dss;
+
+dss_prec_t extent_dss_prec_get(void);
+bool extent_dss_prec_set(dss_prec_t dss_prec);
+void *extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr,
+    size_t size, size_t alignment, bool *zero, bool *commit);
+bool extent_in_dss(void *addr);
+bool extent_dss_mergeable(void *addr_a, void *addr_b);
+void extent_dss_boot(void);
+
+#endif /* JEMALLOC_INTERNAL_EXTENT_DSS_H */
--- a/deps/jemalloc/include/jemalloc/internal/extent_externs.h
+++ b/deps/jemalloc/include/jemalloc/internal/extent_externs.h
@@ -0,0 +1,72 @@
+#ifndef JEMALLOC_INTERNAL_EXTENT_EXTERNS_H
+#define JEMALLOC_INTERNAL_EXTENT_EXTERNS_H
+
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/mutex_pool.h"
+#include "jemalloc/internal/ph.h"
+#include "jemalloc/internal/rb.h"
+#include "jemalloc/internal/rtree.h"
+
+extern rtree_t			extents_rtree;
+extern const extent_hooks_t	extent_hooks_default;
+extern mutex_pool_t		extent_mutex_pool;
+
+extent_t *extent_alloc(tsdn_t *tsdn, arena_t *arena);
+void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent);
+
+extent_hooks_t *extent_hooks_get(arena_t *arena);
+extent_hooks_t *extent_hooks_set(tsd_t *tsd, arena_t *arena,
+    extent_hooks_t *extent_hooks);
+
+#ifdef JEMALLOC_JET
+size_t extent_size_quantize_floor(size_t size);
+size_t extent_size_quantize_ceil(size_t size);
+#endif
+
+rb_proto(, extent_avail_, extent_tree_t, extent_t)
+ph_proto(, extent_heap_, extent_heap_t, extent_t)
+
+bool extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state,
+    bool delay_coalesce);
+extent_state_t extents_state_get(const extents_t *extents);
+size_t extents_npages_get(extents_t *extents);
+extent_t *extents_alloc(tsdn_t *tsdn, arena_t *arena,
+    extent_hooks_t **r_extent_hooks, extents_t *extents, void *new_addr,
+    size_t size, size_t pad, size_t alignment, bool slab, szind_t szind,
+    bool *zero, bool *commit);
+void extents_dalloc(tsdn_t *tsdn, arena_t *arena,
+    extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent);
+extent_t *extents_evict(tsdn_t *tsdn, arena_t *arena,
+    extent_hooks_t **r_extent_hooks, extents_t *extents, size_t npages_min);
+void extents_prefork(tsdn_t *tsdn, extents_t *extents);
+void extents_postfork_parent(tsdn_t *tsdn, extents_t *extents);
+void extents_postfork_child(tsdn_t *tsdn, extents_t *extents);
+extent_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena,
+    extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, size_t pad,
+    size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit);
+void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent);
+void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena,
+    extent_hooks_t **r_extent_hooks, extent_t *extent);
+void extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena,
+    extent_hooks_t **r_extent_hooks, extent_t *extent);
+bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena,
+    extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset,
+    size_t length);
+bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena,
+    extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset,
+    size_t length);
+bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena,
+    extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset,
+    size_t length);
+bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena,
+    extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset,
+    size_t length);
+extent_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena,
+    extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a,
+    szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b);
+bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena,
+    extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b);
+
+bool extent_boot(void);
+
+#endif /* JEMALLOC_INTERNAL_EXTENT_EXTERNS_H */
--- a/deps/jemalloc/include/jemalloc/internal/extent_inlines.h
+++ b/deps/jemalloc/include/jemalloc/internal/extent_inlines.h
@@ -0,0 +1,407 @@
+#ifndef JEMALLOC_INTERNAL_EXTENT_INLINES_H
+#define JEMALLOC_INTERNAL_EXTENT_INLINES_H
+
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/mutex_pool.h"
+#include "jemalloc/internal/pages.h"
+#include "jemalloc/internal/prng.h"
+#include "jemalloc/internal/ql.h"
+#include "jemalloc/internal/sz.h"
+
+/*
+ * Lock on behalf of extent by passing its address to mutex_pool_lock() on
+ * extent_mutex_pool.  extent must not be NULL.
+ */
+static inline void
+extent_lock(tsdn_t *tsdn, extent_t *extent) {
+	uintptr_t key;
+
+	assert(extent != NULL);
+	key = (uintptr_t)extent;
+	mutex_pool_lock(tsdn, &extent_mutex_pool, key);
+}
+
+/*
+ * Counterpart of extent_lock(): passes the extent's address to
+ * mutex_pool_unlock() on extent_mutex_pool.  extent must not be NULL.
+ */
+static inline void
+extent_unlock(tsdn_t *tsdn, extent_t *extent) {
+	uintptr_t key;
+
+	assert(extent != NULL);
+	key = (uintptr_t)extent;
+	mutex_pool_unlock(tsdn, &extent_mutex_pool, key);
+}
+
+/*
+ * Lock on behalf of two extents in one call by handing both addresses to
+ * mutex_pool_lock2() on extent_mutex_pool.  Neither extent may be NULL.
+ */
+static inline void
+extent_lock2(tsdn_t *tsdn, extent_t *extent1, extent_t *extent2) {
+	uintptr_t key1;
+	uintptr_t key2;
+
+	assert(extent1 != NULL && extent2 != NULL);
+	key1 = (uintptr_t)extent1;
+	key2 = (uintptr_t)extent2;
+	mutex_pool_lock2(tsdn, &extent_mutex_pool, key1, key2);
+}
+
+/*
+ * Counterpart of extent_lock2(): hands both extent addresses to
+ * mutex_pool_unlock2() on extent_mutex_pool.  Neither extent may be NULL.
+ */
+static inline void
+extent_unlock2(tsdn_t *tsdn, extent_t *extent1, extent_t *extent2) {
+	uintptr_t key1;
+	uintptr_t key2;
+
+	assert(extent1 != NULL && extent2 != NULL);
+	key1 = (uintptr_t)extent1;
+	key2 = (uintptr_t)extent2;
+	mutex_pool_unlock2(tsdn, &extent_mutex_pool, key1, key2);
+}
+
+/*
+ * Decode the arena index stored in e_bits and return the corresponding entry
+ * of arenas[], loaded with acquire ordering.  The decoded index must be below
+ * MALLOCX_ARENA_LIMIT (asserted).
+ */
+static inline arena_t *
+extent_arena_get(const extent_t *extent) {
+	unsigned arena_ind = (unsigned)((extent->e_bits &
+	    EXTENT_BITS_ARENA_MASK) >> EXTENT_BITS_ARENA_SHIFT);
+	/*
+	 * The following check is omitted (disabled via "false &&") because we
+	 * should never actually read a NULL arena pointer.
+	 */
+	if (false && arena_ind >= MALLOCX_ARENA_LIMIT) {
+		return NULL;
+	}
+	assert(arena_ind < MALLOCX_ARENA_LIMIT);
+	return (arena_t *)atomic_load_p(&arenas[arena_ind], ATOMIC_ACQUIRE);
+}
+
+/*
+ * Extract the size class index field from e_bits.  Unlike extent_szind_get(),
+ * the result is allowed to equal NSIZES.
+ */
+static inline szind_t
+extent_szind_get_maybe_invalid(const extent_t *extent) {
+	uint64_t field = extent->e_bits & EXTENT_BITS_SZIND_MASK;
+	szind_t szind = (szind_t)(field >> EXTENT_BITS_SZIND_SHIFT);
+
+	assert(szind <= NSIZES);
+	return szind;
+}
+
+/*
+ * Return the extent's size class index, asserting that it is a real index
+ * (strictly below NSIZES).
+ */
+static inline szind_t
+extent_szind_get(const extent_t *extent) {
+	szind_t szind;
+
+	szind = extent_szind_get_maybe_invalid(extent);
+	assert(szind < NSIZES); /* Never call when "invalid". */
+	return szind;
+}
+
+/* Convert the extent's size class index to a size via sz_index2size(). */
+static inline size_t
+extent_usize_get(const extent_t *extent) {
+	szind_t szind = extent_szind_get(extent);
+
+	return sz_index2size(szind);
+}
+
+/* Extract the serial number field from e_bits. */
+static inline size_t
+extent_sn_get(const extent_t *extent) {
+	uint64_t sn_bits = extent->e_bits & EXTENT_BITS_SN_MASK;
+
+	return (size_t)(sn_bits >> EXTENT_BITS_SN_SHIFT);
+}
+
+/* Extract the state field from e_bits as an extent_state_t. */
+static inline extent_state_t
+extent_state_get(const extent_t *extent) {
+	uint64_t state_bits = extent->e_bits & EXTENT_BITS_STATE_MASK;
+
+	return (extent_state_t)(state_bits >> EXTENT_BITS_STATE_SHIFT);
+}
+
+/* Report whether the zeroed flag in e_bits is set. */
+static inline bool
+extent_zeroed_get(const extent_t *extent) {
+	uint64_t zeroed_bits = extent->e_bits & EXTENT_BITS_ZEROED_MASK;
+
+	return (bool)(zeroed_bits >> EXTENT_BITS_ZEROED_SHIFT);
+}
+
+/* Report whether the committed flag in e_bits is set. */
+static inline bool
+extent_committed_get(const extent_t *extent) {
+	uint64_t committed_bits = extent->e_bits & EXTENT_BITS_COMMITTED_MASK;
+
+	return (bool)(committed_bits >> EXTENT_BITS_COMMITTED_SHIFT);
+}
+
+/* Report whether the slab flag in e_bits is set. */
+static inline bool
+extent_slab_get(const extent_t *extent) {
+	uint64_t slab_bits = extent->e_bits & EXTENT_BITS_SLAB_MASK;
+
+	return (bool)(slab_bits >> EXTENT_BITS_SLAB_SHIFT);
+}
+
+/*
+ * Extract the nfree field (number of free regions) from e_bits.  Only valid
+ * for extents whose slab flag is set (asserted).
+ */
+static inline unsigned
+extent_nfree_get(const extent_t *extent) {
+	uint64_t nfree_bits;
+
+	assert(extent_slab_get(extent));
+	nfree_bits = extent->e_bits & EXTENT_BITS_NFREE_MASK;
+	return (unsigned)(nfree_bits >> EXTENT_BITS_NFREE_SHIFT);
+}
+
+/*
+ * Return e_addr rounded down with PAGE_ADDR2BASE().  A slab extent must
+ * already have a page-aligned e_addr (asserted).
+ */
+static inline void *
+extent_base_get(const extent_t *extent) {
+	void *base;
+
+	assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) ||
+	    !extent_slab_get(extent));
+	base = PAGE_ADDR2BASE(extent->e_addr);
+	return base;
+}
+
+/*
+ * Return e_addr exactly as stored (no rounding).  A slab extent must have a
+ * page-aligned e_addr (asserted).
+ */
+static inline void *
+extent_addr_get(const extent_t *extent) {
+	void *addr;
+
+	assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) ||
+	    !extent_slab_get(extent));
+	addr = extent->e_addr;
+	return addr;
+}
+
+/* Return the size portion of e_size_esn (the bits in EXTENT_SIZE_MASK). */
+static inline size_t
+extent_size_get(const extent_t *extent) {
+	size_t size = extent->e_size_esn & EXTENT_SIZE_MASK;
+
+	return size;
+}
+
+/*
+ * Return the extent-structure serial number, i.e. the bits of e_size_esn
+ * selected by EXTENT_ESN_MASK.
+ */
+static inline size_t
+extent_esn_get(const extent_t *extent) {
+	size_t esn = extent->e_size_esn & EXTENT_ESN_MASK;
+
+	return esn;
+}
+
+/*
+ * Return e_bsize, the base extent size.  e_bsize shares storage with
+ * e_size_esn (they are members of the same union).
+ */
+static inline size_t
+extent_bsize_get(const extent_t *extent) {
+	return extent->e_bsize;
+}
+
+/* Return the address one PAGE below the extent's base address. */
+static inline void *
+extent_before_get(const extent_t *extent) {
+	uintptr_t base = (uintptr_t)extent_base_get(extent);
+
+	return (void *)(base - PAGE);
+}
+
+/*
+ * Return the address of the extent's last page: base address plus size,
+ * minus one PAGE.
+ */
+static inline void *
+extent_last_get(const extent_t *extent) {
+	uintptr_t base = (uintptr_t)extent_base_get(extent);
+	size_t size = extent_size_get(extent);
+
+	return (void *)(base + size - PAGE);
+}
+
+/* Return the first address past the extent: base address plus size. */
+static inline void *
+extent_past_get(const extent_t *extent) {
+	uintptr_t base = (uintptr_t)extent_base_get(extent);
+	size_t size = extent_size_get(extent);
+
+	return (void *)(base + size);
+}
+
+/*
+ * Return a mutable pointer to the slab metadata embedded in the extent.  Only
+ * valid for extents whose slab flag is set (asserted).
+ */
+static inline arena_slab_data_t *
+extent_slab_data_get(extent_t *extent) {
+	arena_slab_data_t *slab_data;
+
+	assert(extent_slab_get(extent));
+	slab_data = &extent->e_slab_data;
+	return slab_data;
+}
+
+/*
+ * Read-only variant of extent_slab_data_get().  Only valid for extents whose
+ * slab flag is set (asserted).
+ */
+static inline const arena_slab_data_t *
+extent_slab_data_get_const(const extent_t *extent) {
+	const arena_slab_data_t *slab_data;
+
+	assert(extent_slab_get(extent));
+	slab_data = &extent->e_slab_data;
+	return slab_data;
+}
+
+/*
+ * Load e_prof_tctx with acquire ordering and return it as a prof_tctx_t
+ * pointer.  e_prof_tctx shares storage with e_slab_data (same union).
+ */
+static inline prof_tctx_t *
+extent_prof_tctx_get(const extent_t *extent) {
+	return (prof_tctx_t *)atomic_load_p(&extent->e_prof_tctx,
+	    ATOMIC_ACQUIRE);
+}
+
+/*
+ * Store the index of arena in the arena_ind field of e_bits, leaving all
+ * other fields untouched.  A NULL arena is encoded as all 1 bits.
+ */
+static inline void
+extent_arena_set(extent_t *extent, arena_t *arena) {
+	unsigned arena_ind;
+	uint64_t others;
+
+	if (arena != NULL) {
+		arena_ind = arena_ind_get(arena);
+	} else {
+		/* All 1 bits means "unassociated". */
+		arena_ind = (1U << MALLOCX_ARENA_BITS) - 1;
+	}
+	others = extent->e_bits & ~EXTENT_BITS_ARENA_MASK;
+	extent->e_bits = others |
+	    ((uint64_t)arena_ind << EXTENT_BITS_ARENA_SHIFT);
+}
+
+/* Set e_addr to addr; no alignment check is performed here. */
+static inline void
+extent_addr_set(extent_t *extent, void *addr) {
+	extent->e_addr = addr;
+}
+
+/*
+ * For alignments smaller than a page, advance e_addr by an offset derived from
+ * the owning arena's offset_state PRNG.  The offset is r shifted left by
+ * lg_floor(CACHELINE_CEILING(alignment)) bits, and the resulting address is
+ * asserted to still satisfy alignment.  For alignment >= PAGE nothing changes.
+ *
+ * e_addr must be page-aligned on entry (asserted).  tsdn is not used by this
+ * function.
+ */
+static inline void
+extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment) {
+	assert(extent_base_get(extent) == extent_addr_get(extent));
+
+	if (alignment < PAGE) {
+		unsigned lg_range = LG_PAGE -
+		    lg_floor(CACHELINE_CEILING(alignment));
+		/*
+		 * NOTE(review): r is presumably in [0, 2^lg_range); confirm
+		 * against prng_lg_range_zu().
+		 */
+		size_t r =
+		    prng_lg_range_zu(&extent_arena_get(extent)->offset_state,
+		    lg_range, true);
+		/* LG_PAGE - lg_range == lg_floor(CACHELINE_CEILING(alignment)). */
+		uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE -
+		    lg_range);
+		extent->e_addr = (void *)((uintptr_t)extent->e_addr +
+		    random_offset);
+		assert(ALIGNMENT_ADDR2BASE(extent->e_addr, alignment) ==
+		    extent->e_addr);
+	}
+}
+
+/*
+ * Store size in e_size_esn while preserving the bits outside
+ * EXTENT_SIZE_MASK.  size must have no bits set outside EXTENT_SIZE_MASK
+ * (asserted).
+ */
+static inline void
+extent_size_set(extent_t *extent, size_t size) {
+	size_t kept;
+
+	assert((size & ~EXTENT_SIZE_MASK) == 0);
+	kept = extent->e_size_esn & ~EXTENT_SIZE_MASK;
+	extent->e_size_esn = size | kept;
+}
+
+/*
+ * Store esn in the EXTENT_ESN_MASK bits of e_size_esn, preserving the other
+ * bits.  Bits of esn outside EXTENT_ESN_MASK are silently discarded.
+ */
+static inline void
+extent_esn_set(extent_t *extent, size_t esn) {
+	size_t kept = extent->e_size_esn & ~EXTENT_ESN_MASK;
+	size_t field = esn & EXTENT_ESN_MASK;
+
+	extent->e_size_esn = kept | field;
+}
+
+/*
+ * Set e_bsize, the base extent size.  e_bsize shares storage with e_size_esn
+ * (same union), so this overwrites any size/esn previously stored there.
+ */
+static inline void
+extent_bsize_set(extent_t *extent, size_t bsize) {
+	extent->e_bsize = bsize;
+}
+
+/*
+ * Store szind in the szind field of e_bits, leaving all other fields
+ * untouched.  szind may be at most NSIZES (asserted).
+ */
+static inline void
+extent_szind_set(extent_t *extent, szind_t szind) {
+	uint64_t others;
+	uint64_t field;
+
+	assert(szind <= NSIZES); /* NSIZES means "invalid". */
+	others = extent->e_bits & ~EXTENT_BITS_SZIND_MASK;
+	field = (uint64_t)szind << EXTENT_BITS_SZIND_SHIFT;
+	extent->e_bits = others | field;
+}
+
+/*
+ * Store nfree in the nfree field of e_bits, leaving all other fields
+ * untouched.  Only valid for extents whose slab flag is set (asserted).
+ */
+static inline void
+extent_nfree_set(extent_t *extent, unsigned nfree) {
+	uint64_t others;
+	uint64_t field;
+
+	assert(extent_slab_get(extent));
+	others = extent->e_bits & ~EXTENT_BITS_NFREE_MASK;
+	field = (uint64_t)nfree << EXTENT_BITS_NFREE_SHIFT;
+	extent->e_bits = others | field;
+}
+
+/*
+ * Increment the nfree field in place by adding one unit at the field's bit
+ * position.  There is no range check, so a carry out of the field would spill
+ * into the higher bits of e_bits.  Requires the slab flag (asserted).
+ */
+static inline void
+extent_nfree_inc(extent_t *extent) {
+	const uint64_t one_region = (uint64_t)1U << EXTENT_BITS_NFREE_SHIFT;
+
+	assert(extent_slab_get(extent));
+	extent->e_bits = extent->e_bits + one_region;
+}
+
+/*
+ * Decrement the nfree field in place by subtracting one unit at the field's
+ * bit position.  There is no range check, so a borrow out of the field would
+ * affect the higher bits of e_bits.  Requires the slab flag (asserted).
+ */
+static inline void
+extent_nfree_dec(extent_t *extent) {
+	const uint64_t one_region = (uint64_t)1U << EXTENT_BITS_NFREE_SHIFT;
+
+	assert(extent_slab_get(extent));
+	extent->e_bits = extent->e_bits - one_region;
+}
+
+/*
+ * Store sn in the serial number field of e_bits, leaving all other fields
+ * untouched.
+ */
+static inline void
+extent_sn_set(extent_t *extent, size_t sn) {
+	uint64_t others = extent->e_bits & ~EXTENT_BITS_SN_MASK;
+	uint64_t field = (uint64_t)sn << EXTENT_BITS_SN_SHIFT;
+
+	extent->e_bits = others | field;
+}
+
+/*
+ * Store state in the state field of e_bits, leaving all other fields
+ * untouched.
+ */
+static inline void
+extent_state_set(extent_t *extent, extent_state_t state) {
+	uint64_t others = extent->e_bits & ~EXTENT_BITS_STATE_MASK;
+	uint64_t field = (uint64_t)state << EXTENT_BITS_STATE_SHIFT;
+
+	extent->e_bits = others | field;
+}
+
+/* Set or clear the zeroed flag in e_bits, leaving other fields untouched. */
+static inline void
+extent_zeroed_set(extent_t *extent, bool zeroed) {
+	uint64_t others = extent->e_bits & ~EXTENT_BITS_ZEROED_MASK;
+	uint64_t field = (uint64_t)zeroed << EXTENT_BITS_ZEROED_SHIFT;
+
+	extent->e_bits = others | field;
+}
+
+/* Set or clear the committed flag in e_bits, leaving other fields untouched. */
+static inline void
+extent_committed_set(extent_t *extent, bool committed) {
+	uint64_t others = extent->e_bits & ~EXTENT_BITS_COMMITTED_MASK;
+	uint64_t field = (uint64_t)committed << EXTENT_BITS_COMMITTED_SHIFT;
+
+	extent->e_bits = others | field;
+}
+
+/* Set or clear the slab flag in e_bits, leaving other fields untouched. */
+static inline void
+extent_slab_set(extent_t *extent, bool slab) {
+	uint64_t others = extent->e_bits & ~EXTENT_BITS_SLAB_MASK;
+	uint64_t field = (uint64_t)slab << EXTENT_BITS_SLAB_SHIFT;
+
+	extent->e_bits = others | field;
+}
+
+/*
+ * Store tctx into e_prof_tctx with release ordering (pairs with the acquire
+ * load in extent_prof_tctx_get()).
+ */
+static inline void
+extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) {
+	atomic_store_p(&extent->e_prof_tctx, tctx, ATOMIC_RELEASE);
+}
+
+/*
+ * Initialize an extent: sets arena, address, size, slab flag, szind, serial
+ * number, state, zeroed and committed flags, and resets the ql_link list
+ * linkage.  When config_prof is enabled the prof tctx pointer is cleared.
+ * The nfree field and the esn bits are not written here.
+ *
+ * addr must be page-aligned when slab is true (asserted).
+ */
+static inline void
+extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size,
+    bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed,
+    bool committed) {
+	assert(addr == PAGE_ADDR2BASE(addr) || !slab);
+
+	extent_arena_set(extent, arena);
+	extent_addr_set(extent, addr);
+	extent_size_set(extent, size);
+	extent_slab_set(extent, slab);
+	extent_szind_set(extent, szind);
+	extent_sn_set(extent, sn);
+	extent_state_set(extent, state);
+	extent_zeroed_set(extent, zeroed);
+	extent_committed_set(extent, committed);
+	ql_elm_new(extent, ql_link);
+	if (config_prof) {
+		extent_prof_tctx_set(extent, NULL);
+	}
+}
+
+/*
+ * Initialize an extent that records its size via e_bsize (the base extent
+ * size) instead of e_size_esn.  The extent gets no arena association, is not a
+ * slab, has szind NSIZES ("invalid"), and is marked active, zeroed, and
+ * committed.  List linkage and the prof tctx pointer are not touched.
+ */
+static inline void
+extent_binit(extent_t *extent, void *addr, size_t bsize, size_t sn) {
+	extent_arena_set(extent, NULL);
+	extent_addr_set(extent, addr);
+	extent_bsize_set(extent, bsize);
+	extent_slab_set(extent, false);
+	extent_szind_set(extent, NSIZES);
+	extent_sn_set(extent, sn);
+	extent_state_set(extent, extent_state_active);
+	extent_zeroed_set(extent, true);
+	extent_committed_set(extent, true);
+}
+
+/* Initialize an extent list head via ql_new(). */
+static inline void
+extent_list_init(extent_list_t *list) {
+	ql_new(list);
+}
+
+/* Return the result of ql_first() on the list (its first element). */
+static inline extent_t *
+extent_list_first(const extent_list_t *list) {
+	return ql_first(list);
+}
+
+/* Return the result of ql_last() on the list, following the ql_link linkage. */
+static inline extent_t *
+extent_list_last(const extent_list_t *list) {
+	return ql_last(list, ql_link);
+}
+
+/* Insert extent at the tail of the list using its ql_link linkage. */
+static inline void
+extent_list_append(extent_list_t *list, extent_t *extent) {
+	ql_tail_insert(list, extent, ql_link);
+}
+
+/*
+ * Replace to_remove with to_insert at the same position in the list:
+ * to_insert is first linked in right after to_remove, then to_remove is
+ * unlinked.
+ */
+static inline void
+extent_list_replace(extent_list_t *list, extent_t *to_remove,
+    extent_t *to_insert) {
+	ql_after_insert(to_remove, to_insert, ql_link);
+	ql_remove(list, to_remove, ql_link);
+}
+
+/* Unlink extent from the list using its ql_link linkage. */
+static inline void
+extent_list_remove(extent_list_t *list, extent_t *extent) {
+	ql_remove(list, extent, ql_link);
+}
+
+/*
+ * Three-way comparison of two extents by serial number.  Returns -1, 0, or 1
+ * when a's serial number is less than, equal to, or greater than b's.
+ */
+static inline int
+extent_sn_comp(const extent_t *a, const extent_t *b) {
+	size_t lhs = extent_sn_get(a);
+	size_t rhs = extent_sn_get(b);
+
+	if (lhs < rhs) {
+		return -1;
+	}
+	return (lhs > rhs) ? 1 : 0;
+}
+
+/*
+ * Three-way comparison of two extents by extent-structure serial number
+ * (esn).  Returns -1, 0, or 1.
+ */
+static inline int
+extent_esn_comp(const extent_t *a, const extent_t *b) {
+	size_t lhs = extent_esn_get(a);
+	size_t rhs = extent_esn_get(b);
+
+	if (lhs < rhs) {
+		return -1;
+	}
+	return (lhs > rhs) ? 1 : 0;
+}
+
+/*
+ * Three-way comparison of two extents by the address they describe
+ * (extent_addr_get()).  Returns -1, 0, or 1.
+ */
+static inline int
+extent_ad_comp(const extent_t *a, const extent_t *b) {
+	uintptr_t lhs = (uintptr_t)extent_addr_get(a);
+	uintptr_t rhs = (uintptr_t)extent_addr_get(b);
+
+	if (lhs < rhs) {
+		return -1;
+	}
+	return (lhs > rhs) ? 1 : 0;
+}
+
+/*
+ * Three-way comparison of two extents by the address of the extent_t
+ * structures themselves (not the memory they describe).  Returns -1, 0, or 1.
+ */
+static inline int
+extent_ead_comp(const extent_t *a, const extent_t *b) {
+	uintptr_t lhs = (uintptr_t)a;
+	uintptr_t rhs = (uintptr_t)b;
+
+	if (lhs < rhs) {
+		return -1;
+	}
+	return (lhs > rhs) ? 1 : 0;
+}
+
+/*
+ * Order extents by serial number first, falling back to address comparison
+ * when the serial numbers are equal.
+ */
+static inline int
+extent_snad_comp(const extent_t *a, const extent_t *b) {
+	int sn_order = extent_sn_comp(a, b);
+
+	return (sn_order != 0) ? sn_order : extent_ad_comp(a, b);
+}
+
+/*
+ * Order extents by extent-structure serial number first, falling back to
+ * comparison of the extent_t addresses when the serial numbers are equal.
+ */
+static inline int
+extent_esnead_comp(const extent_t *a, const extent_t *b) {
+	int esn_order = extent_esn_comp(a, b);
+
+	return (esn_order != 0) ? esn_order : extent_ead_comp(a, b);
+}
+
+#endif /* JEMALLOC_INTERNAL_EXTENT_INLINES_H */
--- a/deps/jemalloc/include/jemalloc/internal/extent_mmap.h
+++ b/deps/jemalloc/include/jemalloc/internal/extent_mmap.h
@@ -0,0 +1,10 @@
+#ifndef JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H
+#define JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H
+
+extern bool opt_retain;
+
+void *extent_alloc_mmap(void *new_addr, size_t size, size_t alignment,
+    bool *zero, bool *commit);
+bool extent_dalloc_mmap(void *addr, size_t size);
+
+#endif /* JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H */
--- a/deps/jemalloc/include/jemalloc/internal/extent_structs.h
+++ b/deps/jemalloc/include/jemalloc/internal/extent_structs.h
@@ -0,0 +1,199 @@
+#ifndef JEMALLOC_INTERNAL_EXTENT_STRUCTS_H
+#define JEMALLOC_INTERNAL_EXTENT_STRUCTS_H
+
+#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/bitmap.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/ql.h"
+#include "jemalloc/internal/rb.h"
+#include "jemalloc/internal/ph.h"
+#include "jemalloc/internal/size_classes.h"
+
+/*
+ * Extent state, stored in the state field of extent_s.e_bits.  That field is
+ * two bits wide (EXTENT_BITS_STATE_MASK is 0x3 shifted into place), so the
+ * values here must stay within 0..3.
+ */
+typedef enum {
+	extent_state_active   = 0,
+	extent_state_dirty    = 1,
+	extent_state_muzzy    = 2,
+	extent_state_retained = 3
+} extent_state_t;
+
+/* Extent (span of pages).  Use accessor functions for e_* fields. */
+struct extent_s {
+	/*
+	 * Bitfield containing several fields:
+	 *
+	 * a: arena_ind
+	 * b: slab
+	 * c: committed
+	 * z: zeroed
+	 * t: state
+	 * i: szind
+	 * f: nfree
+	 * n: sn
+	 *
+	 * nnnnnnnn ... nnnnnfff fffffffi iiiiiiit tzcbaaaa aaaaaaaa
+	 *
+	 * arena_ind: Arena from which this extent came, or all 1 bits if
+	 *            unassociated.
+	 *
+	 * slab: The slab flag indicates whether the extent is used for a slab
+	 *       of small regions.  This helps differentiate small size classes,
+	 *       and it indicates whether interior pointers can be looked up via
+	 *       iealloc().
+	 *
+	 * committed: The committed flag indicates whether physical memory is
+	 *            committed to the extent, whether explicitly or implicitly
+	 *            as on a system that overcommits and satisfies physical
+	 *            memory needs on demand via soft page faults.
+	 *
+	 * zeroed: The zeroed flag is used by extent recycling code to track
+	 *         whether memory is zero-filled.
+	 *
+	 * state: The state flag is an extent_state_t.
+	 *
+	 * szind: The szind flag indicates usable size class index for
+	 *        allocations residing in this extent, regardless of whether the
+	 *        extent is a slab.  Extent size and usable size often differ
+	 *        even for non-slabs, either due to sz_large_pad or promotion of
+	 *        sampled small regions.
+	 *
+	 * nfree: Number of free regions in slab.
+	 *
+	 * sn: Serial number (potentially non-unique).
+	 *
+	 *     Serial numbers may wrap around if !opt_retain, but as long as
+	 *     comparison functions fall back on address comparison for equal
+	 *     serial numbers, stable (if imperfect) ordering is maintained.
+	 *
+	 *     Serial numbers may not be unique even in the absence of
+	 *     wrap-around, e.g. when splitting an extent and assigning the same
+	 *     serial number to both resulting adjacent extents.
+	 */
+	uint64_t		e_bits;
+#define EXTENT_BITS_ARENA_SHIFT		0
+#define EXTENT_BITS_ARENA_MASK \
+    (((uint64_t)(1U << MALLOCX_ARENA_BITS) - 1) << EXTENT_BITS_ARENA_SHIFT)
+
+#define EXTENT_BITS_SLAB_SHIFT		MALLOCX_ARENA_BITS
+#define EXTENT_BITS_SLAB_MASK \
+    ((uint64_t)0x1U << EXTENT_BITS_SLAB_SHIFT)
+
+#define EXTENT_BITS_COMMITTED_SHIFT	(MALLOCX_ARENA_BITS + 1)
+#define EXTENT_BITS_COMMITTED_MASK \
+    ((uint64_t)0x1U << EXTENT_BITS_COMMITTED_SHIFT)
+
+#define EXTENT_BITS_ZEROED_SHIFT	(MALLOCX_ARENA_BITS + 2)
+#define EXTENT_BITS_ZEROED_MASK \
+    ((uint64_t)0x1U << EXTENT_BITS_ZEROED_SHIFT)
+
+#define EXTENT_BITS_STATE_SHIFT		(MALLOCX_ARENA_BITS + 3)
+#define EXTENT_BITS_STATE_MASK \
+    ((uint64_t)0x3U << EXTENT_BITS_STATE_SHIFT)
+
+#define EXTENT_BITS_SZIND_SHIFT		(MALLOCX_ARENA_BITS + 5)
+#define EXTENT_BITS_SZIND_MASK \
+    (((uint64_t)(1U << LG_CEIL_NSIZES) - 1) << EXTENT_BITS_SZIND_SHIFT)
+
+#define EXTENT_BITS_NFREE_SHIFT \
+    (MALLOCX_ARENA_BITS + 5 + LG_CEIL_NSIZES)
+#define EXTENT_BITS_NFREE_MASK \
+    ((uint64_t)((1U << (LG_SLAB_MAXREGS + 1)) - 1) << EXTENT_BITS_NFREE_SHIFT)
+
+#define EXTENT_BITS_SN_SHIFT \
+    (MALLOCX_ARENA_BITS + 5 + LG_CEIL_NSIZES + (LG_SLAB_MAXREGS + 1))
+#define EXTENT_BITS_SN_MASK		(UINT64_MAX << EXTENT_BITS_SN_SHIFT)
+
+	/* Pointer to the extent that this structure is responsible for. */
+	void			*e_addr;
+
+	union {
+		/*
+		 * Extent size and serial number associated with the extent
+		 * structure (different than the serial number for the extent at
+		 * e_addr).
+		 *
+		 * ssssssss [...] ssssssss ssssnnnn nnnnnnnn
+		 */
+		size_t			e_size_esn;
+	#define EXTENT_SIZE_MASK	((size_t)~(PAGE-1))
+	#define EXTENT_ESN_MASK		((size_t)PAGE-1)
+		/* Base extent size, which may not be a multiple of PAGE. */
+		size_t			e_bsize;
+	};
+
+	union {
+		/*
+		 * List linkage, used by a variety of lists:
+		 * - arena_bin_t's slabs_full
+		 * - extents_t's LRU
+		 * - stashed dirty extents
+		 * - arena's large allocations
+		 */
+		ql_elm(extent_t)	ql_link;
+		/* Red-black tree linkage, used by arena's extent_avail. */
+		rb_node(extent_t)	rb_link;
+	};
+
+	/* Linkage for per size class sn/address-ordered heaps. */
+	phn(extent_t)		ph_link;
+
+	union {
+		/* Small region slab metadata. */
+		arena_slab_data_t	e_slab_data;
+
+		/*
+		 * Profile counters, used for large objects.  Points to a
+		 * prof_tctx_t.
+		 */
+		atomic_p_t		e_prof_tctx;
+	};
+};
+typedef ql_head(extent_t) extent_list_t;
+typedef rb_tree(extent_t) extent_tree_t;
+typedef ph(extent_t) extent_heap_t;
+
+/* Quantized collection of extents, with built-in LRU queue. */
+struct extents_s {
+	malloc_mutex_t		mtx;
+
+	/*
+	 * Quantized per size class heaps of extents.
+	 *
+	 * Synchronization: mtx.
+	 */
+	extent_heap_t		heaps[NPSIZES+1];
+
+	/*
+	 * Bitmap for which set bits correspond to non-empty heaps.
+	 *
+	 * Synchronization: mtx.
+	 */
+	bitmap_t		bitmap[BITMAP_GROUPS(NPSIZES+1)];
+
+	/*
+	 * LRU of all extents in heaps.
+	 *
+	 * Synchronization: mtx.
+	 */
+	extent_list_t		lru;
+
+	/*
+	 * Page sum for all extents in heaps.
+	 *
+	 * The synchronization here is a little tricky.  Modifications to npages
+	 * must hold mtx, but reads need not (though, a reader who sees npages
+	 * without holding the mutex can't assume anything about the rest of the
+	 * state of the extents_t).
+	 */
+	atomic_zu_t		npages;
+
+	/* All stored extents must be in the same state. */
+	extent_state_t		state;
+
+	/*
+	 * If true, delay coalescing until eviction; otherwise coalesce during
+	 * deallocation.
+	 */
+	bool			delay_coalesce;
+};
+
+#endif /* JEMALLOC_INTERNAL_EXTENT_STRUCTS_H */
--- a/deps/jemalloc/include/jemalloc/internal/extent_types.h
+++ b/deps/jemalloc/include/jemalloc/internal/extent_types.h
@@ -0,0 +1,9 @@
+#ifndef JEMALLOC_INTERNAL_EXTENT_TYPES_H
+#define JEMALLOC_INTERNAL_EXTENT_TYPES_H
+
+typedef struct extent_s extent_t;
+typedef struct extents_s extents_t;
+
+#define EXTENT_HOOKS_INITIALIZER	NULL
+
+#endif /* JEMALLOC_INTERNAL_EXTENT_TYPES_H */
--- a/deps/jemalloc/include/jemalloc/internal/hooks.h
+++ b/deps/jemalloc/include/jemalloc/internal/hooks.h
@@ -0,0 +1,19 @@
+#ifndef JEMALLOC_INTERNAL_HOOKS_H
+#define JEMALLOC_INTERNAL_HOOKS_H
+
+extern JEMALLOC_EXPORT void (*hooks_arena_new_hook)();
+extern JEMALLOC_EXPORT void (*hooks_libc_hook)();
+
+/*
+ * Expands to a comma expression that first invokes hook (only when hook is
+ * non-NULL) and then evaluates to fn.  With the redefinitions below, a call
+ * written as e.g. open(...) therefore runs hooks_libc_hook before calling the
+ * real function.  Note that hook is evaluated twice.
+ */
+#define JEMALLOC_HOOK(fn, hook) ((void)(hook != NULL && (hook(), 0)), fn)
+
+#define open JEMALLOC_HOOK(open, hooks_libc_hook)
+#define read JEMALLOC_HOOK(read, hooks_libc_hook)
+#define write JEMALLOC_HOOK(write, hooks_libc_hook)
+#define readlink JEMALLOC_HOOK(readlink, hooks_libc_hook)
+#define close JEMALLOC_HOOK(close, hooks_libc_hook)
+#define creat JEMALLOC_HOOK(creat, hooks_libc_hook)
+#define secure_getenv JEMALLOC_HOOK(secure_getenv, hooks_libc_hook)
+/* Note that this is undef'd and re-define'd in src/prof.c. */
+#define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, hooks_libc_hook)
+
+#endif /* JEMALLOC_INTERNAL_HOOKS_H */
--- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h
@@ -0,0 +1,82 @@
+#ifndef JEMALLOC_INTERNAL_DECLS_H
+#define JEMALLOC_INTERNAL_DECLS_H
+
+#include <math.h>
+#ifdef _WIN32
+#  include <windows.h>
+#  include "msvc_compat/windows_extra.h"
+
+#else
+#  include <sys/param.h>
+#  include <sys/mman.h>
+#  if !defined(__pnacl__) && !defined(__native_client__)
+#    include <sys/syscall.h>
+#    if !defined(SYS_write) && defined(__NR_write)
+#      define SYS_write __NR_write
+#    endif
+#    if defined(SYS_open) && defined(__aarch64__)
+       /* Android headers may define SYS_open to __NR_open even though
+        * __NR_open may not exist on AArch64 (superseded by __NR_openat). */
+#      undef SYS_open
+#    endif
+#    include <sys/uio.h>
+#  endif
+#  include <pthread.h>
+#  include <signal.h>
+#  ifdef JEMALLOC_OS_UNFAIR_LOCK
+#    include <os/lock.h>
+#  endif
+#  ifdef JEMALLOC_GLIBC_MALLOC_HOOK
+#    include <sched.h>
+#  endif
+#  include <errno.h>
+#  include <sys/time.h>
+#  include <time.h>
+#  ifdef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME
+#    include <mach/mach_time.h>
+#  endif
+#endif
+#include <sys/types.h>
+
+#include <limits.h>
+#ifndef SIZE_T_MAX
+#  define SIZE_T_MAX	SIZE_MAX
+#endif
+#ifndef SSIZE_MAX
+#  define SSIZE_MAX	((ssize_t)(SIZE_T_MAX >> 1))
+#endif
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stddef.h>
+#ifndef offsetof
+#  define offsetof(type, member)	((size_t)&(((type *)NULL)->member))
+#endif
+#include <string.h>
+#include <strings.h>
+#include <ctype.h>
+#ifdef _MSC_VER
+#  include <io.h>
+typedef intptr_t ssize_t;
+#  define PATH_MAX 1024
+#  define STDERR_FILENO 2
+#  define __func__ __FUNCTION__
+#  ifdef JEMALLOC_HAS_RESTRICT
+#    define restrict __restrict
+#  endif
+/* Disable warnings about deprecated system functions. */
+#  pragma warning(disable: 4996)
+#if _MSC_VER < 1800
+/*
+ * Local isblank() used when _MSC_VER < 1800: returns 1 for a horizontal tab
+ * or a space, 0 for anything else.
+ */
+static int
+isblank(int c) {
+	switch (c) {
+	case '\t':
+	case ' ':
+		return 1;
+	default:
+		return 0;
+	}
+}
+#endif
+#else
+#  include <unistd.h>
+#endif
+#include <fcntl.h>
+
+#endif /* JEMALLOC_INTERNAL_DECLS_H */
--- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_externs.h
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_externs.h
@@ -0,0 +1,53 @@
+#ifndef JEMALLOC_INTERNAL_EXTERNS_H
+#define JEMALLOC_INTERNAL_EXTERNS_H
+
+#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/tsd_types.h"
+
+/* TSD checks this to set thread local slow state accordingly. */
+extern bool malloc_slow;
+
+/* Run-time options. */
+extern bool opt_abort;
+extern bool opt_abort_conf;
+extern const char *opt_junk;
+extern bool opt_junk_alloc;
+extern bool opt_junk_free;
+extern bool opt_utrace;
+extern bool opt_xmalloc;
+extern bool opt_zero;
+extern unsigned opt_narenas;
+
+/* Number of CPUs. */
+extern unsigned ncpus;
+
+/* Number of arenas used for automatic multiplexing of threads and arenas. */
+extern unsigned narenas_auto;
+
+/*
+ * Arenas that are used to service external requests.  Not all elements of the
+ * arenas array are necessarily used; arenas are created lazily as needed.
+ */
+extern atomic_p_t arenas[];
+
+void *a0malloc(size_t size);
+void a0dalloc(void *ptr);
+void *bootstrap_malloc(size_t size);
+void *bootstrap_calloc(size_t num, size_t size);
+void bootstrap_free(void *ptr);
+void arena_set(unsigned ind, arena_t *arena);
+unsigned narenas_total_get(void);
+arena_t *arena_init(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks);
+arena_tdata_t *arena_tdata_get_hard(tsd_t *tsd, unsigned ind);
+arena_t *arena_choose_hard(tsd_t *tsd, bool internal);
+void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind);
+void iarena_cleanup(tsd_t *tsd);
+void arena_cleanup(tsd_t *tsd);
+void arenas_tdata_cleanup(tsd_t *tsd);
+void jemalloc_prefork(void);
+void jemalloc_postfork_parent(void);
+void jemalloc_postfork_child(void);
+bool malloc_initialized(void);
+
+#endif /* JEMALLOC_INTERNAL_EXTERNS_H */
--- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_includes.h
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_includes.h
@@ -0,0 +1,94 @@
+#ifndef JEMALLOC_INTERNAL_INCLUDES_H
+#define JEMALLOC_INTERNAL_INCLUDES_H
+
+/*
+ * jemalloc can conceptually be broken into components (arena, tcache, etc.),
+ * but there are circular dependencies that cannot be broken without
+ * substantial performance degradation.
+ *
+ * Historically, we dealt with this by splitting each header into four
+ * sections (types, structs, externs, and inlines), and included each header
+ * file multiple times in this file, picking out the portion we want on each
+ * pass using the following #defines:
+ *   JEMALLOC_H_TYPES   : Preprocessor-defined constants and pseudo-opaque data
+ *                        types.
+ *   JEMALLOC_H_STRUCTS : Data structures.
+ *   JEMALLOC_H_EXTERNS : Extern data declarations and function prototypes.
+ *   JEMALLOC_H_INLINES : Inline functions.
+ *
+ * We're moving toward a world in which the dependencies are explicit; each file
+ * will #include the headers it depends on (rather than relying on them being
+ * implicitly available via this file including every header file in the
+ * project).
+ *
+ * We're now in an intermediate state: we've broken up the header files to avoid
+ * having to include each one multiple times, but have not yet moved the
+ * dependency information into the header files (i.e. we still rely on the
+ * ordering in this file to ensure all a header's dependencies are available in
+ * its translation unit).  Each component is now broken up into multiple header
+ * files, corresponding to the sections above (e.g. instead of "foo.h", we now
+ * have "foo_types.h", "foo_structs.h", "foo_externs.h", "foo_inlines.h").
+ *
+ * Those files which have been converted to explicitly include their
+ * inter-component dependencies are now in the initial HERMETIC HEADERS
+ * section.  All headers may still rely on jemalloc_preamble.h (which, by fiat,
+ * must be included first in every translation unit) for system headers and
+ * global jemalloc definitions, however.
+ */
+
+/******************************************************************************/
+/* TYPES */
+/******************************************************************************/
+
+#include "jemalloc/internal/extent_types.h"
+#include "jemalloc/internal/base_types.h"
+#include "jemalloc/internal/arena_types.h"
+#include "jemalloc/internal/tcache_types.h"
+#include "jemalloc/internal/prof_types.h"
+
+/******************************************************************************/
+/* STRUCTS */
+/******************************************************************************/
+
+#include "jemalloc/internal/arena_structs_a.h"
+#include "jemalloc/internal/extent_structs.h"
+#include "jemalloc/internal/base_structs.h"
+#include "jemalloc/internal/prof_structs.h"
+#include "jemalloc/internal/arena_structs_b.h"
+#include "jemalloc/internal/tcache_structs.h"
+#include "jemalloc/internal/background_thread_structs.h"
+
+/******************************************************************************/
+/* EXTERNS */
+/******************************************************************************/
+
+#include "jemalloc/internal/jemalloc_internal_externs.h"
+#include "jemalloc/internal/extent_externs.h"
+#include "jemalloc/internal/base_externs.h"
+#include "jemalloc/internal/arena_externs.h"
+#include "jemalloc/internal/large_externs.h"
+#include "jemalloc/internal/tcache_externs.h"
+#include "jemalloc/internal/prof_externs.h"
+#include "jemalloc/internal/background_thread_externs.h"
+
+/******************************************************************************/
+/* INLINES */
+/******************************************************************************/
+
+#include "jemalloc/internal/jemalloc_internal_inlines_a.h"
+#include "jemalloc/internal/base_inlines.h"
+/*
+ * Include portions of arena code interleaved with tcache code in order to
+ * resolve circular dependencies.
+ */
+#include "jemalloc/internal/prof_inlines_a.h"
+#include "jemalloc/internal/arena_inlines_a.h"
+#include "jemalloc/internal/extent_inlines.h"
+#include "jemalloc/internal/jemalloc_internal_inlines_b.h"
+#include "jemalloc/internal/tcache_inlines.h"
+#include "jemalloc/internal/arena_inlines_b.h"
+#include "jemalloc/internal/jemalloc_internal_inlines_c.h"
+#include "jemalloc/internal/prof_inlines_b.h"
+#include "jemalloc/internal/background_thread_inlines.h"
+
+#endif /* JEMALLOC_INTERNAL_INCLUDES_H */
--- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_a.h
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_a.h
@@ -0,0 +1,171 @@
+#ifndef JEMALLOC_INTERNAL_INLINES_A_H
+#define JEMALLOC_INTERNAL_INLINES_A_H
+
+#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/bit_util.h"
+#include "jemalloc/internal/jemalloc_internal_types.h"
+#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/ticker.h"
+
+/*
+ * Return the id of the CPU the caller is running on, obtained from
+ * sched_getcpu() when JEMALLOC_HAVE_SCHED_GETCPU is defined.  Without that
+ * macro the function must never be reached.  Requires have_percpu_arena
+ * (asserted).
+ */
+JEMALLOC_ALWAYS_INLINE malloc_cpuid_t
+malloc_getcpu(void) {
+	assert(have_percpu_arena);
+#if defined(JEMALLOC_HAVE_SCHED_GETCPU)
+	return (malloc_cpuid_t)sched_getcpu();
+#else
+	not_reached();
+	/* Unreachable; present so every path returns a value. */
+	return -1;
+#endif
+}
+
+/*
+ * Return the arena index for the CPU the caller is currently running on.
+ *
+ * In percpu_arena mode, or whenever the CPU id is below ncpus / 2, the CPU id
+ * is used directly.  Otherwise (per_phycpu_arena) ncpus / 2 is subtracted so
+ * that hyper threads on the same physical CPU share an arena.
+ */
+JEMALLOC_ALWAYS_INLINE unsigned
+percpu_arena_choose(void) {
+	assert(have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena));
+
+	malloc_cpuid_t cpuid = malloc_getcpu();
+	assert(cpuid >= 0);
+
+	if (opt_percpu_arena != percpu_arena && (unsigned)cpuid >= ncpus / 2) {
+		assert(opt_percpu_arena == per_phycpu_arena);
+		/* Hyper threads on the same physical CPU share arena. */
+		return cpuid - ncpus / 2;
+	}
+
+	return cpuid;
+}
+
+/*
+ * Return the limit of the percpu auto arena range, i.e. arenas[0...ind_limit).
+ *
+ * The limit is ncpus, except in per_phycpu_arena mode with more than one CPU,
+ * where it is ncpus / 2 rounded up.
+ */
+JEMALLOC_ALWAYS_INLINE unsigned
+percpu_arena_ind_limit(percpu_arena_mode_t mode) {
+	assert(have_percpu_arena && PERCPU_ARENA_ENABLED(mode));
+
+	if (mode != per_phycpu_arena || ncpus <= 1) {
+		return ncpus;
+	}
+
+	unsigned half = ncpus / 2;
+	if (ncpus % 2) {
+		/* This likely means a misconfig. */
+		return half + 1;
+	}
+	return half;
+}
+
+/*
+ * Look up the calling thread's cached arena_tdata_t for arena index ind.
+ *
+ * Falls back to arena_tdata_get_hard() when the thread's tdata array has not
+ * been set up yet.  When ind lies beyond the cached array, the slow path is
+ * taken only if refresh_if_missing is true; otherwise NULL is returned.  In
+ * all other cases the cached slot is returned directly.
+ */
+static inline arena_tdata_t *
+arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) {
+	arena_tdata_t *arenas_tdata = tsd_arenas_tdata_get(tsd);
+
+	if (unlikely(arenas_tdata == NULL)) {
+		/* arenas_tdata hasn't been initialized yet. */
+		return arena_tdata_get_hard(tsd, ind);
+	}
+	if (unlikely(ind >= tsd_narenas_tdata_get(tsd))) {
+		/*
+		 * ind is invalid, cache is old (too small), or tdata to be
+		 * initialized.
+		 */
+		return (refresh_if_missing ? arena_tdata_get_hard(tsd, ind) :
+		    NULL);
+	}
+
+	/*
+	 * The address of an element of a non-NULL array is never NULL, so the
+	 * slot can be returned without a further check (the former NULL test
+	 * and its fallback to arena_tdata_get_hard() were unreachable).
+	 */
+	return &arenas_tdata[ind];
+}
+
+/*
+ * Return the arena registered at index ind in arenas[], loaded with acquire
+ * ordering.  If the slot is empty and init_if_missing is true, the arena is
+ * created via arena_init() with the default extent hooks; otherwise NULL is
+ * returned for an empty slot.  ind must be below MALLOCX_ARENA_LIMIT
+ * (asserted).
+ */
+static inline arena_t *
+arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) {
+	assert(ind < MALLOCX_ARENA_LIMIT);
+
+	arena_t *ret = (arena_t *)atomic_load_p(&arenas[ind], ATOMIC_ACQUIRE);
+	if (unlikely(ret == NULL) && init_if_missing) {
+		ret = arena_init(tsdn, ind,
+		    (extent_hooks_t *)&extent_hooks_default);
+	}
+	return ret;
+}
+
+/*
+ * Return the decay ticker inside the thread's arena_tdata_t for arena ind
+ * (refreshing the tdata if necessary), or NULL when no tdata is available.
+ */
+static inline ticker_t *
+decay_ticker_get(tsd_t *tsd, unsigned ind) {
+	arena_tdata_t *tdata = arena_tdata_get(tsd, ind, true);
+
+	return unlikely(tdata == NULL) ? NULL : &tdata->decay_ticker;
+}
+
+/*
+ * Return the tcache bin for small size class binind.  binind must be below
+ * NBINS (asserted).
+ */
+JEMALLOC_ALWAYS_INLINE tcache_bin_t *
+tcache_small_bin_get(tcache_t *tcache, szind_t binind) {
+	tcache_bin_t *bin;
+
+	assert(binind < NBINS);
+	bin = &tcache->tbins_small[binind];
+	return bin;
+}
+
+/*
+ * Return the tcache bin for large size class binind.  binind must lie in
+ * [NBINS, nhbins) (asserted); tbins_large is indexed relative to NBINS.
+ */
+JEMALLOC_ALWAYS_INLINE tcache_bin_t *
+tcache_large_bin_get(tcache_t *tcache, szind_t binind) {
+	assert(binind >= NBINS && binind < nhbins);
+	return &tcache->tbins_large[binind - NBINS];
+}
+
+/*
+ * Report whether the thread's automatic tcache may be used.
+ *
+ * Thread specific auto tcache might be unavailable if: 1) during tcache
+ * initialization, or 2) disabled through thread.tcache.enabled mallctl
+ * or config options.  The tcache-enabled check covers all cases.
+ */
+JEMALLOC_ALWAYS_INLINE bool
+tcache_available(tsd_t *tsd) {
+	if (!likely(tsd_tcache_enabled_get(tsd))) {
+		return false;
+	}
+
+	/* Associated arena == NULL implies tcache init in progress. */
+	assert(tsd_tcachep_get(tsd)->arena == NULL ||
+	    tcache_small_bin_get(tsd_tcachep_get(tsd), 0)->avail !=
+	    NULL);
+	return true;
+}
+
+/*
+ * Return the thread's tcache, or NULL when tcache_available() reports that it
+ * cannot be used.
+ */
+JEMALLOC_ALWAYS_INLINE tcache_t *
+tcache_get(tsd_t *tsd) {
+	return tcache_available(tsd) ? tsd_tcachep_get(tsd) : NULL;
+}
+
+/*
+ * Mark the start of a reentrant section for this thread by incrementing its
+ * reentrancy level.  If the thread was in the fast state beforehand, its tsd
+ * state is refreshed and must end up as tsd_state_nominal_slow (asserted).
+ * Pair with post_reentrancy().
+ */
+static inline void
+pre_reentrancy(tsd_t *tsd, arena_t *arena) {
+	/* arena is the current context.  Reentry from a0 is not allowed. */
+	assert(arena != arena_get(tsd_tsdn(tsd), 0, false));
+
+	/* Sample the fast state before the level is bumped. */
+	bool fast = tsd_fast(tsd);
+	++*tsd_reentrancy_levelp_get(tsd);
+	if (fast) {
+		/* Prepare slow path for reentrancy. */
+		tsd_slow_update(tsd);
+		assert(tsd->state == tsd_state_nominal_slow);
+	}
+}
+
+/*
+ * Mark the end of a reentrant section: decrement the thread's reentrancy
+ * level (which must be positive, asserted) and, once it drops back to zero,
+ * refresh the tsd state via tsd_slow_update().
+ */
+static inline void
+post_reentrancy(tsd_t *tsd) {
+	int8_t *reentrancy_level = tsd_reentrancy_levelp_get(tsd);
+	assert(*reentrancy_level > 0);
+	if (--*reentrancy_level == 0) {
+		tsd_slow_update(tsd);
+	}
+}
+
+#endif /* JEMALLOC_INTERNAL_INLINES_A_H */
--- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_b.h
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_b.h
@@ -0,0 +1,86 @@
+#ifndef JEMALLOC_INTERNAL_INLINES_B_H
+#define JEMALLOC_INTERNAL_INLINES_B_H
+
+#include "jemalloc/internal/rtree.h"
+
+/*
+ * Choose an arena based on a per-thread value.
+ *
+ * tsd:      the calling thread's tsd.
+ * arena:    if non-NULL, it is returned as is and no thread state is read.
+ * internal: selects the tsd iarena slot (true) or the arena slot (false).
+ *
+ * Returns the chosen arena.  While the thread's reentrancy level is positive,
+ * arena 0 is returned instead of the thread's own arena.
+ */
+static inline arena_t *
+arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) {
+	arena_t *ret;
+
+	/* An explicitly supplied arena always wins. */
+	if (arena != NULL) {
+		return arena;
+	}
+
+	/* During reentrancy, arena 0 is the safest bet. */
+	if (unlikely(tsd_reentrancy_level_get(tsd) > 0)) {
+		return arena_get(tsd_tsdn(tsd), 0, true);
+	}
+
+	ret = internal ? tsd_iarena_get(tsd) : tsd_arena_get(tsd);
+	if (unlikely(ret == NULL)) {
+		/* Nothing cached in tsd yet; take the slow selection path. */
+		ret = arena_choose_hard(tsd, internal);
+		assert(ret);
+		if (tcache_available(tsd)) {
+			/*
+			 * Make the thread's tcache point at the arena that was
+			 * just chosen.
+			 */
+			tcache_t *tcache = tcache_get(tsd);
+			if (tcache->arena != NULL) {
+				/* See comments in tcache_data_init(). */
+				assert(tcache->arena ==
+				    arena_get(tsd_tsdn(tsd), 0, false));
+				if (tcache->arena != ret) {
+					tcache_arena_reassociate(tsd_tsdn(tsd),
+					    tcache, ret);
+				}
+			} else {
+				tcache_arena_associate(tsd_tsdn(tsd), tcache,
+				    ret);
+			}
+		}
+	}
+
+	/*
+	 * Note that for percpu arena, if the current arena is outside of the
+	 * auto percpu arena range, (i.e. thread is assigned to a manually
+	 * managed arena), then percpu arena is skipped.
+	 */
+	if (have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena) &&
+	    !internal && (arena_ind_get(ret) <
+	    percpu_arena_ind_limit(opt_percpu_arena)) && (ret->last_thd !=
+	    tsd_tsdn(tsd))) {
+		/*
+		 * Only reached when this thread was not the last one recorded
+		 * in ret->last_thd; switch arenas if percpu_arena_choose()
+		 * returns a different index.
+		 */
+		unsigned ind = percpu_arena_choose();
+		if (arena_ind_get(ret) != ind) {
+			percpu_arena_update(tsd, ind);
+			ret = tsd_arena_get(tsd);
+		}
+		ret->last_thd = tsd_tsdn(tsd);
+	}
+
+	return ret;
+}
+
+/* Choose an arena via arena_choose_impl() with internal == false. */
+static inline arena_t *
+arena_choose(tsd_t *tsd, arena_t *arena) {
+	return arena_choose_impl(tsd, arena, /* internal */ false);
+}
+
+/* Choose an arena via arena_choose_impl() with internal == true. */
+static inline arena_t *
+arena_ichoose(tsd_t *tsd, arena_t *arena) {
+	return arena_choose_impl(tsd, arena, /* internal */ true);
+}
+
+/* Return true if the arena's index is below narenas_auto. */
+static inline bool
+arena_is_auto(arena_t *arena) {
+	assert(narenas_auto > 0);
+	return narenas_auto > arena_ind_get(arena);
+}
+
+/*
+ * Look up the extent recorded for ptr in the global extents_rtree and return
+ * it.
+ */
+JEMALLOC_ALWAYS_INLINE extent_t *
+iealloc(tsdn_t *tsdn, const void *ptr) {
+	/* Stack storage handed to tsdn_rtree_ctx() as its fallback context. */
+	rtree_ctx_t fallback_ctx;
+	rtree_ctx_t *ctx = tsdn_rtree_ctx(tsdn, &fallback_ctx);
+	uintptr_t key = (uintptr_t)ptr;
+
+	return rtree_extent_read(tsdn, &extents_rtree, ctx, key, true);
+}
+
+#endif /* JEMALLOC_INTERNAL_INLINES_B_H */
--- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h
@@ -0,0 +1,197 @@
+#ifndef JEMALLOC_INTERNAL_INLINES_C_H
+#define JEMALLOC_INTERNAL_INLINES_C_H
+
+#include "jemalloc/internal/jemalloc_internal_types.h"
+#include "jemalloc/internal/sz.h"
+#include "jemalloc/internal/witness.h"
+
+/* Return arena_aalloc()'s result for ptr; ptr must not be NULL. */
+JEMALLOC_ALWAYS_INLINE arena_t *
+iaalloc(tsdn_t *tsdn, const void *ptr) {
+	assert(ptr != NULL);
+
+	arena_t *owner = arena_aalloc(tsdn, ptr);
+	return owner;
+}
+
+/* Return arena_salloc()'s result for ptr; ptr must not be NULL. */
+JEMALLOC_ALWAYS_INLINE size_t
+isalloc(tsdn_t *tsdn, const void *ptr) {
+	assert(ptr != NULL);
+
+	size_t sz = arena_salloc(tsdn, ptr);
+	return sz;
+}
+
+/*
+ * Allocate size bytes (size class index ind) through arena_malloc().
+ *
+ * When is_internal is set, tcache must be NULL and arena must be NULL or an
+ * auto arena; with stats enabled, a successful internal allocation is also
+ * recorded through arena_internal_add().  Returns NULL on failure.
+ */
+JEMALLOC_ALWAYS_INLINE void *
+iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache,
+    bool is_internal, arena_t *arena, bool slow_path) {
+	assert(size != 0);
+	assert(!is_internal || tcache == NULL);
+	assert(!is_internal || arena == NULL || arena_is_auto(arena));
+	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+	    WITNESS_RANK_CORE, 0);
+
+	void *p = arena_malloc(tsdn, arena, size, ind, zero, tcache,
+	    slow_path);
+	if (likely(p != NULL) && config_stats && is_internal) {
+		arena_internal_add(iaalloc(tsdn, p), isalloc(tsdn, p));
+	}
+	return p;
+}
+
+/*
+ * Non-internal allocation for the calling thread: uses the thread's tcache (if
+ * available) and passes no explicit arena to iallocztm().
+ */
+JEMALLOC_ALWAYS_INLINE void *
+ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) {
+	return iallocztm(tsd_tsdn(tsd), size, ind, zero, tcache_get(tsd),
+	    /* is_internal */ false, /* arena */ NULL, slow_path);
+}
+
+/*
+ * Aligned allocation through arena_palloc().
+ *
+ * usize must be non-zero and already equal to sz_sa2u(usize, alignment).  The
+ * is_internal constraints and stats accounting match iallocztm().  Returns
+ * NULL on failure.
+ */
+JEMALLOC_ALWAYS_INLINE void *
+ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
+    tcache_t *tcache, bool is_internal, arena_t *arena) {
+	assert(usize != 0);
+	assert(usize == sz_sa2u(usize, alignment));
+	assert(!is_internal || tcache == NULL);
+	assert(!is_internal || arena == NULL || arena_is_auto(arena));
+	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+	    WITNESS_RANK_CORE, 0);
+
+	void *p = arena_palloc(tsdn, arena, usize, alignment, zero, tcache);
+	/* The result must sit exactly on an alignment boundary. */
+	assert(ALIGNMENT_ADDR2BASE(p, alignment) == p);
+	if (likely(p != NULL) && config_stats && is_internal) {
+		arena_internal_add(iaalloc(tsdn, p), isalloc(tsdn, p));
+	}
+	return p;
+}
+
+/* Aligned allocation with explicit tcache/arena; never marked internal. */
+JEMALLOC_ALWAYS_INLINE void *
+ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero,
+    tcache_t *tcache, arena_t *arena) {
+	return ipallocztm(tsdn, usize, alignment, zero, tcache,
+	    /* is_internal */ false, arena);
+}
+
+/*
+ * Aligned, non-internal allocation for the calling thread: uses the thread's
+ * tcache (if available) and passes no explicit arena.
+ */
+JEMALLOC_ALWAYS_INLINE void *
+ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) {
+	return ipallocztm(tsd_tsdn(tsd), usize, alignment, zero,
+	    tcache_get(tsd), /* is_internal */ false, /* arena */ NULL);
+}
+
+/*
+ * Return arena_vsalloc()'s result for ptr.  Unlike isalloc(), this wrapper
+ * does not assert that ptr is non-NULL.
+ */
+JEMALLOC_ALWAYS_INLINE size_t
+ivsalloc(tsdn_t *tsdn, const void *ptr) {
+	return arena_vsalloc(tsdn, ptr);
+}
+
+/*
+ * Deallocate ptr through arena_dalloc().
+ *
+ * Internal deallocations must not use a tcache and must target an auto arena;
+ * with stats enabled they are recorded through arena_internal_sub() first.
+ * Non-internal deallocations made while the thread's reentrancy level is
+ * non-zero must not use a tcache either.
+ */
+JEMALLOC_ALWAYS_INLINE void
+idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, alloc_ctx_t *alloc_ctx,
+    bool is_internal, bool slow_path) {
+	assert(ptr != NULL);
+	assert(!is_internal || tcache == NULL);
+	assert(!is_internal || arena_is_auto(iaalloc(tsdn, ptr)));
+	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+	    WITNESS_RANK_CORE, 0);
+
+	if (is_internal) {
+		if (config_stats) {
+			arena_internal_sub(iaalloc(tsdn, ptr),
+			    isalloc(tsdn, ptr));
+		}
+	} else if (tsd_reentrancy_level_get(tsdn_tsd(tsdn)) != 0) {
+		assert(tcache == NULL);
+	}
+
+	arena_dalloc(tsdn, ptr, tcache, alloc_ctx, slow_path);
+}
+
+/*
+ * Non-internal deallocation for the calling thread, using the thread's tcache
+ * (if available), no alloc_ctx, and slow_path == true.
+ */
+JEMALLOC_ALWAYS_INLINE void
+idalloc(tsd_t *tsd, void *ptr) {
+	idalloctm(tsd_tsdn(tsd), ptr, tcache_get(tsd), /* alloc_ctx */ NULL,
+	    /* is_internal */ false, /* slow_path */ true);
+}
+
+/*
+ * Sized deallocation: after the witness depth check, forward ptr together
+ * with the caller-supplied size to arena_sdalloc().
+ */
+JEMALLOC_ALWAYS_INLINE void
+isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
+    alloc_ctx_t *alloc_ctx, bool slow_path) {
+	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+	    WITNESS_RANK_CORE, 0);
+	arena_sdalloc(tsdn, ptr, size, tcache, alloc_ctx, slow_path);
+}
+
+/*
+ * Reallocate by moving: obtain a new region that satisfies alignment, copy the
+ * old contents into it, and release the old region.
+ *
+ * The first attempt asks for size + extra bytes; if that allocation fails and
+ * extra is non-zero, a second attempt asks for size bytes only.  Returns the
+ * new pointer, or NULL (leaving ptr untouched) when the computed usable size
+ * is 0 or exceeds LARGE_MAXCLASS, or when allocation fails.
+ */
+JEMALLOC_ALWAYS_INLINE void *
+iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
+    size_t extra, size_t alignment, bool zero, tcache_t *tcache,
+    arena_t *arena) {
+	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+	    WITNESS_RANK_CORE, 0);
+
+	size_t usize = sz_sa2u(size + extra, alignment);
+	if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) {
+		return NULL;
+	}
+
+	void *p = ipalloct(tsdn, usize, alignment, zero, tcache, arena);
+	if (p == NULL && extra != 0) {
+		/* Try again, without extra this time. */
+		usize = sz_sa2u(size, alignment);
+		if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) {
+			return NULL;
+		}
+		p = ipalloct(tsdn, usize, alignment, zero, tcache, arena);
+	}
+	if (p == NULL) {
+		return NULL;
+	}
+
+	/*
+	 * Copy at most size bytes (not size+extra), since the caller has no
+	 * expectation that the extra bytes will be reliably preserved.
+	 */
+	size_t copysize = (oldsize > size) ? size : oldsize;
+	memcpy(p, ptr, copysize);
+	isdalloct(tsdn, ptr, oldsize, tcache, NULL, true);
+	return p;
+}
+
+/*
+ * Reallocate ptr to size bytes.  If a non-zero alignment is requested and ptr
+ * does not already satisfy it, the object is moved via iralloct_realign();
+ * otherwise the request is handed to arena_ralloc().
+ */
+JEMALLOC_ALWAYS_INLINE void *
+iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment,
+    bool zero, tcache_t *tcache, arena_t *arena) {
+	assert(ptr != NULL);
+	assert(size != 0);
+	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+	    WITNESS_RANK_CORE, 0);
+
+	const bool misaligned = (alignment != 0) &&
+	    (((uintptr_t)ptr & ((uintptr_t)alignment - 1)) != 0);
+	if (!misaligned) {
+		return arena_ralloc(tsdn, arena, ptr, oldsize, size, alignment,
+		    zero, tcache);
+	}
+
+	/*
+	 * Existing object alignment is inadequate; allocate new space
+	 * and copy.
+	 */
+	return iralloct_realign(tsdn, ptr, oldsize, size, 0, alignment, zero,
+	    tcache, arena);
+}
+
+/*
+ * Reallocate for the calling thread: iralloct() with the thread's tcache (if
+ * available) and no explicit arena.
+ */
+JEMALLOC_ALWAYS_INLINE void *
+iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment,
+    bool zero) {
+	tsdn_t *tsdn = tsd_tsdn(tsd);
+	tcache_t *tcache = tcache_get(tsd);
+
+	return iralloct(tsdn, ptr, oldsize, size, alignment, zero, tcache,
+	    /* arena */ NULL);
+}
+
+/*
+ * Try to resize ptr without moving it.  Returns true right away when a
+ * non-zero alignment is requested that ptr does not satisfy; otherwise
+ * returns whatever arena_ralloc_no_move() reports.
+ */
+JEMALLOC_ALWAYS_INLINE bool
+ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra,
+    size_t alignment, bool zero) {
+	assert(ptr != NULL);
+	assert(size != 0);
+	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
+	    WITNESS_RANK_CORE, 0);
+
+	const bool misaligned = (alignment != 0) &&
+	    (((uintptr_t)ptr & ((uintptr_t)alignment - 1)) != 0);
+	if (misaligned) {
+		/* Existing object alignment is inadequate. */
+		return true;
+	}
+
+	return arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero);
+}
+
+#endif /* JEMALLOC_INTERNAL_INLINES_C_H */
--- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h
@@ -0,0 +1,40 @@
+#ifndef JEMALLOC_INTERNAL_MACROS_H
+#define JEMALLOC_INTERNAL_MACROS_H
+
+#ifdef JEMALLOC_DEBUG
+#  define JEMALLOC_ALWAYS_INLINE static inline
+#else
+#  define JEMALLOC_ALWAYS_INLINE JEMALLOC_ATTR(always_inline) static inline
+#endif
+#ifdef _MSC_VER
+#  define inline _inline
+#endif
+
+#define UNUSED JEMALLOC_ATTR(unused)
+
+/*
+ * Cast helpers.  Z* convert to size_t/ssize_t and Q* to uint64_t/int64_t; the
+ * U/D suffix selects the unsigned/signed variant.
+ */
+#define ZU(z)	((size_t)z)
+#define ZD(z)	((ssize_t)z)
+#define QU(q)	((uint64_t)q)
+#define QD(q)	((int64_t)q)
+
+/*
+ * K* variants take an integer constant, paste the matching ULL/LL suffix onto
+ * it, and then apply the corresponding cast helper above.
+ */
+#define KZU(z)	ZU(z##ULL)
+#define KZD(z)	ZD(z##LL)
+#define KQU(q)	QU(q##ULL)
+/* Must expand through QD(); there is no QI() helper. */
+#define KQD(q)	QD(q##LL)
+
+#ifndef __DECONST
+#  define	__DECONST(type, var)	((type)(uintptr_t)(const void *)(var))
+#endif
+
+#if !defined(JEMALLOC_HAS_RESTRICT) || defined(__cplusplus)
+#  define restrict
+#endif
+
+/* Various function pointers are static and immutable except during testing. */
+#ifdef JEMALLOC_JET
+#  define JET_MUTABLE
+#else
+#  define JET_MUTABLE const
+#endif
+
+#endif /* JEMALLOC_INTERNAL_MACROS_H */
--- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_types.h
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_types.h
@@ -0,0 +1,178 @@
+#ifndef JEMALLOC_INTERNAL_TYPES_H
+#define JEMALLOC_INTERNAL_TYPES_H
+
+/* Page size index type. */
+typedef unsigned pszind_t;
+
+/* Size class index type. */
+typedef unsigned szind_t;
+
+/* Processor / core id type. */
+typedef int malloc_cpuid_t;
+
+/*
+ * Flags bits:
+ *
+ * a: arena
+ * t: tcache
+ * 0: unused
+ * z: zero
+ * n: alignment
+ *
+ * aaaaaaaa aaaatttt tttttttt 0znnnnnn
+ */
+#define MALLOCX_ARENA_BITS	12
+#define MALLOCX_TCACHE_BITS	12
+#define MALLOCX_LG_ALIGN_BITS	6
+#define MALLOCX_ARENA_SHIFT	20
+#define MALLOCX_TCACHE_SHIFT	8
+#define MALLOCX_ARENA_MASK \
+    (((1 << MALLOCX_ARENA_BITS) - 1) << MALLOCX_ARENA_SHIFT)
+/* NB: Arena index bias decreases the maximum number of arenas by 1. */
+#define MALLOCX_ARENA_LIMIT	((1 << MALLOCX_ARENA_BITS) - 1)
+#define MALLOCX_TCACHE_MASK \
+    (((1 << MALLOCX_TCACHE_BITS) - 1) << MALLOCX_TCACHE_SHIFT)
+#define MALLOCX_TCACHE_MAX	((1 << MALLOCX_TCACHE_BITS) - 3)
+#define MALLOCX_LG_ALIGN_MASK	((1 << MALLOCX_LG_ALIGN_BITS) - 1)
+/*
+ * Field extractors.  The flags argument is parenthesized everywhere so that a
+ * compound expression (e.g. a | b) is decoded as a whole.
+ */
+/* Use MALLOCX_ALIGN_GET() if alignment may not be specified in flags. */
+#define MALLOCX_ALIGN_GET_SPECIFIED(flags)				\
+    (ZU(1) << ((flags) & MALLOCX_LG_ALIGN_MASK))
+#define MALLOCX_ALIGN_GET(flags)					\
+    (MALLOCX_ALIGN_GET_SPECIFIED(flags) & (SIZE_T_MAX-1))
+#define MALLOCX_ZERO_GET(flags)						\
+    ((bool)((flags) & MALLOCX_ZERO))
+
+/* The tcache field is decoded by subtracting a bias of 2. */
+#define MALLOCX_TCACHE_GET(flags)					\
+    (((unsigned)(((flags) & MALLOCX_TCACHE_MASK) >>			\
+    MALLOCX_TCACHE_SHIFT)) - 2)
+/* The arena field is decoded by subtracting a bias of 1. */
+#define MALLOCX_ARENA_GET(flags)					\
+    (((unsigned)(((unsigned)(flags)) >> MALLOCX_ARENA_SHIFT)) - 1)
+
+/* Smallest size class to support. */
+#define TINY_MIN		(1U << LG_TINY_MIN)
+
+/*
+ * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size
+ * classes).
+ */
+#ifndef LG_QUANTUM
+#  if (defined(__i386__) || defined(_M_IX86))
+#    define LG_QUANTUM		4
+#  endif
+#  ifdef __ia64__
+#    define LG_QUANTUM		4
+#  endif
+#  ifdef __alpha__
+#    define LG_QUANTUM		4
+#  endif
+#  if (defined(__sparc64__) || defined(__sparcv9) || defined(__sparc_v9__))
+#    define LG_QUANTUM		4
+#  endif
+#  if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64))
+#    define LG_QUANTUM		4
+#  endif
+#  ifdef __arm__
+#    define LG_QUANTUM		3
+#  endif
+#  ifdef __aarch64__
+#    define LG_QUANTUM		4
+#  endif
+#  ifdef __hppa__
+#    define LG_QUANTUM		4
+#  endif
+#  ifdef __mips__
+#    define LG_QUANTUM		3
+#  endif
+#  ifdef __or1k__
+#    define LG_QUANTUM		3
+#  endif
+#  ifdef __powerpc__
+#    define LG_QUANTUM		4
+#  endif
+#  ifdef __riscv__
+#    define LG_QUANTUM		4
+#  endif
+#  ifdef __s390__
+#    define LG_QUANTUM		4
+#  endif
+#  ifdef __SH4__
+#    define LG_QUANTUM		4
+#  endif
+#  ifdef __tile__
+#    define LG_QUANTUM		4
+#  endif
+#  ifdef __le32__
+#    define LG_QUANTUM		4
+#  endif
+#  ifndef LG_QUANTUM
+#    error "Unknown minimum alignment for architecture; specify via "
+	 "--with-lg-quantum"
+#  endif
+#endif
+
+#define QUANTUM			((size_t)(1U << LG_QUANTUM))
+#define QUANTUM_MASK		(QUANTUM - 1)
+
+/* Return the smallest quantum multiple that is >= a. */
+#define QUANTUM_CEILING(a)						\
+	(((a) + QUANTUM_MASK) & ~QUANTUM_MASK)
+
+#define LONG			((size_t)(1U << LG_SIZEOF_LONG))
+#define LONG_MASK		(LONG - 1)
+
+/* Return the smallest long multiple that is >= a. */
+#define LONG_CEILING(a)							\
+	(((a) + LONG_MASK) & ~LONG_MASK)
+
+#define SIZEOF_PTR		(1U << LG_SIZEOF_PTR)
+#define PTR_MASK		(SIZEOF_PTR - 1)
+
+/* Return the smallest (void *) multiple that is >= a. */
+#define PTR_CEILING(a)							\
+	(((a) + PTR_MASK) & ~PTR_MASK)
+
+/*
+ * Maximum size of L1 cache line.  This is used to avoid cache line aliasing.
+ * In addition, this controls the spacing of cacheline-spaced size classes.
+ *
+ * CACHELINE cannot be based on LG_CACHELINE because __declspec(align()) can
+ * only handle raw constants.
+ */
+#define LG_CACHELINE		6
+#define CACHELINE		64
+#define CACHELINE_MASK		(CACHELINE - 1)
+
+/* Return the smallest cacheline multiple that is >= s. */
+#define CACHELINE_CEILING(s)						\
+	(((s) + CACHELINE_MASK) & ~CACHELINE_MASK)
+
+/*
+ * Alignment helpers.  The mask arithmetic below only gives the documented
+ * results when alignment is a power of two.  Every use of the alignment
+ * argument is parenthesized so that compound expressions expand correctly.
+ */
+
+/* Return the nearest aligned address at or below a. */
+#define ALIGNMENT_ADDR2BASE(a, alignment)				\
+	((void *)((uintptr_t)(a) & ((~(alignment)) + 1)))
+
+/* Return the offset between a and the nearest aligned address at or below a. */
+#define ALIGNMENT_ADDR2OFFSET(a, alignment)				\
+	((size_t)((uintptr_t)(a) & ((alignment) - 1)))
+
+/* Return the smallest alignment multiple that is >= s. */
+#define ALIGNMENT_CEILING(s, alignment)					\
+	(((s) + ((alignment) - 1)) & ((~(alignment)) + 1))
+
+/* Declare a variable-length array. */
+#if __STDC_VERSION__ < 199901L
+#  ifdef _MSC_VER
+#    include <malloc.h>
+#    define alloca _alloca
+#  else
+#    ifdef JEMALLOC_HAS_ALLOCA_H
+#      include <alloca.h>
+#    else
+#      include <stdlib.h>
+#    endif
+#  endif
+#  define VARIABLE_ARRAY(type, name, count) \
+	type *name = alloca(sizeof(type) * (count))
+#else
+#  define VARIABLE_ARRAY(type, name, count) type name[(count)]
+#endif
+
+#endif /* JEMALLOC_INTERNAL_TYPES_H */
--- a/deps/jemalloc/include/jemalloc/internal/jemalloc_preamble.h
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_preamble.h
@@ -0,0 +1,179 @@
+#ifndef JEMALLOC_PREAMBLE_H
+#define JEMALLOC_PREAMBLE_H
+
+#include "jemalloc_internal_defs.h"
+#include "jemalloc/internal/jemalloc_internal_decls.h"
+
+#ifdef JEMALLOC_UTRACE
+#include <sys/ktrace.h>
+#endif
+
+#define JEMALLOC_NO_DEMANGLE
+#ifdef JEMALLOC_JET
+#  undef JEMALLOC_IS_MALLOC
+#  define JEMALLOC_N(n) jet_##n
+#  include "jemalloc/internal/public_namespace.h"
+#  define JEMALLOC_NO_RENAME
+#  include "../jemalloc.h"
+#  undef JEMALLOC_NO_RENAME
+#else
+#  define JEMALLOC_N(n) je_##n
+#  include "../jemalloc.h"
+#endif
+
+#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN))
+#include <libkern/OSAtomic.h>
+#endif
+
+#ifdef JEMALLOC_ZONE
+#include <mach/mach_error.h>
+#include <mach/mach_init.h>
+#include <mach/vm_map.h>
+#endif
+
+#include "jemalloc/internal/jemalloc_internal_macros.h"
+
+/*
+ * Note that the ordering matters here; the hook itself is name-mangled.  We
+ * want the inclusion of hooks to happen early, so that we hook as much as
+ * possible.
+ */
+#ifndef JEMALLOC_NO_PRIVATE_NAMESPACE
+#  ifndef JEMALLOC_JET
+#    include "jemalloc/internal/private_namespace.h"
+#  else
+#    include "jemalloc/internal/private_namespace_jet.h"
+#  endif
+#endif
+#include "jemalloc/internal/hooks.h"
+
+static const bool config_debug =
+#ifdef JEMALLOC_DEBUG
+    true
+#else
+    false
+#endif
+    ;
+static const bool have_dss =
+#ifdef JEMALLOC_DSS
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_fill =
+#ifdef JEMALLOC_FILL
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_lazy_lock =
+#ifdef JEMALLOC_LAZY_LOCK
+    true
+#else
+    false
+#endif
+    ;
+static const char * const config_malloc_conf = JEMALLOC_CONFIG_MALLOC_CONF;
+static const bool config_prof =
+#ifdef JEMALLOC_PROF
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_prof_libgcc =
+#ifdef JEMALLOC_PROF_LIBGCC
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_prof_libunwind =
+#ifdef JEMALLOC_PROF_LIBUNWIND
+    true
+#else
+    false
+#endif
+    ;
+static const bool maps_coalesce =
+#ifdef JEMALLOC_MAPS_COALESCE
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_stats =
+#ifdef JEMALLOC_STATS
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_thp =
+#ifdef JEMALLOC_THP
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_tls =
+#ifdef JEMALLOC_TLS
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_utrace =
+#ifdef JEMALLOC_UTRACE
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_xmalloc =
+#ifdef JEMALLOC_XMALLOC
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_cache_oblivious =
+#ifdef JEMALLOC_CACHE_OBLIVIOUS
+    true
+#else
+    false
+#endif
+    ;
+#ifdef JEMALLOC_HAVE_SCHED_GETCPU
+/* Currently percpu_arena depends on sched_getcpu. */
+#define JEMALLOC_PERCPU_ARENA
+#endif
+static const bool have_percpu_arena =
+#ifdef JEMALLOC_PERCPU_ARENA
+    true
+#else
+    false
+#endif
+    ;
+/*
+ * Undocumented, and not recommended; the application should take full
+ * responsibility for tracking provenance.
+ */
+static const bool force_ivsalloc =
+#ifdef JEMALLOC_FORCE_IVSALLOC
+    true
+#else
+    false
+#endif
+    ;
+static const bool have_background_thread =
+#ifdef JEMALLOC_BACKGROUND_THREAD
+    true
+#else
+    false
+#endif
+    ;
+
+#endif /* JEMALLOC_PREAMBLE_H */
--- a/deps/jemalloc/include/jemalloc/internal/jemalloc_preamble.h.in
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_preamble.h.in
@@ -0,0 +1,179 @@
+#ifndef JEMALLOC_PREAMBLE_H
+#define JEMALLOC_PREAMBLE_H
+
+#include "jemalloc_internal_defs.h"
+#include "jemalloc/internal/jemalloc_internal_decls.h"
+
+#ifdef JEMALLOC_UTRACE
+#include <sys/ktrace.h>
+#endif
+
+#define JEMALLOC_NO_DEMANGLE
+#ifdef JEMALLOC_JET
+#  undef JEMALLOC_IS_MALLOC
+#  define JEMALLOC_N(n) jet_##n
+#  include "jemalloc/internal/public_namespace.h"
+#  define JEMALLOC_NO_RENAME
+#  include "../jemalloc@install_suffix@.h"
+#  undef JEMALLOC_NO_RENAME
+#else
+#  define JEMALLOC_N(n) @private_namespace@##n
+#  include "../jemalloc@install_suffix@.h"
+#endif
+
+#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN))
+#include <libkern/OSAtomic.h>
+#endif
+
+#ifdef JEMALLOC_ZONE
+#include <mach/mach_error.h>
+#include <mach/mach_init.h>
+#include <mach/vm_map.h>
+#endif
+
+#include "jemalloc/internal/jemalloc_internal_macros.h"
+
+/*
+ * Note that the ordering matters here; the hook itself is name-mangled.  We
+ * want the inclusion of hooks to happen early, so that we hook as much as
+ * possible.
+ */
+#ifndef JEMALLOC_NO_PRIVATE_NAMESPACE
+#  ifndef JEMALLOC_JET
+#    include "jemalloc/internal/private_namespace.h"
+#  else
+#    include "jemalloc/internal/private_namespace_jet.h"
+#  endif
+#endif
+#include "jemalloc/internal/hooks.h"
+
+static const bool config_debug =
+#ifdef JEMALLOC_DEBUG
+    true
+#else
+    false
+#endif
+    ;
+static const bool have_dss =
+#ifdef JEMALLOC_DSS
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_fill =
+#ifdef JEMALLOC_FILL
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_lazy_lock =
+#ifdef JEMALLOC_LAZY_LOCK
+    true
+#else
+    false
+#endif
+    ;
+static const char * const config_malloc_conf = JEMALLOC_CONFIG_MALLOC_CONF;
+static const bool config_prof =
+#ifdef JEMALLOC_PROF
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_prof_libgcc =
+#ifdef JEMALLOC_PROF_LIBGCC
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_prof_libunwind =
+#ifdef JEMALLOC_PROF_LIBUNWIND
+    true
+#else
+    false
+#endif
+    ;
+static const bool maps_coalesce =
+#ifdef JEMALLOC_MAPS_COALESCE
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_stats =
+#ifdef JEMALLOC_STATS
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_thp =
+#ifdef JEMALLOC_THP
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_tls =
+#ifdef JEMALLOC_TLS
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_utrace =
+#ifdef JEMALLOC_UTRACE
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_xmalloc =
+#ifdef JEMALLOC_XMALLOC
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_cache_oblivious =
+#ifdef JEMALLOC_CACHE_OBLIVIOUS
+    true
+#else
+    false
+#endif
+    ;
+#ifdef JEMALLOC_HAVE_SCHED_GETCPU
+/* Currently percpu_arena depends on sched_getcpu. */
+#define JEMALLOC_PERCPU_ARENA
+#endif
+static const bool have_percpu_arena =
+#ifdef JEMALLOC_PERCPU_ARENA
+    true
+#else
+    false
+#endif
+    ;
+/*
+ * Undocumented, and not recommended; the application should take full
+ * responsibility for tracking provenance.
+ */
+static const bool force_ivsalloc =
+#ifdef JEMALLOC_FORCE_IVSALLOC
+    true
+#else
+    false
+#endif
+    ;
+static const bool have_background_thread =
+#ifdef JEMALLOC_BACKGROUND_THREAD
+    true
+#else
+    false
+#endif
+    ;
+
+#endif /* JEMALLOC_PREAMBLE_H */
--- a/deps/jemalloc/include/jemalloc/internal/large_externs.h
+++ b/deps/jemalloc/include/jemalloc/internal/large_externs.h
@@ -0,0 +1,26 @@
+#ifndef JEMALLOC_INTERNAL_LARGE_EXTERNS_H
+#define JEMALLOC_INTERNAL_LARGE_EXTERNS_H
+
+void *large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero);
+void *large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
+    bool zero);
+bool large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min,
+    size_t usize_max, bool zero);
+void *large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize,
+    size_t alignment, bool zero, tcache_t *tcache);
+
+typedef void (large_dalloc_junk_t)(void *, size_t);
+extern large_dalloc_junk_t *JET_MUTABLE large_dalloc_junk;
+
+typedef void (large_dalloc_maybe_junk_t)(void *, size_t);
+extern large_dalloc_maybe_junk_t *JET_MUTABLE large_dalloc_maybe_junk;
+
+void large_dalloc_prep_junked_locked(tsdn_t *tsdn, extent_t *extent);
+void large_dalloc_finish(tsdn_t *tsdn, extent_t *extent);
+void large_dalloc(tsdn_t *tsdn, extent_t *extent);
+size_t large_salloc(tsdn_t *tsdn, const extent_t *extent);
+prof_tctx_t *large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent);
+void large_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx);
+void large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent);
+
+#endif /* JEMALLOC_INTERNAL_LARGE_EXTERNS_H */
--- a/deps/jemalloc/include/jemalloc/internal/malloc_io.h
+++ b/deps/jemalloc/include/jemalloc/internal/malloc_io.h
@@ -0,0 +1,62 @@
+#ifndef JEMALLOC_INTERNAL_MALLOC_IO_H
+#define JEMALLOC_INTERNAL_MALLOC_IO_H
+
+#ifdef _WIN32
+#  ifdef _WIN64
+#    define FMT64_PREFIX "ll"
+#    define FMTPTR_PREFIX "ll"
+#  else
+#    define FMT64_PREFIX "ll"
+#    define FMTPTR_PREFIX ""
+#  endif
+#  define FMTd32 "d"
+#  define FMTu32 "u"
+#  define FMTx32 "x"
+#  define FMTd64 FMT64_PREFIX "d"
+#  define FMTu64 FMT64_PREFIX "u"
+#  define FMTx64 FMT64_PREFIX "x"
+#  define FMTdPTR FMTPTR_PREFIX "d"
+#  define FMTuPTR FMTPTR_PREFIX "u"
+#  define FMTxPTR FMTPTR_PREFIX "x"
+#else
+#  include <inttypes.h>
+#  define FMTd32 PRId32
+#  define FMTu32 PRIu32
+#  define FMTx32 PRIx32
+#  define FMTd64 PRId64
+#  define FMTu64 PRIu64
+#  define FMTx64 PRIx64
+#  define FMTdPTR PRIdPTR
+#  define FMTuPTR PRIuPTR
+#  define FMTxPTR PRIxPTR
+#endif
+
+/* Size of stack-allocated buffer passed to buferror(). */
+#define BUFERROR_BUF		64
+
+/*
+ * Size of stack-allocated buffer used by malloc_{,v,vc}printf().  This must be
+ * large enough for all possible uses within jemalloc.
+ */
+#define MALLOC_PRINTF_BUFSIZE	4096
+
+int buferror(int err, char *buf, size_t buflen);
+uintmax_t malloc_strtoumax(const char *restrict nptr, char **restrict endptr,
+    int base);
+void malloc_write(const char *s);
+
+/*
+ * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating
+ * point math.
+ */
+size_t malloc_vsnprintf(char *str, size_t size, const char *format,
+    va_list ap);
+size_t malloc_snprintf(char *str, size_t size, const char *format, ...)
+    JEMALLOC_FORMAT_PRINTF(3, 4);
+void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque,
+    const char *format, va_list ap);
+void malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque,
+    const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4);
+void malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2);
+
+#endif /* JEMALLOC_INTERNAL_MALLOC_IO_H */
--- a/deps/jemalloc/include/jemalloc/internal/mutex_pool.h
+++ b/deps/jemalloc/include/jemalloc/internal/mutex_pool.h
@@ -0,0 +1,94 @@
+#ifndef JEMALLOC_INTERNAL_MUTEX_POOL_H
+#define JEMALLOC_INTERNAL_MUTEX_POOL_H
+
+#include "jemalloc/internal/hash.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/witness.h"
+
+/* We do mod reductions by this value, so it should be kept a power of 2. */
+#define MUTEX_POOL_SIZE 256
+
+/*
+ * A fixed-size table of MUTEX_POOL_SIZE mutexes.  mutex_pool_mutex() picks the
+ * entry for a given key by hashing it.
+ */
+typedef struct mutex_pool_s mutex_pool_t;
+struct mutex_pool_s {
+	malloc_mutex_t mutexes[MUTEX_POOL_SIZE];
+};
+
+bool mutex_pool_init(mutex_pool_t *pool, const char *name, witness_rank_t rank);
+
+/*
+ * Internal helper - not meant to be called outside this module.
+ *
+ * Hash key and return the pool mutex in the slot given by the first hash word
+ * modulo MUTEX_POOL_SIZE.
+ */
+static inline malloc_mutex_t *
+mutex_pool_mutex(mutex_pool_t *pool, uintptr_t key) {
+	size_t digest[2];
+
+	hash(&key, sizeof(key), 0xd50dcc1b, digest);
+	size_t slot = digest[0] % MUTEX_POOL_SIZE;
+	return pool->mutexes + slot;
+}
+
+/* Run malloc_mutex_assert_not_owner() on every mutex in the pool. */
+static inline void
+mutex_pool_assert_not_held(tsdn_t *tsdn, mutex_pool_t *pool) {
+	malloc_mutex_t *end = pool->mutexes + MUTEX_POOL_SIZE;
+
+	for (malloc_mutex_t *m = pool->mutexes; m != end; m++) {
+		malloc_mutex_assert_not_owner(tsdn, m);
+	}
+}
+
+/*
+ * Note that a mutex pool doesn't work exactly the way an embedded mutex would.
+ * You're not allowed to acquire mutexes in the pool one at a time.  You have to
+ * acquire all the mutexes you'll need in a single function call, and then
+ * release them all in a single function call.
+ */
+
+/*
+ * Lock the pool mutex that key hashes to, after checking that the caller
+ * holds no mutex from this pool.
+ */
+static inline void
+mutex_pool_lock(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key) {
+	mutex_pool_assert_not_held(tsdn, pool);
+	malloc_mutex_lock(tsdn, mutex_pool_mutex(pool, key));
+}
+
+/*
+ * Unlock the pool mutex that key hashes to, then check that the caller holds
+ * no mutex from this pool.
+ */
+static inline void
+mutex_pool_unlock(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key) {
+	malloc_mutex_unlock(tsdn, mutex_pool_mutex(pool, key));
+	mutex_pool_assert_not_held(tsdn, pool);
+}
+
+/*
+ * Lock the pool mutexes for key1 and key2 in one call.  The two mutexes are
+ * always taken in ascending address order, and when both keys hash to the
+ * same mutex it is locked only once.
+ */
+static inline void
+mutex_pool_lock2(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key1,
+    uintptr_t key2) {
+	mutex_pool_assert_not_held(tsdn, pool);
+
+	malloc_mutex_t *lo = mutex_pool_mutex(pool, key1);
+	malloc_mutex_t *hi = mutex_pool_mutex(pool, key2);
+	if ((uintptr_t)lo > (uintptr_t)hi) {
+		/* Swap so that lo names the lower address. */
+		malloc_mutex_t *tmp = lo;
+		lo = hi;
+		hi = tmp;
+	}
+
+	malloc_mutex_lock(tsdn, lo);
+	if (hi != lo) {
+		malloc_mutex_lock(tsdn, hi);
+	}
+}
+
+/*
+ * Release the pool mutexes for key1 and key2 (a single unlock when both keys
+ * hash to the same mutex), then check that the caller holds no mutex from
+ * this pool.
+ */
+static inline void
+mutex_pool_unlock2(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key1,
+    uintptr_t key2) {
+	malloc_mutex_t *first = mutex_pool_mutex(pool, key1);
+	malloc_mutex_t *second = mutex_pool_mutex(pool, key2);
+
+	malloc_mutex_unlock(tsdn, first);
+	if (second != first) {
+		malloc_mutex_unlock(tsdn, second);
+	}
+
+	mutex_pool_assert_not_held(tsdn, pool);
+}
+
+/* Run malloc_mutex_assert_owner() on the pool mutex that key hashes to. */
+static inline void
+mutex_pool_assert_owner(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key) {
+	malloc_mutex_assert_owner(tsdn, mutex_pool_mutex(pool, key));
+}
+
+#endif /* JEMALLOC_INTERNAL_MUTEX_POOL_H */
--- a/deps/jemalloc/include/jemalloc/internal/mutex_prof.h
+++ b/deps/jemalloc/include/jemalloc/internal/mutex_prof.h
@@ -0,0 +1,86 @@
+#ifndef JEMALLOC_INTERNAL_MUTEX_PROF_H
+#define JEMALLOC_INTERNAL_MUTEX_PROF_H
+
+#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/nstime.h"
+#include "jemalloc/internal/tsd_types.h"
+
+#define MUTEX_PROF_GLOBAL_MUTEXES					\
+    OP(background_thread)						\
+    OP(ctl)								\
+    OP(prof)
+
+typedef enum {
+#define OP(mtx) global_prof_mutex_##mtx,
+	MUTEX_PROF_GLOBAL_MUTEXES
+#undef OP
+	mutex_prof_num_global_mutexes
+} mutex_prof_global_ind_t;
+
+#define MUTEX_PROF_ARENA_MUTEXES					\
+    OP(large)								\
+    OP(extent_avail)							\
+    OP(extents_dirty)							\
+    OP(extents_muzzy)							\
+    OP(extents_retained)						\
+    OP(decay_dirty)							\
+    OP(decay_muzzy)							\
+    OP(base)								\
+    OP(tcache_list)
+
+typedef enum {
+#define OP(mtx) arena_prof_mutex_##mtx,
+	MUTEX_PROF_ARENA_MUTEXES
+#undef OP
+	mutex_prof_num_arena_mutexes
+} mutex_prof_arena_ind_t;
+
+#define MUTEX_PROF_COUNTERS						\
+    OP(num_ops, uint64_t)						\
+    OP(num_wait, uint64_t)						\
+    OP(num_spin_acq, uint64_t)						\
+    OP(num_owner_switch, uint64_t)					\
+    OP(total_wait_time, uint64_t)					\
+    OP(max_wait_time, uint64_t)						\
+    OP(max_num_thds, uint32_t)
+
+typedef enum {
+#define OP(counter, type) mutex_counter_##counter,
+	MUTEX_PROF_COUNTERS
+#undef OP
+	mutex_prof_num_counters
+} mutex_prof_counter_ind_t;
+
+typedef struct {
+	/*
+	 * Counters touched on the slow path, i.e. when there is lock
+	 * contention.  We update them once we have the lock.
+	 */
+	/* Total time (in nano seconds) spent waiting on this mutex. */
+	nstime_t		tot_wait_time;
+	/* Max time (in nano seconds) spent on a single lock operation. */
+	nstime_t		max_wait_time;
+	/* # of times have to wait for this mutex (after spinning). */
+	uint64_t		n_wait_times;
+	/* # of times acquired the mutex through local spinning. */
+	uint64_t		n_spin_acquired;
+	/* Max # of threads waiting for the mutex at the same time. */
+	uint32_t		max_n_thds;
+	/* Current # of threads waiting on the lock.  Atomic synced. */
+	atomic_u32_t		n_waiting_thds;
+
+	/*
+	 * Data touched on the fast path.  These are modified right after we
+	 * grab the lock, so it's placed closest to the end (i.e. right before
+	 * the lock) so that we have a higher chance of them being on the same
+	 * cacheline.
+	 */
+	/* # of times the mutex holder is different than the previous one. */
+	uint64_t		n_owner_switches;
+	/* Previous mutex holder, to facilitate n_owner_switches. */
+	tsdn_t			*prev_owner;
+	/* # of lock() operations in total. */
+	uint64_t		n_lock_ops;
+} mutex_prof_data_t;
+
+#endif /* JEMALLOC_INTERNAL_MUTEX_PROF_H */
--- a/deps/jemalloc/include/jemalloc/internal/nstime.h
+++ b/deps/jemalloc/include/jemalloc/internal/nstime.h
@@ -0,0 +1,34 @@
+#ifndef JEMALLOC_INTERNAL_NSTIME_H
+#define JEMALLOC_INTERNAL_NSTIME_H
+
+/* Maximum supported number of seconds (~584 years). */
+#define NSTIME_SEC_MAX KQU(18446744072)
+#define NSTIME_ZERO_INITIALIZER {0}
+
+typedef struct {
+	uint64_t ns; /* Nanosecond count. */
+} nstime_t;
+
+void nstime_init(nstime_t *time, uint64_t ns);
+void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec);
+uint64_t nstime_ns(const nstime_t *time);
+uint64_t nstime_sec(const nstime_t *time);
+uint64_t nstime_msec(const nstime_t *time);
+uint64_t nstime_nsec(const nstime_t *time);
+void nstime_copy(nstime_t *time, const nstime_t *source);
+int nstime_compare(const nstime_t *a, const nstime_t *b);
+void nstime_add(nstime_t *time, const nstime_t *addend);
+void nstime_iadd(nstime_t *time, uint64_t addend);
+void nstime_subtract(nstime_t *time, const nstime_t *subtrahend);
+void nstime_isubtract(nstime_t *time, uint64_t subtrahend);
+void nstime_imultiply(nstime_t *time, uint64_t multiplier);
+void nstime_idivide(nstime_t *time, uint64_t divisor);
+uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor);
+
+typedef bool (nstime_monotonic_t)(void);
+extern nstime_monotonic_t *JET_MUTABLE nstime_monotonic;
+
+typedef bool (nstime_update_t)(nstime_t *);
+extern nstime_update_t *JET_MUTABLE nstime_update;
+
+#endif /* JEMALLOC_INTERNAL_NSTIME_H */
--- a/deps/jemalloc/include/jemalloc/internal/pages.h
+++ b/deps/jemalloc/include/jemalloc/internal/pages.h
@@ -0,0 +1,71 @@
+#ifndef JEMALLOC_INTERNAL_PAGES_EXTERNS_H
+#define JEMALLOC_INTERNAL_PAGES_EXTERNS_H
+
+/* Page size.  LG_PAGE is determined by the configure script. */
+#ifdef PAGE_MASK
+#  undef PAGE_MASK
+#endif
+#define PAGE		((size_t)(1U << LG_PAGE))
+#define PAGE_MASK	((size_t)(PAGE - 1))
+/* Return the page base address for the page containing address a. */
+#define PAGE_ADDR2BASE(a)						\
+	((void *)((uintptr_t)(a) & ~PAGE_MASK))
+/* Return the smallest pagesize multiple that is >= s. */
+#define PAGE_CEILING(s)							\
+	(((s) + PAGE_MASK) & ~PAGE_MASK)
+
+/* Huge page size.  LG_HUGEPAGE is determined by the configure script. */
+#define HUGEPAGE	((size_t)(1U << LG_HUGEPAGE))
+#define HUGEPAGE_MASK	((size_t)(HUGEPAGE - 1))
+/* Return the huge page base address for the huge page containing address a. */
+#define HUGEPAGE_ADDR2BASE(a)						\
+	((void *)((uintptr_t)(a) & ~HUGEPAGE_MASK))
+/* Return the smallest huge page size multiple that is >= s. */
+#define HUGEPAGE_CEILING(s)						\
+	(((s) + HUGEPAGE_MASK) & ~HUGEPAGE_MASK)
+
+/* PAGES_CAN_PURGE_LAZY is defined if lazy purging is supported. */
+#if defined(_WIN32) || defined(JEMALLOC_PURGE_MADVISE_FREE)
+#  define PAGES_CAN_PURGE_LAZY
+#endif
+/*
+ * PAGES_CAN_PURGE_FORCED is defined if forced purging is supported.
+ *
+ * The only supported way to hard-purge on Windows is to decommit and then
+ * re-commit, but doing so is racy, and if re-commit fails it's a pain to
+ * propagate the "poisoned" memory state.  Since we typically decommit as the
+ * next step after purging on Windows anyway, there's no point in adding such
+ * complexity.
+ */
+#if !defined(_WIN32) && ((defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
+    defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)) || \
+    defined(JEMALLOC_MAPS_COALESCE))
+#  define PAGES_CAN_PURGE_FORCED
+#endif
+
+static const bool pages_can_purge_lazy =
+#ifdef PAGES_CAN_PURGE_LAZY
+    true
+#else
+    false
+#endif
+    ;
+static const bool pages_can_purge_forced =
+#ifdef PAGES_CAN_PURGE_FORCED
+    true
+#else
+    false
+#endif
+    ;
+
+void *pages_map(void *addr, size_t size, size_t alignment, bool *commit);
+void pages_unmap(void *addr, size_t size);
+bool pages_commit(void *addr, size_t size);
+bool pages_decommit(void *addr, size_t size);
+bool pages_purge_lazy(void *addr, size_t size);
+bool pages_purge_forced(void *addr, size_t size);
+bool pages_huge(void *addr, size_t size);
+bool pages_nohuge(void *addr, size_t size);
+bool pages_boot(void);
+
+#endif /* JEMALLOC_INTERNAL_PAGES_EXTERNS_H */
--- a/deps/jemalloc/include/jemalloc/internal/ph.h
+++ b/deps/jemalloc/include/jemalloc/internal/ph.h
@@ -0,0 +1,391 @@
+/*
+ * A Pairing Heap implementation.
+ *
+ * "The Pairing Heap: A New Form of Self-Adjusting Heap"
+ * https://www.cs.cmu.edu/~sleator/papers/pairing-heaps.pdf
+ *
+ * With an auxiliary two-pass list, described in a follow-on paper.
+ *
+ * "Pairing Heaps: Experiments and Analysis"
+ * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.106.2988&rep=rep1&type=pdf
+ *
+ *******************************************************************************
+ */
+
+#ifndef PH_H_
+#define PH_H_
+
+/* Node structure: list links (prev/next) and the leftmost child link. */
+#define phn(a_type)							\
+struct {								\
+	a_type	*phn_prev;						\
+	a_type	*phn_next;						\
+	a_type	*phn_lchild;						\
+}
+
+/* Root structure. */
+#define ph(a_type)							\
+struct {								\
+	a_type	*ph_root;						\
+}
+
+/* Internal utility macros. */
+#define phn_lchild_get(a_type, a_field, a_phn)				\
+	(a_phn->a_field.phn_lchild)
+#define phn_lchild_set(a_type, a_field, a_phn, a_lchild) do {		\
+	a_phn->a_field.phn_lchild = a_lchild;				\
+} while (0)
+
+#define phn_next_get(a_type, a_field, a_phn)				\
+	(a_phn->a_field.phn_next)
+#define phn_prev_set(a_type, a_field, a_phn, a_prev) do {		\
+	a_phn->a_field.phn_prev = a_prev;				\
+} while (0)
+
+#define phn_prev_get(a_type, a_field, a_phn)				\
+	(a_phn->a_field.phn_prev)
+#define phn_next_set(a_type, a_field, a_phn, a_next) do {		\
+	a_phn->a_field.phn_next = a_next;				\
+} while (0)
+
+#define phn_merge_ordered(a_type, a_field, a_phn0, a_phn1, a_cmp) do {	\
+	a_type *phn0child;						\
+									\
+	assert(a_phn0 != NULL);						\
+	assert(a_phn1 != NULL);						\
+	assert(a_cmp(a_phn0, a_phn1) <= 0);				\
+									\
+	phn_prev_set(a_type, a_field, a_phn1, a_phn0);			\
+	phn0child = phn_lchild_get(a_type, a_field, a_phn0);		\
+	phn_next_set(a_type, a_field, a_phn1, phn0child);		\
+	if (phn0child != NULL) {					\
+		phn_prev_set(a_type, a_field, phn0child, a_phn1);	\
+	}								\
+	phn_lchild_set(a_type, a_field, a_phn0, a_phn1);		\
+} while (0)
+
+#define phn_merge(a_type, a_field, a_phn0, a_phn1, a_cmp, r_phn) do {	\
+	if (a_phn0 == NULL) {						\
+		r_phn = a_phn1;						\
+	} else if (a_phn1 == NULL) {					\
+		r_phn = a_phn0;						\
+	} else if (a_cmp(a_phn0, a_phn1) < 0) {				\
+		phn_merge_ordered(a_type, a_field, a_phn0, a_phn1,	\
+		    a_cmp);						\
+		r_phn = a_phn0;						\
+	} else {							\
+		phn_merge_ordered(a_type, a_field, a_phn1, a_phn0,	\
+		    a_cmp);						\
+		r_phn = a_phn1;						\
+	}								\
+} while (0)
+
+#define ph_merge_siblings(a_type, a_field, a_phn, a_cmp, r_phn) do {	\
+	a_type *head = NULL;						\
+	a_type *tail = NULL;						\
+	a_type *phn0 = a_phn;						\
+	a_type *phn1 = phn_next_get(a_type, a_field, phn0);		\
+									\
+	/*								\
+	 * Multipass merge, wherein the first two elements of a FIFO	\
+	 * are repeatedly merged, and each result is appended to the	\
+	 * singly linked FIFO, until the FIFO contains only a single	\
+	 * element.  We start with a sibling list but no reference to	\
+	 * its tail, so we do a single pass over the sibling list to	\
+	 * populate the FIFO.						\
+	 */								\
+	if (phn1 != NULL) {						\
+		a_type *phnrest = phn_next_get(a_type, a_field, phn1);	\
+		if (phnrest != NULL) {					\
+			phn_prev_set(a_type, a_field, phnrest, NULL);	\
+		}							\
+		phn_prev_set(a_type, a_field, phn0, NULL);		\
+		phn_next_set(a_type, a_field, phn0, NULL);		\
+		phn_prev_set(a_type, a_field, phn1, NULL);		\
+		phn_next_set(a_type, a_field, phn1, NULL);		\
+		phn_merge(a_type, a_field, phn0, phn1, a_cmp, phn0);	\
+		head = tail = phn0;					\
+		phn0 = phnrest;						\
+		while (phn0 != NULL) {					\
+			phn1 = phn_next_get(a_type, a_field, phn0);	\
+			if (phn1 != NULL) {				\
+				phnrest = phn_next_get(a_type, a_field,	\
+				    phn1);				\
+				if (phnrest != NULL) {			\
+					phn_prev_set(a_type, a_field,	\
+					    phnrest, NULL);		\
+				}					\
+				phn_prev_set(a_type, a_field, phn0,	\
+				    NULL);				\
+				phn_next_set(a_type, a_field, phn0,	\
+				    NULL);				\
+				phn_prev_set(a_type, a_field, phn1,	\
+				    NULL);				\
+				phn_next_set(a_type, a_field, phn1,	\
+				    NULL);				\
+				phn_merge(a_type, a_field, phn0, phn1,	\
+				    a_cmp, phn0);			\
+				phn_next_set(a_type, a_field, tail,	\
+				    phn0);				\
+				tail = phn0;				\
+				phn0 = phnrest;				\
+			} else {					\
+				phn_next_set(a_type, a_field, tail,	\
+				    phn0);				\
+				tail = phn0;				\
+				phn0 = NULL;				\
+			}						\
+		}							\
+		phn0 = head;						\
+		phn1 = phn_next_get(a_type, a_field, phn0);		\
+		if (phn1 != NULL) {					\
+			while (true) {					\
+				head = phn_next_get(a_type, a_field,	\
+				    phn1);				\
+				assert(phn_prev_get(a_type, a_field,	\
+				    phn0) == NULL);			\
+				phn_next_set(a_type, a_field, phn0,	\
+				    NULL);				\
+				assert(phn_prev_get(a_type, a_field,	\
+				    phn1) == NULL);			\
+				phn_next_set(a_type, a_field, phn1,	\
+				    NULL);				\
+				phn_merge(a_type, a_field, phn0, phn1,	\
+				    a_cmp, phn0);			\
+				if (head == NULL) {			\
+					break;				\
+				}					\
+				phn_next_set(a_type, a_field, tail,	\
+				    phn0);				\
+				tail = phn0;				\
+				phn0 = head;				\
+				phn1 = phn_next_get(a_type, a_field,	\
+				    phn0);				\
+			}						\
+		}							\
+	}								\
+	r_phn = phn0;							\
+} while (0)
+
+#define ph_merge_aux(a_type, a_field, a_ph, a_cmp) do {			\
+	a_type *phn = phn_next_get(a_type, a_field, a_ph->ph_root);	\
+	if (phn != NULL) {						\
+		phn_prev_set(a_type, a_field, a_ph->ph_root, NULL);	\
+		phn_next_set(a_type, a_field, a_ph->ph_root, NULL);	\
+		phn_prev_set(a_type, a_field, phn, NULL);		\
+		ph_merge_siblings(a_type, a_field, phn, a_cmp, phn);	\
+		assert(phn_next_get(a_type, a_field, phn) == NULL);	\
+		phn_merge(a_type, a_field, a_ph->ph_root, phn, a_cmp,	\
+		    a_ph->ph_root);					\
+	}								\
+} while (0)
+
+#define ph_merge_children(a_type, a_field, a_phn, a_cmp, r_phn) do {	\
+	a_type *lchild = phn_lchild_get(a_type, a_field, a_phn);	\
+	if (lchild == NULL) {						\
+		r_phn = NULL;						\
+	} else {							\
+		ph_merge_siblings(a_type, a_field, lchild, a_cmp,	\
+		    r_phn);						\
+	}								\
+} while (0)
+
+/*
+ * The ph_proto() macro generates function prototypes that correspond to the
+ * functions generated by an equivalently parameterized call to ph_gen().
+ */
+#define ph_proto(a_attr, a_prefix, a_ph_type, a_type)			\
+a_attr void	a_prefix##new(a_ph_type *ph);				\
+a_attr bool	a_prefix##empty(a_ph_type *ph);				\
+a_attr a_type	*a_prefix##first(a_ph_type *ph);			\
+a_attr a_type	*a_prefix##any(a_ph_type *ph);				\
+a_attr void	a_prefix##insert(a_ph_type *ph, a_type *phn);		\
+a_attr a_type	*a_prefix##remove_first(a_ph_type *ph);			\
+a_attr a_type	*a_prefix##remove_any(a_ph_type *ph);			\
+a_attr void	a_prefix##remove(a_ph_type *ph, a_type *phn);
+
+/*
+ * The ph_gen() macro generates a type-specific pairing heap implementation,
+ * based on the above cpp macros.
+ */
+#define ph_gen(a_attr, a_prefix, a_ph_type, a_type, a_field, a_cmp)	\
+a_attr void								\
+a_prefix##new(a_ph_type *ph) {						\
+	memset(ph, 0, sizeof(ph(a_type)));				\
+}									\
+a_attr bool								\
+a_prefix##empty(a_ph_type *ph) {					\
+	return (ph->ph_root == NULL);					\
+}									\
+a_attr a_type *								\
+a_prefix##first(a_ph_type *ph) {					\
+	if (ph->ph_root == NULL) {					\
+		return NULL;						\
+	}								\
+	ph_merge_aux(a_type, a_field, ph, a_cmp);			\
+	return ph->ph_root;						\
+}									\
+a_attr a_type *								\
+a_prefix##any(a_ph_type *ph) {						\
+	if (ph->ph_root == NULL) {					\
+		return NULL;						\
+	}								\
+	a_type *aux = phn_next_get(a_type, a_field, ph->ph_root);	\
+	if (aux != NULL) {						\
+		return aux;						\
+	}								\
+	return ph->ph_root;						\
+}									\
+a_attr void								\
+a_prefix##insert(a_ph_type *ph, a_type *phn) {				\
+	memset(&phn->a_field, 0, sizeof(phn(a_type)));			\
+									\
+	/*								\
+	 * Treat the root as an aux list during insertion, and lazily	\
+	 * merge during a_prefix##remove_first().  For elements that	\
+	 * are inserted, then removed via a_prefix##remove() before the	\
+	 * aux list is ever processed, this makes insert/remove		\
+	 * constant-time, whereas eager merging would make insert	\
+	 * O(log n).							\
+	 */								\
+	if (ph->ph_root == NULL) {					\
+		ph->ph_root = phn;					\
+	} else {							\
+		phn_next_set(a_type, a_field, phn, phn_next_get(a_type,	\
+		    a_field, ph->ph_root));				\
+		if (phn_next_get(a_type, a_field, ph->ph_root) !=	\
+		    NULL) {						\
+			phn_prev_set(a_type, a_field,			\
+			    phn_next_get(a_type, a_field, ph->ph_root),	\
+			    phn);					\
+		}							\
+		phn_prev_set(a_type, a_field, phn, ph->ph_root);	\
+		phn_next_set(a_type, a_field, ph->ph_root, phn);	\
+	}								\
+}									\
+a_attr a_type *								\
+a_prefix##remove_first(a_ph_type *ph) {					\
+	a_type *ret;							\
+									\
+	if (ph->ph_root == NULL) {					\
+		return NULL;						\
+	}								\
+	ph_merge_aux(a_type, a_field, ph, a_cmp);			\
+									\
+	ret = ph->ph_root;						\
+									\
+	ph_merge_children(a_type, a_field, ph->ph_root, a_cmp,		\
+	    ph->ph_root);						\
+									\
+	return ret;							\
+}									\
+a_attr a_type *								\
+a_prefix##remove_any(a_ph_type *ph) {					\
+	/*								\
+	 * Remove the most recently inserted aux list element, or the	\
+	 * root if the aux list is empty.  This has the effect of	\
+	 * behaving as a LIFO (and insertion/removal is therefore	\
+	 * constant-time) if a_prefix##[remove_]first() are never	\
+	 * called.							\
+	 */								\
+	if (ph->ph_root == NULL) {					\
+		return NULL;						\
+	}								\
+	a_type *ret = phn_next_get(a_type, a_field, ph->ph_root);	\
+	if (ret != NULL) {						\
+		a_type *aux = phn_next_get(a_type, a_field, ret);	\
+		phn_next_set(a_type, a_field, ph->ph_root, aux);	\
+		if (aux != NULL) {					\
+			phn_prev_set(a_type, a_field, aux,		\
+			    ph->ph_root);				\
+		}							\
+		return ret;						\
+	}								\
+	ret = ph->ph_root;						\
+	ph_merge_children(a_type, a_field, ph->ph_root, a_cmp,		\
+	    ph->ph_root);						\
+	return ret;							\
+}									\
+a_attr void								\
+a_prefix##remove(a_ph_type *ph, a_type *phn) {				\
+	a_type *replace, *parent;					\
+									\
+	if (ph->ph_root == phn) {					\
+		/*							\
+		 * We can delete from aux list without merging it, but	\
+		 * we need to merge if we are dealing with the root	\
+		 * node and it has children.				\
+		 */							\
+		if (phn_lchild_get(a_type, a_field, phn) == NULL) {	\
+			ph->ph_root = phn_next_get(a_type, a_field,	\
+			    phn);					\
+			if (ph->ph_root != NULL) {			\
+				phn_prev_set(a_type, a_field,		\
+				    ph->ph_root, NULL);			\
+			}						\
+			return;						\
+		}							\
+		ph_merge_aux(a_type, a_field, ph, a_cmp);		\
+		if (ph->ph_root == phn) {				\
+			ph_merge_children(a_type, a_field, ph->ph_root,	\
+			    a_cmp, ph->ph_root);			\
+			return;						\
+		}							\
+	}								\
+									\
+	/* Get parent (if phn is leftmost child) before mutating. */	\
+	if ((parent = phn_prev_get(a_type, a_field, phn)) != NULL) {	\
+		if (phn_lchild_get(a_type, a_field, parent) != phn) {	\
+			parent = NULL;					\
+		}							\
+	}								\
+	/* Find a possible replacement node, and link to parent. */	\
+	ph_merge_children(a_type, a_field, phn, a_cmp, replace);	\
+	/* Set next/prev for sibling linked list. */			\
+	if (replace != NULL) {						\
+		if (parent != NULL) {					\
+			phn_prev_set(a_type, a_field, replace, parent);	\
+			phn_lchild_set(a_type, a_field, parent,		\
+			    replace);					\
+		} else {						\
+			phn_prev_set(a_type, a_field, replace,		\
+			    phn_prev_get(a_type, a_field, phn));	\
+			if (phn_prev_get(a_type, a_field, phn) !=	\
+			    NULL) {					\
+				phn_next_set(a_type, a_field,		\
+				    phn_prev_get(a_type, a_field, phn),	\
+				    replace);				\
+			}						\
+		}							\
+		phn_next_set(a_type, a_field, replace,			\
+		    phn_next_get(a_type, a_field, phn));		\
+		if (phn_next_get(a_type, a_field, phn) != NULL) {	\
+			phn_prev_set(a_type, a_field,			\
+			    phn_next_get(a_type, a_field, phn),		\
+			    replace);					\
+		}							\
+	} else {							\
+		if (parent != NULL) {					\
+			a_type *next = phn_next_get(a_type, a_field,	\
+			    phn);					\
+			phn_lchild_set(a_type, a_field, parent, next);	\
+			if (next != NULL) {				\
+				phn_prev_set(a_type, a_field, next,	\
+				    parent);				\
+			}						\
+		} else {						\
+			assert(phn_prev_get(a_type, a_field, phn) !=	\
+			    NULL);					\
+			phn_next_set(a_type, a_field,			\
+			    phn_prev_get(a_type, a_field, phn),		\
+			    phn_next_get(a_type, a_field, phn));	\
+		}							\
+		if (phn_next_get(a_type, a_field, phn) != NULL) {	\
+			phn_prev_set(a_type, a_field,			\
+			    phn_next_get(a_type, a_field, phn),		\
+			    phn_prev_get(a_type, a_field, phn));	\
+		}							\
+	}								\
+}
+
+#endif /* PH_H_ */
--- a/deps/jemalloc/include/jemalloc/internal/prof_externs.h
+++ b/deps/jemalloc/include/jemalloc/internal/prof_externs.h
@@ -0,0 +1,92 @@
+#ifndef JEMALLOC_INTERNAL_PROF_EXTERNS_H
+#define JEMALLOC_INTERNAL_PROF_EXTERNS_H
+
+#include "jemalloc/internal/mutex.h"
+
+extern malloc_mutex_t	bt2gctx_mtx;
+
+extern bool	opt_prof;
+extern bool	opt_prof_active;
+extern bool	opt_prof_thread_active_init;
+extern size_t	opt_lg_prof_sample;   /* Mean bytes between samples. */
+extern ssize_t	opt_lg_prof_interval; /* lg(prof_interval). */
+extern bool	opt_prof_gdump;       /* High-water memory dumping. */
+extern bool	opt_prof_final;       /* Final profile dumping. */
+extern bool	opt_prof_leak;        /* Dump leak summary at exit. */
+extern bool	opt_prof_accum;       /* Report cumulative bytes. */
+extern char	opt_prof_prefix[
+    /* Minimize memory bloat for non-prof builds. */
+#ifdef JEMALLOC_PROF
+    PATH_MAX +
+#endif
+    1];
+
+/* Accessed via prof_active_[gs]et{_unlocked,}(). */
+extern bool	prof_active;
+
+/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */
+extern bool	prof_gdump_val;
+
+/*
+ * Profile dump interval, measured in bytes allocated.  Each arena triggers a
+ * profile dump when it reaches this threshold.  The effect is that the
+ * interval between profile dumps averages prof_interval, though the actual
+ * interval between dumps will tend to be sporadic, and the interval will be a
+ * maximum of approximately (prof_interval * narenas).
+ */
+extern uint64_t	prof_interval;
+
+/*
+ * Initialized as opt_lg_prof_sample, and potentially modified during profiling
+ * resets.
+ */
+extern size_t	lg_prof_sample;
+
+void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated);
+void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize,
+    prof_tctx_t *tctx);
+void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx);
+void bt_init(prof_bt_t *bt, void **vec);
+void prof_backtrace(prof_bt_t *bt);
+prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt);
+#ifdef JEMALLOC_JET
+size_t prof_tdata_count(void);
+size_t prof_bt_count(void);
+#endif
+typedef int (prof_dump_open_t)(bool, const char *);
+extern prof_dump_open_t *JET_MUTABLE prof_dump_open;
+
+typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *);
+extern prof_dump_header_t *JET_MUTABLE prof_dump_header;
+#ifdef JEMALLOC_JET
+void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs,
+    uint64_t *accumbytes);
+#endif
+bool prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum);
+void prof_idump(tsdn_t *tsdn);
+bool prof_mdump(tsd_t *tsd, const char *filename);
+void prof_gdump(tsdn_t *tsdn);
+prof_tdata_t *prof_tdata_init(tsd_t *tsd);
+prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata);
+void prof_reset(tsd_t *tsd, size_t lg_sample);
+void prof_tdata_cleanup(tsd_t *tsd);
+bool prof_active_get(tsdn_t *tsdn);
+bool prof_active_set(tsdn_t *tsdn, bool active);
+const char *prof_thread_name_get(tsd_t *tsd);
+int prof_thread_name_set(tsd_t *tsd, const char *thread_name);
+bool prof_thread_active_get(tsd_t *tsd);
+bool prof_thread_active_set(tsd_t *tsd, bool active);
+bool prof_thread_active_init_get(tsdn_t *tsdn);
+bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init);
+bool prof_gdump_get(tsdn_t *tsdn);
+bool prof_gdump_set(tsdn_t *tsdn, bool active);
+void prof_boot0(void);
+void prof_boot1(void);
+bool prof_boot2(tsd_t *tsd);
+void prof_prefork0(tsdn_t *tsdn);
+void prof_prefork1(tsdn_t *tsdn);
+void prof_postfork_parent(tsdn_t *tsdn);
+void prof_postfork_child(tsdn_t *tsdn);
+void prof_sample_threshold_update(prof_tdata_t *tdata);
+
+#endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */
--- a/deps/jemalloc/include/jemalloc/internal/prof_inlines_a.h
+++ b/deps/jemalloc/include/jemalloc/internal/prof_inlines_a.h
@@ -0,0 +1,72 @@
+#ifndef JEMALLOC_INTERNAL_PROF_INLINES_A_H
+#define JEMALLOC_INTERNAL_PROF_INLINES_A_H
+
+#include "jemalloc/internal/mutex.h"
+
+static inline bool
+prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum, uint64_t accumbytes) {
+	cassert(config_prof);
+
+	bool overflow; /* Result: the new total reached prof_interval. */
+	uint64_t a0, a1; /* Counter value before / after the addition. */
+
+	/*
+	 * If the application allocates fast enough (and/or if idump is slow
+	 * enough), extreme overflow here (a1 >= prof_interval * 2) can cause
+	 * idump trigger coalescing.  This is an intentional mechanism that
+	 * avoids rate-limiting allocation.
+	 */
+#ifdef JEMALLOC_ATOMIC_U64
+	a0 = atomic_load_u64(&prof_accum->accumbytes, ATOMIC_RELAXED);
+	do { /* CAS loop: retry until the exchange succeeds. */
+		a1 = a0 + accumbytes;
+		assert(a1 >= a0); /* The 64-bit sum must not wrap. */
+		overflow = (a1 >= prof_interval);
+		if (overflow) {
+			a1 %= prof_interval; /* Keep only the remainder. */
+		}
+	} while (!atomic_compare_exchange_weak_u64(&prof_accum->accumbytes, &a0,
+	    a1, ATOMIC_RELAXED, ATOMIC_RELAXED));
+#else /* !JEMALLOC_ATOMIC_U64: same update, done under the mutex. */
+	malloc_mutex_lock(tsdn, &prof_accum->mtx);
+	a0 = prof_accum->accumbytes;
+	a1 = a0 + accumbytes; /* NOTE(review): no wrap assert on this path. */
+	overflow = (a1 >= prof_interval);
+	if (overflow) {
+		a1 %= prof_interval;
+	}
+	prof_accum->accumbytes = a1;
+	malloc_mutex_unlock(tsdn, &prof_accum->mtx);
+#endif /* JEMALLOC_ATOMIC_U64 */
+	return overflow;
+}
+
+static inline void
+prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, size_t usize) {
+	cassert(config_prof);
+
+	/*
+	 * Cancel out as much of the excessive prof_accumbytes increase as
+	 * possible without underflowing: subtract (LARGE_MINCLASS - usize),
+	 * clamping at 0.  Interval-triggered dumps occur slightly more often
+	 * than intended as a result of incomplete canceling.
+	 */
+	uint64_t a0, a1; /* NOTE(review): assumes usize <= LARGE_MINCLASS. */
+#ifdef JEMALLOC_ATOMIC_U64
+	a0 = atomic_load_u64(&prof_accum->accumbytes, ATOMIC_RELAXED);
+	do { /* CAS loop: retry until the exchange succeeds. */
+		a1 = (a0 >= LARGE_MINCLASS - usize) ?  a0 - (LARGE_MINCLASS -
+		    usize) : 0;
+	} while (!atomic_compare_exchange_weak_u64(&prof_accum->accumbytes, &a0,
+	    a1, ATOMIC_RELAXED, ATOMIC_RELAXED));
+#else /* !JEMALLOC_ATOMIC_U64: same update, done under the mutex. */
+	malloc_mutex_lock(tsdn, &prof_accum->mtx);
+	a0 = prof_accum->accumbytes;
+	a1 = (a0 >= LARGE_MINCLASS - usize) ?  a0 - (LARGE_MINCLASS - usize) :
+	    0;
+	prof_accum->accumbytes = a1;
+	malloc_mutex_unlock(tsdn, &prof_accum->mtx);
+#endif /* JEMALLOC_ATOMIC_U64 */
+}
+
+#endif /* JEMALLOC_INTERNAL_PROF_INLINES_A_H */
--- a/deps/jemalloc/include/jemalloc/internal/prof_inlines_b.h
+++ b/deps/jemalloc/include/jemalloc/internal/prof_inlines_b.h
@@ -0,0 +1,217 @@
+#ifndef JEMALLOC_INTERNAL_PROF_INLINES_B_H
+#define JEMALLOC_INTERNAL_PROF_INLINES_B_H
+
+#include "jemalloc/internal/sz.h"
+
+JEMALLOC_ALWAYS_INLINE bool
+prof_active_get_unlocked(void) {
+	/*
+	 * Even if opt_prof is true, sampling can be temporarily disabled by
+	 * setting prof_active to false.  No locking is used when reading
+	 * prof_active in the fast path, so there are no guarantees regarding
+	 * how long it will take for all threads to notice state changes.
+	 */
+	return prof_active;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+prof_gdump_get_unlocked(void) {
+	/*
+	 * No locking is used when reading prof_gdump_val in the fast path, so
+	 * there are no guarantees regarding how long it will take for all
+	 * threads to notice state changes.
+	 */
+	return prof_gdump_val;
+}
+
+JEMALLOC_ALWAYS_INLINE prof_tdata_t *
+prof_tdata_get(tsd_t *tsd, bool create) {
+	prof_tdata_t *tdata;
+
+	cassert(config_prof);
+	/* Read tdata from tsd; (re)initialize it only when create is set. */
+	tdata = tsd_prof_tdata_get(tsd);
+	if (create) {
+		if (unlikely(tdata == NULL)) { /* Nothing stored yet. */
+			if (tsd_nominal(tsd)) {
+				tdata = prof_tdata_init(tsd);
+				tsd_prof_tdata_set(tsd, tdata);
+			}
+		} else if (unlikely(tdata->expired)) { /* Expired: reinit. */
+			tdata = prof_tdata_reinit(tsd, tdata);
+			tsd_prof_tdata_set(tsd, tdata);
+		}
+		assert(tdata == NULL || tdata->attached);
+	}
+
+	return tdata; /* May be NULL. */
+}
+
+JEMALLOC_ALWAYS_INLINE prof_tctx_t *
+prof_tctx_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) {
+	cassert(config_prof);
+	assert(ptr != NULL);
+
+	return arena_prof_tctx_get(tsdn, ptr, alloc_ctx);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize,
+    alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) {
+	cassert(config_prof);
+	assert(ptr != NULL);
+
+	arena_prof_tctx_set(tsdn, ptr, usize, alloc_ctx, tctx);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) {
+	cassert(config_prof);
+	assert(ptr != NULL);
+
+	arena_prof_tctx_reset(tsdn, ptr, tctx);
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
+    prof_tdata_t **tdata_out) {
+	prof_tdata_t *tdata;
+	/* Returns true when this allocation should not be sampled. */
+	cassert(config_prof);
+
+	tdata = prof_tdata_get(tsd, true);
+	if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) {
+		tdata = NULL; /* Presumably a state sentinel; confirm. */
+	}
+
+	if (tdata_out != NULL) {
+		*tdata_out = tdata; /* Optional out-parameter. */
+	}
+
+	if (unlikely(tdata == NULL)) {
+		return true; /* No usable tdata. */
+	}
+
+	if (likely(tdata->bytes_until_sample >= usize)) {
+		if (update) {
+			tdata->bytes_until_sample -= usize;
+		}
+		return true; /* Threshold not crossed yet. */
+	} else {
+		if (tsd_reentrancy_level_get(tsd) > 0) {
+			return true; /* Reentrant call: skip. */
+		}
+		/* Compute new sample threshold. */
+		if (update) {
+			prof_sample_threshold_update(tdata);
+		}
+		return !tdata->active; /* Sample only if tdata is active. */
+	}
+}
+
+JEMALLOC_ALWAYS_INLINE prof_tctx_t *
+prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) {
+	prof_tctx_t *ret;
+	prof_tdata_t *tdata;
+	prof_bt_t bt;
+
+	assert(usize == sz_s2u(usize));
+	/* Yields (prof_tctx_t *)1U when the allocation is not sampled. */
+	if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update,
+	    &tdata))) {
+		ret = (prof_tctx_t *)(uintptr_t)1U;
+	} else {
+		bt_init(&bt, tdata->vec); /* Backtrace goes into tdata->vec. */
+		prof_backtrace(&bt);
+		ret = prof_lookup(tsd, &bt);
+	}
+
+	return ret;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx,
+    prof_tctx_t *tctx) {
+	cassert(config_prof);
+	assert(ptr != NULL);
+	assert(usize == isalloc(tsdn, ptr));
+	/* tctx above 1U: sampled object; otherwise store the 1U marker. */
+	if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) {
+		prof_malloc_sample_object(tsdn, ptr, usize, tctx);
+	} else {
+		prof_tctx_set(tsdn, ptr, usize, alloc_ctx,
+		    (prof_tctx_t *)(uintptr_t)1U);
+	}
+}
+
+JEMALLOC_ALWAYS_INLINE void
+prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx,
+    bool prof_active, bool updated, const void *old_ptr, size_t old_usize,
+    prof_tctx_t *old_tctx) {
+	bool sampled, old_sampled, moved;
+
+	cassert(config_prof);
+	assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U);
+
+	if (prof_active && !updated && ptr != NULL) {
+		assert(usize == isalloc(tsd_tsdn(tsd), ptr));
+		if (prof_sample_accum_update(tsd, usize, true, NULL)) {
+			/*
+			 * Don't sample.  The usize passed to prof_alloc_prep()
+			 * was larger than what actually got allocated, so a
+			 * backtrace was captured for this allocation, even
+			 * though its actual usize was insufficient to cross the
+			 * sample threshold.
+			 */
+			prof_alloc_rollback(tsd, tctx, true);
+			tctx = (prof_tctx_t *)(uintptr_t)1U;
+		}
+	}
+
+	sampled = ((uintptr_t)tctx > (uintptr_t)1U); /* New object. */
+	old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U); /* Old object. */
+	moved = (ptr != old_ptr); /* Result address differs from the old. */
+
+	if (unlikely(sampled)) {
+		prof_malloc_sample_object(tsd_tsdn(tsd), ptr, usize, tctx);
+	} else if (moved) {
+		prof_tctx_set(tsd_tsdn(tsd), ptr, usize, NULL,
+		    (prof_tctx_t *)(uintptr_t)1U);
+	} else if (unlikely(old_sampled)) {
+		/*
+		 * prof_tctx_set() would work for the !moved case as well, but
+		 * prof_tctx_reset() is slightly cheaper, and the proper thing
+		 * to do here in the presence of explicit knowledge re: moved
+		 * state.
+		 */
+		prof_tctx_reset(tsd_tsdn(tsd), ptr, tctx);
+	} else {
+		assert((uintptr_t)prof_tctx_get(tsd_tsdn(tsd), ptr, NULL) ==
+		    (uintptr_t)1U);
+	}
+
+	/*
+	 * The prof_free_sampled_object() call must come after the
+	 * prof_malloc_sample_object() call, because tctx and old_tctx may be
+	 * the same, in which case reversing the call order could cause the tctx
+	 * to be prematurely destroyed as a side effect of momentarily zeroed
+	 * counters.
+	 */
+	if (unlikely(old_sampled)) {
+		prof_free_sampled_object(tsd, old_usize, old_tctx);
+	}
+}
+
+JEMALLOC_ALWAYS_INLINE void
+prof_free(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx) {
+	prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), ptr, alloc_ctx);
+
+	cassert(config_prof);
+	assert(usize == isalloc(tsd_tsdn(tsd), ptr));
+	/* Unsampled objects (tctx <= 1U) need no work here. */
+	if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) {
+		prof_free_sampled_object(tsd, usize, tctx);
+	}
+}
+
+#endif /* JEMALLOC_INTERNAL_PROF_INLINES_B_H */
--- a/deps/jemalloc/include/jemalloc/internal/prof_structs.h
+++ b/deps/jemalloc/include/jemalloc/internal/prof_structs.h
@@ -0,0 +1,201 @@
+#ifndef JEMALLOC_INTERNAL_PROF_STRUCTS_H
+#define JEMALLOC_INTERNAL_PROF_STRUCTS_H
+
+#include "jemalloc/internal/ckh.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/prng.h"
+#include "jemalloc/internal/rb.h"
+
+struct prof_bt_s {
+	/* Backtrace, stored as len program counters. */
+	void		**vec;	/* Program counter array. */
+	unsigned	len;	/* Number of valid entries in vec. */
+};
+
+#ifdef JEMALLOC_PROF_LIBGCC
+/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
+typedef struct {
+	prof_bt_t	*bt;	/* Backtrace handed to the callback. */
+	unsigned	max;	/* NOTE(review): presumably a depth cap; confirm. */
+} prof_unwind_data_t;
+#endif /* JEMALLOC_PROF_LIBGCC */
+
+struct prof_accum_s {	/* Byte accumulator; layout depends on 64-bit atomics. */
+#ifndef JEMALLOC_ATOMIC_U64
+	malloc_mutex_t	mtx;	/* NOTE(review): presumably guards accumbytes. */
+	uint64_t	accumbytes;
+#else
+	atomic_u64_t	accumbytes;
+#endif /* JEMALLOC_ATOMIC_U64 */
+};
+
+struct prof_cnt_s {
+	/* Profiling counters. */
+	uint64_t	curobjs;	/* NOTE(review): presumably live objects. */
+	uint64_t	curbytes;	/* NOTE(review): presumably live bytes. */
+	uint64_t	accumobjs;	/* NOTE(review): presumably cumulative. */
+	uint64_t	accumbytes;	/* NOTE(review): presumably cumulative. */
+};
+
+typedef enum {	/* Values of prof_tctx_s.state (dump-related tctx state). */
+	prof_tctx_state_initializing,
+	prof_tctx_state_nominal,
+	prof_tctx_state_dumping,
+	prof_tctx_state_purgatory /* Dumper must finish destroying. */
+} prof_tctx_state_t;
+
+struct prof_tctx_s {	/* One thread's counters for one backtrace (gctx). */
+	/* Thread data for thread that performed the allocation. */
+	prof_tdata_t		*tdata;
+
+	/*
+	 * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be
+	 * defunct during teardown.
+	 */
+	uint64_t		thr_uid;
+	uint64_t		thr_discrim;
+
+	/* Profiling counters, protected by tdata->lock. */
+	prof_cnt_t		cnts;
+
+	/* Associated global context. */
+	prof_gctx_t		*gctx;
+
+	/*
+	 * UID that distinguishes multiple tctx's created by the same thread,
+	 * but coexisting in gctx->tctxs.  There are two ways that such
+	 * coexistence can occur:
+	 * - A dumper thread can cause a tctx to be retained in the purgatory
+	 *   state.
+	 * - Although a single "producer" thread must create all tctx's which
+	 *   share the same thr_uid, multiple "consumers" can each concurrently
+	 *   execute portions of prof_tctx_destroy().  prof_tctx_destroy() only
+	 *   gets called once each time cnts.cur{objs,bytes} drop to 0, but this
+	 *   threshold can be hit again before the first consumer finishes
+	 *   executing prof_tctx_destroy().
+	 */
+	uint64_t		tctx_uid;
+
+	/* Linkage into gctx's tctxs. */
+	rb_node(prof_tctx_t)	tctx_link;
+
+	/*
+	 * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents
+	 * sample vs destroy race.
+	 */
+	bool			prepared;
+
+	/* Current dump-related state, protected by gctx->lock. */
+	prof_tctx_state_t	state;
+
+	/*
+	 * Copy of cnts snapshotted during early dump phase, protected by
+	 * dump_mtx.
+	 */
+	prof_cnt_t		dump_cnts;
+};
+typedef rb_tree(prof_tctx_t) prof_tctx_tree_t;	/* Type of gctx->tctxs. */
+
+struct prof_gctx_s {	/* Per backtrace context shared by all threads. */
+	/* Protects nlimbo, cnt_summed, and tctxs. */
+	malloc_mutex_t		*lock;
+
+	/*
+	 * Number of threads that currently cause this gctx to be in a state of
+	 * limbo due to one of:
+	 *   - Initializing this gctx.
+	 *   - Initializing per thread counters associated with this gctx.
+	 *   - Preparing to destroy this gctx.
+	 *   - Dumping a heap profile that includes this gctx.
+	 * nlimbo must be 1 (single destroyer) in order to safely destroy the
+	 * gctx.
+	 */
+	unsigned		nlimbo;
+
+	/*
+	 * Tree of profile counters, one for each thread that has allocated in
+	 * this context.
+	 */
+	prof_tctx_tree_t	tctxs;
+
+	/* Linkage for tree of contexts to be dumped. */
+	rb_node(prof_gctx_t)	dump_link;
+
+	/* Temporary storage for summation during dump. */
+	prof_cnt_t		cnt_summed;
+
+	/* Associated backtrace. */
+	prof_bt_t		bt;
+
+	/* Backtrace vector, variable size, referred to by bt. */
+	void			*vec[1];	/* Stays last: storage extends past it. */
+};
+typedef rb_tree(prof_gctx_t) prof_gctx_tree_t;	/* Presumably the dump tree. */
+
+struct prof_tdata_s {
+	malloc_mutex_t		*lock;	/* Protects tctx cnts (see prof_tctx_s). */
+
+	/* Monotonically increasing unique thread identifier. */
+	uint64_t		thr_uid;
+
+	/*
+	 * Monotonically increasing discriminator among tdata structures
+	 * associated with the same thr_uid.
+	 */
+	uint64_t		thr_discrim;
+
+	/* Included in heap profile dumps if non-NULL. */
+	char			*thread_name;
+
+	bool			attached;	/* NOTE(review): presumably thread-bound. */
+	bool			expired;	/* NOTE(review): presumably retired. */
+
+	rb_node(prof_tdata_t)	tdata_link;	/* Linkage into a prof_tdata_tree_t. */
+
+	/*
+	 * Counter used to initialize prof_tctx_t's tctx_uid.  No locking is
+	 * necessary when incrementing this field, because only one thread ever
+	 * does so.
+	 */
+	uint64_t		tctx_uid_next;
+
+	/*
+	 * Hash of (prof_bt_t *)-->(prof_tctx_t *).  Each thread tracks
+	 * backtraces for which it has non-zero allocation/deallocation counters
+	 * associated with thread-specific prof_tctx_t objects.  Other threads
+	 * may write to prof_tctx_t contents when freeing associated objects.
+	 */
+	ckh_t			bt2tctx;
+
+	/* Sampling state. */
+	uint64_t		prng_state;
+	uint64_t		bytes_until_sample;
+
+	/* State used to avoid dumping while operating on prof internals. */
+	bool			enq;
+	bool			enq_idump;
+	bool			enq_gdump;
+
+	/*
+	 * Set to true during an early dump phase for tdata's which are
+	 * currently being dumped.  New threads' tdata's have this initialized
+	 * to false so that they aren't accidentally included in later dump
+	 * phases.
+	 */
+	bool			dumping;
+
+	/*
+	 * True if profiling is active for this tdata's thread
+	 * (thread.prof.active mallctl).
+	 */
+	bool			active;
+
+	/* Temporary storage for summation during dump. */
+	prof_cnt_t		cnt_summed;
+
+	/* Backtrace vector, used for calls to prof_backtrace(). */
+	void			*vec[PROF_BT_MAX];
+};
+typedef rb_tree(prof_tdata_t) prof_tdata_tree_t;	/* Tree of tdata's. */
+
+#endif /* JEMALLOC_INTERNAL_PROF_STRUCTS_H */
--- a/deps/jemalloc/include/jemalloc/internal/prof_types.h
+++ b/deps/jemalloc/include/jemalloc/internal/prof_types.h
@@ -0,0 +1,56 @@
+#ifndef JEMALLOC_INTERNAL_PROF_TYPES_H
+#define JEMALLOC_INTERNAL_PROF_TYPES_H
+
+typedef struct prof_bt_s prof_bt_t;
+typedef struct prof_accum_s prof_accum_t;
+typedef struct prof_cnt_s prof_cnt_t;
+typedef struct prof_tctx_s prof_tctx_t;
+typedef struct prof_gctx_s prof_gctx_t;
+typedef struct prof_tdata_s prof_tdata_t;
+
+/* Option defaults (LG_* are presumably base 2 logarithms; confirm). */
+#ifdef JEMALLOC_PROF
+#  define PROF_PREFIX_DEFAULT		"jeprof"
+#else /* !JEMALLOC_PROF */
+#  define PROF_PREFIX_DEFAULT		""
+#endif /* JEMALLOC_PROF */
+#define LG_PROF_SAMPLE_DEFAULT		19
+#define LG_PROF_INTERVAL_DEFAULT	-1
+
+/*
+ * Hard limit on stack backtrace depth.  The version of prof_backtrace() that
+ * is based on __builtin_return_address() necessarily has a hard-coded number
+ * of backtrace frame handlers, and should be kept in sync with this setting.
+ */
+#define PROF_BT_MAX			128
+
+/* Initial hash table size. */
+#define PROF_CKH_MINITEMS		64
+
+/* Size of memory buffer to use when writing dump files. */
+#define PROF_DUMP_BUFSIZE		65536
+
+/* Size of stack-allocated buffer used by prof_printf(). */
+#define PROF_PRINTF_BUFSIZE		128
+
+/*
+ * Number of mutexes shared among all gctx's.  No space is allocated for these
+ * unless profiling is enabled, so it's okay to over-provision.
+ */
+#define PROF_NCTX_LOCKS			1024
+
+/*
+ * Number of mutexes shared among all tdata's.  No space is allocated for these
+ * unless profiling is enabled, so it's okay to over-provision.
+ */
+#define PROF_NTDATA_LOCKS		256
+
+/*
+ * prof_tdata pointers close to NULL are used to encode state information that
+ * is used for cleaning up during thread shutdown.
+ */
+#define PROF_TDATA_STATE_REINCARNATED	((prof_tdata_t *)(uintptr_t)1)
+#define PROF_TDATA_STATE_PURGATORY	((prof_tdata_t *)(uintptr_t)2)
+#define PROF_TDATA_STATE_MAX		PROF_TDATA_STATE_PURGATORY /* Largest. */
+
+#endif /* JEMALLOC_INTERNAL_PROF_TYPES_H */
--- a/deps/jemalloc/include/jemalloc/internal/rtree_tsd.h
+++ b/deps/jemalloc/include/jemalloc/internal/rtree_tsd.h
@@ -0,0 +1,50 @@
+#ifndef JEMALLOC_INTERNAL_RTREE_CTX_H
+#define JEMALLOC_INTERNAL_RTREE_CTX_H
+
+/*
+ * Number of leafkey/leaf pairs to cache in L1 and L2 level respectively.  Each
+ * entry supports an entire leaf, so the cache hit rate is typically high even
+ * with a small number of entries.  In rare cases extent activity will straddle
+ * the boundary between two leaf nodes.  Furthermore, an arena may use a
+ * combination of dss and mmap.  Note that as memory usage grows past the amount
+ * that this cache can directly cover, the cache will become less effective if
+ * locality of reference is low, but the consequence is merely cache misses
+ * while traversing the tree nodes.
+ *
+ * The L1 direct mapped cache offers consistent and low cost on cache hit.
+ * However collision could affect hit rate negatively.  This is resolved by
+ * combining with a L2 LRU cache, which requires linear search and re-ordering
+ * on access but suffers no collision.  Note that, the cache will itself suffer
+ * cache misses if made overly large, plus the cost of linear search in the LRU
+ * cache.
+ */
+#define RTREE_CTX_LG_NCACHE 4	/* lg(number of L1 entries). */
+#define RTREE_CTX_NCACHE (1 << RTREE_CTX_LG_NCACHE)	/* L1 entries: 16. */
+#define RTREE_CTX_NCACHE_L2 8	/* L2 entries. */
+
+/*
+ * Zero initializer required for tsd initialization only.  Proper initialization
+ * done via rtree_ctx_data_init().
+ */
+#define RTREE_CTX_ZERO_INITIALIZER {{{0}}}
+
+
+typedef struct rtree_leaf_elm_s rtree_leaf_elm_t;
+
+typedef struct rtree_ctx_cache_elm_s rtree_ctx_cache_elm_t;
+struct rtree_ctx_cache_elm_s {	/* One cached leafkey/leaf pair. */
+	uintptr_t		leafkey;	/* Key the cached leaf is stored under. */
+	rtree_leaf_elm_t	*leaf;		/* Leaf cached for leafkey. */
+};
+
+typedef struct rtree_ctx_s rtree_ctx_t;
+struct rtree_ctx_s {
+	/* Direct mapped (L1) cache, RTREE_CTX_NCACHE entries. */
+	rtree_ctx_cache_elm_t	cache[RTREE_CTX_NCACHE];
+	/* L2 LRU cache, searched linearly, RTREE_CTX_NCACHE_L2 entries. */
+	rtree_ctx_cache_elm_t	l2_cache[RTREE_CTX_NCACHE_L2];
+};
+
+void rtree_ctx_data_init(rtree_ctx_t *ctx);
+
+#endif /* JEMALLOC_INTERNAL_RTREE_CTX_H */
--- a/deps/jemalloc/include/jemalloc/internal/smoothstep.h
+++ b/deps/jemalloc/include/jemalloc/internal/smoothstep.h
@@ -0,0 +1,232 @@
+#ifndef JEMALLOC_INTERNAL_SMOOTHSTEP_H
+#define JEMALLOC_INTERNAL_SMOOTHSTEP_H
+
+/*
+ * This file was generated by the following command:
+ *   sh smoothstep.sh smoother 200 24 3 15
+ */
+/******************************************************************************/
+
+/*
+ * This header defines a precomputed table based on the smoothstep family of
+ * sigmoidal curves (https://en.wikipedia.org/wiki/Smoothstep) that grow from 0
+ * to 1 in 0 <= x <= 1.  The table is stored as integer fixed point values so
+ * that floating point math can be avoided.
+ *
+ *                      3     2
+ *   smoothstep(x) = -2x  + 3x
+ *
+ *                       5      4      3
+ *   smootherstep(x) = 6x  - 15x  + 10x
+ *
+ *                          7      6      5      4
+ *   smootheststep(x) = -20x  + 70x  - 84x  + 35x
+ */
+
+#define SMOOTHSTEP_VARIANT	"smoother"
+#define SMOOTHSTEP_NSTEPS	200
+#define SMOOTHSTEP_BFP		24
+#define SMOOTHSTEP \
+ /* STEP(step, h,                            x,     y) */ \
+    STEP(   1, UINT64_C(0x0000000000000014), 0.005, 0.000001240643750) \
+    STEP(   2, UINT64_C(0x00000000000000a5), 0.010, 0.000009850600000) \
+    STEP(   3, UINT64_C(0x0000000000000229), 0.015, 0.000032995181250) \
+    STEP(   4, UINT64_C(0x0000000000000516), 0.020, 0.000077619200000) \
+    STEP(   5, UINT64_C(0x00000000000009dc), 0.025, 0.000150449218750) \
+    STEP(   6, UINT64_C(0x00000000000010e8), 0.030, 0.000257995800000) \
+    STEP(   7, UINT64_C(0x0000000000001aa4), 0.035, 0.000406555756250) \
+    STEP(   8, UINT64_C(0x0000000000002777), 0.040, 0.000602214400000) \
+    STEP(   9, UINT64_C(0x00000000000037c2), 0.045, 0.000850847793750) \
+    STEP(  10, UINT64_C(0x0000000000004be6), 0.050, 0.001158125000000) \
+    STEP(  11, UINT64_C(0x000000000000643c), 0.055, 0.001529510331250) \
+    STEP(  12, UINT64_C(0x000000000000811f), 0.060, 0.001970265600000) \
+    STEP(  13, UINT64_C(0x000000000000a2e2), 0.065, 0.002485452368750) \
+    STEP(  14, UINT64_C(0x000000000000c9d8), 0.070, 0.003079934200000) \
+    STEP(  15, UINT64_C(0x000000000000f64f), 0.075, 0.003758378906250) \
+    STEP(  16, UINT64_C(0x0000000000012891), 0.080, 0.004525260800000) \
+    STEP(  17, UINT64_C(0x00000000000160e7), 0.085, 0.005384862943750) \
+    STEP(  18, UINT64_C(0x0000000000019f95), 0.090, 0.006341279400000) \
+    STEP(  19, UINT64_C(0x000000000001e4dc), 0.095, 0.007398417481250) \
+    STEP(  20, UINT64_C(0x00000000000230fc), 0.100, 0.008560000000000) \
+    STEP(  21, UINT64_C(0x0000000000028430), 0.105, 0.009829567518750) \
+    STEP(  22, UINT64_C(0x000000000002deb0), 0.110, 0.011210480600000) \
+    STEP(  23, UINT64_C(0x00000000000340b1), 0.115, 0.012705922056250) \
+    STEP(  24, UINT64_C(0x000000000003aa67), 0.120, 0.014318899200000) \
+    STEP(  25, UINT64_C(0x0000000000041c00), 0.125, 0.016052246093750) \
+    STEP(  26, UINT64_C(0x00000000000495a8), 0.130, 0.017908625800000) \
+    STEP(  27, UINT64_C(0x000000000005178b), 0.135, 0.019890532631250) \
+    STEP(  28, UINT64_C(0x000000000005a1cf), 0.140, 0.022000294400000) \
+    STEP(  29, UINT64_C(0x0000000000063498), 0.145, 0.024240074668750) \
+    STEP(  30, UINT64_C(0x000000000006d009), 0.150, 0.026611875000000) \
+    STEP(  31, UINT64_C(0x000000000007743f), 0.155, 0.029117537206250) \
+    STEP(  32, UINT64_C(0x0000000000082157), 0.160, 0.031758745600000) \
+    STEP(  33, UINT64_C(0x000000000008d76b), 0.165, 0.034537029243750) \
+    STEP(  34, UINT64_C(0x0000000000099691), 0.170, 0.037453764200000) \
+    STEP(  35, UINT64_C(0x00000000000a5edf), 0.175, 0.040510175781250) \
+    STEP(  36, UINT64_C(0x00000000000b3067), 0.180, 0.043707340800000) \
+    STEP(  37, UINT64_C(0x00000000000c0b38), 0.185, 0.047046189818750) \
+    STEP(  38, UINT64_C(0x00000000000cef5e), 0.190, 0.050527509400000) \
+    STEP(  39, UINT64_C(0x00000000000ddce6), 0.195, 0.054151944356250) \
+    STEP(  40, UINT64_C(0x00000000000ed3d8), 0.200, 0.057920000000000) \
+    STEP(  41, UINT64_C(0x00000000000fd439), 0.205, 0.061832044393750) \
+    STEP(  42, UINT64_C(0x000000000010de0e), 0.210, 0.065888310600000) \
+    STEP(  43, UINT64_C(0x000000000011f158), 0.215, 0.070088898931250) \
+    STEP(  44, UINT64_C(0x0000000000130e17), 0.220, 0.074433779200000) \
+    STEP(  45, UINT64_C(0x0000000000143448), 0.225, 0.078922792968750) \
+    STEP(  46, UINT64_C(0x00000000001563e7), 0.230, 0.083555655800000) \
+    STEP(  47, UINT64_C(0x0000000000169cec), 0.235, 0.088331959506250) \
+    STEP(  48, UINT64_C(0x000000000017df4f), 0.240, 0.093251174400000) \
+    STEP(  49, UINT64_C(0x0000000000192b04), 0.245, 0.098312651543750) \
+    STEP(  50, UINT64_C(0x00000000001a8000), 0.250, 0.103515625000000) \
+    STEP(  51, UINT64_C(0x00000000001bde32), 0.255, 0.108859214081250) \
+    STEP(  52, UINT64_C(0x00000000001d458b), 0.260, 0.114342425600000) \
+    STEP(  53, UINT64_C(0x00000000001eb5f8), 0.265, 0.119964156118750) \
+    STEP(  54, UINT64_C(0x0000000000202f65), 0.270, 0.125723194200000) \
+    STEP(  55, UINT64_C(0x000000000021b1bb), 0.275, 0.131618222656250) \
+    STEP(  56, UINT64_C(0x0000000000233ce3), 0.280, 0.137647820800000) \
+    STEP(  57, UINT64_C(0x000000000024d0c3), 0.285, 0.143810466693750) \
+    STEP(  58, UINT64_C(0x0000000000266d40), 0.290, 0.150104539400000) \
+    STEP(  59, UINT64_C(0x000000000028123d), 0.295, 0.156528321231250) \
+    STEP(  60, UINT64_C(0x000000000029bf9c), 0.300, 0.163080000000000) \
+    STEP(  61, UINT64_C(0x00000000002b753d), 0.305, 0.169757671268750) \
+    STEP(  62, UINT64_C(0x00000000002d32fe), 0.310, 0.176559340600000) \
+    STEP(  63, UINT64_C(0x00000000002ef8bc), 0.315, 0.183482925806250) \
+    STEP(  64, UINT64_C(0x000000000030c654), 0.320, 0.190526259200000) \
+    STEP(  65, UINT64_C(0x0000000000329b9f), 0.325, 0.197687089843750) \
+    STEP(  66, UINT64_C(0x0000000000347875), 0.330, 0.204963085800000) \
+    STEP(  67, UINT64_C(0x0000000000365cb0), 0.335, 0.212351836381250) \
+    STEP(  68, UINT64_C(0x0000000000384825), 0.340, 0.219850854400000) \
+    STEP(  69, UINT64_C(0x00000000003a3aa8), 0.345, 0.227457578418750) \
+    STEP(  70, UINT64_C(0x00000000003c340f), 0.350, 0.235169375000000) \
+    STEP(  71, UINT64_C(0x00000000003e342b), 0.355, 0.242983540956250) \
+    STEP(  72, UINT64_C(0x0000000000403ace), 0.360, 0.250897305600000) \
+    STEP(  73, UINT64_C(0x00000000004247c8), 0.365, 0.258907832993750) \
+    STEP(  74, UINT64_C(0x0000000000445ae9), 0.370, 0.267012224200000) \
+    STEP(  75, UINT64_C(0x0000000000467400), 0.375, 0.275207519531250) \
+    STEP(  76, UINT64_C(0x00000000004892d8), 0.380, 0.283490700800000) \
+    STEP(  77, UINT64_C(0x00000000004ab740), 0.385, 0.291858693568750) \
+    STEP(  78, UINT64_C(0x00000000004ce102), 0.390, 0.300308369400000) \
+    STEP(  79, UINT64_C(0x00000000004f0fe9), 0.395, 0.308836548106250) \
+    STEP(  80, UINT64_C(0x00000000005143bf), 0.400, 0.317440000000000) \
+    STEP(  81, UINT64_C(0x0000000000537c4d), 0.405, 0.326115448143750) \
+    STEP(  82, UINT64_C(0x000000000055b95b), 0.410, 0.334859570600000) \
+    STEP(  83, UINT64_C(0x000000000057fab1), 0.415, 0.343669002681250) \
+    STEP(  84, UINT64_C(0x00000000005a4015), 0.420, 0.352540339200000) \
+    STEP(  85, UINT64_C(0x00000000005c894e), 0.425, 0.361470136718750) \
+    STEP(  86, UINT64_C(0x00000000005ed622), 0.430, 0.370454915800000) \
+    STEP(  87, UINT64_C(0x0000000000612655), 0.435, 0.379491163256250) \
+    STEP(  88, UINT64_C(0x00000000006379ac), 0.440, 0.388575334400000) \
+    STEP(  89, UINT64_C(0x000000000065cfeb), 0.445, 0.397703855293750) \
+    STEP(  90, UINT64_C(0x00000000006828d6), 0.450, 0.406873125000000) \
+    STEP(  91, UINT64_C(0x00000000006a842f), 0.455, 0.416079517831250) \
+    STEP(  92, UINT64_C(0x00000000006ce1bb), 0.460, 0.425319385600000) \
+    STEP(  93, UINT64_C(0x00000000006f413a), 0.465, 0.434589059868750) \
+    STEP(  94, UINT64_C(0x000000000071a270), 0.470, 0.443884854200000) \
+    STEP(  95, UINT64_C(0x000000000074051d), 0.475, 0.453203066406250) \
+    STEP(  96, UINT64_C(0x0000000000766905), 0.480, 0.462539980800000) \
+    STEP(  97, UINT64_C(0x000000000078cde7), 0.485, 0.471891870443750) \
+    STEP(  98, UINT64_C(0x00000000007b3387), 0.490, 0.481254999400000) \
+    STEP(  99, UINT64_C(0x00000000007d99a4), 0.495, 0.490625624981250) \
+    STEP( 100, UINT64_C(0x0000000000800000), 0.500, 0.500000000000000) \
+    STEP( 101, UINT64_C(0x000000000082665b), 0.505, 0.509374375018750) \
+    STEP( 102, UINT64_C(0x000000000084cc78), 0.510, 0.518745000600000) \
+    STEP( 103, UINT64_C(0x0000000000873218), 0.515, 0.528108129556250) \
+    STEP( 104, UINT64_C(0x00000000008996fa), 0.520, 0.537460019200000) \
+    STEP( 105, UINT64_C(0x00000000008bfae2), 0.525, 0.546796933593750) \
+    STEP( 106, UINT64_C(0x00000000008e5d8f), 0.530, 0.556115145800000) \
+    STEP( 107, UINT64_C(0x000000000090bec5), 0.535, 0.565410940131250) \
+    STEP( 108, UINT64_C(0x0000000000931e44), 0.540, 0.574680614400000) \
+    STEP( 109, UINT64_C(0x0000000000957bd0), 0.545, 0.583920482168750) \
+    STEP( 110, UINT64_C(0x000000000097d729), 0.550, 0.593126875000000) \
+    STEP( 111, UINT64_C(0x00000000009a3014), 0.555, 0.602296144706250) \
+    STEP( 112, UINT64_C(0x00000000009c8653), 0.560, 0.611424665600000) \
+    STEP( 113, UINT64_C(0x00000000009ed9aa), 0.565, 0.620508836743750) \
+    STEP( 114, UINT64_C(0x0000000000a129dd), 0.570, 0.629545084200000) \
+    STEP( 115, UINT64_C(0x0000000000a376b1), 0.575, 0.638529863281250) \
+    STEP( 116, UINT64_C(0x0000000000a5bfea), 0.580, 0.647459660800000) \
+    STEP( 117, UINT64_C(0x0000000000a8054e), 0.585, 0.656330997318750) \
+    STEP( 118, UINT64_C(0x0000000000aa46a4), 0.590, 0.665140429400000) \
+    STEP( 119, UINT64_C(0x0000000000ac83b2), 0.595, 0.673884551856250) \
+    STEP( 120, UINT64_C(0x0000000000aebc40), 0.600, 0.682560000000000) \
+    STEP( 121, UINT64_C(0x0000000000b0f016), 0.605, 0.691163451893750) \
+    STEP( 122, UINT64_C(0x0000000000b31efd), 0.610, 0.699691630600000) \
+    STEP( 123, UINT64_C(0x0000000000b548bf), 0.615, 0.708141306431250) \
+    STEP( 124, UINT64_C(0x0000000000b76d27), 0.620, 0.716509299200000) \
+    STEP( 125, UINT64_C(0x0000000000b98c00), 0.625, 0.724792480468750) \
+    STEP( 126, UINT64_C(0x0000000000bba516), 0.630, 0.732987775800000) \
+    STEP( 127, UINT64_C(0x0000000000bdb837), 0.635, 0.741092167006250) \
+    STEP( 128, UINT64_C(0x0000000000bfc531), 0.640, 0.749102694400000) \
+    STEP( 129, UINT64_C(0x0000000000c1cbd4), 0.645, 0.757016459043750) \
+    STEP( 130, UINT64_C(0x0000000000c3cbf0), 0.650, 0.764830625000000) \
+    STEP( 131, UINT64_C(0x0000000000c5c557), 0.655, 0.772542421581250) \
+    STEP( 132, UINT64_C(0x0000000000c7b7da), 0.660, 0.780149145600000) \
+    STEP( 133, UINT64_C(0x0000000000c9a34f), 0.665, 0.787648163618750) \
+    STEP( 134, UINT64_C(0x0000000000cb878a), 0.670, 0.795036914200000) \
+    STEP( 135, UINT64_C(0x0000000000cd6460), 0.675, 0.802312910156250) \
+    STEP( 136, UINT64_C(0x0000000000cf39ab), 0.680, 0.809473740800000) \
+    STEP( 137, UINT64_C(0x0000000000d10743), 0.685, 0.816517074193750) \
+    STEP( 138, UINT64_C(0x0000000000d2cd01), 0.690, 0.823440659400000) \
+    STEP( 139, UINT64_C(0x0000000000d48ac2), 0.695, 0.830242328731250) \
+    STEP( 140, UINT64_C(0x0000000000d64063), 0.700, 0.836920000000000) \
+    STEP( 141, UINT64_C(0x0000000000d7edc2), 0.705, 0.843471678768750) \
+    STEP( 142, UINT64_C(0x0000000000d992bf), 0.710, 0.849895460600000) \
+    STEP( 143, UINT64_C(0x0000000000db2f3c), 0.715, 0.856189533306250) \
+    STEP( 144, UINT64_C(0x0000000000dcc31c), 0.720, 0.862352179200000) \
+    STEP( 145, UINT64_C(0x0000000000de4e44), 0.725, 0.868381777343750) \
+    STEP( 146, UINT64_C(0x0000000000dfd09a), 0.730, 0.874276805800000) \
+    STEP( 147, UINT64_C(0x0000000000e14a07), 0.735, 0.880035843881250) \
+    STEP( 148, UINT64_C(0x0000000000e2ba74), 0.740, 0.885657574400000) \
+    STEP( 149, UINT64_C(0x0000000000e421cd), 0.745, 0.891140785918750) \
+    STEP( 150, UINT64_C(0x0000000000e58000), 0.750, 0.896484375000000) \
+    STEP( 151, UINT64_C(0x0000000000e6d4fb), 0.755, 0.901687348456250) \
+    STEP( 152, UINT64_C(0x0000000000e820b0), 0.760, 0.906748825600000) \
+    STEP( 153, UINT64_C(0x0000000000e96313), 0.765, 0.911668040493750) \
+    STEP( 154, UINT64_C(0x0000000000ea9c18), 0.770, 0.916444344200000) \
+    STEP( 155, UINT64_C(0x0000000000ebcbb7), 0.775, 0.921077207031250) \
+    STEP( 156, UINT64_C(0x0000000000ecf1e8), 0.780, 0.925566220800000) \
+    STEP( 157, UINT64_C(0x0000000000ee0ea7), 0.785, 0.929911101068750) \
+    STEP( 158, UINT64_C(0x0000000000ef21f1), 0.790, 0.934111689400000) \
+    STEP( 159, UINT64_C(0x0000000000f02bc6), 0.795, 0.938167955606250) \
+    STEP( 160, UINT64_C(0x0000000000f12c27), 0.800, 0.942080000000000) \
+    STEP( 161, UINT64_C(0x0000000000f22319), 0.805, 0.945848055643750) \
+    STEP( 162, UINT64_C(0x0000000000f310a1), 0.810, 0.949472490600000) \
+    STEP( 163, UINT64_C(0x0000000000f3f4c7), 0.815, 0.952953810181250) \
+    STEP( 164, UINT64_C(0x0000000000f4cf98), 0.820, 0.956292659200000) \
+    STEP( 165, UINT64_C(0x0000000000f5a120), 0.825, 0.959489824218750) \
+    STEP( 166, UINT64_C(0x0000000000f6696e), 0.830, 0.962546235800000) \
+    STEP( 167, UINT64_C(0x0000000000f72894), 0.835, 0.965462970756250) \
+    STEP( 168, UINT64_C(0x0000000000f7dea8), 0.840, 0.968241254400000) \
+    STEP( 169, UINT64_C(0x0000000000f88bc0), 0.845, 0.970882462793750) \
+    STEP( 170, UINT64_C(0x0000000000f92ff6), 0.850, 0.973388125000000) \
+    STEP( 171, UINT64_C(0x0000000000f9cb67), 0.855, 0.975759925331250) \
+    STEP( 172, UINT64_C(0x0000000000fa5e30), 0.860, 0.977999705600000) \
+    STEP( 173, UINT64_C(0x0000000000fae874), 0.865, 0.980109467368750) \
+    STEP( 174, UINT64_C(0x0000000000fb6a57), 0.870, 0.982091374200000) \
+    STEP( 175, UINT64_C(0x0000000000fbe400), 0.875, 0.983947753906250) \
+    STEP( 176, UINT64_C(0x0000000000fc5598), 0.880, 0.985681100800000) \
+    STEP( 177, UINT64_C(0x0000000000fcbf4e), 0.885, 0.987294077943750) \
+    STEP( 178, UINT64_C(0x0000000000fd214f), 0.890, 0.988789519400000) \
+    STEP( 179, UINT64_C(0x0000000000fd7bcf), 0.895, 0.990170432481250) \
+    STEP( 180, UINT64_C(0x0000000000fdcf03), 0.900, 0.991440000000000) \
+    STEP( 181, UINT64_C(0x0000000000fe1b23), 0.905, 0.992601582518750) \
+    STEP( 182, UINT64_C(0x0000000000fe606a), 0.910, 0.993658720600000) \
+    STEP( 183, UINT64_C(0x0000000000fe9f18), 0.915, 0.994615137056250) \
+    STEP( 184, UINT64_C(0x0000000000fed76e), 0.920, 0.995474739200000) \
+    STEP( 185, UINT64_C(0x0000000000ff09b0), 0.925, 0.996241621093750) \
+    STEP( 186, UINT64_C(0x0000000000ff3627), 0.930, 0.996920065800000) \
+    STEP( 187, UINT64_C(0x0000000000ff5d1d), 0.935, 0.997514547631250) \
+    STEP( 188, UINT64_C(0x0000000000ff7ee0), 0.940, 0.998029734400000) \
+    STEP( 189, UINT64_C(0x0000000000ff9bc3), 0.945, 0.998470489668750) \
+    STEP( 190, UINT64_C(0x0000000000ffb419), 0.950, 0.998841875000000) \
+    STEP( 191, UINT64_C(0x0000000000ffc83d), 0.955, 0.999149152206250) \
+    STEP( 192, UINT64_C(0x0000000000ffd888), 0.960, 0.999397785600000) \
+    STEP( 193, UINT64_C(0x0000000000ffe55b), 0.965, 0.999593444243750) \
+    STEP( 194, UINT64_C(0x0000000000ffef17), 0.970, 0.999742004200000) \
+    STEP( 195, UINT64_C(0x0000000000fff623), 0.975, 0.999849550781250) \
+    STEP( 196, UINT64_C(0x0000000000fffae9), 0.980, 0.999922380800000) \
+    STEP( 197, UINT64_C(0x0000000000fffdd6), 0.985, 0.999967004818750) \
+    STEP( 198, UINT64_C(0x0000000000ffff5a), 0.990, 0.999990149400000) \
+    STEP( 199, UINT64_C(0x0000000000ffffeb), 0.995, 0.999998759356250) \
+    STEP( 200, UINT64_C(0x0000000001000000), 1.000, 1.000000000000000) \
+
+#endif /* JEMALLOC_INTERNAL_SMOOTHSTEP_H */
--- a/deps/jemalloc/include/jemalloc/internal/spin.h
+++ b/deps/jemalloc/include/jemalloc/internal/spin.h
@@ -0,0 +1,36 @@
+#ifndef JEMALLOC_INTERNAL_SPIN_H
+#define JEMALLOC_INTERNAL_SPIN_H
+
+#ifdef JEMALLOC_SPIN_C_
+#  define SPIN_INLINE extern inline
+#else
+#  define SPIN_INLINE inline
+#endif
+
+#define SPIN_INITIALIZER {0U}
+
+typedef struct {
+	unsigned iteration;	/* spin_adaptive() rounds so far; stops growing at 5. */
+} spin_t;
+
+/* Spin 2^iteration times for the first 5 calls; afterwards yield the CPU. */
+SPIN_INLINE void
+spin_adaptive(spin_t *spin) {
+	volatile uint32_t spins;
+	if (spin->iteration >= 5) {
+#ifdef _WIN32
+		SwitchToThread();
+#else
+		sched_yield();
+#endif
+		return;
+	}
+	for (spins = 0; spins < (1U << spin->iteration); spins++) {
+		CPU_SPINWAIT;
+	}
+	spin->iteration++;
+}
+
+#undef SPIN_INLINE
+
+#endif /* JEMALLOC_INTERNAL_SPIN_H */
--- a/deps/jemalloc/include/jemalloc/internal/stats_tsd.h
+++ b/deps/jemalloc/include/jemalloc/internal/stats_tsd.h
@@ -0,0 +1,12 @@
+#ifndef JEMALLOC_INTERNAL_STATS_TSD_H
+#define JEMALLOC_INTERNAL_STATS_TSD_H
+
+typedef struct tcache_bin_stats_s {	/* Statistics for a single tcache bin. */
+	/*
+	 * Number of allocation requests that corresponded to the size of this
+	 * bin.
+	 */
+	uint64_t	nrequests;
+} tcache_bin_stats_t;
+
+#endif /* JEMALLOC_INTERNAL_STATS_TSD_H */
--- a/deps/jemalloc/include/jemalloc/internal/sz.h
+++ b/deps/jemalloc/include/jemalloc/internal/sz.h
@@ -0,0 +1,317 @@
+#ifndef JEMALLOC_INTERNAL_SIZE_H
+#define JEMALLOC_INTERNAL_SIZE_H
+
+#include "jemalloc/internal/bit_util.h"
+#include "jemalloc/internal/pages.h"
+#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/util.h"
+
+/*
+ * sz module: Size computations.
+ *
+ * Some abbreviations used here:
+ *   p: Page
+ *   ind: Index
+ *   s, sz: Size
+ *   u: Usable size
+ *   a: Aligned
+ *
+ * These are not always used completely consistently, but should be enough to
+ * interpret function names.  E.g. sz_psz2ind converts page size to page size
+ * index; sz_sa2u converts a (size, alignment) allocation request to the usable
+ * size that would result from such an allocation.
+ */
+
+/*
+ * sz_pind2sz_tab encodes the same information as could be computed by
+ * sz_pind2sz_compute().
+ */
+extern size_t const sz_pind2sz_tab[NPSIZES+1];
+/*
+ * sz_index2size_tab encodes the same information as could be computed (at
+ * unacceptable cost in some code paths) by sz_index2size_compute().
+ */
+extern size_t const sz_index2size_tab[NSIZES];
+/*
+ * sz_size2index_tab is a compact lookup table that rounds request sizes up to
+ * size classes.  In order to reduce cache footprint, the table is compressed,
+ * and all accesses are via sz_size2index().
+ */
+extern uint8_t const sz_size2index_tab[];
+
+static const size_t sz_large_pad =	/* PAGE if cache-oblivious, else 0. */
+#ifdef JEMALLOC_CACHE_OBLIVIOUS
+    PAGE
+#else
+    0
+#endif
+    ;
+
+/* Page size -> page size class index; NPSIZES if psz > LARGE_MAXCLASS. */
+JEMALLOC_ALWAYS_INLINE pszind_t
+sz_psz2ind(size_t psz) {
+	if (unlikely(psz > LARGE_MAXCLASS)) {
+		return NPSIZES;
+	}
+	{
+		pszind_t x = lg_floor((psz<<1)-1);
+		pszind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_PAGE) ? 0 : x -
+		    (LG_SIZE_CLASS_GROUP + LG_PAGE);
+		pszind_t grp = shift << LG_SIZE_CLASS_GROUP;
+		/* lg of the spacing between neighboring classes near psz. */
+		pszind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ?
+		    LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1;
+		/* Build the mask unsigned; left shifting signed -1 is UB. */
+		size_t delta_inverse_mask = ZU(-1) << lg_delta;
+		pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) &
+		    ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1);
+
+		return grp + mod;
+	}
+}
+
+/* Arithmetic (table-free) size of page size class pind. */
+static inline size_t
+sz_pind2sz_compute(pszind_t pind) {
+	if (unlikely(pind == NPSIZES)) {
+		return LARGE_MAXCLASS + PAGE;
+	}
+	{
+		const size_t ngrp_mask = (ZU(1) << LG_SIZE_CLASS_GROUP) - 1;
+		size_t group = pind >> LG_SIZE_CLASS_GROUP;
+		size_t member = pind & ngrp_mask;
+
+		/* All ones when group != 0, zero for the first group. */
+		size_t base_mask = ~((!!group)-1);
+		size_t base = ((ZU(1) << (LG_PAGE +
+		    (LG_SIZE_CLASS_GROUP-1))) << group) & base_mask;
+		size_t step_shift = (group == 0) ? 1 : group;
+		size_t lg_delta = step_shift + (LG_PAGE-1);
+
+		return base + ((member+1) << lg_delta);
+	}
+}
+
+/* Table lookup for pind; asserts agreement with sz_pind2sz_compute(). */
+static inline size_t
+sz_pind2sz_lookup(pszind_t pind) {
+	assert(sz_pind2sz_tab[pind] == sz_pind2sz_compute(pind));
+	return (size_t)sz_pind2sz_tab[pind];
+}
+
+static inline size_t
+sz_pind2sz(pszind_t pind) {
+	assert(pind < NPSIZES+1);	/* Table has NPSIZES+1 entries. */
+	return sz_pind2sz_lookup(pind);
+}
+
+/* Usable size for page size psz: psz rounded up to its class spacing. */
+static inline size_t
+sz_psz2u(size_t psz) {
+	if (unlikely(psz > LARGE_MAXCLASS)) {
+		return LARGE_MAXCLASS + PAGE;
+	}
+	{
+		size_t lg = lg_floor((psz<<1)-1);
+		size_t lg_step = (lg < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ?
+		    LG_PAGE : lg - LG_SIZE_CLASS_GROUP - 1;
+		size_t round_mask = (ZU(1) << lg_step) - 1;
+		/* Round up to a multiple of 2^lg_step. */
+		return (psz + round_mask) & ~round_mask;
+	}
+}
+
+/* Size -> size class index, computed; NSIZES if size > LARGE_MAXCLASS. */
+static inline szind_t
+sz_size2index_compute(size_t size) {
+	if (unlikely(size > LARGE_MAXCLASS)) {
+		return NSIZES;
+	}
+#if (NTBINS > 0)
+	if (size <= (ZU(1) << LG_TINY_MAXCLASS)) {
+		szind_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1;
+		szind_t lg_ceil = lg_floor(pow2_ceil_zu(size));
+		return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin);
+	}
+#endif
+	{
+		szind_t x = lg_floor((size<<1)-1);
+		szind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 :
+		    x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM);
+		szind_t grp = shift << LG_SIZE_CLASS_GROUP;
+		/* lg of the spacing between neighboring classes near size. */
+		szind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1)
+		    ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1;
+		/* Build the mask unsigned; left shifting signed -1 is UB. */
+		size_t delta_inverse_mask = ZU(-1) << lg_delta;
+		szind_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) &
+		    ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1);
+
+		return NTBINS + grp + mod;
+	}
+}
+
+/* Table-based size -> class index; requires size <= LOOKUP_MAXCLASS. */
+JEMALLOC_ALWAYS_INLINE szind_t
+sz_size2index_lookup(size_t size) {
+	szind_t ind;
+	assert(size <= LOOKUP_MAXCLASS);
+	ind = sz_size2index_tab[(size-1) >> LG_TINY_MIN];
+	assert(ind == sz_size2index_compute(size));
+	return ind;
+}
+
+/* Size class index for size (> 0): table lookup when covered, else compute. */
+JEMALLOC_ALWAYS_INLINE szind_t
+sz_size2index(size_t size) {
+	assert(size > 0);
+
+	return likely(size <= LOOKUP_MAXCLASS) ?
+	    sz_size2index_lookup(size) : sz_size2index_compute(size);
+}
+
+/* Arithmetic (table-free) usable size of size class index. */
+static inline size_t
+sz_index2size_compute(szind_t index) {
+#if (NTBINS > 0)
+	if (index < NTBINS) {
+		return (ZU(1) << (LG_TINY_MAXCLASS - NTBINS + 1 + index));
+	}
+#endif
+	{
+		const size_t ngrp_mask = (ZU(1) << LG_SIZE_CLASS_GROUP) - 1;
+		size_t regular = index - NTBINS;
+		size_t group = regular >> LG_SIZE_CLASS_GROUP;
+		size_t member = regular & ngrp_mask;
+
+		/* All ones when group != 0, zero for the first group. */
+		size_t base_mask = ~((!!group)-1);
+		size_t base = ((ZU(1) << (LG_QUANTUM +
+		    (LG_SIZE_CLASS_GROUP-1))) << group) & base_mask;
+
+		size_t step_shift = (group == 0) ? 1 : group;
+		size_t lg_delta = step_shift + (LG_QUANTUM-1);
+
+		return base + ((member+1) << lg_delta);
+	}
+}
+
+/* Table lookup for index; asserts agreement with sz_index2size_compute(). */
+JEMALLOC_ALWAYS_INLINE size_t
+sz_index2size_lookup(szind_t index) {
+	assert(sz_index2size_tab[index] == sz_index2size_compute(index));
+	return (size_t)sz_index2size_tab[index];
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+sz_index2size(szind_t index) {
+	assert(index < NSIZES);	/* Table has NSIZES entries. */
+	return sz_index2size_lookup(index);
+}
+
+/* Usable size for a request of size bytes, computed; 0 if too large. */
+JEMALLOC_ALWAYS_INLINE size_t
+sz_s2u_compute(size_t size) {
+	if (unlikely(size > LARGE_MAXCLASS)) {
+		return 0;
+	}
+#if (NTBINS > 0)
+	if (size <= (ZU(1) << LG_TINY_MAXCLASS)) {
+		size_t lg_min = LG_TINY_MAXCLASS - NTBINS + 1;
+		size_t lg_up = lg_floor(pow2_ceil_zu(size));
+		/* Tiny classes are powers of two, clamped from below. */
+		return ZU(1) << (lg_up < lg_min ? lg_min : lg_up);
+	}
+#endif
+	{
+		size_t lg = lg_floor((size<<1)-1);
+		size_t lg_step = (lg < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1)
+		    ?  LG_QUANTUM : lg - LG_SIZE_CLASS_GROUP - 1;
+		size_t round_mask = (ZU(1) << lg_step) - 1;
+		/* Round up to a multiple of 2^lg_step. */
+		return (size + round_mask) & ~round_mask;
+	}
+}
+
+/* Table-based usable size; asserts agreement with sz_s2u_compute(). */
+JEMALLOC_ALWAYS_INLINE size_t
+sz_s2u_lookup(size_t size) {
+	size_t usize = sz_index2size_lookup(sz_size2index_lookup(size));
+	assert(usize == sz_s2u_compute(size));
+	return usize;
+}
+
+/*
+ * Usable size that an allocation request of the given (nonzero) size would
+ * receive.  Sizes beyond the lookup tables fall back to direct computation.
+ */
+JEMALLOC_ALWAYS_INLINE size_t
+sz_s2u(size_t size) {
+	assert(size > 0);
+	if (unlikely(size > LOOKUP_MAXCLASS)) {
+		return sz_s2u_compute(size);
+	}
+	return sz_s2u_lookup(size);
+}
+
+/*
+ * Compute usable size that would result from allocating an object with the
+ * specified size and alignment, or 0 if the request is too large.
+ */
+JEMALLOC_ALWAYS_INLINE size_t
+sz_sa2u(size_t size, size_t alignment) {
+	size_t usize;
+
+	assert(alignment != 0 && ((alignment - 1) & alignment) == 0);
+
+	/* Try for a small size class. */
+	if (size <= SMALL_MAXCLASS && alignment < PAGE) {
+		/*
+		 * Round size up to the nearest multiple of alignment.
+		 *
+		 * This done, we can take advantage of the fact that for each
+		 * small size class, every object is aligned at the smallest
+		 * power of two that is non-zero in the base two representation
+		 * of the size.  For example:
+		 *
+		 *   Size |   Base 2 | Minimum alignment
+		 *   -----+----------+------------------
+		 *     96 |  1100000 |  32
+		 *    160 | 10100000 |  32
+		 *    192 | 11000000 |  64
+		 */
+		usize = sz_s2u(ALIGNMENT_CEILING(size, alignment));
+		if (usize < LARGE_MINCLASS) {
+			return usize;
+		}
+	}
+
+	/* Large size class.  Beware of overflow. */
+
+	if (unlikely(alignment > LARGE_MAXCLASS)) {
+		return 0;
+	}
+
+	/* Make sure result is a large size class. */
+	if (size <= LARGE_MINCLASS) {
+		usize = LARGE_MINCLASS;
+	} else {
+		usize = sz_s2u(size);
+		if (usize < size) {
+			/* sz_s2u() yields 0 if size is too large. */
+			return 0;
+		}
+	}
+
+	/*
+	 * Calculate the multi-page mapping that large_palloc() would need in
+	 * order to guarantee the alignment.
+	 */
+	if (usize + sz_large_pad + PAGE_CEILING(alignment) - PAGE < usize) {
+		/* size_t overflow. */
+		return 0;
+	}
+	return usize;
+}
+
+#endif /* JEMALLOC_INTERNAL_SIZE_H */
--- a/deps/jemalloc/include/jemalloc/internal/tcache_externs.h
+++ b/deps/jemalloc/include/jemalloc/internal/tcache_externs.h
@@ -0,0 +1,55 @@
+#ifndef JEMALLOC_INTERNAL_TCACHE_EXTERNS_H
+#define JEMALLOC_INTERNAL_TCACHE_EXTERNS_H
+
+#include "jemalloc/internal/size_classes.h"
+
+extern bool	opt_tcache;
+extern ssize_t	opt_lg_tcache_max;
+
+extern tcache_bin_info_t	*tcache_bin_info;
+
+/*
+ * Number of tcache bins.  There are NBINS small-object bins, plus 0 or more
+ * large-object bins.
+ */
+extern unsigned	nhbins;
+
+/* Maximum cached size class. */
+extern size_t	tcache_maxclass;
+
+/*
+ * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and
+ * usable via the MALLOCX_TCACHE() flag.  The automatic per thread tcaches are
+ * completely disjoint from this data structure.  tcaches starts off as a sparse
+ * array, so it has no physical memory footprint until individual pages are
+ * touched.  This allows the entire array to be allocated the first time an
+ * explicit tcache is created without a disproportionate impact on memory usage.
+ */
+extern tcaches_t	*tcaches;
+
+size_t	tcache_salloc(tsdn_t *tsdn, const void *ptr);
+void	tcache_event_hard(tsd_t *tsd, tcache_t *tcache);
+void	*tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
+    tcache_bin_t *tbin, szind_t binind, bool *tcache_success);
+void	tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
+    szind_t binind, unsigned rem);
+void	tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
+    unsigned rem, tcache_t *tcache);
+void	tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache,
+    arena_t *arena);
+tcache_t *tcache_create_explicit(tsd_t *tsd);
+void	tcache_cleanup(tsd_t *tsd);
+void	tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena);
+bool	tcaches_create(tsd_t *tsd, unsigned *r_ind);
+void	tcaches_flush(tsd_t *tsd, unsigned ind);
+void	tcaches_destroy(tsd_t *tsd, unsigned ind);
+bool	tcache_boot(tsdn_t *tsdn);
+void tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena);
+void tcache_prefork(tsdn_t *tsdn);
+void tcache_postfork_parent(tsdn_t *tsdn);
+void tcache_postfork_child(tsdn_t *tsdn);
+void tcache_flush(tsd_t *tsd);
+bool tsd_tcache_data_init(tsd_t *tsd);
+bool tsd_tcache_enabled_data_init(tsd_t *tsd);
+
+#endif /* JEMALLOC_INTERNAL_TCACHE_EXTERNS_H */
--- a/deps/jemalloc/include/jemalloc/internal/tcache_inlines.h
+++ b/deps/jemalloc/include/jemalloc/internal/tcache_inlines.h
@@ -0,0 +1,250 @@
+#ifndef JEMALLOC_INTERNAL_TCACHE_INLINES_H
+#define JEMALLOC_INTERNAL_TCACHE_INLINES_H
+
+#include "jemalloc/internal/jemalloc_internal_types.h"
+#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/sz.h"
+#include "jemalloc/internal/ticker.h"
+#include "jemalloc/internal/util.h"
+
+static inline bool
+tcache_enabled_get(tsd_t *tsd) {
+	return tsd_tcache_enabled_get(tsd);	/* Forwards to the tsd accessor. */
+}
+
+static inline void
+tcache_enabled_set(tsd_t *tsd, bool enabled) {
+	bool prev = tsd_tcache_enabled_get(tsd);
+
+	if (enabled && !prev) {
+		tsd_tcache_data_init(tsd);
+	} else if (!enabled && prev) {
+		tcache_cleanup(tsd);
+	}
+	/* Store the new state only now; the calls above read the old one. */
+	tsd_tcache_enabled_set(tsd, enabled);
+	tsd_slow_update(tsd);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+tcache_event(tsd_t *tsd, tcache_t *tcache) {
+	/*
+	 * A zero TCACHE_GC_INCR skips the ticker entirely; otherwise call
+	 * tcache_event_hard() whenever the GC ticker fires.
+	 */
+	if (TCACHE_GC_INCR != 0 && unlikely(ticker_tick(&tcache->gc_ticker))) {
+		tcache_event_hard(tsd, tcache);
+	}
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success) {
+	void *ret;
+
+	if (unlikely(tbin->ncached == 0)) {
+		tbin->low_water = -1;	/* Sentinel below zero. */
+		*tcache_success = false;
+		return NULL;
+	}
+	/*
+	 * tcache_success (instead of ret) should be checked upon the return of
+	 * this function.  We avoid checking (ret == NULL) because there is
+	 * never a null stored on the avail stack (which is unknown to the
+	 * compiler), and eagerly checking ret would cause pipeline stall
+	 * (waiting for the cacheline).
+	 */
+	*tcache_success = true;
+	ret = *(tbin->avail - tbin->ncached);	/* i.e. avail[-ncached] */
+	tbin->ncached--;
+	/* Keep low_water at the smallest ncached value observed. */
+	if (unlikely((low_water_t)tbin->ncached < tbin->low_water)) {
+		tbin->low_water = tbin->ncached;
+	}
+
+	return ret;
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
+    szind_t binind, bool zero, bool slow_path) {
+	void *ret;
+	tcache_bin_t *tbin;
+	bool tcache_success;
+	size_t usize JEMALLOC_CC_SILENCE_INIT(0);
+
+	assert(binind < NBINS);
+	tbin = tcache_small_bin_get(tcache, binind);	/* size is not read. */
+	ret = tcache_alloc_easy(tbin, &tcache_success);
+	assert(tcache_success == (ret != NULL));
+	if (unlikely(!tcache_success)) {
+		bool tcache_hard_success;
+		arena = arena_choose(tsd, arena);
+		if (unlikely(arena == NULL)) {
+			return NULL;
+		}
+		/* Bin was empty; fall back to tcache_alloc_small_hard(). */
+		ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache,
+		    tbin, binind, &tcache_hard_success);
+		if (tcache_hard_success == false) {
+			return NULL;
+		}
+	}
+
+	assert(ret);
+	/*
+	 * Only compute usize if required.  The checks in the following if
+	 * statement are all static.
+	 */
+	if (config_prof || (slow_path && config_fill) || unlikely(zero)) {
+		usize = sz_index2size(binind);
+		assert(tcache_salloc(tsd_tsdn(tsd), ret) == usize);
+	}
+
+	if (likely(!zero)) {
+		if (slow_path && config_fill) {
+			if (unlikely(opt_junk_alloc)) {
+				arena_alloc_junk_small(ret,
+				    &arena_bin_info[binind], false);
+			} else if (unlikely(opt_zero)) {
+				memset(ret, 0, usize);
+			}
+		}
+	} else {
+		if (slow_path && config_fill && unlikely(opt_junk_alloc)) {
+			arena_alloc_junk_small(ret, &arena_bin_info[binind],
+			    true);
+		}
+		memset(ret, 0, usize);	/* Caller requested zeroed memory. */
+	}
+
+	if (config_stats) {
+		tbin->tstats.nrequests++;
+	}
+	if (config_prof) {
+		tcache->prof_accumbytes += usize;
+	}
+	tcache_event(tsd, tcache);
+	return ret;
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
+    szind_t binind, bool zero, bool slow_path) {
+	void *ret;
+	tcache_bin_t *tbin;
+	bool tcache_success;
+
+	assert(binind >= NBINS &&binind < nhbins);
+	tbin = tcache_large_bin_get(tcache, binind);
+	ret = tcache_alloc_easy(tbin, &tcache_success);
+	assert(tcache_success == (ret != NULL));
+	if (unlikely(!tcache_success)) {
+		/*
+		 * Only allocate one large object at a time, because it's quite
+		 * expensive to create one and not use it.
+		 */
+		arena = arena_choose(tsd, arena);
+		if (unlikely(arena == NULL)) {
+			return NULL;
+		}
+
+		ret = large_malloc(tsd_tsdn(tsd), arena, sz_s2u(size), zero);
+		if (ret == NULL) {
+			return NULL;
+		}
+	} else {
+		size_t usize JEMALLOC_CC_SILENCE_INIT(0);
+
+		/* Cache hit.  Only compute usize on demand. */
+		if (config_prof || (slow_path && config_fill) ||
+		    unlikely(zero)) {
+			usize = sz_index2size(binind);
+			assert(usize <= tcache_maxclass);
+		}
+
+		if (likely(!zero)) {
+			if (slow_path && config_fill) {
+				if (unlikely(opt_junk_alloc)) {
+					memset(ret, JEMALLOC_ALLOC_JUNK,
+					    usize);
+				} else if (unlikely(opt_zero)) {
+					memset(ret, 0, usize);
+				}
+			}
+		} else {
+			memset(ret, 0, usize);	/* zero was requested. */
+		}
+
+		if (config_stats) {
+			tbin->tstats.nrequests++;
+		}
+		if (config_prof) {
+			tcache->prof_accumbytes += usize;
+		}
+	}
+
+	tcache_event(tsd, tcache);
+	return ret;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
+    bool slow_path) {
+	assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SMALL_MAXCLASS);
+
+	if (slow_path && config_fill && unlikely(opt_junk_free)) {
+		arena_dalloc_junk_small(ptr, &arena_bin_info[binind]);
+	}
+
+	tcache_bin_t *bin = tcache_small_bin_get(tcache, binind);
+	unsigned limit = tcache_bin_info[binind].ncached_max;
+
+	if (unlikely(bin->ncached == limit)) {
+		/* Bin is full: flush, passing half of the limit as rem. */
+		tcache_bin_flush_small(tsd, tcache, bin, binind, limit >> 1);
+	}
+	assert(bin->ncached < limit);
+
+	/* Push ptr onto the avail stack at avail[-ncached]. */
+	bin->ncached++;
+	*(bin->avail - bin->ncached) = ptr;
+
+	tcache_event(tsd, tcache);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
+    bool slow_path) {
+	assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SMALL_MAXCLASS);
+	assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass);
+
+	if (slow_path && config_fill && unlikely(opt_junk_free)) {
+		large_dalloc_junk(ptr, sz_index2size(binind));
+	}
+
+	tcache_bin_t *bin = tcache_large_bin_get(tcache, binind);
+	unsigned limit = tcache_bin_info[binind].ncached_max;
+
+	if (unlikely(bin->ncached == limit)) {
+		/* Bin is full: flush, passing half of the limit as rem. */
+		tcache_bin_flush_large(tsd, bin, binind, limit >> 1, tcache);
+	}
+	assert(bin->ncached < limit);
+
+	/* Push ptr onto the avail stack at avail[-ncached]. */
+	bin->ncached++;
+	*(bin->avail - bin->ncached) = ptr;
+
+	tcache_event(tsd, tcache);
+}
+
+JEMALLOC_ALWAYS_INLINE tcache_t *
+tcaches_get(tsd_t *tsd, unsigned ind) {
+	tcaches_t *slot = tcaches + ind;
+	if (unlikely(slot->tcache == NULL)) {
+		slot->tcache = tcache_create_explicit(tsd);	/* On demand. */
+	}
+	return slot->tcache;
+}
+
+#endif /* JEMALLOC_INTERNAL_TCACHE_INLINES_H */
--- a/deps/jemalloc/include/jemalloc/internal/tcache_structs.h
+++ b/deps/jemalloc/include/jemalloc/internal/tcache_structs.h
@@ -0,0 +1,64 @@
+#ifndef JEMALLOC_INTERNAL_TCACHE_STRUCTS_H
+#define JEMALLOC_INTERNAL_TCACHE_STRUCTS_H
+
+#include "jemalloc/internal/ql.h"
+#include "jemalloc/internal/size_classes.h"
+#include "jemalloc/internal/stats_tsd.h"
+#include "jemalloc/internal/ticker.h"
+
+/*
+ * Read-only information associated with each element of tcache_t's tbins array
+ * is stored separately, mainly to reduce memory usage.
+ */
+struct tcache_bin_info_s {
+	unsigned	ncached_max;	/* Upper limit on ncached. */
+};
+
+struct tcache_bin_s {
+	low_water_t	low_water;	/* Min # cached since last GC. */
+	uint32_t	ncached;	/* # of cached objects. */
+	/*
+	 * ncached and stats are both modified frequently.  Let's keep them
+	 * close so that they have a higher chance of being on the same
+	 * cacheline, thus fewer write-backs.
+	 */
+	tcache_bin_stats_t tstats;
+	/*
+	 * To make use of adjacent cacheline prefetch, the items in the avail
+	 * stack go to higher addresses for newer allocations.  avail points
+	 * just above the available space, which means that
+	 * avail[-ncached, ... -1] are available items and the lowest item will
+	 * be allocated first.
+	 */
+	void		**avail;	/* Stack of available objects. */
+};
+
+struct tcache_s {
+	/* Data accessed frequently first: prof, ticker and small bins. */
+	uint64_t	prof_accumbytes;/* Cleared after arena_prof_accum(). */
+	ticker_t	gc_ticker;	/* Drives incremental GC. */
+	/*
+	 * The pointer stacks associated with tbins follow as a contiguous
+	 * array.  During tcache initialization, the avail pointer in each
+	 * element of tbins is initialized to point to the proper offset within
+	 * this array.
+	 */
+	tcache_bin_t	tbins_small[NBINS];	/* One bin per small class. */
+	/* Data accessed less often below. */
+	ql_elm(tcache_t) link;		/* Used for aggregating stats. */
+	arena_t		*arena;		/* Associated arena. */
+	szind_t		next_gc_bin;	/* Next bin to GC. */
+	/* For small bins, fill (ncached_max >> lg_fill_div). */
+	uint8_t		lg_fill_div[NBINS];
+	tcache_bin_t	tbins_large[NSIZES-NBINS];	/* Remaining classes. */
+};
+
+/* Linkage for list of available (previously used) explicit tcache IDs. */
+struct tcaches_s {
+	union {
+		tcache_t	*tcache;	/* Explicit tcache in this slot. */
+		tcaches_t	*next;	/* Presumably next available slot. */
+	};
+};
+
+#endif /* JEMALLOC_INTERNAL_TCACHE_STRUCTS_H */
--- a/deps/jemalloc/include/jemalloc/internal/tcache_types.h
+++ b/deps/jemalloc/include/jemalloc/internal/tcache_types.h
@@ -0,0 +1,61 @@
+#ifndef JEMALLOC_INTERNAL_TCACHE_TYPES_H
+#define JEMALLOC_INTERNAL_TCACHE_TYPES_H
+
+#include "jemalloc/internal/size_classes.h"
+
+typedef struct tcache_bin_info_s tcache_bin_info_t;
+typedef struct tcache_bin_s tcache_bin_t;
+typedef struct tcache_s tcache_t;
+typedef struct tcaches_s tcaches_t;
+
+/* ncached is cast to this type for comparison. */
+typedef int32_t low_water_t;
+
+/*
+ * tcache pointers close to NULL are used to encode state information that is
+ * used for two purposes: preventing thread caching on a per thread basis and
+ * cleaning up during thread shutdown.
+ */
+#define TCACHE_STATE_DISABLED		((tcache_t *)(uintptr_t)1)
+#define TCACHE_STATE_REINCARNATED	((tcache_t *)(uintptr_t)2)
+#define TCACHE_STATE_PURGATORY		((tcache_t *)(uintptr_t)3)
+#define TCACHE_STATE_MAX		TCACHE_STATE_PURGATORY
+
+/*
+ * Absolute minimum number of cache slots for each small bin.
+ */
+#define TCACHE_NSLOTS_SMALL_MIN		20
+
+/*
+ * Absolute maximum number of cache slots for each small bin in the thread
+ * cache.  This is an additional constraint beyond that imposed as: twice the
+ * number of regions per slab for this size class.
+ *
+ * This constant must be an even number.
+ */
+#define TCACHE_NSLOTS_SMALL_MAX		200
+
+/* Number of cache slots for large size classes. */
+#define TCACHE_NSLOTS_LARGE		20
+
+/* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */
+#define LG_TCACHE_MAXCLASS_DEFAULT	15
+
+/*
+ * TCACHE_GC_SWEEP is the approximate number of allocation events between
+ * full GC sweeps.  Integer rounding may cause the actual number to be
+ * slightly higher, since GC is performed incrementally.
+ */
+#define TCACHE_GC_SWEEP			8192
+
+/* Number of tcache allocation/deallocation events between incremental GCs. */
+#define TCACHE_GC_INCR							\
+    ((TCACHE_GC_SWEEP / NBINS) + ((TCACHE_GC_SWEEP / NBINS == 0) ? 0 : 1))
+
+/* Used in TSD static initializer only. Real init in tsd_tcache_data_init(). */
+#define TCACHE_ZERO_INITIALIZER {0}
+
+/* Used in TSD static initializer only. Will be initialized to opt_tcache. */
+#define TCACHE_ENABLED_ZERO_INITIALIZER false
+
+#endif /* JEMALLOC_INTERNAL_TCACHE_TYPES_H */
--- a/deps/jemalloc/include/jemalloc/internal/ticker.h
+++ b/deps/jemalloc/include/jemalloc/internal/ticker.h
@@ -0,0 +1,50 @@
+#ifndef JEMALLOC_INTERNAL_TICKER_H
+#define JEMALLOC_INTERNAL_TICKER_H
+
+#include "jemalloc/internal/util.h"
+
+/**
+ * A ticker makes it easy to count-down events until some limit.  You
+ * ticker_init the ticker to trigger every nticks events.  You then notify it
+ * that an event has occurred with calls to ticker_tick (or that nticks events
+ * have occurred with a call to ticker_ticks), which will return true (and reset
+ * the counter) if the countdown hit zero.
+ */
+
+typedef struct {
+	int32_t tick;	/* Events remaining before the ticker fires. */
+	int32_t nticks;	/* Value tick is reloaded with after firing. */
+} ticker_t;
+
+static inline void
+ticker_init(ticker_t *ticker, int32_t nticks) {
+	/* Start with a full countdown and remember the reload value. */
+	ticker->tick = ticker->nticks = nticks;
+}
+
+static inline void
+ticker_copy(ticker_t *ticker, const ticker_t *other) {
+	*ticker = *other;	/* Copies both the countdown and the reload value. */
+}
+
+static inline int32_t
+ticker_read(const ticker_t *ticker) {
+	return ticker->tick;	/* Current countdown; the ticker is not modified. */
+}
+
+static inline bool
+ticker_ticks(ticker_t *ticker, int32_t nticks) {
+	bool expired = unlikely(ticker->tick < nticks);
+
+	/* Reload the countdown on expiry; otherwise consume nticks events. */
+	ticker->tick = expired ? ticker->nticks : ticker->tick - nticks;
+
+	return expired;
+}
+
+static inline bool
+ticker_tick(ticker_t *ticker) {
+	return ticker_ticks(ticker, 1);	/* Single-event form of ticker_ticks(). */
+}
+
+#endif /* JEMALLOC_INTERNAL_TICKER_H */
--- a/deps/jemalloc/include/jemalloc/internal/tsd_generic.h
+++ b/deps/jemalloc/include/jemalloc/internal/tsd_generic.h
@@ -0,0 +1,157 @@
+#ifdef JEMALLOC_INTERNAL_TSD_GENERIC_H
+#error This file should be included only once, by tsd.h.
+#endif
+#define JEMALLOC_INTERNAL_TSD_GENERIC_H
+
+typedef struct tsd_init_block_s tsd_init_block_t;
+struct tsd_init_block_s {
+	ql_elm(tsd_init_block_t) link;
+	pthread_t thread;
+	void *data;
+};
+
+/* Defined in tsd.c, to allow the mutex headers to have tsd dependencies. */
+typedef struct tsd_init_head_s tsd_init_head_t;
+
+typedef struct {
+	bool initialized;	/* Set by tsd_set(); cleared before tsd_cleanup(). */
+	tsd_t val;
+} tsd_wrapper_t;
+
+void *tsd_init_check_recursion(tsd_init_head_t *head,
+    tsd_init_block_t *block);
+void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block);
+
+extern pthread_key_t tsd_tsd;
+extern tsd_init_head_t tsd_init_head;
+extern tsd_wrapper_t tsd_boot_wrapper;
+extern bool tsd_booted;
+
+/* Initialization/cleanup. */
+JEMALLOC_ALWAYS_INLINE void
+tsd_cleanup_wrapper(void *arg) {
+	tsd_wrapper_t *wrapper = (tsd_wrapper_t *)arg;
+	/* Registered as the tsd_tsd key destructor by tsd_boot0(). */
+	if (wrapper->initialized) {
+		wrapper->initialized = false;
+		tsd_cleanup(&wrapper->val);
+		if (wrapper->initialized) {
+			/* Re-set during tsd_cleanup(); trigger another round. */
+			if (pthread_setspecific(tsd_tsd, (void *)wrapper) != 0)
+			{
+				malloc_write("<jemalloc>: Error setting TSD\n");
+				if (opt_abort) {
+					abort();
+				}
+			}
+			return;
+		}
+	}
+	malloc_tsd_dalloc(wrapper);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+tsd_wrapper_set(tsd_wrapper_t *wrapper) {
+	if (pthread_setspecific(tsd_tsd, (void *)wrapper) != 0) {
+		malloc_write("<jemalloc>: Error setting TSD\n");
+		abort();	/* Not gated on opt_abort. */
+	}
+}
+
+JEMALLOC_ALWAYS_INLINE tsd_wrapper_t *
+tsd_wrapper_get(bool init) {
+	tsd_wrapper_t *wrapper = (tsd_wrapper_t *)pthread_getspecific(tsd_tsd);
+	/* With init, create and install this thread's wrapper on first use. */
+	if (init && unlikely(wrapper == NULL)) {
+		tsd_init_block_t block;
+		wrapper = (tsd_wrapper_t *)
+		    tsd_init_check_recursion(&tsd_init_head, &block);
+		if (wrapper) {
+			return wrapper;	/* Supplied by the recursion check. */
+		}
+		wrapper = (tsd_wrapper_t *)
+		    malloc_tsd_malloc(sizeof(tsd_wrapper_t));
+		block.data = (void *)wrapper;
+		if (wrapper == NULL) {
+			malloc_write("<jemalloc>: Error allocating TSD\n");
+			abort();
+		} else {
+			wrapper->initialized = false;
+			tsd_t initializer = TSD_INITIALIZER;
+			wrapper->val = initializer;
+		}
+		tsd_wrapper_set(wrapper);
+		tsd_init_finish(&tsd_init_head, &block);
+	}
+	return wrapper;	/* May be NULL when !init. */
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_boot0(void) {
+	if (pthread_key_create(&tsd_tsd, tsd_cleanup_wrapper) != 0) {
+		return true;	/* true reports failure. */
+	}
+	tsd_wrapper_set(&tsd_boot_wrapper);	/* Replaced in tsd_boot1(). */
+	tsd_booted = true;
+	return false;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+tsd_boot1(void) {
+	tsd_wrapper_t *wrapper;
+	wrapper = (tsd_wrapper_t *)malloc_tsd_malloc(sizeof(tsd_wrapper_t));
+	if (wrapper == NULL) {
+		malloc_write("<jemalloc>: Error allocating TSD\n");
+		abort();
+	}
+	tsd_boot_wrapper.initialized = false;
+	tsd_cleanup(&tsd_boot_wrapper.val);	/* Done with the boot wrapper. */
+	wrapper->initialized = false;
+	tsd_t initializer = TSD_INITIALIZER;
+	wrapper->val = initializer;
+	tsd_wrapper_set(wrapper);	/* Takes over from &tsd_boot_wrapper. */
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_boot(void) {
+	bool failed = tsd_boot0();
+	if (!failed) {
+		tsd_boot1();	/* Second phase only after tsd_boot0() succeeds. */
+	}
+	return failed;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_booted_get(void) {
+	return tsd_booted;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_get_allocates(void) {
+	return true;	/* tsd_wrapper_get(true) may call malloc_tsd_malloc(). */
+}
+
+/* Get/set. */
+JEMALLOC_ALWAYS_INLINE tsd_t *
+tsd_get(bool init) {
+	assert(tsd_booted);
+
+	tsd_wrapper_t *w = tsd_wrapper_get(init);
+	/* Without init, this thread may not have a wrapper yet. */
+	if (w == NULL && !init && tsd_get_allocates()) {
+		return NULL;
+	}
+	return &w->val;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+tsd_set(tsd_t *val) {
+	assert(tsd_booted);
+
+	tsd_wrapper_t *w = tsd_wrapper_get(true);
+	/* Skip the copy when val already points at the wrapper's storage. */
+	if (likely(val != &w->val)) {
+		w->val = *val;
+	}
+	w->initialized = true;
+}
--- a/deps/jemalloc/include/jemalloc/internal/tsd_malloc_thread_cleanup.h
+++ b/deps/jemalloc/include/jemalloc/internal/tsd_malloc_thread_cleanup.h
@@ -0,0 +1,60 @@
+#ifdef JEMALLOC_INTERNAL_TSD_MALLOC_THREAD_CLEANUP_H
+#error This file should be included only once, by tsd.h.
+#endif
+#define JEMALLOC_INTERNAL_TSD_MALLOC_THREAD_CLEANUP_H
+
+extern __thread tsd_t tsd_tls;
+extern __thread bool tsd_initialized;
+extern bool tsd_booted;
+
+/* Initialization/cleanup. */
+JEMALLOC_ALWAYS_INLINE bool
+tsd_cleanup_wrapper(void) {
+	if (tsd_initialized) {
+		tsd_initialized = false;
+		tsd_cleanup(&tsd_tls);
+	}
+	return tsd_initialized;	/* True only if re-set during tsd_cleanup(). */
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_boot0(void) {
+	malloc_tsd_cleanup_register(&tsd_cleanup_wrapper);
+	tsd_booted = true;
+	return false;	/* This variant has no failure path. */
+}
+
+JEMALLOC_ALWAYS_INLINE void
+tsd_boot1(void) {
+	/* Do nothing. */
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_boot(void) {
+	return tsd_boot0();
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_booted_get(void) {
+	return tsd_booted;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_get_allocates(void) {
+	return false;
+}
+
+/* Get/set. */
+JEMALLOC_ALWAYS_INLINE tsd_t *
+tsd_get(bool init) {
+	assert(tsd_booted);
+	return &tsd_tls;	/* init is not consulted; tsd_tls is __thread. */
+}
+JEMALLOC_ALWAYS_INLINE void
+tsd_set(tsd_t *val) {
+	assert(tsd_booted);
+	if (likely(val != &tsd_tls)) {
+		tsd_tls = *val;	/* Copy unless val is the TLS slot itself. */
+	}
+	tsd_initialized = true;
+}
--- a/deps/jemalloc/include/jemalloc/internal/tsd_tls.h
+++ b/deps/jemalloc/include/jemalloc/internal/tsd_tls.h
@@ -0,0 +1,59 @@
+#ifdef JEMALLOC_INTERNAL_TSD_TLS_H
+#error This file should be included only once, by tsd.h.
+#endif
+#define JEMALLOC_INTERNAL_TSD_TLS_H
+
+extern __thread tsd_t tsd_tls;
+extern pthread_key_t tsd_tsd;
+extern bool tsd_booted;
+
+/* Initialization/cleanup. */
+JEMALLOC_ALWAYS_INLINE bool
+tsd_boot0(void) {
+	if (pthread_key_create(&tsd_tsd, &tsd_cleanup) != 0) {
+		return true;	/* true reports failure. */
+	}
+	tsd_booted = true;
+	return false;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+tsd_boot1(void) {
+	/* Do nothing. */
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_boot(void) {
+	return tsd_boot0();
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_booted_get(void) {
+	return tsd_booted;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_get_allocates(void) {
+	return false;
+}
+
+/* Get/set. */
+JEMALLOC_ALWAYS_INLINE tsd_t *
+tsd_get(bool init) {
+	assert(tsd_booted);
+	return &tsd_tls;	/* init is not consulted; tsd_tls is __thread. */
+}
+
+JEMALLOC_ALWAYS_INLINE void
+tsd_set(tsd_t *val) {
+	assert(tsd_booted);
+	if (likely(val != &tsd_tls)) {
+		tsd_tls = *val;	/* Copy unless val is the TLS slot itself. */
+	}
+	if (pthread_setspecific(tsd_tsd, (void *)&tsd_tls) != 0) {
+		malloc_write("<jemalloc>: Error setting tsd.\n");
+		if (opt_abort) {
+			abort();	/* Otherwise continue after logging. */
+		}
+	}
+}
--- a/deps/jemalloc/include/jemalloc/internal/tsd_types.h
+++ b/deps/jemalloc/include/jemalloc/internal/tsd_types.h
@@ -0,0 +1,10 @@
+#ifndef JEMALLOC_INTERNAL_TSD_TYPES_H
+#define JEMALLOC_INTERNAL_TSD_TYPES_H
+
+#define MALLOC_TSD_CLEANUPS_MAX	2
+
+typedef struct tsd_s tsd_t;
+typedef struct tsdn_s tsdn_t;
+typedef bool (*malloc_tsd_cleanup_t)(void);
+
+#endif /* JEMALLOC_INTERNAL_TSD_TYPES_H */
--- a/deps/jemalloc/include/jemalloc/internal/tsd_win.h
+++ b/deps/jemalloc/include/jemalloc/internal/tsd_win.h
@@ -0,0 +1,139 @@
+#ifdef JEMALLOC_INTERNAL_TSD_WIN_H
+#error This file should be included only once, by tsd.h.
+#endif
+#define JEMALLOC_INTERNAL_TSD_WIN_H
+
+typedef struct {
+	bool initialized;	/* Set by tsd_set(); cleared before tsd_cleanup(). */
+	tsd_t val;
+} tsd_wrapper_t;
+
+extern DWORD tsd_tsd;
+extern tsd_wrapper_t tsd_boot_wrapper;
+extern bool tsd_booted;
+
+/* Initialization/cleanup. */
+JEMALLOC_ALWAYS_INLINE bool
+tsd_cleanup_wrapper(void) {
+	DWORD error = GetLastError();	/* Saved across TlsGetValue(). */
+	tsd_wrapper_t *wrapper = (tsd_wrapper_t *)TlsGetValue(tsd_tsd);
+	SetLastError(error);
+
+	if (wrapper == NULL) {
+		return false;
+	}
+
+	if (wrapper->initialized) {
+		wrapper->initialized = false;
+		tsd_cleanup(&wrapper->val);
+		if (wrapper->initialized) {
+			/* Re-set during tsd_cleanup(); request another round. */
+			return true;
+		}
+	}
+	malloc_tsd_dalloc(wrapper);
+	return false;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+tsd_wrapper_set(tsd_wrapper_t *wrapper) {
+	if (!TlsSetValue(tsd_tsd, (void *)wrapper)) {
+		malloc_write("<jemalloc>: Error setting TSD\n");
+		abort();	/* Not gated on opt_abort. */
+	}
+}
+
+JEMALLOC_ALWAYS_INLINE tsd_wrapper_t *
+tsd_wrapper_get(bool init) {
+	DWORD error = GetLastError();	/* Saved across TlsGetValue(). */
+	tsd_wrapper_t *wrapper = (tsd_wrapper_t *) TlsGetValue(tsd_tsd);
+	SetLastError(error);
+	/* With init, create and install this thread's wrapper on first use. */
+	if (init && unlikely(wrapper == NULL)) {
+		wrapper = (tsd_wrapper_t *)
+		    malloc_tsd_malloc(sizeof(tsd_wrapper_t));
+		if (wrapper == NULL) {
+			malloc_write("<jemalloc>: Error allocating TSD\n");
+			abort();
+		} else {
+			wrapper->initialized = false;
+			/* MSVC is finicky about aggregate initialization. */
+			tsd_t tsd_initializer = TSD_INITIALIZER;
+			wrapper->val = tsd_initializer;
+		}
+		tsd_wrapper_set(wrapper);
+	}
+	return wrapper;	/* May be NULL when !init. */
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_boot0(void) {
+	tsd_tsd = TlsAlloc();
+	if (tsd_tsd == TLS_OUT_OF_INDEXES) {
+		return true;	/* true reports failure. */
+	}
+	malloc_tsd_cleanup_register(&tsd_cleanup_wrapper);
+	tsd_wrapper_set(&tsd_boot_wrapper);	/* Replaced in tsd_boot1(). */
+	tsd_booted = true;
+	return false;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+tsd_boot1(void) {
+	tsd_wrapper_t *wrapper;
+	wrapper = (tsd_wrapper_t *)
+	    malloc_tsd_malloc(sizeof(tsd_wrapper_t));
+	if (wrapper == NULL) {
+		malloc_write("<jemalloc>: Error allocating TSD\n");
+		abort();
+	}
+	tsd_boot_wrapper.initialized = false;
+	tsd_cleanup(&tsd_boot_wrapper.val);	/* Done with the boot wrapper. */
+	wrapper->initialized = false;
+	tsd_t initializer = TSD_INITIALIZER;
+	wrapper->val = initializer;
+	tsd_wrapper_set(wrapper);	/* Takes over from &tsd_boot_wrapper. */
+}
+JEMALLOC_ALWAYS_INLINE bool
+tsd_boot(void) {
+	bool failed = tsd_boot0();
+	if (!failed) {
+		tsd_boot1();	/* Second phase only after tsd_boot0() succeeds. */
+	}
+	return failed;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_booted_get(void) {
+	return tsd_booted;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+tsd_get_allocates(void) {
+	return true;
+}
+
+/* Get/set. */
+JEMALLOC_ALWAYS_INLINE tsd_t *
+tsd_get(bool init) {
+	assert(tsd_booted);
+
+	tsd_wrapper_t *w = tsd_wrapper_get(init);
+	/* Without init, this thread may not have a wrapper yet. */
+	if (w == NULL && !init && tsd_get_allocates()) {
+		return NULL;
+	}
+	return &w->val;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+tsd_set(tsd_t *val) {
+	assert(tsd_booted);
+
+	tsd_wrapper_t *w = tsd_wrapper_get(true);
+	/* Skip the copy when val already points at the wrapper's storage. */
+	if (likely(val != &w->val)) {
+		w->val = *val;
+	}
+	w->initialized = true;
+}
--- a/deps/jemalloc/include/jemalloc/internal/witness.h
+++ b/deps/jemalloc/include/jemalloc/internal/witness.h
@@ -0,0 +1,346 @@
+#ifndef JEMALLOC_INTERNAL_WITNESS_H
+#define JEMALLOC_INTERNAL_WITNESS_H
+
+#include "jemalloc/internal/ql.h"
+
+/******************************************************************************/
+/* LOCK RANKS */
+/******************************************************************************/
+
+/*
+ * Witnesses with rank WITNESS_RANK_OMIT are completely ignored by the witness
+ * machinery.
+ */
+
+#define WITNESS_RANK_OMIT		0U
+
+#define WITNESS_RANK_MIN		1U
+
+#define WITNESS_RANK_INIT		1U
+#define WITNESS_RANK_CTL		1U
+#define WITNESS_RANK_TCACHES		2U
+#define WITNESS_RANK_ARENAS		3U
+
+#define WITNESS_RANK_BACKGROUND_THREAD_GLOBAL	4U
+
+#define WITNESS_RANK_PROF_DUMP		5U
+#define WITNESS_RANK_PROF_BT2GCTX	6U
+#define WITNESS_RANK_PROF_TDATAS	7U
+#define WITNESS_RANK_PROF_TDATA		8U
+#define WITNESS_RANK_PROF_GCTX		9U
+
+#define WITNESS_RANK_BACKGROUND_THREAD	10U
+
+/*
+ * Used as an argument to witness_assert_depth_to_rank() in order to validate
+ * depth excluding non-core locks with lower ranks.  Since the rank argument to
+ * witness_assert_depth_to_rank() is inclusive rather than exclusive, this
+ * definition can have the same value as the minimally ranked core lock.
+ */
+#define WITNESS_RANK_CORE		11U
+
+#define WITNESS_RANK_DECAY		11U
+#define WITNESS_RANK_TCACHE_QL		12U
+#define WITNESS_RANK_EXTENT_GROW	13U
+#define WITNESS_RANK_EXTENTS		14U
+#define WITNESS_RANK_EXTENT_AVAIL	15U
+
+#define WITNESS_RANK_EXTENT_POOL	16U
+#define WITNESS_RANK_RTREE		17U
+#define WITNESS_RANK_BASE		18U
+#define WITNESS_RANK_ARENA_LARGE	19U
+
+#define WITNESS_RANK_LEAF		0xffffffffU
+#define WITNESS_RANK_ARENA_BIN		WITNESS_RANK_LEAF
+#define WITNESS_RANK_ARENA_STATS	WITNESS_RANK_LEAF
+#define WITNESS_RANK_DSS		WITNESS_RANK_LEAF
+#define WITNESS_RANK_PROF_ACTIVE	WITNESS_RANK_LEAF
+#define WITNESS_RANK_PROF_ACCUM		WITNESS_RANK_LEAF
+#define WITNESS_RANK_PROF_DUMP_SEQ	WITNESS_RANK_LEAF
+#define WITNESS_RANK_PROF_GDUMP		WITNESS_RANK_LEAF
+#define WITNESS_RANK_PROF_NEXT_THR_UID	WITNESS_RANK_LEAF
+#define WITNESS_RANK_PROF_THREAD_ACTIVE_INIT	WITNESS_RANK_LEAF
+
+/******************************************************************************/
+/* PER-WITNESS DATA */
+/******************************************************************************/
+#if defined(JEMALLOC_DEBUG)
+#  define WITNESS_INITIALIZER(name, rank) {name, rank, NULL, NULL, {NULL, NULL}}
+#else
+#  define WITNESS_INITIALIZER(name, rank)
+#endif
+
+typedef struct witness_s witness_t;
+typedef unsigned witness_rank_t;
+typedef ql_head(witness_t) witness_list_t;
+typedef int witness_comp_t (const witness_t *, void *, const witness_t *,
+    void *);
+
+struct witness_s {
+	/* Name, used for printing lock order reversal messages. */
+	const char		*name;
+
+	/*
+	 * Witness rank, where 0 is lowest and UINT_MAX is highest.  Witnesses
+	 * must be acquired in order of increasing rank.
+	 */
+	witness_rank_t		rank;
+
+	/*
+	 * If two witnesses are of equal rank and they have the same comp
+	 * function pointer, it is called as a last attempt to differentiate
+	 * between witnesses of equal rank.
+	 */
+	witness_comp_t		*comp;
+
+	/* Opaque data, passed to comp(). */
+	void			*opaque;
+
+	/* Linkage for thread's currently owned locks. */
+	ql_elm(witness_t)	link;
+};
+
+/******************************************************************************/
+/* PER-THREAD DATA */
+/******************************************************************************/
+typedef struct witness_tsd_s witness_tsd_t;
+struct witness_tsd_s {
+	witness_list_t witnesses;
+	bool forking;
+};
+
+#define WITNESS_TSD_INITIALIZER { ql_head_initializer(witnesses), false }
+#define WITNESS_TSDN_NULL ((witness_tsdn_t *)0)
+
+/******************************************************************************/
+/* (PER-THREAD) NULLABILITY HELPERS */
+/******************************************************************************/
+typedef struct witness_tsdn_s witness_tsdn_t;
+struct witness_tsdn_s {
+	witness_tsd_t witness_tsd;
+};
+
+JEMALLOC_ALWAYS_INLINE witness_tsdn_t *
+witness_tsd_tsdn(witness_tsd_t *witness_tsd) {
+	return (witness_tsdn_t *)witness_tsd;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+witness_tsdn_null(witness_tsdn_t *witness_tsdn) {
+	return witness_tsdn == NULL;
+}
+
+JEMALLOC_ALWAYS_INLINE witness_tsd_t *
+witness_tsdn_tsd(witness_tsdn_t *witness_tsdn) {
+	assert(!witness_tsdn_null(witness_tsdn));
+	return &witness_tsdn->witness_tsd;
+}
+
+/******************************************************************************/
+/* API */
+/******************************************************************************/
+void witness_init(witness_t *witness, const char *name, witness_rank_t rank,
+    witness_comp_t *comp, void *opaque);
+
+typedef void (witness_lock_error_t)(const witness_list_t *, const witness_t *);
+extern witness_lock_error_t *JET_MUTABLE witness_lock_error;
+
+typedef void (witness_owner_error_t)(const witness_t *);
+extern witness_owner_error_t *JET_MUTABLE witness_owner_error;
+
+typedef void (witness_not_owner_error_t)(const witness_t *);
+extern witness_not_owner_error_t *JET_MUTABLE witness_not_owner_error;
+
+typedef void (witness_depth_error_t)(const witness_list_t *,
+    witness_rank_t rank_inclusive, unsigned depth);
+extern witness_depth_error_t *JET_MUTABLE witness_depth_error;
+
+void witnesses_cleanup(witness_tsd_t *witness_tsd);
+void witness_prefork(witness_tsd_t *witness_tsd);
+void witness_postfork_parent(witness_tsd_t *witness_tsd);
+void witness_postfork_child(witness_tsd_t *witness_tsd);
+
+/* Helper, not intended for direct use. */
+static inline bool
+witness_owner(witness_tsd_t *witness_tsd, const witness_t *witness) {
+	witness_list_t *held = &witness_tsd->witnesses;
+	witness_t *cur;
+	bool found = false;
+
+	cassert(config_debug);
+	/* Linear scan of the witnesses recorded in witness_tsd. */
+	ql_foreach(cur, held, link) {
+		if (cur == witness) {
+			found = true;
+			break;
+		}
+	}
+	return found;
+}
+
+static inline void
+witness_assert_owner(witness_tsdn_t *witness_tsdn, const witness_t *witness) {
+	witness_tsd_t *owner_tsd;
+
+	/*
+	 * Nothing to verify outside debug builds or without thread state.
+	 */
+	if (!config_debug || witness_tsdn_null(witness_tsdn)) {
+		return;
+	}
+	owner_tsd = witness_tsdn_tsd(witness_tsdn);
+
+	/* Witnesses ranked WITNESS_RANK_OMIT are ignored. */
+	if (witness->rank == WITNESS_RANK_OMIT) {
+		return;
+	}
+
+	if (!witness_owner(owner_tsd, witness)) {
+		witness_owner_error(witness);
+	}
+}
+
+static inline void
+witness_assert_not_owner(witness_tsdn_t *witness_tsdn,
+    const witness_t *witness) {
+	witness_list_t *held;
+	witness_t *cur;
+
+	/* Nothing to verify outside debug builds or without thread state. */
+	if (!config_debug || witness_tsdn_null(witness_tsdn)) {
+		return;
+	}
+	held = &witness_tsdn_tsd(witness_tsdn)->witnesses;
+
+	/* Witnesses ranked WITNESS_RANK_OMIT are ignored. */
+	if (witness->rank == WITNESS_RANK_OMIT) {
+		return;
+	}
+	/*
+	 * Report every list entry that is this witness; the scan does not
+	 * stop at the first match.
+	 */
+	ql_foreach(cur, held, link) {
+		if (cur == witness) {
+			witness_not_owner_error(witness);
+		}
+	}
+}
+
+static inline void
+witness_assert_depth_to_rank(witness_tsdn_t *witness_tsdn,
+    witness_rank_t rank_inclusive, unsigned depth) {
+	witness_list_t *held;
+	witness_t *cur;
+	unsigned counted = 0;
+
+	/* Nothing to verify outside debug builds or without thread state. */
+	if (!config_debug || witness_tsdn_null(witness_tsdn)) {
+		return;
+	}
+	held = &witness_tsdn_tsd(witness_tsdn)->witnesses;
+
+	/*
+	 * Walk backwards from the tail of the list (where witness_lock()
+	 * appends), counting entries until one ranks below
+	 * rank_inclusive.
+	 */
+	if (ql_last(held, link) != NULL) {
+		ql_reverse_foreach(cur, held, link) {
+			if (cur->rank < rank_inclusive) {
+				break;
+			}
+			counted++;
+		}
+	}
+
+	/* Any mismatch with the expected depth goes to the error hook. */
+	if (counted != depth) {
+		witness_depth_error(held, rank_inclusive, depth);
+	}
+}
+
+static inline void
+witness_assert_depth(witness_tsdn_t *witness_tsdn, unsigned depth) {
+	witness_assert_depth_to_rank(witness_tsdn, WITNESS_RANK_MIN, depth);
+}
+
+static inline void
+witness_assert_lockless(witness_tsdn_t *witness_tsdn) {
+	witness_assert_depth(witness_tsdn, 0);
+}
+
+static inline void
+witness_lock(witness_tsdn_t *witness_tsdn, witness_t *witness) {
+	witness_tsd_t *witness_tsd;
+	witness_list_t *witnesses;
+	witness_t *w;
+	/* Debug-only: checks rank order, then appends witness to the list. */
+	if (!config_debug) {
+		return;
+	}
+
+	if (witness_tsdn_null(witness_tsdn)) {
+		return;
+	}
+	witness_tsd = witness_tsdn_tsd(witness_tsdn);
+	if (witness->rank == WITNESS_RANK_OMIT) {
+		return;
+	}
+
+	witness_assert_not_owner(witness_tsdn, witness);
+
+	witnesses = &witness_tsd->witnesses;
+	w = ql_last(witnesses, link);
+	if (w == NULL) {
+		/* No other locks; do nothing. */
+	} else if (witness_tsd->forking && w->rank <= witness->rank) {
+		/* Forking, and relaxed ranking satisfied. */
+	} else if (w->rank > witness->rank) {
+		/* Not forking, rank order reversal. */
+		witness_lock_error(witnesses, witness);
+	} else if (w->rank == witness->rank && (w->comp == NULL || w->comp !=
+	    witness->comp || w->comp(w, w->opaque, witness, witness->opaque) >
+	    0)) {
+		/*
+		 * Missing/incompatible comparison function, or comparison
+		 * function indicates rank order reversal.
+		 */
+		witness_lock_error(witnesses, witness);
+	}
+	/* Reached on every path above, including after an error report. */
+	ql_elm_new(witness, link);
+	ql_tail_insert(witnesses, witness, link);
+}
+
+static inline void
+witness_unlock(witness_tsdn_t *witness_tsdn, witness_t *witness) {
+	witness_tsd_t *owner_tsd;
+	witness_list_t *held;
+
+	/* Nothing to do outside debug builds or without thread state. */
+	if (!config_debug || witness_tsdn_null(witness_tsdn)) {
+		return;
+	}
+	owner_tsd = witness_tsdn_tsd(witness_tsdn);
+
+	/* Witnesses ranked WITNESS_RANK_OMIT are ignored. */
+	if (witness->rank == WITNESS_RANK_OMIT) {
+		return;
+	}
+
+	/*
+	 * Test ownership explicitly before removing, instead of counting on
+	 * witness_assert_owner() to abort, so that unit tests can exercise
+	 * the failure mode without triggering undefined behavior.
+	 */
+	if (!witness_owner(owner_tsd, witness)) {
+		witness_assert_owner(witness_tsdn, witness);
+		return;
+	}
+
+	held = &owner_tsd->witnesses;
+	ql_remove(held, witness, link);
+}
+
+#endif /* JEMALLOC_INTERNAL_WITNESS_H */
--- a/deps/jemalloc/include/msvc_compat/C99/stdbool.h
+++ b/deps/jemalloc/include/msvc_compat/C99/stdbool.h
@@ -0,0 +1,20 @@
+/*
+ * Minimal <stdbool.h> replacement for MSVC C builds: supplies _Bool (unless
+ * the compiler is clang), plus the bool/true/false macros and the
+ * __bool_true_false_are_defined marker.
+ */
+#ifndef stdbool_h
+#define stdbool_h
+
+/* Presumably the header that declares BOOL, used by the typedef below. */
+#include <wtypes.h>
+
+/* MSVC doesn't define _Bool or bool in C, but does have BOOL. */
+/*
+ * Note this doesn't pass autoconf's test because (bool) 0.5 != true, i.e.
+ * conversions to this bool do not behave like conversions to a real _Bool.
+ */
+/* Clang-cl uses MSVC headers, so needs msvc_compat, but has _Bool as
+ * a built-in type. */
+#ifndef __clang__
+typedef BOOL _Bool;
+#endif
+
+/* The macros a conforming <stdbool.h> is expected to provide. */
+#define bool _Bool
+#define true 1
+#define false 0
+
+#define __bool_true_false_are_defined 1
+
+#endif /* stdbool_h */
--- a/deps/jemalloc/include/msvc_compat/C99/stdint.h
+++ b/deps/jemalloc/include/msvc_compat/C99/stdint.h
@@ -0,0 +1,247 @@
+// ISO C9x  compliant stdint.h for Microsoft Visual Studio
+// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 
+// 
+//  Copyright (c) 2006-2008 Alexander Chemeris
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// 
+//   1. Redistributions of source code must retain the above copyright notice,
+//      this list of conditions and the following disclaimer.
+// 
+//   2. Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+// 
+//   3. The name of the author may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// 
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef _MSC_VER // [
+#error "Use this header only with Microsoft Visual C++ compilers!"
+#endif // _MSC_VER ]
+
+#ifndef _MSC_STDINT_H_ // [
+#define _MSC_STDINT_H_
+
+#if _MSC_VER > 1000
+#pragma once
+#endif
+
+#include <limits.h>
+
+// For Visual Studio 6 in C++ mode and for many Visual Studio versions when
+// compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}'
+// or compiler give many errors like this:
+//   error C2733: second C linkage of overloaded function 'wmemchr' not allowed
+#ifdef __cplusplus
+extern "C" {
+#endif
+#  include <wchar.h>
+#ifdef __cplusplus
+}
+#endif
+
+// Define _W64 macros to mark types changing their size, like intptr_t.
+#ifndef _W64
+#  if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300
+#     define _W64 __w64
+#  else
+#     define _W64
+#  endif
+#endif
+
+
+// 7.18.1 Integer types
+
+// 7.18.1.1 Exact-width integer types
+
+// Visual Studio 6 and Embedded Visual C++ 4 don't
+// realize that, e.g., char has the same size as __int8,
+// so we give up on __intX for them.
+#if (_MSC_VER < 1300)
+   typedef signed char       int8_t;
+   typedef signed short      int16_t;
+   typedef signed int        int32_t;
+   typedef unsigned char     uint8_t;
+   typedef unsigned short    uint16_t;
+   typedef unsigned int      uint32_t;
+#else
+   typedef signed __int8     int8_t;
+   typedef signed __int16    int16_t;
+   typedef signed __int32    int32_t;
+   typedef unsigned __int8   uint8_t;
+   typedef unsigned __int16  uint16_t;
+   typedef unsigned __int32  uint32_t;
+#endif
+typedef signed __int64       int64_t;
+typedef unsigned __int64     uint64_t;
+
+
+// 7.18.1.2 Minimum-width integer types
+typedef int8_t    int_least8_t;
+typedef int16_t   int_least16_t;
+typedef int32_t   int_least32_t;
+typedef int64_t   int_least64_t;
+typedef uint8_t   uint_least8_t;
+typedef uint16_t  uint_least16_t;
+typedef uint32_t  uint_least32_t;
+typedef uint64_t  uint_least64_t;
+
+// 7.18.1.3 Fastest minimum-width integer types
+typedef int8_t    int_fast8_t;
+typedef int16_t   int_fast16_t;
+typedef int32_t   int_fast32_t;
+typedef int64_t   int_fast64_t;
+typedef uint8_t   uint_fast8_t;
+typedef uint16_t  uint_fast16_t;
+typedef uint32_t  uint_fast32_t;
+typedef uint64_t  uint_fast64_t;
+
+// 7.18.1.4 Integer types capable of holding object pointers
+#ifdef _WIN64 // [
+   typedef signed __int64    intptr_t;
+   typedef unsigned __int64  uintptr_t;
+#else // _WIN64 ][
+   typedef _W64 signed int   intptr_t;
+   typedef _W64 unsigned int uintptr_t;
+#endif // _WIN64 ]
+
+// 7.18.1.5 Greatest-width integer types
+typedef int64_t   intmax_t;
+typedef uint64_t  uintmax_t;
+
+
+// 7.18.2 Limits of specified-width integer types
+
+#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [   See footnote 220 at page 257 and footnote 221 at page 259
+
+// 7.18.2.1 Limits of exact-width integer types
+#define INT8_MIN     ((int8_t)_I8_MIN)
+#define INT8_MAX     _I8_MAX
+#define INT16_MIN    ((int16_t)_I16_MIN)
+#define INT16_MAX    _I16_MAX
+#define INT32_MIN    ((int32_t)_I32_MIN)
+#define INT32_MAX    _I32_MAX
+#define INT64_MIN    ((int64_t)_I64_MIN)
+#define INT64_MAX    _I64_MAX
+#define UINT8_MAX    _UI8_MAX
+#define UINT16_MAX   _UI16_MAX
+#define UINT32_MAX   _UI32_MAX
+#define UINT64_MAX   _UI64_MAX
+
+// 7.18.2.2 Limits of minimum-width integer types
+#define INT_LEAST8_MIN    INT8_MIN
+#define INT_LEAST8_MAX    INT8_MAX
+#define INT_LEAST16_MIN   INT16_MIN
+#define INT_LEAST16_MAX   INT16_MAX
+#define INT_LEAST32_MIN   INT32_MIN
+#define INT_LEAST32_MAX   INT32_MAX
+#define INT_LEAST64_MIN   INT64_MIN
+#define INT_LEAST64_MAX   INT64_MAX
+#define UINT_LEAST8_MAX   UINT8_MAX
+#define UINT_LEAST16_MAX  UINT16_MAX
+#define UINT_LEAST32_MAX  UINT32_MAX
+#define UINT_LEAST64_MAX  UINT64_MAX
+
+// 7.18.2.3 Limits of fastest minimum-width integer types
+#define INT_FAST8_MIN    INT8_MIN
+#define INT_FAST8_MAX    INT8_MAX
+#define INT_FAST16_MIN   INT16_MIN
+#define INT_FAST16_MAX   INT16_MAX
+#define INT_FAST32_MIN   INT32_MIN
+#define INT_FAST32_MAX   INT32_MAX
+#define INT_FAST64_MIN   INT64_MIN
+#define INT_FAST64_MAX   INT64_MAX
+#define UINT_FAST8_MAX   UINT8_MAX
+#define UINT_FAST16_MAX  UINT16_MAX
+#define UINT_FAST32_MAX  UINT32_MAX
+#define UINT_FAST64_MAX  UINT64_MAX
+
+// 7.18.2.4 Limits of integer types capable of holding object pointers
+#ifdef _WIN64 // [
+#  define INTPTR_MIN   INT64_MIN
+#  define INTPTR_MAX   INT64_MAX
+#  define UINTPTR_MAX  UINT64_MAX
+#else // _WIN64 ][
+#  define INTPTR_MIN   INT32_MIN
+#  define INTPTR_MAX   INT32_MAX
+#  define UINTPTR_MAX  UINT32_MAX
+#endif // _WIN64 ]
+
+// 7.18.2.5 Limits of greatest-width integer types
+#define INTMAX_MIN   INT64_MIN
+#define INTMAX_MAX   INT64_MAX
+#define UINTMAX_MAX  UINT64_MAX
+
+// 7.18.3 Limits of other integer types
+
+#ifdef _WIN64 // [
+#  define PTRDIFF_MIN  _I64_MIN
+#  define PTRDIFF_MAX  _I64_MAX
+#else  // _WIN64 ][
+#  define PTRDIFF_MIN  _I32_MIN
+#  define PTRDIFF_MAX  _I32_MAX
+#endif  // _WIN64 ]
+
+#define SIG_ATOMIC_MIN  INT_MIN
+#define SIG_ATOMIC_MAX  INT_MAX
+
+#ifndef SIZE_MAX // [
+#  ifdef _WIN64 // [
+#     define SIZE_MAX  _UI64_MAX
+#  else // _WIN64 ][
+#     define SIZE_MAX  _UI32_MAX
+#  endif // _WIN64 ]
+#endif // SIZE_MAX ]
+
+// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
+#ifndef WCHAR_MIN // [
+#  define WCHAR_MIN  0
+#endif  // WCHAR_MIN ]
+#ifndef WCHAR_MAX // [
+#  define WCHAR_MAX  _UI16_MAX
+#endif  // WCHAR_MAX ]
+
+#define WINT_MIN  0
+#define WINT_MAX  _UI16_MAX
+
+#endif // __STDC_LIMIT_MACROS ]
+
+
+// 7.18.4 Macros for integer constants
+
+#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [   See footnote 224 at page 260
+
+// 7.18.4.1 Macros for minimum-width integer constants
+
+#define INT8_C(val)  val##i8
+#define INT16_C(val) val##i16
+#define INT32_C(val) val##i32
+#define INT64_C(val) val##i64
+
+#define UINT8_C(val)  val##ui8
+#define UINT16_C(val) val##ui16
+#define UINT32_C(val) val##ui32
+#define UINT64_C(val) val##ui64
+
+// 7.18.4.2 Macros for greatest-width integer constants
+#define INTMAX_C   INT64_C
+#define UINTMAX_C  UINT64_C
+
+#endif // __STDC_CONSTANT_MACROS ]
+
+
+#endif // _MSC_STDINT_H_ ]
--- a/deps/jemalloc/include/msvc_compat/windows_extra.h
+++ b/deps/jemalloc/include/msvc_compat/windows_extra.h
@@ -0,0 +1,6 @@
+#ifndef MSVC_COMPAT_WINDOWS_EXTRA_H
+#define MSVC_COMPAT_WINDOWS_EXTRA_H
+
+#include <errno.h>
+
+#endif /* MSVC_COMPAT_WINDOWS_EXTRA_H */
--- a/deps/jemalloc/src/background_thread.c
+++ b/deps/jemalloc/src/background_thread.c
@@ -0,0 +1,880 @@
+#define JEMALLOC_BACKGROUND_THREAD_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/assert.h"
+
+/******************************************************************************/
+/* Data. */
+
+/* This option should be opt-in only. */
+#define BACKGROUND_THREAD_DEFAULT false
+/* Read-only after initialization. */
+bool opt_background_thread = BACKGROUND_THREAD_DEFAULT;
+
+/* Used for thread creation, termination and stats. */
+malloc_mutex_t background_thread_lock;
+/* Indicates global state.  Atomic because decay reads this w/o locking. */
+atomic_b_t background_thread_enabled_state;
+size_t n_background_threads;
+/* Thread info per-index. */
+background_thread_info_t *background_thread_info;
+
+/* False if no necessary runtime support. */
+bool can_enable_background_thread;
+
+/******************************************************************************/
+
+#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
+#include <dlfcn.h>
+
+static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *,
+    void *(*)(void *), void *__restrict);
+static pthread_once_t once_control = PTHREAD_ONCE_INIT;
+
+/*
+ * One-time initializer, run through pthread_once() with once_control.  When
+ * JEMALLOC_LAZY_LOCK is defined it sets isthreaded to true; otherwise it has
+ * an empty body.
+ */
+static void
+pthread_create_wrapper_once(void) {
+#ifdef JEMALLOC_LAZY_LOCK
+	isthreaded = true;
+#endif
+}
+
+/*
+ * Wrapper with the pthread_create() signature.  Makes sure
+ * pthread_create_wrapper_once() has run (at most once per process, via
+ * pthread_once()), then forwards all arguments to pthread_create_fptr and
+ * returns its result unchanged.
+ *
+ * NOTE(review): pthread_create_fptr is not assigned in this part of the file;
+ * presumably it is resolved during initialization -- confirm it cannot be
+ * NULL when this is called.
+ */
+int
+pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr,
+    void *(*start_routine)(void *), void *__restrict arg) {
+	pthread_once(&once_control, pthread_create_wrapper_once);
+
+	return pthread_create_fptr(thread, attr, start_routine, arg);
+}
+#endif /* JEMALLOC_PTHREAD_CREATE_WRAPPER */
+
+#ifndef JEMALLOC_BACKGROUND_THREAD
+/*
+ * Stubs used when JEMALLOC_BACKGROUND_THREAD is not defined: each entry point
+ * of the background thread API still gets a definition, but its entire body
+ * is not_reached(), i.e. none of these is expected to be called in such a
+ * build.  NOT_REACHED expands to the complete function body and is undefined
+ * again right after the last stub.
+ */
+#define NOT_REACHED { not_reached(); }
+bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED
+bool background_threads_enable(tsd_t *tsd) NOT_REACHED
+bool background_threads_disable(tsd_t *tsd) NOT_REACHED
+void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
+    arena_decay_t *decay, size_t npages_new) NOT_REACHED
+void background_thread_prefork0(tsdn_t *tsdn) NOT_REACHED
+void background_thread_prefork1(tsdn_t *tsdn) NOT_REACHED
+void background_thread_postfork_parent(tsdn_t *tsdn) NOT_REACHED
+void background_thread_postfork_child(tsdn_t *tsdn) NOT_REACHED
+bool background_thread_stats_read(tsdn_t *tsdn,
+    background_thread_stats_t *stats) NOT_REACHED
+void background_thread_ctl_init(tsdn_t *tsdn) NOT_REACHED
+#undef NOT_REACHED
+#else
+
+static bool background_thread_enabled_at_fork;
+
+/*
+ * Reset the per-thread bookkeeping in info: wakeup time and the pending purge
+ * counter always, the run/sleep statistics only when config_stats is set.
+ * info->state is not touched here.
+ */
+static void
+background_thread_info_init(tsdn_t *tsdn, background_thread_info_t *info) {
+	background_thread_wakeup_time_set(tsdn, info, 0);
+	info->npages_to_purge_new = 0;
+
+	if (!config_stats) {
+		return;
+	}
+	info->tot_n_runs = 0;
+	nstime_init(&info->tot_sleep_time, 0);
+}
+
+/*
+ * Restrict the calling thread to the single CPU `cpu`.
+ *
+ * Returns true on failure (sched_setaffinity() returned non-zero) and false
+ * on success.  Without JEMALLOC_HAVE_SCHED_SETAFFINITY this is a no-op that
+ * always returns false.
+ */
+static inline bool
+set_current_thread_affinity(UNUSED int cpu) {
+#if !defined(JEMALLOC_HAVE_SCHED_SETAFFINITY)
+	return false;
+#else
+	cpu_set_t only_cpu;
+
+	CPU_ZERO(&only_cpu);
+	CPU_SET(cpu, &only_cpu);
+
+	return sched_setaffinity(0, sizeof(only_cpu), &only_cpu) != 0;
+#endif
+}
+
+/* Threshold for determining when to wake up the background thread. */
+#define BACKGROUND_THREAD_NPAGES_THRESHOLD UINT64_C(1024)
+#define BILLION UINT64_C(1000000000)
+/* Minimal sleep interval 100 ms. */
+#define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10)
+
+/*
+ * Weighted sum over the decay backlog for a shift of `interval` steps: the
+ * first `interval` backlog entries are weighted with h_steps[i], every later
+ * entry with h_steps[i] - h_steps[i - interval].  The sum is scaled down by
+ * SMOOTHSTEP_BFP bits before being returned.
+ *
+ * The first loop is bounded by `interval` only, so callers are expected to
+ * pass interval <= SMOOTHSTEP_NSTEPS.
+ */
+static inline size_t
+decay_npurge_after_interval(arena_decay_t *decay, size_t interval) {
+	uint64_t weighted = 0;
+	size_t step = 0;
+
+	while (step < interval) {
+		weighted += decay->backlog[step] * h_steps[step];
+		step++;
+	}
+	while (step < SMOOTHSTEP_NSTEPS) {
+		weighted += decay->backlog[step] *
+		    (h_steps[step] - h_steps[step - interval]);
+		step++;
+	}
+
+	return (size_t)(weighted >> SMOOTHSTEP_BFP);
+}
+
+/*
+ * Compute, in nanoseconds, how long a background thread may sleep with
+ * respect to one decay state (`decay`) and its extents container (`extents`).
+ *
+ * - If decay->mtx cannot be taken with trylock, returns
+ *   BACKGROUND_THREAD_MIN_INTERVAL_NS immediately (no lock is held then).
+ * - If decay->time_ms <= 0, or there are no pages in `extents` and the whole
+ *   backlog is zero, the result is BACKGROUND_THREAD_INDEFINITE_SLEEP.
+ * - If at most BACKGROUND_THREAD_NPAGES_THRESHOLD pages are present, the
+ *   result is the full decay span (decay interval * SMOOTHSTEP_NSTEPS).
+ * - Otherwise the number of decay epochs is searched for which
+ *   decay_npurge_after_interval() is close to the page threshold.
+ *
+ * Every result that goes through label_done is raised to at least
+ * BACKGROUND_THREAD_MIN_INTERVAL_NS, and decay->mtx is released there.
+ */
+static uint64_t
+arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, arena_decay_t *decay,
+    extents_t *extents) {
+	if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
+		/* Use minimal interval if decay is contended. */
+		return BACKGROUND_THREAD_MIN_INTERVAL_NS;
+	}
+
+	/* decay->mtx is held from here until label_done. */
+	uint64_t interval;
+	ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
+	if (decay_time <= 0) {
+		/* Purging is eagerly done or disabled currently. */
+		interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
+		goto label_done;
+	}
+
+	uint64_t decay_interval_ns = nstime_ns(&decay->interval);
+	assert(decay_interval_ns > 0);
+	size_t npages = extents_npages_get(extents);
+	if (npages == 0) {
+		/* Look for any non-zero backlog entry. */
+		unsigned i;
+		for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) {
+			if (decay->backlog[i] > 0) {
+				break;
+			}
+		}
+		if (i == SMOOTHSTEP_NSTEPS) {
+			/* No dirty pages recorded.  Sleep indefinitely. */
+			interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
+			goto label_done;
+		}
+	}
+	if (npages <= BACKGROUND_THREAD_NPAGES_THRESHOLD) {
+		/* Use max interval. */
+		interval = decay_interval_ns * SMOOTHSTEP_NSTEPS;
+		goto label_done;
+	}
+
+	/* Search bounds, both expressed in units of decay epochs. */
+	size_t lb = BACKGROUND_THREAD_MIN_INTERVAL_NS / decay_interval_ns;
+	size_t ub = SMOOTHSTEP_NSTEPS;
+	/* Minimal 2 intervals to ensure reaching next epoch deadline. */
+	lb = (lb < 2) ? 2 : lb;
+	if ((decay_interval_ns * ub <= BACKGROUND_THREAD_MIN_INTERVAL_NS) ||
+	    (lb + 2 > ub)) {
+		/* The whole span is too short to search; use the minimum. */
+		interval = BACKGROUND_THREAD_MIN_INTERVAL_NS;
+		goto label_done;
+	}
+
+	assert(lb + 2 <= ub);
+	size_t npurge_lb, npurge_ub;
+	npurge_lb = decay_npurge_after_interval(decay, lb);
+	if (npurge_lb > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
+		/* Threshold already exceeded at the lower bound. */
+		interval = decay_interval_ns * lb;
+		goto label_done;
+	}
+	npurge_ub = decay_npurge_after_interval(decay, ub);
+	if (npurge_ub < BACKGROUND_THREAD_NPAGES_THRESHOLD) {
+		/* Threshold not reached even at the upper bound. */
+		interval = decay_interval_ns * ub;
+		goto label_done;
+	}
+
+	/*
+	 * Bisect [lb, ub] until the two purge estimates are within one
+	 * threshold of each other or the bounds are at most 2 epochs apart.
+	 * n_search is only advanced inside the assert, where it bounds the
+	 * number of iterations.
+	 */
+	unsigned n_search = 0;
+	size_t target, npurge;
+	while ((npurge_lb + BACKGROUND_THREAD_NPAGES_THRESHOLD < npurge_ub)
+	    && (lb + 2 < ub)) {
+		target = (lb + ub) / 2;
+		npurge = decay_npurge_after_interval(decay, target);
+		if (npurge > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
+			ub = target;
+			npurge_ub = npurge;
+		} else {
+			lb = target;
+			npurge_lb = npurge;
+		}
+		assert(n_search++ < lg_floor(SMOOTHSTEP_NSTEPS) + 1);
+	}
+	/* Midpoint of the remaining bracket, converted back to ns. */
+	interval = decay_interval_ns * (ub + lb) / 2;
+label_done:
+	/* Never report less than the minimal sleep interval. */
+	interval = (interval < BACKGROUND_THREAD_MIN_INTERVAL_NS) ?
+	    BACKGROUND_THREAD_MIN_INTERVAL_NS : interval;
+	malloc_mutex_unlock(tsdn, &decay->mtx);
+
+	return interval;
+}
+
+/*
+ * Purge interval for an arena as seen by its background thread: the smaller
+ * of the intervals computed for the dirty and the muzzy decay state.
+ */
+static uint64_t
+arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) {
+	uint64_t dirty_ns = arena_decay_compute_purge_interval_impl(tsdn,
+	    &arena->decay_dirty, &arena->extents_dirty);
+	if (dirty_ns == BACKGROUND_THREAD_MIN_INTERVAL_NS) {
+		/* Already the smallest possible result; skip the muzzy side. */
+		return dirty_ns;
+	}
+
+	uint64_t muzzy_ns = arena_decay_compute_purge_interval_impl(tsdn,
+	    &arena->decay_muzzy, &arena->extents_muzzy);
+	if (muzzy_ns < dirty_ns) {
+		return muzzy_ns;
+	}
+	return dirty_ns;
+}
+
+/*
+ * Put the calling background thread to sleep on info->cond.
+ *
+ * interval is either BACKGROUND_THREAD_INDEFINITE_SLEEP (wait until
+ * signalled) or a duration in ns of at least
+ * BACKGROUND_THREAD_MIN_INTERVAL_NS (timed wait).  The condition variable is
+ * waited on with info->mtx.lock, so info->mtx must be held by the caller
+ * (background_work() locks it before entering its loop).
+ *
+ * Side effects: clears info->npages_to_purge_new, publishes the planned
+ * wakeup time while in a timed wait and resets it to
+ * BACKGROUND_THREAD_INDEFINITE_SLEEP afterwards, and, with config_stats,
+ * bumps tot_n_runs and accumulates the time slept in tot_sleep_time.
+ */
+static void
+background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info,
+    uint64_t interval) {
+	if (config_stats) {
+		info->tot_n_runs++;
+	}
+	info->npages_to_purge_new = 0;
+
+	struct timeval tv;
+	/* Specific clock required by timedwait. */
+	gettimeofday(&tv, NULL);
+	nstime_t before_sleep;
+	/* tv_usec is in microseconds; nstime_init2 takes nanoseconds. */
+	nstime_init2(&before_sleep, tv.tv_sec, tv.tv_usec * 1000);
+
+	int ret;
+	if (interval == BACKGROUND_THREAD_INDEFINITE_SLEEP) {
+		assert(background_thread_indefinite_sleep(info));
+		ret = pthread_cond_wait(&info->cond, &info->mtx.lock);
+		assert(ret == 0);
+	} else {
+		assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS &&
+		    interval <= BACKGROUND_THREAD_INDEFINITE_SLEEP);
+		/* We need malloc clock (can be different from tv). */
+		nstime_t next_wakeup;
+		nstime_init(&next_wakeup, 0);
+		nstime_update(&next_wakeup);
+		nstime_iadd(&next_wakeup, interval);
+		assert(nstime_ns(&next_wakeup) <
+		    BACKGROUND_THREAD_INDEFINITE_SLEEP);
+		/* Publish the wakeup time (read by the interval check). */
+		background_thread_wakeup_time_set(tsdn, info,
+		    nstime_ns(&next_wakeup));
+
+		/*
+		 * The absolute deadline handed to timedwait is derived from the
+		 * gettimeofday() reading taken above, not from the malloc
+		 * clock.
+		 */
+		nstime_t ts_wakeup;
+		nstime_copy(&ts_wakeup, &before_sleep);
+		nstime_iadd(&ts_wakeup, interval);
+		struct timespec ts;
+		ts.tv_sec = (size_t)nstime_sec(&ts_wakeup);
+		ts.tv_nsec = (size_t)nstime_nsec(&ts_wakeup);
+
+		assert(!background_thread_indefinite_sleep(info));
+		ret = pthread_cond_timedwait(&info->cond, &info->mtx.lock, &ts);
+		assert(ret == ETIMEDOUT || ret == 0);
+		/* Awake again: no wakeup is scheduled at the moment. */
+		background_thread_wakeup_time_set(tsdn, info,
+		    BACKGROUND_THREAD_INDEFINITE_SLEEP);
+	}
+	if (config_stats) {
+		gettimeofday(&tv, NULL);
+		nstime_t after_sleep;
+		nstime_init2(&after_sleep, tv.tv_sec, tv.tv_usec * 1000);
+		/* Only count the sleep if the clock actually moved forward. */
+		if (nstime_compare(&after_sleep, &before_sleep) > 0) {
+			nstime_subtract(&after_sleep, &before_sleep);
+			nstime_add(&info->tot_sleep_time, &after_sleep);
+		}
+	}
+}
+
+/*
+ * If this thread has been marked paused, step aside: release info->mtx, block
+ * until background_thread_lock can be acquired (and drop it again at once),
+ * then re-take info->mtx.  Returns true if that happened, in which case the
+ * caller should re-examine info->state; returns false (with no locking
+ * activity) when the thread is not paused.
+ */
+static bool
+background_thread_pause_check(tsdn_t *tsdn, background_thread_info_t *info) {
+	if (likely(info->state != background_thread_paused)) {
+		return false;
+	}
+
+	malloc_mutex_unlock(tsdn, &info->mtx);
+	/* Wait on global lock to update status. */
+	malloc_mutex_lock(tsdn, &background_thread_lock);
+	malloc_mutex_unlock(tsdn, &background_thread_lock);
+	malloc_mutex_lock(tsdn, &info->mtx);
+
+	return true;
+}
+
+/*
+ * One iteration of background work for thread `ind`: run decay on every
+ * existing arena whose index is ind, ind + ncpus, ind + 2 * ncpus, ..., then
+ * sleep for the shortest purge interval any of them asked for (indefinitely
+ * if no arena produced one).
+ */
+static inline void
+background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, unsigned ind) {
+	uint64_t shortest = BACKGROUND_THREAD_INDEFINITE_SLEEP;
+	unsigned total = narenas_total_get();
+	unsigned arena_ind;
+
+	for (arena_ind = ind; arena_ind < total; arena_ind += ncpus) {
+		arena_t *arena = arena_get(tsdn, arena_ind, false);
+		if (arena != NULL) {
+			arena_decay(tsdn, arena, true, false);
+			/*
+			 * Once the minimum has been reached no arena can lower
+			 * it further, so stop computing intervals (decay is
+			 * still run for the remaining arenas).
+			 */
+			if (shortest != BACKGROUND_THREAD_MIN_INTERVAL_NS) {
+				uint64_t wanted =
+				    arena_decay_compute_purge_interval(tsdn,
+				    arena);
+				assert(wanted >=
+				    BACKGROUND_THREAD_MIN_INTERVAL_NS);
+				if (wanted < shortest) {
+					shortest = wanted;
+				}
+			}
+		}
+	}
+	background_thread_sleep(tsdn, info, shortest);
+}
+
+/*
+ * Stop the background thread described by info, if one is running, and wait
+ * for it to exit.
+ *
+ * Locking contract (asserted): for slot 0 the caller holds
+ * background_thread_lock; for any other slot the caller must NOT hold it.
+ *
+ * Returns true only if pthread_join() fails; returns false both when the
+ * thread was joined successfully and when there was no started thread.
+ * n_background_threads is decremented only after a successful join.
+ */
+static bool
+background_threads_disable_single(tsd_t *tsd, background_thread_info_t *info) {
+	if (info == &background_thread_info[0]) {
+		malloc_mutex_assert_owner(tsd_tsdn(tsd),
+		    &background_thread_lock);
+	} else {
+		malloc_mutex_assert_not_owner(tsd_tsdn(tsd),
+		    &background_thread_lock);
+	}
+
+	/* Every return path below pairs this with post_reentrancy(). */
+	pre_reentrancy(tsd, NULL);
+	malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
+	bool has_thread;
+	assert(info->state != background_thread_paused);
+	if (info->state == background_thread_started) {
+		has_thread = true;
+		/* Request the stop and wake the thread so it notices. */
+		info->state = background_thread_stopped;
+		pthread_cond_signal(&info->cond);
+	} else {
+		has_thread = false;
+	}
+	malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
+
+	if (!has_thread) {
+		post_reentrancy(tsd);
+		return false;
+	}
+	void *ret;
+	if (pthread_join(info->thread, &ret)) {
+		post_reentrancy(tsd);
+		return true;
+	}
+	/* background_thread_entry() always returns NULL. */
+	assert(ret == NULL);
+	n_background_threads--;
+	post_reentrancy(tsd);
+
+	return false;
+}
+
+static void *background_thread_entry(void *ind_arg);
+
+/*
+ * Create a thread via pthread_create_wrapper() while all signals are blocked
+ * in the calling thread, then restore the caller's previous signal mask.
+ *
+ * Returns the pthread_sigmask() error if blocking fails (no thread is created
+ * in that case), otherwise the result of pthread_create_wrapper().  A failure
+ * to restore the mask is reported with malloc_printf() and aborts when
+ * opt_abort is set, but does not change the return value.
+ */
+static int
+background_thread_create_signals_masked(pthread_t *thread,
+    const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg) {
+	/*
+	 * Block every signal (sigfillset) during thread creation so that the
+	 * new thread inherits a signal mask in which all signals are blocked.
+	 */
+	sigset_t set;
+	sigfillset(&set);
+	sigset_t oldset;
+	int mask_err = pthread_sigmask(SIG_SETMASK, &set, &oldset);
+	if (mask_err != 0) {
+		return mask_err;
+	}
+	int create_err = pthread_create_wrapper(thread, attr, start_routine,
+	    arg);
+	/*
+	 * Restore the signal mask.  Failure to restore the signal mask here
+	 * changes program behavior.
+	 *
+	 * NOTE(review): the message below says thread creation failed even
+	 * when create_err is 0; only the restoration is known to have failed.
+	 */
+	int restore_err = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
+	if (restore_err != 0) {
+		malloc_printf("<jemalloc>: background thread creation "
+		    "failed (%d), and signal mask restoration failed "
+		    "(%d)\n", create_err, restore_err);
+		if (opt_abort) {
+			abort();
+		}
+	}
+	return create_err;
+}
+
+/*
+ * Called by background thread 0 to launch the threads other slots have asked
+ * for.  A slot needs a thread when its state is background_thread_started but
+ * created_threads[i] is still false.
+ *
+ * *n_created counts the threads created so far (including thread 0); when it
+ * already equals n_background_threads there is nothing to do.
+ *
+ * Locking: entered and left with background_thread_info[0].mtx held.  That
+ * mutex is dropped while scanning; background_thread_lock is held during the
+ * scan but released around each thread creation.
+ */
+static void
+check_background_thread_creation(tsd_t *tsd, unsigned *n_created,
+    bool *created_threads) {
+	if (likely(*n_created == n_background_threads)) {
+		return;
+	}
+
+	malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_info[0].mtx);
+label_restart:
+	malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock);
+	/* Slot 0 is the calling thread itself, so start at 1. */
+	for (unsigned i = 1; i < ncpus; i++) {
+		if (created_threads[i]) {
+			continue;
+		}
+		background_thread_info_t *info = &background_thread_info[i];
+		malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
+		assert(info->state != background_thread_paused);
+		bool create = (info->state == background_thread_started);
+		malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
+		if (!create) {
+			continue;
+		}
+
+		/*
+		 * To avoid deadlock with prefork handlers (which waits for the
+		 * mutex held here), unlock before calling pthread_create().
+		 */
+		malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock);
+
+		pre_reentrancy(tsd, NULL);
+		int err = background_thread_create_signals_masked(&info->thread,
+		    NULL, background_thread_entry, (void *)(uintptr_t)i);
+		post_reentrancy(tsd);
+
+		if (err == 0) {
+			(*n_created)++;
+			created_threads[i] = true;
+		} else {
+			/*
+			 * NOTE(review): on failure the slot stays in the
+			 * started state and is not recorded as created, so the
+			 * restart below attempts the creation again.
+			 */
+			malloc_printf("<jemalloc>: background thread "
+			    "creation failed (%d)\n", err);
+			if (opt_abort) {
+				abort();
+			}
+		}
+		/* Restart since we unlocked. */
+		goto label_restart;
+	}
+	/* Re-take thread 0's mutex before letting go of the global lock. */
+	malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_info[0].mtx);
+	malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock);
+}
+
+/*
+ * Work loop of background thread 0.  In addition to the regular decay work
+ * for its own arenas it creates the other background threads on demand and
+ * shuts them down again when it is stopped itself.
+ *
+ * Called from background_work() with background_thread_info[0].mtx held.
+ */
+static void
+background_thread0_work(tsd_t *tsd) {
+	/* Thread0 is also responsible for launching / terminating threads. */
+	VARIABLE_ARRAY(bool, created_threads, ncpus);
+	unsigned i;
+	/* Element 0 is left unset; the loops over this array start at 1. */
+	for (i = 1; i < ncpus; i++) {
+		created_threads[i] = false;
+	}
+	/* Start working, and create more threads when asked. */
+	/* The count starts at 1 to account for this thread. */
+	unsigned n_created = 1;
+	while (background_thread_info[0].state != background_thread_stopped) {
+		if (background_thread_pause_check(tsd_tsdn(tsd),
+		    &background_thread_info[0])) {
+			/* State may have changed while paused; re-check. */
+			continue;
+		}
+		check_background_thread_creation(tsd, &n_created,
+		    (bool *)&created_threads);
+		background_work_sleep_once(tsd_tsdn(tsd),
+		    &background_thread_info[0], 0);
+	}
+
+	/*
+	 * Shut down other threads at exit.  Note that the ctl thread is holding
+	 * the global background_thread mutex (and is waiting) for us.
+	 */
+	assert(!background_thread_enabled());
+	for (i = 1; i < ncpus; i++) {
+		background_thread_info_t *info = &background_thread_info[i];
+		assert(info->state != background_thread_paused);
+		if (created_threads[i]) {
+			/* Signals the thread to stop and joins it. */
+			background_threads_disable_single(tsd, info);
+		} else {
+			malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
+			/* Clear in case the thread wasn't created. */
+			info->state = background_thread_stopped;
+			malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
+		}
+	}
+	background_thread_info[0].state = background_thread_stopped;
+	/* Only this thread is still accounted for at this point. */
+	assert(n_background_threads == 1);
+}
+
+/*
+ * Body of background thread `ind`.  Holds the slot's mutex for the whole
+ * lifetime of the loop (the sleep and pause helpers release it temporarily),
+ * runs until the slot's state becomes background_thread_stopped, and resets
+ * the published wakeup time to 0 on the way out.  Slot 0 runs the extended
+ * thread-0 loop instead of the plain one.
+ */
+static void
+background_work(tsd_t *tsd, unsigned ind) {
+	tsdn_t *tsdn = tsd_tsdn(tsd);
+	background_thread_info_t *self = &background_thread_info[ind];
+
+	malloc_mutex_lock(tsdn, &self->mtx);
+	background_thread_wakeup_time_set(tsdn, self,
+	    BACKGROUND_THREAD_INDEFINITE_SLEEP);
+
+	if (ind != 0) {
+		while (self->state != background_thread_stopped) {
+			/* After a pause, re-test the state before working. */
+			if (!background_thread_pause_check(tsdn, self)) {
+				background_work_sleep_once(tsdn, self, ind);
+			}
+		}
+	} else {
+		background_thread0_work(tsd);
+	}
+
+	assert(self->state == background_thread_stopped);
+	background_thread_wakeup_time_set(tsdn, self, 0);
+	malloc_mutex_unlock(tsdn, &self->mtx);
+}
+
+/*
+ * pthread start routine for background threads.  ind_arg carries the thread's
+ * slot index, passed as an integer cast to void *.  Names the thread when
+ * pthread_setname_np is available, pins it to CPU `thread_ind` when per-CPU
+ * arenas are enabled (the result of the affinity call is ignored), runs
+ * background_work() until the slot is stopped, and always returns NULL.
+ */
+static void *
+background_thread_entry(void *ind_arg) {
+	unsigned thread_ind = (unsigned)(uintptr_t)ind_arg;
+	assert(thread_ind < ncpus);
+#ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP
+	pthread_setname_np(pthread_self(), "jemalloc_bg_thd");
+#endif
+	if (opt_percpu_arena != percpu_arena_disabled) {
+		set_current_thread_affinity((int)thread_ind);
+	}
+	/*
+	 * Start periodic background work.  We use internal tsd which avoids
+	 * side effects, for example triggering new arena creation (which in
+	 * turn triggers another background thread creation).
+	 */
+	background_work(tsd_internal_fetch(), thread_ind);
+	/* Sanity check: the slot's recorded thread handle is this thread. */
+	assert(pthread_equal(pthread_self(),
+	    background_thread_info[thread_ind].thread));
+
+	return NULL;
+}
+
+/*
+ * Mark a slot as started and account for it: sets info->state to
+ * background_thread_started, resets the slot's bookkeeping and increments
+ * n_background_threads.  No thread is created here.  The caller must hold
+ * background_thread_lock (asserted); the callers in this file also hold
+ * info->mtx.
+ */
+static void
+background_thread_init(tsd_t *tsd, background_thread_info_t *info) {
+	malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
+	info->state = background_thread_started;
+	background_thread_info_init(tsd_tsdn(tsd), info);
+	n_background_threads++;
+}
+
+/*
+ * Create a new background thread if needed.
+ *
+ * The arena index is mapped to slot arena_ind % ncpus.  Nothing happens
+ * unless background threads are enabled and that slot is currently stopped.
+ * For arena_ind != 0 the slot is only marked as started and thread 0 is
+ * signalled to create the thread later; for arena_ind == 0 the thread is
+ * created synchronously here.
+ *
+ * The caller must hold background_thread_lock (asserted).
+ *
+ * Returns true only if the synchronous creation for arena 0 fails (the slot
+ * is then rolled back to stopped); false in every other case.
+ */
+bool
+background_thread_create(tsd_t *tsd, unsigned arena_ind) {
+	assert(have_background_thread);
+	malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
+
+	/* We create at most NCPUs threads. */
+	size_t thread_ind = arena_ind % ncpus;
+	background_thread_info_t *info = &background_thread_info[thread_ind];
+
+	bool need_new_thread;
+	malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
+	need_new_thread = background_thread_enabled() &&
+	    (info->state == background_thread_stopped);
+	if (need_new_thread) {
+		/* Marks the slot started and bumps n_background_threads. */
+		background_thread_init(tsd, info);
+	}
+	malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
+	if (!need_new_thread) {
+		return false;
+	}
+	if (arena_ind != 0) {
+		/* Threads are created asynchronously by Thread 0. */
+		background_thread_info_t *t0 = &background_thread_info[0];
+		malloc_mutex_lock(tsd_tsdn(tsd), &t0->mtx);
+		assert(t0->state == background_thread_started);
+		/* Wake thread 0 so that it notices the newly started slot. */
+		pthread_cond_signal(&t0->cond);
+		malloc_mutex_unlock(tsd_tsdn(tsd), &t0->mtx);
+
+		return false;
+	}
+
+	pre_reentrancy(tsd, NULL);
+	/*
+	 * To avoid complications (besides reentrancy), create internal
+	 * background threads with the underlying pthread_create.
+	 */
+	int err = background_thread_create_signals_masked(&info->thread, NULL,
+	    background_thread_entry, (void *)thread_ind);
+	post_reentrancy(tsd);
+
+	if (err != 0) {
+		malloc_printf("<jemalloc>: arena 0 background thread creation "
+		    "failed (%d)\n", err);
+		/* Undo background_thread_init() for this slot. */
+		malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
+		info->state = background_thread_stopped;
+		n_background_threads--;
+		malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
+
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Bring up background threads after they have been enabled.
+ *
+ * For every existing arena other than arena 0, the background thread slot it
+ * maps to (arena index modulo ncpus) is marked as started, at most once per
+ * slot; thread 0 creates those threads later.  Thread 0 itself is then
+ * created synchronously through background_thread_create(tsd, 0).
+ *
+ * The caller must hold background_thread_lock, background threads must
+ * already be flagged as enabled, and no background thread may exist yet (all
+ * asserted).
+ *
+ * Returns the result of background_thread_create(tsd, 0): true if creating
+ * thread 0 failed, false otherwise.
+ */
+bool
+background_threads_enable(tsd_t *tsd) {
+	assert(n_background_threads == 0);
+	assert(background_thread_enabled());
+	malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
+
+	VARIABLE_ARRAY(bool, marked, ncpus);
+	unsigned i, nmarked;
+	for (i = 0; i < ncpus; i++) {
+		marked[i] = false;
+	}
+	/*
+	 * Slot 0 is set up by background_thread_create() at the end, which only
+	 * spawns thread 0 while that slot is still stopped.  Treat it as
+	 * already marked so that an arena mapping to slot 0 cannot flip it to
+	 * started here, which would suppress the creation of thread 0.
+	 */
+	marked[0] = true;
+	nmarked = 1;
+	/* Mark the threads we need to create for thread 0. */
+	unsigned n = narenas_total_get();
+	for (i = 1; i < n && nmarked < ncpus; i++) {
+		/*
+		 * background_thread_info has ncpus entries; the arena index
+		 * must be reduced modulo ncpus before it is used as a slot.
+		 */
+		unsigned thread_ind = i % ncpus;
+		if (marked[thread_ind] ||
+		    arena_get(tsd_tsdn(tsd), i, false) == NULL) {
+			continue;
+		}
+		background_thread_info_t *info =
+		    &background_thread_info[thread_ind];
+		malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
+		assert(info->state == background_thread_stopped);
+		background_thread_init(tsd, info);
+		malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
+		marked[thread_ind] = true;
+		nmarked++;
+	}
+
+	return background_thread_create(tsd, 0);
+}
+
+/*
+ * Shut down all background threads.  Background threads must already be
+ * flagged as disabled and the caller must hold background_thread_lock (both
+ * asserted).  Returns true if stopping thread 0 failed, false otherwise.
+ */
+bool
+background_threads_disable(tsd_t *tsd) {
+	assert(!background_thread_enabled());
+	malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
+
+	/* Thread 0 will be responsible for terminating other threads. */
+	bool failed = background_threads_disable_single(tsd,
+	    &background_thread_info[0]);
+	if (!failed) {
+		assert(n_background_threads == 0);
+	}
+
+	return failed;
+}
+
+/*
+ * Check if we need to signal the background thread early.
+ *
+ * npages_new is a number of newly added pages for the given decay state.  The
+ * function estimates how many of them would have to be purged before the
+ * background thread's scheduled wakeup, accumulates that in
+ * info->npages_to_purge_new, and signals the thread's condition variable once
+ * the accumulated count exceeds BACKGROUND_THREAD_NPAGES_THRESHOLD, or when
+ * the thread sleeps indefinitely although there is something to purge.
+ *
+ * Both info->mtx and decay->mtx are only try-locked; if either is contended
+ * the check is skipped entirely.
+ */
+void
+background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
+    arena_decay_t *decay, size_t npages_new) {
+	background_thread_info_t *info = arena_background_thread_info_get(
+	    arena);
+	if (malloc_mutex_trylock(tsdn, &info->mtx)) {
+		/*
+		 * Background thread may hold the mutex for a long period of
+		 * time.  We'd like to avoid the variance on application
+		 * threads.  So keep this non-blocking, and leave the work to a
+		 * future epoch.
+		 */
+		return;
+	}
+
+	if (info->state != background_thread_started) {
+		goto label_done;
+	}
+	if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
+		goto label_done;
+	}
+
+	ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
+	if (decay_time <= 0) {
+		/* Purging is eagerly done or disabled currently. */
+		goto label_done_unlock2;
+	}
+	uint64_t decay_interval_ns = nstime_ns(&decay->interval);
+	assert(decay_interval_ns > 0);
+
+	/* diff = scheduled wakeup time - current decay epoch. */
+	nstime_t diff;
+	nstime_init(&diff, background_thread_wakeup_time_get(info));
+	if (nstime_compare(&diff, &decay->epoch) <= 0) {
+		/* The wakeup is not later than the current epoch. */
+		goto label_done_unlock2;
+	}
+	nstime_subtract(&diff, &decay->epoch);
+	if (nstime_ns(&diff) < BACKGROUND_THREAD_MIN_INTERVAL_NS) {
+		/* The thread wakes up soon enough anyway. */
+		goto label_done_unlock2;
+	}
+
+	if (npages_new > 0) {
+		/* Number of whole decay epochs until the scheduled wakeup. */
+		size_t n_epoch = (size_t)(nstime_ns(&diff) / decay_interval_ns);
+		/*
+		 * Compute how many new pages we would need to purge by the next
+		 * wakeup, which is used to determine if we should signal the
+		 * background thread.
+		 */
+		uint64_t npurge_new;
+		if (n_epoch >= SMOOTHSTEP_NSTEPS) {
+			/* The wakeup lies beyond the whole decay curve. */
+			npurge_new = npages_new;
+		} else {
+			uint64_t h_steps_max = h_steps[SMOOTHSTEP_NSTEPS - 1];
+			assert(h_steps_max >=
+			    h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
+			npurge_new = npages_new * (h_steps_max -
+			    h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
+			npurge_new >>= SMOOTHSTEP_BFP;
+		}
+		info->npages_to_purge_new += npurge_new;
+	}
+
+	bool should_signal;
+	if (info->npages_to_purge_new > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
+		should_signal = true;
+	} else if (unlikely(background_thread_indefinite_sleep(info)) &&
+	    (extents_npages_get(&arena->extents_dirty) > 0 ||
+	    extents_npages_get(&arena->extents_muzzy) > 0 ||
+	    info->npages_to_purge_new > 0)) {
+		/* Sleeping indefinitely although there is work pending. */
+		should_signal = true;
+	} else {
+		should_signal = false;
+	}
+
+	if (should_signal) {
+		info->npages_to_purge_new = 0;
+		pthread_cond_signal(&info->cond);
+	}
+	/* Locks are released in reverse order of acquisition. */
+label_done_unlock2:
+	malloc_mutex_unlock(tsdn, &decay->mtx);
+label_done:
+	malloc_mutex_unlock(tsdn, &info->mtx);
+}
+
+/*
+ * First prefork stage: runs the prefork handler for background_thread_lock
+ * and then records in background_thread_enabled_at_fork whether background
+ * threads are enabled; background_thread_postfork_child() reads that flag.
+ */
+void
+background_thread_prefork0(tsdn_t *tsdn) {
+	malloc_mutex_prefork(tsdn, &background_thread_lock);
+	background_thread_enabled_at_fork = background_thread_enabled();
+}
+
+/*
+ * Second prefork stage: runs the prefork handler for the mutex of every
+ * background thread slot, in ascending slot order.
+ */
+void
+background_thread_prefork1(tsdn_t *tsdn) {
+	unsigned slot = 0;
+
+	while (slot < ncpus) {
+		malloc_mutex_prefork(tsdn, &background_thread_info[slot].mtx);
+		slot++;
+	}
+}
+
+/*
+ * Parent-side postfork handler: runs the postfork handler for every slot
+ * mutex in ascending slot order, then for background_thread_lock.
+ */
+void
+background_thread_postfork_parent(tsdn_t *tsdn) {
+	unsigned slot = 0;
+
+	while (slot < ncpus) {
+		malloc_mutex_postfork_parent(tsdn,
+		    &background_thread_info[slot].mtx);
+		slot++;
+	}
+	malloc_mutex_postfork_parent(tsdn, &background_thread_lock);
+}
+
+/*
+ * Child-side postfork handler.  Runs the postfork handler for every slot
+ * mutex and for background_thread_lock.  If background threads were enabled
+ * when the fork started (flag recorded by background_thread_prefork0()), the
+ * child's background thread state is then reset to disabled: the thread
+ * count is zeroed, the enabled flag is cleared, and every slot is set to
+ * stopped with a freshly initialized condition variable and bookkeeping.
+ */
+void
+background_thread_postfork_child(tsdn_t *tsdn) {
+	for (unsigned i = 0; i < ncpus; i++) {
+		malloc_mutex_postfork_child(tsdn,
+		    &background_thread_info[i].mtx);
+	}
+	malloc_mutex_postfork_child(tsdn, &background_thread_lock);
+	if (!background_thread_enabled_at_fork) {
+		/* Nothing was running before the fork; nothing to reset. */
+		return;
+	}
+
+	/* Clear background_thread state (reset to disabled for child). */
+	malloc_mutex_lock(tsdn, &background_thread_lock);
+	n_background_threads = 0;
+	background_thread_enabled_set(tsdn, false);
+	for (unsigned i = 0; i < ncpus; i++) {
+		background_thread_info_t *info = &background_thread_info[i];
+		malloc_mutex_lock(tsdn, &info->mtx);
+		info->state = background_thread_stopped;
+		/* Start over with a new condition variable in the child. */
+		int ret = pthread_cond_init(&info->cond, NULL);
+		assert(ret == 0);
+		background_thread_info_init(tsdn, info);
+		malloc_mutex_unlock(tsdn, &info->mtx);
+	}
+	malloc_mutex_unlock(tsdn, &background_thread_lock);
+}
+
+/*
+ * Fill *stats with aggregate background thread statistics: the thread count,
+ * the total number of runs summed over all non-stopped slots, and the summed
+ * sleep time divided by that run count (left as the raw sum when there were
+ * no runs).
+ *
+ * Returns true (leaving *stats untouched) when background threads are
+ * disabled, false otherwise.
+ */
+bool
+background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) {
+	assert(config_stats);
+	malloc_mutex_lock(tsdn, &background_thread_lock);
+
+	bool disabled = !background_thread_enabled();
+	if (!disabled) {
+		uint64_t runs_total = 0;
+
+		stats->num_threads = n_background_threads;
+		nstime_init(&stats->run_interval, 0);
+		for (unsigned ind = 0; ind < ncpus; ind++) {
+			background_thread_info_t *slot =
+			    &background_thread_info[ind];
+
+			malloc_mutex_lock(tsdn, &slot->mtx);
+			if (slot->state != background_thread_stopped) {
+				runs_total += slot->tot_n_runs;
+				nstime_add(&stats->run_interval,
+				    &slot->tot_sleep_time);
+			}
+			malloc_mutex_unlock(tsdn, &slot->mtx);
+		}
+		stats->num_runs = runs_total;
+		if (runs_total > 0) {
+			nstime_idivide(&stats->run_interval, runs_total);
+		}
+	}
+
+	malloc_mutex_unlock(tsdn, &background_thread_lock);
+	return disabled;
+}
+
+#undef BACKGROUND_THREAD_NPAGES_THRESHOLD
+#undef BILLION
+#undef BACKGROUND_THREAD_MIN_INTERVAL_NS
+
+/*
+ * When lazy lock is enabled, we need to make sure isthreaded is set before
+ * taking any background_thread locks.  This is called early in ctl (instead of
+ * waiting for the pthread_create calls to trigger it) because the mutex is
+ * required before creating background threads.
+ *
+ * The caller must not hold background_thread_lock.  The one-time wrapper
+ * initialization only happens in builds with JEMALLOC_PTHREAD_CREATE_WRAPPER
+ * defined; otherwise this function only performs the ownership assertion.
+ */
+void
+background_thread_ctl_init(tsdn_t *tsdn) {
+	malloc_mutex_assert_not_owner(tsdn, &background_thread_lock);
+#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
+	/* pthread_once() guarantees the wrapper setup runs at most once. */
+	pthread_once(&once_control, pthread_create_wrapper_once);
+#endif
+}
+
+#endif /* defined(JEMALLOC_BACKGROUND_THREAD) */
+
+/*
+ * Early boot step.  Rejects the background_thread option when the build has
+ * no background thread support (returns true after printing a message).  In
+ * builds that wrap pthread_create, also resolves the real pthread_create via
+ * dlsym(RTLD_NEXT, ...) and records whether background threads can be
+ * enabled; a failed lookup aborts when lazy locking is configured or the
+ * option was requested.
+ *
+ * Returns false on success.
+ */
+bool
+background_thread_boot0(void) {
+	if (opt_background_thread && !have_background_thread) {
+		malloc_printf("<jemalloc>: option background_thread currently "
+		    "supports pthread only\n");
+		return true;
+	}
+
+#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
+	pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create");
+	bool resolved = (pthread_create_fptr != NULL);
+	can_enable_background_thread = resolved;
+	if (!resolved && (config_lazy_lock || opt_background_thread)) {
+		malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, "
+		    "\"pthread_create\")\n");
+		abort();
+	}
+#endif
+	return false;
+}
+
+/*
+ * Second boot step.  In builds with JEMALLOC_BACKGROUND_THREAD it applies
+ * opt_background_thread, initializes the global background thread lock,
+ * allocates one background_thread_info_t per CPU from the b0 base allocator,
+ * and puts every slot into the stopped state.  In other builds it does
+ * nothing.
+ *
+ * Returns true if any initialization or allocation fails, false otherwise.
+ */
+bool
+background_thread_boot1(tsdn_t *tsdn) {
+#ifdef JEMALLOC_BACKGROUND_THREAD
+	assert(have_background_thread);
+	assert(narenas_total_get() > 0);
+
+	background_thread_enabled_set(tsdn, opt_background_thread);
+	bool global_init_failed = malloc_mutex_init(&background_thread_lock,
+	    "background_thread_global", WITNESS_RANK_BACKGROUND_THREAD_GLOBAL,
+	    malloc_mutex_rank_exclusive);
+	if (global_init_failed) {
+		return true;
+	}
+	if (opt_background_thread) {
+		background_thread_ctl_init(tsdn);
+	}
+
+	size_t info_bytes = ncpus * sizeof(background_thread_info_t);
+	background_thread_info = (background_thread_info_t *)base_alloc(tsdn,
+	    b0get(), info_bytes, CACHELINE);
+	if (background_thread_info == NULL) {
+		return true;
+	}
+
+	for (unsigned cpu = 0; cpu < ncpus; cpu++) {
+		background_thread_info_t *slot = &background_thread_info[cpu];
+
+		/*
+		 * NOTE(review): the previous comment here said the thread
+		 * mutex is "rank_inclusive because of thread0", but the lock
+		 * order actually passed is malloc_mutex_address_ordered --
+		 * confirm which is intended.
+		 */
+		bool mtx_init_failed = malloc_mutex_init(&slot->mtx,
+		    "background_thread", WITNESS_RANK_BACKGROUND_THREAD,
+		    malloc_mutex_address_ordered);
+		if (mtx_init_failed ||
+		    pthread_cond_init(&slot->cond, NULL) != 0) {
+			return true;
+		}
+
+		malloc_mutex_lock(tsdn, &slot->mtx);
+		slot->state = background_thread_stopped;
+		background_thread_info_init(tsdn, slot);
+		malloc_mutex_unlock(tsdn, &slot->mtx);
+	}
+#endif
+
+	return false;
+}
--- a/deps/jemalloc/src/extent_dss.c
+++ b/deps/jemalloc/src/extent_dss.c
@@ -0,0 +1,269 @@
+#define JEMALLOC_EXTENT_DSS_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/extent_dss.h"
+#include "jemalloc/internal/spin.h"
+
+/******************************************************************************/
+/* Data. */
+
+/* DSS option string; starts out as the build-time DSS_DEFAULT. */
+const char	*opt_dss = DSS_DEFAULT;
+
+/*
+ * Printable names for dss precedence values.  Presumably indexed by
+ * dss_prec_t, with "N/A" covering the out-of-range/limit value -- TODO confirm
+ * against the dss_prec_t definition.
+ */
+const char	*dss_prec_names[] = {
+	"disabled",
+	"primary",
+	"secondary",
+	"N/A"
+};
+
+/*
+ * Current dss precedence default, used when creating new arenas.  NB: This is
+ * stored as unsigned rather than dss_prec_t because in principle there's no
+ * guarantee that sizeof(dss_prec_t) is the same as sizeof(unsigned), and we use
+ * atomic operations to synchronize the setting.
+ */
+static atomic_u_t	dss_prec_default = ATOMIC_INIT(
+    (unsigned)DSS_PREC_DEFAULT);
+
+/* Base address of the DSS. */
+static void		*dss_base;
+/* Atomic boolean indicating whether a thread is currently extending DSS. */
+static atomic_b_t	dss_extending;
+/* Atomic boolean indicating whether the DSS is exhausted. */
+static atomic_b_t	dss_exhausted;
+/* Atomic current upper limit on DSS addresses. */
+static atomic_p_t	dss_max;
+
+/******************************************************************************/
+
+/*
+ * Thin wrapper around sbrk().  In builds with JEMALLOC_DSS defined it forwards
+ * the increment to sbrk() and returns its result; in all other builds it calls
+ * not_implemented() and returns NULL.
+ */
+static void *
+extent_dss_sbrk(intptr_t increment) {
+#ifdef JEMALLOC_DSS
+	return sbrk(increment);
+#else
+	not_implemented();
+	return NULL;
+#endif
+}
+
+/*
+ * Return the current default dss precedence, or dss_prec_disabled when the
+ * build has no DSS support.
+ */
+dss_prec_t
+extent_dss_prec_get(void) {
+	if (have_dss) {
+		return (dss_prec_t)atomic_load_u(&dss_prec_default,
+		    ATOMIC_ACQUIRE);
+	}
+	return dss_prec_disabled;
+}
+
+/*
+ * Set the default dss precedence.  Returns false on success.  Without DSS
+ * support nothing is stored and the call only "succeeds" when the requested
+ * precedence is dss_prec_disabled.
+ */
+bool
+extent_dss_prec_set(dss_prec_t dss_prec) {
+	if (have_dss) {
+		atomic_store_u(&dss_prec_default, (unsigned)dss_prec,
+		    ATOMIC_RELEASE);
+		return false;
+	}
+	return (dss_prec != dss_prec_disabled);
+}
+
+/*
+ * Acquire the dss_extending flag: spin (adaptively) until a compare-exchange
+ * flips it from false to true.
+ */
+static void
+extent_dss_extending_start(void) {
+	spin_t backoff = SPIN_INITIALIZER;
+	bool expected = false;
+
+	while (!atomic_compare_exchange_weak_b(&dss_extending, &expected,
+	    true, ATOMIC_ACQ_REL, ATOMIC_RELAXED)) {
+		spin_adaptive(&backoff);
+		/* A failed exchange overwrites expected; reset it. */
+		expected = false;
+	}
+}
+
+/*
+ * Release the dss_extending flag taken by extent_dss_extending_start().  The
+ * flag must currently be set (asserted).
+ */
+static void
+extent_dss_extending_finish(void) {
+	assert(atomic_load_b(&dss_extending, ATOMIC_RELAXED));
+
+	atomic_store_b(&dss_extending, false, ATOMIC_RELEASE);
+}
+
+/*
+ * Query the current end of the DSS via sbrk(0) and publish it in dss_max.
+ *
+ * Returns the current end of the DSS, or NULL if sbrk() failed or if a fixed
+ * new_addr was requested that is not exactly at the edge of the DSS.  Note that
+ * dss_max is still updated in the latter case.
+ */
+static void *
+extent_dss_max_update(void *new_addr) {
+	void *brk_cur = extent_dss_sbrk(0);
+
+	if (brk_cur == (void *)-1) {
+		return NULL;
+	}
+	atomic_store_p(&dss_max, brk_cur, ATOMIC_RELEASE);
+
+	/* A fixed address is only satisfiable right at the DSS edge. */
+	bool at_edge = (new_addr == NULL || new_addr == brk_cur);
+	return at_edge ? brk_cur : NULL;
+}
+
+/*
+ * Allocate size bytes with the requested alignment by extending the DSS via
+ * sbrk().
+ *
+ * A scratch extent (gap) is allocated up front to describe any page-aligned
+ * padding that has to be inserted before the returned region; on success it is
+ * either handed to extent_dalloc_gap() (padding present) or released again.
+ * The dss_extending flag is held while the DSS is being extended.
+ *
+ * If *commit is false on entry, the new region is passed to pages_decommit()
+ * and *commit is overwritten with its result.  If both *zero and *commit are
+ * set afterwards, the region is force-purged, falling back to memset() when
+ * the purge wrapper returns true.
+ *
+ * Returns the start of the allocation, or NULL on failure (size too large for
+ * a signed increment, scratch extent allocation failure, DSS exhausted,
+ * new_addr not at the DSS edge, address wrap-around, or sbrk() failure).
+ */
+void *
+extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size,
+    size_t alignment, bool *zero, bool *commit) {
+	extent_t *gap;
+
+	cassert(have_dss);
+	assert(size > 0);
+	assert(alignment > 0);
+
+	/*
+	 * sbrk() uses a signed increment argument, so take care not to
+	 * interpret a large allocation request as a negative increment.
+	 */
+	if ((intptr_t)size < 0) {
+		return NULL;
+	}
+
+	gap = extent_alloc(tsdn, arena);
+	if (gap == NULL) {
+		return NULL;
+	}
+
+	extent_dss_extending_start();
+	if (!atomic_load_b(&dss_exhausted, ATOMIC_ACQUIRE)) {
+		/*
+		 * The loop is necessary to recover from races with other
+		 * threads that are using the DSS for something other than
+		 * malloc.
+		 */
+		while (true) {
+			void *max_cur = extent_dss_max_update(new_addr);
+			if (max_cur == NULL) {
+				goto label_oom;
+			}
+
+			/*
+			 * Compute how much page-aligned gap space (if any) is
+			 * necessary to satisfy alignment.  This space can be
+			 * recycled for later use.
+			 */
+			void *gap_addr_page = (void *)(PAGE_CEILING(
+			    (uintptr_t)max_cur));
+			void *ret = (void *)ALIGNMENT_CEILING(
+			    (uintptr_t)gap_addr_page, alignment);
+			size_t gap_size_page = (uintptr_t)ret -
+			    (uintptr_t)gap_addr_page;
+			if (gap_size_page != 0) {
+				extent_init(gap, arena, gap_addr_page,
+				    gap_size_page, false, NSIZES,
+				    arena_extent_sn_next(arena),
+				    extent_state_active, false, true);
+			}
+			/*
+			 * Compute the address just past the end of the desired
+			 * allocation space.
+			 */
+			void *dss_next = (void *)((uintptr_t)ret + size);
+			if ((uintptr_t)ret < (uintptr_t)max_cur ||
+			    (uintptr_t)dss_next < (uintptr_t)max_cur) {
+				goto label_oom; /* Wrap-around. */
+			}
+			/* Compute the increment, including subpage bytes. */
+			void *gap_addr_subpage = max_cur;
+			size_t gap_size_subpage = (uintptr_t)ret -
+			    (uintptr_t)gap_addr_subpage;
+			intptr_t incr = gap_size_subpage + size;
+
+			assert((uintptr_t)max_cur + incr == (uintptr_t)ret +
+			    size);
+
+			/* Try to allocate. */
+			void *dss_prev = extent_dss_sbrk(incr);
+			if (dss_prev == max_cur) {
+				/* Success. */
+				atomic_store_p(&dss_max, dss_next,
+				    ATOMIC_RELEASE);
+				extent_dss_extending_finish();
+
+				/*
+				 * The scratch extent either describes real
+				 * padding (hand it off) or was never
+				 * initialized (just release it).
+				 */
+				if (gap_size_page != 0) {
+					extent_dalloc_gap(tsdn, arena, gap);
+				} else {
+					extent_dalloc(tsdn, arena, gap);
+				}
+				if (!*commit) {
+					*commit = pages_decommit(ret, size);
+				}
+				if (*zero && *commit) {
+					extent_hooks_t *extent_hooks =
+					    EXTENT_HOOKS_INITIALIZER;
+					extent_t extent;
+
+					/*
+					 * NOTE(review): this call passes size
+					 * twice, whereas the gap extent_init()
+					 * call above passes (addr, size, slab,
+					 * szind, sn, state, ...).  Confirm the
+					 * argument list against extent_init()'s
+					 * signature.
+					 */
+					extent_init(&extent, arena, ret, size,
+					    size, false, NSIZES,
+					    extent_state_active, false, true);
+					if (extent_purge_forced_wrapper(tsdn,
+					    arena, &extent_hooks, &extent, 0,
+					    size)) {
+						memset(ret, 0, size);
+					}
+				}
+				return ret;
+			}
+			/*
+			 * Failure, whether due to OOM or a race with a raw
+			 * sbrk() call from outside the allocator.
+			 */
+			if (dss_prev == (void *)-1) {
+				/* OOM. */
+				atomic_store_b(&dss_exhausted, true,
+				    ATOMIC_RELEASE);
+				goto label_oom;
+			}
+		}
+	}
+label_oom:
+	/* Common failure exit: drop the flag and the scratch extent. */
+	extent_dss_extending_finish();
+	extent_dalloc(tsdn, arena, gap);
+	return NULL;
+}
+
+/* Return whether addr lies in the half-open range [dss_base, max). */
+static bool
+extent_in_dss_helper(void *addr, void *max) {
+	uintptr_t pos = (uintptr_t)addr;
+
+	if (pos < (uintptr_t)dss_base) {
+		return false;
+	}
+	return pos < (uintptr_t)max;
+}
+
+/*
+ * Return whether addr lies within the DSS, i.e. in [dss_base, dss_max) using
+ * the currently published dss_max.
+ */
+bool
+extent_in_dss(void *addr) {
+	cassert(have_dss);
+
+	void *limit = atomic_load_p(&dss_max, ATOMIC_ACQUIRE);
+	return extent_in_dss_helper(addr, limit);
+}
+
+/*
+ * Return whether two addresses may belong to the same merged extent as far as
+ * the DSS is concerned: either both lie below dss_base, or both are inside the
+ * DSS, or both are outside of it.
+ */
+bool
+extent_dss_mergeable(void *addr_a, void *addr_b) {
+	cassert(have_dss);
+
+	/* Fast path: both below the DSS, no need to load dss_max. */
+	uintptr_t base = (uintptr_t)dss_base;
+	if ((uintptr_t)addr_a < base && (uintptr_t)addr_b < base) {
+		return true;
+	}
+
+	void *limit = atomic_load_p(&dss_max, ATOMIC_ACQUIRE);
+	bool a_in_dss = extent_in_dss_helper(addr_a, limit);
+	bool b_in_dss = extent_in_dss_helper(addr_b, limit);
+	return a_in_dss == b_in_dss;
+}
+
+/*
+ * Initialize the DSS bookkeeping: record the current program break as both
+ * dss_base and dss_max, clear the extending flag, and mark the DSS exhausted
+ * right away if the initial sbrk(0) query failed.
+ */
+void
+extent_dss_boot(void) {
+	cassert(have_dss);
+
+	dss_base = extent_dss_sbrk(0);
+	bool sbrk_failed = (dss_base == (void *)-1);
+
+	atomic_store_b(&dss_extending, false, ATOMIC_RELAXED);
+	atomic_store_b(&dss_exhausted, sbrk_failed, ATOMIC_RELAXED);
+	atomic_store_p(&dss_max, dss_base, ATOMIC_RELAXED);
+}
+
+/******************************************************************************/
--- a/deps/jemalloc/src/extent_mmap.c
+++ b/deps/jemalloc/src/extent_mmap.c
@@ -0,0 +1,42 @@
+#define JEMALLOC_EXTENT_MMAP_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/extent_mmap.h"
+
+/******************************************************************************/
+/* Data. */
+
+/*
+ * Whether mmap'ed extents are retained rather than unmapped on deallocation
+ * (see extent_dalloc_mmap()).  Defaults to true only in builds with
+ * JEMALLOC_RETAIN defined.
+ */
+bool	opt_retain =
+#ifdef JEMALLOC_RETAIN
+    true
+#else
+    false
+#endif
+    ;
+
+/******************************************************************************/
+
+/*
+ * Map size bytes via pages_map(), with the alignment rounded up to a multiple
+ * of PAGE.  When the mapping succeeds and *commit is set afterwards, *zero is
+ * set to true as well.
+ *
+ * Returns the mapping, or NULL on failure.
+ */
+void *
+extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero,
+    bool *commit) {
+	size_t page_alignment = ALIGNMENT_CEILING(alignment, PAGE);
+	void *mapping = pages_map(new_addr, size, page_alignment, commit);
+
+	if (mapping != NULL && *commit) {
+		*zero = true;
+	}
+	return mapping;
+}
+
+/*
+ * Release a mapping unless retention is enabled.  Returns true when the
+ * mapping was retained (opt_retain set), false when it was unmapped.
+ */
+bool
+extent_dalloc_mmap(void *addr, size_t size) {
+	if (opt_retain) {
+		return true;
+	}
+	pages_unmap(addr, size);
+	return false;
+}
--- a/deps/jemalloc/src/hooks.c
+++ b/deps/jemalloc/src/hooks.c
@@ -0,0 +1,12 @@
+#include "jemalloc/internal/jemalloc_preamble.h"
+
+/*
+ * The hooks are a little bit screwy -- they're not genuinely exported in the
+ * sense that we want them available to end-users, but we do want them visible
+ * from outside the generated library, so that we can use them in test code.
+ *
+ * Both hooks are function pointers that default to NULL (no hook installed).
+ */
+/* Presumably invoked when a new arena is created -- confirm at call sites. */
+JEMALLOC_EXPORT
+void (*hooks_arena_new_hook)() = NULL;
+
+/* Presumably invoked around calls into libc -- confirm at call sites. */
+JEMALLOC_EXPORT
+void (*hooks_libc_hook)() = NULL;
--- a/deps/jemalloc/src/jemalloc_cpp.cpp
+++ b/deps/jemalloc/src/jemalloc_cpp.cpp
@@ -0,0 +1,132 @@
+#include <mutex>
+#include <new>
+
+#define JEMALLOC_CPP_CPP_
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+// All operators in this file are exported.
+
+// Possibly alias hidden versions of malloc and sdallocx to avoid an extra plt
+// thunk?
+//
+// extern __typeof (sdallocx) sdallocx_int
+//  __attribute ((alias ("sdallocx"),
+//		visibility ("hidden")));
+//
+// ... but it needs to work with jemalloc namespaces.
+
+// Forward declarations of the replacement global allocation and deallocation
+// operators defined below: plain and nothrow forms, scalar and array.
+void	*operator new(std::size_t size);
+void	*operator new[](std::size_t size);
+void	*operator new(std::size_t size, const std::nothrow_t &) noexcept;
+void	*operator new[](std::size_t size, const std::nothrow_t &) noexcept;
+void	operator delete(void *ptr) noexcept;
+void	operator delete[](void *ptr) noexcept;
+void	operator delete(void *ptr, const std::nothrow_t &) noexcept;
+void	operator delete[](void *ptr, const std::nothrow_t &) noexcept;
+
+// Only declared when the compiler advertises sized deallocation support.
+#if __cpp_sized_deallocation >= 201309
+/* C++14's sized-delete operators. */
+void	operator delete(void *ptr, std::size_t size) noexcept;
+void	operator delete[](void *ptr, std::size_t size) noexcept;
+#endif
+
+// Return the currently installed new-handler.
+//
+// GCC-4.8 and clang 4.0 do not have std::get_new_handler, so the handler is
+// read by swapping in nullptr and immediately restoring the previous value.
+// The swap is serialized by a single mutex.  This lives in a non-template
+// function on purpose: a function-local static inside the newImpl template
+// would be instantiated once per specialization, so the throwing and nothrow
+// paths would each lock a different mutex and could interleave their swaps.
+static std::new_handler
+newImplCurrentHandler() {
+	static std::mutex mtx;
+	std::lock_guard<std::mutex> lock(mtx);
+
+	std::new_handler handler = std::set_new_handler(nullptr);
+	std::set_new_handler(handler);
+	return handler;
+}
+
+// Shared implementation of the operator new variants.
+//
+// Allocates size bytes with je_malloc().  On failure, repeatedly invokes the
+// installed new-handler and retries, until the allocation succeeds, no handler
+// is installed, or the handler throws std::bad_alloc.
+//
+// IsNoExcept selects the failure behavior: when true, nullptr is returned;
+// when false, std::bad_alloc is thrown.
+template <bool IsNoExcept>
+void *
+newImpl(std::size_t size) noexcept(IsNoExcept) {
+	void *ptr = je_malloc(size);
+	if (likely(ptr != nullptr)) {
+		return ptr;
+	}
+
+	while (ptr == nullptr) {
+		std::new_handler handler = newImplCurrentHandler();
+		if (handler == nullptr) {
+			break;
+		}
+
+		try {
+			handler();
+		} catch (const std::bad_alloc &) {
+			// The handler gave up; report the failure below.
+			break;
+		}
+
+		ptr = je_malloc(size);
+	}
+
+	if (ptr == nullptr && !IsNoExcept) {
+		std::__throw_bad_alloc();
+	}
+	return ptr;
+}
+
+// Throwing scalar new: forwards to newImpl<false>, which throws on failure.
+void *
+operator new(std::size_t size) {
+	return newImpl<false>(size);
+}
+
+// Throwing array new: same implementation as the scalar form.
+void *
+operator new[](std::size_t size) {
+	return newImpl<false>(size);
+}
+
+// Nothrow scalar new: forwards to newImpl<true>, which returns nullptr on
+// failure.
+void *
+operator new(std::size_t size, const std::nothrow_t &) noexcept {
+	return newImpl<true>(size);
+}
+
+// Nothrow array new: same implementation as the nothrow scalar form.
+void *
+operator new[](std::size_t size, const std::nothrow_t &) noexcept {
+	return newImpl<true>(size);
+}
+
+// Scalar delete: releases ptr with je_free().
+void
+operator delete(void *ptr) noexcept {
+	je_free(ptr);
+}
+
+// Array delete: releases ptr with je_free().
+void
+operator delete[](void *ptr) noexcept {
+	je_free(ptr);
+}
+
+// Nothrow scalar delete: releases ptr with je_free().
+void
+operator delete(void *ptr, const std::nothrow_t &) noexcept {
+	je_free(ptr);
+}
+
+// Nothrow array delete: releases ptr with je_free().
+void operator delete[](void *ptr, const std::nothrow_t &) noexcept {
+	je_free(ptr);
+}
+
+#if __cpp_sized_deallocation >= 201309
+
+// Sized scalar delete: a null ptr is a no-op; otherwise the block is released
+// through je_sdallocx() with the caller-supplied size and no flags.
+void
+operator delete(void *ptr, std::size_t size) noexcept {
+	if (unlikely(ptr == nullptr)) {
+		return;
+	}
+	je_sdallocx(ptr, size, /*flags=*/0);
+}
+
+// Sized array delete: same behavior as the sized scalar form.
+void operator delete[](void *ptr, std::size_t size) noexcept {
+	if (unlikely(ptr == nullptr)) {
+		return;
+	}
+	je_sdallocx(ptr, size, /*flags=*/0);
+}
+
+#endif  // __cpp_sized_deallocation
--- a/deps/jemalloc/src/large.c
+++ b/deps/jemalloc/src/large.c
@@ -0,0 +1,371 @@
+#define JEMALLOC_LARGE_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/extent_mmap.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/rtree.h"
+#include "jemalloc/internal/util.h"
+
+/******************************************************************************/
+
+/*
+ * Allocate a large region of usize bytes with CACHELINE alignment.  usize must
+ * already be a size class boundary (asserted via sz_s2u()).  Simply forwards
+ * to large_palloc().
+ */
+void *
+large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero) {
+	assert(usize == sz_s2u(usize));
+
+	return large_palloc(tsdn, arena, usize, CACHELINE, zero);
+}
+
+/*
+ * Allocate a large region of usize bytes with the given alignment.
+ *
+ * Either tsdn must be non-null or arena must be given (asserted).  With a
+ * non-null tsdn the arena is (re)selected through arena_choose().  The
+ * allocation is zeroed when zero is set or when zero filling is configured and
+ * opt_zero is on; otherwise it is junk filled if opt_junk_alloc is on.
+ *
+ * Returns the address of the allocation, or NULL if the aligned size is out of
+ * range, no arena is available, or the extent allocation fails.
+ */
+void *
+large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
+    bool zero) {
+	UNUSED bool idump JEMALLOC_CC_SILENCE_INIT(false);
+
+	assert(!tsdn_null(tsdn) || arena != NULL);
+
+	size_t aligned_usize = sz_sa2u(usize, alignment);
+	if (unlikely(aligned_usize == 0 || aligned_usize > LARGE_MAXCLASS)) {
+		return NULL;
+	}
+
+	if (config_fill && unlikely(opt_zero)) {
+		zero = true;
+	}
+	/*
+	 * The extent allocator gets a copy of zero, so that the fill decision
+	 * below is still based on what was requested even if the extent
+	 * happens to come back zeroed without having been asked to.
+	 */
+	bool got_zeroed = zero;
+
+	if (likely(!tsdn_null(tsdn))) {
+		arena = arena_choose(tsdn_tsd(tsdn), arena);
+	}
+	if (unlikely(arena == NULL)) {
+		return NULL;
+	}
+	extent_t *extent = arena_extent_alloc_large(tsdn, arena, usize,
+	    alignment, &got_zeroed);
+	if (extent == NULL) {
+		return NULL;
+	}
+
+	/* See comments in arena_bin_slabs_full_insert(). */
+	if (!arena_is_auto(arena)) {
+		/* Track the extent in the arena's list of large extents. */
+		malloc_mutex_lock(tsdn, &arena->large_mtx);
+		extent_list_append(&arena->large, extent);
+		malloc_mutex_unlock(tsdn, &arena->large_mtx);
+	}
+	if (config_prof && arena_prof_accum(tsdn, arena, usize)) {
+		prof_idump(tsdn);
+	}
+
+	if (!zero) {
+		if (config_fill && unlikely(opt_junk_alloc)) {
+			memset(extent_addr_get(extent), JEMALLOC_ALLOC_JUNK,
+			    extent_usize_get(extent));
+		}
+	} else {
+		assert(got_zeroed);
+	}
+
+	arena_decay_tick(tsdn, arena);
+	return extent_addr_get(extent);
+}
+
+/* Overwrite size bytes at ptr with the JEMALLOC_FREE_JUNK byte pattern. */
+static void
+large_dalloc_junk_impl(void *ptr, size_t size) {
+	memset(ptr, JEMALLOC_FREE_JUNK, size);
+}
+/*
+ * Indirection through which the junk fill is invoked.  Presumably JET_MUTABLE
+ * makes the pointer replaceable in test builds only -- confirm against its
+ * definition.
+ */
+large_dalloc_junk_t *JET_MUTABLE large_dalloc_junk = large_dalloc_junk_impl;
+
+/*
+ * Junk fill a region that is being freed, but only when junk-on-free is
+ * active and the memory is not about to be unmapped (it is retained, or it
+ * lives in the DSS).
+ *
+ * NOTE(review): the outer condition requires have_dss, so in builds without
+ * DSS support no junk filling happens even when opt_retain is set.  The inner
+ * condition re-tests have_dss, which suggests the outer test may be
+ * unintended -- confirm.
+ */
+static void
+large_dalloc_maybe_junk_impl(void *ptr, size_t size) {
+	if (config_fill && have_dss && unlikely(opt_junk_free)) {
+		/*
+		 * Only bother junk filling if the extent isn't about to be
+		 * unmapped.
+		 */
+		if (opt_retain || (have_dss && extent_in_dss(ptr))) {
+			large_dalloc_junk(ptr, size);
+		}
+	}
+}
+/* Indirection through which the conditional junk fill is invoked. */
+large_dalloc_maybe_junk_t *JET_MUTABLE large_dalloc_maybe_junk =
+    large_dalloc_maybe_junk_impl;
+
+/*
+ * Shrink a large allocation in place to usize bytes by splitting off the
+ * excess tail of its extent and handing that tail to the arena's dirty
+ * extents.
+ *
+ * Returns false on success, true if the arena's extent hooks provide no split
+ * operation or if the split fails.
+ */
+static bool
+large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) {
+	arena_t *arena = extent_arena_get(extent);
+	size_t old_usize = extent_usize_get(extent);
+	extent_hooks_t *hooks = extent_hooks_get(arena);
+	size_t keep_size = usize + sz_large_pad;
+	size_t excess = extent_size_get(extent) - keep_size;
+
+	assert(old_usize > usize);
+
+	if (hooks->split == NULL) {
+		return true;
+	}
+
+	if (excess != 0) {
+		/* Carve the surplus pages off the end of the extent. */
+		extent_t *tail = extent_split_wrapper(tsdn, arena, &hooks,
+		    extent, keep_size, sz_size2index(usize), false, excess,
+		    NSIZES, false);
+		if (tail == NULL) {
+			return true;
+		}
+
+		if (config_fill && unlikely(opt_junk_free)) {
+			large_dalloc_maybe_junk(extent_addr_get(tail),
+			    extent_size_get(tail));
+		}
+
+		arena_extents_dirty_dalloc(tsdn, arena, &hooks, tail);
+	}
+
+	arena_extent_ralloc_large_shrink(tsdn, arena, extent, old_usize);
+
+	return false;
+}
+
+/*
+ * Grow a large allocation in place to usize bytes by obtaining an extent that
+ * starts directly past the current one and merging the two.
+ *
+ * The trailing extent is looked for in the arena's dirty extents first, then
+ * in its muzzy extents, and only then requested as a new mapping.  After a
+ * successful merge the extent's size class index is updated both in the
+ * extent and in the rtree, and the new tail is zeroed or junk filled as
+ * requested/configured.
+ *
+ * Returns false on success, true if the extent hooks provide no merge
+ * operation, no trailing extent could be obtained, or the merge fails (the
+ * trailing extent is deallocated again in that case).
+ */
+static bool
+large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize,
+    bool zero) {
+	arena_t *arena = extent_arena_get(extent);
+	size_t oldusize = extent_usize_get(extent);
+	extent_hooks_t *extent_hooks = extent_hooks_get(arena);
+	size_t trailsize = usize - oldusize;
+
+	if (extent_hooks->merge == NULL) {
+		return true;
+	}
+
+	if (config_fill && unlikely(opt_zero)) {
+		zero = true;
+	}
+	/*
+	 * Copy zero into is_zeroed_trail and pass the copy when allocating the
+	 * extent, so that it is possible to make correct junk/zero fill
+	 * decisions below, even if is_zeroed_trail ends up true when zero is
+	 * false.
+	 */
+	bool is_zeroed_trail = zero;
+	bool commit = true;
+	extent_t *trail;
+	/* Only assigned and read when config_stats is enabled. */
+	bool new_mapping;
+	if ((trail = extents_alloc(tsdn, arena, &extent_hooks,
+	    &arena->extents_dirty, extent_past_get(extent), trailsize, 0,
+	    CACHELINE, false, NSIZES, &is_zeroed_trail, &commit)) != NULL
+	    || (trail = extents_alloc(tsdn, arena, &extent_hooks,
+	    &arena->extents_muzzy, extent_past_get(extent), trailsize, 0,
+	    CACHELINE, false, NSIZES, &is_zeroed_trail, &commit)) != NULL) {
+		if (config_stats) {
+			new_mapping = false;
+		}
+	} else {
+		if ((trail = extent_alloc_wrapper(tsdn, arena, &extent_hooks,
+		    extent_past_get(extent), trailsize, 0, CACHELINE, false,
+		    NSIZES, &is_zeroed_trail, &commit)) == NULL) {
+			return true;
+		}
+		if (config_stats) {
+			new_mapping = true;
+		}
+	}
+
+	if (extent_merge_wrapper(tsdn, arena, &extent_hooks, extent, trail)) {
+		/* Merge failed: give the trailing extent back. */
+		extent_dalloc_wrapper(tsdn, arena, &extent_hooks, trail);
+		return true;
+	}
+	/* Record the new size class in the extent and in the rtree. */
+	rtree_ctx_t rtree_ctx_fallback;
+	rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
+	szind_t szind = sz_size2index(usize);
+	extent_szind_set(extent, szind);
+	rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx,
+	    (uintptr_t)extent_addr_get(extent), szind, false);
+
+	if (config_stats && new_mapping) {
+		arena_stats_mapped_add(tsdn, &arena->stats, trailsize);
+	}
+
+	if (zero) {
+		if (config_cache_oblivious) {
+			/*
+			 * Zero the trailing bytes of the original allocation's
+			 * last page, since they are in an indeterminate state.
+			 * There will always be trailing bytes, because ptr's
+			 * offset from the beginning of the extent is a multiple
+			 * of CACHELINE in [0 .. PAGE).
+			 */
+			void *zbase = (void *)
+			    ((uintptr_t)extent_addr_get(extent) + oldusize);
+			void *zpast = PAGE_ADDR2BASE((void *)((uintptr_t)zbase +
+			    PAGE));
+			size_t nzero = (uintptr_t)zpast - (uintptr_t)zbase;
+			assert(nzero > 0);
+			memset(zbase, 0, nzero);
+		}
+		assert(is_zeroed_trail);
+	} else if (config_fill && unlikely(opt_junk_alloc)) {
+		/* Junk fill only the newly added bytes. */
+		memset((void *)((uintptr_t)extent_addr_get(extent) + oldusize),
+		    JEMALLOC_ALLOC_JUNK, usize - oldusize);
+	}
+
+	arena_extent_ralloc_large_expand(tsdn, arena, extent, oldusize);
+
+	return false;
+}
+
+/*
+ * Try to resize a large allocation in place so that its usable size ends up
+ * within [usize_min, usize_max].
+ *
+ * Expansion to usize_max is attempted first, then expansion to usize_min.  If
+ * neither applies or succeeds, the allocation is left alone when its current
+ * size already falls in the requested range, and shrunk to usize_max when it
+ * is larger than that.
+ *
+ * Returns false if the allocation now satisfies the request without moving,
+ * true if the caller has to move it.
+ */
+bool
+large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min,
+    size_t usize_max, bool zero) {
+	size_t oldusize = extent_usize_get(extent);
+
+	/* The following should have been caught by callers. */
+	assert(usize_min > 0 && usize_max <= LARGE_MAXCLASS);
+	/* Both allocation sizes must be large to avoid a move. */
+	assert(oldusize >= LARGE_MINCLASS && usize_max >= LARGE_MINCLASS);
+
+	if (usize_max > oldusize) {
+		/* Attempt to expand the allocation in-place. */
+		if (!large_ralloc_no_move_expand(tsdn, extent, usize_max,
+		    zero)) {
+			arena_decay_tick(tsdn, extent_arena_get(extent));
+			return false;
+		}
+		/*
+		 * Try again, this time with usize_min.
+		 * large_ralloc_no_move_expand() returns false on success, so
+		 * the result has to be negated here just like above;
+		 * otherwise a failed expansion would be reported as success
+		 * and a successful one as failure.
+		 */
+		if (usize_min < usize_max && usize_min > oldusize &&
+		    !large_ralloc_no_move_expand(tsdn, extent, usize_min,
+		    zero)) {
+			arena_decay_tick(tsdn, extent_arena_get(extent));
+			return false;
+		}
+	}
+
+	/*
+	 * Avoid moving the allocation if the existing extent size accommodates
+	 * the new size.
+	 */
+	if (oldusize >= usize_min && oldusize <= usize_max) {
+		arena_decay_tick(tsdn, extent_arena_get(extent));
+		return false;
+	}
+
+	/* Attempt to shrink the allocation in-place. */
+	if (oldusize > usize_max) {
+		if (!large_ralloc_no_move_shrink(tsdn, extent, usize_max)) {
+			arena_decay_tick(tsdn, extent_arena_get(extent));
+			return false;
+		}
+	}
+	return true;
+}
+
+/*
+ * Allocate the destination for a moving reallocation.  Alignments up to
+ * CACHELINE go through large_malloc(); larger ones go through large_palloc().
+ */
+static void *
+large_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize,
+    size_t alignment, bool zero) {
+	return (alignment <= CACHELINE)
+	    ? large_malloc(tsdn, arena, usize, zero)
+	    : large_palloc(tsdn, arena, usize, alignment, zero);
+}
+
+/*
+ * Reallocate a large allocation to usize bytes.
+ *
+ * An in-place resize is tried first.  If that is not possible, a new region is
+ * allocated, min(usize, old usable size) bytes are copied over, and the old
+ * allocation is released.
+ *
+ * Returns the (possibly unchanged) address of the allocation, or NULL if a
+ * move was required and the new allocation failed; the original allocation is
+ * left intact in that case.
+ */
+void *
+large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize,
+    size_t alignment, bool zero, tcache_t *tcache) {
+	size_t old_usize = extent_usize_get(extent);
+
+	/* The following should have been caught by callers. */
+	assert(usize > 0 && usize <= LARGE_MAXCLASS);
+	/* Both allocation sizes must be large to avoid a move. */
+	assert(old_usize >= LARGE_MINCLASS && usize >= LARGE_MINCLASS);
+
+	/* Prefer resizing in place over moving. */
+	bool must_move = large_ralloc_no_move(tsdn, extent, usize, usize,
+	    zero);
+	if (!must_move) {
+		return extent_addr_get(extent);
+	}
+
+	/*
+	 * The sizes differ enough that another size class is needed, so
+	 * allocate fresh space and copy the contents across.
+	 */
+	void *fresh = large_ralloc_move_helper(tsdn, arena, usize, alignment,
+	    zero);
+	if (fresh == NULL) {
+		return NULL;
+	}
+
+	size_t ncopy;
+	if (usize < old_usize) {
+		ncopy = usize;
+	} else {
+		ncopy = old_usize;
+	}
+	memcpy(fresh, extent_addr_get(extent), ncopy);
+	isdalloct(tsdn, extent_addr_get(extent), old_usize, tcache, NULL,
+	    true);
+	return fresh;
+}
+
+/*
+ * First half of large deallocation: unlink the extent from the arena's list
+ * of large extents (non-auto arenas only) and prepare it for release.
+ *
+ * junked_locked states that the caller has already junk filled the extent's
+ * data and currently holds the arena's large_mtx.  When it is false, this
+ * function takes the mutex itself where needed and performs the conditional
+ * junk fill.
+ */
+static void
+large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, extent_t *extent,
+    bool junked_locked) {
+	if (junked_locked) {
+		malloc_mutex_assert_owner(tsdn, &arena->large_mtx);
+		if (!arena_is_auto(arena)) {
+			extent_list_remove(&arena->large, extent);
+		}
+	} else {
+		/* See comments in arena_bin_slabs_full_insert(). */
+		if (!arena_is_auto(arena)) {
+			malloc_mutex_lock(tsdn, &arena->large_mtx);
+			extent_list_remove(&arena->large, extent);
+			malloc_mutex_unlock(tsdn, &arena->large_mtx);
+		}
+		large_dalloc_maybe_junk(extent_addr_get(extent),
+		    extent_usize_get(extent));
+	}
+	arena_extent_dalloc_large_prep(tsdn, arena, extent);
+}
+
+/*
+ * Second half of large deallocation: hand the extent to the arena's dirty
+ * extents.  The hooks pointer starts out as EXTENT_HOOKS_INITIALIZER and is
+ * passed by address to arena_extents_dirty_dalloc().
+ */
+static void
+large_dalloc_finish_impl(tsdn_t *tsdn, arena_t *arena, extent_t *extent) {
+	extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER;
+	arena_extents_dirty_dalloc(tsdn, arena, &extent_hooks, extent);
+}
+
+/*
+ * Deallocation prep for callers that have already junk filled the extent and
+ * hold the owning arena's large_mtx (large_dalloc_prep_impl() asserts
+ * ownership of the mutex).
+ */
+void
+large_dalloc_prep_junked_locked(tsdn_t *tsdn, extent_t *extent) {
+	large_dalloc_prep_impl(tsdn, extent_arena_get(extent), extent, true);
+}
+
+/*
+ * Complete a large deallocation: looks up the extent's arena and runs
+ * large_dalloc_finish_impl() on it.
+ */
+void
+large_dalloc_finish(tsdn_t *tsdn, extent_t *extent) {
+	large_dalloc_finish_impl(tsdn, extent_arena_get(extent), extent);
+}
+
+/*
+ * Deallocate a large allocation in one go: prep (taking large_mtx and junk
+ * filling as needed), finish, and then give the arena a decay tick.
+ */
+void
+large_dalloc(tsdn_t *tsdn, extent_t *extent) {
+	/* Fetch the arena up front; the extent is handed off below. */
+	arena_t *arena = extent_arena_get(extent);
+	large_dalloc_prep_impl(tsdn, arena, extent, false);
+	large_dalloc_finish_impl(tsdn, arena, extent);
+	arena_decay_tick(tsdn, arena);
+}
+
+/* Return the usable size recorded in the extent.  tsdn is not used. */
+size_t
+large_salloc(tsdn_t *tsdn, const extent_t *extent) {
+	return extent_usize_get(extent);
+}
+
+/* Return the profiling context stored in the extent.  tsdn is not used. */
+prof_tctx_t *
+large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent) {
+	return extent_prof_tctx_get(extent);
+}
+
+/* Store tctx as the extent's profiling context.  tsdn is not used here. */
+void
+large_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx) {
+	extent_prof_tctx_set(extent, tctx);
+}
+
+/*
+ * Reset the extent's profiling context to the placeholder pointer value 1.
+ * Presumably this sentinel marks an allocation as not sampled -- confirm
+ * against the prof code.
+ */
+void
+large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent) {
+	large_prof_tctx_set(tsdn, extent, (prof_tctx_t *)(uintptr_t)1U);
+}
--- a/deps/jemalloc/src/malloc_io.c
+++ b/deps/jemalloc/src/malloc_io.c
@@ -0,0 +1,689 @@
+#define JEMALLOC_MALLOC_IO_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/malloc_io.h"
+#include "jemalloc/internal/util.h"
+
+#ifdef assert
+#  undef assert
+#endif
+#ifdef not_reached
+#  undef not_reached
+#endif
+#ifdef not_implemented
+#  undef not_implemented
+#endif
+#ifdef assert_not_implemented
+#  undef assert_not_implemented
+#endif
+
+/*
+ * Define simple versions of assertion macros that won't recurse in case
+ * of assertion failures in malloc_*printf().
+ *
+ * Each macro reports through malloc_write() with a fixed string and then
+ * calls abort(), and does so only when config_debug is true.  not_reached()
+ * additionally falls into unreachable() when it does not abort.
+ * assert_not_implemented(e) triggers not_implemented() when e is false.
+ */
+#define assert(e) do {							\
+	if (config_debug && !(e)) {					\
+		malloc_write("<jemalloc>: Failed assertion\n");		\
+		abort();						\
+	}								\
+} while (0)
+
+#define not_reached() do {						\
+	if (config_debug) {						\
+		malloc_write("<jemalloc>: Unreachable code reached\n");	\
+		abort();						\
+	}								\
+	unreachable();							\
+} while (0)
+
+#define not_implemented() do {						\
+	if (config_debug) {						\
+		malloc_write("<jemalloc>: Not implemented\n");		\
+		abort();						\
+	}								\
+} while (0)
+
+#define assert_not_implemented(e) do {					\
+	if (unlikely(config_debug && !(e))) {				\
+		not_implemented();					\
+	}								\
+} while (0)
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+static void wrtmessage(void *cbopaque, const char *s);
+/*
+ * Buffer sizes for the number-to-string helpers below.  u2s() writes its
+ * digits backwards from the end of a U2S_BUFSIZE buffer, with the last slot
+ * holding the terminating NUL.  (1U << (LG_SIZEOF_INTMAX_T + 3)) is
+ * presumably the bit width of intmax_t, i.e. the digit count needed for
+ * base 2 -- confirm against the definition of LG_SIZEOF_INTMAX_T.
+ */
+#define U2S_BUFSIZE ((1U << (LG_SIZEOF_INTMAX_T + 3)) + 1)
+static char *u2s(uintmax_t x, unsigned base, bool uppercase, char *s,
+    size_t *slen_p);
+/* d2s() may prepend one sign character in front of u2s() output. */
+#define D2S_BUFSIZE (1 + U2S_BUFSIZE)
+static char *d2s(intmax_t x, char sign, char *s, size_t *slen_p);
+/* o2s() may prepend one '0' in front of u2s() output. */
+#define O2S_BUFSIZE (1 + U2S_BUFSIZE)
+static char *o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p);
+/* x2s() may prepend the two characters "0x"/"0X" in front of u2s() output. */
+#define X2S_BUFSIZE (2 + U2S_BUFSIZE)
+static char *x2s(uintmax_t x, bool alt_form, bool uppercase, char *s,
+    size_t *slen_p);
+
+/******************************************************************************/
+
+/* malloc_message() setup. */
+
+/*
+ * Default message sink: write the NUL-terminated string s to stderr.  The
+ * cbopaque argument is ignored, as is the result of the write.
+ */
+static void
+wrtmessage(void *cbopaque, const char *s) {
+	size_t len = strlen(s);
+
+#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_write)
+	/*
+	 * Prefer syscall(2) over write(2) so that libc cannot allocate memory
+	 * behind our back.  FreeBSD needs this; most other operating systems
+	 * do not have the problem.
+	 *
+	 * The return type of syscall() is long or int depending on platform,
+	 * so the unused result is stored in the widest plausible type to keep
+	 * the compiler quiet.
+	 */
+	UNUSED long result = syscall(SYS_write, STDERR_FILENO, s, len);
+#else
+	UNUSED ssize_t result = write(STDERR_FILENO, s, len);
+#endif
+}
+
+/*
+ * Exported message callback pointer.  When it is NULL, malloc_write() and
+ * malloc_vcprintf() fall back to wrtmessage().
+ */
+JEMALLOC_EXPORT void	(*je_malloc_message)(void *, const char *s);
+
+/*
+ * Convenience wrapper so that callers need not spell out
+ * je_malloc_message(...) everywhere: the message goes to je_malloc_message
+ * when it is set, and to wrtmessage() otherwise.  The opaque argument is
+ * always NULL.
+ */
+void
+malloc_write(const char *s) {
+	if (je_malloc_message == NULL) {
+		wrtmessage(NULL, s);
+		return;
+	}
+	je_malloc_message(NULL, s);
+}
+
+/*
+ * Fill buf (of buflen bytes) with a description of error code err.
+ *
+ * This wrapper exists because glibc exposes a non-standard strerror_r() when
+ * _GNU_SOURCE is defined: that variant returns a char * which may or may not
+ * be buf.  The Windows and glibc/_GNU_SOURCE branches always return 0; the
+ * remaining branch returns whatever strerror_r() returns.
+ */
+int
+buferror(int err, char *buf, size_t buflen) {
+#ifdef _WIN32
+	FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, err, 0,
+	    (LPSTR)buf, (DWORD)buflen, NULL);
+	return 0;
+#elif defined(__GLIBC__) && defined(_GNU_SOURCE)
+	char *msg = strerror_r(err, buf, buflen);
+	if (msg == buf) {
+		/* The message was written straight into buf. */
+		return 0;
+	}
+	/* The message lives elsewhere; copy and force termination. */
+	strncpy(buf, msg, buflen);
+	buf[buflen-1] = '\0';
+	return 0;
+#else
+	return strerror_r(err, buf, buflen);
+#endif
+}
+
+/*
+ * Convert the leading part of nptr to a uintmax_t, in the manner of
+ * strtoumax().
+ *
+ * base must be 0 or in [2, 36]; any other value sets errno to EINVAL and
+ * returns UINTMAX_MAX.  Leading whitespace and one optional sign are skipped.
+ * With base 0, a leading "0x"/"0X" followed by a hex digit selects base 16, a
+ * leading '0' selects base 8, and everything else is base 10.  With base 16
+ * an "0x"/"0X" prefix followed by a hex digit is skipped.
+ *
+ * Returns the converted value, negated in unsigned arithmetic when a '-' sign
+ * was present.  If no digits are consumed, errno is set to EINVAL and
+ * UINTMAX_MAX is returned.  If the value does not fit, errno is set to ERANGE
+ * and UINTMAX_MAX is returned, with parsing stopped at the digit that would
+ * overflow.
+ *
+ * When endptr is non-NULL, *endptr receives the position where parsing
+ * stopped, or nptr itself when no characters were converted.
+ */
+uintmax_t
+malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) {
+	uintmax_t ret, digit;
+	unsigned b;
+	bool neg;
+	const char *p, *ns;
+
+	p = nptr;
+	if (base < 0 || base == 1 || base > 36) {
+		ns = p;
+		set_errno(EINVAL);
+		ret = UINTMAX_MAX;
+		goto label_return;
+	}
+	b = base;
+
+	/* Swallow leading whitespace and get sign, if any. */
+	neg = false;
+	while (true) {
+		switch (*p) {
+		case '\t': case '\n': case '\v': case '\f': case '\r': case ' ':
+			p++;
+			break;
+		case '-':
+			neg = true;
+			/* Fall through. */
+		case '+':
+			p++;
+			/* Fall through. */
+		default:
+			goto label_prefix;
+		}
+	}
+
+	/* Get prefix, if any. */
+	label_prefix:
+	/*
+	 * Note where the first non-whitespace/sign character is so that it is
+	 * possible to tell whether any digits are consumed (e.g., "  0" vs.
+	 * "  -x").
+	 */
+	ns = p;
+	if (*p == '0') {
+		switch (p[1]) {
+		case '0': case '1': case '2': case '3': case '4': case '5':
+		case '6': case '7':
+			if (b == 0) {
+				b = 8;
+			}
+			if (b == 8) {
+				p++;
+			}
+			break;
+		case 'X': case 'x':
+			switch (p[2]) {
+			case '0': case '1': case '2': case '3': case '4':
+			case '5': case '6': case '7': case '8': case '9':
+			case 'A': case 'B': case 'C': case 'D': case 'E':
+			case 'F':
+			case 'a': case 'b': case 'c': case 'd': case 'e':
+			case 'f':
+				if (b == 0) {
+					b = 16;
+				}
+				if (b == 16) {
+					p += 2;
+				}
+				break;
+			default:
+				break;
+			}
+			break;
+		default:
+			/*
+			 * A leading zero followed by something that is not an
+			 * octal digit or an 'x'.  Do not return early: the
+			 * next character may still be a valid digit in the
+			 * requested base (e.g. "08" in base 10, "0z" in base
+			 * 36).  With base 0 the leading zero selects octal,
+			 * so only the '0' itself is consumed below.
+			 */
+			if (b == 0) {
+				b = 8;
+			}
+			break;
+		}
+	}
+	if (b == 0) {
+		b = 10;
+	}
+
+	/* Convert. */
+	ret = 0;
+	while ((*p >= '0' && *p <= '9' && (digit = *p - '0') < b)
+	    || (*p >= 'A' && *p <= 'Z' && (digit = 10 + *p - 'A') < b)
+	    || (*p >= 'a' && *p <= 'z' && (digit = 10 + *p - 'a') < b)) {
+		/*
+		 * Check before multiplying: a wrapped product can still be
+		 * larger than the previous value, so comparing afterwards
+		 * does not reliably detect overflow.
+		 */
+		if (ret > (UINTMAX_MAX - digit) / b) {
+			/* Overflow. */
+			set_errno(ERANGE);
+			ret = UINTMAX_MAX;
+			goto label_return;
+		}
+		ret = ret * b + digit;
+		p++;
+	}
+	if (neg) {
+		/* Negate in unsigned arithmetic; always well-defined. */
+		ret = (uintmax_t)0 - ret;
+	}
+
+	if (p == ns) {
+		/* No conversion performed. */
+		set_errno(EINVAL);
+		ret = UINTMAX_MAX;
+		goto label_return;
+	}
+
+label_return:
+	if (endptr != NULL) {
+		if (p == ns) {
+			/* No characters were converted. */
+			*endptr = (char *)nptr;
+		} else {
+			*endptr = (char *)p;
+		}
+	}
+	return ret;
+}
+
+/*
+ * Render x in the given base into the tail of s, which must hold at least
+ * U2S_BUFSIZE bytes.  Digits are produced from least to most significant,
+ * working backwards from the terminating NUL in the final slot.  Letters are
+ * upper case when uppercase is true.
+ *
+ * Returns a pointer to the first digit inside s and stores the digit count
+ * (excluding the NUL) in *slen_p.  Bases 10 and 16 have dedicated loops; any
+ * other base is asserted to lie in [2, 36].
+ */
+static char *
+u2s(uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p) {
+	char *const end = &s[U2S_BUFSIZE - 1];
+	char *cur = end;
+
+	*cur = '\0';
+	if (base == 10) {
+		do {
+			*--cur = "0123456789"[x % (uint64_t)10];
+			x /= (uint64_t)10;
+		} while (x > 0);
+	} else if (base == 16) {
+		const char *hex = (uppercase)
+		    ? "0123456789ABCDEF"
+		    : "0123456789abcdef";
+
+		do {
+			*--cur = hex[x & 0xf];
+			x >>= 4;
+		} while (x > 0);
+	} else {
+		const char *alphabet = (uppercase)
+		    ? "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+		    : "0123456789abcdefghijklmnopqrstuvwxyz";
+
+		assert(base >= 2 && base <= 36);
+		do {
+			*--cur = alphabet[x % (uint64_t)base];
+			x /= (uint64_t)base;
+		} while (x > 0);
+	}
+
+	*slen_p = (size_t)(end - cur);
+	return cur;
+}
+
+/*
+ * Render the signed value x in base 10 into s (at least D2S_BUFSIZE bytes).
+ *
+ * sign selects what is printed in front of non-negative values: '-' prints
+ * nothing, ' ' prints a space, '+' prints a plus.  Negative values always get
+ * a leading '-'.  Any other sign value reaches not_reached().
+ *
+ * Returns a pointer to the first character of the result inside s and stores
+ * its length in *slen_p.
+ */
+static char *
+d2s(intmax_t x, char sign, char *s, size_t *slen_p) {
+	bool neg = (x < 0);
+	/*
+	 * Compute the magnitude in unsigned arithmetic.  Negating x itself
+	 * would overflow (undefined behavior) when x is INTMAX_MIN.
+	 */
+	uintmax_t magnitude = neg ? (uintmax_t)0 - (uintmax_t)x : (uintmax_t)x;
+
+	s = u2s(magnitude, 10, false, s, slen_p);
+	if (neg) {
+		sign = '-';
+	}
+	switch (sign) {
+	case '-':
+		if (!neg) {
+			break;
+		}
+		/* Fall through. */
+	case ' ':
+	case '+':
+		/* Prepend the sign character in front of the digits. */
+		s--;
+		(*slen_p)++;
+		*s = sign;
+		break;
+	default: not_reached();
+	}
+	return s;
+}
+
+/*
+ * Render x in octal into s (at least O2S_BUFSIZE bytes).  With alt_form set,
+ * a '0' is prepended unless the output already starts with '0'.  Returns the
+ * start of the result and stores its length in *slen_p.
+ */
+static char *
+o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p) {
+	char *out = u2s(x, 8, false, s, slen_p);
+
+	if (!alt_form || *out == '0') {
+		return out;
+	}
+	out--;
+	*out = '0';
+	(*slen_p)++;
+	return out;
+}
+
+/*
+ * Render x in hexadecimal into s (at least X2S_BUFSIZE bytes).  With alt_form
+ * set, "0x" (or "0X" when uppercase) is prepended.  Returns the start of the
+ * result and stores its length in *slen_p.
+ */
+static char *
+x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p) {
+	char *out = u2s(x, 16, uppercase, s, slen_p);
+
+	if (!alt_form) {
+		return out;
+	}
+	out -= 2;
+	out[0] = '0';
+	out[1] = uppercase ? 'X' : 'x';
+	(*slen_p) += 2;
+	return out;
+}
+
+/*
+ * Minimal vsnprintf() replacement that formats into str without calling into
+ * libc's printf family.
+ *
+ * Supported syntax, as implemented below:
+ *   flags:       '#', '-', ' ', '+'
+ *   width:       decimal number or '*'
+ *   precision:   '.' followed by a decimal number or '*' (only %s uses it)
+ *   length:      l, ll, q, j, t, z
+ *   conversions: %%, d, i, o, u, x, X, c, s, p
+ * Anything else reaches not_reached().
+ *
+ * At most size bytes are written to str, including the terminating NUL.  When
+ * size is 0 nothing is written at all, so str may then be any pointer.
+ *
+ * Returns the number of characters the complete output needs, excluding the
+ * NUL; a return value >= size therefore means the output was truncated.
+ */
+size_t
+malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) {
+	size_t i;
+	const char *f;
+
+#define APPEND_C(c) do {						\
+	if (i < size) {							\
+		str[i] = (c);						\
+	}								\
+	i++;								\
+} while (0)
+#define APPEND_S(s, slen) do {						\
+	if (i < size) {							\
+		size_t cpylen = (slen <= size - i) ? slen : size - i;	\
+		memcpy(&str[i], s, cpylen);				\
+	}								\
+	i += slen;							\
+} while (0)
+#define APPEND_PADDED_S(s, slen, width, left_justify) do {		\
+	/* Left padding. */						\
+	size_t pad_len = (width == -1) ? 0 : ((slen < (size_t)width) ?	\
+	    (size_t)width - slen : 0);					\
+	if (!left_justify && pad_len != 0) {				\
+		size_t j;						\
+		for (j = 0; j < pad_len; j++) {				\
+			APPEND_C(' ');					\
+		}							\
+	}								\
+	/* Value. */							\
+	APPEND_S(s, slen);						\
+	/* Right padding. */						\
+	if (left_justify && pad_len != 0) {				\
+		size_t j;						\
+		for (j = 0; j < pad_len; j++) {				\
+			APPEND_C(' ');					\
+		}							\
+	}								\
+} while (0)
+	/*
+	 * len is the length modifier character; OR-ing in 0x80 selects the
+	 * unsigned variant of the same width.
+	 */
+#define GET_ARG_NUMERIC(val, len) do {					\
+	switch (len) {							\
+	case '?':							\
+		val = va_arg(ap, int);					\
+		break;							\
+	case '?' | 0x80:						\
+		val = va_arg(ap, unsigned int);				\
+		break;							\
+	case 'l':							\
+		val = va_arg(ap, long);					\
+		break;							\
+	case 'l' | 0x80:						\
+		val = va_arg(ap, unsigned long);			\
+		break;							\
+	case 'q':							\
+		val = va_arg(ap, long long);				\
+		break;							\
+	case 'q' | 0x80:						\
+		val = va_arg(ap, unsigned long long);			\
+		break;							\
+	case 'j':							\
+		val = va_arg(ap, intmax_t);				\
+		break;							\
+	case 'j' | 0x80:						\
+		val = va_arg(ap, uintmax_t);				\
+		break;							\
+	case 't':							\
+		val = va_arg(ap, ptrdiff_t);				\
+		break;							\
+	case 'z':							\
+		val = va_arg(ap, ssize_t);				\
+		break;							\
+	case 'z' | 0x80:						\
+		val = va_arg(ap, size_t);				\
+		break;							\
+	case 'p': /* Synthetic; used for %p. */				\
+		val = va_arg(ap, uintptr_t);				\
+		break;							\
+	default:							\
+		not_reached();						\
+		val = 0;						\
+	}								\
+} while (0)
+
+	i = 0;
+	f = format;
+	while (true) {
+		switch (*f) {
+		case '\0': goto label_out;
+		case '%': {
+			bool alt_form = false;
+			bool left_justify = false;
+			bool plus_space = false;
+			bool plus_plus = false;
+			int prec = -1;
+			int width = -1;
+			unsigned char len = '?';
+			char *s;
+			size_t slen;
+
+			f++;
+			/* Flags. */
+			while (true) {
+				switch (*f) {
+				case '#':
+					assert(!alt_form);
+					alt_form = true;
+					break;
+				case '-':
+					assert(!left_justify);
+					left_justify = true;
+					break;
+				case ' ':
+					assert(!plus_space);
+					plus_space = true;
+					break;
+				case '+':
+					assert(!plus_plus);
+					plus_plus = true;
+					break;
+				default: goto label_width;
+				}
+				f++;
+			}
+			/* Width. */
+			label_width:
+			switch (*f) {
+			case '*':
+				width = va_arg(ap, int);
+				f++;
+				if (width < 0) {
+					/* Negative width means left-justify. */
+					left_justify = true;
+					width = -width;
+				}
+				break;
+			case '0': case '1': case '2': case '3': case '4':
+			case '5': case '6': case '7': case '8': case '9': {
+				uintmax_t uwidth;
+				set_errno(0);
+				uwidth = malloc_strtoumax(f, (char **)&f, 10);
+				assert(uwidth != UINTMAX_MAX || get_errno() !=
+				    ERANGE);
+				width = (int)uwidth;
+				break;
+			} default:
+				break;
+			}
+			/* Width/precision separator. */
+			if (*f == '.') {
+				f++;
+			} else {
+				goto label_length;
+			}
+			/* Precision. */
+			switch (*f) {
+			case '*':
+				prec = va_arg(ap, int);
+				f++;
+				break;
+			case '0': case '1': case '2': case '3': case '4':
+			case '5': case '6': case '7': case '8': case '9': {
+				uintmax_t uprec;
+				set_errno(0);
+				uprec = malloc_strtoumax(f, (char **)&f, 10);
+				assert(uprec != UINTMAX_MAX || get_errno() !=
+				    ERANGE);
+				prec = (int)uprec;
+				break;
+			}
+			default: break;
+			}
+			/* Length. */
+			label_length:
+			switch (*f) {
+			case 'l':
+				f++;
+				if (*f == 'l') {
+					/* "ll" is handled as 'q'. */
+					len = 'q';
+					f++;
+				} else {
+					len = 'l';
+				}
+				break;
+			case 'q': case 'j': case 't': case 'z':
+				len = *f;
+				f++;
+				break;
+			default: break;
+			}
+			/* Conversion specifier. */
+			switch (*f) {
+			case '%':
+				/* %% */
+				APPEND_C(*f);
+				f++;
+				break;
+			case 'd': case 'i': {
+				intmax_t val JEMALLOC_CC_SILENCE_INIT(0);
+				char buf[D2S_BUFSIZE];
+
+				GET_ARG_NUMERIC(val, len);
+				s = d2s(val, (plus_plus ? '+' : (plus_space ?
+				    ' ' : '-')), buf, &slen);
+				APPEND_PADDED_S(s, slen, width, left_justify);
+				f++;
+				break;
+			} case 'o': {
+				uintmax_t val JEMALLOC_CC_SILENCE_INIT(0);
+				char buf[O2S_BUFSIZE];
+
+				GET_ARG_NUMERIC(val, len | 0x80);
+				s = o2s(val, alt_form, buf, &slen);
+				APPEND_PADDED_S(s, slen, width, left_justify);
+				f++;
+				break;
+			} case 'u': {
+				uintmax_t val JEMALLOC_CC_SILENCE_INIT(0);
+				char buf[U2S_BUFSIZE];
+
+				GET_ARG_NUMERIC(val, len | 0x80);
+				s = u2s(val, 10, false, buf, &slen);
+				APPEND_PADDED_S(s, slen, width, left_justify);
+				f++;
+				break;
+			} case 'x': case 'X': {
+				uintmax_t val JEMALLOC_CC_SILENCE_INIT(0);
+				char buf[X2S_BUFSIZE];
+
+				GET_ARG_NUMERIC(val, len | 0x80);
+				s = x2s(val, alt_form, *f == 'X', buf, &slen);
+				APPEND_PADDED_S(s, slen, width, left_justify);
+				f++;
+				break;
+			} case 'c': {
+				unsigned char val;
+				char buf[2];
+
+				assert(len == '?' || len == 'l');
+				assert_not_implemented(len != 'l');
+				val = va_arg(ap, int);
+				buf[0] = val;
+				buf[1] = '\0';
+				APPEND_PADDED_S(buf, 1, width, left_justify);
+				f++;
+				break;
+			} case 's':
+				assert(len == '?' || len == 'l');
+				assert_not_implemented(len != 'l');
+				s = va_arg(ap, char *);
+				if (prec < 0) {
+					slen = strlen(s);
+				} else {
+					/*
+					 * The precision is only an upper
+					 * bound: stop at the terminator so
+					 * that a string shorter than prec is
+					 * never read past its end.
+					 */
+					slen = 0;
+					while (slen < (size_t)prec &&
+					    s[slen] != '\0') {
+						slen++;
+					}
+				}
+				APPEND_PADDED_S(s, slen, width, left_justify);
+				f++;
+				break;
+			case 'p': {
+				uintmax_t val;
+				char buf[X2S_BUFSIZE];
+
+				GET_ARG_NUMERIC(val, 'p');
+				s = x2s(val, true, false, buf, &slen);
+				APPEND_PADDED_S(s, slen, width, left_justify);
+				f++;
+				break;
+			} default: not_reached();
+			}
+			break;
+		} default: {
+			APPEND_C(*f);
+			f++;
+			break;
+		}}
+	}
+	label_out:
+	if (i < size) {
+		str[i] = '\0';
+	} else if (size != 0) {
+		/* Truncated: terminate inside the buffer. */
+		str[size - 1] = '\0';
+	}
+	/* With size == 0 there is no room for a terminator; write nothing. */
+
+#undef APPEND_C
+#undef APPEND_S
+#undef APPEND_PADDED_S
+#undef GET_ARG_NUMERIC
+	return i;
+}
+
+/*
+ * Variadic front end for malloc_vsnprintf(); returns whatever it returns.
+ */
+JEMALLOC_FORMAT_PRINTF(3, 4)
+size_t
+malloc_snprintf(char *str, size_t size, const char *format, ...) {
+	va_list args;
+	size_t needed;
+
+	va_start(args, format);
+	needed = malloc_vsnprintf(str, size, format, args);
+	va_end(args);
+
+	return needed;
+}
+
+/*
+ * Format into a stack buffer of MALLOC_PRINTF_BUFSIZE bytes and hand the
+ * result to write_cb(cbopaque, ...).  Output longer than the buffer is
+ * truncated by malloc_vsnprintf().
+ */
+void
+malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque,
+    const char *format, va_list ap) {
+	char buf[MALLOC_PRINTF_BUFSIZE];
+
+	if (write_cb == NULL) {
+		/*
+		 * No callback supplied: fall back to je_malloc_message when
+		 * it is set and to wrtmessage() otherwise, always with a NULL
+		 * opaque pointer.
+		 */
+		if (je_malloc_message != NULL) {
+			write_cb = je_malloc_message;
+		} else {
+			write_cb = wrtmessage;
+		}
+		cbopaque = NULL;
+	}
+
+	malloc_vsnprintf(buf, sizeof(buf), format, ap);
+	write_cb(cbopaque, buf);
+}
+
+/*
+ * Variadic front end for malloc_vcprintf(): print through a callback in a
+ * way that (hopefully) avoids memory allocation.
+ */
+JEMALLOC_FORMAT_PRINTF(3, 4)
+void
+malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque,
+    const char *format, ...) {
+	va_list args;
+
+	va_start(args, format);
+	malloc_vcprintf(write_cb, cbopaque, format, args);
+	va_end(args);
+}
+
+/*
+ * Print through the default message callback (stderr unless
+ * je_malloc_message is set) in a way that avoids memory allocation.
+ */
+JEMALLOC_FORMAT_PRINTF(1, 2)
+void
+malloc_printf(const char *format, ...) {
+	va_list args;
+
+	va_start(args, format);
+	malloc_vcprintf(NULL, NULL, format, args);
+	va_end(args);
+}
+
+/*
+ * Restore normal assertion macros, in order to make it possible to compile all
+ * C files as a single concatenation.
+ */
+#undef assert
+#undef not_reached
+#undef not_implemented
+#undef assert_not_implemented
+#include "jemalloc/internal/assert.h"
--- a/deps/jemalloc/src/mutex_pool.c
+++ b/deps/jemalloc/src/mutex_pool.c
@@ -0,0 +1,18 @@
+#define JEMALLOC_MUTEX_POOL_C_
+
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/mutex_pool.h"
+
+/*
+ * Initialize all MUTEX_POOL_SIZE mutexes of the pool with the same name and
+ * rank, using address ordering.  Returns true as soon as one initialization
+ * fails and false when all succeed.
+ */
+bool
+mutex_pool_init(mutex_pool_t *pool, const char *name, witness_rank_t rank) {
+	int i = 0;
+
+	while (i < MUTEX_POOL_SIZE) {
+		bool failed = malloc_mutex_init(&pool->mutexes[i], name, rank,
+		    malloc_mutex_address_ordered);
+		if (failed) {
+			return true;
+		}
+		++i;
+	}
+	return false;
+}
--- a/deps/jemalloc/src/nstime.c
+++ b/deps/jemalloc/src/nstime.c
@@ -0,0 +1,170 @@
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/nstime.h"
+
+#include "jemalloc/internal/assert.h"
+
+#define BILLION	UINT64_C(1000000000)
+#define MILLION	UINT64_C(1000000)
+
+/* Set *time to ns nanoseconds. */
+void
+nstime_init(nstime_t *time, uint64_t ns) {
+	time->ns = ns;
+}
+
+/* Set *time from a seconds part and a nanoseconds part. */
+void
+nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec) {
+	uint64_t total = sec * BILLION;
+
+	total += nsec;
+	time->ns = total;
+}
+
+/* Return the stored value in nanoseconds. */
+uint64_t
+nstime_ns(const nstime_t *time) {
+	return time->ns;
+}
+
+/* Return the stored value in whole milliseconds (truncating division). */
+uint64_t
+nstime_msec(const nstime_t *time) {
+	return time->ns / MILLION;
+}
+
+/* Return the stored value in whole seconds (truncating division). */
+uint64_t
+nstime_sec(const nstime_t *time) {
+	return time->ns / BILLION;
+}
+
+/* Return the sub-second remainder of the stored value, in nanoseconds. */
+uint64_t
+nstime_nsec(const nstime_t *time) {
+	return time->ns % BILLION;
+}
+
+/* Copy *source into *time by structure assignment. */
+void
+nstime_copy(nstime_t *time, const nstime_t *source) {
+	*time = *source;
+}
+
+/* Three-way comparison: -1 if a < b, 0 if equal, 1 if a > b. */
+int
+nstime_compare(const nstime_t *a, const nstime_t *b) {
+	if (a->ns < b->ns) {
+		return -1;
+	}
+	if (a->ns > b->ns) {
+		return 1;
+	}
+	return 0;
+}
+
+/* Add *addend to *time; the sum is asserted not to overflow uint64_t. */
+void
+nstime_add(nstime_t *time, const nstime_t *addend) {
+	uint64_t delta = addend->ns;
+
+	assert(UINT64_MAX - time->ns >= delta);
+	time->ns += delta;
+}
+
+/* Add addend nanoseconds to *time; asserted not to overflow uint64_t. */
+void
+nstime_iadd(nstime_t *time, uint64_t addend) {
+	uint64_t headroom = UINT64_MAX - time->ns;
+
+	assert(headroom >= addend);
+	time->ns += addend;
+}
+
+/* Subtract *subtrahend from *time; *time is asserted to be >= *subtrahend. */
+void
+nstime_subtract(nstime_t *time, const nstime_t *subtrahend) {
+	uint64_t delta = subtrahend->ns;
+
+	assert(time->ns >= delta);
+	time->ns -= delta;
+}
+
+/* Subtract subtrahend nanoseconds from *time; asserted not to underflow. */
+void
+nstime_isubtract(nstime_t *time, uint64_t subtrahend) {
+	assert(subtrahend <= time->ns);
+
+	time->ns = time->ns - subtrahend;
+}
+
+/*
+ * Multiply *time by multiplier.  The assertion checks that the product fits
+ * in uint64_t: it passes when the multiplier is zero, when both operands fit
+ * in the low half of uint64_t, or when dividing the product by the
+ * multiplier gives back the original value.
+ */
+void
+nstime_imultiply(nstime_t *time, uint64_t multiplier) {
+	/*
+	 * Test for a zero multiplier first: the division-based check below
+	 * would otherwise divide by zero.
+	 */
+	assert(multiplier == 0 ||
+	    (((time->ns | multiplier) & (UINT64_MAX << (sizeof(uint64_t) <<
+	    2))) == 0) || ((time->ns * multiplier) / multiplier == time->ns));
+
+	time->ns *= multiplier;
+}
+
+/* Divide *time by divisor (truncating); divisor is asserted to be nonzero. */
+void
+nstime_idivide(nstime_t *time, uint64_t divisor) {
+	assert(divisor != 0);
+
+	time->ns /= divisor;
+}
+
+/*
+ * Return the truncated quotient *time / *divisor without modifying either
+ * operand; *divisor is asserted to be nonzero.
+ */
+uint64_t
+nstime_divide(const nstime_t *time, const nstime_t *divisor) {
+	assert(divisor->ns != 0);
+
+	return time->ns / divisor->ns;
+}
+
+/*
+ * nstime_get() reads the current time from whichever clock source the
+ * platform offers, and NSTIME_MONOTONIC records whether that source is
+ * treated as monotonic.
+ */
+#ifdef _WIN32
+#  define NSTIME_MONOTONIC true
+/* System time as a FILETIME, counted in units of 100 ns. */
+static void
+nstime_get(nstime_t *time) {
+	FILETIME now;
+
+	GetSystemTimeAsFileTime(&now);
+	uint64_t hundred_ns = (((uint64_t)now.dwHighDateTime) << 32) |
+	    now.dwLowDateTime;
+
+	nstime_init(time, hundred_ns * 100);
+}
+#elif defined(JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE)
+#  define NSTIME_MONOTONIC true
+/* clock_gettime() with CLOCK_MONOTONIC_COARSE. */
+static void
+nstime_get(nstime_t *time) {
+	struct timespec now;
+
+	clock_gettime(CLOCK_MONOTONIC_COARSE, &now);
+	nstime_init2(time, now.tv_sec, now.tv_nsec);
+}
+#elif defined(JEMALLOC_HAVE_CLOCK_MONOTONIC)
+#  define NSTIME_MONOTONIC true
+/* clock_gettime() with CLOCK_MONOTONIC. */
+static void
+nstime_get(nstime_t *time) {
+	struct timespec now;
+
+	clock_gettime(CLOCK_MONOTONIC, &now);
+	nstime_init2(time, now.tv_sec, now.tv_nsec);
+}
+#elif defined(JEMALLOC_HAVE_MACH_ABSOLUTE_TIME)
+#  define NSTIME_MONOTONIC true
+/* The raw mach_absolute_time() value is stored as-is. */
+static void
+nstime_get(nstime_t *time) {
+	nstime_init(time, mach_absolute_time());
+}
+#else
+#  define NSTIME_MONOTONIC false
+/* Fallback: gettimeofday(), with microseconds scaled to nanoseconds. */
+static void
+nstime_get(nstime_t *time) {
+	struct timeval now;
+
+	gettimeofday(&now, NULL);
+	nstime_init2(time, now.tv_sec, now.tv_usec * 1000);
+}
+#endif
+
+/*
+ * Report whether the clock source selected for nstime_get() above is marked
+ * monotonic.  NSTIME_MONOTONIC is undefined right after its only use.  The
+ * function is reached through the nstime_monotonic pointer defined below.
+ */
+static bool
+nstime_monotonic_impl(void) {
+	return NSTIME_MONOTONIC;
+#undef NSTIME_MONOTONIC
+}
+nstime_monotonic_t *JET_MUTABLE nstime_monotonic = nstime_monotonic_impl;
+
+/*
+ * Refresh *time from nstime_get().  If the new reading is earlier than the
+ * previous value, the previous value is restored and true is returned;
+ * otherwise the new reading is kept and false is returned.  The function is
+ * reached through the nstime_update pointer defined below.
+ */
+static bool
+nstime_update_impl(nstime_t *time) {
+	nstime_t previous;
+
+	nstime_copy(&previous, time);
+	nstime_get(time);
+
+	/* Handle non-monotonic clocks. */
+	bool went_backwards = (nstime_compare(&previous, time) > 0);
+	if (unlikely(went_backwards)) {
+		nstime_copy(time, &previous);
+		return true;
+	}
+
+	return false;
+}
+nstime_update_t *JET_MUTABLE nstime_update = nstime_update_impl;
--- a/deps/jemalloc/src/pages.c
+++ b/deps/jemalloc/src/pages.c
@@ -0,0 +1,423 @@
+#define JEMALLOC_PAGES_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+
+#include "jemalloc/internal/pages.h"
+
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/malloc_io.h"
+
+#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
+#include <sys/sysctl.h>
+#endif
+
+/******************************************************************************/
+/* Data. */
+
+/* Actual operating system page size, detected during bootstrap, <= PAGE. */
+static size_t	os_page;
+
+#ifndef _WIN32
+#  define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE)
+#  define PAGES_PROT_DECOMMIT (PROT_NONE)
+static int	mmap_flags;
+#endif
+static bool	os_overcommits;
+
+/******************************************************************************/
+/*
+ * Function prototypes for static functions that are referenced prior to
+ * definition.
+ */
+
+static void os_pages_unmap(void *addr, size_t size);
+
+/******************************************************************************/
+
+/*
+ * Ask the operating system for a mapping of size bytes, at addr when addr is
+ * non-NULL.  addr and size are asserted to be multiples of os_page and size
+ * to be nonzero.  When os_overcommits is set, *commit is forced to true.  The
+ * alignment parameter is not used by this function.
+ *
+ * Returns the mapping, or NULL on failure.  When addr is non-NULL, a mapping
+ * that lands anywhere else is released again and NULL is returned.
+ */
+static void *
+os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
+	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
+	assert(ALIGNMENT_CEILING(size, os_page) == size);
+	assert(size != 0);
+
+	if (os_overcommits) {
+		*commit = true;
+	}
+
+	void *ret;
+#ifdef _WIN32
+	/*
+	 * VirtualAlloc fails and returns NULL when it is given an address
+	 * that it cannot allocate at.
+	 */
+	ret = VirtualAlloc(addr, size, MEM_RESERVE | (*commit ? MEM_COMMIT : 0),
+	    PAGE_READWRITE);
+#else
+	/*
+	 * MAP_FIXED is deliberately not used: it can *replace* existing
+	 * mappings, and only new mappings are wanted here.
+	 */
+	ret = mmap(addr, size,
+	    *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT, mmap_flags, -1,
+	    0);
+	assert(ret != NULL);
+
+	if (ret == MAP_FAILED) {
+		return NULL;
+	}
+	if (addr != NULL && ret != addr) {
+		/* Mapped successfully, but not where it was requested. */
+		os_pages_unmap(ret, size);
+		return NULL;
+	}
+#endif
+	assert(ret == NULL || ((addr == NULL) ? (ret != addr) :
+	    (ret == addr)));
+	return ret;
+}
+
+/*
+ * Cut an over-sized mapping [addr, addr + alloc_size) down to the size bytes
+ * that start leadsize bytes into it, and return the start of that range.
+ *
+ * On Windows the whole mapping is released and the wanted range is mapped
+ * again; NULL is returned when the new mapping does not land at the wanted
+ * address.  Elsewhere the leading and trailing excess are unmapped in place.
+ */
+static void *
+os_pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size,
+    bool *commit) {
+	uintptr_t base = (uintptr_t)addr;
+	void *ret = (void *)(base + leadsize);
+
+	assert(alloc_size >= leadsize + size);
+#ifdef _WIN32
+	os_pages_unmap(addr, alloc_size);
+	void *new_addr = os_pages_map(ret, size, PAGE, commit);
+	if (new_addr != ret) {
+		if (new_addr != NULL) {
+			os_pages_unmap(new_addr, size);
+		}
+		return NULL;
+	}
+	return ret;
+#else
+	size_t trailsize = alloc_size - leadsize - size;
+
+	if (leadsize != 0) {
+		os_pages_unmap(addr, leadsize);
+	}
+	if (trailsize != 0) {
+		void *trail = (void *)((uintptr_t)ret + size);
+		os_pages_unmap(trail, trailsize);
+	}
+	return ret;
+#endif
+}
+
+/*
+ * Release a mapping.  addr and size are asserted to be multiples of os_page.
+ * On failure a message naming the failing call is printed, and abort() is
+ * called when opt_abort is set.
+ */
+static void
+os_pages_unmap(void *addr, size_t size) {
+	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
+	assert(ALIGNMENT_CEILING(size, os_page) == size);
+
+#ifdef _WIN32
+	bool failed = (VirtualFree(addr, 0, MEM_RELEASE) == 0);
+#else
+	bool failed = (munmap(addr, size) == -1);
+#endif
+	if (!failed) {
+		return;
+	}
+
+	char errbuf[BUFERROR_BUF];
+
+	buferror(get_errno(), errbuf, sizeof(errbuf));
+	malloc_printf("<jemalloc>: Error in "
+#ifdef _WIN32
+	    "VirtualFree"
+#else
+	    "munmap"
+#endif
+	    "(): %s\n", errbuf);
+	if (opt_abort) {
+		abort();
+	}
+}
+
+/*
+ * Reliable but slow way to obtain an aligned mapping: map
+ * size + alignment - os_page bytes, then trim the result down to an aligned
+ * range of size bytes.  The attempt is repeated while trimming fails.
+ * Returns NULL if the size computation wraps around or if mapping fails.
+ */
+static void *
+pages_map_slow(size_t size, size_t alignment, bool *commit) {
+	size_t alloc_size = size + alignment - os_page;
+	/* Beware size_t wrap-around. */
+	if (alloc_size < size) {
+		return NULL;
+	}
+
+	for (;;) {
+		void *pages = os_pages_map(NULL, alloc_size, alignment, commit);
+		if (pages == NULL) {
+			return NULL;
+		}
+		uintptr_t start = (uintptr_t)pages;
+		size_t leadsize = ALIGNMENT_CEILING(start, alignment) - start;
+		void *ret = os_pages_trim(pages, alloc_size, leadsize, size,
+		    commit);
+		if (ret != NULL) {
+			assert(PAGE_ADDR2BASE(ret) == ret);
+			return ret;
+		}
+	}
+}
+
+/*
+ * Map size bytes aligned to alignment (asserted >= PAGE), at addr when addr
+ * is non-NULL (addr is asserted to be aligned).  Returns the mapping or NULL.
+ */
+void *
+pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
+	assert(alignment >= PAGE);
+	assert(ALIGNMENT_ADDR2BASE(addr, alignment) == addr);
+
+	/*
+	 * mmap() offers no way to request an alignment (NetBSD has one), so
+	 * aligned mappings take some effort.  The reliable but slow method
+	 * maps an over-sized region and trims the excess; that always costs
+	 * one or two os_pages_unmap() calls and can leave holes in the
+	 * process's virtual memory map if memory grows downward.
+	 *
+	 * So first try optimistically to map exactly the requested size, and
+	 * fall back to the slow method only when the result is misaligned.
+	 * The optimistic attempt is expected to succeed most of the time.
+	 */
+
+	void *ret = os_pages_map(addr, size, os_page, commit);
+	if (ret == NULL || ret == addr) {
+		return ret;
+	}
+	assert(addr == NULL);
+	if (ALIGNMENT_ADDR2OFFSET(ret, alignment) == 0) {
+		assert(PAGE_ADDR2BASE(ret) == ret);
+		return ret;
+	}
+
+	/* Misaligned: give the mapping back and take the slow path. */
+	os_pages_unmap(ret, size);
+	return pages_map_slow(size, alignment, commit);
+}
+
+/*
+ * Release a mapping through os_pages_unmap().  addr and size are asserted to
+ * be PAGE-aligned.
+ */
+void
+pages_unmap(void *addr, size_t size) {
+	assert(PAGE_ADDR2BASE(addr) == addr);
+	assert(PAGE_CEILING(size) == size);
+
+	os_pages_unmap(addr, size);
+}
+
+/*
+ * Commit (commit == true) or decommit (commit == false) the PAGE-aligned
+ * range [addr, addr + size).  Returns false on success and true on failure.
+ * When os_overcommits is set nothing is done and true is returned.
+ */
+static bool
+pages_commit_impl(void *addr, size_t size, bool commit) {
+	assert(PAGE_ADDR2BASE(addr) == addr);
+	assert(PAGE_CEILING(size) == size);
+
+	if (os_overcommits) {
+		return true;
+	}
+
+#ifdef _WIN32
+	if (commit) {
+		return (addr != VirtualAlloc(addr, size, MEM_COMMIT,
+		    PAGE_READWRITE));
+	}
+	return (!VirtualFree(addr, size, MEM_DECOMMIT));
+#else
+	/* Overlay a fresh fixed mapping with the wanted protection. */
+	int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
+	void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED, -1, 0);
+
+	if (result == MAP_FAILED) {
+		return true;
+	}
+	if (result == addr) {
+		return false;
+	}
+	/* Mapped successfully, but not at the requested address. */
+	os_pages_unmap(result, size);
+	return true;
+#endif
+}
+
+/* Commit the range; returns pages_commit_impl()'s result (true on failure). */
+bool
+pages_commit(void *addr, size_t size) {
+	return pages_commit_impl(addr, size, true);
+}
+
+/* Decommit the range; returns pages_commit_impl()'s result (true on failure). */
+bool
+pages_decommit(void *addr, size_t size) {
+	return pages_commit_impl(addr, size, false);
+}
+
+/*
+ * Lazily purge the PAGE-aligned range [addr, addr + size).  Returns false on
+ * success and true on failure, or true immediately when pages_can_purge_lazy
+ * is false.
+ *
+ * Mechanism by platform: VirtualAlloc(MEM_RESET) on Windows (its result is
+ * not checked), otherwise madvise(MADV_FREE) when available, otherwise
+ * madvise(MADV_DONTNEED) when that is configured and not marked as zeroing
+ * pages.
+ */
+bool
+pages_purge_lazy(void *addr, size_t size) {
+	assert(PAGE_ADDR2BASE(addr) == addr);
+	assert(PAGE_CEILING(size) == size);
+
+	if (!pages_can_purge_lazy) {
+		return true;
+	}
+
+#ifdef _WIN32
+	VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE);
+	return false;
+#elif defined(JEMALLOC_PURGE_MADVISE_FREE)
+	/*
+	 * NOTE(review): this branch used to also require
+	 * !defined(PAGES_CAN_PURGE_LAZY).  That macro is presumably defined
+	 * whenever lazy purging is supported (confirm in pages.h), which
+	 * would have excluded this branch in exactly the builds that reach
+	 * this point and left the function without a return value.
+	 */
+	return (madvise(addr, size, MADV_FREE) != 0);
+#elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
+    !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
+	return (madvise(addr, size, MADV_DONTNEED) != 0);
+#else
+	not_reached();
+#endif
+}
+
+/*
+ * Forcibly purge the PAGE-aligned range [addr, addr + size).  Returns false
+ * on success and true on failure, or true immediately when
+ * pages_can_purge_forced is false.
+ *
+ * Uses madvise(MADV_DONTNEED) when that is configured and marked as zeroing
+ * pages; otherwise, with JEMALLOC_MAPS_COALESCE, re-commits the range via
+ * pages_commit().  The final #else branch only calls not_reached() and has
+ * no return statement.
+ */
+bool
+pages_purge_forced(void *addr, size_t size) {
+	assert(PAGE_ADDR2BASE(addr) == addr);
+	assert(PAGE_CEILING(size) == size);
+
+	if (!pages_can_purge_forced) {
+		return true;
+	}
+
+#if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
+    defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
+	return (madvise(addr, size, MADV_DONTNEED) != 0);
+#elif defined(JEMALLOC_MAPS_COALESCE)
+	/* Try to overlay a new demand-zeroed mapping. */
+	return pages_commit(addr, size);
+#else
+	not_reached();
+#endif
+}
+
+/*
+ * Advise MADV_HUGEPAGE for the huge-page-aligned range.  Returns false on
+ * success and true on failure.  Without JEMALLOC_THP it always returns true.
+ */
+bool
+pages_huge(void *addr, size_t size) {
+	assert(HUGEPAGE_ADDR2BASE(addr) == addr);
+	assert(HUGEPAGE_CEILING(size) == size);
+
+#ifdef JEMALLOC_THP
+	return (madvise(addr, size, MADV_HUGEPAGE) != 0);
+#else
+	return true;
+#endif
+}
+
+/*
+ * Advise MADV_NOHUGEPAGE for the huge-page-aligned range.  Returns false on
+ * success and true on failure.  Without JEMALLOC_THP it always returns false
+ * (note that pages_huge() returns true in that configuration).
+ */
+bool
+pages_nohuge(void *addr, size_t size) {
+	assert(HUGEPAGE_ADDR2BASE(addr) == addr);
+	assert(HUGEPAGE_CEILING(size) == size);
+
+#ifdef JEMALLOC_THP
+	return (madvise(addr, size, MADV_NOHUGEPAGE) != 0);
+#else
+	return false;
+#endif
+}
+
+/*
+ * Return the operating system's page size in bytes: dwPageSize from
+ * GetSystemInfo() on Windows, sysconf(_SC_PAGESIZE) elsewhere.  If sysconf()
+ * fails, the compile-time page size PAGE is returned.
+ */
+static size_t
+os_page_detect(void) {
+#ifdef _WIN32
+	SYSTEM_INFO si;
+	GetSystemInfo(&si);
+	return si.dwPageSize;
+#else
+	long result = sysconf(_SC_PAGESIZE);
+	if (result == -1) {
+		/*
+		 * Fall back to the page size itself.  LG_PAGE is its base-2
+		 * logarithm and is not a valid page size in bytes.
+		 */
+		return PAGE;
+	}
+	return (size_t)result;
+#endif
+}
+
+#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
+/*
+ * Read the "vm.overcommit" sysctl and report overcommit as enabled when its
+ * two lowest bits are both clear.  Returns false if the sysctl cannot be
+ * read.
+ */
+static bool
+os_overcommits_sysctl(void) {
+	int vm_overcommit;
+	size_t sz = sizeof(vm_overcommit);
+
+	if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) {
+		return false; /* Error. */
+	}
+
+	bool low_bits_clear = ((vm_overcommit & 0x3) == 0);
+	return low_bits_clear;
+}
+#endif
+
+#ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY
+/*
+ * Read the first byte of /proc/sys/vm/overcommit_memory and report whether
+ * the kernel overcommits.  Returns false when the file cannot be opened or
+ * read.
+ *
+ * syscall(2) is preferred over {open,read,close}(2) where possible, to avoid
+ * reentry during bootstrapping if another library has interposed system call
+ * wrappers.
+ */
+static bool
+os_overcommits_proc(void) {
+	const char *path = "/proc/sys/vm/overcommit_memory";
+	int fd;
+	char mode[1];
+	ssize_t nread;
+
+#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
+	fd = (int)syscall(SYS_open, path, O_RDONLY | O_CLOEXEC);
+#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat)
+	fd = (int)syscall(SYS_openat, AT_FDCWD, path, O_RDONLY | O_CLOEXEC);
+#else
+	fd = open(path, O_RDONLY | O_CLOEXEC);
+#endif
+	if (fd == -1) {
+		return false; /* Error. */
+	}
+
+#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read)
+	nread = (ssize_t)syscall(SYS_read, fd, &mode, sizeof(mode));
+#else
+	nread = read(fd, &mode, sizeof(mode));
+#endif
+
+#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
+	syscall(SYS_close, fd);
+#else
+	close(fd);
+#endif
+
+	if (nread < 1) {
+		return false; /* Error. */
+	}
+	/*
+	 * /proc/sys/vm/overcommit_memory meanings:
+	 * 0: Heuristic overcommit.
+	 * 1: Always overcommit.
+	 * 2: Never overcommit.
+	 */
+	switch (mode[0]) {
+	case '0':
+	case '1':
+		return true;
+	default:
+		return false;
+	}
+}
+#endif
+
+/*
+ * One-time initialization of this module's file-scope state: os_page,
+ * mmap_flags (non-Windows only) and os_overcommits.
+ *
+ * Returns true on failure, which here means the detected OS page size is
+ * larger than PAGE (a message is printed, and abort() is called when
+ * opt_abort is set).  Returns false on success.
+ */
+bool
+pages_boot(void) {
+	os_page = os_page_detect();
+	if (os_page > PAGE) {
+		malloc_write("<jemalloc>: Unsupported system page size\n");
+		if (opt_abort) {
+			abort();
+		}
+		return true;
+	}
+
+#ifndef _WIN32
+	mmap_flags = MAP_PRIVATE | MAP_ANON;
+#endif
+
+	/* Pick the overcommit probe that the build configuration provides. */
+#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
+	os_overcommits = os_overcommits_sysctl();
+#elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY)
+	os_overcommits = os_overcommits_proc();
+#  ifdef MAP_NORESERVE
+	/* With overcommit detected, also pass MAP_NORESERVE to mmap(). */
+	if (os_overcommits) {
+		mmap_flags |= MAP_NORESERVE;
+	}
+#  endif
+#else
+	os_overcommits = false;
+#endif
+
+	return false;
+}
--- a/deps/jemalloc/src/prng.c
+++ b/deps/jemalloc/src/prng.c
@@ -0,0 +1,3 @@
+#define JEMALLOC_PRNG_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
--- a/deps/jemalloc/src/spin.c
+++ b/deps/jemalloc/src/spin.c
@@ -0,0 +1,4 @@
+#define JEMALLOC_SPIN_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+
+#include "jemalloc/internal/spin.h"
--- a/deps/jemalloc/src/sz.c
+++ b/deps/jemalloc/src/sz.c
@@ -0,0 +1,106 @@
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/sz.h"
+
+JEMALLOC_ALIGNED(CACHELINE)
+const size_t sz_pind2sz_tab[NPSIZES+1] = { /* Sizes of psz == yes classes. */
+#define PSZ_yes(lg_grp, ndelta, lg_delta)				\
+	(((ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta))),
+#define PSZ_no(lg_grp, ndelta, lg_delta)
+#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, lg_delta_lookup) \
+	PSZ_##psz(lg_grp, ndelta, lg_delta)
+	SIZE_CLASSES
+#undef PSZ_yes
+#undef PSZ_no
+#undef SC
+	(LARGE_MAXCLASS + PAGE) /* Appended after the generated entries. */
+};
+
+JEMALLOC_ALIGNED(CACHELINE)
+const size_t sz_index2size_tab[NSIZES] = { /* One size per SC() entry. */
+#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, lg_delta_lookup) \
+	((ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta)),
+	SIZE_CLASSES
+#undef SC
+};
+
+JEMALLOC_ALIGNED(CACHELINE)
+const uint8_t sz_size2index_tab[] = { /* Filled by S2B_<lg_delta_lookup>(index). */
+#if LG_TINY_MIN == 0
+#warning "Dangerous LG_TINY_MIN"
+#define S2B_0(i)	i,
+#elif LG_TINY_MIN == 1
+#warning "Dangerous LG_TINY_MIN"
+#define S2B_1(i)	i,
+#elif LG_TINY_MIN == 2
+#warning "Dangerous LG_TINY_MIN"
+#define S2B_2(i)	i,
+#elif LG_TINY_MIN == 3
+#define S2B_3(i)	i,
+#elif LG_TINY_MIN == 4
+#define S2B_4(i)	i,
+#elif LG_TINY_MIN == 5
+#define S2B_5(i)	i,
+#elif LG_TINY_MIN == 6
+#define S2B_6(i)	i,
+#elif LG_TINY_MIN == 7
+#define S2B_7(i)	i,
+#elif LG_TINY_MIN == 8
+#define S2B_8(i)	i,
+#elif LG_TINY_MIN == 9
+#define S2B_9(i)	i,
+#elif LG_TINY_MIN == 10
+#define S2B_10(i)	i,
+#elif LG_TINY_MIN == 11
+#define S2B_11(i)	i,
+#else
+#error "Unsupported LG_TINY_MIN"
+#endif
+#if LG_TINY_MIN < 1 /* From here on, each S2B_k is two copies of S2B_(k-1). */
+#define S2B_1(i)	S2B_0(i) S2B_0(i)
+#endif
+#if LG_TINY_MIN < 2
+#define S2B_2(i)	S2B_1(i) S2B_1(i)
+#endif
+#if LG_TINY_MIN < 3
+#define S2B_3(i)	S2B_2(i) S2B_2(i)
+#endif
+#if LG_TINY_MIN < 4
+#define S2B_4(i)	S2B_3(i) S2B_3(i)
+#endif
+#if LG_TINY_MIN < 5
+#define S2B_5(i)	S2B_4(i) S2B_4(i)
+#endif
+#if LG_TINY_MIN < 6
+#define S2B_6(i)	S2B_5(i) S2B_5(i)
+#endif
+#if LG_TINY_MIN < 7
+#define S2B_7(i)	S2B_6(i) S2B_6(i)
+#endif
+#if LG_TINY_MIN < 8
+#define S2B_8(i)	S2B_7(i) S2B_7(i)
+#endif
+#if LG_TINY_MIN < 9
+#define S2B_9(i)	S2B_8(i) S2B_8(i)
+#endif
+#if LG_TINY_MIN < 10
+#define S2B_10(i)	S2B_9(i) S2B_9(i)
+#endif
+#if LG_TINY_MIN < 11
+#define S2B_11(i)	S2B_10(i) S2B_10(i)
+#endif
+#define S2B_no(i)
+#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, lg_delta_lookup) \
+	S2B_##lg_delta_lookup(index)
+	SIZE_CLASSES
+#undef S2B_3 /* NOTE(review): S2B_0..S2B_2, when defined above, are never #undef'd. */
+#undef S2B_4
+#undef S2B_5
+#undef S2B_6
+#undef S2B_7
+#undef S2B_8
+#undef S2B_9
+#undef S2B_10
+#undef S2B_11
+#undef S2B_no
+#undef SC
+};
--- a/deps/jemalloc/src/ticker.c
+++ b/deps/jemalloc/src/ticker.c
@@ -0,0 +1,3 @@
+#define JEMALLOC_TICKER_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
--- a/deps/jemalloc/src/witness.c
+++ b/deps/jemalloc/src/witness.c
@@ -0,0 +1,100 @@
+#define JEMALLOC_WITNESS_C_
+#include "jemalloc/internal/jemalloc_preamble.h"
+#include "jemalloc/internal/jemalloc_internal_includes.h"
+
+#include "jemalloc/internal/assert.h"
+#include "jemalloc/internal/malloc_io.h"
+
+void
+witness_init(witness_t *witness, const char *name, witness_rank_t rank,
+    witness_comp_t *comp, void *opaque) {
+	witness->name = name; /* The pointer is stored, not copied. */
+	witness->rank = rank;
+	witness->comp = comp;
+	witness->opaque = opaque; /* Stored as-is; not interpreted here. */
+}
+
+static void
+witness_lock_error_impl(const witness_list_t *witnesses,
+    const witness_t *witness) {
+	witness_t *w;
+
+	malloc_printf("<jemalloc>: Lock rank order reversal:");
+	ql_foreach(w, witnesses, link) { /* List first, then `witness`. */
+		malloc_printf(" %s(%u)", w->name, w->rank);
+	}
+	malloc_printf(" %s(%u)\n", witness->name, witness->rank);
+	abort(); /* Unconditional: this handler never returns. */
+}
+witness_lock_error_t *JET_MUTABLE witness_lock_error = witness_lock_error_impl;
+
+static void
+witness_owner_error_impl(const witness_t *witness) {
+	malloc_printf("<jemalloc>: Should own %s(%u)\n", witness->name,
+	    witness->rank);
+	abort(); /* Unconditional: this handler never returns. */
+}
+witness_owner_error_t *JET_MUTABLE witness_owner_error =
+    witness_owner_error_impl; /* Handler pointer, initially the impl. */
+
+static void
+witness_not_owner_error_impl(const witness_t *witness) {
+	malloc_printf("<jemalloc>: Should not own %s(%u)\n", witness->name,
+	    witness->rank);
+	abort(); /* Unconditional: this handler never returns. */
+}
+witness_not_owner_error_t *JET_MUTABLE witness_not_owner_error =
+    witness_not_owner_error_impl; /* Handler pointer, initially the impl. */
+
+static void
+witness_depth_error_impl(const witness_list_t *witnesses,
+    witness_rank_t rank_inclusive, unsigned depth) {
+	witness_t *w;
+
+	malloc_printf("<jemalloc>: Should own %u lock%s of rank >= %u:", depth,
+	    (depth != 1) ?  "s" : "", rank_inclusive); /* Pluralize "lock". */
+	ql_foreach(w, witnesses, link) { /* Print every listed witness. */
+		malloc_printf(" %s(%u)", w->name, w->rank);
+	}
+	malloc_printf("\n");
+	abort(); /* Unconditional: this handler never returns. */
+}
+witness_depth_error_t *JET_MUTABLE witness_depth_error =
+    witness_depth_error_impl; /* Handler pointer, initially the impl. */
+
+void
+witnesses_cleanup(witness_tsd_t *witness_tsd) {
+	witness_assert_lockless(witness_tsd_tsdn(witness_tsd));
+
+	/* Nothing else to do: the assertion above is the only action. */
+}
+
+void
+witness_prefork(witness_tsd_t *witness_tsd) {
+	if (!config_debug) { /* No-op unless config_debug is set. */
+		return;
+	}
+	witness_tsd->forking = true; /* Cleared by the postfork functions. */
+}
+
+void
+witness_postfork_parent(witness_tsd_t *witness_tsd) {
+	if (!config_debug) { /* No-op unless config_debug is set. */
+		return;
+	}
+	witness_tsd->forking = false; /* Undoes witness_prefork(). */
+}
+
+void
+witness_postfork_child(witness_tsd_t *witness_tsd) {
+	if (!config_debug) { /* No-op unless config_debug is set. */
+		return;
+	}
+#ifndef JEMALLOC_MUTEX_INIT_CB
+	witness_list_t *witnesses;
+
+	witnesses = &witness_tsd->witnesses;
+	ql_new(witnesses); /* Re-init the list; the parent path does not. */
+#endif
+	witness_tsd->forking = false; /* Undoes witness_prefork(). */
+}
--- a/deps/recastnavigation/Detour/Include/DetourAlloc.h
+++ b/deps/recastnavigation/Detour/Include/DetourAlloc.h
@@ -0,0 +1,61 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#ifndef DETOURALLOCATOR_H
+#define DETOURALLOCATOR_H
+
+#include <stddef.h>
+
+/// Provides hint values to the memory allocator on how long the
+/// memory is expected to be used.
+enum dtAllocHint
+{
+	DT_ALLOC_PERM,		///< Memory persists after a function call.
+	DT_ALLOC_TEMP		///< Memory used temporarily within a function.
+};
+
+/// A memory allocation function.
+///  @param[in]		size	The size, in bytes of memory, to allocate.
+///  @param[in]		hint	A hint to the allocator on how long the memory is expected to be in use.
+///  @return A pointer to the beginning of the allocated memory block, or null if the allocation failed.
+///  @see dtAllocSetCustom
+typedef void* (dtAllocFunc)(size_t size, dtAllocHint hint);
+
+/// A memory deallocation function.
+///  @param[in]		ptr		A pointer to a memory block previously allocated using #dtAllocFunc.
+/// @see dtAllocSetCustom
+typedef void (dtFreeFunc)(void* ptr);
+
+/// Sets the base custom allocation functions to be used by Detour.
+///  @param[in]		allocFunc	The memory allocation function to be used by #dtAlloc
+///  @param[in]		freeFunc	The memory de-allocation function to be used by #dtFree
+void dtAllocSetCustom(dtAllocFunc *allocFunc, dtFreeFunc *freeFunc);
+
+/// Allocates a memory block.
+///  @param[in]		size	The size, in bytes of memory, to allocate.
+///  @param[in]		hint	A hint to the allocator on how long the memory is expected to be in use.
+///  @return A pointer to the beginning of the allocated memory block, or null if the allocation failed.
+/// @see dtFree
+void* dtAlloc(size_t size, dtAllocHint hint);
+
+/// Deallocates a memory block.
+///  @param[in]		ptr		A pointer to a memory block previously allocated using #dtAlloc.
+/// @see dtAlloc
+void dtFree(void* ptr);
+
+#endif
--- a/deps/recastnavigation/Detour/Include/DetourAssert.h
+++ b/deps/recastnavigation/Detour/Include/DetourAssert.h
@@ -0,0 +1,33 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#ifndef DETOURASSERT_H
+#define DETOURASSERT_H
+
+// Note: This header file's only purpose is to define dtAssert.
+// Feel free to change the file and include your own implementation instead.
+
+#ifdef NDEBUG
+// From http://cnicholson.net/2009/02/stupid-c-tricks-adventures-in-assert/
+#	define dtAssert(x) do { (void)sizeof(x); } while((void)(__LINE__==-1),false)  
+#else
+#	include <assert.h> 
+#	define dtAssert assert
+#endif
+
+#endif // DETOURASSERT_H
--- a/deps/recastnavigation/Detour/Include/DetourCommon.h
+++ b/deps/recastnavigation/Detour/Include/DetourCommon.h
@@ -0,0 +1,550 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#ifndef DETOURCOMMON_H
+#define DETOURCOMMON_H
+
+#include "DetourMath.h"
+#include <stddef.h>
+
+/**
+@defgroup detour Detour
+
+Members in this module are used to create, manipulate, and query navigation 
+meshes.
+
+@note This is a summary list of members.  Use the index or search 
+feature to find minor members.
+*/
+
+/// @name General helper functions
+/// @{
+
+/// Used to ignore a function parameter.  VS complains about unused parameters
+/// and this silences the warning.
+///  @param [in] _ Unused parameter
+template<class T> void dtIgnoreUnused(const T&) { }
+
+/// Swaps the values of the two parameters.
+///  @param[in,out]	a	Value A
+///  @param[in,out]	b	Value B
+template<class T> inline void dtSwap(T& a, T& b) { T t = a; a = b; b = t; }
+
+/// Returns the minimum of two values.
+///  @param[in]		a	Value A
+///  @param[in]		b	Value B
+///  @return The minimum of the two values.
+template<class T> inline T dtMin(T a, T b) { return a < b ? a : b; }
+
+/// Returns the maximum of two values.
+///  @param[in]		a	Value A
+///  @param[in]		b	Value B
+///  @return The maximum of the two values.
+template<class T> inline T dtMax(T a, T b) { return a > b ? a : b; }
+
+/// Returns the absolute value.
+///  @param[in]		a	The value.
+///  @return The absolute value of the specified value.
+template<class T> inline T dtAbs(T a) { return a < 0 ? -a : a; }
+
+/// Returns the square of the value.
+///  @param[in]		a	The value.
+///  @return The square of the value.
+template<class T> inline T dtSqr(T a) { return a*a; }
+
+/// Clamps the value to the specified range.
+///  @param[in]		v	The value to clamp.
+///  @param[in]		mn	The minimum permitted return value.
+///  @param[in]		mx	The maximum permitted return value.
+///  @return The value, clamped to the specified range.
+template<class T> inline T dtClamp(T v, T mn, T mx) { return v < mn ? mn : (v > mx ? mx : v); }
+
+/// @}
+/// @name Vector helper functions.
+/// @{
+
+/// Derives the cross product of two vectors. (@p v1 x @p v2)
+///  @param[out]	dest	The cross product. [(x, y, z)]
+///  @param[in]		v1		A Vector [(x, y, z)]
+///  @param[in]		v2		A vector [(x, y, z)]
+inline void dtVcross(float* dest, const float* v1, const float* v2)
+{
+	dest[0] = v1[1]*v2[2] - v1[2]*v2[1];
+	dest[1] = v1[2]*v2[0] - v1[0]*v2[2];
+	dest[2] = v1[0]*v2[1] - v1[1]*v2[0]; 
+}
+
+/// Derives the dot product of two vectors. (@p v1 . @p v2)
+///  @param[in]		v1	A Vector [(x, y, z)]
+///  @param[in]		v2	A vector [(x, y, z)]
+/// @return The dot product.
+inline float dtVdot(const float* v1, const float* v2)
+{
+	return v1[0]*v2[0] + v1[1]*v2[1] + v1[2]*v2[2];
+}
+
+/// Performs a scaled vector addition. (@p v1 + (@p v2 * @p s))
+///  @param[out]	dest	The result vector. [(x, y, z)]
+///  @param[in]		v1		The base vector. [(x, y, z)]
+///  @param[in]		v2		The vector to scale and add to @p v1. [(x, y, z)]
+///  @param[in]		s		The amount to scale @p v2 by before adding to @p v1.
+inline void dtVmad(float* dest, const float* v1, const float* v2, const float s)
+{
+	dest[0] = v1[0]+v2[0]*s;
+	dest[1] = v1[1]+v2[1]*s;
+	dest[2] = v1[2]+v2[2]*s;
+}
+
+/// Performs a linear interpolation between two vectors. (@p v1 toward @p v2)
+///  @param[out]	dest	The result vector. [(x, y, z)]
+///  @param[in]		v1		The starting vector.
+///  @param[in]		v2		The destination vector.
+///	 @param[in]		t		The interpolation factor. [Limits: 0 <= value <= 1.0]
+inline void dtVlerp(float* dest, const float* v1, const float* v2, const float t)
+{
+	dest[0] = v1[0]+(v2[0]-v1[0])*t;
+	dest[1] = v1[1]+(v2[1]-v1[1])*t;
+	dest[2] = v1[2]+(v2[2]-v1[2])*t;
+}
+
+/// Performs a vector addition. (@p v1 + @p v2)
+///  @param[out]	dest	The result vector. [(x, y, z)]
+///  @param[in]		v1		The base vector. [(x, y, z)]
+///  @param[in]		v2		The vector to add to @p v1. [(x, y, z)]
+inline void dtVadd(float* dest, const float* v1, const float* v2)
+{
+	dest[0] = v1[0]+v2[0];
+	dest[1] = v1[1]+v2[1];
+	dest[2] = v1[2]+v2[2];
+}
+
+/// Performs a vector subtraction. (@p v1 - @p v2)
+///  @param[out]	dest	The result vector. [(x, y, z)]
+///  @param[in]		v1		The base vector. [(x, y, z)]
+///  @param[in]		v2		The vector to subtract from @p v1. [(x, y, z)]
+inline void dtVsub(float* dest, const float* v1, const float* v2)
+{
+	dest[0] = v1[0]-v2[0];
+	dest[1] = v1[1]-v2[1];
+	dest[2] = v1[2]-v2[2];
+}
+
+/// Scales the vector by the specified value. (@p v * @p t)
+///  @param[out]	dest	The result vector. [(x, y, z)]
+///  @param[in]		v		The vector to scale. [(x, y, z)]
+///  @param[in]		t		The scaling factor.
+inline void dtVscale(float* dest, const float* v, const float t)
+{
+	dest[0] = v[0]*t;
+	dest[1] = v[1]*t;
+	dest[2] = v[2]*t;
+}
+
+/// Selects the minimum value of each element from the specified vectors.
+///  @param[in,out]	mn	A vector.  (Will be updated with the result.) [(x, y, z)]
+///  @param[in]	v	A vector. [(x, y, z)]
+inline void dtVmin(float* mn, const float* v)
+{
+	mn[0] = dtMin(mn[0], v[0]);
+	mn[1] = dtMin(mn[1], v[1]);
+	mn[2] = dtMin(mn[2], v[2]);
+}
+
+/// Selects the maximum value of each element from the specified vectors.
+///  @param[in,out]	mx	A vector.  (Will be updated with the result.) [(x, y, z)]
+///  @param[in]		v	A vector. [(x, y, z)]
+inline void dtVmax(float* mx, const float* v)
+{
+	mx[0] = dtMax(mx[0], v[0]);
+	mx[1] = dtMax(mx[1], v[1]);
+	mx[2] = dtMax(mx[2], v[2]);
+}
+
+/// Sets the vector elements to the specified values.
+///  @param[out]	dest	The result vector. [(x, y, z)]
+///  @param[in]		x		The x-value of the vector.
+///  @param[in]		y		The y-value of the vector.
+///  @param[in]		z		The z-value of the vector.
+inline void dtVset(float* dest, const float x, const float y, const float z)
+{
+	dest[0] = x; dest[1] = y; dest[2] = z;
+}
+
+/// Performs a vector copy.
+///  @param[out]	dest	The result. [(x, y, z)]
+///  @param[in]		a		The vector to copy. [(x, y, z)]
+inline void dtVcopy(float* dest, const float* a)
+{
+	dest[0] = a[0];
+	dest[1] = a[1];
+	dest[2] = a[2];
+}
+
+/// Derives the scalar length of the vector.
+///  @param[in]		v The vector. [(x, y, z)]
+/// @return The scalar length of the vector.
+inline float dtVlen(const float* v)
+{
+	return dtMathSqrtf(v[0] * v[0] + v[1] * v[1] + v[2] * v[2]);
+}
+
+/// Derives the square of the scalar length of the vector. (len * len)
+///  @param[in]		v The vector. [(x, y, z)]
+/// @return The square of the scalar length of the vector.
+inline float dtVlenSqr(const float* v)
+{
+	return v[0]*v[0] + v[1]*v[1] + v[2]*v[2];
+}
+
+/// Returns the distance between two points.
+///  @param[in]		v1	A point. [(x, y, z)]
+///  @param[in]		v2	A point. [(x, y, z)]
+/// @return The distance between the two points.
+inline float dtVdist(const float* v1, const float* v2)
+{
+	const float dx = v2[0] - v1[0];
+	const float dy = v2[1] - v1[1];
+	const float dz = v2[2] - v1[2];
+	return dtMathSqrtf(dx*dx + dy*dy + dz*dz);
+}
+
+/// Returns the square of the distance between two points.
+///  @param[in]		v1	A point. [(x, y, z)]
+///  @param[in]		v2	A point. [(x, y, z)]
+/// @return The square of the distance between the two points.
+inline float dtVdistSqr(const float* v1, const float* v2)
+{
+	const float dx = v2[0] - v1[0];
+	const float dy = v2[1] - v1[1];
+	const float dz = v2[2] - v1[2];
+	return dx*dx + dy*dy + dz*dz;
+}
+
+/// Derives the distance between the specified points on the xz-plane.
+///  @param[in]		v1	A point. [(x, y, z)]
+///  @param[in]		v2	A point. [(x, y, z)]
+/// @return The distance between the points on the xz-plane.
+///
+/// The vectors are projected onto the xz-plane, so the y-values are ignored.
+inline float dtVdist2D(const float* v1, const float* v2)
+{
+	const float dx = v2[0] - v1[0];
+	const float dz = v2[2] - v1[2];
+	return dtMathSqrtf(dx*dx + dz*dz);
+}
+
+/// Derives the square of the distance between the specified points on the xz-plane.
+///  @param[in]		v1	A point. [(x, y, z)]
+///  @param[in]		v2	A point. [(x, y, z)]
+/// @return The square of the distance between the points on the xz-plane.
+inline float dtVdist2DSqr(const float* v1, const float* v2)
+{
+	const float dx = v2[0] - v1[0];
+	const float dz = v2[2] - v1[2];
+	return dx*dx + dz*dz;
+}
+
+/// Normalizes the vector.
+///  @param[in,out]	v	The vector to normalize. [(x, y, z)]
+inline void dtVnormalize(float* v)
+{
+	float d = 1.0f / dtMathSqrtf(dtSqr(v[0]) + dtSqr(v[1]) + dtSqr(v[2])); // No zero-length guard: a zero vector divides by zero here.
+	v[0] *= d;
+	v[1] *= d;
+	v[2] *= d;
+}
+
+/// Performs a 'sloppy' colocation check of the specified points.
+///  @param[in]		p0	A point. [(x, y, z)]
+///  @param[in]		p1	A point. [(x, y, z)]
+/// @return True if the points are considered to be at the same location.
+///
+/// Basically, this function will return true if the specified points are 
+/// close enough to each other to be considered colocated.
+inline bool dtVequal(const float* p0, const float* p1)
+{
+	static const float thr = dtSqr(1.0f/16384.0f);
+	const float d = dtVdistSqr(p0, p1);
+	return d < thr;
+}
+
+/// Derives the dot product of two vectors on the xz-plane. (@p u . @p v)
+///  @param[in]		u		A vector [(x, y, z)]
+///  @param[in]		v		A vector [(x, y, z)]
+/// @return The dot product on the xz-plane.
+///
+/// The vectors are projected onto the xz-plane, so the y-values are ignored.
+inline float dtVdot2D(const float* u, const float* v)
+{
+	return u[0]*v[0] + u[2]*v[2];
+}
+
+/// Derives the xz-plane 2D perp product of the two vectors. (uz*vx - ux*vz)
+///  @param[in]		u		The LHV vector [(x, y, z)]
+///  @param[in]		v		The RHV vector [(x, y, z)]
+/// @return The 2D perp product on the xz-plane.
+///
+/// The vectors are projected onto the xz-plane, so the y-values are ignored.
+inline float dtVperp2D(const float* u, const float* v)
+{
+	return u[2]*v[0] - u[0]*v[2];
+}
+
+/// @}
+/// @name Computational geometry helper functions.
+/// @{
+
+/// Derives the signed xz-plane area of the triangle ABC, or the relationship of line AB to point C.
+///  @param[in]		a		Vertex A. [(x, y, z)]
+///  @param[in]		b		Vertex B. [(x, y, z)]
+///  @param[in]		c		Vertex C. [(x, y, z)]
+/// @return Twice the signed xz-plane area of the triangle.
+inline float dtTriArea2D(const float* a, const float* b, const float* c)
+{
+	const float abx = b[0] - a[0];
+	const float abz = b[2] - a[2];
+	const float acx = c[0] - a[0];
+	const float acz = c[2] - a[2];
+	return acx*abz - abx*acz;
+}
+
+/// Determines if two axis-aligned bounding boxes overlap.
+///  @param[in]		amin	Minimum bounds of box A. [(x, y, z)]
+///  @param[in]		amax	Maximum bounds of box A. [(x, y, z)]
+///  @param[in]		bmin	Minimum bounds of box B. [(x, y, z)]
+///  @param[in]		bmax	Maximum bounds of box B. [(x, y, z)]
+/// @return True if the two AABB's overlap.
+/// @see dtOverlapBounds
+inline bool dtOverlapQuantBounds(const unsigned short amin[3], const unsigned short amax[3],
+								 const unsigned short bmin[3], const unsigned short bmax[3])
+{
+	bool overlap = true;
+	overlap = (amin[0] > bmax[0] || amax[0] < bmin[0]) ? false : overlap;
+	overlap = (amin[1] > bmax[1] || amax[1] < bmin[1]) ? false : overlap;
+	overlap = (amin[2] > bmax[2] || amax[2] < bmin[2]) ? false : overlap;
+	return overlap;
+}
+
+/// Determines if two axis-aligned bounding boxes overlap.
+///  @param[in]		amin	Minimum bounds of box A. [(x, y, z)]
+///  @param[in]		amax	Maximum bounds of box A. [(x, y, z)]
+///  @param[in]		bmin	Minimum bounds of box B. [(x, y, z)]
+///  @param[in]		bmax	Maximum bounds of box B. [(x, y, z)]
+/// @return True if the two AABB's overlap.
+/// @see dtOverlapQuantBounds
+inline bool dtOverlapBounds(const float* amin, const float* amax,
+							const float* bmin, const float* bmax)
+{
+	bool overlap = true;
+	overlap = (amin[0] > bmax[0] || amax[0] < bmin[0]) ? false : overlap;
+	overlap = (amin[1] > bmax[1] || amax[1] < bmin[1]) ? false : overlap;
+	overlap = (amin[2] > bmax[2] || amax[2] < bmin[2]) ? false : overlap;
+	return overlap;
+}
+
+/// Derives the closest point on a triangle from the specified reference point.
+///  @param[out]	closest	The closest point on the triangle.	
+///  @param[in]		p		The reference point from which to test. [(x, y, z)]
+///  @param[in]		a		Vertex A of triangle ABC. [(x, y, z)]
+///  @param[in]		b		Vertex B of triangle ABC. [(x, y, z)]
+///  @param[in]		c		Vertex C of triangle ABC. [(x, y, z)]
+void dtClosestPtPointTriangle(float* closest, const float* p,
+							  const float* a, const float* b, const float* c);
+
+/// Derives the y-axis height of the closest point on the triangle from the specified reference point.
+///  @param[in]		p		The reference point from which to test. [(x, y, z)]
+///  @param[in]		a		Vertex A of triangle ABC. [(x, y, z)]
+///  @param[in]		b		Vertex B of triangle ABC. [(x, y, z)]
+///  @param[in]		c		Vertex C of triangle ABC. [(x, y, z)]
+///  @param[out]	h		The resulting height.
+bool dtClosestHeightPointTriangle(const float* p, const float* a, const float* b, const float* c, float& h);
+
+bool dtIntersectSegmentPoly2D(const float* p0, const float* p1,
+							  const float* verts, int nverts,
+							  float& tmin, float& tmax,
+							  int& segMin, int& segMax);
+
+bool dtIntersectSegSeg2D(const float* ap, const float* aq,
+						 const float* bp, const float* bq,
+						 float& s, float& t);
+
+/// Determines if the specified point is inside the convex polygon on the xz-plane.
+///  @param[in]		pt		The point to check. [(x, y, z)]
+///  @param[in]		verts	The polygon vertices. [(x, y, z) * @p nverts]
+///  @param[in]		nverts	The number of vertices. [Limit: >= 3]
+/// @return True if the point is inside the polygon.
+bool dtPointInPolygon(const float* pt, const float* verts, const int nverts);
+
+bool dtDistancePtPolyEdgesSqr(const float* pt, const float* verts, const int nverts,
+							float* ed, float* et);
+
+float dtDistancePtSegSqr2D(const float* pt, const float* p, const float* q, float& t);
+
+/// Derives the centroid of a convex polygon.
+///  @param[out]	tc		The centroid of the polygon. [(x, y, z)]
+///  @param[in]		idx		The polygon indices. [(vertIndex) * @p nidx]
+///  @param[in]		nidx	The number of indices in the polygon. [Limit: >= 3]
+///  @param[in]		verts	The polygon vertices. [(x, y, z) * vertCount]
+void dtCalcPolyCenter(float* tc, const unsigned short* idx, int nidx, const float* verts);
+
+/// Determines if the two convex polygons overlap on the xz-plane.
+///  @param[in]		polya		Polygon A vertices.	[(x, y, z) * @p npolya]
+///  @param[in]		npolya		The number of vertices in polygon A.
+///  @param[in]		polyb		Polygon B vertices.	[(x, y, z) * @p npolyb]
+///  @param[in]		npolyb		The number of vertices in polygon B.
+/// @return True if the two polygons overlap.
+bool dtOverlapPolyPoly2D(const float* polya, const int npolya,
+						 const float* polyb, const int npolyb);
+
+/// @}
+/// @name Miscellaneous functions.
+/// @{
+
+inline unsigned int dtNextPow2(unsigned int v) // Smallest power of two >= v; v == 0 yields 0.
+{
+	v--; // So that an exact power of two maps to itself.
+	v |= v >> 1; // These shifts copy the highest set bit into every lower bit.
+	v |= v >> 2;
+	v |= v >> 4;
+	v |= v >> 8;
+	v |= v >> 16; // Shifts up to 16 cover 32 bits; assumes unsigned int is no wider.
+	v++; // All-ones below the top bit, plus one, is the next power of two.
+	return v;
+}
+
+inline unsigned int dtIlog2(unsigned int v) // floor(log2(v)) for v > 0; returns 0 for v == 0.
+{
+	unsigned int r;
+	unsigned int shift;
+	r = (v > 0xffff) << 4; v >>= r; // Each step tests the upper half and shifts it down: 16, 8, 4, 2 bits.
+	shift = (v > 0xff) << 3; v >>= shift; r |= shift;
+	shift = (v > 0xf) << 2; v >>= shift; r |= shift;
+	shift = (v > 0x3) << 1; v >>= shift; r |= shift;
+	r |= (v >> 1); // v is now 0..3; its bit 1 is the last bit of the result.
+	return r;
+}
+
+inline int dtAlign4(int x) { return (x+3) & ~3; } // Rounds x up to the next multiple of 4.
+
+inline int dtOppositeTile(int side) { return (side+4) & 0x7; } // (side + 4) mod 8; presumably sides are numbered 0-7 -- TODO confirm.
+
+inline void dtSwapByte(unsigned char* a, unsigned char* b) // Exchanges the two bytes pointed to by a and b.
+{
+	unsigned char tmp = *a;
+	*a = *b;
+	*b = tmp;
+}
+
+inline void dtSwapEndian(unsigned short* v) // Reverses byte order in place; touches bytes 0-1 only (assumes a 2-byte type).
+{
+	unsigned char* x = (unsigned char*)v;
+	dtSwapByte(x+0, x+1);
+}
+
+inline void dtSwapEndian(short* v) // Same as the unsigned short overload.
+{
+	unsigned char* x = (unsigned char*)v;
+	dtSwapByte(x+0, x+1);
+}
+
+inline void dtSwapEndian(unsigned int* v) // Reverses byte order in place; touches bytes 0-3 only (assumes a 4-byte type).
+{
+	unsigned char* x = (unsigned char*)v;
+	dtSwapByte(x+0, x+3); dtSwapByte(x+1, x+2);
+}
+
+inline void dtSwapEndian(int* v) // Same as the unsigned int overload.
+{
+	unsigned char* x = (unsigned char*)v;
+	dtSwapByte(x+0, x+3); dtSwapByte(x+1, x+2);
+}
+
+inline void dtSwapEndian(float* v) // Same 4-byte reversal, applied to the float's storage bytes.
+{
+	unsigned char* x = (unsigned char*)v;
+	dtSwapByte(x+0, x+3); dtSwapByte(x+1, x+2);
+}
+
+void dtRandomPointInConvexPoly(const float* pts, const int npts, float* areas,
+							   const float s, const float t, float* out);
+
+template<typename TypeToRetrieveAs>
+TypeToRetrieveAs* dtGetThenAdvanceBufferPointer(const unsigned char*& buffer, const size_t distanceToAdvance)
+{
+	TypeToRetrieveAs* returnPointer = reinterpret_cast<TypeToRetrieveAs*>(buffer); // Compiles only if TypeToRetrieveAs is const-qualified: reinterpret_cast cannot drop const.
+	buffer += distanceToAdvance; // Advances the caller's pointer (passed by reference).
+	return returnPointer; // The position of buffer before the advance.
+}
+
+template<typename TypeToRetrieveAs>
+TypeToRetrieveAs* dtGetThenAdvanceBufferPointer(unsigned char*& buffer, const size_t distanceToAdvance)
+{
+	TypeToRetrieveAs* returnPointer = reinterpret_cast<TypeToRetrieveAs*>(buffer); // No alignment or bounds check is made here.
+	buffer += distanceToAdvance; // Advances the caller's pointer (passed by reference).
+	return returnPointer; // The position of buffer before the advance.
+}
+
+
+/// @}
+
+#endif // DETOURCOMMON_H
+
+///////////////////////////////////////////////////////////////////////////
+
+// This section contains detailed documentation for members that don't have
+// a source file. It reduces clutter in the main section of the header.
+
+/**
+
+@fn float dtTriArea2D(const float* a, const float* b, const float* c)
+@par
+
+The vertices are projected onto the xz-plane, so the y-values are ignored.
+
+This is a low cost function that can be used for various purposes.  Its main purpose
+is for point/line relationship testing.
+
+In all cases: A value of zero indicates that all vertices are collinear or represent the same point.
+(On the xz-plane.)
+
+When used for point/line relationship tests, AB usually represents a line against which
+the C point is to be tested.  In this case:
+
+A positive value indicates that point C is to the left of line AB, looking from A toward B.<br/>
+A negative value indicates that point C is to the right of line AB, looking from A toward B.
+
+When used for evaluating a triangle:
+
+The absolute value of the return value is two times the area of the triangle when it is
+projected onto the xz-plane.
+
+A positive return value indicates:
+
+<ul>
+<li>The vertices are wrapped in the normal Detour wrap direction.</li>
+<li>The triangle's 3D face normal is in the general up direction.</li>
+</ul>
+
+A negative return value indicates:
+
+<ul>
+<li>The vertices are reverse wrapped. (Wrapped opposite the normal Detour wrap direction.)</li>
+<li>The triangle's 3D face normal is in the general down direction.</li>
+</ul>
+
+*/
--- a/deps/recastnavigation/Detour/Include/DetourMath.h
+++ b/deps/recastnavigation/Detour/Include/DetourMath.h
@@ -0,0 +1,20 @@
+/**
+@defgroup detour Detour
+
+Members in this module are wrappers around the standard math library
+*/
+
+#ifndef DETOURMATH_H
+#define DETOURMATH_H
+
+#include <math.h>
+
+inline float dtMathFabsf(float x) { return fabsf(x); } // Each wrapper here forwards unchanged to the <math.h> function it names.
+inline float dtMathSqrtf(float x) { return sqrtf(x); }
+inline float dtMathFloorf(float x) { return floorf(x); }
+inline float dtMathCeilf(float x) { return ceilf(x); }
+inline float dtMathCosf(float x) { return cosf(x); }
+inline float dtMathSinf(float x) { return sinf(x); }
+inline float dtMathAtan2f(float y, float x) { return atan2f(y, x); } // Argument order is (y, x), as in atan2f.
+
+#endif
--- a/deps/recastnavigation/Detour/Include/DetourNavMesh.h
+++ b/deps/recastnavigation/Detour/Include/DetourNavMesh.h
@@ -0,0 +1,777 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#ifndef DETOURNAVMESH_H
+#define DETOURNAVMESH_H
+
+#include "DetourAlloc.h"
+#include "DetourStatus.h"
+
+// Define DT_POLYREF64 (below or in a build config) to use 64bit polyrefs; remove the define to use 32bit polyrefs.
+// Generally not needed, useful for very large worlds.
+// Note: tiles build using 32bit refs are not compatible with 64bit refs!
+#define DT_POLYREF64 1
+
+#ifdef DT_POLYREF64
+// TODO: figure out a multiplatform version of uint64_t
+// - maybe: https://code.google.com/p/msinttypes/
+// - or: http://www.azillionmonkeys.com/qed/pstdint.h
+#if defined(WIN32) && !defined(__MINGW32__)
+/// Do not rename back to uint64. Otherwise mac complains about typedef redefinition
+typedef unsigned __int64    uint64_d;
+#else
+#include <stdint.h>
+#ifndef uint64_t
+#ifdef __linux__
+#include <linux/types.h>
+#endif
+#endif
+/// Do not rename back to uint64. Otherwise mac complains about typedef redefinition
+typedef uint64_t            uint64_d;
+#endif 
+#endif
+
+// Note: If you want to use 64-bit refs, change the types of both dtPolyRef & dtTileRef.
+// It is also recommended that you change dtHashRef() to a proper 64-bit hash.
+
+/// A handle to a polygon within a navigation mesh tile.
+/// @ingroup detour
+#ifdef DT_POLYREF64
+static const unsigned int DT_SALT_BITS = 12;
+static const unsigned int DT_TILE_BITS = 21;
+static const unsigned int DT_POLY_BITS = 31;
+typedef uint64_d dtPolyRef;
+#else
+typedef unsigned int dtPolyRef;
+#endif
+
+/// A handle to a tile within a navigation mesh.
+/// @ingroup detour
+#ifdef DT_POLYREF64
+typedef uint64_d dtTileRef;
+#else
+typedef unsigned int dtTileRef;
+#endif
+
+/// The maximum number of vertices per navigation polygon.
+/// @ingroup detour
+static const int DT_VERTS_PER_POLYGON = 6;
+
+/// @{
+/// @name Tile Serialization Constants
+/// These constants are used to detect whether a navigation tile's data
+/// and state format is compatible with the current build.
+///
+
+/// A magic number used to detect compatibility of navigation tile data.
+static const int DT_NAVMESH_MAGIC = 'D'<<24 | 'N'<<16 | 'A'<<8 | 'V';
+
+/// A version number used to detect compatibility of navigation tile data.
+static const int DT_NAVMESH_VERSION = 7;
+
+/// A magic number used to detect the compatibility of navigation tile states.
+static const int DT_NAVMESH_STATE_MAGIC = 'D'<<24 | 'N'<<16 | 'M'<<8 | 'S';
+
+/// A version number used to detect compatibility of navigation tile states.
+static const int DT_NAVMESH_STATE_VERSION = 1;
+
+/// @}
+
+/// A flag that indicates that an entity links to an external entity.
+/// (E.g. A polygon edge is a portal that links to another polygon.)
+static const unsigned short DT_EXT_LINK = 0x8000;
+
+/// A value that indicates the entity does not link to anything.
+static const unsigned int DT_NULL_LINK = 0xffffffff;
+
+/// A flag that indicates that an off-mesh connection can be traversed in both directions. (Is bidirectional.)
+static const unsigned int DT_OFFMESH_CON_BIDIR = 1;
+
+/// The maximum number of user defined area ids.
+/// @ingroup detour
+static const int DT_MAX_AREAS = 64;
+
+/// Tile flags used for various functions and fields.
+/// For an example, see dtNavMesh::addTile().
+enum dtTileFlags
+{
+	/// The navigation mesh owns the tile memory and is responsible for freeing it.
+	DT_TILE_FREE_DATA = 0x01,
+};
+
+/// Vertex flags returned by dtNavMeshQuery::findStraightPath.
+enum dtStraightPathFlags
+{
+	DT_STRAIGHTPATH_START = 0x01,				///< The vertex is the start position in the path.
+	DT_STRAIGHTPATH_END = 0x02,					///< The vertex is the end position in the path.
+	DT_STRAIGHTPATH_OFFMESH_CONNECTION = 0x04,	///< The vertex is the start of an off-mesh connection.
+};
+
+/// Options for dtNavMeshQuery::findStraightPath.
+enum dtStraightPathOptions
+{
+	DT_STRAIGHTPATH_AREA_CROSSINGS = 0x01,	///< Add a vertex at every polygon edge crossing where area changes.
+	DT_STRAIGHTPATH_ALL_CROSSINGS = 0x02,	///< Add a vertex at every polygon edge crossing.
+};
+
+
+/// Options for dtNavMeshQuery::initSlicedFindPath and updateSlicedFindPath
+enum dtFindPathOptions
+{
+	DT_FINDPATH_ANY_ANGLE	= 0x02,		///< Use raycasts during pathfinding to "shortcut". (Raycasts still consider costs.)
+};
+
+/// Options for dtNavMeshQuery::raycast
+enum dtRaycastOptions
+{
+	DT_RAYCAST_USE_COSTS = 0x01,		///< Raycast should calculate movement cost along the ray and fill RaycastHit::cost
+};
+
+
+/// Limit raycasting during any angle pathfinding
+/// The limit is given as a multiple of the character radius
+static const float DT_RAY_CAST_LIMIT_PROPORTIONS = 50.0f;
+
+/// Flags representing the type of a navigation mesh polygon.
+enum dtPolyTypes
+{
+	/// The polygon is a standard convex polygon that is part of the surface of the mesh.
+	DT_POLYTYPE_GROUND = 0,
+	/// The polygon is an off-mesh connection consisting of two vertices.
+	DT_POLYTYPE_OFFMESH_CONNECTION = 1,
+};
+
+
+/// Defines a polygon within a dtMeshTile object.
+/// @ingroup detour
+struct dtPoly
+{
+	/// Index to first link in linked list. (Or #DT_NULL_LINK if there is no link.)
+	unsigned int firstLink;
+
+	/// The indices of the polygon's vertices.
+	/// The actual vertices are located in dtMeshTile::verts.
+	unsigned short verts[DT_VERTS_PER_POLYGON];
+
+	/// Packed data representing neighbor polygon references and flags for each edge.
+	unsigned short neis[DT_VERTS_PER_POLYGON];
+
+	/// The user defined polygon flags.
+	unsigned short flags;
+
+	/// The number of vertices in the polygon.
+	unsigned char vertCount;
+
+	/// The bit packed area id (low 6 bits) and polygon type (high 2 bits).
+	/// @note Use the structure's set and get methods to access this value.
+	unsigned char areaAndtype;
+
+	/// Sets the user defined area id; only the low 6 bits of the argument are stored. [Limit: < #DT_MAX_AREAS]
+	inline void setArea(unsigned char a) { areaAndtype = (areaAndtype & 0xc0) | (a & 0x3f); }
+
+	/// Sets the polygon type in the high 2 bits, leaving the area id untouched. (See: #dtPolyTypes.)
+	inline void setType(unsigned char t) { areaAndtype = (areaAndtype & 0x3f) | (t << 6); }
+
+	/// Gets the user defined area id. (The low 6 bits of areaAndtype.)
+	inline unsigned char getArea() const { return areaAndtype & 0x3f; }
+
+	/// Gets the polygon type. (The high 2 bits of areaAndtype. See: #dtPolyTypes)
+	inline unsigned char getType() const { return areaAndtype >> 6; }
+};
+
+/// Defines the location of detail sub-mesh data within a dtMeshTile.
+struct dtPolyDetail
+{
+	unsigned int vertBase;			///< The offset of the vertices in the dtMeshTile::detailVerts array.
+	unsigned int triBase;			///< The offset of the triangles in the dtMeshTile::detailTris array.
+	unsigned char vertCount;		///< The number of vertices in the sub-mesh.
+	unsigned char triCount;			///< The number of triangles in the sub-mesh.
+};
+
+/// Defines a link between polygons.
+/// @note This structure is rarely if ever used by the end user.
+/// @see dtMeshTile
+struct dtLink
+{
+	dtPolyRef ref;					///< Neighbour reference. (The neighbor that is linked to.)
+	unsigned int next;				///< Index of the next link.
+	unsigned char edge;				///< Index of the polygon edge that owns this link.
+	unsigned char side;				///< If a boundary link, defines on which side the link is.
+	unsigned char bmin;				///< If a boundary link, defines the minimum sub-edge area.
+	unsigned char bmax;				///< If a boundary link, defines the maximum sub-edge area.
+};
+
+/// Bounding volume node.
+/// @note This structure is rarely if ever used by the end user.
+/// @see dtMeshTile
+struct dtBVNode
+{
+	unsigned short bmin[3];			///< Minimum bounds of the node's AABB. [(x, y, z)]
+	unsigned short bmax[3];			///< Maximum bounds of the node's AABB. [(x, y, z)]
+	int i;							///< The node's index. (Negative for escape sequence.)
+};
+
+/// Defines a navigation mesh off-mesh connection within a dtMeshTile object.
+/// An off-mesh connection is a user defined traversable connection made up of two vertices.
+struct dtOffMeshConnection
+{
+	/// The endpoints of the connection. [(ax, ay, az, bx, by, bz)]
+	float pos[6];
+
+	/// The radius of the endpoints. [Limit: >= 0]
+	float rad;		
+
+	/// The polygon reference of the connection within the tile.
+	unsigned short poly;
+
+	/// Link flags. 
+	/// @note These are not the connection's user defined flags. Those are assigned via the 
+	/// connection's dtPoly definition. These are link flags used for internal purposes.
+	unsigned char flags;
+
+	/// End point side.
+	unsigned char side;
+
+	/// The id of the offmesh connection. (User assigned when the navigation mesh is built.)
+	unsigned int userId;
+};
+
+/// Provides high level information related to a dtMeshTile object.
+/// @ingroup detour
+struct dtMeshHeader
+{
+	int magic;				///< Tile magic number. (Used to identify the data format.)
+	int version;			///< Tile data format version number.
+	int x;					///< The x-position of the tile within the dtNavMesh tile grid. (x, y, layer)
+	int y;					///< The y-position of the tile within the dtNavMesh tile grid. (x, y, layer)
+	int layer;				///< The layer of the tile within the dtNavMesh tile grid. (x, y, layer)
+	unsigned int userId;	///< The user defined id of the tile.
+	int polyCount;			///< The number of polygons in the tile.
+	int vertCount;			///< The number of vertices in the tile.
+	int maxLinkCount;		///< The number of allocated links.
+	int detailMeshCount;	///< The number of sub-meshes in the detail mesh.
+	
+	/// The number of unique vertices in the detail mesh. (In addition to the polygon vertices.)
+	int detailVertCount;
+	
+	int detailTriCount;			///< The number of triangles in the detail mesh.
+	int bvNodeCount;			///< The number of bounding volume nodes. (Zero if bounding volumes are disabled.)
+	int offMeshConCount;		///< The number of off-mesh connections.
+	int offMeshBase;			///< The index of the first polygon which is an off-mesh connection.
+	float walkableHeight;		///< The height of the agents using the tile.
+	float walkableRadius;		///< The radius of the agents using the tile.
+	float walkableClimb;		///< The maximum climb height of the agents using the tile.
+	float bmin[3];				///< The minimum bounds of the tile's AABB. [(x, y, z)]
+	float bmax[3];				///< The maximum bounds of the tile's AABB. [(x, y, z)]
+	
+	/// The bounding volume quantization factor. 
+	float bvQuantFactor;
+};
+
+/// Defines a navigation mesh tile.
+/// @ingroup detour
+struct dtMeshTile
+{
+	unsigned int salt;					///< Counter describing modifications to the tile.
+
+	unsigned int linksFreeList;			///< Index to the next free link.
+	dtMeshHeader* header;				///< The tile header.
+	dtPoly* polys;						///< The tile polygons. [Size: dtMeshHeader::polyCount]
+	float* verts;						///< The tile vertices. [Size: dtMeshHeader::vertCount]
+	dtLink* links;						///< The tile links. [Size: dtMeshHeader::maxLinkCount]
+	dtPolyDetail* detailMeshes;			///< The tile's detail sub-meshes. [Size: dtMeshHeader::detailMeshCount]
+	
+	/// The detail mesh's unique vertices. [(x, y, z) * dtMeshHeader::detailVertCount]
+	float* detailVerts;	
+
+	/// The detail mesh's triangles. [(vertA, vertB, vertC) * dtMeshHeader::detailTriCount]
+	unsigned char* detailTris;	
+
+	/// The tile bounding volume nodes. [Size: dtMeshHeader::bvNodeCount]
+	/// (Will be null if bounding volumes are disabled.)
+	dtBVNode* bvTree;
+
+	dtOffMeshConnection* offMeshCons;		///< The tile off-mesh connections. [Size: dtMeshHeader::offMeshConCount]
+		
+	unsigned char* data;					///< The tile data. (Not directly accessed under normal situations.)
+	int dataSize;							///< Size of the tile data.
+	int flags;								///< Tile flags. (See: #dtTileFlags)
+	dtMeshTile* next;						///< The next free tile, or the next tile in the spatial grid.
+private:
+	dtMeshTile(const dtMeshTile&);
+	dtMeshTile& operator=(const dtMeshTile&);
+};
+
+/// Configuration parameters used to define multi-tile navigation meshes.
+/// The values are used to allocate space during the initialization of a navigation mesh.
+/// @see dtNavMesh::init()
+/// @ingroup detour
+struct dtNavMeshParams
+{
+	float orig[3];					///< The world space origin of the navigation mesh's tile space. [(x, y, z)]
+	float tileWidth;				///< The width of each tile. (Along the x-axis.)
+	float tileHeight;				///< The height of each tile. (Along the z-axis.)
+	int maxTiles;					///< The maximum number of tiles the navigation mesh can contain.
+	int maxPolys;					///< The maximum number of polygons each tile can contain.
+};
+
+/// A navigation mesh based on tiles of convex polygons.
+/// @ingroup detour
+class dtNavMesh
+{
+public:
+	dtNavMesh();
+	~dtNavMesh();
+
+	/// @{
+	/// @name Initialization and Tile Management
+
+	/// Initializes the navigation mesh for tiled use.
+	///  @param[in]	params		Initialization parameters.
+	/// @return The status flags for the operation.
+	dtStatus init(const dtNavMeshParams* params);
+
+	/// Initializes the navigation mesh for single tile use.
+	///  @param[in]	data		Data of the new tile. (See: #dtCreateNavMeshData)
+	///  @param[in]	dataSize	The data size of the new tile.
+	///  @param[in]	flags		The tile flags. (See: #dtTileFlags)
+	/// @return The status flags for the operation.
+	///  @see dtCreateNavMeshData
+	dtStatus init(unsigned char* data, const int dataSize, const int flags);
+	
+	/// The navigation mesh initialization params.
+	const dtNavMeshParams* getParams() const;
+
+	/// Adds a tile to the navigation mesh.
+	///  @param[in]		data		Data for the new tile mesh. (See: #dtCreateNavMeshData)
+	///  @param[in]		dataSize	Data size of the new tile mesh.
+	///  @param[in]		flags		Tile flags. (See: #dtTileFlags)
+	///  @param[in]		lastRef		The desired reference for the tile. (When reloading a tile.) [opt] [Default: 0]
+	///  @param[out]	result		The tile reference. (If the tile was successfully added.) [opt]
+	/// @return The status flags for the operation.
+	dtStatus addTile(unsigned char* data, int dataSize, int flags, dtTileRef lastRef, dtTileRef* result);
+	
+	/// Removes the specified tile from the navigation mesh.
+	///  @param[in]		ref			The reference of the tile to remove.
+	///  @param[out]	data		Data associated with deleted tile.
+	///  @param[out]	dataSize	Size of the data associated with deleted tile.
+	/// @return The status flags for the operation.
+	dtStatus removeTile(dtTileRef ref, unsigned char** data, int* dataSize);
+
+	/// @}
+
+	/// @{
+	/// @name Query Functions
+
+	/// Calculates the tile grid location for the specified world position.
+	///  @param[in]	pos  The world position for the query. [(x, y, z)]
+	///  @param[out]	tx		The tile's x-location. (x, y)
+	///  @param[out]	ty		The tile's y-location. (x, y)
+	void calcTileLoc(const float* pos, int* tx, int* ty) const;
+
+	/// Gets the tile at the specified grid location.
+	///  @param[in]	x		The tile's x-location. (x, y, layer)
+	///  @param[in]	y		The tile's y-location. (x, y, layer)
+	///  @param[in]	layer	The tile's layer. (x, y, layer)
+	/// @return The tile, or null if the tile does not exist.
+	const dtMeshTile* getTileAt(const int x, const int y, const int layer) const;
+
+	/// Gets all tiles at the specified grid location. (All layers.)
+	///  @param[in]		x			The tile's x-location. (x, y)
+	///  @param[in]		y			The tile's y-location. (x, y)
+	///  @param[out]	tiles		A pointer to an array of tiles that will hold the result.
+	///  @param[in]		maxTiles	The maximum tiles the tiles parameter can hold.
+	/// @return The number of tiles returned in the tiles array.
+	int getTilesAt(const int x, const int y,
+				   dtMeshTile const** tiles, const int maxTiles) const;
+	
+	/// Gets the tile reference for the tile at specified grid location.
+	///  @param[in]	x		The tile's x-location. (x, y, layer)
+	///  @param[in]	y		The tile's y-location. (x, y, layer)
+	///  @param[in]	layer	The tile's layer. (x, y, layer)
+	/// @return The tile reference of the tile, or 0 if there is none.
+	dtTileRef getTileRefAt(int x, int y, int layer) const;
+
+	/// Gets the tile reference for the specified tile.
+	///  @param[in]	tile	The tile.
+	/// @return The tile reference of the tile.
+	dtTileRef getTileRef(const dtMeshTile* tile) const;
+
+	/// Gets the tile for the specified tile reference.
+	///  @param[in]	ref		The tile reference of the tile to retrieve.
+	/// @return The tile for the specified reference, or null if the 
+	///		reference is invalid.
+	const dtMeshTile* getTileByRef(dtTileRef ref) const;
+	
+	/// The maximum number of tiles supported by the navigation mesh.
+	/// @return The maximum number of tiles supported by the navigation mesh.
+	int getMaxTiles() const;
+	
+	/// Gets the tile at the specified index.
+	///  @param[in]	i		The tile index. [Limit: 0 <= index < #getMaxTiles()]
+	/// @return The tile at the specified index.
+	const dtMeshTile* getTile(int i) const;
+
+	/// Gets the tile and polygon for the specified polygon reference.
+	///  @param[in]		ref		The reference for a polygon.
+	///  @param[out]	tile	The tile containing the polygon.
+	///  @param[out]	poly	The polygon.
+	/// @return The status flags for the operation.
+	dtStatus getTileAndPolyByRef(const dtPolyRef ref, const dtMeshTile** tile, const dtPoly** poly) const;
+	
+	/// Returns the tile and polygon for the specified polygon reference.
+	///  @param[in]		ref		A known valid reference for a polygon.
+	///  @param[out]	tile	The tile containing the polygon.
+	///  @param[out]	poly	The polygon.
+	void getTileAndPolyByRefUnsafe(const dtPolyRef ref, const dtMeshTile** tile, const dtPoly** poly) const;
+
+	/// Checks the validity of a polygon reference.
+	///  @param[in]	ref		The polygon reference to check.
+	/// @return True if polygon reference is valid for the navigation mesh.
+	bool isValidPolyRef(dtPolyRef ref) const;
+	
+	/// Gets the polygon reference for the tile's base polygon.
+	///  @param[in]	tile		The tile.
+	/// @return The polygon reference for the base polygon in the specified tile.
+	dtPolyRef getPolyRefBase(const dtMeshTile* tile) const;
+	
+	/// Gets the endpoints for an off-mesh connection, ordered by "direction of travel".
+	///  @param[in]		prevRef		The reference of the polygon before the connection.
+	///  @param[in]		polyRef		The reference of the off-mesh connection polygon.
+	///  @param[out]	startPos	The start position of the off-mesh connection. [(x, y, z)]
+	///  @param[out]	endPos		The end position of the off-mesh connection. [(x, y, z)]
+	/// @return The status flags for the operation.
+	dtStatus getOffMeshConnectionPolyEndPoints(dtPolyRef prevRef, dtPolyRef polyRef, float* startPos, float* endPos) const;
+
+	/// Gets the specified off-mesh connection.
+	///  @param[in]	ref		The polygon reference of the off-mesh connection.
+	/// @return The specified off-mesh connection, or null if the polygon reference is not valid.
+	const dtOffMeshConnection* getOffMeshConnectionByRef(dtPolyRef ref) const;
+	
+	/// @}
+
+	/// @{
+	/// @name State Management
+	/// These functions do not affect #dtTileRef or #dtPolyRef's.
+
+	/// Sets the user defined flags for the specified polygon.
+	///  @param[in]	ref		The polygon reference.
+	///  @param[in]	flags	The new flags for the polygon.
+	/// @return The status flags for the operation.
+	dtStatus setPolyFlags(dtPolyRef ref, unsigned short flags);
+
+	/// Gets the user defined flags for the specified polygon.
+	///  @param[in]		ref				The polygon reference.
+	///  @param[out]	resultFlags		The polygon flags.
+	/// @return The status flags for the operation.
+	dtStatus getPolyFlags(dtPolyRef ref, unsigned short* resultFlags) const;
+
+	/// Sets the user defined area for the specified polygon.
+	///  @param[in]	ref		The polygon reference.
+	///  @param[in]	area	The new area id for the polygon. [Limit: < #DT_MAX_AREAS]
+	/// @return The status flags for the operation.
+	dtStatus setPolyArea(dtPolyRef ref, unsigned char area);
+
+	/// Gets the user defined area for the specified polygon.
+	///  @param[in]		ref			The polygon reference.
+	///  @param[out]	resultArea	The area id for the polygon.
+	/// @return The status flags for the operation.
+	dtStatus getPolyArea(dtPolyRef ref, unsigned char* resultArea) const;
+
+	/// Gets the size of the buffer required by #storeTileState to store the specified tile's state.
+	///  @param[in]	tile	The tile.
+	/// @return The size of the buffer required to store the state.
+	int getTileStateSize(const dtMeshTile* tile) const;
+	
+	/// Stores the non-structural state of the tile in the specified buffer. (Flags, area ids, etc.)
+	///  @param[in]		tile			The tile.
+	///  @param[out]	data			The buffer to store the tile's state in.
+	///  @param[in]		maxDataSize		The size of the data buffer. [Limit: >= #getTileStateSize]
+	/// @return The status flags for the operation.
+	dtStatus storeTileState(const dtMeshTile* tile, unsigned char* data, const int maxDataSize) const;
+	
+	/// Restores the state of the tile.
+	///  @param[in]	tile			The tile.
+	///  @param[in]	data			The new state. (Obtained from #storeTileState.)
+	///  @param[in]	maxDataSize		The size of the state within the data buffer.
+	/// @return The status flags for the operation.
+	dtStatus restoreTileState(dtMeshTile* tile, const unsigned char* data, const int maxDataSize);
+	
+	/// @}
+
+	/// @{
+	/// @name Encoding and Decoding
+	/// These functions are generally meant for internal use only.
+
+	/// Packs a salt, a tile index and a polygon index into a standard polygon reference.
+	///  @note This function is generally meant for internal use only.
+	///  @param[in]	salt	The tile's salt value. (Stored in the highest bits.)
+	///  @param[in]	it		The index of the tile. (Stored in the middle bits.)
+	///  @param[in]	ip		The index of the polygon within the tile. (Stored in the lowest bits.)
+	inline dtPolyRef encodePolyId(unsigned int salt, unsigned int it, unsigned int ip) const
+	{
+#ifdef DT_POLYREF64
+		return (dtPolyRef)ip | ((dtPolyRef)it << DT_POLY_BITS) | ((dtPolyRef)salt << (DT_POLY_BITS + DT_TILE_BITS));
+#else
+		return (dtPolyRef)ip | ((dtPolyRef)it << m_polyBits) | ((dtPolyRef)salt << (m_polyBits + m_tileBits));
+#endif
+	}
+	
+	/// Splits a standard polygon reference back into its salt, tile index and polygon index.
+	///  @note This function is generally meant for internal use only.
+	///  @param[in]	ref   The polygon reference to decode.
+	///  @param[out]	salt	The tile's salt value.
+	///  @param[out]	it		The index of the tile.
+	///  @param[out]	ip		The index of the polygon within the tile.
+	///  @see #encodePolyId
+	inline void decodePolyId(dtPolyRef ref, unsigned int& salt, unsigned int& it, unsigned int& ip) const
+	{
+#ifdef DT_POLYREF64
+		// Layout, high to low: salt | tile | poly. Shift a field down to bit 0, then mask it to its width.
+		const unsigned int tileShift = DT_POLY_BITS;
+		const unsigned int saltShift = DT_POLY_BITS + DT_TILE_BITS;
+		salt = (unsigned int)((ref >> saltShift) & (((dtPolyRef)1 << DT_SALT_BITS) - 1));
+		it = (unsigned int)((ref >> tileShift) & (((dtPolyRef)1 << DT_TILE_BITS) - 1));
+		ip = (unsigned int)(ref & (((dtPolyRef)1 << DT_POLY_BITS) - 1));
+#else
+		// Same layout, but the field widths come from this mesh's member variables.
+		const unsigned int tileShift = m_polyBits;
+		const unsigned int saltShift = m_polyBits + m_tileBits;
+		salt = (unsigned int)((ref >> saltShift) & (((dtPolyRef)1 << m_saltBits) - 1));
+		it = (unsigned int)((ref >> tileShift) & (((dtPolyRef)1 << m_tileBits) - 1));
+		ip = (unsigned int)(ref & (((dtPolyRef)1 << m_polyBits) - 1));
+#endif
+	}
+
+	/// Reads only the tile salt field out of the specified polygon reference.
+	///  @note This function is generally meant for internal use only.
+	///  @param[in]	ref		The polygon reference.
+	///  @see #encodePolyId
+	inline unsigned int decodePolyIdSalt(dtPolyRef ref) const
+	{
+#ifdef DT_POLYREF64
+		const unsigned int saltShift = DT_POLY_BITS + DT_TILE_BITS;
+		return (unsigned int)((ref >> saltShift) & (((dtPolyRef)1 << DT_SALT_BITS) - 1));
+#else
+		const unsigned int saltShift = m_polyBits + m_tileBits;
+		return (unsigned int)((ref >> saltShift) & (((dtPolyRef)1 << m_saltBits) - 1));
+#endif
+	}
+	
+	/// Reads only the tile index field out of the specified polygon reference.
+	///  @note This function is generally meant for internal use only.
+	///  @param[in]	ref		The polygon reference.
+	///  @see #encodePolyId
+	inline unsigned int decodePolyIdTile(dtPolyRef ref) const
+	{
+#ifdef DT_POLYREF64
+		const dtPolyRef shifted = ref >> DT_POLY_BITS;
+		return (unsigned int)(shifted & (((dtPolyRef)1 << DT_TILE_BITS) - 1));
+#else
+		const dtPolyRef shifted = ref >> m_polyBits;
+		return (unsigned int)(shifted & (((dtPolyRef)1 << m_tileBits) - 1));
+#endif
+	}
+	
+	/// Reads only the polygon index (within its tile) out of the specified polygon reference.
+	///  @note This function is generally meant for internal use only.
+	///  @param[in]	ref		The polygon reference.
+	///  @see #encodePolyId
+	inline unsigned int decodePolyIdPoly(dtPolyRef ref) const
+	{
+#ifdef DT_POLYREF64
+		const dtPolyRef lowBits = ((dtPolyRef)1 << DT_POLY_BITS) - 1;
+		return (unsigned int)(lowBits & ref);
+#else
+		const dtPolyRef lowBits = ((dtPolyRef)1 << m_polyBits) - 1;
+		return (unsigned int)(lowBits & ref);
+#endif
+	}
+
+	/// @}
+	
+private:
+	// Explicitly disabled copy constructor and copy assignment operator.
+	dtNavMesh(const dtNavMesh&);
+	dtNavMesh& operator=(const dtNavMesh&);
+
+	/// Returns pointer to tile in the tile array.
+	dtMeshTile* getTile(int i);
+
+	/// Gets all tiles at the specified grid location. (Overload of the public getTilesAt() that yields non-const tile pointers.)
+	int getTilesAt(const int x, const int y,
+				   dtMeshTile** tiles, const int maxTiles) const;
+
+	/// Returns neighbour tile based on side.
+	int getNeighbourTilesAt(const int x, const int y, const int side,
+							dtMeshTile** tiles, const int maxTiles) const;
+	
+	/// Returns all polygons in neighbour tile based on portal defined by the segment.
+	int findConnectingPolys(const float* va, const float* vb,
+							const dtMeshTile* tile, int side,
+							dtPolyRef* con, float* conarea, int maxcon) const;
+	
+	/// Builds internal polygons links for a tile.
+	void connectIntLinks(dtMeshTile* tile);
+	/// Builds internal polygons links for a tile.
+	void baseOffMeshLinks(dtMeshTile* tile);
+
+	/// Builds external polygon links for a tile.
+	void connectExtLinks(dtMeshTile* tile, dtMeshTile* target, int side);
+	/// Builds external polygon links for a tile.
+	void connectExtOffMeshLinks(dtMeshTile* tile, dtMeshTile* target, int side);
+	
+	/// Removes external links at specified side.
+	void unconnectLinks(dtMeshTile* tile, dtMeshTile* target);
+	
+
+	// TODO: These methods are duplicates from dtNavMeshQuery, but are needed for off-mesh connection finding.
+	
+	/// Queries polygons within a tile.
+	int queryPolygonsInTile(const dtMeshTile* tile, const float* qmin, const float* qmax,
+							dtPolyRef* polys, const int maxPolys) const;
+	/// Find nearest polygon within a tile.
+	dtPolyRef findNearestPolyInTile(const dtMeshTile* tile, const float* center,
+									const float* extents, float* nearestPt) const;
+	/// Returns closest point on polygon.
+	void closestPointOnPoly(dtPolyRef ref, const float* pos, float* closest, bool* posOverPoly) const;
+	
+	dtNavMeshParams m_params;			///< Current initialization params. TODO: do not store this info twice.
+	float m_orig[3];					///< Origin of the tile (0,0)
+	float m_tileWidth, m_tileHeight;	///< Dimensions of each tile.
+	int m_maxTiles;						///< Max number of tiles.
+	int m_tileLutSize;					///< Tile hash lookup size (must be pot).
+	int m_tileLutMask;					///< Tile hash lookup mask.
+
+	dtMeshTile** m_posLookup;			///< Tile hash lookup.
+	dtMeshTile* m_nextFree;				///< Freelist of tiles.
+	dtMeshTile* m_tiles;				///< List of tiles.
+		
+#ifndef DT_POLYREF64
+	unsigned int m_saltBits;			///< Number of salt bits in the tile ID.
+	unsigned int m_tileBits;			///< Number of tile bits in the tile ID.
+	unsigned int m_polyBits;			///< Number of poly bits in the tile ID.
+#endif
+};
+
+/// Allocates a navigation mesh object using the Detour allocator.
+/// @return A navigation mesh that is ready for initialization, or null on failure.
+///  @ingroup detour
+dtNavMesh* dtAllocNavMesh();
+
+/// Frees the specified navigation mesh object using the Detour allocator.
+///  @param[in]	navmesh		A navigation mesh allocated using #dtAllocNavMesh
+///  @ingroup detour
+void dtFreeNavMesh(dtNavMesh* navmesh);
+
+#endif // DETOURNAVMESH_H
+
+///////////////////////////////////////////////////////////////////////////
+
+// This section contains detailed documentation for members that don't have
+// a source file. It reduces clutter in the main section of the header.
+
+/**
+
+@typedef dtPolyRef
+@par
+
+Polygon references are subject to the same invalidate/preserve/restore 
+rules that apply to #dtTileRef's.  If the #dtTileRef for the polygon's
+tile changes, the polygon reference becomes invalid.
+
+Changing a polygon's flags, area id, etc. does not impact its polygon
+reference.
+
+@typedef dtTileRef
+@par
+
+The following changes will invalidate a tile reference:
+
+- The referenced tile has been removed from the navigation mesh.
+- The navigation mesh has been initialized using a different set
+  of #dtNavMeshParams.
+
+A tile reference is preserved/restored if the tile is added to a navigation 
+mesh initialized with the original #dtNavMeshParams and is added at the
+original reference location. (E.g. The lastRef parameter is used with
+dtNavMesh::addTile.)
+
+Basically, if the storage structure of a tile changes, its associated
+tile reference changes.
+
+
+@var unsigned short dtPoly::neis[DT_VERTS_PER_POLYGON]
+@par
+
+Each entry represents data for the edge starting at the vertex of the same index. 
+E.g. The entry at index n represents the edge data for vertex[n] to vertex[n+1].
+
+A value of zero indicates the edge has no polygon connection. (It makes up the 
+border of the navigation mesh.)
+
+The information can be extracted as follows: 
+@code 
+neighborRef = neis[n] & 0xff; // Get the neighbor polygon reference.
+
+if (neis[n] & #DT_EXT_LINK)
+{
+    // The edge is an external (portal) edge.
+}
+@endcode
+
+@var float dtMeshHeader::bvQuantFactor
+@par
+
+This value is used for converting between world and bounding volume coordinates.
+For example:
+@code
+const float cs = 1.0f / tile->header->bvQuantFactor;
+const dtBVNode* n = &tile->bvTree[i];
+if (n->i >= 0)
+{
+    // This is a leaf node.
+    float worldMinX = tile->header->bmin[0] + n->bmin[0]*cs;
+    float worldMinY = tile->header->bmin[1] + n->bmin[1]*cs;
+    // Etc...
+}
+@endcode
+
+@struct dtMeshTile
+@par
+
+Tiles generally only exist within the context of a dtNavMesh object.
+
+Some tile content is optional.  For example, a tile may not contain any
+off-mesh connections.  In this case the associated pointer will be null.
+
+If a detail mesh exists it will share vertices with the base polygon mesh.  
+Only the vertices unique to the detail mesh will be stored in #detailVerts.
+
+@warning Tiles returned by a dtNavMesh object are not guaranteed to be populated.
+For example: The tile at a location might not have been loaded yet, or may have been removed.
+In this case, pointers will be null.  So if in doubt, check the polygon count in the 
+tile's header to determine if a tile has polygons defined.
+
+@var float dtOffMeshConnection::pos[6]
+@par
+
+For a properly built navigation mesh, vertex A will always be within the bounds of the mesh. 
+Vertex B is not required to be within the bounds of the mesh.
+
+*/
--- a/deps/recastnavigation/Detour/Include/DetourNavMeshBuilder.h
+++ b/deps/recastnavigation/Detour/Include/DetourNavMeshBuilder.h
@@ -0,0 +1,149 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#ifndef DETOURNAVMESHBUILDER_H
+#define DETOURNAVMESHBUILDER_H
+
+#include "DetourAlloc.h"
+
+/// Represents the source data used to build a navigation mesh tile.
+/// @ingroup detour
+struct dtNavMeshCreateParams
+{
+
+	/// @name Polygon Mesh Attributes
+	/// Used to create the base navigation graph.
+	/// See #rcPolyMesh for details related to these attributes.
+	/// @{
+
+	const unsigned short* verts;			///< The polygon mesh vertices. [(x, y, z) * #vertCount] [Unit: vx]
+	int vertCount;							///< The number of vertices in the polygon mesh. [Limit: >= 3]
+	const unsigned short* polys;			///< The polygon data. [Size: #polyCount * 2 * #nvp]
+	const unsigned short* polyFlags;		///< The user defined flags assigned to each polygon. [Size: #polyCount]
+	const unsigned char* polyAreas;			///< The user defined area ids assigned to each polygon. [Size: #polyCount]
+	int polyCount;							///< Number of polygons in the mesh. [Limit: >= 1]
+	int nvp;								///< The maximum number of vertices per polygon. [Limit: >= 3]
+
+	/// @}
+	/// @name Height Detail Attributes (Optional)
+	/// See #rcPolyMeshDetail for details related to these attributes.
+	/// @{
+
+	const unsigned int* detailMeshes;		///< The height detail sub-mesh data. [Size: 4 * #polyCount]
+	const float* detailVerts;				///< The detail mesh vertices. [Size: 3 * #detailVertsCount] [Unit: wu]
+	int detailVertsCount;					///< The number of vertices in the detail mesh.
+	const unsigned char* detailTris;		///< The detail mesh triangles. [Size: 4 * #detailTriCount]
+	int detailTriCount;						///< The number of triangles in the detail mesh.
+
+	/// @}
+	/// @name Off-Mesh Connections Attributes (Optional)
+	/// Used to define a custom point-to-point edge within the navigation graph. An
+	/// off-mesh connection is a user defined traversable connection made up of two vertices,
+	/// at least one of which resides within a navigation mesh polygon.
+	/// @{
+
+	/// Off-mesh connection vertices. [(ax, ay, az, bx, by, bz) * #offMeshConCount] [Unit: wu]
+	const float* offMeshConVerts;
+	/// Off-mesh connection radii. [Size: #offMeshConCount] [Unit: wu]
+	const float* offMeshConRad;
+	/// User defined flags assigned to the off-mesh connections. [Size: #offMeshConCount]
+	const unsigned short* offMeshConFlags;
+	/// User defined area ids assigned to the off-mesh connections. [Size: #offMeshConCount]
+	const unsigned char* offMeshConAreas;
+	/// The permitted travel direction of the off-mesh connections. [Size: #offMeshConCount]
+	///
+	/// 0 = Travel only from endpoint A to endpoint B.<br/>
+	/// #DT_OFFMESH_CON_BIDIR = Bidirectional travel.
+	const unsigned char* offMeshConDir;	
+	/// The user defined ids of the off-mesh connection. [Size: #offMeshConCount]
+	const unsigned int* offMeshConUserID;
+	/// The number of off-mesh connections. [Limit: >= 0]
+	int offMeshConCount;
+
+	/// @}
+	/// @name Tile Attributes
+	/// @note The tile grid/layer data can be left at zero if the destination is a single tile mesh.
+	/// @{
+
+	unsigned int userId;	///< The user defined id of the tile.
+	int tileX;				///< The tile's x-grid location within the multi-tile destination mesh. (Along the x-axis.)
+	int tileY;				///< The tile's y-grid location within the multi-tile destination mesh. (Along the z-axis.)
+	int tileLayer;			///< The tile's layer within the layered destination mesh. [Limit: >= 0] (Along the y-axis.)
+	float bmin[3];			///< The minimum bounds of the tile. [(x, y, z)] [Unit: wu]
+	float bmax[3];			///< The maximum bounds of the tile. [(x, y, z)] [Unit: wu]
+
+	/// @}
+	/// @name General Configuration Attributes
+	/// @{
+
+	float walkableHeight;	///< The agent height. [Unit: wu]
+	float walkableRadius;	///< The agent radius. [Unit: wu]
+	float walkableClimb;	///< The agent maximum traversable ledge. (Up/Down) [Unit: wu]
+	float cs;				///< The xz-plane cell size of the polygon mesh. [Limit: > 0] [Unit: wu]
+	float ch;				///< The y-axis cell height of the polygon mesh. [Limit: > 0] [Unit: wu]
+
+	/// True if a bounding volume tree should be built for the tile.
+	/// @note The BVTree is not normally needed for layered navigation meshes.
+	bool buildBvTree;
+
+	/// @}
+};
+
+/// Builds navigation mesh tile data from the provided tile creation data.
+/// @ingroup detour
+///  @param[in]		params		Tile creation data.
+///  @param[out]	outData		The resulting tile data.
+///  @param[out]	outDataSize	The size of the tile data array.
+/// @return True if the tile data was successfully created.
+bool dtCreateNavMeshData(dtNavMeshCreateParams* params, unsigned char** outData, int* outDataSize);
+
+/// Swaps the endianness of the tile data's header (#dtMeshHeader).
+///  @param[in,out]	data		The tile data array.
+///  @param[in]		dataSize	The size of the data array.
+bool dtNavMeshHeaderSwapEndian(unsigned char* data, const int dataSize);
+
+/// Swaps the endianness of the tile data.
+///  @param[in,out]	data		The tile data array.
+///  @param[in]		dataSize	The size of the data array.
+bool dtNavMeshDataSwapEndian(unsigned char* data, const int dataSize);
+
+#endif // DETOURNAVMESHBUILDER_H
+
+// This section contains detailed documentation for members that don't have
+// a source file. It reduces clutter in the main section of the header.
+
+/**
+
+@struct dtNavMeshCreateParams
+@par
+
+This structure is used to marshal data between the Recast mesh generation pipeline and Detour navigation components.
+
+See the rcPolyMesh and rcPolyMeshDetail documentation for detailed information related to mesh structure.
+
+Units are usually in voxels (vx) or world units (wu). The units for voxels, grid size, and cell size 
+are all based on the values of #cs and #ch.
+
+The standard navigation mesh build process is to create tile data using dtCreateNavMeshData, then add the tile 
+to a navigation mesh using either the dtNavMesh single tile <tt>init()</tt> function or the dtNavMesh::addTile()
+function.
+
+@see dtCreateNavMeshData
+
+*/
+
--- a/deps/recastnavigation/Detour/Include/DetourNavMeshQuery.h
+++ b/deps/recastnavigation/Detour/Include/DetourNavMeshQuery.h
@@ -0,0 +1,575 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#ifndef DETOURNAVMESHQUERY_H
+#define DETOURNAVMESHQUERY_H
+
+#include "DetourNavMesh.h"
+#include "DetourStatus.h"
+
+
+// Define DT_VIRTUAL_QUERYFILTER if you wish to derive a custom filter from dtQueryFilter.
+// On certain platforms an indirect or virtual function call is expensive. The default
+// setting is to use non-virtual functions; the actual implementations of the functions
+// are declared as inline for maximum speed.
+
+//#define DT_VIRTUAL_QUERYFILTER 1
+
+/// Defines polygon filtering and traversal costs for navigation mesh query operations.
+/// @ingroup detour
+class dtQueryFilter
+{
+	float m_areaCost[DT_MAX_AREAS];		///< Cost per area type. (Used by default implementation.)
+	unsigned short m_includeFlags;		///< Flags for polygons that can be visited. (Used by default implementation.)
+	unsigned short m_excludeFlags;		///< Flags for polygons that should not be visited. (Used by default implementation.)
+	
+public:
+	dtQueryFilter();
+	
+#ifdef DT_VIRTUAL_QUERYFILTER
+	virtual ~dtQueryFilter() { }
+#endif
+	
+	/// Returns true if the polygon can be visited.  (I.e. Is traversable.)
+	///  @param[in]		ref		The reference id of the polygon to test.
+	///  @param[in]		tile	The tile containing the polygon.
+	///  @param[in]		poly  The polygon to test.
+#ifdef DT_VIRTUAL_QUERYFILTER
+	virtual bool passFilter(const dtPolyRef ref,
+							const dtMeshTile* tile,
+							const dtPoly* poly) const;
+#else
+	bool passFilter(const dtPolyRef ref,
+					const dtMeshTile* tile,
+					const dtPoly* poly) const;
+#endif
+
+	/// Returns cost to move from the beginning to the end of a line segment
+	/// that is fully contained within a polygon.
+	///  @param[in]		pa			The start position on the edge of the previous and current polygon. [(x, y, z)]
+	///  @param[in]		pb			The end position on the edge of the current and next polygon. [(x, y, z)]
+	///  @param[in]		prevRef		The reference id of the previous polygon. [opt]
+	///  @param[in]		prevTile	The tile containing the previous polygon. [opt]
+	///  @param[in]		prevPoly	The previous polygon. [opt]
+	///  @param[in]		curRef		The reference id of the current polygon.
+	///  @param[in]		curTile		The tile containing the current polygon.
+	///  @param[in]		curPoly		The current polygon.
+	///  @param[in]		nextRef		The reference id of the next polygon. [opt]
+	///  @param[in]		nextTile	The tile containing the next polygon. [opt]
+	///  @param[in]		nextPoly	The next polygon. [opt]
+#ifdef DT_VIRTUAL_QUERYFILTER
+	virtual float getCost(const float* pa, const float* pb,
+						  const dtPolyRef prevRef, const dtMeshTile* prevTile, const dtPoly* prevPoly,
+						  const dtPolyRef curRef, const dtMeshTile* curTile, const dtPoly* curPoly,
+						  const dtPolyRef nextRef, const dtMeshTile* nextTile, const dtPoly* nextPoly) const;
+#else
+	float getCost(const float* pa, const float* pb,
+				  const dtPolyRef prevRef, const dtMeshTile* prevTile, const dtPoly* prevPoly,
+				  const dtPolyRef curRef, const dtMeshTile* curTile, const dtPoly* curPoly,
+				  const dtPolyRef nextRef, const dtMeshTile* nextTile, const dtPoly* nextPoly) const;
+#endif
+
+	/// @name Getters and setters for the default implementation data.
+	///@{
+
+	/// Returns the traversal cost of the area.
+	///  @param[in]		i		The id of the area.
+	/// @returns The traversal cost of the area.
+	inline float getAreaCost(const int i) const { return m_areaCost[i]; }
+
+	/// Sets the traversal cost of the area.
+	///  @param[in]		i		The id of the area.
+	///  @param[in]		cost	The new cost of traversing the area.
+	inline void setAreaCost(const int i, const float cost) { m_areaCost[i] = cost; } 
+
+	/// Returns the include flags for the filter.
+	/// Any polygons that include one or more of these flags will be
+	/// included in the operation.
+	inline unsigned short getIncludeFlags() const { return m_includeFlags; }
+
+	/// Sets the include flags for the filter.
+	/// @param[in]		flags	The new flags.
+	inline void setIncludeFlags(const unsigned short flags) { m_includeFlags = flags; }
+
+	/// Returns the exclude flags for the filter.
+	/// Any polygons that include one or more of these flags will be
+	/// excluded from the operation.
+	inline unsigned short getExcludeFlags() const { return m_excludeFlags; }
+
+	/// Sets the exclude flags for the filter.
+	/// @param[in]		flags		The new flags.
+	inline void setExcludeFlags(const unsigned short flags) { m_excludeFlags = flags; }	
+
+	///@}
+
+};
+
+
+
+/// Provides information about a raycast hit.
+/// Filled by dtNavMeshQuery::raycast.
+/// @ingroup detour
+struct dtRaycastHit
+{
+	/// The hit parameter. (FLT_MAX if no wall hit.)
+	float t; 
+	
+	/// The normal of the nearest wall hit. [(x, y, z)]
+	float hitNormal[3];
+
+	/// The index of the edge on the final polygon where the wall was hit.
+	int hitEdgeIndex;
+	
+	/// Pointer to an array of reference ids of the visited polygons. [opt]
+	dtPolyRef* path;
+	
+	/// The number of visited polygons. [opt]
+	int pathCount;
+
+	/// The maximum number of polygons the @p path array can hold.
+	int maxPath;
+
+	/// The cost of the path until hit.
+	float pathCost;
+};
+
+/// Provides custom polygon query behavior.
+/// Used by dtNavMeshQuery::queryPolygons.
+/// @ingroup detour
+class dtPolyQuery
+{
+public:
+	virtual ~dtPolyQuery() { }
+
+	/// Called for each batch of unique polygons touched by the search area in dtNavMeshQuery::queryPolygons.
+	/// This can be called multiple times for a single query. Presumably @p count is the number of entries in @p polys and @p refs (confirm against the implementation).
+	virtual void process(const dtMeshTile* tile, dtPoly** polys, dtPolyRef* refs, int count) = 0;
+};
+
+/// Provides the ability to perform pathfinding related queries against
+/// a navigation mesh.
+/// @ingroup detour
+class dtNavMeshQuery
+{
+public:
+	dtNavMeshQuery();
+	~dtNavMeshQuery();
+	
+	/// Initializes the query object.
+	///  @param[in]		nav			Pointer to the dtNavMesh object to use for all queries.
+	///  @param[in]		maxNodes	Maximum number of search nodes. [Limits: 0 < value <= 65535]
+	/// @returns The status flags for the query.
+	dtStatus init(const dtNavMesh* nav, const int maxNodes);
+	
+	/// @name Standard Pathfinding Functions
+	///@{
+
+	/// Finds a path from the start polygon to the end polygon.
+	///  @param[in]		startRef	The reference id of the start polygon.
+	///  @param[in]		endRef		The reference id of the end polygon.
+	///  @param[in]		startPos	A position within the start polygon. [(x, y, z)]
+	///  @param[in]		endPos		A position within the end polygon. [(x, y, z)]
+	///  @param[in]		filter		The polygon filter to apply to the query.
+	///  @param[out]	path		An ordered list of polygon references representing the path. (Start to end.) 
+	///  							[(polyRef) * @p pathCount]
+	///  @param[out]	pathCount	The number of polygons returned in the @p path array.
+	///  @param[in]		maxPath		The maximum number of polygons the @p path array can hold. [Limit: >= 1]
+	dtStatus findPath(dtPolyRef startRef, dtPolyRef endRef,
+					  const float* startPos, const float* endPos,
+					  const dtQueryFilter* filter,
+					  dtPolyRef* path, int* pathCount, const int maxPath) const;
+
+	/// Finds the straight path from the start to the end position within the polygon corridor.
+	///  @param[in]		startPos			Path start position. [(x, y, z)]
+	///  @param[in]		endPos				Path end position. [(x, y, z)]
+	///  @param[in]		path				An array of polygon references that represent the path corridor.
+	///  @param[in]		pathSize			The number of polygons in the @p path array.
+	///  @param[out]	straightPath		Points describing the straight path. [(x, y, z) * @p straightPathCount].
+	///  @param[out]	straightPathFlags	Flags describing each point. (See: #dtStraightPathFlags) [opt]
+	///  @param[out]	straightPathRefs	The reference id of the polygon that is being entered at each point. [opt]
+	///  @param[out]	straightPathCount	The number of points in the straight path.
+	///  @param[in]		maxStraightPath		The maximum number of points the straight path arrays can hold.  [Limit: > 0]
+	///  @param[in]		options				Query options. (see: #dtStraightPathOptions)
+	/// @returns The status flags for the query.
+	dtStatus findStraightPath(const float* startPos, const float* endPos,
+							  const dtPolyRef* path, const int pathSize,
+							  float* straightPath, unsigned char* straightPathFlags, dtPolyRef* straightPathRefs,
+							  int* straightPathCount, const int maxStraightPath, const int options = 0) const;
+
+	///@}
+	/// @name Sliced Pathfinding Functions
+	/// Common use case:
+	///	-# Call initSlicedFindPath() to initialize the sliced path query.
+	///	-# Call updateSlicedFindPath() until it returns complete.
+	///	-# Call finalizeSlicedFindPath() to get the path.
+	///@{ 
+
+	/// Initializes a sliced path query.
+	///  @param[in]		startRef	The reference id of the start polygon.
+	///  @param[in]		endRef		The reference id of the end polygon.
+	///  @param[in]		startPos	A position within the start polygon. [(x, y, z)]
+	///  @param[in]		endPos		A position within the end polygon. [(x, y, z)]
+	///  @param[in]		filter		The polygon filter to apply to the query.
+	///  @param[in]		options		query options (see: #dtFindPathOptions)
+	/// @returns The status flags for the query.
+	dtStatus initSlicedFindPath(dtPolyRef startRef, dtPolyRef endRef,
+								const float* startPos, const float* endPos,
+								const dtQueryFilter* filter, const unsigned int options = 0);
+
+	/// Updates an in-progress sliced path query.
+	///  @param[in]		maxIter		The maximum number of iterations to perform.
+	///  @param[out]	doneIters	The actual number of iterations completed. [opt]
+	/// @returns The status flags for the query.
+	dtStatus updateSlicedFindPath(const int maxIter, int* doneIters);
+
+	/// Finalizes and returns the results of a sliced path query.
+	///  @param[out]	path		An ordered list of polygon references representing the path. (Start to end.) 
+	///  							[(polyRef) * @p pathCount]
+	///  @param[out]	pathCount	The number of polygons returned in the @p path array.
+	///  @param[in]		maxPath		The max number of polygons the path array can hold. [Limit: >= 1]
+	/// @returns The status flags for the query.
+	dtStatus finalizeSlicedFindPath(dtPolyRef* path, int* pathCount, const int maxPath);
+	
+	/// Finalizes and returns the results of an incomplete sliced path query, returning the path to the furthest
+	/// polygon on the existing path that was visited during the search.
+	///  @param[in]		existing		An array of polygon references for the existing path.
+	///  @param[in]		existingSize	The number of polygons in the @p existing array.
+	///  @param[out]	path			An ordered list of polygon references representing the path. (Start to end.) 
+	///  								[(polyRef) * @p pathCount]
+	///  @param[out]	pathCount		The number of polygons returned in the @p path array.
+	///  @param[in]		maxPath			The max number of polygons the @p path array can hold. [Limit: >= 1]
+	/// @returns The status flags for the query.
+	dtStatus finalizeSlicedFindPathPartial(const dtPolyRef* existing, const int existingSize,
+										   dtPolyRef* path, int* pathCount, const int maxPath);
+
+	///@}
+	/// @name Dijkstra Search Functions
+	/// @{ 
+
+	/// Finds the polygons along the navigation graph that touch the specified circle.
+	///  @param[in]		startRef		The reference id of the polygon where the search starts.
+	///  @param[in]		centerPos		The center of the search circle. [(x, y, z)]
+	///  @param[in]		radius			The radius of the search circle.
+	///  @param[in]		filter			The polygon filter to apply to the query.
+	///  @param[out]	resultRef		The reference ids of the polygons touched by the circle. [opt]
+	///  @param[out]	resultParent	The reference ids of the parent polygons for each result. 
+	///  								Zero if a result polygon has no parent. [opt]
+	///  @param[out]	resultCost		The search cost from @p centerPos to the polygon. [opt]
+	///  @param[out]	resultCount		The number of polygons found. [opt]
+	///  @param[in]		maxResult		The maximum number of polygons the result arrays can hold.
+	/// @returns The status flags for the query.
+	dtStatus findPolysAroundCircle(dtPolyRef startRef, const float* centerPos, const float radius,
+								   const dtQueryFilter* filter,
+								   dtPolyRef* resultRef, dtPolyRef* resultParent, float* resultCost,
+								   int* resultCount, const int maxResult) const;
+	
+	/// Finds the polygons along the navigation graph that touch the specified convex polygon.
+	///  @param[in]		startRef		The reference id of the polygon where the search starts.
+	///  @param[in]		verts			The vertices describing the convex polygon. (CCW) 
+	///  								[(x, y, z) * @p nverts]
+	///  @param[in]		nverts			The number of vertices in the polygon.
+	///  @param[in]		filter			The polygon filter to apply to the query.
+	///  @param[out]	resultRef		The reference ids of the polygons touched by the search polygon. [opt]
+	///  @param[out]	resultParent	The reference ids of the parent polygons for each result. Zero if a 
+	///  								result polygon has no parent. [opt]
+	///  @param[out]	resultCost		The search cost from the centroid point to the polygon. [opt]
+	///  @param[out]	resultCount		The number of polygons found.
+	///  @param[in]		maxResult		The maximum number of polygons the result arrays can hold.
+	/// @returns The status flags for the query.
+	dtStatus findPolysAroundShape(dtPolyRef startRef, const float* verts, const int nverts,
+								  const dtQueryFilter* filter,
+								  dtPolyRef* resultRef, dtPolyRef* resultParent, float* resultCost,
+								  int* resultCount, const int maxResult) const;
+	
+	/// Gets a path from the explored nodes in the previous search.
+	///  @param[in]		endRef		The reference id of the end polygon.
+	///  @param[out]	path		An ordered list of polygon references representing the path. (Start to end.)
+	///  							[(polyRef) * @p pathCount]
+	///  @param[out]	pathCount	The number of polygons returned in the @p path array.
+	///  @param[in]		maxPath		The maximum number of polygons the @p path array can hold. [Limit: >= 0]
+	///  @returns		The status flags. Returns DT_FAILURE | DT_INVALID_PARAM if any parameter is wrong, or if
+	///  				@p endRef was not explored in the previous search. Returns DT_SUCCESS | DT_BUFFER_TOO_SMALL
+	///  				if @p path cannot contain the entire path. In this case it is filled to capacity with a partial path.
+	///  				Otherwise returns DT_SUCCESS.
+	///  @remarks		The result of this function depends on the state of the query object. For that reason it should only
+	///  				be used immediately after one of the two Dijkstra searches, findPolysAroundCircle or findPolysAroundShape.
+	dtStatus getPathFromDijkstraSearch(dtPolyRef endRef, dtPolyRef* path, int* pathCount, int maxPath) const;
+
+	/// @}
+	/// @name Local Query Functions
+	///@{
+
+	/// Finds the polygon nearest to the specified center point.
+	///  @param[in]		center		The center of the search box. [(x, y, z)]
+	///  @param[in]		extents		The search distance along each axis. [(x, y, z)]
+	///  @param[in]		filter		The polygon filter to apply to the query.
+	///  @param[out]	nearestRef	The reference id of the nearest polygon.
+	///  @param[out]	nearestPt	The nearest point on the polygon. [opt] [(x, y, z)]
+	/// @returns The status flags for the query.
+	dtStatus findNearestPoly(const float* center, const float* extents,
+							 const dtQueryFilter* filter,
+							 dtPolyRef* nearestRef, float* nearestPt) const;
+	
+	/// Finds polygons that overlap the search box.
+	///  @param[in]		center		The center of the search box. [(x, y, z)]
+	///  @param[in]		extents		The search distance along each axis. [(x, y, z)]
+	///  @param[in]		filter		The polygon filter to apply to the query.
+	///  @param[out]	polys		The reference ids of the polygons that overlap the query box.
+	///  @param[out]	polyCount	The number of polygons in the search result.
+	///  @param[in]		maxPolys	The maximum number of polygons the search result can hold.
+	/// @returns The status flags for the query.
+	dtStatus queryPolygons(const float* center, const float* extents,
+						   const dtQueryFilter* filter,
+						   dtPolyRef* polys, int* polyCount, const int maxPolys) const;
+
+	/// Finds polygons that overlap the search box.
+	///  @param[in]		center		The center of the search box. [(x, y, z)]
+	///  @param[in]		extents		The search distance along each axis. [(x, y, z)]
+	///  @param[in]		filter		The polygon filter to apply to the query.
+	///  @param[in]		query		The query. Polygons found will be batched together and passed to this query.
+	dtStatus queryPolygons(const float* center, const float* extents,
+						   const dtQueryFilter* filter, dtPolyQuery* query) const;
+
+	/// Finds the non-overlapping navigation polygons in the local neighbourhood around the center position.
+	///  @param[in]		startRef		The reference id of the polygon where the search starts.
+	///  @param[in]		centerPos		The center of the query circle. [(x, y, z)]
+	///  @param[in]		radius			The radius of the query circle.
+	///  @param[in]		filter			The polygon filter to apply to the query.
+	///  @param[out]	resultRef		The reference ids of the polygons touched by the circle.
+	///  @param[out]	resultParent	The reference ids of the parent polygons for each result. 
+	///  								Zero if a result polygon has no parent. [opt]
+	///  @param[out]	resultCount		The number of polygons found.
+	///  @param[in]		maxResult		The maximum number of polygons the result arrays can hold.
+	/// @returns The status flags for the query.
+	dtStatus findLocalNeighbourhood(dtPolyRef startRef, const float* centerPos, const float radius,
+									const dtQueryFilter* filter,
+									dtPolyRef* resultRef, dtPolyRef* resultParent,
+									int* resultCount, const int maxResult) const;
+
+	/// Moves from the start to the end position constrained to the navigation mesh.
+	///  @param[in]		startRef		The reference id of the start polygon.
+	///  @param[in]		startPos		A position of the mover within the start polygon. [(x, y, z)]
+	///  @param[in]		endPos			The desired end position of the mover. [(x, y, z)]
+	///  @param[in]		filter			The polygon filter to apply to the query.
+	///  @param[out]	resultPos		The result position of the mover. [(x, y, z)]
+	///  @param[out]	visited			The reference ids of the polygons visited during the move.
+	///  @param[out]	visitedCount	The number of polygons visited during the move.
+	///  @param[in]		maxVisitedSize	The maximum number of polygons the @p visited array can hold.
+	/// @returns The status flags for the query.
+	dtStatus moveAlongSurface(dtPolyRef startRef, const float* startPos, const float* endPos,
+							  const dtQueryFilter* filter,
+							  float* resultPos, dtPolyRef* visited, int* visitedCount, const int maxVisitedSize) const;
+	
+	/// Casts a 'walkability' ray along the surface of the navigation mesh from 
+	/// the start position toward the end position.
+	/// @note A wrapper around raycast(..., dtRaycastHit*). Retained for backward compatibility.
+	///  @param[in]		startRef	The reference id of the start polygon.
+	///  @param[in]		startPos	A position within the start polygon representing 
+	///  							the start of the ray. [(x, y, z)]
+	///  @param[in]		endPos		The position to cast the ray toward. [(x, y, z)]
+	///  @param[out]	t			The hit parameter. (FLT_MAX if no wall hit.)
+	///  @param[out]	hitNormal	The normal of the nearest wall hit. [(x, y, z)]
+	///  @param[in]		filter		The polygon filter to apply to the query.
+	///  @param[out]	path		The reference ids of the visited polygons. [opt]
+	///  @param[out]	pathCount	The number of visited polygons. [opt]
+	///  @param[in]		maxPath		The maximum number of polygons the @p path array can hold.
+	/// @returns The status flags for the query.
+	dtStatus raycast(dtPolyRef startRef, const float* startPos, const float* endPos,
+					 const dtQueryFilter* filter,
+					 float* t, float* hitNormal, dtPolyRef* path, int* pathCount, const int maxPath) const;
+	
+	/// Casts a 'walkability' ray along the surface of the navigation mesh from 
+	/// the start position toward the end position.
+	///  @param[in]		startRef	The reference id of the start polygon.
+	///  @param[in]		startPos	A position within the start polygon representing 
+	///  							the start of the ray. [(x, y, z)]
+	///  @param[in]		endPos		The position to cast the ray toward. [(x, y, z)]
+	///  @param[in]		filter		The polygon filter to apply to the query.
+	///  @param[in]		options		Governs how the raycast behaves. See dtRaycastOptions
+	///  @param[out]	hit			Pointer to a raycast hit structure which will be filled by the results.
+	///  @param[in]		prevRef		Parent of start ref. Used for cost calculation. [opt]
+	/// @returns The status flags for the query.
+	dtStatus raycast(dtPolyRef startRef, const float* startPos, const float* endPos,
+					 const dtQueryFilter* filter, const unsigned int options,
+					 dtRaycastHit* hit, dtPolyRef prevRef = 0) const;
+
+
+	/// Finds the distance from the specified position to the nearest polygon wall.
+	///  @param[in]		startRef		The reference id of the polygon containing @p centerPos.
+	///  @param[in]		centerPos		The center of the search circle. [(x, y, z)]
+	///  @param[in]		maxRadius		The radius of the search circle.
+	///  @param[in]		filter			The polygon filter to apply to the query.
+	///  @param[out]	hitDist			The distance to the nearest wall from @p centerPos.
+	///  @param[out]	hitPos			The nearest position on the wall that was hit. [(x, y, z)]
+	///  @param[out]	hitNormal		The normalized ray formed from the wall point to the 
+	///  								source point. [(x, y, z)]
+	/// @returns The status flags for the query.
+	dtStatus findDistanceToWall(dtPolyRef startRef, const float* centerPos, const float maxRadius,
+								const dtQueryFilter* filter,
+								float* hitDist, float* hitPos, float* hitNormal) const;
+	
+	/// Returns the segments for the specified polygon, optionally including portals.
+	///  @param[in]		ref				The reference id of the polygon.
+	///  @param[in]		filter			The polygon filter to apply to the query.
+	///  @param[out]	segmentVerts	The segments. [(ax, ay, az, bx, by, bz) * segmentCount]
+	///  @param[out]	segmentRefs		The reference ids of each segment's neighbor polygon. 
+	///  								Or zero if the segment is a wall. [opt] [(parentRef) * @p segmentCount] 
+	///  @param[out]	segmentCount	The number of segments returned.
+	///  @param[in]		maxSegments		The maximum number of segments the result arrays can hold.
+	/// @returns The status flags for the query.
+	dtStatus getPolyWallSegments(dtPolyRef ref, const dtQueryFilter* filter,
+								 float* segmentVerts, dtPolyRef* segmentRefs, int* segmentCount,
+								 const int maxSegments) const;
+
+	/// Returns random location on navmesh.
+	/// Polygons are chosen weighted by area. The search runs in time linear in the number of polygons.
+	///  @param[in]		filter			The polygon filter to apply to the query.
+	///  @param[in]		frand			Function returning a random number [0..1).
+	///  @param[out]	randomRef		The reference id of the random location.
+	///  @param[out]	randomPt		The random location. 
+	/// @returns The status flags for the query.
+	dtStatus findRandomPoint(const dtQueryFilter* filter, float (*frand)(),
+							 dtPolyRef* randomRef, float* randomPt) const;
+
+	/// Returns random location on navmesh within the reach of specified location.
+	/// Polygons are chosen weighted by area. The search runs in time linear in the number of polygons.
+	/// The location is not exactly constrained by the circle, but it limits the visited polygons.
+	///  @param[in]		startRef		The reference id of the polygon where the search starts.
+	///  @param[in]		centerPos		The center of the search circle. [(x, y, z)]
+	///  @param[in]		filter			The polygon filter to apply to the query.
+	///  @param[in]		frand			Function returning a random number [0..1).
+	///  @param[out]	randomRef		The reference id of the random location.
+	///  @param[out]	randomPt		The random location. [(x, y, z)]
+	/// @returns The status flags for the query.
+	dtStatus findRandomPointAroundCircle(dtPolyRef startRef, const float* centerPos, const float maxRadius,
+										 const dtQueryFilter* filter, float (*frand)(),
+										 dtPolyRef* randomRef, float* randomPt) const;
+	
+	/// Finds the closest point on the specified polygon.
+	///  @param[in]		ref			The reference id of the polygon.
+	///  @param[in]		pos			The position to check. [(x, y, z)]
+	///  @param[out]	closest		The closest point on the polygon. [(x, y, z)]
+	///  @param[out]	posOverPoly	True if the position is over the polygon.
+	/// @returns The status flags for the query.
+	dtStatus closestPointOnPoly(dtPolyRef ref, const float* pos, float* closest, bool* posOverPoly) const;
+	
+	/// Returns a point on the boundary closest to the source point if the source point is outside the 
+	/// polygon's xz-bounds.
+	///  @param[in]		ref			The reference id to the polygon.
+	///  @param[in]		pos			The position to check. [(x, y, z)]
+	///  @param[out]	closest		The closest point. [(x, y, z)]
+	/// @returns The status flags for the query.
+	dtStatus closestPointOnPolyBoundary(dtPolyRef ref, const float* pos, float* closest) const;
+	
+	/// Gets the height of the polygon at the provided position using the height detail. (Most accurate.)
+	///  @param[in]		ref			The reference id of the polygon.
+	///  @param[in]		pos			A position within the xz-bounds of the polygon. [(x, y, z)]
+	///  @param[out]	height		The height at the surface of the polygon.
+	/// @returns The status flags for the query.
+	dtStatus getPolyHeight(dtPolyRef ref, const float* pos, float* height) const;
+
+	/// @}
+	/// @name Miscellaneous Functions
+	/// @{
+
+	/// Returns true if the polygon reference is valid and passes the filter restrictions.
+	///  @param[in]		ref			The polygon reference to check.
+	///  @param[in]		filter		The filter to apply.
+	bool isValidPolyRef(dtPolyRef ref, const dtQueryFilter* filter) const;
+
+	/// Returns true if the polygon reference is in the closed list. 
+	///  @param[in]		ref		The reference id of the polygon to check.
+	/// @returns True if the polygon is in closed list.
+	bool isInClosedList(dtPolyRef ref) const;
+	
+	/// Gets the node pool.
+	/// @returns The node pool.
+	class dtNodePool* getNodePool() const { return m_nodePool; }
+	
+	/// Gets the navigation mesh the query object is using.
+	/// @return The navigation mesh the query object is using.
+	const dtNavMesh* getAttachedNavMesh() const { return m_nav; }
+
+	/// @}
+	
+private:
+	// Explicitly disabled copy constructor and copy assignment operator
+	dtNavMeshQuery(const dtNavMeshQuery&);
+	dtNavMeshQuery& operator=(const dtNavMeshQuery&);
+	
+	/// Queries polygons within a tile.
+	void queryPolygonsInTile(const dtMeshTile* tile, const float* qmin, const float* qmax,
+							 const dtQueryFilter* filter, dtPolyQuery* query) const;
+
+	/// Returns portal points between two polygons.
+	dtStatus getPortalPoints(dtPolyRef from, dtPolyRef to, float* left, float* right,
+							 unsigned char& fromType, unsigned char& toType) const;
+	dtStatus getPortalPoints(dtPolyRef from, const dtPoly* fromPoly, const dtMeshTile* fromTile,
+							 dtPolyRef to, const dtPoly* toPoly, const dtMeshTile* toTile,
+							 float* left, float* right) const;
+	
+	/// Returns edge mid point between two polygons.
+	dtStatus getEdgeMidPoint(dtPolyRef from, dtPolyRef to, float* mid) const;
+	dtStatus getEdgeMidPoint(dtPolyRef from, const dtPoly* fromPoly, const dtMeshTile* fromTile,
+							 dtPolyRef to, const dtPoly* toPoly, const dtMeshTile* toTile,
+							 float* mid) const;
+	
+	// Appends vertex to a straight path
+	dtStatus appendVertex(const float* pos, const unsigned char flags, const dtPolyRef ref,
+						  float* straightPath, unsigned char* straightPathFlags, dtPolyRef* straightPathRefs,
+						  int* straightPathCount, const int maxStraightPath) const;
+
+	// Appends intermediate portal points to a straight path.
+	dtStatus appendPortals(const int startIdx, const int endIdx, const float* endPos, const dtPolyRef* path,
+						   float* straightPath, unsigned char* straightPathFlags, dtPolyRef* straightPathRefs,
+						   int* straightPathCount, const int maxStraightPath, const int options) const;
+
+	// Gets the path leading to the specified end node.
+	dtStatus getPathToNode(struct dtNode* endNode, dtPolyRef* path, int* pathCount, int maxPath) const;
+	
+	const dtNavMesh* m_nav;				///< Pointer to navmesh data.
+
+	struct dtQueryData
+	{
+		dtStatus status;
+		struct dtNode* lastBestNode;
+		float lastBestNodeCost;
+		dtPolyRef startRef, endRef;
+		float startPos[3], endPos[3];
+		const dtQueryFilter* filter;
+		unsigned int options;
+		float raycastLimitSqr;
+	};
+	dtQueryData m_query;				///< Sliced query state.
+
+	class dtNodePool* m_tinyNodePool;	///< Pointer to small node pool.
+	class dtNodePool* m_nodePool;		///< Pointer to node pool.
+	class dtNodeQueue* m_openList;		///< Pointer to open list queue.
+};
+
+/// Allocates a query object using the Detour allocator.
+/// @return An allocated query object, or null on failure.
+/// @ingroup detour
+dtNavMeshQuery* dtAllocNavMeshQuery();
+
+/// Frees the specified query object using the Detour allocator.
+///  @param[in]		query		A query object allocated using #dtAllocNavMeshQuery
+/// @ingroup detour
+void dtFreeNavMeshQuery(dtNavMeshQuery* query);
+
+#endif // DETOURNAVMESHQUERY_H
--- a/deps/recastnavigation/Detour/Include/DetourNode.h
+++ b/deps/recastnavigation/Detour/Include/DetourNode.h
@@ -0,0 +1,168 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#ifndef DETOURNODE_H
+#define DETOURNODE_H
+
+#include "DetourNavMesh.h"
+
+/// Bit flags that are combined into dtNode::flags.
+// NOTE(review): OPEN/CLOSED presumably track membership in the search's open and
+// closed lists (cf. dtNavMeshQuery::isInClosedList) - confirm against the query code.
+enum dtNodeFlags
+{
+	DT_NODE_OPEN = 0x01,
+	DT_NODE_CLOSED = 0x02,
+	DT_NODE_PARENT_DETACHED = 0x04, // parent of the node is not adjacent. Found using raycast.
+};
+
+/// Element type of the dtNodePool index arrays (see dtNodePool::getFirst() and dtNodePool::getNext()).
+typedef unsigned short dtNodeIndex;
+/// dtNodeIndex with all bits set.
+// NOTE(review): by its name this is the "no node" marker of the index arrays - confirm in DetourNode.cpp.
+static const dtNodeIndex DT_NULL_IDX = (dtNodeIndex)~0;
+
+static const int DT_NODE_PARENT_BITS = 24;	///< Width in bits of dtNode::pidx.
+static const int DT_NODE_STATE_BITS = 2;	///< Width in bits of dtNode::state.
+/// A single search node. The three bit-fields below take 24 + 2 + 3 = 29 bits in total.
+struct dtNode
+{
+	float pos[3];								///< Position of the node.
+	float cost;									///< Cost from previous node to current node.
+	float total;								///< Cost up to the node.
+	unsigned int pidx : DT_NODE_PARENT_BITS;	///< Index to parent node.
+	unsigned int state : DT_NODE_STATE_BITS;	///< extra state information. A polyRef can have multiple nodes with different extra info. see DT_MAX_STATES_PER_NODE
+	unsigned int flags : 3;						///< Node flags. A combination of dtNodeFlags.
+	dtPolyRef id;								///< Polygon ref the node corresponds to.
+};
+
+/// Number of distinct values dtNode::state can hold (1 << DT_NODE_STATE_BITS, i.e. 4).
+static const int DT_MAX_STATES_PER_NODE = 1 << DT_NODE_STATE_BITS;	// number of extra states per node. See dtNode::state
+
+/// Fixed-capacity pool of dtNode objects that are looked up by polygon
+/// reference plus extra state information.
+class dtNodePool
+{
+public:
+	dtNodePool(int maxNodes, int hashSize);
+	~dtNodePool();
+	void clear();
+
+	/// Returns the node for the given ref and extra state, allocating one when
+	/// none exists yet. The same polyRef may own several nodes as long as their
+	/// extra state information differs.
+	dtNode* getNode(dtPolyRef id, unsigned char state=0);
+	dtNode* findNode(dtPolyRef id, unsigned char state);
+	unsigned int findNodes(dtPolyRef id, dtNode** nodes, const int maxNodes);
+
+	/// Maps a node pointer to its 1-based index in the pool; a null pointer maps to 0.
+	inline unsigned int getNodeIdx(const dtNode* node) const
+	{
+		return node ? (unsigned int)(node - m_nodes) + 1 : 0;
+	}
+
+	/// Maps a 1-based index back to the node; index 0 yields a null pointer.
+	inline dtNode* getNodeAtIdx(unsigned int idx)
+	{
+		return idx ? &m_nodes[idx - 1] : 0;
+	}
+
+	/// Const overload of getNodeAtIdx().
+	inline const dtNode* getNodeAtIdx(unsigned int idx) const
+	{
+		return idx ? &m_nodes[idx - 1] : 0;
+	}
+
+	/// Size in bytes of this object plus m_maxNodes nodes, m_maxNodes
+	/// index entries and m_hashSize index entries.
+	inline int getMemUsed() const
+	{
+		return sizeof(*this) +
+			(sizeof(dtNode) + sizeof(dtNodeIndex))*m_maxNodes +
+			sizeof(dtNodeIndex)*m_hashSize;
+	}
+
+	inline int getMaxNodes() const { return m_maxNodes; }
+	inline int getHashSize() const { return m_hashSize; }
+	inline int getNodeCount() const { return m_nodeCount; }
+
+	/// Raw read access to the m_first and m_next index arrays.
+	inline dtNodeIndex getFirst(int bucket) const { return m_first[bucket]; }
+	inline dtNodeIndex getNext(int i) const { return m_next[i]; }
+
+private:
+	// Copying is disabled: declared but intentionally left without a public definition.
+	dtNodePool(const dtNodePool&);
+	dtNodePool& operator=(const dtNodePool&);
+
+	dtNode* m_nodes;
+	dtNodeIndex* m_first;
+	dtNodeIndex* m_next;
+	const int m_maxNodes;
+	const int m_hashSize;
+	int m_nodeCount;
+};
+
+/// Heap of dtNode pointers; ordering is maintained by bubbleUp() and
+/// trickleDown(), which are defined outside this header.
+class dtNodeQueue
+{
+public:
+	dtNodeQueue(int n);
+	~dtNodeQueue();
+
+	/// Forgets all queued nodes without touching the storage.
+	inline void clear() { m_size = 0; }
+
+	/// Returns the node at the root of the heap without removing it.
+	inline dtNode* top() { return m_heap[0]; }
+
+	/// Removes and returns the root node, re-inserting the last element from the root.
+	// NOTE(review): there is no emptiness check here; callers are expected to test empty() first.
+	inline dtNode* pop()
+	{
+		dtNode* const root = m_heap[0];
+		dtNode* const last = m_heap[--m_size];
+		trickleDown(0, last);
+		return root;
+	}
+
+	/// Appends the node at the end of the heap and lets it bubble up.
+	// NOTE(review): there is no capacity check here.
+	inline void push(dtNode* node)
+	{
+		bubbleUp(m_size++, node);
+	}
+
+	/// Searches the heap linearly for the node and, when found, bubbles it up
+	/// from its current slot. Nodes that are not queued are ignored.
+	inline void modify(dtNode* node)
+	{
+		int slot = 0;
+		while (slot < m_size && m_heap[slot] != node)
+			++slot;
+		if (slot < m_size)
+			bubbleUp(slot, node);
+	}
+
+	inline bool empty() const { return m_size == 0; }
+
+	/// Size in bytes of this object plus (m_capacity + 1) node pointers.
+	inline int getMemUsed() const
+	{
+		return sizeof(*this) + sizeof(dtNode*) * (m_capacity + 1);
+	}
+
+	inline int getCapacity() const { return m_capacity; }
+
+private:
+	// Copying is disabled: declared but intentionally left without a public definition.
+	dtNodeQueue(const dtNodeQueue&);
+	dtNodeQueue& operator=(const dtNodeQueue&);
+
+	void bubbleUp(int i, dtNode* node);
+	void trickleDown(int i, dtNode* node);
+
+	dtNode** m_heap;
+	const int m_capacity;
+	int m_size;
+};
+
+
+#endif // DETOURNODE_H
--- a/deps/recastnavigation/Detour/Include/DetourStatus.h
+++ b/deps/recastnavigation/Detour/Include/DetourStatus.h
@@ -0,0 +1,64 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#ifndef DETOURSTATUS_H
+#define DETOURSTATUS_H
+
+// Status value returned by Detour operations. It is a bit field: the high level
+// status flags use the top bits, the detail flags use the low bits.
+typedef unsigned int dtStatus;
+
+// High level status.
+static const unsigned int DT_FAILURE = 1u << 31;			// Operation failed.
+static const unsigned int DT_SUCCESS = 1u << 30;			// Operation succeeded.
+static const unsigned int DT_IN_PROGRESS = 1u << 29;		// Operation still in progress.
+
+// Detail information for status.
+static const unsigned int DT_STATUS_DETAIL_MASK = 0x0ffffff;	// Low 24 bits; all detail flags below fall inside this mask.
+static const unsigned int DT_WRONG_MAGIC = 1 << 0;		// Input data is not recognized.
+static const unsigned int DT_WRONG_VERSION = 1 << 1;	// Input data is in wrong version.
+static const unsigned int DT_OUT_OF_MEMORY = 1 << 2;	// Operation ran out of memory.
+static const unsigned int DT_INVALID_PARAM = 1 << 3;	// An input parameter was invalid.
+static const unsigned int DT_BUFFER_TOO_SMALL = 1 << 4;	// Result buffer for the query was too small to store all results.
+static const unsigned int DT_OUT_OF_NODES = 1 << 5;		// Query ran out of nodes during search.
+static const unsigned int DT_PARTIAL_RESULT = 1 << 6;	// Query did not reach the end location, returning best guess.
+
+
+// Returns true if the DT_SUCCESS bit is set in status.
+inline bool dtStatusSucceed(dtStatus status)
+{
+	const dtStatus successBit = status & DT_SUCCESS;
+	return successBit != 0;
+}
+
+// Returns true if the DT_FAILURE bit is set in status.
+inline bool dtStatusFailed(dtStatus status)
+{
+	const dtStatus failureBit = status & DT_FAILURE;
+	return failureBit != 0;
+}
+
+// Returns true if the DT_IN_PROGRESS bit is set in status.
+inline bool dtStatusInProgress(dtStatus status)
+{
+	const dtStatus progressBit = status & DT_IN_PROGRESS;
+	return progressBit != 0;
+}
+
+// Returns true if status shares at least one set bit with detail.
+inline bool dtStatusDetail(dtStatus status, unsigned int detail)
+{
+	const unsigned int common = status & detail;
+	return common != 0;
+}
+
+#endif // DETOURSTATUS_H
--- a/deps/recastnavigation/Detour/Source/DetourAlloc.cpp
+++ b/deps/recastnavigation/Detour/Source/DetourAlloc.cpp
@@ -0,0 +1,50 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#include <stdlib.h>
+#include "DetourAlloc.h"
+
+// Default allocation hook: forwards to malloc() and ignores the allocation hint.
+static void *dtAllocDefault(size_t size, dtAllocHint)
+{
+	void* const block = malloc(size);
+	return block;
+}
+
+// Default deallocation hook: forwards to free().
+static void dtFreeDefault(void *ptr)
+{
+	free(ptr);
+}
+
+// Currently installed allocation hooks. They start out as the malloc()/free()
+// based defaults and are replaced through dtAllocSetCustom().
+static dtAllocFunc* sAllocFunc = dtAllocDefault;
+static dtFreeFunc* sFreeFunc = dtFreeDefault;
+
+// Installs custom allocation hooks. A null hook selects the corresponding
+// default implementation again.
+void dtAllocSetCustom(dtAllocFunc *allocFunc, dtFreeFunc *freeFunc)
+{
+	if (allocFunc)
+		sAllocFunc = allocFunc;
+	else
+		sAllocFunc = dtAllocDefault;
+
+	if (freeFunc)
+		sFreeFunc = freeFunc;
+	else
+		sFreeFunc = dtFreeDefault;
+}
+
+// Allocates size bytes through the currently installed allocation hook.
+void* dtAlloc(size_t size, dtAllocHint hint)
+{
+	void* const block = (*sAllocFunc)(size, hint);
+	return block;
+}
+
+// Releases ptr through the currently installed free hook.
+// A null pointer is never passed on to the hook.
+void dtFree(void* ptr)
+{
+	if (!ptr)
+		return;
+	(*sFreeFunc)(ptr);
+}
--- a/deps/recastnavigation/Detour/Source/DetourCommon.cpp
+++ b/deps/recastnavigation/Detour/Source/DetourCommon.cpp
@@ -0,0 +1,388 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#include "DetourCommon.h"
+#include "DetourMath.h"
+
+//////////////////////////////////////////////////////////////////////////////////////////
+
+/// Computes the point on triangle (a, b, c) that is closest to p.
+/// The point is classified against the three vertex regions, the three edge
+/// regions and finally the face region of the triangle; the first matching
+/// region determines the result.
+///  @param[out]	closest		The closest point on the triangle. [(x, y, z)]
+///  @param[in]		p			The point to test. [(x, y, z)]
+///  @param[in]		a			First triangle vertex. [(x, y, z)]
+///  @param[in]		b			Second triangle vertex. [(x, y, z)]
+///  @param[in]		c			Third triangle vertex. [(x, y, z)]
+void dtClosestPtPointTriangle(float* closest, const float* p,
+							  const float* a, const float* b, const float* c)
+{
+	// Check if P in vertex region outside A
+	float ab[3], ac[3], ap[3];
+	dtVsub(ab, b, a);
+	dtVsub(ac, c, a);
+	dtVsub(ap, p, a);
+	float d1 = dtVdot(ab, ap);
+	float d2 = dtVdot(ac, ap);
+	if (d1 <= 0.0f && d2 <= 0.0f)
+	{
+		// barycentric coordinates (1,0,0)
+		dtVcopy(closest, a);
+		return;
+	}
+	
+	// Check if P in vertex region outside B
+	float bp[3];
+	dtVsub(bp, p, b);
+	float d3 = dtVdot(ab, bp);
+	float d4 = dtVdot(ac, bp);
+	if (d3 >= 0.0f && d4 <= d3)
+	{
+		// barycentric coordinates (0,1,0)
+		dtVcopy(closest, b);
+		return;
+	}
+	
+	// Check if P in edge region of AB, if so return projection of P onto AB
+	float vc = d1*d4 - d3*d2;
+	if (vc <= 0.0f && d1 >= 0.0f && d3 <= 0.0f)
+	{
+		// barycentric coordinates (1-v,v,0)
+		float v = d1 / (d1 - d3);
+		closest[0] = a[0] + v * ab[0];
+		closest[1] = a[1] + v * ab[1];
+		closest[2] = a[2] + v * ab[2];
+		return;
+	}
+	
+	// Check if P in vertex region outside C
+	float cp[3];
+	dtVsub(cp, p, c);
+	float d5 = dtVdot(ab, cp);
+	float d6 = dtVdot(ac, cp);
+	if (d6 >= 0.0f && d5 <= d6)
+	{
+		// barycentric coordinates (0,0,1)
+		dtVcopy(closest, c);
+		return;
+	}
+	
+	// Check if P in edge region of AC, if so return projection of P onto AC
+	float vb = d5*d2 - d1*d6;
+	if (vb <= 0.0f && d2 >= 0.0f && d6 <= 0.0f)
+	{
+		// barycentric coordinates (1-w,0,w)
+		float w = d2 / (d2 - d6);
+		closest[0] = a[0] + w * ac[0];
+		closest[1] = a[1] + w * ac[1];
+		closest[2] = a[2] + w * ac[2];
+		return;
+	}
+	
+	// Check if P in edge region of BC, if so return projection of P onto BC
+	float va = d3*d6 - d5*d4;
+	if (va <= 0.0f && (d4 - d3) >= 0.0f && (d5 - d6) >= 0.0f)
+	{
+		// barycentric coordinates (0,1-w,w)
+		float w = (d4 - d3) / ((d4 - d3) + (d5 - d6));
+		closest[0] = b[0] + w * (c[0] - b[0]);
+		closest[1] = b[1] + w * (c[1] - b[1]);
+		closest[2] = b[2] + w * (c[2] - b[2]);
+		return;
+	}
+	
+	// P inside face region. Compute Q through its barycentric coordinates (u,v,w)
+	// NOTE(review): the divisions in this function are not guarded against a zero
+	// denominator; degenerate triangles are presumably excluded by the callers - confirm.
+	float denom = 1.0f / (va + vb + vc);
+	float v = vb * denom;
+	float w = vc * denom;
+	closest[0] = a[0] + ab[0] * v + ac[0] * w;
+	closest[1] = a[1] + ab[1] * v + ac[1] * w;
+	closest[2] = a[2] + ab[2] * v + ac[2] * w;
+}
+
+/// Clips the segment p0->p1 against every edge of the polygon.
+/// On return tmin/tmax hold the clipped parameter range along the segment
+/// (starting from [0, 1]) and segMin/segMax the start-vertex index of the edge
+/// that produced each bound, or -1 when no edge tightened that bound.
+///  @param[in]		p0, p1		Segment end points.
+///  @param[in]		verts		Polygon vertices, 3 floats per vertex.
+///  @param[in]		nverts		Number of polygon vertices.
+/// @returns False as soon as the range becomes empty or the segment is found
+///          on the negative side of a nearly parallel edge; true otherwise.
+bool dtIntersectSegmentPoly2D(const float* p0, const float* p1,
+							  const float* verts, int nverts,
+							  float& tmin, float& tmax,
+							  int& segMin, int& segMax)
+{
+	static const float EPS = 0.00000001f;
+
+	// Begin with the full segment and no clipping edge on either side.
+	tmin = 0;
+	tmax = 1;
+	segMin = -1;
+	segMax = -1;
+
+	float dir[3];
+	dtVsub(dir, p1, p0);
+
+	int prev = nverts-1;
+	for (int cur = 0; cur < nverts; prev = cur, ++cur)
+	{
+		const float* edgeStart = &verts[prev*3];
+		float edge[3], diff[3];
+		dtVsub(edge, &verts[cur*3], edgeStart);
+		dtVsub(diff, p0, edgeStart);
+		const float n = dtVperp2D(edge, diff);
+		const float d = dtVperp2D(dir, edge);
+
+		if (fabsf(d) < EPS)
+		{
+			// The segment is nearly parallel to this edge.
+			if (n < 0)
+				return false;
+			continue;
+		}
+
+		const float t = n / d;
+		const bool entering = d < 0;
+		if (entering && t > tmin)
+		{
+			// The segment enters across this edge.
+			tmin = t;
+			segMin = prev;
+			// It enters only after it has already left the polygon.
+			if (tmin > tmax)
+				return false;
+		}
+		else if (!entering && t < tmax)
+		{
+			// The segment leaves across this edge.
+			tmax = t;
+			segMax = prev;
+			// It leaves before it has entered the polygon.
+			if (tmax < tmin)
+				return false;
+		}
+	}
+
+	return true;
+}
+
+/// Squared distance in the xz-plane between pt and the segment p->q.
+///  @param[in]		pt		The point. Only x and z are read.
+///  @param[in]		p, q	The segment end points. Only x and z are read.
+///  @param[out]	t		Parameter of the nearest point along p->q, clamped to [0, 1].
+/// @returns The squared xz-distance from pt to that nearest point.
+float dtDistancePtSegSqr2D(const float* pt, const float* p, const float* q, float& t)
+{
+	const float segX = q[0] - p[0];
+	const float segZ = q[2] - p[2];
+	const float lenSqr = segX*segX + segZ*segZ;
+
+	// Project pt onto the segment; a zero-length segment skips the normalization.
+	t = segX*(pt[0] - p[0]) + segZ*(pt[2] - p[2]);
+	if (lenSqr > 0)
+		t /= lenSqr;
+
+	// Clamp the projection onto the segment.
+	if (t < 0)
+		t = 0;
+	else if (t > 1)
+		t = 1;
+
+	const float offX = p[0] + t*segX - pt[0];
+	const float offZ = p[2] + t*segZ - pt[2];
+	return offX*offX + offZ*offZ;
+}
+
+/// Computes the average of the polygon's vertices.
+///  @param[out]	tc		The resulting center. [(x, y, z)]
+///  @param[in]		idx		Indices of the polygon's vertices in verts.
+///  @param[in]		nidx	Number of entries in idx.
+///  @param[in]		verts	Vertex array, 3 floats per vertex.
+/// When nidx is zero or negative the center is set to (0, 0, 0) instead of
+/// dividing by zero.
+void dtCalcPolyCenter(float* tc, const unsigned short* idx, int nidx, const float* verts)
+{
+	tc[0] = 0.0f;
+	tc[1] = 0.0f;
+	tc[2] = 0.0f;
+	// Without vertices there is nothing to average; 1.0f / nidx would divide by zero.
+	if (nidx <= 0)
+		return;
+	for (int j = 0; j < nidx; ++j)
+	{
+		const float* v = &verts[idx[j]*3];
+		tc[0] += v[0];
+		tc[1] += v[1];
+		tc[2] += v[2];
+	}
+	const float s = 1.0f / nidx;
+	tc[0] *= s;
+	tc[1] *= s;
+	tc[2] *= s;
+}
+
+/// Interpolates the height (y) of triangle (a, b, c) at the xz-position of p.
+///  @param[in]		p		The query position. [(x, y, z)]
+///  @param[in]		a, b, c	The triangle vertices. [(x, y, z)]
+///  @param[out]	h		The interpolated height. Only written when true is returned.
+/// @returns True if p lies within the triangle in the xz-plane (with a small tolerance).
+bool dtClosestHeightPointTriangle(const float* p, const float* a, const float* b, const float* c, float& h)
+{
+	float v0[3], v1[3], v2[3];
+	dtVsub(v0, c,a);
+	dtVsub(v1, b,a);
+	dtVsub(v2, p,a);
+	
+	const float dot00 = dtVdot2D(v0, v0);
+	const float dot01 = dtVdot2D(v0, v1);
+	const float dot02 = dtVdot2D(v0, v2);
+	const float dot11 = dtVdot2D(v1, v1);
+	const float dot12 = dtVdot2D(v1, v2);
+	
+	// Compute barycentric coordinates
+	// NOTE(review): for a triangle that is degenerate in xz the denominator is zero;
+	// the outcome then relies on IEEE inf/NaN comparison semantics - confirm this is intended.
+	const float invDenom = 1.0f / (dot00 * dot11 - dot01 * dot01);
+	const float u = (dot11 * dot02 - dot01 * dot12) * invDenom;
+	const float v = (dot00 * dot12 - dot01 * dot02) * invDenom;
+
+	// The (sloppy) epsilon is needed to allow to get height of points which
+	// are interpolated along the edges of the triangles.
+	static const float EPS = 1e-4f;
+	
+	// If point lies inside the triangle, return interpolated ycoord.
+	if (u >= -EPS && v >= -EPS && (u+v) <= 1+EPS)
+	{
+		h = a[1] + v0[1]*u + v1[1]*v;
+		return true;
+	}
+	
+	return false;
+}
+
+/// @par
+///
+/// All points are projected onto the xz-plane, so the y-values are ignored.
+/// The test toggles an inside flag for each polygon edge that straddles the
+/// point's z-value and crosses that z-value at an x greater than the point's x.
+bool dtPointInPolygon(const float* pt, const float* verts, const int nverts)
+{
+	// TODO: Replace pnpoly with triArea2D tests?
+	bool inside = false;
+	int prev = nverts-1;
+	for (int cur = 0; cur < nverts; prev = cur++)
+	{
+		const float* vi = &verts[cur*3];
+		const float* vj = &verts[prev*3];
+
+		// Edges that do not straddle the point's z-value cannot be crossed.
+		const bool straddles = (vi[2] > pt[2]) != (vj[2] > pt[2]);
+		if (!straddles)
+			continue;
+
+		if (pt[0] < (vj[0]-vi[0]) * (pt[2]-vi[2]) / (vj[2]-vi[2]) + vi[0])
+			inside = !inside;
+	}
+	return inside;
+}
+
+/// Runs the same xz-plane inside test as dtPointInPolygon() and additionally
+/// records, for every edge, the squared xz-distance from pt to that edge.
+///  @param[in]		pt		The point to test. [(x, y, z)]
+///  @param[in]		verts	Polygon vertices, 3 floats per vertex.
+///  @param[in]		nverts	Number of polygon vertices.
+///  @param[out]	ed		Squared distance to each edge, indexed by the edge's start vertex. [nverts]
+///  @param[out]	et		Clamped segment parameter of the nearest point on each edge. [nverts]
+/// @returns True if the point is inside the polygon.
+bool dtDistancePtPolyEdgesSqr(const float* pt, const float* verts, const int nverts,
+							  float* ed, float* et)
+{
+	// TODO: Replace pnpoly with triArea2D tests?
+	bool inside = false;
+	int prev = nverts-1;
+	for (int cur = 0; cur < nverts; prev = cur++)
+	{
+		const float* vi = &verts[cur*3];
+		const float* vj = &verts[prev*3];
+		const bool straddles = (vi[2] > pt[2]) != (vj[2] > pt[2]);
+		if (straddles)
+		{
+			if (pt[0] < (vj[0]-vi[0]) * (pt[2]-vi[2]) / (vj[2]-vi[2]) + vi[0])
+				inside = !inside;
+		}
+		// The edge runs from vertex prev to vertex cur and is stored under prev.
+		ed[prev] = dtDistancePtSegSqr2D(pt, vj, vi, et[prev]);
+	}
+	return inside;
+}
+
+// Projects every vertex of poly onto axis with dtVdot2D() and reports the
+// smallest and largest projection through rmin and rmax.
+static void projectPoly(const float* axis, const float* poly, const int npoly,
+						float& rmin, float& rmax)
+{
+	rmin = rmax = dtVdot2D(axis, &poly[0]);
+	const float* vert = poly;
+	for (int k = 1; k < npoly; ++k)
+	{
+		vert += 3;
+		const float proj = dtVdot2D(axis, vert);
+		rmin = dtMin(rmin, proj);
+		rmax = dtMax(rmax, proj);
+	}
+}
+
+// Returns true if the ranges [amin, amax] and [bmin, bmax] overlap by more than eps.
+inline bool overlapRange(const float amin, const float amax,
+						 const float bmin, const float bmax,
+						 const float eps)
+{
+	const bool apart = (amin+eps) > bmax || (amax-eps) < bmin;
+	return !apart;
+}
+
+// Tests the xz-plane normals of all edges of the polygon (edgeVerts, nedges) as
+// candidate separating axes between polya and polyb.
+// Returns true as soon as one of them separates the two polygons.
+static bool hasSeparatingEdgeNormal(const float* edgeVerts, const int nedges,
+									const float* polya, const int npolya,
+									const float* polyb, const int npolyb,
+									const float eps)
+{
+	int prev = nedges-1;
+	for (int cur = 0; cur < nedges; prev = cur++)
+	{
+		const float* va = &edgeVerts[prev*3];
+		const float* vb = &edgeVerts[cur*3];
+		const float n[3] = { vb[2]-va[2], 0, -(vb[0]-va[0]) };
+		float amin,amax,bmin,bmax;
+		projectPoly(n, polya, npolya, amin,amax);
+		projectPoly(n, polyb, npolyb, bmin,bmax);
+		if (!overlapRange(amin,amax, bmin,bmax, eps))
+			return true;
+	}
+	return false;
+}
+
+/// @par
+///
+/// All vertices are projected onto the xz-plane, so the y-values are ignored.
+/// The edge normals of polya are tested first, then those of polyb; the polygons
+/// are reported as overlapping when none of them is a separating axis.
+bool dtOverlapPolyPoly2D(const float* polya, const int npolya,
+						 const float* polyb, const int npolyb)
+{
+	const float eps = 1e-4f;
+
+	if (hasSeparatingEdgeNormal(polya, npolya, polya, npolya, polyb, npolyb, eps))
+		return false;
+	if (hasSeparatingEdgeNormal(polyb, npolyb, polya, npolya, polyb, npolyb, eps))
+		return false;
+	return true;
+}
+
+// Returns a random point in a convex polygon.
+// Adapted from Graphics Gems article.
+//  pts    Polygon vertices, 3 floats per vertex. The polygon is treated as a
+//         triangle fan around pts[0], so npts must be at least 3.
+//  npts   Number of vertices.
+//  areas  Scratch buffer; entries 2..npts-1 are overwritten with the fan triangle areas.
+//  s      Random value that selects the fan triangle, weighted by area.
+//  t      Random value that selects the point inside the chosen triangle.
+//         NOTE(review): s and t are presumably drawn from [0..1) - confirm against the callers.
+//  out    Receives the random point. [(x, y, z)]
+void dtRandomPointInConvexPoly(const float* pts, const int npts, float* areas,
+							   const float s, const float t, float* out)
+{
+	// Calc triangle areas
+	float areasum = 0.0f;
+	for (int i = 2; i < npts; i++) {
+		areas[i] = dtTriArea2D(&pts[0], &pts[(i-1)*3], &pts[i*3]);
+		areasum += dtMax(0.001f, areas[i]);
+	}
+	// Find sub triangle weighted by area.
+	// The sum above clamps tiny areas up to 0.001 while the walk below uses the raw
+	// areas, so thr can lie beyond the accumulated total and no triangle matches.
+	// Fall back to the far end of the last fan triangle in that case; starting from
+	// tri = 0 would make pts[(tri-1)*3] read in front of the vertex array.
+	const float thr = s*areasum;
+	float acc = 0.0f;
+	float u = 1.0f;
+	int tri = npts - 1;
+	for (int i = 2; i < npts; i++) {
+		const float dacc = areas[i];
+		if (thr >= acc && thr < (acc+dacc))
+		{
+			u = (thr - acc) / dacc;
+			tri = i;
+			break;
+		}
+		acc += dacc;
+	}
+	
+	float v = dtMathSqrtf(t);
+	
+	// Weights of pa, pb and pc; a + b + c == 1.
+	const float a = 1 - v;
+	const float b = (1 - u) * v;
+	const float c = u * v;
+	const float* pa = &pts[0];
+	const float* pb = &pts[(tri-1)*3];
+	const float* pc = &pts[tri*3];
+	
+	out[0] = a*pa[0] + b*pb[0] + c*pc[0];
+	out[1] = a*pa[1] + b*pb[1] + c*pc[1];
+	out[2] = a*pa[2] + b*pb[2] + c*pc[2];
+}
+
+// 2D perp product of a and b using their x and z components.
+inline float vperpXZ(const float* a, const float* b)
+{
+	const float cross = a[0]*b[2] - a[2]*b[0];
+	return cross;
+}
+
+/// Intersects the lines through segments ap->aq and bp->bq in the xz-plane.
+///  @param[in]		ap, aq	End points of the first segment.
+///  @param[in]		bp, bq	End points of the second segment.
+///  @param[out]	s		Intersection parameter along ap->aq. Only written on success.
+///  @param[out]	t		Intersection parameter along bp->bq. Only written on success.
+/// @returns False if the segments are (nearly) parallel, true otherwise.
+///          The parameters are not clamped to the segments.
+bool dtIntersectSegSeg2D(const float* ap, const float* aq,
+						 const float* bp, const float* bq,
+						 float& s, float& t)
+{
+	float dirA[3], dirB[3], offset[3];
+	dtVsub(dirA, aq, ap);
+	dtVsub(dirB, bq, bp);
+	dtVsub(offset, ap, bp);
+
+	const float denom = vperpXZ(dirA, dirB);
+	if (fabsf(denom) < 1e-6f)
+		return false;
+
+	s = vperpXZ(dirB, offset) / denom;
+	t = vperpXZ(dirA, offset) / denom;
+	return true;
+}
+
--- a/deps/recastnavigation/Detour/Source/DetourNavMesh.cpp
+++ b/deps/recastnavigation/Detour/Source/DetourNavMesh.cpp
--- a/deps/recastnavigation/Detour/Source/DetourNavMeshBuilder.cpp
+++ b/deps/recastnavigation/Detour/Source/DetourNavMeshBuilder.cpp
@@ -0,0 +1,777 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <float.h>
+#include "DetourNavMesh.h"
+#include "DetourCommon.h"
+#include "DetourMath.h"
+#include "DetourNavMeshBuilder.h"
+#include "DetourAlloc.h"
+#include "DetourAssert.h"
+
+// Marks an unused vertex slot in a polygon's index list. Declared const so the
+// marker cannot be modified at run time.
+static const unsigned short MESH_NULL_IDX = 0xffff;
+
+
+// Work item used while building the bounding volume tree: the bounds of one
+// polygon together with the polygon's index.
+struct BVItem
+{
+	unsigned short bmin[3];	// Minimum corner of the polygon's bounds.
+	unsigned short bmax[3];	// Maximum corner of the polygon's bounds.
+	int i;					// Index of the polygon the bounds belong to.
+};
+
+// qsort() comparator that orders BVItems by ascending bmin[0].
+static int compareItemX(const void* va, const void* vb)
+{
+	const unsigned short lhs = ((const BVItem*)va)->bmin[0];
+	const unsigned short rhs = ((const BVItem*)vb)->bmin[0];
+	return (lhs > rhs) - (lhs < rhs);
+}
+
+// qsort() comparator that orders BVItems by ascending bmin[1].
+static int compareItemY(const void* va, const void* vb)
+{
+	const unsigned short lhs = ((const BVItem*)va)->bmin[1];
+	const unsigned short rhs = ((const BVItem*)vb)->bmin[1];
+	return (lhs > rhs) - (lhs < rhs);
+}
+
+// qsort() comparator that orders BVItems by ascending bmin[2].
+static int compareItemZ(const void* va, const void* vb)
+{
+	const unsigned short lhs = ((const BVItem*)va)->bmin[2];
+	const unsigned short rhs = ((const BVItem*)vb)->bmin[2];
+	return (lhs > rhs) - (lhs < rhs);
+}
+
+// Computes the combined bounds of items[imin] .. items[imax-1] and stores
+// them in bmin and bmax. The range must contain at least one item.
+static void calcExtends(BVItem* items, const int /*nitems*/, const int imin, const int imax,
+						unsigned short* bmin, unsigned short* bmax)
+{
+	// Seed the bounds with the first item of the range.
+	const BVItem& first = items[imin];
+	for (int axis = 0; axis < 3; ++axis)
+	{
+		bmin[axis] = first.bmin[axis];
+		bmax[axis] = first.bmax[axis];
+	}
+
+	// Grow the bounds with every remaining item.
+	for (int i = imin+1; i < imax; ++i)
+	{
+		const BVItem& it = items[i];
+		for (int axis = 0; axis < 3; ++axis)
+		{
+			if (it.bmin[axis] < bmin[axis]) bmin[axis] = it.bmin[axis];
+			if (it.bmax[axis] > bmax[axis]) bmax[axis] = it.bmax[axis];
+		}
+	}
+}
+
+// Returns the index (0 = x, 1 = y, 2 = z) of the largest of the three extents.
+// Ties are resolved in favour of the lower axis index.
+inline int longestAxis(unsigned short x, unsigned short y, unsigned short z)
+{
+	const bool yBeatsX = y > x;
+	const unsigned short largerXY = yBeatsX ? y : x;
+	if (z > largerXY)
+		return 2;
+	return yBeatsX ? 1 : 0;
+}
+
+// Recursively builds the bounding volume tree for items[imin] .. items[imax-1].
+// Nodes are appended to nodes starting at curNode, which is advanced by the
+// number of nodes written. A leaf stores the polygon index in node.i; an inner
+// node stores the negated size of its subtree (itself included) as escape offset.
+static void subdivide(BVItem* items, int nitems, int imin, int imax, int& curNode, dtBVNode* nodes)
+{
+	const int count = imax - imin;
+	const int firstNode = curNode;
+
+	dtBVNode& node = nodes[curNode++];
+
+	if (count == 1)
+	{
+		// Leaf: take over the bounds and the index of the single item.
+		const BVItem& leaf = items[imin];
+		for (int axis = 0; axis < 3; ++axis)
+		{
+			node.bmin[axis] = leaf.bmin[axis];
+			node.bmax[axis] = leaf.bmax[axis];
+		}
+		node.i = leaf.i;
+		return;
+	}
+
+	// Inner node: bounds of the whole range.
+	calcExtends(items, nitems, imin, imax, node.bmin, node.bmax);
+
+	const int axis = longestAxis(node.bmax[0] - node.bmin[0],
+								 node.bmax[1] - node.bmin[1],
+								 node.bmax[2] - node.bmin[2]);
+
+	// Sort the range along the longest axis of the bounds.
+	int (*compare)(const void*, const void*) = compareItemZ;
+	if (axis == 0)
+		compare = compareItemX;
+	else if (axis == 1)
+		compare = compareItemY;
+	qsort(items+imin, count, sizeof(BVItem), compare);
+
+	// Split the sorted range in the middle and recurse: left half first, then right.
+	const int isplit = imin+count/2;
+	subdivide(items, nitems, imin, isplit, curNode, nodes);
+	subdivide(items, nitems, isplit, imax, curNode, nodes);
+
+	// Negative index means escape.
+	node.i = -(curNode - firstNode);
+}
+
+// Builds the bounding volume tree for the polygons of a tile.
+//  verts   Vertex array, 3 unsigned shorts per vertex.
+//  polys   Polygon records with a stride of nvp*2 shorts; the first nvp entries
+//          of a record are vertex indices, terminated early by MESH_NULL_IDX.
+//  cs, ch  Used to rescale the y bounds by ch/cs.
+//  nodes   Receives the tree nodes.
+// Returns the number of nodes written.
+static int createBVTree(const unsigned short* verts, const int /*nverts*/,
+						const unsigned short* polys, const int npolys, const int nvp,
+						const float cs, const float ch,
+						const int /*nnodes*/, dtBVNode* nodes)
+{
+	// NOTE(review): the result of dtAlloc() is used without a null check.
+	BVItem* items = (BVItem*)dtAlloc(sizeof(BVItem)*npolys, DT_ALLOC_TEMP);
+	for (int polyIdx = 0; polyIdx < npolys; ++polyIdx)
+	{
+		BVItem& item = items[polyIdx];
+		item.i = polyIdx;
+
+		// Seed the bounds with the polygon's first vertex.
+		const unsigned short* p = &polys[polyIdx*nvp*2];
+		const unsigned short* firstVert = &verts[p[0]*3];
+		for (int axis = 0; axis < 3; ++axis)
+			item.bmin[axis] = item.bmax[axis] = firstVert[axis];
+
+		// Grow the bounds with the remaining vertices of the polygon.
+		for (int j = 1; j < nvp && p[j] != MESH_NULL_IDX; ++j)
+		{
+			const unsigned short* v = &verts[p[j]*3];
+			for (int axis = 0; axis < 3; ++axis)
+			{
+				if (v[axis] < item.bmin[axis]) item.bmin[axis] = v[axis];
+				if (v[axis] > item.bmax[axis]) item.bmax[axis] = v[axis];
+			}
+		}
+
+		// Remap y
+		item.bmin[1] = (unsigned short)dtMathFloorf((float)item.bmin[1]*ch/cs);
+		item.bmax[1] = (unsigned short)dtMathCeilf((float)item.bmax[1]*ch/cs);
+	}
+
+	int curNode = 0;
+	subdivide(items, npolys, 0, npolys, curNode, nodes);
+
+	dtFree(items);
+
+	return curNode;
+}
+
+// Classifies pt against the xz-rectangle [bmin, bmax).
+// Returns a side code 0..7 when the point lies outside the rectangle:
+//   0 = +x, 1 = +x+z, 2 = +z, 3 = -x+z, 4 = -x, 5 = -x-z, 6 = -z, 7 = +x-z
+// and 0xff when the point is inside (or the outcode matches none of the sides).
+static unsigned char classifyOffMeshPoint(const float* pt, const float* bmin, const float* bmax)
+{
+	static const unsigned char XP = 1<<0;
+	static const unsigned char ZP = 1<<1;
+	static const unsigned char XM = 1<<2;
+	static const unsigned char ZM = 1<<3;
+
+	// Side code for each of the 16 possible outcodes, indexed by the outcode value.
+	static const unsigned char SIDE_OF_OUTCODE[16] =
+	{
+		0xff,	// inside
+		0,		// XP
+		2,		// ZP
+		1,		// XP|ZP
+		4,		// XM
+		0xff,	// XP|XM
+		3,		// XM|ZP
+		0xff,	// XP|ZP|XM
+		6,		// ZM
+		7,		// XP|ZM
+		0xff,	// ZP|ZM
+		0xff,	// XP|ZP|ZM
+		5,		// XM|ZM
+		0xff,	// XP|XM|ZM
+		0xff,	// ZP|XM|ZM
+		0xff	// XP|ZP|XM|ZM
+	};
+
+	unsigned char outcode = 0;
+	if (pt[0] >= bmax[0]) outcode |= XP;
+	if (pt[2] >= bmax[2]) outcode |= ZP;
+	if (pt[0] < bmin[0]) outcode |= XM;
+	if (pt[2] < bmin[2]) outcode |= ZM;
+
+	return SIDE_OF_OUTCODE[outcode];
+}
+
+// TODO: Better error handling.
+
+/// @par
+/// 
+/// The output data array is allocated using the detour allocator (dtAlloc()).  The method
+/// used to free the memory will be determined by how the tile is added to the navigation
+/// mesh.  Returns false, without writing the outputs, if the input is rejected or an allocation fails.
+///
+/// @see dtNavMesh, dtNavMesh::addTile()
+bool dtCreateNavMeshData(dtNavMeshCreateParams* params, unsigned char** outData, int* outDataSize)
+{
+	if (params->nvp > DT_VERTS_PER_POLYGON)
+		return false;
+	if (params->vertCount >= 0xffff)
+		return false;
+	if (!params->vertCount || !params->verts)
+		return false;
+	if (!params->polyCount || !params->polys)
+		return false;
+
+	const int nvp = params->nvp;
+	
+	// Classify off-mesh connection points. We store only the connections
+	// whose start point is inside the tile.
+	unsigned char* offMeshConClass = 0;
+	int storedOffMeshConCount = 0;
+	int offMeshConLinkCount = 0;
+	
+	if (params->offMeshConCount > 0)
+	{
+		offMeshConClass = (unsigned char*)dtAlloc(sizeof(unsigned char)*params->offMeshConCount*2, DT_ALLOC_TEMP);
+		if (!offMeshConClass)
+			return false;
+
+		// Find tight height bounds, used for culling out off-mesh start locations.
+		float hmin = FLT_MAX;
+		float hmax = -FLT_MAX;
+		
+		if (params->detailVerts && params->detailVertsCount)
+		{
+			for (int i = 0; i < params->detailVertsCount; ++i)
+			{
+				const float h = params->detailVerts[i*3+1];
+				hmin = dtMin(hmin,h);
+				hmax = dtMax(hmax,h);
+			}
+		}
+		else
+		{
+			for (int i = 0; i < params->vertCount; ++i)
+			{
+				const unsigned short* iv = &params->verts[i*3];
+				const float h = params->bmin[1] + iv[1] * params->ch;
+				hmin = dtMin(hmin,h);
+				hmax = dtMax(hmax,h);
+			}
+		}
+		hmin -= params->walkableClimb;
+		hmax += params->walkableClimb;
+		float bmin[3], bmax[3];
+		dtVcopy(bmin, params->bmin);
+		dtVcopy(bmax, params->bmax);
+		bmin[1] = hmin;
+		bmax[1] = hmax;
+
+		for (int i = 0; i < params->offMeshConCount; ++i)
+		{
+			const float* p0 = &params->offMeshConVerts[(i*2+0)*3];
+			const float* p1 = &params->offMeshConVerts[(i*2+1)*3];
+			offMeshConClass[i*2+0] = classifyOffMeshPoint(p0, bmin, bmax);
+			offMeshConClass[i*2+1] = classifyOffMeshPoint(p1, bmin, bmax);
+
+			// Zero out off-mesh start positions which are not even potentially touching the mesh.
+			if (offMeshConClass[i*2+0] == 0xff)
+			{
+				if (p0[1] < bmin[1] || p0[1] > bmax[1])
+					offMeshConClass[i*2+0] = 0; // Any value other than 0xff excludes the connection in the checks below.
+			}
+
+			// Count how many links should be allocated for off-mesh connections.
+			if (offMeshConClass[i*2+0] == 0xff)
+				offMeshConLinkCount++;
+			if (offMeshConClass[i*2+1] == 0xff)
+				offMeshConLinkCount++;
+
+			if (offMeshConClass[i*2+0] == 0xff)
+				storedOffMeshConCount++;
+		}
+	}
+	
+	// Off-mesh connections are stored as polygons, adjust values.
+	const int totPolyCount = params->polyCount + storedOffMeshConCount;
+	const int totVertCount = params->vertCount + storedOffMeshConCount*2;
+	
+	// Find portal edges which are at tile borders.
+	int edgeCount = 0;
+	int portalCount = 0;
+	for (int i = 0; i < params->polyCount; ++i)
+	{
+		const unsigned short* p = &params->polys[i*2*nvp];
+		for (int j = 0; j < nvp; ++j)
+		{
+			if (p[j] == MESH_NULL_IDX) break;
+			edgeCount++;
+			
+			if (p[nvp+j] & 0x8000)
+			{
+				unsigned short dir = p[nvp+j] & 0xf;
+				if (dir != 0xf)
+					portalCount++;
+			}
+		}
+	}
+
+	const int maxLinkCount = edgeCount + portalCount*2 + offMeshConLinkCount*2;
+	
+	// Find unique detail vertices.
+	int uniqueDetailVertCount = 0;
+	int detailTriCount = 0;
+	if (params->detailMeshes)
+	{
+		// Has detail mesh, count unique detail vertex count and use input detail tri count.
+		detailTriCount = params->detailTriCount;
+		for (int i = 0; i < params->polyCount; ++i)
+		{
+			const unsigned short* p = &params->polys[i*nvp*2];
+			int ndv = params->detailMeshes[i*4+1];
+			int nv = 0;
+			for (int j = 0; j < nvp; ++j)
+			{
+				if (p[j] == MESH_NULL_IDX) break;
+				nv++;
+			}
+			ndv -= nv;
+			uniqueDetailVertCount += ndv;
+		}
+	}
+	else
+	{
+		// No input detail mesh, build detail mesh from nav polys.
+		uniqueDetailVertCount = 0; // No extra detail verts.
+		detailTriCount = 0;
+		for (int i = 0; i < params->polyCount; ++i)
+		{
+			const unsigned short* p = &params->polys[i*nvp*2];
+			int nv = 0;
+			for (int j = 0; j < nvp; ++j)
+			{
+				if (p[j] == MESH_NULL_IDX) break;
+				nv++;
+			}
+			detailTriCount += nv-2;
+		}
+	}
+	
+	// Calculate data size
+	const int headerSize = dtAlign4(sizeof(dtMeshHeader));
+	const int vertsSize = dtAlign4(sizeof(float)*3*totVertCount);
+	const int polysSize = dtAlign4(sizeof(dtPoly)*totPolyCount);
+	const int linksSize = dtAlign4(sizeof(dtLink)*maxLinkCount);
+	const int detailMeshesSize = dtAlign4(sizeof(dtPolyDetail)*params->polyCount);
+	const int detailVertsSize = dtAlign4(sizeof(float)*3*uniqueDetailVertCount);
+	const int detailTrisSize = dtAlign4(sizeof(unsigned char)*4*detailTriCount);
+	const int bvTreeSize = params->buildBvTree ? dtAlign4(sizeof(dtBVNode)*params->polyCount*2) : 0;
+	const int offMeshConsSize = dtAlign4(sizeof(dtOffMeshConnection)*storedOffMeshConCount);
+	
+	const int dataSize = headerSize + vertsSize + polysSize + linksSize +
+						 detailMeshesSize + detailVertsSize + detailTrisSize +
+						 bvTreeSize + offMeshConsSize;
+						 
+	unsigned char* data = (unsigned char*)dtAlloc(sizeof(unsigned char)*dataSize, DT_ALLOC_PERM);
+	if (!data)
+	{
+		dtFree(offMeshConClass);
+		return false;
+	}
+	memset(data, 0, dataSize); // Zero-fill: the skipped link area and any field not assigned below stay 0.
+	
+	unsigned char* d = data;
+
+	dtMeshHeader* header = dtGetThenAdvanceBufferPointer<dtMeshHeader>(d, headerSize);
+	float* navVerts = dtGetThenAdvanceBufferPointer<float>(d, vertsSize);
+	dtPoly* navPolys = dtGetThenAdvanceBufferPointer<dtPoly>(d, polysSize);
+	d += linksSize; // Ignore links; just leave enough space for them. They'll be created on load.
+	dtPolyDetail* navDMeshes = dtGetThenAdvanceBufferPointer<dtPolyDetail>(d, detailMeshesSize);
+	float* navDVerts = dtGetThenAdvanceBufferPointer<float>(d, detailVertsSize);
+	unsigned char* navDTris = dtGetThenAdvanceBufferPointer<unsigned char>(d, detailTrisSize);
+	dtBVNode* navBvtree = dtGetThenAdvanceBufferPointer<dtBVNode>(d, bvTreeSize);
+	dtOffMeshConnection* offMeshCons = dtGetThenAdvanceBufferPointer<dtOffMeshConnection>(d, offMeshConsSize);
+	
+	
+	// Store header
+	header->magic = DT_NAVMESH_MAGIC;
+	header->version = DT_NAVMESH_VERSION;
+	header->x = params->tileX;
+	header->y = params->tileY;
+	header->layer = params->tileLayer;
+	header->userId = params->userId;
+	header->polyCount = totPolyCount;
+	header->vertCount = totVertCount;
+	header->maxLinkCount = maxLinkCount;
+	dtVcopy(header->bmin, params->bmin);
+	dtVcopy(header->bmax, params->bmax);
+	header->detailMeshCount = params->polyCount;
+	header->detailVertCount = uniqueDetailVertCount;
+	header->detailTriCount = detailTriCount;
+	header->bvQuantFactor = 1.0f / params->cs;
+	header->offMeshBase = params->polyCount;
+	header->walkableHeight = params->walkableHeight;
+	header->walkableRadius = params->walkableRadius;
+	header->walkableClimb = params->walkableClimb;
+	header->offMeshConCount = storedOffMeshConCount;
+	header->bvNodeCount = params->buildBvTree ? params->polyCount*2 : 0;
+	
+	const int offMeshVertsBase = params->vertCount;
+	const int offMeshPolyBase = params->polyCount;
+	
+	// Store vertices
+	// Mesh vertices
+	for (int i = 0; i < params->vertCount; ++i)
+	{
+		const unsigned short* iv = &params->verts[i*3];
+		float* v = &navVerts[i*3];
+		v[0] = params->bmin[0] + iv[0] * params->cs;
+		v[1] = params->bmin[1] + iv[1] * params->ch;
+		v[2] = params->bmin[2] + iv[2] * params->cs;
+	}
+	// Off-mesh link vertices.
+	int n = 0;
+	for (int i = 0; i < params->offMeshConCount; ++i)
+	{
+		// Only store connections which start from this tile.
+		if (offMeshConClass[i*2+0] == 0xff)
+		{
+			const float* linkv = &params->offMeshConVerts[i*2*3];
+			float* v = &navVerts[(offMeshVertsBase + n*2)*3];
+			dtVcopy(&v[0], &linkv[0]);
+			dtVcopy(&v[3], &linkv[3]);
+			n++;
+		}
+	}
+	
+	// Store polygons
+	// Mesh polys
+	const unsigned short* src = params->polys;
+	for (int i = 0; i < params->polyCount; ++i)
+	{
+		dtPoly* p = &navPolys[i];
+		p->vertCount = 0;
+		p->flags = params->polyFlags[i];
+		p->setArea(params->polyAreas[i]);
+		p->setType(DT_POLYTYPE_GROUND);
+		for (int j = 0; j < nvp; ++j)
+		{
+			if (src[j] == MESH_NULL_IDX) break;
+			p->verts[j] = src[j];
+			if (src[nvp+j] & 0x8000)
+			{
+				// Border or portal edge.
+				unsigned short dir = src[nvp+j] & 0xf;
+				if (dir == 0xf) // Border
+					p->neis[j] = 0;
+				else if (dir == 0) // Portal x-
+					p->neis[j] = DT_EXT_LINK | 4;
+				else if (dir == 1) // Portal z+
+					p->neis[j] = DT_EXT_LINK | 2;
+				else if (dir == 2) // Portal x+
+					p->neis[j] = DT_EXT_LINK | 0;
+				else if (dir == 3) // Portal z-
+					p->neis[j] = DT_EXT_LINK | 6;
+			}
+			else
+			{
+				// Normal connection
+				p->neis[j] = src[nvp+j]+1;
+			}
+			
+			p->vertCount++;
+		}
+		src += nvp*2;
+	}
+	// Off-mesh connection polygons.
+	n = 0;
+	for (int i = 0; i < params->offMeshConCount; ++i)
+	{
+		// Only store connections which start from this tile.
+		if (offMeshConClass[i*2+0] == 0xff)
+		{
+			dtPoly* p = &navPolys[offMeshPolyBase+n];
+			p->vertCount = 2;
+			p->verts[0] = (unsigned short)(offMeshVertsBase + n*2+0);
+			p->verts[1] = (unsigned short)(offMeshVertsBase + n*2+1);
+			p->flags = params->offMeshConFlags[i];
+			p->setArea(params->offMeshConAreas[i]);
+			p->setType(DT_POLYTYPE_OFFMESH_CONNECTION);
+			n++;
+		}
+	}
+
+	// Store detail meshes and vertices.
+	// The nav polygon vertices are stored as the first vertices on each mesh.
+	// We compress the mesh data by skipping them and using the navmesh coordinates.
+	if (params->detailMeshes)
+	{
+		unsigned short vbase = 0;
+		for (int i = 0; i < params->polyCount; ++i)
+		{
+			dtPolyDetail& dtl = navDMeshes[i];
+			const int vb = (int)params->detailMeshes[i*4+0];
+			const int ndv = (int)params->detailMeshes[i*4+1];
+			const int nv = navPolys[i].vertCount;
+			dtl.vertBase = (unsigned int)vbase;
+			dtl.vertCount = (unsigned char)(ndv-nv);
+			dtl.triBase = (unsigned int)params->detailMeshes[i*4+2];
+			dtl.triCount = (unsigned char)params->detailMeshes[i*4+3];
+			// Copy vertices except the first 'nv' verts which are equal to nav poly verts.
+			if (ndv-nv)
+			{
+				memcpy(&navDVerts[vbase*3], &params->detailVerts[(vb+nv)*3], sizeof(float)*3*(ndv-nv));
+				vbase += (unsigned short)(ndv-nv);
+			}
+		}
+		// Store triangles.
+		memcpy(navDTris, params->detailTris, sizeof(unsigned char)*4*params->detailTriCount);
+	}
+	else
+	{
+		// Create dummy detail mesh by triangulating polys.
+		int tbase = 0;
+		for (int i = 0; i < params->polyCount; ++i)
+		{
+			dtPolyDetail& dtl = navDMeshes[i];
+			const int nv = navPolys[i].vertCount;
+			dtl.vertBase = 0;
+			dtl.vertCount = 0;
+			dtl.triBase = (unsigned int)tbase;
+			dtl.triCount = (unsigned char)(nv-2);
+			// Triangulate polygon (local indices).
+			for (int j = 2; j < nv; ++j)
+			{
+				unsigned char* t = &navDTris[tbase*4];
+				t[0] = 0;
+				t[1] = (unsigned char)(j-1);
+				t[2] = (unsigned char)j;
+				// Bit for each edge that belongs to poly boundary.
+				t[3] = (1<<2);
+				if (j == 2) t[3] |= (1<<0);
+				if (j == nv-1) t[3] |= (1<<4);
+				tbase++;
+			}
+		}
+	}
+
+	// Store and create BVtree.
+	// TODO: take detail mesh into account! use byte per bbox extent?
+	if (params->buildBvTree)
+	{
+		createBVTree(params->verts, params->vertCount, params->polys, params->polyCount,
+					 nvp, params->cs, params->ch, params->polyCount*2, navBvtree);
+	}
+	
+	// Store Off-Mesh connections.
+	n = 0;
+	for (int i = 0; i < params->offMeshConCount; ++i)
+	{
+		// Only store connections which start from this tile.
+		if (offMeshConClass[i*2+0] == 0xff)
+		{
+			dtOffMeshConnection* con = &offMeshCons[n];
+			con->poly = (unsigned short)(offMeshPolyBase + n);
+			// Copy connection end-points.
+			const float* endPts = &params->offMeshConVerts[i*2*3];
+			dtVcopy(&con->pos[0], &endPts[0]);
+			dtVcopy(&con->pos[3], &endPts[3]);
+			con->rad = params->offMeshConRad[i];
+			con->flags = params->offMeshConDir[i] ? DT_OFFMESH_CON_BIDIR : 0;
+			con->side = offMeshConClass[i*2+1];
+			if (params->offMeshConUserID)
+				con->userId = params->offMeshConUserID[i];
+			n++;
+		}
+	}
+		
+	dtFree(offMeshConClass);
+	
+	*outData = data;
+	*outDataSize = dataSize;
+	
+	return true;
+}
+
+bool dtNavMeshHeaderSwapEndian(unsigned char* data, const int /*dataSize*/)
+{
+	// Byte-swaps the tile header in place. Fails when the magic/version pair
+	// matches neither the native constants nor their byte-swapped forms.
+	dtMeshHeader* hdr = (dtMeshHeader*)data;
+
+	int foreignMagic = DT_NAVMESH_MAGIC;
+	int foreignVersion = DT_NAVMESH_VERSION;
+	dtSwapEndian(&foreignMagic);
+	dtSwapEndian(&foreignVersion);
+
+	const bool isNative = hdr->magic == DT_NAVMESH_MAGIC && hdr->version == DT_NAVMESH_VERSION;
+	const bool isForeign = hdr->magic == foreignMagic && hdr->version == foreignVersion;
+	if (!isNative && !isForeign)
+		return false;
+
+	dtSwapEndian(&hdr->magic);
+	dtSwapEndian(&hdr->version);
+	dtSwapEndian(&hdr->x);
+	dtSwapEndian(&hdr->y);
+	dtSwapEndian(&hdr->layer);
+	dtSwapEndian(&hdr->userId);
+	dtSwapEndian(&hdr->polyCount);
+	dtSwapEndian(&hdr->vertCount);
+	dtSwapEndian(&hdr->maxLinkCount);
+	dtSwapEndian(&hdr->detailMeshCount);
+	dtSwapEndian(&hdr->detailVertCount);
+	dtSwapEndian(&hdr->detailTriCount);
+	dtSwapEndian(&hdr->bvNodeCount);
+	dtSwapEndian(&hdr->offMeshConCount);
+	dtSwapEndian(&hdr->offMeshBase);
+	dtSwapEndian(&hdr->walkableHeight);
+	dtSwapEndian(&hdr->walkableRadius);
+	dtSwapEndian(&hdr->walkableClimb);
+	for (int axis = 0; axis < 3; ++axis)
+	{
+		dtSwapEndian(&hdr->bmin[axis]);
+		dtSwapEndian(&hdr->bmax[axis]);
+	}
+	dtSwapEndian(&hdr->bvQuantFactor);
+
+	// Freelist index and pointers are updated when tile is added, no need to swap.
+
+	return true;
+}
+
+/// @par
+///
+/// @warning This function assumes that the header is in the correct endianness already.
+/// Call #dtNavMeshHeaderSwapEndian() first on the data if the data is expected to be in wrong endianness
+/// to start with. Call #dtNavMeshHeaderSwapEndian() after the data has been swapped if converting from
+/// native to foreign endianness.
+bool dtNavMeshDataSwapEndian(unsigned char* data, const int /*dataSize*/)
+{
+	// Make sure the data is in right format (returns false on a magic/version mismatch).
+	dtMeshHeader* header = (dtMeshHeader*)data;
+	if (header->magic != DT_NAVMESH_MAGIC)
+		return false;
+	if (header->version != DT_NAVMESH_VERSION)
+		return false;
+	
+	// Compute the size of each data section from the header counts (same order as dtCreateNavMeshData()).
+	const int headerSize = dtAlign4(sizeof(dtMeshHeader));
+	const int vertsSize = dtAlign4(sizeof(float)*3*header->vertCount);
+	const int polysSize = dtAlign4(sizeof(dtPoly)*header->polyCount);
+	const int linksSize = dtAlign4(sizeof(dtLink)*(header->maxLinkCount));
+	const int detailMeshesSize = dtAlign4(sizeof(dtPolyDetail)*header->detailMeshCount);
+	const int detailVertsSize = dtAlign4(sizeof(float)*3*header->detailVertCount);
+	const int detailTrisSize = dtAlign4(sizeof(unsigned char)*4*header->detailTriCount);
+	const int bvtreeSize = dtAlign4(sizeof(dtBVNode)*header->bvNodeCount);
+	const int offMeshLinksSize = dtAlign4(sizeof(dtOffMeshConnection)*header->offMeshConCount);
+	
+	unsigned char* d = data + headerSize;
+	float* verts = dtGetThenAdvanceBufferPointer<float>(d, vertsSize);
+	dtPoly* polys = dtGetThenAdvanceBufferPointer<dtPoly>(d, polysSize);
+	d += linksSize; // Ignore links; they technically should be endian-swapped but all their data is overwritten on load anyway.
+	dtPolyDetail* detailMeshes = dtGetThenAdvanceBufferPointer<dtPolyDetail>(d, detailMeshesSize);
+	float* detailVerts = dtGetThenAdvanceBufferPointer<float>(d, detailVertsSize);
+	d += detailTrisSize; // Ignore detail tris; single bytes can't be endian-swapped.
+	//unsigned char* detailTris = dtGetThenAdvanceBufferPointer<unsigned char>(d, detailTrisSize);
+	dtBVNode* bvTree = dtGetThenAdvanceBufferPointer<dtBVNode>(d, bvtreeSize);
+	dtOffMeshConnection* offMeshCons = dtGetThenAdvanceBufferPointer<dtOffMeshConnection>(d, offMeshLinksSize);
+	
+	// Vertices
+	for (int i = 0; i < header->vertCount*3; ++i)
+	{
+		dtSwapEndian(&verts[i]);
+	}
+
+	// Polys
+	for (int i = 0; i < header->polyCount; ++i)
+	{
+		dtPoly* p = &polys[i];
+		// poly->firstLink is updated when tile is added, no need to swap.
+		for (int j = 0; j < DT_VERTS_PER_POLYGON; ++j)
+		{
+			dtSwapEndian(&p->verts[j]);
+			dtSwapEndian(&p->neis[j]);
+		}
+		dtSwapEndian(&p->flags);
+	}
+
+	// Links are rebuilt when tile is added, no need to swap.
+
+	// Detail meshes
+	for (int i = 0; i < header->detailMeshCount; ++i)
+	{
+		dtPolyDetail* pd = &detailMeshes[i];
+		dtSwapEndian(&pd->vertBase);
+		dtSwapEndian(&pd->triBase);
+	}
+	
+	// Detail verts
+	for (int i = 0; i < header->detailVertCount*3; ++i)
+	{
+		dtSwapEndian(&detailVerts[i]);
+	}
+
+	// BV-tree
+	for (int i = 0; i < header->bvNodeCount; ++i)
+	{
+		dtBVNode* node = &bvTree[i];
+		for (int j = 0; j < 3; ++j)
+		{
+			dtSwapEndian(&node->bmin[j]);
+			dtSwapEndian(&node->bmax[j]);
+		}
+		dtSwapEndian(&node->i);
+	}
+
+	// Off-mesh Connections.
+	for (int i = 0; i < header->offMeshConCount; ++i)
+	{
+		dtOffMeshConnection* con = &offMeshCons[i];
+		for (int j = 0; j < 6; ++j)
+			dtSwapEndian(&con->pos[j]);
+		dtSwapEndian(&con->rad);
+		dtSwapEndian(&con->poly);
+		dtSwapEndian(&con->userId); // Written by dtCreateNavMeshData(); previously left in the source byte order.
+	}
+	
+	return true;
+}
--- a/deps/recastnavigation/Detour/Source/DetourNavMeshQuery.cpp
+++ b/deps/recastnavigation/Detour/Source/DetourNavMeshQuery.cpp
--- a/deps/recastnavigation/Detour/Source/DetourNode.cpp
+++ b/deps/recastnavigation/Detour/Source/DetourNode.cpp
@@ -0,0 +1,200 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#include "DetourNode.h"
+#include "DetourAlloc.h"
+#include "DetourAssert.h"
+#include "DetourCommon.h"
+#include <string.h>
+
+#ifdef DT_POLYREF64
+// DT_POLYREF64 variant of the node-pool hash. From Thomas Wang, https://gist.github.com/badboy/6267743
+inline unsigned int dtHashRef(dtPolyRef a)
+{
+	a = (~a) + (a << 18); // a = (a << 18) - a - 1;
+	a = a ^ (a >> 31);
+	a = a * 21; // a = (a + (a << 2)) + (a << 4);
+	a = a ^ (a >> 11);
+	a = a + (a << 6);
+	a = a ^ (a >> 22);
+	return (unsigned int)a; // The result is narrowed to unsigned int; callers mask it with (m_hashSize-1).
+}
+#else // !DT_POLYREF64
+inline unsigned int dtHashRef(dtPolyRef a)
+{
+	a += ~(a<<15);
+	a ^=  (a>>10);
+	a +=  (a<<3);
+	a ^=  (a>>6);
+	a += ~(a<<11);
+	a ^=  (a>>16);
+	return (unsigned int)a;
+}
+#endif // DT_POLYREF64
+
+//////////////////////////////////////////////////////////////////////////////////////////
+dtNodePool::dtNodePool(int maxNodes, int hashSize) :
+	m_nodes(0),
+	m_first(0),
+	m_next(0),
+	m_maxNodes(maxNodes),
+	m_hashSize(hashSize),
+	m_nodeCount(0)
+{
+	dtAssert(dtNextPow2(m_hashSize) == (unsigned int)m_hashSize); // Power of two: buckets are picked with & (m_hashSize-1).
+	// pidx is special as 0 means "none" and 1 is the first node. For that reason
+	// we have 1 fewer nodes available than the number of values it can contain.
+	dtAssert(m_maxNodes > 0 && m_maxNodes <= DT_NULL_IDX && m_maxNodes <= (1 << DT_NODE_PARENT_BITS) - 1);
+
+	m_nodes = (dtNode*)dtAlloc(sizeof(dtNode)*m_maxNodes, DT_ALLOC_PERM);
+	m_next = (dtNodeIndex*)dtAlloc(sizeof(dtNodeIndex)*m_maxNodes, DT_ALLOC_PERM);
+	m_first = (dtNodeIndex*)dtAlloc(sizeof(dtNodeIndex)*hashSize, DT_ALLOC_PERM);
+
+	dtAssert(m_nodes); // Allocation failures are only caught by these asserts; there is no other error path here.
+	dtAssert(m_next);
+	dtAssert(m_first);
+
+	memset(m_first, 0xff, sizeof(dtNodeIndex)*m_hashSize); // All-ones entries; presumably equal to DT_NULL_IDX ("empty") — confirm.
+	memset(m_next, 0xff, sizeof(dtNodeIndex)*m_maxNodes);
+}
+
+dtNodePool::~dtNodePool()
+{
+	dtFree(m_nodes); // Releases the three arrays allocated in the constructor.
+	dtFree(m_next);
+	dtFree(m_first);
+}
+
+void dtNodePool::clear()
+{
+	memset(m_first, 0xff, sizeof(dtNodeIndex)*m_hashSize); // Empty every bucket; m_next is left as is and rewritten by getNode().
+	m_nodeCount = 0; // getNode() hands out slots starting from index 0 again.
+}
+
+unsigned int dtNodePool::findNodes(dtPolyRef id, dtNode** nodes, const int maxNodes)
+{
+	// Gathers the nodes stored for 'id', whatever their state, into 'nodes'.
+	// Returns the number of entries written, which never exceeds maxNodes.
+	int found = 0;
+	const unsigned int bucket = dtHashRef(id) & (m_hashSize-1);
+	for (dtNodeIndex idx = m_first[bucket]; idx != DT_NULL_IDX; idx = m_next[idx])
+	{
+		if (m_nodes[idx].id != id)
+			continue;
+		// Output array is full: stop at the first match that does not fit.
+		if (found >= maxNodes)
+			return found;
+		nodes[found++] = &m_nodes[idx];
+	}
+
+	return found;
+}
+
+dtNode* dtNodePool::findNode(dtPolyRef id, unsigned char state)
+{
+	// Returns the node stored for this (id, state) pair, or 0 when there is none.
+	const unsigned int bucket = dtHashRef(id) & (m_hashSize-1);
+	for (dtNodeIndex idx = m_first[bucket]; idx != DT_NULL_IDX; idx = m_next[idx])
+	{
+		dtNode& candidate = m_nodes[idx];
+		if (candidate.id == id && candidate.state == state)
+			return &candidate;
+	}
+	return 0;
+}
+
+dtNode* dtNodePool::getNode(dtPolyRef id, unsigned char state)
+{
+	// Returns the node for (id, state), creating it when it does not exist yet.
+	// Returns 0 when a new node is needed but the pool is already full.
+	const unsigned int bucket = dtHashRef(id) & (m_hashSize-1);
+	for (dtNodeIndex idx = m_first[bucket]; idx != DT_NULL_IDX; idx = m_next[idx])
+	{
+		dtNode& existing = m_nodes[idx];
+		if (existing.id == id && existing.state == state)
+			return &existing;
+	}
+
+	if (m_nodeCount >= m_maxNodes)
+		return 0;
+
+	// Take the next unused slot.
+	const dtNodeIndex slot = (dtNodeIndex)m_nodeCount;
+	m_nodeCount++;
+	dtNode* fresh = &m_nodes[slot];
+	fresh->pidx = 0;
+	fresh->cost = 0;
+	fresh->total = 0;
+	fresh->id = id;
+	fresh->state = state;
+	fresh->flags = 0;
+
+	// Push the new node onto the front of its bucket's chain.
+	m_next[slot] = m_first[bucket];
+	m_first[bucket] = slot;
+
+	return fresh;
+}
+
+
+//////////////////////////////////////////////////////////////////////////////////////////
+dtNodeQueue::dtNodeQueue(int n) :
+	m_heap(0),
+	m_capacity(n),
+	m_size(0)
+{
+	dtAssert(m_capacity > 0);
+	
+	m_heap = (dtNode**)dtAlloc(sizeof(dtNode*)*(m_capacity+1), DT_ALLOC_PERM); // NOTE(review): one slot more than m_capacity; the reason is not visible here.
+	dtAssert(m_heap); // Allocation failure is only caught by this assert.
+}
+
+dtNodeQueue::~dtNodeQueue()
+{
+	dtFree(m_heap); // Releases the heap array allocated in the constructor.
+}
+
+void dtNodeQueue::bubbleUp(int i, dtNode* node)
+{
+	// Moves parents with a larger 'total' down one level, then stores 'node' in the freed slot.
+	while (i > 0)
+	{
+		const int parent = (i-1)/2;
+		if (!(m_heap[parent]->total > node->total)) break;
+		m_heap[i] = m_heap[parent];
+		i = parent;
+	}
+	m_heap[i] = node;
+}
+
+void dtNodeQueue::trickleDown(int i, dtNode* node)
+{
+	// Moves the hole at slot i down to the bottom of the heap, pulling up the
+	// child with the smaller 'total' at every level, then lets bubbleUp()
+	// place 'node' starting from where the hole ended up.
+	for (int child = i*2+1; child < m_size; child = i*2+1)
+	{
+		const int right = child+1;
+		if (right < m_size && m_heap[child]->total > m_heap[right]->total)
+		{
+			child = right;
+		}
+		m_heap[i] = m_heap[child];
+		i = child;
+	}
+	bubbleUp(i, node);
+}
--- a/deps/recastnavigation/README.md
+++ b/deps/recastnavigation/README.md
@@ -0,0 +1,89 @@
+
+Recast & Detour
+===============
+
+[![Travis (Linux) Build Status](https://travis-ci.org/recastnavigation/recastnavigation.svg?branch=master)](https://travis-ci.org/recastnavigation/recastnavigation)
+[![Appveyor (Windows) Build  Status](https://ci.appveyor.com/api/projects/status/20w84u25b3f8h179/branch/master?svg=true)](https://ci.appveyor.com/project/recastnavigation/recastnavigation/branch/master)
+
+[![Issue Stats](http://www.issuestats.com/github/recastnavigation/recastnavigation/badge/pr?style=flat)](http://www.issuestats.com/github/recastnavigation/recastnavigation)
+[![Issue Stats](http://www.issuestats.com/github/recastnavigation/recastnavigation/badge/issue?style=flat)](http://www.issuestats.com/github/recastnavigation/recastnavigation)
+
+![screenshot of a navmesh baked with the sample program](/RecastDemo/screenshot.png?raw=true)
+
+## Recast
+
+Recast is a state-of-the-art navigation mesh construction toolset for games.
+
+* It is automatic, which means that you can throw any level geometry at it and you will get a robust mesh out
+* It is fast, which means swift turnaround times for level designers
+* It is open source so it comes with full source and you can customize it to your heart's content. 
+
+The Recast process starts with constructing a voxel mold from a level geometry 
+and then casting a navigation mesh over it. The process consists of three steps, 
+building the voxel mold, partitioning the mold into simple regions, peeling off 
+the regions as simple polygons.
+
+1. The voxel mold is built from the input triangle mesh by rasterizing the triangles into a multi-layer heightfield. Some simple filters are then applied to the mold to prune out locations where the character would not be able to move.
+2. The walkable areas described by the mold are divided into simple overlaid 2D regions. The resulting regions have only one non-overlapping contour, which simplifies the final step of the process tremendously.
+3. The navigation polygons are peeled off from the regions by first tracing the boundaries and then simplifying them. The resulting polygons are finally converted to convex polygons which makes them perfect for pathfinding and spatial reasoning about the level. 
+
+
+## Detour
+
+Recast is accompanied by Detour, a path-finding and spatial reasoning toolkit. You can use any navigation mesh with Detour, but of course the data generated with Recast fits perfectly.
+
+Detour offers a simple static navigation mesh which is suitable for many simple cases, as well as a tiled navigation mesh which allows you to plug in and out pieces of the mesh. The tiled mesh allows you to create systems where you stream new navigation data in and out as the player progresses through the level, or you may regenerate tiles as the world changes.
+
+
+## Recast Demo
+
+You can find a comprehensive demo project in the `RecastDemo` folder. It is a kitchen-sink demo containing all the functionality of the library. If you are new to Recast & Detour, check out [Sample_SoloMesh.cpp](/RecastDemo/Source/Sample_SoloMesh.cpp) to get started with building navmeshes and [NavMeshTesterTool.cpp](/RecastDemo/Source/NavMeshTesterTool.cpp) to see how Detour can be used to find paths.
+
+### Building RecastDemo
+
+RecastDemo uses [premake5](http://premake.github.io/) to build platform specific projects. Download it and make sure it's available on your path, or specify the path to it.
+
+#### Linux
+
+- Install SDL2 and its dependencies according to your distro's guidelines.
+- Run `premake5 gmake` from the `RecastDemo` folder.
+- `cd Build/gmake` then `make`
+- Run `RecastDemo/Bin/RecastDemo`
+
+#### OSX
+
+- Grab the latest SDL2 development library dmg from [here](https://www.libsdl.org/download-2.0.php) and place `SDL2.framework` in `/Library/Frameworks/`
+- Navigate to the `RecastDemo` folder and run `premake5 xcode4`
+- Open `Build/xcode4/recastnavigation.xcworkspace`
+- Select the "RecastDemo" project in the left pane, go to the "Build Phases" tab and expand "Link Binary With Libraries"
+- Remove the existing entry for SDL2 (it should have a white box icon) and re-add it by hitting the plus, selecting "Add Other", and selecting `/Library/Frameworks/SDL2.framework`.  It should now have a suitcase icon.
+- Set the RecastDemo project as the target and build.
+
+#### Windows
+
+- Grab the latest SDL2 development library release from [here](https://www.libsdl.org/download-2.0.php) and unzip it into `RecastDemo\Contrib`.  Rename the SDL folder such that the path `RecastDemo\Contrib\SDL\lib\x86` is valid.
+- Run `"premake5" vs2015` from the `RecastDemo` folder
+- Open the solution, build, and run.
+
+### Running Unit tests
+
+- Follow the instructions to build RecastDemo above.  Premake should generate another build target called "Tests".
+- Build the "Tests" project.  This will generate an executable named "Tests" in `RecastDemo/Bin/`
+- Run the "Tests" executable.  It will execute all the unit tests, indicate those that failed, and display a count of those that succeeded.
+
+## Integrating with your own project
+
+It is recommended to add the source directories `DebugUtils`, `Detour`, `DetourCrowd`, `DetourTileCache`, and `Recast` into your own project depending on which parts of the project you need. For example your level building tool could include `DebugUtils`, `Recast`, and `Detour`, and your game runtime could just include `Detour`.
+
+## Contributing
+
+See the [Contributing document](CONTRIBUTING.md) for guidelines for making contributions.
+
+## Discuss
+
+- Discuss Recast & Detour: http://groups.google.com/group/recastnavigation
+- Development blog: http://digestingduck.blogspot.com/
+
+## License
+
+Recast & Detour is licensed under the ZLib license, see License.txt for more information.
--- a/deps/recastnavigation/Recast/Include/Recast.h
+++ b/deps/recastnavigation/Recast/Include/Recast.h
--- a/deps/recastnavigation/Recast/Include/RecastAlloc.h
+++ b/deps/recastnavigation/Recast/Include/RecastAlloc.h
@@ -0,0 +1,146 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#ifndef RECASTALLOC_H
+#define RECASTALLOC_H
+
+#include <stddef.h>
+
+/// Lifetime hint handed to the memory allocator: tells it how long the
+/// requested memory is expected to stay in use.
+enum rcAllocHint
+{
+	RC_ALLOC_PERM = 0,	///< Memory will persist after a function call.
+	RC_ALLOC_TEMP = 1	///< Memory used temporarily within a function.
+};
+
+/// A memory allocation function.
+///  @param[in]		size	The size, in bytes, of the memory to allocate.
+///  @param[in]		hint	A hint to the allocator on how long the memory is expected to be in use.
+///  @return A pointer to the beginning of the allocated memory block, or null if the allocation failed.
+///  @see rcAllocSetCustom
+typedef void* (rcAllocFunc)(size_t size, rcAllocHint hint);
+
+/// A memory deallocation function.
+///  @param[in]		ptr		A pointer to a memory block previously allocated using #rcAllocFunc.
+///  @see rcAllocSetCustom
+typedef void (rcFreeFunc)(void* ptr);
+
+/// Sets the base custom allocation functions to be used by Recast.
+///  @param[in]		allocFunc	The memory allocation function to be used by #rcAlloc
+///  @param[in]		freeFunc	The memory de-allocation function to be used by #rcFree
+void rcAllocSetCustom(rcAllocFunc *allocFunc, rcFreeFunc *freeFunc);
+
+/// Allocates a memory block.
+///  @param[in]		size	The size, in bytes, of the memory to allocate.
+///  @param[in]		hint	A hint to the allocator on how long the memory is expected to be in use.
+///  @return A pointer to the beginning of the allocated memory block, or null if the allocation failed.
+///  @see rcFree
+void* rcAlloc(size_t size, rcAllocHint hint);
+
+/// Deallocates a memory block.
+///  @param[in]		ptr		A pointer to a memory block previously allocated using #rcAlloc.
+///  @see rcAlloc
+void rcFree(void* ptr);
+
+
+/// A simple dynamic array of integers.
+class rcIntArray
+{
+	int* m_data;		// Element storage; starts null and is handed to rcFree() on destruction.
+	int m_size, m_cap;	// Current element count, and the limit above which resize() calls doResize().
+
+	void doResize(int n);
+	
+	// Explicitly disabled copy constructor and copy assignment operator.
+	rcIntArray(const rcIntArray&);
+	rcIntArray& operator=(const rcIntArray&);
+
+public:
+	/// Constructs an instance with an initial array size of zero.
+	rcIntArray() : m_data(0), m_size(0), m_cap(0) {}
+
+	/// Constructs an instance initialized to the specified size.
+	///  @param[in]		n	The initial size of the integer array.
+	rcIntArray(int n) : m_data(0), m_size(0), m_cap(0) { resize(n); }
+	~rcIntArray() { rcFree(m_data); }
+
+	/// Specifies the new size of the integer array.
+	///  @param[in]		n	The new size of the integer array.
+	void resize(int n)
+	{
+		if (n > m_cap)
+			doResize(n);
+		
+		m_size = n;
+	}
+
+	/// Pushes the specified integer onto the end of the array and increases the size by one.
+	///  @param[in]		item	The new value.
+	void push(int item) { resize(m_size+1); m_data[m_size-1] = item; }
+
+	/// Removes and returns the last value; an empty array is left unchanged.
+	///  @return The removed value, or 0 if the array was empty.
+	int pop()
+	{
+		// Guard first: on an empty array m_data may still be null, so it must not be indexed.
+		if (m_size <= 0)
+			return 0;
+		return m_data[--m_size];
+	}
+
+	/// The value at the specified array index.
+	/// @warning Does not provide overflow protection.
+	///  @param[in]		i	The index of the value.
+	const int& operator[](int i) const { return m_data[i]; }
+
+	/// The value at the specified array index.
+	/// @warning Does not provide overflow protection.
+	///  @param[in]		i	The index of the value.
+	int& operator[](int i) { return m_data[i]; }
+
+	/// The current size of the integer array.
+	int size() const { return m_size; }
+};
+
+/// Scope guard that passes the pointer it holds to rcFree() when it goes out of scope.
+/// @note This class is rarely if ever used by the end user.
+template<class T> class rcScopedDelete
+{
+	T* m_ptr;
+	// Copying is disabled: copy constructor and copy assignment are declared private.
+	rcScopedDelete(const rcScopedDelete&);
+	rcScopedDelete& operator=(const rcScopedDelete&);
+
+public:
+	/// Constructs an instance with a null pointer.
+	rcScopedDelete() : m_ptr(0) {}
+
+	/// Constructs an instance with the specified pointer.
+	///  @param[in]		p	A pointer to an allocated array.
+	rcScopedDelete(T* p) : m_ptr(p) {}
+
+	/// Passes the held pointer to rcFree().
+	~rcScopedDelete() { rcFree(m_ptr); }
+
+	/// The root array pointer.
+	///  @return The root array pointer.
+	operator T*() { return m_ptr; }
+};
+
+#endif
--- a/Show More
+++ b/Show More