feat(Deps/Jemalloc): update Jemalloc to 5.2.1 (#2413)

This commit is contained in:
Viste
2019-11-14 23:17:38 +03:00
committed by Kargatum
parent fae7ae95a3
commit 685538b01b
103 changed files with 10904 additions and 3886 deletions

View File

@@ -9,7 +9,7 @@
# WITHOUT ANY WARRANTY, to the extent permitted by law; without even the
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
if(CMAKE_SYSTEM_NAME MATCHES "Linux" AND SERVERS AND NOT NOJEM)
if(CMAKE_SYSTEM_NAME MATCHES "Linux" AND NOT NOJEM)
# We need to generate the jemalloc_def.h header based on platform-specific settings
CHECK_SYMBOL_EXISTS(MADV_FREE "sys/mman.h" HAVE_MADV_FREE)
@@ -29,6 +29,14 @@ if(CMAKE_SYSTEM_NAME MATCHES "Linux" AND SERVERS AND NOT NOJEM)
set(JEM_MADFREE_DEF "#undef")
endif()
# Choose the CPU spin-wait hint: JEM_CPU_SPINWAIT holds the statement text and
# JEM_HAVE_CPU_SPINWAIT records whether a hint is available (0 or 1).
# Presumably both are substituted into the generated jemalloc header by the
# configure_file() call that follows -- confirm against the .in.cmake template.
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
# aarch64: no spin-wait hint is emitted.
set(JEM_CPU_SPINWAIT "")
set(JEM_HAVE_CPU_SPINWAIT 0)
else()
# NOTE(review): every processor other than aarch64 receives the "pause"
# instruction here, which looks x86-specific -- confirm that other
# architectures (e.g. 32-bit ARM, PowerPC) are not expected to build this.
set(JEM_CPU_SPINWAIT "__asm__ volatile\(\"pause\"\)")
set(JEM_HAVE_CPU_SPINWAIT 1)
endif()
# Create the header, so we can use it
configure_file(
"${CMAKE_SOURCE_DIR}/deps/jemalloc/jemalloc_internal_defs.h.in.cmake"
@@ -41,17 +49,20 @@ if(CMAKE_SYSTEM_NAME MATCHES "Linux" AND SERVERS AND NOT NOJEM)
${CMAKE_CURRENT_SOURCE_DIR}/src/arena.c
${CMAKE_CURRENT_SOURCE_DIR}/src/background_thread.c
${CMAKE_CURRENT_SOURCE_DIR}/src/base.c
${CMAKE_CURRENT_SOURCE_DIR}/src/bin.c
${CMAKE_CURRENT_SOURCE_DIR}/src/bitmap.c
${CMAKE_CURRENT_SOURCE_DIR}/src/ckh.c
${CMAKE_CURRENT_SOURCE_DIR}/src/ctl.c
${CMAKE_CURRENT_SOURCE_DIR}/src/div.c
${CMAKE_CURRENT_SOURCE_DIR}/src/extent.c
${CMAKE_CURRENT_SOURCE_DIR}/src/extent_dss.c
${CMAKE_CURRENT_SOURCE_DIR}/src/extent_mmap.c
${CMAKE_CURRENT_SOURCE_DIR}/src/hash.c
${CMAKE_CURRENT_SOURCE_DIR}/src/hooks.c
${CMAKE_CURRENT_SOURCE_DIR}/src/hook.c
${CMAKE_CURRENT_SOURCE_DIR}/src/jemalloc.c
${CMAKE_CURRENT_SOURCE_DIR}/src/jemalloc_cpp.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/large.c
${CMAKE_CURRENT_SOURCE_DIR}/src/log.c
${CMAKE_CURRENT_SOURCE_DIR}/src/malloc_io.c
${CMAKE_CURRENT_SOURCE_DIR}/src/mutex.c
${CMAKE_CURRENT_SOURCE_DIR}/src/mutex_pool.c
@@ -60,17 +71,18 @@ if(CMAKE_SYSTEM_NAME MATCHES "Linux" AND SERVERS AND NOT NOJEM)
${CMAKE_CURRENT_SOURCE_DIR}/src/prng.c
${CMAKE_CURRENT_SOURCE_DIR}/src/prof.c
${CMAKE_CURRENT_SOURCE_DIR}/src/rtree.c
${CMAKE_CURRENT_SOURCE_DIR}/src/spin.c
${CMAKE_CURRENT_SOURCE_DIR}/src/safety_check.c
${CMAKE_CURRENT_SOURCE_DIR}/src/stats.c
${CMAKE_CURRENT_SOURCE_DIR}/src/sc.c
${CMAKE_CURRENT_SOURCE_DIR}/src/sz.c
${CMAKE_CURRENT_SOURCE_DIR}/src/tcache.c
${CMAKE_CURRENT_SOURCE_DIR}/src/test_hooks.c
${CMAKE_CURRENT_SOURCE_DIR}/src/ticker.c
${CMAKE_CURRENT_SOURCE_DIR}/src/tsd.c
${CMAKE_CURRENT_SOURCE_DIR}/src/witness.c
)
add_library(jemalloc STATIC
${jemalloc_STAT_SRC})
add_library(jemalloc STATIC ${jemalloc_STAT_SRC})
target_include_directories(jemalloc
PRIVATE
@@ -95,11 +107,13 @@ if(CMAKE_SYSTEM_NAME MATCHES "Linux" AND SERVERS AND NOT NOJEM)
PROPERTIES
FOLDER
"deps")
else()
# Provide a dummy target for jemalloc which is used when jemalloc
# is disabled or not supported.
add_library(jemalloc INTERFACE)
# target_link_libraries(jemalloc
# INTERFACE
# valgrind)
target_link_libraries(jemalloc
INTERFACE
valgrind)
endif()

View File

@@ -1,10 +1,10 @@
Unless otherwise specified, files in the jemalloc source distribution are
subject to the following license:
--------------------------------------------------------------------------------
Copyright (C) 2002-2017 Jason Evans <jasone@canonware.com>.
Copyright (C) 2002-present Jason Evans <jasone@canonware.com>.
All rights reserved.
Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved.
Copyright (C) 2009-2017 Facebook, Inc. All rights reserved.
Copyright (C) 2009-present Facebook, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

View File

@@ -4,6 +4,259 @@ brevity. Much more detail can be found in the git revision history:
https://github.com/jemalloc/jemalloc
* 5.2.1 (August 5, 2019)
This release is primarily about Windows. A critical virtual memory leak is
resolved on all Windows platforms. The regression was present in all releases
since 5.0.0.
Bug fixes:
- Fix a severe virtual memory leak on Windows. This regression was first
released in 5.0.0. (@Ignition, @j0t, @frederik-h, @davidtgoldblatt,
@interwq)
- Fix size 0 handling in posix_memalign(). This regression was first released
in 5.2.0. (@interwq)
- Fix the prof_log unit test which may observe unexpected backtraces from
compiler optimizations. The test was first added in 5.2.0. (@marxin,
@gnzlbg, @interwq)
- Fix the declaration of the extent_avail tree. This regression was first
released in 5.1.0. (@zoulasc)
- Fix an incorrect reference in jeprof. This functionality was first released
in 3.0.0. (@prehistoric-penguin)
- Fix an assertion on the deallocation fast-path. This regression was first
released in 5.2.0. (@yinan1048576)
- Fix the TLS_MODEL attribute in headers. This regression was first released
in 5.0.0. (@zoulasc, @interwq)
Optimizations and refactors:
- Implement opt.retain on Windows and enable by default on 64-bit. (@interwq,
@davidtgoldblatt)
- Optimize away a branch on the operator delete[] path. (@mgrice)
- Add format annotation to the format generator function. (@zoulasc)
- Refactor and improve the size class header generation. (@yinan1048576)
- Remove best fit. (@djwatson)
- Avoid blocking on background thread locks for stats. (@oranagra, @interwq)
* 5.2.0 (April 2, 2019)
This release includes a few notable improvements, which are summarized below:
1) improved fast-path performance from the optimizations by @djwatson; 2)
reduced virtual memory fragmentation and metadata usage; and 3) bug fixes on
setting the number of background threads. In addition, peak / spike memory
usage is improved with certain allocation patterns. As usual, the release and
prior dev versions have gone through large-scale production testing.
New features:
- Implement oversize_threshold, which uses a dedicated arena for allocations
crossing the specified threshold to reduce fragmentation. (@interwq)
- Add extents usage information to stats. (@tyleretzel)
- Log time information for sampled allocations. (@tyleretzel)
- Support 0 size in sdallocx. (@djwatson)
- Output rate for certain counters in malloc_stats. (@zinoale)
- Add configure option --enable-readlinkat, which allows the use of readlinkat
over readlink. (@davidtgoldblatt)
- Add configure options --{enable,disable}-{static,shared} to allow not
building unwanted libraries. (@Ericson2314)
- Add configure option --disable-libdl to enable fully static builds.
(@interwq)
- Add mallctl interfaces:
+ opt.oversize_threshold (@interwq)
+ stats.arenas.<i>.extent_avail (@tyleretzel)
+ stats.arenas.<i>.extents.<j>.n{dirty,muzzy,retained} (@tyleretzel)
+ stats.arenas.<i>.extents.<j>.{dirty,muzzy,retained}_bytes
(@tyleretzel)
Portability improvements:
- Update MSVC builds. (@maksqwe, @rustyx)
- Workaround a compiler optimizer bug on s390x. (@rkmisra)
- Make use of pthread_set_name_np(3) on FreeBSD. (@trasz)
- Implement malloc_getcpu() to enable percpu_arena for windows. (@santagada)
- Link against -pthread instead of -lpthread. (@paravoid)
- Make background_thread not dependent on libdl. (@interwq)
- Add stringify to fix a linker directive issue on MSVC. (@daverigby)
- Detect and fall back when 8-bit atomics are unavailable. (@interwq)
- Fall back to the default pthread_create if dlsym(3) fails. (@interwq)
Optimizations and refactors:
- Refactor the TSD module. (@davidtgoldblatt)
- Avoid taking extents_muzzy mutex when muzzy is disabled. (@interwq)
- Avoid taking large_mtx for auto arenas on the tcache flush path. (@interwq)
- Optimize ixalloc by avoiding a size lookup. (@interwq)
- Implement opt.oversize_threshold which uses a dedicated arena for requests
crossing the threshold, also eagerly purges the oversize extents. Default
the threshold to 8 MiB. (@interwq)
- Clean compilation with -Wextra. (@gnzlbg, @jasone)
- Refactor the size class module. (@davidtgoldblatt)
- Refactor the stats emitter. (@tyleretzel)
- Optimize pow2_ceil. (@rkmisra)
- Avoid runtime detection of lazy purging on FreeBSD. (@trasz)
- Optimize mmap(2) alignment handling on FreeBSD. (@trasz)
- Improve error handling for THP state initialization. (@jsteemann)
- Rework the malloc() fast path. (@djwatson)
- Rework the free() fast path. (@djwatson)
- Refactor and optimize the tcache fill / flush paths. (@djwatson)
- Optimize sync / lwsync on PowerPC. (@chmeeedalf)
- Bypass extent_dalloc() when retain is enabled. (@interwq)
- Optimize the locking on large deallocation. (@interwq)
- Reduce the number of pages committed from sanity checking in debug build.
(@trasz, @interwq)
- Deprecate OSSpinLock. (@interwq)
- Lower the default number of background threads to 4 (when the feature
is enabled). (@interwq)
- Optimize the trylock spin wait. (@djwatson)
- Use arena index for arena-matching checks. (@interwq)
- Avoid forced decay on thread termination when using background threads.
(@interwq)
- Disable muzzy decay by default. (@djwatson, @interwq)
- Only initialize libgcc unwinder when profiling is enabled. (@paravoid,
@interwq)
Bug fixes (all only relevant to jemalloc 5.x):
- Fix background thread index issues with max_background_threads. (@djwatson,
@interwq)
- Fix stats output for opt.lg_extent_max_active_fit. (@interwq)
- Fix opt.prof_prefix initialization. (@davidtgoldblatt)
- Properly trigger decay on tcache destroy. (@interwq, @amosbird)
- Fix tcache.flush. (@interwq)
- Detect whether explicit extent zero out is necessary with huge pages or
custom extent hooks, which may change the purge semantics. (@interwq)
- Fix a side effect caused by extent_max_active_fit combined with decay-based
purging, where freed extents can accumulate and not be reused for an
extended period of time. (@interwq, @mpghf)
- Fix a missing unlock on extent register error handling. (@zoulasc)
Testing:
- Simplify the Travis script output. (@gnzlbg)
- Update the test scripts for FreeBSD. (@devnexen)
- Add unit tests for the producer-consumer pattern. (@interwq)
- Add Cirrus-CI config for FreeBSD builds. (@jasone)
- Add size-matching sanity checks on tcache flush. (@davidtgoldblatt,
@interwq)
Incompatible changes:
- Remove --with-lg-page-sizes. (@davidtgoldblatt)
Documentation:
- Attempt to build docs by default, however skip doc building when xsltproc
is missing. (@interwq, @cmuellner)
* 5.1.0 (May 4, 2018)
This release is primarily about fine-tuning, ranging from several new features
to numerous notable performance and portability enhancements. The release and
prior dev versions have been running in multiple large scale applications for
months, and the cumulative improvements are substantial in many cases.
Given the long and successful production runs, this release is likely a good
candidate for applications to upgrade, from both jemalloc 5.0 and before. For
performance-critical applications, the newly added TUNING.md provides
guidelines on jemalloc tuning.
New features:
- Implement transparent huge page support for internal metadata. (@interwq)
- Add opt.thp to allow enabling / disabling transparent huge pages for all
mappings. (@interwq)
- Add maximum background thread count option. (@djwatson)
- Allow prof_active to control opt.lg_prof_interval and prof.gdump.
(@interwq)
- Allow arena index lookup based on allocation addresses via mallctl.
(@lionkov)
- Allow disabling initial-exec TLS model. (@davidtgoldblatt, @KenMacD)
- Add opt.lg_extent_max_active_fit to set the max ratio between the size of
the active extent selected (to split off from) and the size of the requested
allocation. (@interwq, @davidtgoldblatt)
- Add retain_grow_limit to set the max size when growing virtual address
space. (@interwq)
- Add mallctl interfaces:
+ arena.<i>.retain_grow_limit (@interwq)
+ arenas.lookup (@lionkov)
+ max_background_threads (@djwatson)
+ opt.lg_extent_max_active_fit (@interwq)
+ opt.max_background_threads (@djwatson)
+ opt.metadata_thp (@interwq)
+ opt.thp (@interwq)
+ stats.metadata_thp (@interwq)
Portability improvements:
- Support GNU/kFreeBSD configuration. (@paravoid)
- Support m68k, nios2 and SH3 architectures. (@paravoid)
- Fall back to FD_CLOEXEC when O_CLOEXEC is unavailable. (@zonyitoo)
- Fix symbol listing for cross-compiling. (@tamird)
- Fix high bits computation on ARM. (@davidtgoldblatt, @paravoid)
- Disable the CPU_SPINWAIT macro for Power. (@davidtgoldblatt, @marxin)
- Fix MSVC 2015 & 2017 builds. (@rustyx)
- Improve RISC-V support. (@EdSchouten)
- Set name mangling script in strict mode. (@nicolov)
- Avoid MADV_HUGEPAGE on ARM. (@marxin)
- Modify configure to determine return value of strerror_r.
(@davidtgoldblatt, @cferris1000)
- Make sure CXXFLAGS is tested with CPP compiler. (@nehaljwani)
- Fix 32-bit build on MSVC. (@rustyx)
- Fix external symbol on MSVC. (@maksqwe)
- Avoid a printf format specifier warning. (@jasone)
- Add configure option --disable-initial-exec-tls which can allow jemalloc to
be dynamically loaded after program startup. (@davidtgoldblatt, @KenMacD)
- AArch64: Add ILP32 support. (@cmuellner)
- Add --with-lg-vaddr configure option to support cross compiling.
(@cmuellner, @davidtgoldblatt)
Optimizations and refactors:
- Improve active extent fit with extent_max_active_fit. This considerably
reduces fragmentation over time and improves virtual memory and metadata
usage. (@davidtgoldblatt, @interwq)
- Eagerly coalesce large extents to reduce fragmentation. (@interwq)
- sdallocx: only read size info when page aligned (i.e. possibly sampled),
which speeds up the sized deallocation path significantly. (@interwq)
- Avoid attempting new mappings for in place expansion with retain, since
it rarely succeeds in practice and causes high overhead. (@interwq)
- Refactor OOM handling in newImpl. (@wqfish)
- Add internal fine-grained logging functionality for debugging use.
(@davidtgoldblatt)
- Refactor arena / tcache interactions. (@davidtgoldblatt)
- Refactor extent management with dumpable flag. (@davidtgoldblatt)
- Add runtime detection of lazy purging. (@interwq)
- Use pairing heap instead of red-black tree for extents_avail. (@djwatson)
- Use sysctl on startup in FreeBSD. (@trasz)
- Use thread local prng state instead of atomic. (@djwatson)
- Make decay always purge one more extent than before, because in
practice large extents are usually the ones that cross the decay threshold.
Purging the additional extent helps save memory as well as reduce VM
fragmentation. (@interwq)
- Fast division by dynamic values. (@davidtgoldblatt)
- Improve the fit for aligned allocation. (@interwq, @edwinsmith)
- Refactor extent_t bitpacking. (@rkmisra)
- Optimize the generated assembly for ticker operations. (@davidtgoldblatt)
- Convert stats printing to use a structured text emitter. (@davidtgoldblatt)
- Remove preserve_lru feature for extents management. (@djwatson)
- Consolidate two memory loads into one on the fast deallocation path.
(@davidtgoldblatt, @interwq)
Bug fixes (most of the issues are only relevant to jemalloc 5.0):
- Fix deadlock with multithreaded fork in OS X. (@davidtgoldblatt)
- Validate returned file descriptor before use. (@zonyitoo)
- Fix a few background thread initialization and shutdown issues. (@interwq)
- Fix an extent coalesce + decay race by taking both coalescing extents off
the LRU list. (@interwq)
- Fix a potentially unbounded increase during decay, caused by one thread
continually stashing memory to purge while other threads generate new pages. The
number of pages to purge is checked to prevent this. (@interwq)
- Fix a FreeBSD bootstrap assertion. (@strejda, @interwq)
- Handle 32 bit mutex counters. (@rkmisra)
- Fix an indexing bug when creating background threads. (@davidtgoldblatt,
@binliu19)
- Fix arguments passed to extent_init. (@yuleniwo, @interwq)
- Fix addresses used for ordering mutexes. (@rkmisra)
- Fix abort_conf processing during bootstrap. (@interwq)
- Fix include path order for out-of-tree builds. (@cmuellner)
Incompatible changes:
- Remove --disable-thp. (@interwq)
- Remove mallctl interfaces:
+ config.thp (@interwq)
Documentation:
- Add TUNING.md. (@interwq, @davidtgoldblatt, @djwatson)
* 5.0.1 (July 1, 2017)
This bugfix release fixes several issues, most of which are obscure enough
@@ -22,7 +275,7 @@ brevity. Much more detail can be found in the git revision history:
unlikely to be an issue with other libc implementations. (@interwq)
- Mask signals during background thread creation. This prevents signals from
being inadvertently delivered to background threads. (@jasone,
@davidgoldblatt, @interwq)
@davidtgoldblatt, @interwq)
- Avoid inactivity checks within background threads, in order to prevent
recursive mutex acquisition. (@interwq)
- Fix extent_grow_retained() to use the specified hooks when the
@@ -515,7 +768,7 @@ brevity. Much more detail can be found in the git revision history:
these fixes, xallocx() now tries harder to partially fulfill requests for
optional extra space. Note that a couple of minor heap profiling
optimizations are included, but these are better thought of as performance
fixes that were integral to disovering most of the other bugs.
fixes that were integral to discovering most of the other bugs.
Optimizations:
- Avoid a chunk metadata read in arena_prof_tctx_set(), since it is in the

View File

@@ -1,33 +1,32 @@
#ifndef JEMALLOC_INTERNAL_ARENA_EXTERNS_H
#define JEMALLOC_INTERNAL_ARENA_EXTERNS_H
#include "jemalloc/internal/bin.h"
#include "jemalloc/internal/extent_dss.h"
#include "jemalloc/internal/hook.h"
#include "jemalloc/internal/pages.h"
#include "jemalloc/internal/size_classes.h"
#include "jemalloc/internal/stats.h"
extern ssize_t opt_dirty_decay_ms;
extern ssize_t opt_muzzy_decay_ms;
extern const arena_bin_info_t arena_bin_info[NBINS];
extern percpu_arena_mode_t opt_percpu_arena;
extern const char *percpu_arena_mode_names[];
extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS];
extern malloc_mutex_t arenas_lock;
void arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats,
szind_t szind, uint64_t nrequests);
void arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats,
size_t size);
extern size_t opt_oversize_threshold;
extern size_t oversize_threshold;
void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena,
unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms,
ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy);
void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms,
size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats,
malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats);
bin_stats_t *bstats, arena_stats_large_t *lstats,
arena_stats_extents_t *estats);
void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena,
extent_hooks_t **r_extent_hooks, extent_t *extent);
#ifdef JEMALLOC_JET
@@ -50,39 +49,47 @@ void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread,
void arena_reset(tsd_t *tsd, arena_t *arena);
void arena_destroy(tsd_t *tsd, arena_t *arena);
void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes);
void arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info,
cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes);
void arena_alloc_junk_small(void *ptr, const bin_info_t *bin_info,
bool zero);
typedef void (arena_dalloc_junk_small_t)(void *, const arena_bin_info_t *);
typedef void (arena_dalloc_junk_small_t)(void *, const bin_info_t *);
extern arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small;
void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size,
szind_t ind, bool zero);
void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize,
size_t alignment, bool zero, tcache_t *tcache);
void arena_prof_promote(tsdn_t *tsdn, const void *ptr, size_t usize);
void arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize);
void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
bool slow_path);
void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena,
extent_t *extent, void *ptr);
void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
szind_t binind, extent_t *extent, void *ptr);
void arena_dalloc_small(tsdn_t *tsdn, void *ptr);
bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
size_t extra, bool zero);
size_t extra, bool zero, size_t *newsize);
void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize,
size_t size, size_t alignment, bool zero, tcache_t *tcache);
size_t size, size_t alignment, bool zero, tcache_t *tcache,
hook_ralloc_args_t *hook_args);
dss_prec_t arena_dss_prec_get(arena_t *arena);
bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec);
ssize_t arena_dirty_decay_ms_default_get(void);
bool arena_dirty_decay_ms_default_set(ssize_t decay_ms);
ssize_t arena_muzzy_decay_ms_default_get(void);
bool arena_muzzy_decay_ms_default_set(ssize_t decay_ms);
bool arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena,
size_t *old_limit, size_t *new_limit);
unsigned arena_nthreads_get(arena_t *arena, bool internal);
void arena_nthreads_inc(arena_t *arena, bool internal);
void arena_nthreads_dec(arena_t *arena, bool internal);
size_t arena_extent_sn_next(arena_t *arena);
arena_t *arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks);
void arena_boot(void);
bool arena_init_huge(void);
bool arena_is_huge(unsigned arena_ind);
arena_t *arena_choose_huge(tsd_t *tsd);
bin_t *arena_bin_choose_lock(tsdn_t *tsdn, arena_t *arena, szind_t binind,
unsigned *binshard);
void arena_boot(sc_data_t *sc_data);
void arena_prefork0(tsdn_t *tsdn, arena_t *arena);
void arena_prefork1(tsdn_t *tsdn, arena_t *arena);
void arena_prefork2(tsdn_t *tsdn, arena_t *arena);

View File

@@ -25,7 +25,7 @@ static inline bool
arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) {
cassert(config_prof);
if (likely(prof_interval == 0)) {
if (likely(prof_interval == 0 || !prof_active_get_unlocked())) {
return false;
}

View File

@@ -4,15 +4,34 @@
#include "jemalloc/internal/jemalloc_internal_types.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/rtree.h"
#include "jemalloc/internal/size_classes.h"
#include "jemalloc/internal/sc.h"
#include "jemalloc/internal/sz.h"
#include "jemalloc/internal/ticker.h"
static inline szind_t
arena_bin_index(arena_t *arena, arena_bin_t *bin) {
szind_t binind = (szind_t)(bin - arena->bins);
assert(binind < NBINS);
return binind;
JEMALLOC_ALWAYS_INLINE bool
arena_has_default_hooks(arena_t *arena) {
return (extent_hooks_get(arena) == &extent_hooks_default);
}
JEMALLOC_ALWAYS_INLINE arena_t *
arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) {
	/* A caller-supplied arena is always used as is. */
	if (arena != NULL) {
		return arena;
	}

	/*
	 * With automatic arena selection (arena == NULL), requests at or above
	 * oversize_threshold go to the dedicated huge arena, unless the thread
	 * is already assigned to a manual (non-auto) arena.
	 */
	if (unlikely(size >= oversize_threshold)) {
		arena_t *assigned = tsd_arena_get(tsd);
		bool manual = (assigned != NULL) && !arena_is_auto(assigned);
		if (!manual) {
			return arena_choose_huge(tsd);
		}
	}

	return arena_choose(tsd, NULL);
}
JEMALLOC_ALWAYS_INLINE prof_tctx_t *
@@ -64,6 +83,32 @@ arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) {
large_prof_tctx_reset(tsdn, extent);
}
JEMALLOC_ALWAYS_INLINE nstime_t
arena_prof_alloc_time_get(tsdn_t *tsdn, const void *ptr,
    alloc_ctx_t *alloc_ctx) {
	cassert(config_prof);
	assert(ptr != NULL);

	/*
	 * In contrast to arena_prof_tctx_{get, set}, this is only called once
	 * the allocation is known to be sampled, so the extent looked up for
	 * ptr is asserted not to be a slab.
	 */
	extent_t *sampled = iealloc(tsdn, ptr);
	assert(!extent_slab_get(sampled));

	return large_prof_alloc_time_get(sampled);
}
JEMALLOC_ALWAYS_INLINE void
arena_prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx,
    nstime_t t) {
	cassert(config_prof);
	assert(ptr != NULL);

	/* Record the time on the (non-slab) extent that backs ptr. */
	extent_t *sampled = iealloc(tsdn, ptr);
	assert(!extent_slab_get(sampled));

	large_prof_alloc_time_set(sampled, t);
}
JEMALLOC_ALWAYS_INLINE void
arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) {
tsd_t *tsd;
@@ -90,14 +135,33 @@ arena_decay_tick(tsdn_t *tsdn, arena_t *arena) {
arena_decay_ticks(tsdn, arena, 1);
}
/* Purge a single extent to retained / unmapped directly. */
JEMALLOC_ALWAYS_INLINE void
arena_decay_extent(tsdn_t *tsdn, arena_t *arena,
    extent_hooks_t **r_extent_hooks, extent_t *extent) {
	/* Read the size before the extent is passed to the dalloc wrapper. */
	size_t nbytes = extent_size_get(extent);

	extent_dalloc_wrapper(tsdn, arena, r_extent_hooks, extent);

	if (!config_stats) {
		return;
	}

	/*
	 * Stats bookkeeping: one more madvise call, the purged page count, and
	 * the matching reduction of the mapped byte total.
	 */
	size_t npages = nbytes >> LG_PAGE;
	arena_stats_lock(tsdn, &arena->stats);
	arena_stats_add_u64(tsdn, &arena->stats,
	    &arena->decay_dirty.stats->nmadvise, 1);
	arena_stats_add_u64(tsdn, &arena->stats,
	    &arena->decay_dirty.stats->purged, npages);
	arena_stats_sub_zu(tsdn, &arena->stats, &arena->stats.mapped, nbytes);
	arena_stats_unlock(tsdn, &arena->stats);
}
JEMALLOC_ALWAYS_INLINE void *
arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero,
tcache_t *tcache, bool slow_path) {
assert(!tsdn_null(tsdn) || tcache == NULL);
assert(size != 0);
if (likely(tcache != NULL)) {
if (likely(size <= SMALL_MAXCLASS)) {
if (likely(size <= SC_SMALL_MAXCLASS)) {
return tcache_alloc_small(tsdn_tsd(tsdn), arena,
tcache, size, ind, zero, slow_path);
}
@@ -126,7 +190,7 @@ arena_salloc(tsdn_t *tsdn, const void *ptr) {
szind_t szind = rtree_szind_read(tsdn, &extents_rtree, rtree_ctx,
(uintptr_t)ptr, true);
assert(szind != NSIZES);
assert(szind != SC_NSIZES);
return sz_index2size(szind);
}
@@ -159,11 +223,21 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) {
/* Only slab members should be looked up via interior pointers. */
assert(extent_addr_get(extent) == ptr || extent_slab_get(extent));
assert(szind != NSIZES);
assert(szind != SC_NSIZES);
return sz_index2size(szind);
}
static inline void
arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind) {
	/*
	 * With profiling compiled in, a non-slab pointer whose size index is
	 * below SC_NBINS takes the promoted deallocation path.
	 */
	if (config_prof && unlikely(szind < SC_NBINS)) {
		arena_dalloc_promoted(tsdn, ptr, NULL, true);
		return;
	}

	/* Otherwise free the extent that backs ptr as a large allocation. */
	large_dalloc(tsdn, iealloc(tsdn, ptr));
}
static inline void
arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) {
assert(ptr != NULL);
@@ -180,13 +254,28 @@ arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) {
extent_t *extent = rtree_extent_read(tsdn, &extents_rtree,
rtree_ctx, (uintptr_t)ptr, true);
assert(szind == extent_szind_get(extent));
assert(szind < NSIZES);
assert(szind < SC_NSIZES);
assert(slab == extent_slab_get(extent));
}
if (likely(slab)) {
/* Small allocation. */
arena_dalloc_small(tsdn, ptr);
} else {
arena_dalloc_large_no_tcache(tsdn, ptr, szind);
}
}
JEMALLOC_ALWAYS_INLINE void
arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind,
bool slow_path) {
if (szind < nhbins) {
if (config_prof && unlikely(szind < SC_NBINS)) {
arena_dalloc_promoted(tsdn, ptr, tcache, slow_path);
} else {
tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, szind,
slow_path);
}
} else {
extent_t *extent = iealloc(tsdn, ptr);
large_dalloc(tsdn, extent);
@@ -210,7 +299,7 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
if (alloc_ctx != NULL) {
szind = alloc_ctx->szind;
slab = alloc_ctx->slab;
assert(szind != NSIZES);
assert(szind != SC_NSIZES);
} else {
rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn));
rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx,
@@ -222,7 +311,7 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
extent_t *extent = rtree_extent_read(tsdn, &extents_rtree,
rtree_ctx, (uintptr_t)ptr, true);
assert(szind == extent_szind_get(extent));
assert(szind < NSIZES);
assert(szind < SC_NSIZES);
assert(slab == extent_slab_get(extent));
}
@@ -231,25 +320,14 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind,
slow_path);
} else {
if (szind < nhbins) {
if (config_prof && unlikely(szind < NBINS)) {
arena_dalloc_promoted(tsdn, ptr, tcache,
slow_path);
} else {
tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr,
szind, slow_path);
}
} else {
extent_t *extent = iealloc(tsdn, ptr);
large_dalloc(tsdn, extent);
}
arena_dalloc_large(tsdn, ptr, tcache, szind, slow_path);
}
}
static inline void
arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) {
assert(ptr != NULL);
assert(size <= LARGE_MAXCLASS);
assert(size <= SC_LARGE_MAXCLASS);
szind_t szind;
bool slab;
@@ -259,7 +337,7 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) {
* object, so base szind and slab on the given size.
*/
szind = sz_size2index(size);
slab = (szind < NBINS);
slab = (szind < SC_NBINS);
}
if ((config_prof && opt_prof) || config_debug) {
@@ -271,7 +349,7 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) {
(uintptr_t)ptr, true, &szind, &slab);
assert(szind == sz_size2index(size));
assert((config_prof && opt_prof) || slab == (szind < NBINS));
assert((config_prof && opt_prof) || slab == (szind < SC_NBINS));
if (config_debug) {
extent_t *extent = rtree_extent_read(tsdn,
@@ -285,8 +363,7 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) {
/* Small allocation. */
arena_dalloc_small(tsdn, ptr);
} else {
extent_t *extent = iealloc(tsdn, ptr);
large_dalloc(tsdn, extent);
arena_dalloc_large_no_tcache(tsdn, ptr, szind);
}
}
@@ -295,7 +372,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
alloc_ctx_t *alloc_ctx, bool slow_path) {
assert(!tsdn_null(tsdn) || tcache == NULL);
assert(ptr != NULL);
assert(size <= LARGE_MAXCLASS);
assert(size <= SC_LARGE_MAXCLASS);
if (unlikely(tcache == NULL)) {
arena_sdalloc_no_tcache(tsdn, ptr, size);
@@ -304,7 +381,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
szind_t szind;
bool slab;
UNUSED alloc_ctx_t local_ctx;
alloc_ctx_t local_ctx;
if (config_prof && opt_prof) {
if (alloc_ctx == NULL) {
/* Uncommon case and should be a static check. */
@@ -325,7 +402,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
* object, so base szind and slab on the given size.
*/
szind = sz_size2index(size);
slab = (szind < NBINS);
slab = (szind < SC_NBINS);
}
if (config_debug) {
@@ -343,18 +420,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind,
slow_path);
} else {
if (szind < nhbins) {
if (config_prof && unlikely(szind < NBINS)) {
arena_dalloc_promoted(tsdn, ptr, tcache,
slow_path);
} else {
tcache_dalloc_large(tsdn_tsd(tsdn),
tcache, ptr, szind, slow_path);
}
} else {
extent_t *extent = iealloc(tsdn, ptr);
large_dalloc(tsdn, extent);
}
arena_dalloc_large(tsdn, ptr, tcache, szind, slow_path);
}
}

View File

@@ -0,0 +1,271 @@
#ifndef JEMALLOC_INTERNAL_ARENA_STATS_H
#define JEMALLOC_INTERNAL_ARENA_STATS_H
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/mutex_prof.h"
#include "jemalloc/internal/sc.h"
JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS
/*
* In those architectures that support 64-bit atomics, we use atomic updates for
* our 64-bit values. Otherwise, we use a plain uint64_t and synchronize
* externally.
*/
#ifdef JEMALLOC_ATOMIC_U64
typedef atomic_u64_t arena_stats_u64_t;
#else
/* Must hold the arena stats mutex while reading atomically. */
typedef uint64_t arena_stats_u64_t;
#endif
/*
 * Per-size-class counters for large allocations.  Fields tagged "derived" or
 * "partially derived" carry that tag in their trailing comment.
 */
typedef struct arena_stats_large_s arena_stats_large_t;
struct arena_stats_large_s {
/*
* Total number of allocation/deallocation requests served directly by
* the arena.
*/
arena_stats_u64_t nmalloc;
arena_stats_u64_t ndalloc;
/*
* Number of allocation requests that correspond to this size class.
* This includes requests served by tcache, though tcache only
* periodically merges into this counter.
*/
arena_stats_u64_t nrequests; /* Partially derived. */
/*
* Number of tcache fills / flushes for large (similarly, periodically
* merged). Note that there is no large tcache batch-fill currently
* (i.e. only fill 1 at a time); however flush may be batched.
*/
arena_stats_u64_t nfills; /* Partially derived. */
arena_stats_u64_t nflushes; /* Partially derived. */
/* Current number of allocations of this size class. */
size_t curlextents; /* Derived. */
};
/*
 * Purge counters for one decay state.  arena_stats_s holds two instances,
 * decay_dirty and decay_muzzy.
 */
typedef struct arena_stats_decay_s arena_stats_decay_t;
struct arena_stats_decay_s {
/* Total number of purge sweeps. */
arena_stats_u64_t npurge;
/* Total number of madvise calls made. */
arena_stats_u64_t nmadvise;
/* Total number of pages purged. */
arena_stats_u64_t purged;
};
/*
* Extent count and byte total for one bucket, kept separately for the dirty,
* muzzy and retained states.
*/
typedef struct arena_stats_extents_s arena_stats_extents_t;
struct arena_stats_extents_s {
/*
* Stats for a given index in the range [0, SC_NPSIZES] in an extents_t.
* We track both bytes and # of extents: two extents in the same bucket
* may have different sizes if adjacent size classes differ by more than
* a page, so bytes cannot always be derived from # of extents.
*/
/* Each n<state> field is an extent count; <state>_bytes is its byte total. */
atomic_zu_t ndirty;
atomic_zu_t dirty_bytes;
atomic_zu_t nmuzzy;
atomic_zu_t muzzy_bytes;
atomic_zu_t nretained;
atomic_zu_t retained_bytes;
};
/*
* Arena stats. Note that fields marked "derived" are not directly maintained
* within the arena code; rather their values are derived during stats merge
* requests.
*/
typedef struct arena_stats_s arena_stats_t;
struct arena_stats_s {
#ifndef JEMALLOC_ATOMIC_U64
/*
* Only present when 64-bit atomics are unavailable: the arena_stats_u64_t
* counters are then plain uint64_t and must be accessed with this mutex
* held (see arena_stats_lock() / arena_stats_read_u64()).
*/
malloc_mutex_t mtx;
#endif
/* Number of bytes currently mapped, excluding retained memory. */
atomic_zu_t mapped; /* Partially derived. */
/*
* Number of unused virtual memory bytes currently retained. Retained
* bytes are technically mapped (though always decommitted or purged),
* but they are excluded from the mapped statistic (above).
*/
atomic_zu_t retained; /* Derived. */
/* Number of extent_t structs allocated by base, but not being used. */
atomic_zu_t extent_avail;
/* Purge counters, tracked separately for dirty and muzzy decay. */
arena_stats_decay_t decay_dirty;
arena_stats_decay_t decay_muzzy;
/*
* NOTE(review): the next four look like metadata / residency byte
* counters; their exact definitions are not visible here -- confirm
* against the code that updates them.
*/
atomic_zu_t base; /* Derived. */
atomic_zu_t internal;
atomic_zu_t resident; /* Derived. */
atomic_zu_t metadata_thp;
/*
* NOTE(review): presumably totals over all large size classes (i.e. over
* the lstats entries below) -- confirm against the stats merge code.
*/
atomic_zu_t allocated_large; /* Derived. */
arena_stats_u64_t nmalloc_large; /* Derived. */
arena_stats_u64_t ndalloc_large; /* Derived. */
arena_stats_u64_t nfills_large; /* Derived. */
arena_stats_u64_t nflushes_large; /* Derived. */
arena_stats_u64_t nrequests_large; /* Derived. */
/* VM space had to be leaked (undocumented). Normally 0. */
atomic_zu_t abandoned_vm;
/* Number of bytes cached in tcache associated with this arena. */
atomic_zu_t tcache_bytes; /* Derived. */
/* One profiling record per arena mutex. */
mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes];
/* One element for each large size class. */
arena_stats_large_t lstats[SC_NSIZES - SC_NBINS];
/* Arena uptime. */
nstime_t uptime;
};
/*
 * Prepares an arena_stats_t for use.  The structure must already be zeroed
 * (checked byte by byte in debug builds), so the counters need no explicit
 * reset; the only real work is creating the stats mutex, which exists solely
 * when 64-bit atomics are unavailable.
 *
 * Returns true if the mutex could not be initialized, false on success.
 */
static inline bool
arena_stats_init(tsdn_t *tsdn, arena_stats_t *arena_stats) {
	if (config_debug) {
		const char *bytes = (const char *)arena_stats;
		size_t off = 0;
		while (off < sizeof(arena_stats_t)) {
			assert(bytes[off] == 0);
			off++;
		}
	}
#ifndef JEMALLOC_ATOMIC_U64
	bool failed = malloc_mutex_init(&arena_stats->mtx, "arena_stats",
	    WITNESS_RANK_ARENA_STATS, malloc_mutex_rank_exclusive);
	if (failed) {
		return true;
	}
#endif
	/* Zeroed memory already represents cleared statistics. */
	return false;
}
/*
* Acquires the arena stats mutex. When JEMALLOC_ATOMIC_U64 is defined there
* is no such mutex and this function does nothing.
*/
static inline void
arena_stats_lock(tsdn_t *tsdn, arena_stats_t *arena_stats) {
#ifndef JEMALLOC_ATOMIC_U64
malloc_mutex_lock(tsdn, &arena_stats->mtx);
#endif
}
/*
* Releases the arena stats mutex taken by arena_stats_lock(). Does nothing
* when JEMALLOC_ATOMIC_U64 is defined.
*/
static inline void
arena_stats_unlock(tsdn_t *tsdn, arena_stats_t *arena_stats) {
#ifndef JEMALLOC_ATOMIC_U64
malloc_mutex_unlock(tsdn, &arena_stats->mtx);
#endif
}
/*
* Returns the current value of the 64-bit counter *p. With 64-bit atomics
* this is a relaxed atomic load; otherwise it is a plain read and the caller
* must already hold the arena stats mutex (asserted).
*/
static inline uint64_t
arena_stats_read_u64(tsdn_t *tsdn, arena_stats_t *arena_stats,
arena_stats_u64_t *p) {
#ifdef JEMALLOC_ATOMIC_U64
return atomic_load_u64(p, ATOMIC_RELAXED);
#else
malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
return *p;
#endif
}
/*
* Adds x to the 64-bit counter *p. With 64-bit atomics this is a relaxed
* atomic fetch-add; otherwise it is a plain addition and the caller must
* already hold the arena stats mutex (asserted).
*/
static inline void
arena_stats_add_u64(tsdn_t *tsdn, arena_stats_t *arena_stats,
arena_stats_u64_t *p, uint64_t x) {
#ifdef JEMALLOC_ATOMIC_U64
atomic_fetch_add_u64(p, x, ATOMIC_RELAXED);
#else
malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
*p += x;
#endif
}
/*
* Subtracts x from the 64-bit counter *p. With 64-bit atomics this is a
* relaxed atomic fetch-sub; otherwise it is a plain subtraction and the caller
* must already hold the arena stats mutex (asserted). Both variants assert
* that the subtraction did not wrap around.
*/
static inline void
arena_stats_sub_u64(tsdn_t *tsdn, arena_stats_t *arena_stats,
arena_stats_u64_t *p, uint64_t x) {
#ifdef JEMALLOC_ATOMIC_U64
uint64_t r = atomic_fetch_sub_u64(p, x, ATOMIC_RELAXED);
/* r is the value before subtracting; r - x > r would mean underflow. */
assert(r - x <= r);
#else
malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
*p -= x;
/* *p now holds the result; adding x back must not yield less than it. */
assert(*p + x >= *p);
#endif
}
/*
 * Performs *dst += src as a separate load and store instead of an atomic
 * read-modify-write, so *dst needs external synchronization.  This avoids the
 * cost of a fetch_add when it's unnecessary (note that the types here are
 * atomic).
 */
static inline void
arena_stats_accum_u64(arena_stats_u64_t *dst, uint64_t src) {
#ifdef JEMALLOC_ATOMIC_U64
	atomic_store_u64(dst, src + atomic_load_u64(dst, ATOMIC_RELAXED),
	    ATOMIC_RELAXED);
#else
	*dst = *dst + src;
#endif
}
/*
 * Returns the current value of the size_t counter *p via a relaxed atomic
 * load.  Without 64-bit atomics the caller must additionally hold the arena
 * stats mutex, which is asserted.
 */
static inline size_t
arena_stats_read_zu(tsdn_t *tsdn, arena_stats_t *arena_stats,
    atomic_zu_t *p) {
#ifndef JEMALLOC_ATOMIC_U64
	malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
#endif
	/* The load itself is identical in both configurations. */
	return atomic_load_zu(p, ATOMIC_RELAXED);
}
/*
 * Adds x to the size_t counter *p.  With 64-bit atomics this is a relaxed
 * atomic fetch-add.  Otherwise the caller must hold the arena stats mutex
 * (asserted) and the update is done as a relaxed load followed by a relaxed
 * store.
 */
static inline void
arena_stats_add_zu(tsdn_t *tsdn, arena_stats_t *arena_stats,
    atomic_zu_t *p, size_t x) {
#ifdef JEMALLOC_ATOMIC_U64
	atomic_fetch_add_zu(p, x, ATOMIC_RELAXED);
#else
	malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
	atomic_store_zu(p, atomic_load_zu(p, ATOMIC_RELAXED) + x,
	    ATOMIC_RELAXED);
#endif
}
/*
 * Subtracts x from the size_t counter *p.  With 64-bit atomics this is a
 * relaxed atomic fetch-sub followed by an underflow assertion.  Otherwise the
 * caller must hold the arena stats mutex (asserted) and the update is done as
 * a relaxed load followed by a relaxed store.
 */
static inline void
arena_stats_sub_zu(tsdn_t *tsdn, arena_stats_t *arena_stats,
    atomic_zu_t *p, size_t x) {
#ifdef JEMALLOC_ATOMIC_U64
	size_t before = atomic_fetch_sub_zu(p, x, ATOMIC_RELAXED);
	/* A result larger than the starting value means we wrapped. */
	assert(before - x <= before);
#else
	malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
	atomic_store_zu(p, atomic_load_zu(p, ATOMIC_RELAXED) - x,
	    ATOMIC_RELAXED);
#endif
}
/*
 * size_t counterpart of arena_stats_accum_u64(): performs *dst += src as a
 * relaxed load followed by a relaxed store, so *dst must be externally
 * synchronized.
 */
static inline void
arena_stats_accum_zu(atomic_zu_t *dst, size_t src) {
	atomic_store_zu(dst, src + atomic_load_zu(dst, ATOMIC_RELAXED),
	    ATOMIC_RELAXED);
}
/*
 * Records one flush for the large size class szind: adds nrequests to that
 * class's request counter and bumps its flush counter by one, all under the
 * arena stats lock.  szind is rebased by SC_NBINS to index lstats.
 */
static inline void
arena_stats_large_flush_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats,
    szind_t szind, uint64_t nrequests) {
	/* Pure address computation; no need to hold the lock for it. */
	arena_stats_large_t *large = &arena_stats->lstats[szind - SC_NBINS];

	arena_stats_lock(tsdn, arena_stats);
	arena_stats_add_u64(tsdn, arena_stats, &large->nrequests, nrequests);
	arena_stats_add_u64(tsdn, arena_stats, &large->nflushes, 1);
	arena_stats_unlock(tsdn, arena_stats);
}
/*
* Adds size bytes to the arena's mapped statistic, taking and releasing the
* arena stats lock around the update.
*/
static inline void
arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t size) {
arena_stats_lock(tsdn, arena_stats);
arena_stats_add_zu(tsdn, arena_stats, &arena_stats->mapped, size);
arena_stats_unlock(tsdn, arena_stats);
}
#endif /* JEMALLOC_INTERNAL_ARENA_STATS_H */

View File

@@ -1,54 +1,19 @@
#ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H
#define JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H
#include "jemalloc/internal/arena_stats.h"
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/bin.h"
#include "jemalloc/internal/bitmap.h"
#include "jemalloc/internal/extent_dss.h"
#include "jemalloc/internal/jemalloc_internal_types.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/nstime.h"
#include "jemalloc/internal/ql.h"
#include "jemalloc/internal/size_classes.h"
#include "jemalloc/internal/sc.h"
#include "jemalloc/internal/smoothstep.h"
#include "jemalloc/internal/stats.h"
#include "jemalloc/internal/ticker.h"
/*
* Read-only information associated with each element of arena_t's bins array
* is stored separately, partly to reduce memory usage (only one copy, rather
* than one per arena), but mainly to avoid false cacheline sharing.
*
* Each slab has the following layout:
*
* /--------------------\
* | region 0 |
* |--------------------|
* | region 1 |
* |--------------------|
* | ... |
* | ... |
* | ... |
* |--------------------|
* | region nregs-1 |
* \--------------------/
*/
struct arena_bin_info_s {
/* Size of regions in a slab for this bin's size class. */
size_t reg_size;
/* Total size of a slab for this bin's size class. */
size_t slab_size;
/* Total number of regions in a slab for this bin's size class. */
uint32_t nregs;
/*
* Metadata used to manipulate bitmaps for slabs associated with this
* bin.
*/
bitmap_info_t bitmap_info;
};
struct arena_decay_s {
/* Synchronizes all non-atomic fields. */
malloc_mutex_t mtx;
@@ -104,37 +69,11 @@ struct arena_decay_s {
* arena and ctl code.
*
* Synchronization: Same as associated arena's stats field. */
decay_stats_t *stats;
arena_stats_decay_t *stats;
/* Peak number of pages in associated extents. Used for debug only. */
uint64_t ceil_npages;
};
struct arena_bin_s {
/* All operations on arena_bin_t fields require lock ownership. */
malloc_mutex_t lock;
/*
* Current slab being used to service allocations of this bin's size
* class. slabcur is independent of slabs_{nonfull,full}; whenever
* slabcur is reassigned, the previous slab must be deallocated or
* inserted into slabs_{nonfull,full}.
*/
extent_t *slabcur;
/*
* Heap of non-full slabs. This heap is used to assure that new
* allocations come from the non-full slab that is oldest/lowest in
* memory.
*/
extent_heap_t slabs_nonfull;
/* List used to track full slabs. */
extent_list_t slabs_full;
/* Bin statistics. */
malloc_bin_stats_t stats;
};
struct arena_s {
/*
* Number of threads currently assigned to this arena. Each thread has
@@ -151,6 +90,9 @@ struct arena_s {
*/
atomic_u_t nthreads[2];
/* Next bin shard for binding new threads. Synchronization: atomic. */
atomic_u_t binshard_next;
/*
* When percpu_arena is enabled, to amortize the cost of reading /
* updating the current CPU id, track the most recent thread accessing
@@ -162,18 +104,18 @@ struct arena_s {
arena_stats_t stats;
/*
* List of tcaches for extant threads associated with this arena.
* Stats from these are merged incrementally, and at exit if
* opt_stats_print is enabled.
* Lists of tcaches and cache_bin_array_descriptors for extant threads
* associated with this arena. Stats from these are merged
* incrementally, and at exit if opt_stats_print is enabled.
*
* Synchronization: tcache_ql_mtx.
*/
ql_head(tcache_t) tcache_ql;
ql_head(cache_bin_array_descriptor_t) cache_bin_array_descriptor_ql;
malloc_mutex_t tcache_ql_mtx;
/* Synchronization: internal. */
prof_accum_t prof_accum;
uint64_t prof_accumbytes;
/*
* PRNG state for cache index randomization of large allocation base
@@ -239,9 +181,14 @@ struct arena_s {
* be effective even if multiple arenas' extent allocation requests are
* highly interleaved.
*
* retain_grow_limit is the max allowed size ind to expand (unless the
* required size is greater). Default is no limit, and controlled
* through mallctl only.
*
* Synchronization: extent_grow_mtx
*/
pszind_t extent_grow_next;
pszind_t retain_grow_limit;
malloc_mutex_t extent_grow_mtx;
/*
@@ -251,6 +198,7 @@ struct arena_s {
* Synchronization: extent_avail_mtx.
*/
extent_tree_t extent_avail;
atomic_zu_t extent_avail_cnt;
malloc_mutex_t extent_avail_mtx;
/*
@@ -258,7 +206,7 @@ struct arena_s {
*
* Synchronization: internal.
*/
arena_bin_t bins[NBINS];
bins_t bins[SC_NBINS];
/*
* Base allocator, from which arena metadata are allocated.

View File

@@ -1,20 +1,20 @@
#ifndef JEMALLOC_INTERNAL_ARENA_TYPES_H
#define JEMALLOC_INTERNAL_ARENA_TYPES_H
#include "jemalloc/internal/sc.h"
/* Maximum number of regions in one slab. */
#define LG_SLAB_MAXREGS (LG_PAGE - LG_TINY_MIN)
#define LG_SLAB_MAXREGS (LG_PAGE - SC_LG_TINY_MIN)
#define SLAB_MAXREGS (1U << LG_SLAB_MAXREGS)
/* Default decay times in milliseconds. */
#define DIRTY_DECAY_MS_DEFAULT ZD(10 * 1000)
#define MUZZY_DECAY_MS_DEFAULT ZD(10 * 1000)
#define MUZZY_DECAY_MS_DEFAULT (0)
/* Number of event ticks between time checks. */
#define DECAY_NTICKS_PER_UPDATE 1000
typedef struct arena_slab_data_s arena_slab_data_t;
typedef struct arena_bin_info_s arena_bin_info_t;
typedef struct arena_decay_s arena_decay_t;
typedef struct arena_bin_s arena_bin_t;
typedef struct arena_s arena_t;
typedef struct arena_tdata_s arena_tdata_t;
typedef struct alloc_ctx_s alloc_ctx_t;
@@ -42,4 +42,10 @@ typedef enum {
#define PERCPU_ARENA_ENABLED(m) ((m) >= percpu_arena_mode_enabled_base)
#define PERCPU_ARENA_DEFAULT percpu_arena_disabled
/*
* When allocation_size >= oversize_threshold, use the dedicated huge arena
* (unless an arena index has been explicitly specified). 0 disables the
* feature.
*/
#define OVERSIZE_THRESHOLD_DEFAULT (8 << 20)
#endif /* JEMALLOC_INTERNAL_ARENA_TYPES_H */

View File

@@ -1,12 +1,19 @@
#ifndef JEMALLOC_INTERNAL_ATOMIC_H
#define JEMALLOC_INTERNAL_ATOMIC_H
#define ATOMIC_INLINE static inline
#define ATOMIC_INLINE JEMALLOC_ALWAYS_INLINE
#define JEMALLOC_U8_ATOMICS
#if defined(JEMALLOC_GCC_ATOMIC_ATOMICS)
# include "jemalloc/internal/atomic_gcc_atomic.h"
# if !defined(JEMALLOC_GCC_U8_ATOMIC_ATOMICS)
# undef JEMALLOC_U8_ATOMICS
# endif
#elif defined(JEMALLOC_GCC_SYNC_ATOMICS)
# include "jemalloc/internal/atomic_gcc_sync.h"
# if !defined(JEMALLOC_GCC_U8_SYNC_ATOMICS)
# undef JEMALLOC_U8_ATOMICS
# endif
#elif defined(_MSC_VER)
# include "jemalloc/internal/atomic_msvc.h"
#elif defined(JEMALLOC_C11_ATOMICS)
@@ -66,6 +73,8 @@ JEMALLOC_GENERATE_INT_ATOMICS(size_t, zu, LG_SIZEOF_PTR)
JEMALLOC_GENERATE_INT_ATOMICS(ssize_t, zd, LG_SIZEOF_PTR)
JEMALLOC_GENERATE_INT_ATOMICS(uint8_t, u8, 0)
JEMALLOC_GENERATE_INT_ATOMICS(uint32_t, u32, 2)
#ifdef JEMALLOC_ATOMIC_U64

View File

@@ -67,7 +67,8 @@ atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \
\
ATOMIC_INLINE bool \
atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \
type *expected, type desired, atomic_memory_order_t success_mo, \
UNUSED type *expected, type desired, \
atomic_memory_order_t success_mo, \
atomic_memory_order_t failure_mo) { \
return __atomic_compare_exchange(&a->repr, expected, &desired, \
true, atomic_enum_to_builtin(success_mo), \
@@ -76,7 +77,8 @@ atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \
\
ATOMIC_INLINE bool \
atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \
type *expected, type desired, atomic_memory_order_t success_mo, \
UNUSED type *expected, type desired, \
atomic_memory_order_t success_mo, \
atomic_memory_order_t failure_mo) { \
return __atomic_compare_exchange(&a->repr, expected, &desired, \
false, \

View File

@@ -27,8 +27,10 @@ atomic_fence(atomic_memory_order_t mo) {
asm volatile("" ::: "memory");
# if defined(__i386__) || defined(__x86_64__)
/* This is implicit on x86. */
# elif defined(__ppc__)
# elif defined(__ppc64__)
asm volatile("lwsync");
# elif defined(__ppc__)
asm volatile("sync");
# elif defined(__sparc__) && defined(__arch64__)
if (mo == atomic_memory_order_acquire) {
asm volatile("membar #LoadLoad | #LoadStore");
@@ -129,7 +131,8 @@ atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \
\
ATOMIC_INLINE bool \
atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \
type *expected, type desired, atomic_memory_order_t success_mo, \
type *expected, type desired, \
atomic_memory_order_t success_mo, \
atomic_memory_order_t failure_mo) { \
type prev = __sync_val_compare_and_swap(&a->repr, *expected, \
desired); \
@@ -142,7 +145,8 @@ atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \
} \
ATOMIC_INLINE bool \
atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \
type *expected, type desired, atomic_memory_order_t success_mo, \
type *expected, type desired, \
atomic_memory_order_t success_mo, \
atomic_memory_order_t failure_mo) { \
type prev = __sync_val_compare_and_swap(&a->repr, *expected, \
desired); \

View File

@@ -2,11 +2,12 @@
#define JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H
extern bool opt_background_thread;
extern size_t opt_max_background_threads;
extern malloc_mutex_t background_thread_lock;
extern atomic_b_t background_thread_enabled_state;
extern size_t n_background_threads;
extern size_t max_background_threads;
extern background_thread_info_t *background_thread_info;
extern bool can_enable_background_thread;
bool background_thread_create(tsd_t *tsd, unsigned arena_ind);
bool background_threads_enable(tsd_t *tsd);

View File

@@ -15,7 +15,12 @@ background_thread_enabled_set(tsdn_t *tsdn, bool state) {
JEMALLOC_ALWAYS_INLINE background_thread_info_t *
arena_background_thread_info_get(arena_t *arena) {
unsigned arena_ind = arena_ind_get(arena);
return &background_thread_info[arena_ind % ncpus];
return &background_thread_info[arena_ind % max_background_threads];
}
/*
* Returns the background_thread_info slot for index ind. The index is reduced
* modulo max_background_threads, so any ind maps onto a valid slot.
*/
JEMALLOC_ALWAYS_INLINE background_thread_info_t *
background_thread_info_get(size_t ind) {
return &background_thread_info[ind % max_background_threads];
}
JEMALLOC_ALWAYS_INLINE uint64_t

View File

@@ -8,6 +8,8 @@
#endif
#define BACKGROUND_THREAD_INDEFINITE_SLEEP UINT64_MAX
#define MAX_BACKGROUND_THREAD_LIMIT MALLOCX_ARENA_LIMIT
#define DEFAULT_NUM_BACKGROUND_THREAD 4
typedef enum {
background_thread_stopped,

View File

@@ -1,6 +1,9 @@
#ifndef JEMALLOC_INTERNAL_BASE_EXTERNS_H
#define JEMALLOC_INTERNAL_BASE_EXTERNS_H
extern metadata_thp_mode_t opt_metadata_thp;
extern const char *metadata_thp_mode_names[];
base_t *b0get(void);
base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks);
void base_delete(tsdn_t *tsdn, base_t *base);
@@ -10,7 +13,7 @@ extent_hooks_t *base_extent_hooks_set(base_t *base,
void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment);
extent_t *base_alloc_extent(tsdn_t *tsdn, base_t *base);
void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated,
size_t *resident, size_t *mapped);
size_t *resident, size_t *mapped, size_t *n_thp);
void base_prefork(tsdn_t *tsdn, base_t *base);
void base_postfork_parent(tsdn_t *tsdn, base_t *base);
void base_postfork_child(tsdn_t *tsdn, base_t *base);

View File

@@ -6,4 +6,8 @@ base_ind_get(const base_t *base) {
return base->ind;
}
/*
* Returns true when opt_metadata_thp is set to anything other than
* metadata_thp_disabled.
*/
static inline bool
metadata_thp_enabled(void) {
return (opt_metadata_thp != metadata_thp_disabled);
}
#endif /* JEMALLOC_INTERNAL_BASE_INLINES_H */

View File

@@ -3,7 +3,7 @@
#include "jemalloc/internal/jemalloc_internal_types.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/size_classes.h"
#include "jemalloc/internal/sc.h"
/* Embedded at the beginning of every block of base-managed virtual memory. */
struct base_block_s {
@@ -30,6 +30,8 @@ struct base_s {
/* Protects base_alloc() and base_stats_get() operations. */
malloc_mutex_t mtx;
/* Using THP when true (metadata_thp auto mode). */
bool auto_thp_switched;
/*
* Most recent size class in the series of increasingly large base
* extents. Logarithmic spacing between subsequent allocations ensures
@@ -44,12 +46,14 @@ struct base_s {
base_block_t *blocks;
/* Heap of extents that track unused trailing space within blocks. */
extent_heap_t avail[NSIZES];
extent_heap_t avail[SC_NSIZES];
/* Stats, only maintained if config_stats. */
size_t allocated;
size_t resident;
size_t mapped;
/* Number of THP regions touched. */
size_t n_thp;
};
#endif /* JEMALLOC_INTERNAL_BASE_STRUCTS_H */

View File

@@ -4,4 +4,30 @@
typedef struct base_block_s base_block_t;
typedef struct base_s base_t;
#define METADATA_THP_DEFAULT metadata_thp_disabled
/*
* In auto mode, arenas switch to huge pages for the base allocator on the
* second base block. a0 switches to thp on the 5th block (after 20 megabytes
* of metadata), since more metadata (e.g. rtree nodes) come from a0's base.
*/
#define BASE_AUTO_THP_THRESHOLD 2
#define BASE_AUTO_THP_THRESHOLD_A0 5
typedef enum {
metadata_thp_disabled = 0,
/*
* Lazily enable hugepage for metadata. To avoid high RSS caused by THP
* + low usage arena (i.e. THP becomes a significant percentage), the
* "auto" option only starts using THP after a base allocator used up
* the first THP region. Starting from the second hugepage (in a single
* arena), "auto" behaves the same as "always", i.e. madvise hugepage
* right away.
*/
metadata_thp_auto = 1,
metadata_thp_always = 2,
metadata_thp_mode_limit = 3
} metadata_thp_mode_t;
#endif /* JEMALLOC_INTERNAL_BASE_TYPES_H */

View File

@@ -0,0 +1,123 @@
#ifndef JEMALLOC_INTERNAL_BIN_H
#define JEMALLOC_INTERNAL_BIN_H
#include "jemalloc/internal/bin_stats.h"
#include "jemalloc/internal/bin_types.h"
#include "jemalloc/internal/extent_types.h"
#include "jemalloc/internal/extent_structs.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/sc.h"
/*
* A bin contains a set of extents that are currently being used for slab
* allocations.
*/
/*
* Read-only information associated with each element of arena_t's bins array
* is stored separately, partly to reduce memory usage (only one copy, rather
* than one per arena), but mainly to avoid false cacheline sharing.
*
* Each slab has the following layout:
*
* /--------------------\
* | region 0 |
* |--------------------|
* | region 1 |
* |--------------------|
* | ... |
* | ... |
* | ... |
* |--------------------|
* | region nregs-1 |
* \--------------------/
*/
/*
* Read-only per-bin parameters (region and slab geometry, shard count, bitmap
* layout). The global bin_infos[] table holds SC_NBINS of these.
*/
typedef struct bin_info_s bin_info_t;
struct bin_info_s {
/* Size of regions in a slab for this bin's size class. */
size_t reg_size;
/* Total size of a slab for this bin's size class. */
size_t slab_size;
/* Total number of regions in a slab for this bin's size class. */
uint32_t nregs;
/* Number of sharded bins in each arena for this size class. */
uint32_t n_shards;
/*
* Metadata used to manipulate bitmaps for slabs associated with this
* bin.
*/
bitmap_info_t bitmap_info;
};
extern bin_info_t bin_infos[SC_NBINS];
/*
* Mutable state of one bin: its lock, the slab currently being allocated
* from, the non-full and full slab collections, and the bin's statistics.
*/
typedef struct bin_s bin_t;
struct bin_s {
/* All operations on bin_t fields require lock ownership. */
malloc_mutex_t lock;
/*
* Current slab being used to service allocations of this bin's size
* class. slabcur is independent of slabs_{nonfull,full}; whenever
* slabcur is reassigned, the previous slab must be deallocated or
* inserted into slabs_{nonfull,full}.
*/
extent_t *slabcur;
/*
* Heap of non-full slabs. This heap is used to assure that new
* allocations come from the non-full slab that is oldest/lowest in
* memory.
*/
extent_heap_t slabs_nonfull;
/* List used to track full slabs. */
extent_list_t slabs_full;
/* Bin statistics. */
bin_stats_t stats;
};
/* A set of sharded bins of the same size class. */
typedef struct bins_s bins_t;
struct bins_s {
/*
* Sharded bins. Dynamically sized.
* NOTE(review): the element count is presumably the matching
* bin_info_t's n_shards -- confirm against the allocation site.
*/
bin_t *bin_shards;
};
void bin_shard_sizes_boot(unsigned bin_shards[SC_NBINS]);
bool bin_update_shard_size(unsigned bin_shards[SC_NBINS], size_t start_size,
size_t end_size, size_t nshards);
void bin_boot(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]);
/* Initializes a bin to empty. Returns true on error. */
bool bin_init(bin_t *bin);
/* Forking. */
void bin_prefork(tsdn_t *tsdn, bin_t *bin);
void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin);
void bin_postfork_child(tsdn_t *tsdn, bin_t *bin);
/* Stats. */
/*
 * Adds every counter of bin->stats into *dst_bin_stats and folds the profiling
 * data of bin->lock into dst_bin_stats->mutex_data.  bin->lock is held for the
 * whole operation; the destination is not locked here.
 */
static inline void
bin_stats_merge(tsdn_t *tsdn, bin_stats_t *dst_bin_stats, bin_t *bin) {
	bin_stats_t *dst = dst_bin_stats;
	const bin_stats_t *src = &bin->stats;

	malloc_mutex_lock(tsdn, &bin->lock);
	malloc_mutex_prof_accum(tsdn, &dst->mutex_data, &bin->lock);

	/* Allocation / request counters. */
	dst->nmalloc += src->nmalloc;
	dst->ndalloc += src->ndalloc;
	dst->nrequests += src->nrequests;
	dst->curregs += src->curregs;

	/* tcache traffic. */
	dst->nfills += src->nfills;
	dst->nflushes += src->nflushes;

	/* Slab counters. */
	dst->nslabs += src->nslabs;
	dst->reslabs += src->reslabs;
	dst->curslabs += src->curslabs;
	dst->nonfull_slabs += src->nonfull_slabs;

	malloc_mutex_unlock(tsdn, &bin->lock);
}
#endif /* JEMALLOC_INTERNAL_BIN_H */

View File

@@ -0,0 +1,54 @@
#ifndef JEMALLOC_INTERNAL_BIN_STATS_H
#define JEMALLOC_INTERNAL_BIN_STATS_H
#include "jemalloc/internal/mutex_prof.h"
/* Counters describing the activity and current occupancy of one bin. */
typedef struct bin_stats_s bin_stats_t;
struct bin_stats_s {
/*
* Total number of allocation/deallocation requests served directly by
* the bin. Note that tcache may allocate an object, then recycle it
* many times, resulting many increments to nrequests, but only one
* each to nmalloc and ndalloc.
*/
uint64_t nmalloc;
uint64_t ndalloc;
/*
* Number of allocation requests that correspond to the size of this
* bin. This includes requests served by tcache, though tcache only
* periodically merges into this counter.
*/
uint64_t nrequests;
/*
* Current number of regions of this size class, including regions
* currently cached by tcache.
*/
size_t curregs;
/* Number of tcache fills from this bin. */
uint64_t nfills;
/* Number of tcache flushes to this bin. */
uint64_t nflushes;
/* Total number of slabs created for this bin's size class. */
uint64_t nslabs;
/*
* Total number of slabs reused by extracting them from the slabs heap
* for this bin's size class.
*/
uint64_t reslabs;
/* Current number of slabs in this bin. */
size_t curslabs;
/* Current size of nonfull slabs heap in this bin. */
size_t nonfull_slabs;
/* Mutex profiling data associated with this bin. */
mutex_prof_data_t mutex_data;
};
#endif /* JEMALLOC_INTERNAL_BIN_STATS_H */

View File

@@ -0,0 +1,17 @@
#ifndef JEMALLOC_INTERNAL_BIN_TYPES_H
#define JEMALLOC_INTERNAL_BIN_TYPES_H
#include "jemalloc/internal/sc.h"
#define BIN_SHARDS_MAX (1 << EXTENT_BITS_BINSHARD_WIDTH)
#define N_BIN_SHARDS_DEFAULT 1
/* Used in TSD static initializer only. Real init in arena_bind(). */
#define TSD_BINSHARDS_ZERO_INITIALIZER {{UINT8_MAX}}
/*
* Table of one uint8_t shard index per bin (SC_NBINS entries).
* NOTE(review): presumably the per-thread choice of bin shard, filled in by
* arena_bind() as the initializer comment above suggests -- confirm.
*/
typedef struct tsd_binshards_s tsd_binshards_t;
struct tsd_binshards_s {
uint8_t binshard[SC_NBINS];
};
#endif /* JEMALLOC_INTERNAL_BIN_TYPES_H */

View File

@@ -27,6 +27,25 @@ ffs_u(unsigned bitmap) {
return JEMALLOC_INTERNAL_FFS(bitmap);
}
/*
* Thin wrapper around JEMALLOC_INTERNAL_POPCOUNTL for an unsigned long
* argument. Only available when that macro is defined.
*/
#ifdef JEMALLOC_INTERNAL_POPCOUNTL
BIT_UTIL_INLINE unsigned
popcount_lu(unsigned long bitmap) {
return JEMALLOC_INTERNAL_POPCOUNTL(bitmap);
}
#endif
/*
* Clears the first set bit in *bitmap (the one located by ffs_lu()) and
* returns its zero-based position. *bitmap *must not* be 0.
*/
BIT_UTIL_INLINE size_t
cfs_lu(unsigned long* bitmap) {
/* ffs_lu() is one-based, hence the - 1. */
size_t bit = ffs_lu(*bitmap) - 1;
/* The bit is known to be set, so XOR clears it. */
*bitmap ^= ZU(1) << bit;
return bit;
}
BIT_UTIL_INLINE unsigned
ffs_zu(size_t bitmap) {
#if LG_SIZEOF_PTR == LG_SIZEOF_INT
@@ -63,6 +82,22 @@ ffs_u32(uint32_t bitmap) {
BIT_UTIL_INLINE uint64_t
pow2_ceil_u64(uint64_t x) {
#if (defined(__amd64__) || defined(__x86_64__) || defined(JEMALLOC_HAVE_BUILTIN_CLZ))
if(unlikely(x <= 1)) {
return x;
}
size_t msb_on_index;
#if (defined(__amd64__) || defined(__x86_64__))
asm ("bsrq %1, %0"
: "=r"(msb_on_index) // Outputs.
: "r"(x-1) // Inputs.
);
#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ))
msb_on_index = (63 ^ __builtin_clzll(x - 1));
#endif
assert(msb_on_index < 63);
return 1ULL << (msb_on_index + 1);
#else
x--;
x |= x >> 1;
x |= x >> 2;
@@ -72,10 +107,27 @@ pow2_ceil_u64(uint64_t x) {
x |= x >> 32;
x++;
return x;
#endif
}
BIT_UTIL_INLINE uint32_t
pow2_ceil_u32(uint32_t x) {
#if ((defined(__i386__) || defined(JEMALLOC_HAVE_BUILTIN_CLZ)) && (!defined(__s390__)))
if(unlikely(x <= 1)) {
return x;
}
size_t msb_on_index;
#if (defined(__i386__))
asm ("bsr %1, %0"
: "=r"(msb_on_index) // Outputs.
: "r"(x-1) // Inputs.
);
#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ))
msb_on_index = (31 ^ __builtin_clz(x - 1));
#endif
assert(msb_on_index < 31);
return 1U << (msb_on_index + 1);
#else
x--;
x |= x >> 1;
x |= x >> 2;
@@ -84,6 +136,7 @@ pow2_ceil_u32(uint32_t x) {
x |= x >> 16;
x++;
return x;
#endif
}
/* Compute the smallest power of 2 that is >= x. */
@@ -160,6 +213,27 @@ lg_floor(size_t x) {
}
#endif
/*
 * Ceiling counterpart of lg_floor(): returns lg_floor(x), plus one when x has
 * more than one bit set (i.e. x is not a power of two).
 */
BIT_UTIL_INLINE unsigned
lg_ceil(size_t x) {
	unsigned lg = lg_floor(x);
	if ((x & (x - 1)) != 0) {
		/* Not a power of two: round up. */
		lg++;
	}
	return lg;
}
#undef BIT_UTIL_INLINE
/*
 * A compile-time version of lg_floor and lg_ceil.
 *
 * LG_FLOOR_<N>(x) computes floor(lg(x)) for an x that fits in N bits by
 * testing whether x fits in the lower half: if so, it recurses on x, otherwise
 * it adds N/2 and recurses on the upper half.  Every use of the argument is
 * parenthesized so that the helpers also expand correctly when handed an
 * expression containing low-precedence operators (e.g. a | b).
 */
#define LG_FLOOR_1(x) 0
#define LG_FLOOR_2(x) ((x) < (1ULL << 1) ? LG_FLOOR_1(x) : 1 + LG_FLOOR_1((x) >> 1))
#define LG_FLOOR_4(x) ((x) < (1ULL << 2) ? LG_FLOOR_2(x) : 2 + LG_FLOOR_2((x) >> 2))
#define LG_FLOOR_8(x) ((x) < (1ULL << 4) ? LG_FLOOR_4(x) : 4 + LG_FLOOR_4((x) >> 4))
#define LG_FLOOR_16(x) ((x) < (1ULL << 8) ? LG_FLOOR_8(x) : 8 + LG_FLOOR_8((x) >> 8))
#define LG_FLOOR_32(x) ((x) < (1ULL << 16) ? LG_FLOOR_16(x) : 16 + LG_FLOOR_16((x) >> 16))
#define LG_FLOOR_64(x) ((x) < (1ULL << 32) ? LG_FLOOR_32(x) : 32 + LG_FLOOR_32((x) >> 32))
/* Pick the variant matching the pointer width (LG_SIZEOF_PTR == 2: 32-bit). */
#if LG_SIZEOF_PTR == 2
# define LG_FLOOR(x) LG_FLOOR_32((x))
#else
# define LG_FLOOR(x) LG_FLOOR_64((x))
#endif
/* LG_FLOOR(x), plus one when x is not a power of two. */
#define LG_CEIL(x) (LG_FLOOR(x) + (((x) & ((x) - 1)) == 0 ? 0 : 1))
#endif /* JEMALLOC_INTERNAL_BIT_UTIL_H */

View File

@@ -3,18 +3,18 @@
#include "jemalloc/internal/arena_types.h"
#include "jemalloc/internal/bit_util.h"
#include "jemalloc/internal/size_classes.h"
#include "jemalloc/internal/sc.h"
typedef unsigned long bitmap_t;
#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG
/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */
#if LG_SLAB_MAXREGS > LG_CEIL_NSIZES
#if LG_SLAB_MAXREGS > LG_CEIL(SC_NSIZES)
/* Maximum bitmap bit count is determined by maximum regions per slab. */
# define LG_BITMAP_MAXBITS LG_SLAB_MAXREGS
#else
/* Maximum bitmap bit count is determined by number of extent size classes. */
# define LG_BITMAP_MAXBITS LG_CEIL_NSIZES
# define LG_BITMAP_MAXBITS LG_CEIL(SC_NSIZES)
#endif
#define BITMAP_MAXBITS (ZU(1) << LG_BITMAP_MAXBITS)

View File

@@ -0,0 +1,131 @@
#ifndef JEMALLOC_INTERNAL_CACHE_BIN_H
#define JEMALLOC_INTERNAL_CACHE_BIN_H
#include "jemalloc/internal/ql.h"
/*
* The cache_bins are the mechanism that the tcache and the arena use to
* communicate. The tcache fills from and flushes to the arena by passing a
* cache_bin_t to fill/flush. When the arena needs to pull stats from the
* tcaches associated with it, it does so by iterating over its
* cache_bin_array_descriptor_t objects and reading out per-bin stats it
* contains. This makes it so that the arena need not know about the existence
* of the tcache at all.
*/
/*
* The count of the number of cached allocations in a bin. We make this signed
* so that negative numbers can encode "invalid" states (e.g. a low water mark
* of -1 for a cache that has been depleted).
*/
typedef int32_t cache_bin_sz_t;
/* Statistics kept inside each cache_bin_t (see its tstats field). */
typedef struct cache_bin_stats_s cache_bin_stats_t;
struct cache_bin_stats_s {
/*
* Number of allocation requests that corresponded to the size of this
* bin.
*/
uint64_t nrequests;
};
/*
* Read-only information associated with each element of tcache_t's tbins array
* is stored separately, mainly to reduce memory usage.
*/
typedef struct cache_bin_info_s cache_bin_info_t;
struct cache_bin_info_s {
/*
* Upper limit on ncached. cache_bin_dalloc_easy() refuses to cache
* another object once ncached reaches this value.
*/
cache_bin_sz_t ncached_max;
};
/*
* One cache bin: a stack of cached object pointers together with its fill
* level, low-water mark and request statistics.
*/
typedef struct cache_bin_s cache_bin_t;
struct cache_bin_s {
/* Min # cached since last GC. */
cache_bin_sz_t low_water;
/* # of cached objects. */
cache_bin_sz_t ncached;
/*
* ncached and stats are both modified frequently. Let's keep them
* close so that they have a higher chance of being on the same
* cacheline, thus less write-backs.
*/
cache_bin_stats_t tstats;
/*
* Stack of available objects.
*
* To make use of adjacent cacheline prefetch, the items in the avail
* stack goes to higher address for newer allocations. avail points
* just above the available space, which means that
* avail[-ncached, ... -1] are available items and the lowest item will
* be allocated first.
*/
void **avail;
};
/*
* List node that exposes one set of cache bins (small and large) to the arena
* so it can read their stats.
*/
typedef struct cache_bin_array_descriptor_s cache_bin_array_descriptor_t;
struct cache_bin_array_descriptor_s {
/*
* The arena keeps a list of the cache bins associated with it, for
* stats collection.
*/
ql_elm(cache_bin_array_descriptor_t) link;
/* Pointers to the tcache bins. */
cache_bin_t *bins_small;
cache_bin_t *bins_large;
};
/*
 * Fills in a descriptor: records the given small and large bin arrays and
 * initializes the list linkage so the descriptor can later be put on a list.
 */
static inline void
cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor,
    cache_bin_t *bins_small, cache_bin_t *bins_large) {
	descriptor->bins_large = bins_large;
	descriptor->bins_small = bins_small;
	ql_elm_new(descriptor, link);
}
/*
* Pops one cached object from bin.
*
* On success *success is set to true and the object is returned; ncached has
* been decremented and low_water lowered if the new count is at or below it.
* If the bin was empty, *success is set to false, ncached is restored to 0,
* low_water is left at -1 and NULL is returned. Callers must test *success
* rather than the returned pointer (see the comment in the body).
*/
JEMALLOC_ALWAYS_INLINE void *
cache_bin_alloc_easy(cache_bin_t *bin, bool *success) {
void *ret;
/* Decrement first; an empty bin shows up as ncached == -1 below. */
bin->ncached--;
/*
* Check for both bin->ncached == 0 and ncached < low_water
* in a single branch.
*/
if (unlikely(bin->ncached <= bin->low_water)) {
bin->low_water = bin->ncached;
if (bin->ncached == -1) {
/* Nothing was cached: undo the decrement and report failure. */
bin->ncached = 0;
*success = false;
return NULL;
}
}
/*
* success (instead of ret) should be checked upon the return of this
* function. We avoid checking (ret == NULL) because there is never a
* null stored on the avail stack (which is unknown to the compiler),
* and eagerly checking ret would cause pipeline stall (waiting for the
* cacheline).
*/
*success = true;
/* ncached is already decremented, so this is avail[-(old ncached)]. */
ret = *(bin->avail - (bin->ncached + 1));
return ret;
}
/*
 * Pushes ptr onto bin's stack of cached objects.
 *
 * Returns false, leaving the bin untouched, when the bin already holds
 * bin_info->ncached_max objects; otherwise stores ptr in the next slot below
 * avail, increments ncached and returns true.
 */
JEMALLOC_ALWAYS_INLINE bool
cache_bin_dalloc_easy(cache_bin_t *bin, cache_bin_info_t *bin_info, void *ptr) {
	const cache_bin_sz_t limit = bin_info->ncached_max;

	if (unlikely(bin->ncached == limit)) {
		return false;
	}
	assert(bin->ncached < limit);

	/* The new item lives at avail[-ncached] for the updated ncached. */
	cache_bin_sz_t slot = ++bin->ncached;
	bin->avail[-slot] = ptr;
	return true;
}
#endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */

View File

@@ -5,7 +5,7 @@
#include "jemalloc/internal/malloc_io.h"
#include "jemalloc/internal/mutex_prof.h"
#include "jemalloc/internal/ql.h"
#include "jemalloc/internal/size_classes.h"
#include "jemalloc/internal/sc.h"
#include "jemalloc/internal/stats.h"
/* Maximum ctl tree depth. */
@@ -39,15 +39,19 @@ typedef struct ctl_arena_stats_s {
uint64_t nmalloc_small;
uint64_t ndalloc_small;
uint64_t nrequests_small;
uint64_t nfills_small;
uint64_t nflushes_small;
malloc_bin_stats_t bstats[NBINS];
malloc_large_stats_t lstats[NSIZES - NBINS];
bin_stats_t bstats[SC_NBINS];
arena_stats_large_t lstats[SC_NSIZES - SC_NBINS];
arena_stats_extents_t estats[SC_NPSIZES];
} ctl_arena_stats_t;
typedef struct ctl_stats_s {
size_t allocated;
size_t active;
size_t metadata;
size_t metadata_thp;
size_t resident;
size_t mapped;
size_t retained;

View File

@@ -0,0 +1,41 @@
#ifndef JEMALLOC_INTERNAL_DIV_H
#define JEMALLOC_INTERNAL_DIV_H
#include "jemalloc/internal/assert.h"
/*
* This module does the division that computes the index of a region in a slab,
* given its offset relative to the base.
* That is, given a divisor d, an n = i * d (all integers), we'll return i.
* We do some pre-computation to do this more quickly than a CPU division
* instruction.
* We bound n < 2^32, and don't support dividing by one.
*/
/* Precomputed state used to divide by one fixed divisor. */
typedef struct div_info_s div_info_t;
struct div_info_s {
	/* Multiplier that div_compute() uses in place of a division. */
	uint32_t magic;
#ifdef JEMALLOC_DEBUG
	/* The divisor; only present so debug builds can verify results. */
	size_t d;
#endif
};

/* Defined out of line; prepares div_info for the given divisor. */
void div_init(div_info_t *div_info, size_t divisor);

/*
 * Return n divided by the divisor that div_info was initialized for.
 *
 * n must fit in 32 bits and is expected to be an exact multiple of the
 * divisor; debug builds assert that the result multiplies back to n.
 */
static inline size_t
div_compute(div_info_t *div_info, size_t n) {
	assert(n <= (uint32_t)-1);
	/*
	 * Widen to 64 bits, multiply, and keep the high 32 bits of the
	 * product.  Per the original author this compiles to mov; imul; shr on
	 * x86-64, and 32-bit compilers recognise the "high half of a multiply"
	 * pattern (e.g. mul on x86, umull on arm).
	 */
	const uint64_t wide_product = (uint64_t)div_info->magic * (uint64_t)n;
	size_t quotient = (size_t)(wide_product >> 32);
#ifdef JEMALLOC_DEBUG
	assert(quotient * div_info->d == n);
#endif
	return quotient;
}
#endif /* JEMALLOC_INTERNAL_DIV_H */

View File

@@ -0,0 +1,486 @@
#ifndef JEMALLOC_INTERNAL_EMITTER_H
#define JEMALLOC_INTERNAL_EMITTER_H
#include "jemalloc/internal/ql.h"
/* Output encodings an emitter can produce; chosen once in emitter_init(). */
typedef enum emitter_output_e emitter_output_t;
enum emitter_output_e {
/* JSON output; indented with one tab per nesting level. */
emitter_output_json,
/* Plain-text table output; indented with two spaces per nesting level. */
emitter_output_table
};
typedef enum emitter_justify_e emitter_justify_t;
enum emitter_justify_e {
emitter_justify_left,
emitter_justify_right,
/* Not for users; just to pass to internal functions. */
emitter_justify_none
};
/*
 * Tags for the C type of a value handed to the emitter through a
 * const void *.  emitter_print_value() switches on the tag to decide how to
 * dereference and format the value.
 */
typedef enum emitter_type_e emitter_type_t;
enum emitter_type_e {
emitter_type_bool,
emitter_type_int,
emitter_type_unsigned,
emitter_type_uint32,
emitter_type_uint64,
emitter_type_size,
emitter_type_ssize,
/* A const char *; printed surrounded by double quotes. */
emitter_type_string,
/*
* A title is a column title in a table; it's just a string, but it's
* not quoted.
*/
emitter_type_title,
};
/* One column (cell) of a table row; see emitter_table_row(). */
typedef struct emitter_col_s emitter_col_t;
struct emitter_col_s {
/* Filled in by the user. */
emitter_justify_t justify;
int width;
emitter_type_t type;
/*
* Storage for the cell's value; `type` says which member is meaningful.
* emitter_table_row() passes the address of bool_val as the address of
* the value regardless of type, relying on all members sharing storage.
* NOTE(review): uint32_val/uint32_t_val and uint64_val/uint64_t_val
* are duplicate spellings of the same member; presumably kept for
* macro-generated accessors -- confirm against callers.
*/
union {
bool bool_val;
int int_val;
unsigned unsigned_val;
uint32_t uint32_val;
uint32_t uint32_t_val;
uint64_t uint64_val;
uint64_t uint64_t_val;
size_t size_val;
ssize_t ssize_val;
const char *str_val;
};
/* Filled in by initialization. */
ql_elm(emitter_col_t) link;
};
/*
 * A table row: a list of columns.  Initialized by emitter_row_init() and
 * populated by emitter_col_init().
 */
typedef struct emitter_row_s emitter_row_t;
struct emitter_row_s {
/* Columns of the row, linked through emitter_col_t's link field. */
ql_head(emitter_col_t) cols;
};
/* State of one emitter; set up by emitter_init(). */
typedef struct emitter_s emitter_t;
struct emitter_s {
/* Selected output encoding (JSON or table). */
emitter_output_t output;
/* The output information. */
/* Callback that receives emitted text; handed to malloc_vcprintf(). */
void (*write_cb)(void *, const char *);
/* Opaque pointer handed to malloc_vcprintf() together with write_cb. */
void *cbopaque;
/* Current nesting level; determines how far emitter_indent() indents. */
int nesting_depth;
/* True if we've already emitted a value at the given depth. */
bool item_at_depth;
/* True if we emitted a key and will emit corresponding value next. */
bool emitted_key;
};
/*
 * Internal convenience function.  Formats printf-style and hands the result
 * to malloc_vcprintf() along with the emitter's write callback and opaque
 * pointer.
 */
JEMALLOC_FORMAT_PRINTF(2, 3)
static inline void
emitter_printf(emitter_t *emitter, const char *format, ...) {
	va_list args;

	va_start(args, format);
	malloc_vcprintf(emitter->write_cb, emitter->cbopaque, format, args);
	va_end(args);
}
/*
 * Internal.  Build a printf-style format string in out_fmt (capacity out_size)
 * from fmt_specifier, optionally inserting a width and left-justify flag.
 *
 * The first character of fmt_specifier is skipped (it is the '%' in every
 * call visible in this header) and a fresh '%' is written, followed by:
 *   - nothing                for emitter_justify_none (width is ignored),
 *   - "-<width>"             for emitter_justify_left,
 *   - "<width>"              for any other justify value,
 * and then the remainder of fmt_specifier.  Returns out_fmt.
 */
static inline const char * JEMALLOC_FORMAT_ARG(3)
emitter_gen_fmt(char *out_fmt, size_t out_size, const char *fmt_specifier,
    emitter_justify_t justify, int width) {
	const char *conversion = fmt_specifier + 1;
	size_t written;

	switch (justify) {
	case emitter_justify_none:
		written = malloc_snprintf(out_fmt, out_size,
		    "%%%s", conversion);
		break;
	case emitter_justify_left:
		written = malloc_snprintf(out_fmt, out_size,
		    "%%-%d%s", width, conversion);
		break;
	default:
		written = malloc_snprintf(out_fmt, out_size,
		    "%%%d%s", width, conversion);
		break;
	}
	/* Only happens in case of bad format string, which *we* choose. */
	assert(written < out_size);
	return out_fmt;
}
/*
 * Internal.  Emit the given value type in the relevant encoding (so that the
 * bool true gets mapped to json "true", but the string "true" gets mapped to
 * json "\"true\"", for instance).
 *
 * value points at an object whose C type is selected by value_type: e.g. a
 * bool for emitter_type_bool, a const char * for emitter_type_string and
 * emitter_type_title.  Strings are wrapped in double quotes; titles are not.
 *
 * Width is ignored if justify is emitter_justify_none.
 */
static inline void
emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width,
    emitter_type_t value_type, const void *value) {
	size_t str_written;
#define BUF_SIZE 256
#define FMT_SIZE 10
	/*
	 * We dynamically generate a format string to emit, to let us use the
	 * snprintf machinery.  This is kinda hacky, but gets the job done
	 * quickly without having to think about the various snprintf edge
	 * cases.
	 */
	char fmt[FMT_SIZE];
	char buf[BUF_SIZE];

	/* Print *value, read as `type`, using `format` plus justification. */
#define EMIT_SIMPLE(type, format)					\
	emitter_printf(emitter,						\
	    emitter_gen_fmt(fmt, FMT_SIZE, format, justify, width),	\
	    *(const type *)value)

	switch (value_type) {
	case emitter_type_bool:
		emitter_printf(emitter,
		    emitter_gen_fmt(fmt, FMT_SIZE, "%s", justify, width),
		    *(const bool *)value ?  "true" : "false");
		break;
	case emitter_type_int:
		EMIT_SIMPLE(int, "%d");
		break;
	case emitter_type_unsigned:
		EMIT_SIMPLE(unsigned, "%u");
		break;
	case emitter_type_ssize:
		EMIT_SIMPLE(ssize_t, "%zd");
		break;
	case emitter_type_size:
		EMIT_SIMPLE(size_t, "%zu");
		break;
	case emitter_type_string:
		/* Quote the string first, then justify the quoted form. */
		str_written = malloc_snprintf(buf, BUF_SIZE, "\"%s\"",
		    *(const char *const *)value);
		/*
		 * We control the strings we output; we shouldn't get anything
		 * anywhere near the buffer size.
		 */
		assert(str_written < BUF_SIZE);
		emitter_printf(emitter,
		    emitter_gen_fmt(fmt, FMT_SIZE, "%s", justify, width), buf);
		break;
	case emitter_type_uint32:
		EMIT_SIMPLE(uint32_t, "%" FMTu32);
		break;
	case emitter_type_uint64:
		EMIT_SIMPLE(uint64_t, "%" FMTu64);
		break;
	case emitter_type_title:
		EMIT_SIMPLE(char *const, "%s");
		break;
	default:
		unreachable();
	}
	/*
	 * All three helper macros are private to this function; undefine every
	 * one of them so that none leaks into files including this header.
	 */
#undef EMIT_SIMPLE
#undef BUF_SIZE
#undef FMT_SIZE
}
/*
 * Internal.  Enter one more level of nesting.  Nothing has been emitted at
 * the new level yet, so item_at_depth is cleared.
 */
static inline void
emitter_nest_inc(emitter_t *emitter) {
	emitter->item_at_depth = false;
	emitter->nesting_depth += 1;
}
/*
 * Internal.  Leave the current level of nesting.  The level being returned to
 * is marked as already containing an item.
 */
static inline void
emitter_nest_dec(emitter_t *emitter) {
	emitter->item_at_depth = true;
	emitter->nesting_depth -= 1;
}
/*
 * Internal.  Emit the indentation for the current nesting depth: one tab per
 * level in JSON mode, two spaces per level otherwise.
 */
static inline void
emitter_indent(emitter_t *emitter) {
	const bool json = (emitter->output == emitter_output_json);
	const char *unit = json ? "\t" : " ";
	int remaining = json ? emitter->nesting_depth
	    : emitter->nesting_depth * 2;

	while (remaining > 0) {
		emitter_printf(emitter, "%s", unit);
		remaining--;
	}
}
/*
 * Internal.  Emit whatever must precede the next JSON entity.
 *
 * If a key was just emitted, the entity is that key's value and follows it
 * directly; only the emitted_key flag is cleared.  Otherwise a comma (when the
 * current level already holds an item), a newline and the indentation are
 * written.
 */
static inline void
emitter_json_key_prefix(emitter_t *emitter) {
	if (!emitter->emitted_key) {
		const char *separator = emitter->item_at_depth ? "," : "";

		emitter_printf(emitter, "%s\n", separator);
		emitter_indent(emitter);
		return;
	}
	emitter->emitted_key = false;
}
/******************************************************************************/
/* Public functions for emitter_t. */
/*
 * Initialize an emitter: remember the output mode and the write callback with
 * its opaque argument, and start at nesting depth 0 with no key or item
 * pending.
 */
static inline void
emitter_init(emitter_t *emitter, emitter_output_t emitter_output,
    void (*write_cb)(void *, const char *), void *cbopaque) {
	/* Where and how to write. */
	emitter->write_cb = write_cb;
	emitter->cbopaque = cbopaque;
	emitter->output = emitter_output;

	/* Fresh nesting state. */
	emitter->nesting_depth = 0;
	emitter->emitted_key = false;
	emitter->item_at_depth = false;
}
/******************************************************************************/
/* JSON public API. */
/*
 * Emit a key, as it appears inside a JSON object.  Whatever JSON entity is
 * emitted next is treated as the value belonging to this key.  Does nothing
 * unless the emitter is in JSON mode.
 */
static inline void
emitter_json_key(emitter_t *emitter, const char *json_key) {
	if (emitter->output != emitter_output_json) {
		return;
	}
	emitter_json_key_prefix(emitter);
	emitter_printf(emitter, "\"%s\": ", json_key);
	emitter->emitted_key = true;
}
/*
 * Emit one JSON value of the given type (unjustified) and mark the current
 * nesting level as containing an item.  Does nothing unless the emitter is in
 * JSON mode.
 */
static inline void
emitter_json_value(emitter_t *emitter, emitter_type_t value_type,
    const void *value) {
	if (emitter->output != emitter_output_json) {
		return;
	}
	emitter_json_key_prefix(emitter);
	emitter_print_value(emitter, emitter_justify_none, -1, value_type,
	    value);
	emitter->item_at_depth = true;
}
/*
 * Shorthand for calling emitter_json_key and then emitter_json_value.  Both
 * callees do nothing unless the emitter is in JSON mode, so this is a no-op
 * in table mode.
 */
static inline void
emitter_json_kv(emitter_t *emitter, const char *json_key,
emitter_type_t value_type, const void *value) {
emitter_json_key(emitter, json_key);
emitter_json_value(emitter, value_type, value);
}
/*
 * Open a JSON array: write the separator/indentation prefix and "[", then
 * descend one nesting level.  Does nothing unless the emitter is in JSON mode.
 */
static inline void
emitter_json_array_begin(emitter_t *emitter) {
	if (emitter->output != emitter_output_json) {
		return;
	}
	emitter_json_key_prefix(emitter);
	emitter_printf(emitter, "[");
	emitter_nest_inc(emitter);
}
/*
 * Shorthand for calling emitter_json_key and then emitter_json_array_begin,
 * i.e. opens an array that is the value of json_key.  Both callees do nothing
 * unless the emitter is in JSON mode.
 */
static inline void
emitter_json_array_kv_begin(emitter_t *emitter, const char *json_key) {
emitter_json_key(emitter, json_key);
emitter_json_array_begin(emitter);
}
/*
 * Close the innermost JSON array: ascend one nesting level and write "]" on
 * its own, freshly indented line.  Does nothing unless the emitter is in JSON
 * mode.
 */
static inline void
emitter_json_array_end(emitter_t *emitter) {
	if (emitter->output != emitter_output_json) {
		return;
	}
	assert(emitter->nesting_depth > 0);
	emitter_nest_dec(emitter);
	emitter_printf(emitter, "\n");
	emitter_indent(emitter);
	emitter_printf(emitter, "]");
}
/*
 * Open a JSON object: write the separator/indentation prefix and "{", then
 * descend one nesting level.  Does nothing unless the emitter is in JSON mode.
 */
static inline void
emitter_json_object_begin(emitter_t *emitter) {
	if (emitter->output != emitter_output_json) {
		return;
	}
	emitter_json_key_prefix(emitter);
	emitter_printf(emitter, "{");
	emitter_nest_inc(emitter);
}
/*
 * Shorthand for calling emitter_json_key and then emitter_json_object_begin,
 * i.e. opens an object that is the value of json_key.  Both callees do nothing
 * unless the emitter is in JSON mode.
 */
static inline void
emitter_json_object_kv_begin(emitter_t *emitter, const char *json_key) {
emitter_json_key(emitter, json_key);
emitter_json_object_begin(emitter);
}
/*
 * Close the innermost JSON object: ascend one nesting level and write "}" on
 * its own, freshly indented line.  Does nothing unless the emitter is in JSON
 * mode.
 */
static inline void
emitter_json_object_end(emitter_t *emitter) {
	if (emitter->output != emitter_output_json) {
		return;
	}
	assert(emitter->nesting_depth > 0);
	emitter_nest_dec(emitter);
	emitter_printf(emitter, "\n");
	emitter_indent(emitter);
	emitter_printf(emitter, "}");
}
/******************************************************************************/
/* Table public API. */
/*
 * Begin a nested section in table output: print table_key on its own indented
 * line and descend one nesting level.  Does nothing unless the emitter is in
 * table mode.
 */
static inline void
emitter_table_dict_begin(emitter_t *emitter, const char *table_key) {
	if (emitter->output != emitter_output_table) {
		return;
	}
	emitter_indent(emitter);
	emitter_printf(emitter, "%s\n", table_key);
	emitter_nest_inc(emitter);
}
/*
 * End a nested section in table output by ascending one nesting level; no
 * text is written.  Does nothing unless the emitter is in table mode.
 */
static inline void
emitter_table_dict_end(emitter_t *emitter) {
	if (emitter->output != emitter_output_table) {
		return;
	}
	emitter_nest_dec(emitter);
}
/*
 * Print an indented "key: value" line in table mode, optionally followed by a
 * parenthesized "(note_key: note_value)" when table_note_key is non-NULL.
 *
 * Nothing is printed outside table mode, but item_at_depth is set to true in
 * either mode.
 */
static inline void
emitter_table_kv_note(emitter_t *emitter, const char *table_key,
    emitter_type_t value_type, const void *value,
    const char *table_note_key, emitter_type_t table_note_value_type,
    const void *table_note_value) {
	const bool table_mode = (emitter->output == emitter_output_table);
	const bool has_note = (table_note_key != NULL);

	if (table_mode) {
		emitter_indent(emitter);
		emitter_printf(emitter, "%s: ", table_key);
		emitter_print_value(emitter, emitter_justify_none, -1,
		    value_type, value);
		if (has_note) {
			emitter_printf(emitter, " (%s: ", table_note_key);
			emitter_print_value(emitter, emitter_justify_none, -1,
			    table_note_value_type, table_note_value);
			emitter_printf(emitter, ")");
		}
		emitter_printf(emitter, "\n");
	}
	emitter->item_at_depth = true;
}
/*
 * Print a "key: value" line in table mode without a note.  Forwards to
 * emitter_table_kv_note with a NULL note key; the note type and value passed
 * along are placeholders that are never read in that case.
 */
static inline void
emitter_table_kv(emitter_t *emitter, const char *table_key,
emitter_type_t value_type, const void *value) {
emitter_table_kv_note(emitter, table_key, value_type, value, NULL,
emitter_type_bool, NULL);
}
/*
 * Write the given printf-style formatted string to the emitter, but only in
 * table mode; in any other mode the call has no effect.
 */
JEMALLOC_FORMAT_PRINTF(2, 3)
static inline void
emitter_table_printf(emitter_t *emitter, const char *format, ...) {
	if (emitter->output != emitter_output_table) {
		return;
	}
	va_list args;

	va_start(args, format);
	malloc_vcprintf(emitter->write_cb, emitter->cbopaque, format, args);
	va_end(args);
}
/*
 * Print every column of row, in list order, using each column's own
 * justification, width and type, then terminate the line.  Does nothing
 * unless the emitter is in table mode.
 */
static inline void
emitter_table_row(emitter_t *emitter, emitter_row_t *row) {
	emitter_col_t *cell;

	if (emitter->output == emitter_output_table) {
		ql_foreach(cell, &row->cols, link) {
			/*
			 * The value lives in a union, so the address of any
			 * member is the address of the active one.
			 */
			emitter_print_value(emitter, cell->justify,
			    cell->width, cell->type,
			    (const void *)&cell->bool_val);
		}
		emitter_table_printf(emitter, "\n");
	}
}
/* Initialize row's column list via ql_new(); columns are added by emitter_col_init(). */
static inline void
emitter_row_init(emitter_row_t *row) {
ql_new(&row->cols);
}
/*
 * Reset col's list linkage and insert it at the tail of row's column list.
 * The user-filled fields of col (justify, width, type, value) are not touched.
 */
static inline void
emitter_col_init(emitter_col_t *col, emitter_row_t *row) {
ql_elm_new(col, link);
ql_tail_insert(&row->cols, col, link);
}
/******************************************************************************/
/*
* Generalized public API. Emits using either JSON or table, according to
* settings in the emitter_t. */
/*
 * Emit a key/value pair in whichever mode the emitter is in.  In JSON mode
 * json_key and the value are emitted and the note is dropped.  Otherwise
 * table_key and the value are printed, followed by the note unless
 * table_note_key is NULL.
 */
static inline void
emitter_kv_note(emitter_t *emitter, const char *json_key, const char *table_key,
    emitter_type_t value_type, const void *value,
    const char *table_note_key, emitter_type_t table_note_value_type,
    const void *table_note_value) {
	if (emitter->output != emitter_output_json) {
		emitter_table_kv_note(emitter, table_key, value_type, value,
		    table_note_key, table_note_value_type, table_note_value);
	} else {
		/* Key followed by value; the note has no JSON form. */
		emitter_json_kv(emitter, json_key, value_type, value);
	}
	emitter->item_at_depth = true;
}
/*
 * Emit a key/value pair in whichever mode the emitter is in, without a table
 * note.  Forwards to emitter_kv_note with a NULL note key; the note type and
 * value passed along are placeholders.
 */
static inline void
emitter_kv(emitter_t *emitter, const char *json_key, const char *table_key,
emitter_type_t value_type, const void *value) {
emitter_kv_note(emitter, json_key, table_key, value_type, value, NULL,
emitter_type_bool, NULL);
}
/*
 * Begin a nested dictionary in whichever mode the emitter is in: an object
 * keyed by json_key in JSON mode, or a section headed by table_header in
 * table mode.
 */
static inline void
emitter_dict_begin(emitter_t *emitter, const char *json_key,
    const char *table_header) {
	if (emitter->output != emitter_output_json) {
		emitter_table_dict_begin(emitter, table_header);
		return;
	}
	/* Emits the key, then opens the object that is its value. */
	emitter_json_object_kv_begin(emitter, json_key);
}
/*
 * End the dictionary opened by emitter_dict_begin, using the closing routine
 * that matches the emitter's mode.
 */
static inline void
emitter_dict_end(emitter_t *emitter) {
	if (emitter->output != emitter_output_json) {
		emitter_table_dict_end(emitter);
		return;
	}
	emitter_json_object_end(emitter);
}
/*
 * Start emitting.  In JSON mode this opens the top-level object (the emitter
 * must be at nesting depth 0).  In table mode it writes an empty string.
 */
static inline void
emitter_begin(emitter_t *emitter) {
	if (emitter->output != emitter_output_json) {
		/*
		 * This guarantees that we always call write_cb at least once.
		 * This is useful if some invariant is established by each call
		 * to write_cb, but doesn't hold initially: e.g., some buffer
		 * holds a null-terminated string.
		 */
		emitter_printf(emitter, "%s", "");
		return;
	}
	assert(emitter->nesting_depth == 0);
	emitter_printf(emitter, "{");
	emitter_nest_inc(emitter);
}
/*
 * Finish emitting.  In JSON mode this closes the top-level object opened by
 * emitter_begin (the emitter must be back at nesting depth 1) and writes a
 * trailing newline.  Nothing is written in table mode.
 */
static inline void
emitter_end(emitter_t *emitter) {
	if (emitter->output != emitter_output_json) {
		return;
	}
	assert(emitter->nesting_depth == 1);
	emitter_nest_dec(emitter);
	emitter_printf(emitter, "\n}\n");
}
#endif /* JEMALLOC_INTERNAL_EMITTER_H */

View File

@@ -4,9 +4,10 @@
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/mutex_pool.h"
#include "jemalloc/internal/ph.h"
#include "jemalloc/internal/rb.h"
#include "jemalloc/internal/rtree.h"
extern size_t opt_lg_extent_max_active_fit;
extern rtree_t extents_rtree;
extern const extent_hooks_t extent_hooks_default;
extern mutex_pool_t extent_mutex_pool;
@@ -23,13 +24,17 @@ size_t extent_size_quantize_floor(size_t size);
size_t extent_size_quantize_ceil(size_t size);
#endif
rb_proto(, extent_avail_, extent_tree_t, extent_t)
ph_proto(, extent_avail_, extent_tree_t, extent_t)
ph_proto(, extent_heap_, extent_heap_t, extent_t)
bool extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state,
bool delay_coalesce);
extent_state_t extents_state_get(const extents_t *extents);
size_t extents_npages_get(extents_t *extents);
/* Get the number of extents in the given page size index. */
size_t extents_nextents_get(extents_t *extents, pszind_t ind);
/* Get the sum total bytes of the extents in the given page size index. */
size_t extents_nbytes_get(extents_t *extents, pszind_t ind);
extent_t *extents_alloc(tsdn_t *tsdn, arena_t *arena,
extent_hooks_t **r_extent_hooks, extents_t *extents, void *new_addr,
size_t size, size_t pad, size_t alignment, bool slab, szind_t szind,
@@ -69,4 +74,10 @@ bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena,
bool extent_boot(void);
void extent_util_stats_get(tsdn_t *tsdn, const void *ptr,
size_t *nfree, size_t *nregs, size_t *size);
void extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr,
size_t *nfree, size_t *nregs, size_t *size,
size_t *bin_nfree, size_t *bin_nregs, void **slabcur_addr);
#endif /* JEMALLOC_INTERNAL_EXTENT_EXTERNS_H */

View File

@@ -6,6 +6,7 @@
#include "jemalloc/internal/pages.h"
#include "jemalloc/internal/prng.h"
#include "jemalloc/internal/ql.h"
#include "jemalloc/internal/sc.h"
#include "jemalloc/internal/sz.h"
static inline void
@@ -34,18 +35,19 @@ extent_unlock2(tsdn_t *tsdn, extent_t *extent1, extent_t *extent2) {
(uintptr_t)extent2);
}
static inline arena_t *
extent_arena_get(const extent_t *extent) {
static inline unsigned
extent_arena_ind_get(const extent_t *extent) {
unsigned arena_ind = (unsigned)((extent->e_bits &
EXTENT_BITS_ARENA_MASK) >> EXTENT_BITS_ARENA_SHIFT);
/*
* The following check is omitted because we should never actually read
* a NULL arena pointer.
*/
if (false && arena_ind >= MALLOCX_ARENA_LIMIT) {
return NULL;
}
assert(arena_ind < MALLOCX_ARENA_LIMIT);
return arena_ind;
}
static inline arena_t *
extent_arena_get(const extent_t *extent) {
unsigned arena_ind = extent_arena_ind_get(extent);
return (arena_t *)atomic_load_p(&arenas[arena_ind], ATOMIC_ACQUIRE);
}
@@ -53,14 +55,14 @@ static inline szind_t
extent_szind_get_maybe_invalid(const extent_t *extent) {
szind_t szind = (szind_t)((extent->e_bits & EXTENT_BITS_SZIND_MASK) >>
EXTENT_BITS_SZIND_SHIFT);
assert(szind <= NSIZES);
assert(szind <= SC_NSIZES);
return szind;
}
static inline szind_t
extent_szind_get(const extent_t *extent) {
szind_t szind = extent_szind_get_maybe_invalid(extent);
assert(szind < NSIZES); /* Never call when "invalid". */
assert(szind < SC_NSIZES); /* Never call when "invalid". */
return szind;
}
@@ -69,6 +71,14 @@ extent_usize_get(const extent_t *extent) {
return sz_index2size(extent_szind_get(extent));
}
/*
 * Extract the bin shard index from the extent's packed e_bits word.  The
 * result is asserted to be below n_shards of the bin for the extent's size
 * class.
 */
static inline unsigned
extent_binshard_get(const extent_t *extent) {
	uint64_t field = extent->e_bits & EXTENT_BITS_BINSHARD_MASK;
	unsigned binshard = (unsigned)(field >> EXTENT_BITS_BINSHARD_SHIFT);

	assert(binshard < bin_infos[extent_szind_get(extent)].n_shards);
	return binshard;
}
static inline size_t
extent_sn_get(const extent_t *extent) {
return (size_t)((extent->e_bits & EXTENT_BITS_SN_MASK) >>
@@ -93,6 +103,12 @@ extent_committed_get(const extent_t *extent) {
EXTENT_BITS_COMMITTED_SHIFT);
}
/* Read the dumpable flag from the extent's packed e_bits word. */
static inline bool
extent_dumpable_get(const extent_t *extent) {
	uint64_t flag = extent->e_bits & EXTENT_BITS_DUMPABLE_MASK;

	return (bool)(flag >> EXTENT_BITS_DUMPABLE_SHIFT);
}
static inline bool
extent_slab_get(const extent_t *extent) {
return (bool)((extent->e_bits & EXTENT_BITS_SLAB_MASK) >>
@@ -170,6 +186,11 @@ extent_prof_tctx_get(const extent_t *extent) {
ATOMIC_ACQUIRE);
}
/* Return, by value, the timestamp stored in the extent's e_alloc_time field. */
static inline nstime_t
extent_prof_alloc_time_get(const extent_t *extent) {
return extent->e_alloc_time;
}
static inline void
extent_arena_set(extent_t *extent, arena_t *arena) {
unsigned arena_ind = (arena != NULL) ? arena_ind_get(arena) : ((1U <<
@@ -178,6 +199,14 @@ extent_arena_set(extent_t *extent, arena_t *arena) {
((uint64_t)arena_ind << EXTENT_BITS_ARENA_SHIFT);
}
/*
 * Store binshard in the bin shard field of the extent's packed e_bits word,
 * leaving every other field untouched.
 */
static inline void
extent_binshard_set(extent_t *extent, unsigned binshard) {
	/* The assertion assumes szind is set already. */
	assert(binshard < bin_infos[extent_szind_get(extent)].n_shards);

	uint64_t others = extent->e_bits & ~EXTENT_BITS_BINSHARD_MASK;
	uint64_t field = (uint64_t)binshard << EXTENT_BITS_BINSHARD_SHIFT;

	extent->e_bits = others | field;
}
static inline void
extent_addr_set(extent_t *extent, void *addr) {
extent->e_addr = addr;
@@ -190,9 +219,16 @@ extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment) {
if (alignment < PAGE) {
unsigned lg_range = LG_PAGE -
lg_floor(CACHELINE_CEILING(alignment));
size_t r =
prng_lg_range_zu(&extent_arena_get(extent)->offset_state,
size_t r;
if (!tsdn_null(tsdn)) {
tsd_t *tsd = tsdn_tsd(tsdn);
r = (size_t)prng_lg_range_u64(
tsd_offset_statep_get(tsd), lg_range);
} else {
r = prng_lg_range_zu(
&extent_arena_get(extent)->offset_state,
lg_range, true);
}
uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE -
lg_range);
extent->e_addr = (void *)((uintptr_t)extent->e_addr +
@@ -221,7 +257,7 @@ extent_bsize_set(extent_t *extent, size_t bsize) {
static inline void
extent_szind_set(extent_t *extent, szind_t szind) {
assert(szind <= NSIZES); /* NSIZES means "invalid". */
assert(szind <= SC_NSIZES); /* SC_NSIZES means "invalid". */
extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SZIND_MASK) |
((uint64_t)szind << EXTENT_BITS_SZIND_SHIFT);
}
@@ -233,6 +269,16 @@ extent_nfree_set(extent_t *extent, unsigned nfree) {
((uint64_t)nfree << EXTENT_BITS_NFREE_SHIFT);
}
/*
 * Set both the nfree and the bin shard fields of the extent's packed e_bits
 * word with a single store, leaving every other field untouched.
 */
static inline void
extent_nfree_binshard_set(extent_t *extent, unsigned nfree, unsigned binshard) {
	/* The assertion assumes szind is set already. */
	assert(binshard < bin_infos[extent_szind_get(extent)].n_shards);

	const uint64_t both_fields = EXTENT_BITS_NFREE_MASK |
	    EXTENT_BITS_BINSHARD_MASK;
	uint64_t others = extent->e_bits & ~both_fields;
	uint64_t shard_bits = (uint64_t)binshard << EXTENT_BITS_BINSHARD_SHIFT;
	uint64_t nfree_bits = (uint64_t)nfree << EXTENT_BITS_NFREE_SHIFT;

	extent->e_bits = others | shard_bits | nfree_bits;
}
static inline void
extent_nfree_inc(extent_t *extent) {
assert(extent_slab_get(extent));
@@ -245,6 +291,12 @@ extent_nfree_dec(extent_t *extent) {
extent->e_bits -= ((uint64_t)1U << EXTENT_BITS_NFREE_SHIFT);
}
/*
 * Subtract n from the nfree field of a slab extent by subtracting the shifted
 * amount directly from the packed e_bits word.  The extent is asserted to be
 * a slab.
 */
static inline void
extent_nfree_sub(extent_t *extent, uint64_t n) {
	assert(extent_slab_get(extent));

	uint64_t delta = n << EXTENT_BITS_NFREE_SHIFT;

	extent->e_bits -= delta;
}
static inline void
extent_sn_set(extent_t *extent, size_t sn) {
extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SN_MASK) |
@@ -269,6 +321,12 @@ extent_committed_set(extent_t *extent, bool committed) {
((uint64_t)committed << EXTENT_BITS_COMMITTED_SHIFT);
}
/*
 * Store the dumpable flag in the extent's packed e_bits word, leaving every
 * other field untouched.
 */
static inline void
extent_dumpable_set(extent_t *extent, bool dumpable) {
	uint64_t others = extent->e_bits & ~EXTENT_BITS_DUMPABLE_MASK;
	uint64_t flag = (uint64_t)dumpable << EXTENT_BITS_DUMPABLE_SHIFT;

	extent->e_bits = others | flag;
}
static inline void
extent_slab_set(extent_t *extent, bool slab) {
extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SLAB_MASK) |
@@ -280,10 +338,35 @@ extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) {
atomic_store_p(&extent->e_prof_tctx, tctx, ATOMIC_RELEASE);
}
/* Copy t into the extent's e_alloc_time field using nstime_copy(). */
static inline void
extent_prof_alloc_time_set(extent_t *extent, nstime_t t) {
nstime_copy(&extent->e_alloc_time, &t);
}
/*
 * Read the is_head flag from the extent's packed e_bits word.  Must not be
 * called when maps_coalesce is true; that path hits not_reached().
 */
static inline bool
extent_is_head_get(extent_t *extent) {
	if (maps_coalesce) {
		not_reached();
	}

	uint64_t flag = extent->e_bits & EXTENT_BITS_IS_HEAD_MASK;

	return (bool)(flag >> EXTENT_BITS_IS_HEAD_SHIFT);
}
/*
 * Store the is_head flag in the extent's packed e_bits word, leaving every
 * other field untouched.  Must not be called when maps_coalesce is true; that
 * path hits not_reached().
 */
static inline void
extent_is_head_set(extent_t *extent, bool is_head) {
	if (maps_coalesce) {
		not_reached();
	}

	uint64_t others = extent->e_bits & ~EXTENT_BITS_IS_HEAD_MASK;
	uint64_t flag = (uint64_t)is_head << EXTENT_BITS_IS_HEAD_SHIFT;

	extent->e_bits = others | flag;
}
static inline void
extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size,
bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed,
bool committed) {
bool committed, bool dumpable, extent_head_state_t is_head) {
assert(addr == PAGE_ADDR2BASE(addr) || !slab);
extent_arena_set(extent, arena);
@@ -295,7 +378,12 @@ extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size,
extent_state_set(extent, state);
extent_zeroed_set(extent, zeroed);
extent_committed_set(extent, committed);
extent_dumpable_set(extent, dumpable);
ql_elm_new(extent, ql_link);
if (!maps_coalesce) {
extent_is_head_set(extent, (is_head == EXTENT_IS_HEAD) ? true :
false);
}
if (config_prof) {
extent_prof_tctx_set(extent, NULL);
}
@@ -307,11 +395,12 @@ extent_binit(extent_t *extent, void *addr, size_t bsize, size_t sn) {
extent_addr_set(extent, addr);
extent_bsize_set(extent, bsize);
extent_slab_set(extent, false);
extent_szind_set(extent, NSIZES);
extent_szind_set(extent, SC_NSIZES);
extent_sn_set(extent, sn);
extent_state_set(extent, extent_state_active);
extent_zeroed_set(extent, true);
extent_committed_set(extent, true);
extent_dumpable_set(extent, true);
}
static inline void
@@ -334,6 +423,11 @@ extent_list_append(extent_list_t *list, extent_t *extent) {
ql_tail_insert(list, extent, ql_link);
}
/* Insert extent at the head of list, linking it through its ql_link field. */
static inline void
extent_list_prepend(extent_list_t *list, extent_t *extent) {
ql_head_insert(list, extent, ql_link);
}
static inline void
extent_list_replace(extent_list_t *list, extent_t *to_remove,
extent_t *to_insert) {

View File

@@ -2,12 +2,12 @@
#define JEMALLOC_INTERNAL_EXTENT_STRUCTS_H
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/bit_util.h"
#include "jemalloc/internal/bitmap.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/ql.h"
#include "jemalloc/internal/rb.h"
#include "jemalloc/internal/ph.h"
#include "jemalloc/internal/size_classes.h"
#include "jemalloc/internal/sc.h"
typedef enum {
extent_state_active = 0,
@@ -24,13 +24,15 @@ struct extent_s {
* a: arena_ind
* b: slab
* c: committed
* d: dumpable
* z: zeroed
* t: state
* i: szind
* f: nfree
* s: bin_shard
* n: sn
*
* nnnnnnnn ... nnnnnfff fffffffi iiiiiiit tzcbaaaa aaaaaaaa
* nnnnnnnn ... nnnnnnss ssssffff ffffffii iiiiiitt zdcbaaaa aaaaaaaa
*
* arena_ind: Arena from which this extent came, or all 1 bits if
* unassociated.
@@ -45,6 +47,23 @@ struct extent_s {
* as on a system that overcommits and satisfies physical
* memory needs on demand via soft page faults.
*
* dumpable: The dumpable flag indicates whether or not we've set the
* memory in question to be dumpable. Note that this
* interacts somewhat subtly with user-specified extent hooks,
* since we don't know if *they* are fiddling with
* dumpability (in which case, we don't want to undo whatever
* they're doing). To deal with this scenario, we:
* - Make dumpable false only for memory allocated with the
* default hooks.
* - Only allow memory to go from non-dumpable to dumpable,
* and only once.
* - Never make the OS call to allow dumping when the
* dumpable bit is already set.
* These three constraints mean that we will never
* accidentally dump user memory that the user meant to set
* nondumpable with their extent hooks.
*
*
* zeroed: The zeroed flag is used by extent recycling code to track
* whether memory is zero-filled.
*
@@ -58,6 +77,8 @@ struct extent_s {
*
* nfree: Number of free regions in slab.
*
* bin_shard: the shard of the bin from which this extent came.
*
* sn: Serial number (potentially non-unique).
*
* Serial numbers may wrap around if !opt_retain, but as long as
@@ -69,37 +90,49 @@ struct extent_s {
* serial number to both resulting adjacent extents.
*/
uint64_t e_bits;
#define MASK(CURRENT_FIELD_WIDTH, CURRENT_FIELD_SHIFT) ((((((uint64_t)0x1U) << (CURRENT_FIELD_WIDTH)) - 1)) << (CURRENT_FIELD_SHIFT))
#define EXTENT_BITS_ARENA_WIDTH MALLOCX_ARENA_BITS
#define EXTENT_BITS_ARENA_SHIFT 0
#define EXTENT_BITS_ARENA_MASK \
(((uint64_t)(1U << MALLOCX_ARENA_BITS) - 1) << EXTENT_BITS_ARENA_SHIFT)
#define EXTENT_BITS_ARENA_MASK MASK(EXTENT_BITS_ARENA_WIDTH, EXTENT_BITS_ARENA_SHIFT)
#define EXTENT_BITS_SLAB_SHIFT MALLOCX_ARENA_BITS
#define EXTENT_BITS_SLAB_MASK \
((uint64_t)0x1U << EXTENT_BITS_SLAB_SHIFT)
#define EXTENT_BITS_SLAB_WIDTH 1
#define EXTENT_BITS_SLAB_SHIFT (EXTENT_BITS_ARENA_WIDTH + EXTENT_BITS_ARENA_SHIFT)
#define EXTENT_BITS_SLAB_MASK MASK(EXTENT_BITS_SLAB_WIDTH, EXTENT_BITS_SLAB_SHIFT)
#define EXTENT_BITS_COMMITTED_SHIFT (MALLOCX_ARENA_BITS + 1)
#define EXTENT_BITS_COMMITTED_MASK \
((uint64_t)0x1U << EXTENT_BITS_COMMITTED_SHIFT)
#define EXTENT_BITS_COMMITTED_WIDTH 1
#define EXTENT_BITS_COMMITTED_SHIFT (EXTENT_BITS_SLAB_WIDTH + EXTENT_BITS_SLAB_SHIFT)
#define EXTENT_BITS_COMMITTED_MASK MASK(EXTENT_BITS_COMMITTED_WIDTH, EXTENT_BITS_COMMITTED_SHIFT)
#define EXTENT_BITS_ZEROED_SHIFT (MALLOCX_ARENA_BITS + 2)
#define EXTENT_BITS_ZEROED_MASK \
((uint64_t)0x1U << EXTENT_BITS_ZEROED_SHIFT)
#define EXTENT_BITS_DUMPABLE_WIDTH 1
#define EXTENT_BITS_DUMPABLE_SHIFT (EXTENT_BITS_COMMITTED_WIDTH + EXTENT_BITS_COMMITTED_SHIFT)
#define EXTENT_BITS_DUMPABLE_MASK MASK(EXTENT_BITS_DUMPABLE_WIDTH, EXTENT_BITS_DUMPABLE_SHIFT)
#define EXTENT_BITS_STATE_SHIFT (MALLOCX_ARENA_BITS + 3)
#define EXTENT_BITS_STATE_MASK \
((uint64_t)0x3U << EXTENT_BITS_STATE_SHIFT)
#define EXTENT_BITS_ZEROED_WIDTH 1
#define EXTENT_BITS_ZEROED_SHIFT (EXTENT_BITS_DUMPABLE_WIDTH + EXTENT_BITS_DUMPABLE_SHIFT)
#define EXTENT_BITS_ZEROED_MASK MASK(EXTENT_BITS_ZEROED_WIDTH, EXTENT_BITS_ZEROED_SHIFT)
#define EXTENT_BITS_SZIND_SHIFT (MALLOCX_ARENA_BITS + 5)
#define EXTENT_BITS_SZIND_MASK \
(((uint64_t)(1U << LG_CEIL_NSIZES) - 1) << EXTENT_BITS_SZIND_SHIFT)
#define EXTENT_BITS_STATE_WIDTH 2
#define EXTENT_BITS_STATE_SHIFT (EXTENT_BITS_ZEROED_WIDTH + EXTENT_BITS_ZEROED_SHIFT)
#define EXTENT_BITS_STATE_MASK MASK(EXTENT_BITS_STATE_WIDTH, EXTENT_BITS_STATE_SHIFT)
#define EXTENT_BITS_NFREE_SHIFT \
(MALLOCX_ARENA_BITS + 5 + LG_CEIL_NSIZES)
#define EXTENT_BITS_NFREE_MASK \
((uint64_t)((1U << (LG_SLAB_MAXREGS + 1)) - 1) << EXTENT_BITS_NFREE_SHIFT)
#define EXTENT_BITS_SZIND_WIDTH LG_CEIL(SC_NSIZES)
#define EXTENT_BITS_SZIND_SHIFT (EXTENT_BITS_STATE_WIDTH + EXTENT_BITS_STATE_SHIFT)
#define EXTENT_BITS_SZIND_MASK MASK(EXTENT_BITS_SZIND_WIDTH, EXTENT_BITS_SZIND_SHIFT)
#define EXTENT_BITS_SN_SHIFT \
(MALLOCX_ARENA_BITS + 5 + LG_CEIL_NSIZES + (LG_SLAB_MAXREGS + 1))
#define EXTENT_BITS_NFREE_WIDTH (LG_SLAB_MAXREGS + 1)
#define EXTENT_BITS_NFREE_SHIFT (EXTENT_BITS_SZIND_WIDTH + EXTENT_BITS_SZIND_SHIFT)
#define EXTENT_BITS_NFREE_MASK MASK(EXTENT_BITS_NFREE_WIDTH, EXTENT_BITS_NFREE_SHIFT)
#define EXTENT_BITS_BINSHARD_WIDTH 6
#define EXTENT_BITS_BINSHARD_SHIFT (EXTENT_BITS_NFREE_WIDTH + EXTENT_BITS_NFREE_SHIFT)
#define EXTENT_BITS_BINSHARD_MASK MASK(EXTENT_BITS_BINSHARD_WIDTH, EXTENT_BITS_BINSHARD_SHIFT)
#define EXTENT_BITS_IS_HEAD_WIDTH 1
#define EXTENT_BITS_IS_HEAD_SHIFT (EXTENT_BITS_BINSHARD_WIDTH + EXTENT_BITS_BINSHARD_SHIFT)
#define EXTENT_BITS_IS_HEAD_MASK MASK(EXTENT_BITS_IS_HEAD_WIDTH, EXTENT_BITS_IS_HEAD_SHIFT)
#define EXTENT_BITS_SN_SHIFT (EXTENT_BITS_IS_HEAD_WIDTH + EXTENT_BITS_IS_HEAD_SHIFT)
#define EXTENT_BITS_SN_MASK (UINT64_MAX << EXTENT_BITS_SN_SHIFT)
/* Pointer to the extent that this structure is responsible for. */
@@ -120,35 +153,36 @@ struct extent_s {
size_t e_bsize;
};
union {
/*
* List linkage, used by a variety of lists:
* - arena_bin_t's slabs_full
* - bin_t's slabs_full
* - extents_t's LRU
* - stashed dirty extents
* - arena's large allocations
*/
ql_elm(extent_t) ql_link;
/* Red-black tree linkage, used by arena's extent_avail. */
rb_node(extent_t) rb_link;
};
/* Linkage for per size class sn/address-ordered heaps. */
/*
* Linkage for per size class sn/address-ordered heaps, and
* for extent_avail
*/
phn(extent_t) ph_link;
union {
/* Small region slab metadata. */
arena_slab_data_t e_slab_data;
/*
* Profile counters, used for large objects. Points to a
* prof_tctx_t.
*/
/* Profiling data, used for large objects. */
struct {
/* Time when this was allocated. */
nstime_t e_alloc_time;
/* Points to a prof_tctx_t. */
atomic_p_t e_prof_tctx;
};
};
};
typedef ql_head(extent_t) extent_list_t;
typedef rb_tree(extent_t) extent_tree_t;
typedef ph(extent_t) extent_tree_t;
typedef ph(extent_t) extent_heap_t;
/* Quantized collection of extents, with built-in LRU queue. */
@@ -160,14 +194,16 @@ struct extents_s {
*
* Synchronization: mtx.
*/
extent_heap_t heaps[NPSIZES+1];
extent_heap_t heaps[SC_NPSIZES + 1];
atomic_zu_t nextents[SC_NPSIZES + 1];
atomic_zu_t nbytes[SC_NPSIZES + 1];
/*
* Bitmap for which set bits correspond to non-empty heaps.
*
* Synchronization: mtx.
*/
bitmap_t bitmap[BITMAP_GROUPS(NPSIZES+1)];
bitmap_t bitmap[BITMAP_GROUPS(SC_NPSIZES + 1)];
/*
* LRU of all extents in heaps.
@@ -196,4 +232,25 @@ struct extents_s {
bool delay_coalesce;
};
/*
* The following two structs are for experimental purposes. See
* experimental_utilization_query_ctl and
* experimental_utilization_batch_query_ctl in src/ctl.c.
*/
/*
 * Result record with the same three quantities that extent_util_stats_get()
 * reports through its nfree/nregs/size out-parameters.
 * NOTE(review): presumably nfree/nregs count free/total regions and size is
 * the extent size in bytes -- confirm against src/ctl.c.
 */
struct extent_util_stats_s {
size_t nfree;
size_t nregs;
size_t size;
};
/*
* Verbose utilization record (experimental). Carries the same three counters
* as extent_util_stats_s plus an address and two bin-prefixed counters.
*/
struct extent_util_stats_verbose_s {
/* NOTE(review): presumably the address of the bin's current slab -- confirm. */
void *slabcur_addr;
/* Same names as the fields of extent_util_stats_s. */
size_t nfree;
size_t nregs;
size_t size;
/* NOTE(review): presumably bin-wide counterparts of nfree/nregs -- confirm. */
size_t bin_nfree;
size_t bin_nregs;
};
#endif /* JEMALLOC_INTERNAL_EXTENT_STRUCTS_H */

View File

@@ -4,6 +4,20 @@
typedef struct extent_s extent_t;
typedef struct extents_s extents_t;
typedef struct extent_util_stats_s extent_util_stats_t;
typedef struct extent_util_stats_verbose_s extent_util_stats_verbose_t;
#define EXTENT_HOOKS_INITIALIZER NULL
/*
* When reusing (and splitting) an active extent,
* (1U << opt_lg_extent_max_active_fit) is the maximum allowed ratio between
* the size of the active extent and the size of the new extent.
*/
#define LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT 6
/*
* Two-valued flag recording whether an extent is marked as a "head" extent.
* NOTE(review): what makes an extent a head is defined elsewhere -- confirm.
*/
typedef enum {
EXTENT_NOT_HEAD,
EXTENT_IS_HEAD /* Only relevant for Windows && opt.retain. */
} extent_head_state_t;
#endif /* JEMALLOC_INTERNAL_EXTENT_TYPES_H */

View File

@@ -104,8 +104,8 @@ hash_x86_32(const void *key, int len, uint32_t seed) {
uint32_t k1 = 0;
switch (len & 3) {
case 3: k1 ^= tail[2] << 16;
case 2: k1 ^= tail[1] << 8;
case 3: k1 ^= tail[2] << 16; JEMALLOC_FALLTHROUGH
case 2: k1 ^= tail[1] << 8; JEMALLOC_FALLTHROUGH
case 1: k1 ^= tail[0]; k1 *= c1; k1 = hash_rotl_32(k1, 15);
k1 *= c2; h1 ^= k1;
}
@@ -119,7 +119,7 @@ hash_x86_32(const void *key, int len, uint32_t seed) {
return h1;
}
UNUSED static inline void
static inline void
hash_x86_128(const void *key, const int len, uint32_t seed,
uint64_t r_out[2]) {
const uint8_t * data = (const uint8_t *) key;
@@ -177,28 +177,29 @@ hash_x86_128(const void *key, const int len, uint32_t seed,
uint32_t k4 = 0;
switch (len & 15) {
case 15: k4 ^= tail[14] << 16;
case 14: k4 ^= tail[13] << 8;
case 15: k4 ^= tail[14] << 16; JEMALLOC_FALLTHROUGH
case 14: k4 ^= tail[13] << 8; JEMALLOC_FALLTHROUGH
case 13: k4 ^= tail[12] << 0;
k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4;
case 12: k3 ^= tail[11] << 24;
case 11: k3 ^= tail[10] << 16;
case 10: k3 ^= tail[ 9] << 8;
JEMALLOC_FALLTHROUGH
case 12: k3 ^= tail[11] << 24; JEMALLOC_FALLTHROUGH
case 11: k3 ^= tail[10] << 16; JEMALLOC_FALLTHROUGH
case 10: k3 ^= tail[ 9] << 8; JEMALLOC_FALLTHROUGH
case 9: k3 ^= tail[ 8] << 0;
k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3;
case 8: k2 ^= tail[ 7] << 24;
case 7: k2 ^= tail[ 6] << 16;
case 6: k2 ^= tail[ 5] << 8;
JEMALLOC_FALLTHROUGH
case 8: k2 ^= tail[ 7] << 24; JEMALLOC_FALLTHROUGH
case 7: k2 ^= tail[ 6] << 16; JEMALLOC_FALLTHROUGH
case 6: k2 ^= tail[ 5] << 8; JEMALLOC_FALLTHROUGH
case 5: k2 ^= tail[ 4] << 0;
k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2;
case 4: k1 ^= tail[ 3] << 24;
case 3: k1 ^= tail[ 2] << 16;
case 2: k1 ^= tail[ 1] << 8;
JEMALLOC_FALLTHROUGH
case 4: k1 ^= tail[ 3] << 24; JEMALLOC_FALLTHROUGH
case 3: k1 ^= tail[ 2] << 16; JEMALLOC_FALLTHROUGH
case 2: k1 ^= tail[ 1] << 8; JEMALLOC_FALLTHROUGH
case 1: k1 ^= tail[ 0] << 0;
k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1;
JEMALLOC_FALLTHROUGH
}
}
@@ -220,7 +221,7 @@ hash_x86_128(const void *key, const int len, uint32_t seed,
r_out[1] = (((uint64_t) h4) << 32) | h3;
}
UNUSED static inline void
static inline void
hash_x64_128(const void *key, const int len, const uint32_t seed,
uint64_t r_out[2]) {
const uint8_t *data = (const uint8_t *) key;
@@ -260,22 +261,22 @@ hash_x64_128(const void *key, const int len, const uint32_t seed,
uint64_t k2 = 0;
switch (len & 15) {
case 15: k2 ^= ((uint64_t)(tail[14])) << 48;
case 14: k2 ^= ((uint64_t)(tail[13])) << 40;
case 13: k2 ^= ((uint64_t)(tail[12])) << 32;
case 12: k2 ^= ((uint64_t)(tail[11])) << 24;
case 11: k2 ^= ((uint64_t)(tail[10])) << 16;
case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8;
case 15: k2 ^= ((uint64_t)(tail[14])) << 48; JEMALLOC_FALLTHROUGH
case 14: k2 ^= ((uint64_t)(tail[13])) << 40; JEMALLOC_FALLTHROUGH
case 13: k2 ^= ((uint64_t)(tail[12])) << 32; JEMALLOC_FALLTHROUGH
case 12: k2 ^= ((uint64_t)(tail[11])) << 24; JEMALLOC_FALLTHROUGH
case 11: k2 ^= ((uint64_t)(tail[10])) << 16; JEMALLOC_FALLTHROUGH
case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8; JEMALLOC_FALLTHROUGH
case 9: k2 ^= ((uint64_t)(tail[ 8])) << 0;
k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2;
case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56;
case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48;
case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40;
case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32;
case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24;
case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16;
case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8;
JEMALLOC_FALLTHROUGH
case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56; JEMALLOC_FALLTHROUGH
case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48; JEMALLOC_FALLTHROUGH
case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40; JEMALLOC_FALLTHROUGH
case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32; JEMALLOC_FALLTHROUGH
case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24; JEMALLOC_FALLTHROUGH
case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16; JEMALLOC_FALLTHROUGH
case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8; JEMALLOC_FALLTHROUGH
case 1: k1 ^= ((uint64_t)(tail[ 0])) << 0;
k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1;
}

View File

@@ -0,0 +1,163 @@
#ifndef JEMALLOC_INTERNAL_HOOK_H
#define JEMALLOC_INTERNAL_HOOK_H
#include "jemalloc/internal/tsd.h"
/*
* This API is *extremely* experimental, and may get ripped out, changed in API-
* and ABI-incompatible ways, be insufficiently or incorrectly documented, etc.
*
* It allows hooking the stateful parts of the API to see changes as they
* happen.
*
* Allocation hooks are called after the allocation is done, free hooks are
* called before the free is done, and expand hooks are called after the
* allocation is expanded.
*
* For realloc and rallocx, if the expansion happens in place, the expansion
* hook is called. If it is moved, then the alloc hook is called on the new
* location, and then the free hook is called on the old location (i.e. both
* hooks are invoked in between the alloc and the dalloc).
*
* If we return NULL from OOM, then usize might not be trustworthy. Calling
* realloc(NULL, size) only calls the alloc hook, and calling realloc(ptr, 0)
* only calls the free hook. (Calling realloc(NULL, 0) is treated as malloc(0),
* and only calls the alloc hook).
*
* Reentrancy:
* Reentrancy is guarded against from within the hook implementation. If you
* call allocator functions from within a hook, the hooks will not be invoked
* again.
* Threading:
* The installation of a hook synchronizes with all its uses. If you can
* prove the installation of a hook happens-before a jemalloc entry point,
* then the hook will get invoked (unless there's a racing removal).
*
* Hook insertion appears to be atomic at a per-thread level (i.e. if a thread
* allocates and has the alloc hook invoked, then a subsequent free on the
* same thread will also have the free hook invoked).
*
* The *removal* of a hook does *not* block until all threads are done with
* the hook. Hook authors have to be resilient to this, and need some
* out-of-band mechanism for cleaning up any dynamically allocated memory
* associated with their hook.
* Ordering:
* Order of hook execution is unspecified, and may be different than insertion
* order.
*/
#define HOOK_MAX 4
/*
* Tag handed to an alloc hook as its `type` argument; one enumerator per
* allocating entry point, named after that entry point.
*/
enum hook_alloc_e {
hook_alloc_malloc,
hook_alloc_posix_memalign,
hook_alloc_aligned_alloc,
hook_alloc_calloc,
hook_alloc_memalign,
hook_alloc_valloc,
hook_alloc_mallocx,
/* The reallocating functions have both alloc and dalloc variants */
hook_alloc_realloc,
hook_alloc_rallocx,
};
/*
* We put the enum typedef after the enum, since this file may get included by
* jemalloc_cpp.cpp, and C++ disallows enum forward declarations.
*/
typedef enum hook_alloc_e hook_alloc_t;
/*
* Tag handed to a dalloc hook as its `type` argument; one enumerator per
* deallocating entry point, named after that entry point.
*/
enum hook_dalloc_e {
hook_dalloc_free,
hook_dalloc_dallocx,
hook_dalloc_sdallocx,
/*
* The dalloc halves of reallocation (not called if in-place expansion
* happens).
*/
hook_dalloc_realloc,
hook_dalloc_rallocx,
};
typedef enum hook_dalloc_e hook_dalloc_t;
/*
* Tag handed to an expand hook as its `type` argument; one enumerator per
* entry point that can resize an allocation in place.
*/
enum hook_expand_e {
hook_expand_realloc,
hook_expand_rallocx,
hook_expand_xallocx,
};
typedef enum hook_expand_e hook_expand_t;
/*
* Alloc hook signature. `type` names the entry point; `result` is the pointer
* produced by the allocation and `result_raw` the same value as an integer
* (the realloc path passes p and (uintptr_t)p). `args_raw` holds the raw
* arguments of the hooked call.
* NOTE(review): `extra` is presumably the hooks_t.extra pointer registered
* with the hook -- confirm in src/hook.c.
*/
typedef void (*hook_alloc)(
void *extra, hook_alloc_t type, void *result, uintptr_t result_raw,
uintptr_t args_raw[3]);
/*
* Dalloc hook signature. `address` is the pointer being released; `args_raw`
* holds the raw arguments of the hooked call.
*/
typedef void (*hook_dalloc)(
void *extra, hook_dalloc_t type, void *address, uintptr_t args_raw[3]);
/*
* Expand hook signature. `address` is the allocation that was resized, with
* its usable size before (`old_usize`) and after (`new_usize`). Note that
* args_raw has four slots here, not three.
*/
typedef void (*hook_expand)(
void *extra, hook_expand_t type, void *address, size_t old_usize,
size_t new_usize, uintptr_t result_raw, uintptr_t args_raw[4]);
/*
* One set of hooks: a callback per event kind plus a user pointer. This is
* the type accepted by hook_install.
*/
typedef struct hooks_s hooks_t;
struct hooks_s {
/* Callbacks for the three event kinds. */
hook_alloc alloc_hook;
hook_dalloc dalloc_hook;
hook_expand expand_hook;
/*
* NOTE(review): presumably forwarded as the first (`extra`) argument of
* each callback -- confirm in src/hook.c.
*/
void *extra;
};
/*
* Begin implementation details; everything above this point might one day live
* in a public API. Everything below this point never will.
*/
/*
* The realloc pathways haven't gotten any refactoring love in a while, and it's
* fairly difficult to pass information from the entry point to the hooks. We
* put the information the hooks will need into a struct to encapsulate
* everything.
*
* Much of these pathways are force-inlined, so that the compiler can avoid
* materializing this struct until we hit an extern arena function. For fairly
* goofy reasons, *many* of the realloc paths hit an extern arena function.
* These paths are cold enough that it doesn't matter; eventually, we should
* rewrite the realloc code to make the expand-in-place and the
* free-then-realloc paths more orthogonal, at which point we don't need to
* spread the hook logic all over the place.
*/
/*
* Per-call bundle threaded through the reallocation paths so that the code
* which finally invokes the hooks knows the originating entry point and its
* raw arguments.
*/
typedef struct hook_ralloc_args_s hook_ralloc_args_t;
struct hook_ralloc_args_s {
/* I.e. as opposed to rallocx. */
bool is_realloc;
/*
* The expand hook takes 4 arguments, even if only 3 are actually used;
* we add an extra one in case the user decides to memcpy without
* looking too closely at the hooked function.
*/
uintptr_t args[4];
};
/*
* Initialization entry point for the hook machinery.
* NOTE(review): the meaning of the bool result is not visible in this
* header -- confirm in src/hook.c.
*/
bool hook_boot();
/*
* Returns an opaque handle to be used when removing the hook. NULL means that
* we couldn't install the hook.
*/
void *hook_install(tsdn_t *tsdn, hooks_t *hooks);
/* Uninstalls the hook with the handle previously returned from hook_install. */
void hook_remove(tsdn_t *tsdn, void *opaque);
/*
* Hook dispatch points, called from the allocator's entry paths. Each takes
* the same arguments as the matching hook_alloc / hook_dalloc / hook_expand
* callback type, minus the leading `extra` pointer.
*/
void hook_invoke_alloc(hook_alloc_t type, void *result, uintptr_t result_raw,
uintptr_t args_raw[3]);
void hook_invoke_dalloc(hook_dalloc_t type, void *address,
uintptr_t args_raw[3]);
void hook_invoke_expand(hook_expand_t type, void *address, size_t old_usize,
size_t new_usize, uintptr_t result_raw, uintptr_t args_raw[4]);
#endif /* JEMALLOC_INTERNAL_HOOK_H */

View File

@@ -5,7 +5,16 @@
#ifdef _WIN32
# include <windows.h>
# include "msvc_compat/windows_extra.h"
# ifdef _WIN64
# if LG_VADDR <= 32
# error Generate the headers using x64 vcargs
# endif
# else
# if LG_VADDR > 32
# undef LG_VADDR
# define LG_VADDR 32
# endif
# endif
#else
# include <sys/param.h>
# include <sys/mman.h>
@@ -22,6 +31,9 @@
# include <sys/uio.h>
# endif
# include <pthread.h>
# ifdef __FreeBSD__
# include <pthread_np.h>
# endif
# include <signal.h>
# ifdef JEMALLOC_OS_UNFAIR_LOCK
# include <os/lock.h>

View File

@@ -2,7 +2,6 @@
#define JEMALLOC_INTERNAL_EXTERNS_H
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/size_classes.h"
#include "jemalloc/internal/tsd_types.h"
/* TSD checks this to set thread local slow state accordingly. */
@@ -11,6 +10,7 @@ extern bool malloc_slow;
/* Run-time options. */
extern bool opt_abort;
extern bool opt_abort_conf;
extern bool opt_confirm_conf;
extern const char *opt_junk;
extern bool opt_junk_alloc;
extern bool opt_junk_free;
@@ -25,6 +25,9 @@ extern unsigned ncpus;
/* Number of arenas used for automatic multiplexing of threads and arenas. */
extern unsigned narenas_auto;
/* Base index for manual arenas. */
extern unsigned manual_arena_base;
/*
* Arenas that are used to service external requests. Not all elements of the
* arenas array are necessarily used; arenas are created lazily as needed.
@@ -49,5 +52,6 @@ void jemalloc_prefork(void);
void jemalloc_postfork_parent(void);
void jemalloc_postfork_child(void);
bool malloc_initialized(void);
void je_sdallocx_noflags(void *ptr, size_t size);
#endif /* JEMALLOC_INTERNAL_EXTERNS_H */

View File

@@ -4,13 +4,15 @@
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/bit_util.h"
#include "jemalloc/internal/jemalloc_internal_types.h"
#include "jemalloc/internal/size_classes.h"
#include "jemalloc/internal/sc.h"
#include "jemalloc/internal/ticker.h"
JEMALLOC_ALWAYS_INLINE malloc_cpuid_t
malloc_getcpu(void) {
assert(have_percpu_arena);
#if defined(JEMALLOC_HAVE_SCHED_GETCPU)
#if defined(_WIN32)
return GetCurrentProcessorNumber();
#elif defined(JEMALLOC_HAVE_SCHED_GETCPU)
return (malloc_cpuid_t)sched_getcpu();
#else
not_reached();
@@ -106,16 +108,16 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) {
return &tdata->decay_ticker;
}
JEMALLOC_ALWAYS_INLINE tcache_bin_t *
JEMALLOC_ALWAYS_INLINE cache_bin_t *
tcache_small_bin_get(tcache_t *tcache, szind_t binind) {
assert(binind < NBINS);
return &tcache->tbins_small[binind];
assert(binind < SC_NBINS);
return &tcache->bins_small[binind];
}
JEMALLOC_ALWAYS_INLINE tcache_bin_t *
JEMALLOC_ALWAYS_INLINE cache_bin_t *
tcache_large_bin_get(tcache_t *tcache, szind_t binind) {
assert(binind >= NBINS &&binind < nhbins);
return &tcache->tbins_large[binind - NBINS];
assert(binind >= SC_NBINS &&binind < nhbins);
return &tcache->bins_large[binind - SC_NBINS];
}
JEMALLOC_ALWAYS_INLINE bool
@@ -151,11 +153,12 @@ pre_reentrancy(tsd_t *tsd, arena_t *arena) {
assert(arena != arena_get(tsd_tsdn(tsd), 0, false));
bool fast = tsd_fast(tsd);
assert(tsd_reentrancy_level_get(tsd) < INT8_MAX);
++*tsd_reentrancy_levelp_get(tsd);
if (fast) {
/* Prepare slow path for reentrancy. */
tsd_slow_update(tsd);
assert(tsd->state == tsd_state_nominal_slow);
assert(tsd_state_get(tsd) == tsd_state_nominal_slow);
}
}

View File

@@ -71,7 +71,8 @@ arena_ichoose(tsd_t *tsd, arena_t *arena) {
static inline bool
arena_is_auto(arena_t *arena) {
assert(narenas_auto > 0);
return (arena_ind_get(arena) < narenas_auto);
return (arena_ind_get(arena) < manual_arena_base);
}
JEMALLOC_ALWAYS_INLINE extent_t *

View File

@@ -1,10 +1,29 @@
#ifndef JEMALLOC_INTERNAL_INLINES_C_H
#define JEMALLOC_INTERNAL_INLINES_C_H
#include "jemalloc/internal/hook.h"
#include "jemalloc/internal/jemalloc_internal_types.h"
#include "jemalloc/internal/sz.h"
#include "jemalloc/internal/witness.h"
/*
* Translating the names of the 'i' functions:
* Abbreviations used in the first part of the function name (before
* alloc/dalloc) describe what that function accomplishes:
* a: arena (query)
* s: size (query, or sized deallocation)
* e: extent (query)
* p: aligned (allocates)
* vs: size (query, without knowing that the pointer is into the heap)
* r: rallocx implementation
* x: xallocx implementation
* Abbreviations used in the second part of the function name (after
* alloc/dalloc) describe the arguments it takes
* z: whether to return zeroed memory
* t: accepts a tcache_t * parameter
* m: accepts an arena_t * parameter
*/
JEMALLOC_ALWAYS_INLINE arena_t *
iaalloc(tsdn_t *tsdn, const void *ptr) {
assert(ptr != NULL);
@@ -24,11 +43,12 @@ iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache,
bool is_internal, arena_t *arena, bool slow_path) {
void *ret;
assert(size != 0);
assert(!is_internal || tcache == NULL);
assert(!is_internal || arena == NULL || arena_is_auto(arena));
if (!tsdn_null(tsdn) && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) == 0) {
witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
WITNESS_RANK_CORE, 0);
}
ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path);
if (config_stats && is_internal && likely(ret != NULL)) {
@@ -91,7 +111,8 @@ idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, alloc_ctx_t *alloc_ctx,
if (config_stats && is_internal) {
arena_internal_sub(iaalloc(tsdn, ptr), isalloc(tsdn, ptr));
}
if (!is_internal && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) != 0) {
if (!is_internal && !tsdn_null(tsdn) &&
tsd_reentrancy_level_get(tsdn_tsd(tsdn)) != 0) {
assert(tcache == NULL);
}
arena_dalloc(tsdn, ptr, tcache, alloc_ctx, slow_path);
@@ -112,45 +133,47 @@ isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
JEMALLOC_ALWAYS_INLINE void *
iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
size_t extra, size_t alignment, bool zero, tcache_t *tcache,
arena_t *arena) {
size_t alignment, bool zero, tcache_t *tcache, arena_t *arena,
hook_ralloc_args_t *hook_args) {
witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
WITNESS_RANK_CORE, 0);
void *p;
size_t usize, copysize;
usize = sz_sa2u(size + extra, alignment);
if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) {
return NULL;
}
p = ipalloct(tsdn, usize, alignment, zero, tcache, arena);
if (p == NULL) {
if (extra == 0) {
return NULL;
}
/* Try again, without extra this time. */
usize = sz_sa2u(size, alignment);
if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) {
if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) {
return NULL;
}
p = ipalloct(tsdn, usize, alignment, zero, tcache, arena);
if (p == NULL) {
return NULL;
}
}
/*
* Copy at most size bytes (not size+extra), since the caller has no
* expectation that the extra bytes will be reliably preserved.
*/
copysize = (size < oldsize) ? size : oldsize;
memcpy(p, ptr, copysize);
hook_invoke_alloc(hook_args->is_realloc
? hook_alloc_realloc : hook_alloc_rallocx, p, (uintptr_t)p,
hook_args->args);
hook_invoke_dalloc(hook_args->is_realloc
? hook_dalloc_realloc : hook_dalloc_rallocx, ptr, hook_args->args);
isdalloct(tsdn, ptr, oldsize, tcache, NULL, true);
return p;
}
/*
* is_realloc threads through the knowledge of whether or not this call comes
* from je_realloc (as opposed to je_rallocx); this ensures that we pass the
* correct entry point into any hooks.
* Note that these functions are all force-inlined, so no actual bool gets
* passed-around anywhere.
*/
JEMALLOC_ALWAYS_INLINE void *
iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment,
bool zero, tcache_t *tcache, arena_t *arena) {
bool zero, tcache_t *tcache, arena_t *arena, hook_ralloc_args_t *hook_args)
{
assert(ptr != NULL);
assert(size != 0);
witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
@@ -162,24 +185,24 @@ iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment,
* Existing object alignment is inadequate; allocate new space
* and copy.
*/
return iralloct_realign(tsdn, ptr, oldsize, size, 0, alignment,
zero, tcache, arena);
return iralloct_realign(tsdn, ptr, oldsize, size, alignment,
zero, tcache, arena, hook_args);
}
return arena_ralloc(tsdn, arena, ptr, oldsize, size, alignment, zero,
tcache);
tcache, hook_args);
}
JEMALLOC_ALWAYS_INLINE void *
iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment,
bool zero) {
bool zero, hook_ralloc_args_t *hook_args) {
return iralloct(tsd_tsdn(tsd), ptr, oldsize, size, alignment, zero,
tcache_get(tsd), NULL);
tcache_get(tsd), NULL, hook_args);
}
JEMALLOC_ALWAYS_INLINE bool
ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra,
size_t alignment, bool zero) {
size_t alignment, bool zero, size_t *newsize) {
assert(ptr != NULL);
assert(size != 0);
witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
@@ -188,10 +211,12 @@ ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra,
if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1))
!= 0) {
/* Existing object alignment is inadequate. */
*newsize = oldsize;
return true;
}
return arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero);
return arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero,
newsize);
}
#endif /* JEMALLOC_INTERNAL_INLINES_C_H */

View File

@@ -30,11 +30,85 @@
# define restrict
#endif
/* Various function pointers are statick and immutable except during testing. */
/* Various function pointers are static and immutable except during testing. */
#ifdef JEMALLOC_JET
# define JET_MUTABLE
#else
# define JET_MUTABLE const
#endif
/*
* Split a variadic argument list: HEAD expands to the first argument, TAIL to
* everything after it. Both require at least one argument.
*/
#define JEMALLOC_VA_ARGS_HEAD(head, ...) head
#define JEMALLOC_VA_ARGS_TAIL(head, ...) __VA_ARGS__
/*
* Marks an intentional switch-case fall-through. On GCC >= 7 (not clang) with
* attribute support it expands to the fallthrough attribute statement; the
* trailing semicolon is part of the expansion, so use sites write the macro
* without one. Everywhere else it expands to nothing.
*/
#if (defined(__GNUC__) || defined(__GNUG__)) && !defined(__clang__) \
&& defined(JEMALLOC_HAVE_ATTR) && (__GNUC__ >= 7)
#define JEMALLOC_FALLTHROUGH JEMALLOC_ATTR(fallthrough);
#else
#define JEMALLOC_FALLTHROUGH /* falls through */
#endif
/* Diagnostic suppression macros */
#if defined(_MSC_VER) && !defined(__clang__)
# define JEMALLOC_DIAGNOSTIC_PUSH __pragma(warning(push))
# define JEMALLOC_DIAGNOSTIC_POP __pragma(warning(pop))
# define JEMALLOC_DIAGNOSTIC_IGNORE(W) __pragma(warning(disable:W))
# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS
# define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS
# define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN
# define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS
/* #pragma GCC diagnostic first appeared in gcc 4.6. */
#elif (defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && \
(__GNUC_MINOR__ > 5)))) || defined(__clang__)
/*
* The JEMALLOC_PRAGMA__ macro is an implementation detail of the GCC and Clang
* diagnostic suppression macros and should not be used anywhere else.
*/
# define JEMALLOC_PRAGMA__(X) _Pragma(#X)
# define JEMALLOC_DIAGNOSTIC_PUSH JEMALLOC_PRAGMA__(GCC diagnostic push)
# define JEMALLOC_DIAGNOSTIC_POP JEMALLOC_PRAGMA__(GCC diagnostic pop)
# define JEMALLOC_DIAGNOSTIC_IGNORE(W) \
JEMALLOC_PRAGMA__(GCC diagnostic ignored W)
/*
* The -Wmissing-field-initializers warning is buggy in GCC versions < 5.1 and
* all clang versions up to version 7 (currently trunk, unreleased). This macro
* suppresses the warning for the affected compiler versions only.
*/
# if ((defined(__GNUC__) && !defined(__clang__)) && (__GNUC__ < 5)) || \
defined(__clang__)
# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS \
JEMALLOC_DIAGNOSTIC_IGNORE("-Wmissing-field-initializers")
# else
# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS
# endif
# define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS \
JEMALLOC_DIAGNOSTIC_IGNORE("-Wtype-limits")
# define JEMALLOC_DIAGNOSTIC_IGNORE_UNUSED_PARAMETER \
JEMALLOC_DIAGNOSTIC_IGNORE("-Wunused-parameter")
# if defined(__GNUC__) && !defined(__clang__) && (__GNUC__ >= 7)
# define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN \
JEMALLOC_DIAGNOSTIC_IGNORE("-Walloc-size-larger-than=")
# else
# define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN
# endif
# define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS \
JEMALLOC_DIAGNOSTIC_PUSH \
JEMALLOC_DIAGNOSTIC_IGNORE_UNUSED_PARAMETER
#else
# define JEMALLOC_DIAGNOSTIC_PUSH
# define JEMALLOC_DIAGNOSTIC_POP
# define JEMALLOC_DIAGNOSTIC_IGNORE(W)
# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS
# define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS
# define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN
# define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS
#endif
/*
* Disables spurious diagnostics for all headers. Since these headers are not
* included by users directly, it does not affect their diagnostic settings.
*/
JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS
#endif /* JEMALLOC_INTERNAL_MACROS_H */

View File

@@ -1,6 +1,8 @@
#ifndef JEMALLOC_INTERNAL_TYPES_H
#define JEMALLOC_INTERNAL_TYPES_H
#include "jemalloc/internal/quantum.h"
/* Page size index type. */
typedef unsigned pszind_t;
@@ -50,72 +52,6 @@ typedef int malloc_cpuid_t;
/* Smallest size class to support. */
#define TINY_MIN (1U << LG_TINY_MIN)
/*
* Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size
* classes).
*/
#ifndef LG_QUANTUM
# if (defined(__i386__) || defined(_M_IX86))
# define LG_QUANTUM 4
# endif
# ifdef __ia64__
# define LG_QUANTUM 4
# endif
# ifdef __alpha__
# define LG_QUANTUM 4
# endif
# if (defined(__sparc64__) || defined(__sparcv9) || defined(__sparc_v9__))
# define LG_QUANTUM 4
# endif
# if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64))
# define LG_QUANTUM 4
# endif
# ifdef __arm__
# define LG_QUANTUM 3
# endif
# ifdef __aarch64__
# define LG_QUANTUM 4
# endif
# ifdef __hppa__
# define LG_QUANTUM 4
# endif
# ifdef __mips__
# define LG_QUANTUM 3
# endif
# ifdef __or1k__
# define LG_QUANTUM 3
# endif
# ifdef __powerpc__
# define LG_QUANTUM 4
# endif
# ifdef __riscv__
# define LG_QUANTUM 4
# endif
# ifdef __s390__
# define LG_QUANTUM 4
# endif
# ifdef __SH4__
# define LG_QUANTUM 4
# endif
# ifdef __tile__
# define LG_QUANTUM 4
# endif
# ifdef __le32__
# define LG_QUANTUM 4
# endif
# ifndef LG_QUANTUM
# error "Unknown minimum alignment for architecture; specify via "
"--with-lg-quantum"
# endif
#endif
#define QUANTUM ((size_t)(1U << LG_QUANTUM))
#define QUANTUM_MASK (QUANTUM - 1)
/* Return the smallest quantum multiple that is >= a. */
#define QUANTUM_CEILING(a) \
(((a) + QUANTUM_MASK) & ~QUANTUM_MASK)
#define LONG ((size_t)(1U << LG_SIZEOF_LONG))
#define LONG_MASK (LONG - 1)

View File

@@ -21,7 +21,7 @@
# include "../jemalloc.h"
#endif
#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN))
#if defined(JEMALLOC_OSATOMIC)
#include <libkern/OSAtomic.h>
#endif
@@ -45,7 +45,11 @@
# include "jemalloc/internal/private_namespace_jet.h"
# endif
#endif
#include "jemalloc/internal/hooks.h"
#include "jemalloc/internal/test_hooks.h"
#ifdef JEMALLOC_DEFINE_MADVISE_FREE
# define JEMALLOC_MADV_FREE 8
#endif
static const bool config_debug =
#ifdef JEMALLOC_DEBUG
@@ -61,6 +65,13 @@ static const bool have_dss =
false
#endif
;
/* Compile-time flag: true exactly when JEMALLOC_HAVE_MADVISE_HUGE is defined. */
#ifdef JEMALLOC_HAVE_MADVISE_HUGE
static const bool have_madvise_huge = true;
#else
static const bool have_madvise_huge = false;
#endif
static const bool config_fill =
#ifdef JEMALLOC_FILL
true
@@ -111,13 +122,6 @@ static const bool config_stats =
false
#endif
;
static const bool config_thp =
#ifdef JEMALLOC_THP
true
#else
false
#endif
;
static const bool config_tls =
#ifdef JEMALLOC_TLS
true
@@ -146,7 +150,37 @@ static const bool config_cache_oblivious =
false
#endif
;
#ifdef JEMALLOC_HAVE_SCHED_GETCPU
/*
 * True exactly when JEMALLOC_LOG is defined.  Undocumented and, for now,
 * meant for jemalloc development only; see the note in
 * jemalloc/internal/log.h.
 */
#ifdef JEMALLOC_LOG
static const bool config_log = true;
#else
static const bool config_log = false;
#endif
/*
 * Whether extra safety checks are compiled in (checking the size of sized
 * deallocations, double-frees, etc.).
 *
 * Debug builds turn this on as well, so fast-path checks can be guarded by
 * config_opt_safety_checks alone, instead of by two flags, and still run in
 * debug mode.
 */
#if defined(JEMALLOC_OPT_SAFETY_CHECKS) || defined(JEMALLOC_DEBUG)
static const bool config_opt_safety_checks = true;
#else
static const bool config_opt_safety_checks = false;
#endif
#if defined(_WIN32) || defined(JEMALLOC_HAVE_SCHED_GETCPU)
/*
* Currently percpu_arena depends on sched_getcpu (or, on Windows, on
* GetCurrentProcessorNumber).
*/
#define JEMALLOC_PERCPU_ARENA
#endif

View File

@@ -1,13 +1,16 @@
#ifndef JEMALLOC_INTERNAL_LARGE_EXTERNS_H
#define JEMALLOC_INTERNAL_LARGE_EXTERNS_H
#include "jemalloc/internal/hook.h"
void *large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero);
void *large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
bool zero);
bool large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min,
size_t usize_max, bool zero);
void *large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize,
size_t alignment, bool zero, tcache_t *tcache);
void *large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize,
size_t alignment, bool zero, tcache_t *tcache,
hook_ralloc_args_t *hook_args);
typedef void (large_dalloc_junk_t)(void *, size_t);
extern large_dalloc_junk_t *JET_MUTABLE large_dalloc_junk;
@@ -23,4 +26,7 @@ prof_tctx_t *large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent);
void large_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx);
void large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent);
nstime_t large_prof_alloc_time_get(const extent_t *extent);
void large_prof_alloc_time_set(extent_t *extent, nstime_t time);
#endif /* JEMALLOC_INTERNAL_LARGE_EXTERNS_H */

View File

@@ -0,0 +1,115 @@
#ifndef JEMALLOC_INTERNAL_LOG_H
#define JEMALLOC_INTERNAL_LOG_H
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/malloc_io.h"
#include "jemalloc/internal/mutex.h"
#ifdef JEMALLOC_LOG
# define JEMALLOC_LOG_VAR_BUFSIZE 1000
#else
# define JEMALLOC_LOG_VAR_BUFSIZE 1
#endif
#define JEMALLOC_LOG_BUFSIZE 4096
/*
* The log malloc_conf option is a '|'-delimited list of log_var name segments
* which should be logged. The names are themselves hierarchical, with '.' as
* the delimiter (a "segment" is just a prefix in the log namespace). So, if
* you have:
*
* log("arena", "log msg for arena"); // 1
* log("arena.a", "log msg for arena.a"); // 2
* log("arena.b", "log msg for arena.b"); // 3
* log("arena.a.a", "log msg for arena.a.a"); // 4
* log("extent.a", "log msg for extent.a"); // 5
* log("extent.b", "log msg for extent.b"); // 6
*
* And your malloc_conf option is "log=arena.a|extent", then lines 2, 4, 5, and
* 6 will print at runtime. You can enable logging from all log vars by
* writing "log=.".
*
* None of this should be regarded as a stable API for right now. It's intended
* as a debugging interface, to let us keep around some of our printf-debugging
* statements.
*/
extern char log_var_names[JEMALLOC_LOG_VAR_BUFSIZE];
extern atomic_b_t log_init_done;
/* Per-call-site logging state: a cached enable decision plus the log name. */
typedef struct log_var_s log_var_t;
struct log_var_s {
/*
* Combined "initialized" / "enabled" state, kept in a single atomic word
* so that no fences are needed on weak architectures. It holds one of
* LOG_NOT_INITIALIZED, LOG_INITIALIZED_NOT_ENABLED or LOG_ENABLED, and
* log_do_begin compares it for equality against those constants rather
* than testing individual bits.
*/
atomic_u_t state;
/* Hierarchical, '.'-delimited name that the log option is matched against. */
const char *name;
};
#define LOG_NOT_INITIALIZED 0U
#define LOG_INITIALIZED_NOT_ENABLED 1U
#define LOG_ENABLED 2U
#define LOG_VAR_INIT(name_str) {ATOMIC_INIT(LOG_NOT_INITIALIZED), name_str}
/*
* Returns the value we should assume for state (which is not necessarily
* accurate; if logging is done before logging has finished initializing, then
* we default to doing the safe thing by logging everything).
*/
unsigned log_var_update_state(log_var_t *log_var);
/*
* We factor out the metadata management to allow us to test more easily.
*
* log_do_begin and log_do_end must always be used as a pair around the
* statements to run when logging is enabled: log_do_begin opens three brace
* levels and log_do_end closes them. The state is read with a relaxed load;
* only a still-uninitialized log_var takes the log_var_update_state call.
* When config_log is false the whole body sits under a constant-false `if`.
*/
#define log_do_begin(log_var) \
if (config_log) { \
unsigned log_state = atomic_load_u(&(log_var).state, \
ATOMIC_RELAXED); \
if (unlikely(log_state == LOG_NOT_INITIALIZED)) { \
log_state = log_var_update_state(&(log_var)); \
assert(log_state != LOG_NOT_INITIALIZED); \
} \
if (log_state == LOG_ENABLED) { \
{
/* User code executes here. */
#define log_do_end(log_var) \
} \
} \
}
/*
* MSVC has some preprocessor bugs in its expansion of __VA_ARGS__ during
* preprocessing. To work around this, we take all potential extra arguments in
* a varargs function. Since a varargs macro needs at least one argument in
* the "...", we accept the format string there, and require that the first
* argument in this "..." is a const char *.
*/
/*
 * Formats "<name>: <message>\n" into a fixed-size stack buffer and emits it
 * with malloc_write.  The first variadic argument must be the const char *
 * format string; the remaining variadic arguments are its operands.
 *
 * Output that does not fit in JEMALLOC_LOG_BUFSIZE bytes is truncated.  The
 * running offset is clamped after every formatting call, so the next
 * destination pointer and remaining-size argument stay inside buf even if a
 * formatter reports a length larger than the space it was given (which is
 * what snprintf-style functions do on truncation).
 */
static inline void
log_impl_varargs(const char *name, ...) {
	char buf[JEMALLOC_LOG_BUFSIZE];
	/* Largest offset that still leaves room for the terminating NUL. */
	const size_t max_offset = JEMALLOC_LOG_BUFSIZE - 1;
	va_list ap;

	va_start(ap, name);
	const char *format = va_arg(ap, const char *);
	size_t dst_offset = 0;

	dst_offset += malloc_snprintf(buf, JEMALLOC_LOG_BUFSIZE, "%s: ", name);
	if (dst_offset > max_offset) {
		dst_offset = max_offset;
	}
	dst_offset += malloc_vsnprintf(buf + dst_offset,
	    JEMALLOC_LOG_BUFSIZE - dst_offset, format, ap);
	if (dst_offset > max_offset) {
		dst_offset = max_offset;
	}
	/* Last piece; its length is not needed afterwards. */
	malloc_snprintf(buf + dst_offset, JEMALLOC_LOG_BUFSIZE - dst_offset,
	    "\n");
	va_end(ap);

	malloc_write(buf);
}
/* Call as log("log.var.str", "format_string %d", arg_for_format_string); */
#define LOG(log_var_str, ...) \
do { \
static log_var_t log_var = LOG_VAR_INIT(log_var_str); \
log_do_begin(log_var) \
log_impl_varargs((log_var).name, __VA_ARGS__); \
log_do_end(log_var) \
} while (0)
#endif /* JEMALLOC_INTERNAL_LOG_H */

View File

@@ -53,10 +53,50 @@ size_t malloc_vsnprintf(char *str, size_t size, const char *format,
va_list ap);
size_t malloc_snprintf(char *str, size_t size, const char *format, ...)
JEMALLOC_FORMAT_PRINTF(3, 4);
/*
* The caller can set write_cb to null to choose to print with the
* je_malloc_message hook.
*/
void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque,
const char *format, va_list ap);
void malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque,
const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4);
void malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2);
/*
 * Writes up to count bytes from buf to file descriptor fd and returns the
 * result of the underlying write primitive, converted to ssize_t.
 */
static inline ssize_t
malloc_write_fd(int fd, const void *buf, size_t count) {
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_write)
	/*
	 * Go through syscall(2) instead of write(2) where possible, so that
	 * libc gets no chance to allocate memory.  FreeBSD needs this; most
	 * other operating systems do not.
	 *
	 * syscall() returns long or int depending on the platform; either one
	 * converts to ssize_t without loss.
	 */
	return (ssize_t)syscall(SYS_write, fd, buf, count);
#elif defined(_WIN32)
	/* The Windows write() takes an unsigned int byte count. */
	return (ssize_t)write(fd, buf, (unsigned int)count);
#else
	return (ssize_t)write(fd, buf, count);
#endif
}
/*
 * Read up to count bytes from the descriptor fd into buf and hand back the
 * result of the underlying call, converted to ssize_t.
 */
static inline ssize_t
malloc_read_fd(int fd, void *buf, size_t count) {
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read)
	/* syscall() may yield int or long; normalize to the return type. */
	return (ssize_t)syscall(SYS_read, fd, buf, count);
#elif defined(_WIN32)
	return (ssize_t)read(fd, buf, (unsigned int)count);
#else
	return (ssize_t)read(fd, buf, count);
#endif
}
#endif /* JEMALLOC_INTERNAL_MALLOC_IO_H */

View File

@@ -37,14 +37,17 @@ struct malloc_mutex_s {
# endif
#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
os_unfair_lock lock;
#elif (defined(JEMALLOC_OSSPIN))
OSSpinLock lock;
#elif (defined(JEMALLOC_MUTEX_INIT_CB))
pthread_mutex_t lock;
malloc_mutex_t *postponed_next;
#else
pthread_mutex_t lock;
#endif
/*
* Hint flag to avoid exclusive cache line contention
* during spin waiting
*/
atomic_b_t locked;
};
/*
* We only touch witness when configured w/ debug. However we
@@ -84,10 +87,6 @@ struct malloc_mutex_s {
# define MALLOC_MUTEX_LOCK(m) os_unfair_lock_lock(&(m)->lock)
# define MALLOC_MUTEX_UNLOCK(m) os_unfair_lock_unlock(&(m)->lock)
# define MALLOC_MUTEX_TRYLOCK(m) (!os_unfair_lock_trylock(&(m)->lock))
#elif (defined(JEMALLOC_OSSPIN))
# define MALLOC_MUTEX_LOCK(m) OSSpinLockLock(&(m)->lock)
# define MALLOC_MUTEX_UNLOCK(m) OSSpinLockUnlock(&(m)->lock)
# define MALLOC_MUTEX_TRYLOCK(m) (!OSSpinLockTry(&(m)->lock))
#else
# define MALLOC_MUTEX_LOCK(m) pthread_mutex_lock(&(m)->lock)
# define MALLOC_MUTEX_UNLOCK(m) pthread_mutex_unlock(&(m)->lock)
@@ -101,22 +100,37 @@ struct malloc_mutex_s {
#ifdef _WIN32
# define MALLOC_MUTEX_INITIALIZER
#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
# if defined(JEMALLOC_DEBUG)
# define MALLOC_MUTEX_INITIALIZER \
{{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT}}, \
WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
#elif (defined(JEMALLOC_OSSPIN))
{{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT, ATOMIC_INIT(false)}}, \
WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0}
# else
# define MALLOC_MUTEX_INITIALIZER \
{{{LOCK_PROF_DATA_INITIALIZER, 0}}, \
{{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT, ATOMIC_INIT(false)}}, \
WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
# endif
#elif (defined(JEMALLOC_MUTEX_INIT_CB))
# if (defined(JEMALLOC_DEBUG))
# define MALLOC_MUTEX_INITIALIZER \
{{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL}}, \
{{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL, ATOMIC_INIT(false)}}, \
WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0}
# else
# define MALLOC_MUTEX_INITIALIZER \
{{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL, ATOMIC_INIT(false)}}, \
WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
# endif
#else
# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT
# if defined(JEMALLOC_DEBUG)
# define MALLOC_MUTEX_INITIALIZER \
{{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER}}, \
{{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, ATOMIC_INIT(false)}}, \
WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0}
# else
# define MALLOC_MUTEX_INITIALIZER \
{{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, ATOMIC_INIT(false)}}, \
WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)}
# endif
#endif
#ifdef JEMALLOC_LAZY_LOCK
@@ -139,6 +153,7 @@ void malloc_mutex_lock_slow(malloc_mutex_t *mutex);
/*
 * Acquire the underlying lock through MALLOC_MUTEX_LOCK, then set the mutex's
 * `locked` flag with a relaxed atomic store.  The flag is written only after
 * the lock call has returned.
 */
static inline void
malloc_mutex_lock_final(malloc_mutex_t *mutex) {
MALLOC_MUTEX_LOCK(mutex);
/* Relaxed store: `locked` is described on the struct as a hint flag. */
atomic_store_b(&mutex->locked, true, ATOMIC_RELAXED);
}
static inline bool
@@ -164,6 +179,7 @@ malloc_mutex_trylock(tsdn_t *tsdn, malloc_mutex_t *mutex) {
witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness);
if (isthreaded) {
if (malloc_mutex_trylock_final(mutex)) {
atomic_store_b(&mutex->locked, true, ATOMIC_RELAXED);
return true;
}
mutex_owner_stats_update(tsdn, mutex);
@@ -203,6 +219,7 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) {
if (isthreaded) {
if (malloc_mutex_trylock_final(mutex)) {
malloc_mutex_lock_slow(mutex);
atomic_store_b(&mutex->locked, true, ATOMIC_RELAXED);
}
mutex_owner_stats_update(tsdn, mutex);
}
@@ -211,6 +228,7 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) {
static inline void
malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) {
atomic_store_b(&mutex->locked, false, ATOMIC_RELAXED);
witness_unlock(tsdn_witness_tsdp_get(tsdn), &mutex->witness);
if (isthreaded) {
MALLOC_MUTEX_UNLOCK(mutex);
@@ -245,4 +263,26 @@ malloc_mutex_prof_read(tsdn_t *tsdn, mutex_prof_data_t *data,
atomic_store_u32(&data->n_waiting_thds, 0, ATOMIC_RELAXED);
}
/*
 * Fold the profiling data of mutex into the accumulator data: additive
 * counters are summed, the two maxima keep the larger value, and the waiting
 * thread count of the accumulator is reset to zero.  The caller must own
 * mutex.
 */
static inline void
malloc_mutex_prof_accum(tsdn_t *tsdn, mutex_prof_data_t *data,
    malloc_mutex_t *mutex) {
	/* The mutex's counters may only be read while it is held. */
	malloc_mutex_assert_owner(tsdn, mutex);

	mutex_prof_data_t *src = &mutex->prof_data;

	/* Wait times: accumulate the total, keep the larger maximum. */
	nstime_add(&data->tot_wait_time, &src->tot_wait_time);
	if (nstime_compare(&src->max_wait_time, &data->max_wait_time) > 0) {
		nstime_copy(&data->max_wait_time, &src->max_wait_time);
	}

	/* Plain additive counters. */
	data->n_wait_times += src->n_wait_times;
	data->n_spin_acquired += src->n_spin_acquired;
	data->n_owner_switches += src->n_owner_switches;
	data->n_lock_ops += src->n_lock_ops;

	/* Peak thread count: keep the larger of the two. */
	if (src->max_n_thds > data->max_n_thds) {
		data->max_n_thds = src->max_n_thds;
	}

	/* The number of waiting threads is not reported. */
	atomic_store_u32(&data->n_waiting_thds, 0, ATOMIC_RELAXED);
}
#endif /* JEMALLOC_INTERNAL_MUTEX_H */

View File

@@ -35,21 +35,43 @@ typedef enum {
mutex_prof_num_arena_mutexes
} mutex_prof_arena_ind_t;
#define MUTEX_PROF_COUNTERS \
OP(num_ops, uint64_t) \
OP(num_wait, uint64_t) \
OP(num_spin_acq, uint64_t) \
OP(num_owner_switch, uint64_t) \
OP(total_wait_time, uint64_t) \
OP(max_wait_time, uint64_t) \
OP(max_num_thds, uint32_t)
/*
* The fourth parameter is a boolean value that is true for derived rate counters
* and false for real ones.
*/
#define MUTEX_PROF_UINT64_COUNTERS \
OP(num_ops, uint64_t, "n_lock_ops", false, num_ops) \
OP(num_ops_ps, uint64_t, "(#/sec)", true, num_ops) \
OP(num_wait, uint64_t, "n_waiting", false, num_wait) \
OP(num_wait_ps, uint64_t, "(#/sec)", true, num_wait) \
OP(num_spin_acq, uint64_t, "n_spin_acq", false, num_spin_acq) \
OP(num_spin_acq_ps, uint64_t, "(#/sec)", true, num_spin_acq) \
OP(num_owner_switch, uint64_t, "n_owner_switch", false, num_owner_switch) \
OP(num_owner_switch_ps, uint64_t, "(#/sec)", true, num_owner_switch) \
OP(total_wait_time, uint64_t, "total_wait_ns", false, total_wait_time) \
OP(total_wait_time_ps, uint64_t, "(#/sec)", true, total_wait_time) \
OP(max_wait_time, uint64_t, "max_wait_ns", false, max_wait_time)
typedef enum {
#define OP(counter, type) mutex_counter_##counter,
MUTEX_PROF_COUNTERS
#define MUTEX_PROF_UINT32_COUNTERS \
OP(max_num_thds, uint32_t, "max_n_thds", false, max_num_thds)
#define MUTEX_PROF_COUNTERS \
MUTEX_PROF_UINT64_COUNTERS \
MUTEX_PROF_UINT32_COUNTERS
#define OP(counter, type, human, derived, base_counter) mutex_counter_##counter,
#define COUNTER_ENUM(counter_list, t) \
typedef enum { \
counter_list \
mutex_prof_num_##t##_counters \
} mutex_prof_##t##_counter_ind_t;
COUNTER_ENUM(MUTEX_PROF_UINT64_COUNTERS, uint64_t)
COUNTER_ENUM(MUTEX_PROF_UINT32_COUNTERS, uint32_t)
#undef COUNTER_ENUM
#undef OP
mutex_prof_num_counters
} mutex_prof_counter_ind_t;
typedef struct {
/*

View File

@@ -58,6 +58,20 @@ static const bool pages_can_purge_forced =
#endif
;
/*
 * Transparent huge page (THP) mode selector.  Note that thp_mode_names_limit
 * and thp_mode_not_supported share the value 3, so thp_mode_not_supported is
 * not below thp_mode_names_limit.
 */
typedef enum {
thp_mode_default = 0, /* Do not change hugepage settings. */
thp_mode_always = 1, /* Always set MADV_HUGEPAGE. */
thp_mode_never = 2, /* Always set MADV_NOHUGEPAGE. */
thp_mode_names_limit = 3, /* Used for option processing. */
thp_mode_not_supported = 3 /* No THP support detected. */
} thp_mode_t;
#define THP_MODE_DEFAULT thp_mode_default
extern thp_mode_t opt_thp;
extern thp_mode_t init_system_thp_mode; /* Initial system wide state. */
extern const char *thp_mode_names[];
void *pages_map(void *addr, size_t size, size_t alignment, bool *commit);
void pages_unmap(void *addr, size_t size);
bool pages_commit(void *addr, size_t size);
@@ -66,6 +80,9 @@ bool pages_purge_lazy(void *addr, size_t size);
bool pages_purge_forced(void *addr, size_t size);
bool pages_huge(void *addr, size_t size);
bool pages_nohuge(void *addr, size_t size);
bool pages_dontdump(void *addr, size_t size);
bool pages_dodump(void *addr, size_t size);
bool pages_boot(void);
void pages_set_thp_state (void *ptr, size_t size);
#endif /* JEMALLOC_INTERNAL_PAGES_EXTERNS_H */

View File

@@ -1,87 +1,72 @@
#define a0dalloc JEMALLOC_N(a0dalloc)
#define a0get JEMALLOC_N(a0get)
#define a0malloc JEMALLOC_N(a0malloc)
#define arena_aalloc JEMALLOC_N(arena_aalloc)
#define arena_choose_hard JEMALLOC_N(arena_choose_hard)
#define arena_cleanup JEMALLOC_N(arena_cleanup)
#define arena_init JEMALLOC_N(arena_init)
#define arena_migrate JEMALLOC_N(arena_migrate)
#define arenas JEMALLOC_N(arenas)
#define arena_set JEMALLOC_N(arena_set)
#define arenas_lock JEMALLOC_N(arenas_lock)
#define arenas_tdata_cleanup JEMALLOC_N(arenas_tdata_cleanup)
#define arena_tdata_get_hard JEMALLOC_N(arena_tdata_get_hard)
#define bootstrap_calloc JEMALLOC_N(bootstrap_calloc)
#define bootstrap_free JEMALLOC_N(bootstrap_free)
#define bootstrap_malloc JEMALLOC_N(bootstrap_malloc)
#define free_default JEMALLOC_N(free_default)
#define iarena_cleanup JEMALLOC_N(iarena_cleanup)
#define jemalloc_postfork_child JEMALLOC_N(jemalloc_postfork_child)
#define jemalloc_postfork_parent JEMALLOC_N(jemalloc_postfork_parent)
#define jemalloc_prefork JEMALLOC_N(jemalloc_prefork)
#define je_sdallocx_noflags JEMALLOC_N(je_sdallocx_noflags)
#define malloc_default JEMALLOC_N(malloc_default)
#define malloc_initialized JEMALLOC_N(malloc_initialized)
#define malloc_slow JEMALLOC_N(malloc_slow)
#define manual_arena_base JEMALLOC_N(manual_arena_base)
#define narenas_auto JEMALLOC_N(narenas_auto)
#define narenas_total_get JEMALLOC_N(narenas_total_get)
#define ncpus JEMALLOC_N(ncpus)
#define opt_abort JEMALLOC_N(opt_abort)
#define opt_abort_conf JEMALLOC_N(opt_abort_conf)
#define opt_confirm_conf JEMALLOC_N(opt_confirm_conf)
#define opt_junk JEMALLOC_N(opt_junk)
#define opt_junk_alloc JEMALLOC_N(opt_junk_alloc)
#define opt_junk_free JEMALLOC_N(opt_junk_free)
#define opt_narenas JEMALLOC_N(opt_narenas)
#define opt_utrace JEMALLOC_N(opt_utrace)
#define opt_xmalloc JEMALLOC_N(opt_xmalloc)
#define opt_zero JEMALLOC_N(opt_zero)
#define sdallocx_default JEMALLOC_N(sdallocx_default)
#define arena_alloc_junk_small JEMALLOC_N(arena_alloc_junk_small)
#define arena_basic_stats_merge JEMALLOC_N(arena_basic_stats_merge)
#define arena_bin_index JEMALLOC_N(arena_bin_index)
#define arena_bin_info JEMALLOC_N(arena_bin_info)
#define arena_bitselm_get_const JEMALLOC_N(arena_bitselm_get_const)
#define arena_bitselm_get_mutable JEMALLOC_N(arena_bitselm_get_mutable)
#define arena_bin_choose_lock JEMALLOC_N(arena_bin_choose_lock)
#define arena_boot JEMALLOC_N(arena_boot)
#define arena_choose JEMALLOC_N(arena_choose)
#define arena_choose_hard JEMALLOC_N(arena_choose_hard)
#define arena_choose_impl JEMALLOC_N(arena_choose_impl)
#define arena_chunk_alloc_huge JEMALLOC_N(arena_chunk_alloc_huge)
#define arena_chunk_cache_maybe_insert JEMALLOC_N(arena_chunk_cache_maybe_insert)
#define arena_chunk_cache_maybe_remove JEMALLOC_N(arena_chunk_cache_maybe_remove)
#define arena_chunk_dalloc_huge JEMALLOC_N(arena_chunk_dalloc_huge)
#define arena_chunk_ralloc_huge_expand JEMALLOC_N(arena_chunk_ralloc_huge_expand)
#define arena_chunk_ralloc_huge_shrink JEMALLOC_N(arena_chunk_ralloc_huge_shrink)
#define arena_chunk_ralloc_huge_similar JEMALLOC_N(arena_chunk_ralloc_huge_similar)
#define arena_cleanup JEMALLOC_N(arena_cleanup)
#define arena_dalloc JEMALLOC_N(arena_dalloc)
#define arena_dalloc_bin JEMALLOC_N(arena_dalloc_bin)
#define arena_choose_huge JEMALLOC_N(arena_choose_huge)
#define arena_dalloc_bin_junked_locked JEMALLOC_N(arena_dalloc_bin_junked_locked)
#define arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large)
#define arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small)
#define arena_dalloc_large JEMALLOC_N(arena_dalloc_large)
#define arena_dalloc_large_junked_locked JEMALLOC_N(arena_dalloc_large_junked_locked)
#define arena_dalloc_promoted JEMALLOC_N(arena_dalloc_promoted)
#define arena_dalloc_small JEMALLOC_N(arena_dalloc_small)
#define arena_decay_tick JEMALLOC_N(arena_decay_tick)
#define arena_decay_ticks JEMALLOC_N(arena_decay_ticks)
#define arena_decay_time_default_get JEMALLOC_N(arena_decay_time_default_get)
#define arena_decay_time_default_set JEMALLOC_N(arena_decay_time_default_set)
#define arena_decay_time_get JEMALLOC_N(arena_decay_time_get)
#define arena_decay_time_set JEMALLOC_N(arena_decay_time_set)
#define arena_decay JEMALLOC_N(arena_decay)
#define arena_destroy JEMALLOC_N(arena_destroy)
#define arena_dirty_decay_ms_default_get JEMALLOC_N(arena_dirty_decay_ms_default_get)
#define arena_dirty_decay_ms_default_set JEMALLOC_N(arena_dirty_decay_ms_default_set)
#define arena_dirty_decay_ms_get JEMALLOC_N(arena_dirty_decay_ms_get)
#define arena_dirty_decay_ms_set JEMALLOC_N(arena_dirty_decay_ms_set)
#define arena_dss_prec_get JEMALLOC_N(arena_dss_prec_get)
#define arena_dss_prec_set JEMALLOC_N(arena_dss_prec_set)
#define arena_extent_alloc_large JEMALLOC_N(arena_extent_alloc_large)
#define arena_extent_dalloc_large_prep JEMALLOC_N(arena_extent_dalloc_large_prep)
#define arena_extent_ralloc_large_expand JEMALLOC_N(arena_extent_ralloc_large_expand)
#define arena_extent_ralloc_large_shrink JEMALLOC_N(arena_extent_ralloc_large_shrink)
#define arena_extents_dirty_dalloc JEMALLOC_N(arena_extents_dirty_dalloc)
#define arena_extent_sn_next JEMALLOC_N(arena_extent_sn_next)
#define arena_get JEMALLOC_N(arena_get)
#define arena_ichoose JEMALLOC_N(arena_ichoose)
#define arena_init JEMALLOC_N(arena_init)
#define arena_lg_dirty_mult_default_get JEMALLOC_N(arena_lg_dirty_mult_default_get)
#define arena_lg_dirty_mult_default_set JEMALLOC_N(arena_lg_dirty_mult_default_set)
#define arena_lg_dirty_mult_get JEMALLOC_N(arena_lg_dirty_mult_get)
#define arena_lg_dirty_mult_set JEMALLOC_N(arena_lg_dirty_mult_set)
#define arena_malloc JEMALLOC_N(arena_malloc)
#define arena_init_huge JEMALLOC_N(arena_init_huge)
#define arena_is_huge JEMALLOC_N(arena_is_huge)
#define arena_malloc_hard JEMALLOC_N(arena_malloc_hard)
#define arena_malloc_large JEMALLOC_N(arena_malloc_large)
#define arena_mapbits_allocated_get JEMALLOC_N(arena_mapbits_allocated_get)
#define arena_mapbits_binind_get JEMALLOC_N(arena_mapbits_binind_get)
#define arena_mapbits_decommitted_get JEMALLOC_N(arena_mapbits_decommitted_get)
#define arena_mapbits_dirty_get JEMALLOC_N(arena_mapbits_dirty_get)
#define arena_mapbits_get JEMALLOC_N(arena_mapbits_get)
#define arena_mapbits_internal_set JEMALLOC_N(arena_mapbits_internal_set)
#define arena_mapbits_large_binind_set JEMALLOC_N(arena_mapbits_large_binind_set)
#define arena_mapbits_large_get JEMALLOC_N(arena_mapbits_large_get)
#define arena_mapbits_large_set JEMALLOC_N(arena_mapbits_large_set)
#define arena_mapbits_large_size_get JEMALLOC_N(arena_mapbits_large_size_get)
#define arena_mapbits_size_decode JEMALLOC_N(arena_mapbits_size_decode)
#define arena_mapbits_size_encode JEMALLOC_N(arena_mapbits_size_encode)
#define arena_mapbits_small_runind_get JEMALLOC_N(arena_mapbits_small_runind_get)
#define arena_mapbits_small_set JEMALLOC_N(arena_mapbits_small_set)
#define arena_mapbits_unallocated_set JEMALLOC_N(arena_mapbits_unallocated_set)
#define arena_mapbits_unallocated_size_get JEMALLOC_N(arena_mapbits_unallocated_size_get)
#define arena_mapbits_unallocated_size_set JEMALLOC_N(arena_mapbits_unallocated_size_set)
#define arena_mapbits_unzeroed_get JEMALLOC_N(arena_mapbits_unzeroed_get)
#define arena_mapbitsp_get_const JEMALLOC_N(arena_mapbitsp_get_const)
#define arena_mapbitsp_get_mutable JEMALLOC_N(arena_mapbitsp_get_mutable)
#define arena_mapbitsp_read JEMALLOC_N(arena_mapbitsp_read)
#define arena_mapbitsp_write JEMALLOC_N(arena_mapbitsp_write)
#define arena_maxrun JEMALLOC_N(arena_maxrun)
#define arena_maybe_purge JEMALLOC_N(arena_maybe_purge)
#define arena_metadata_allocated_add JEMALLOC_N(arena_metadata_allocated_add)
#define arena_metadata_allocated_get JEMALLOC_N(arena_metadata_allocated_get)
#define arena_metadata_allocated_sub JEMALLOC_N(arena_metadata_allocated_sub)
#define arena_migrate JEMALLOC_N(arena_migrate)
#define arena_miscelm_get_const JEMALLOC_N(arena_miscelm_get_const)
#define arena_miscelm_get_mutable JEMALLOC_N(arena_miscelm_get_mutable)
#define arena_miscelm_to_pageind JEMALLOC_N(arena_miscelm_to_pageind)
#define arena_miscelm_to_rpages JEMALLOC_N(arena_miscelm_to_rpages)
#define arena_muzzy_decay_ms_default_get JEMALLOC_N(arena_muzzy_decay_ms_default_get)
#define arena_muzzy_decay_ms_default_set JEMALLOC_N(arena_muzzy_decay_ms_default_set)
#define arena_muzzy_decay_ms_get JEMALLOC_N(arena_muzzy_decay_ms_get)
#define arena_muzzy_decay_ms_set JEMALLOC_N(arena_muzzy_decay_ms_set)
#define arena_new JEMALLOC_N(arena_new)
#define arena_node_alloc JEMALLOC_N(arena_node_alloc)
#define arena_node_dalloc JEMALLOC_N(arena_node_dalloc)
#define arena_nthreads_dec JEMALLOC_N(arena_nthreads_dec)
#define arena_nthreads_get JEMALLOC_N(arena_nthreads_get)
#define arena_nthreads_inc JEMALLOC_N(arena_nthreads_inc)
@@ -92,97 +77,69 @@
#define arena_prefork1 JEMALLOC_N(arena_prefork1)
#define arena_prefork2 JEMALLOC_N(arena_prefork2)
#define arena_prefork3 JEMALLOC_N(arena_prefork3)
#define arena_prof_accum JEMALLOC_N(arena_prof_accum)
#define arena_prof_accum_impl JEMALLOC_N(arena_prof_accum_impl)
#define arena_prof_accum_locked JEMALLOC_N(arena_prof_accum_locked)
#define arena_prof_promoted JEMALLOC_N(arena_prof_promoted)
#define arena_prof_tctx_get JEMALLOC_N(arena_prof_tctx_get)
#define arena_prof_tctx_reset JEMALLOC_N(arena_prof_tctx_reset)
#define arena_prof_tctx_set JEMALLOC_N(arena_prof_tctx_set)
#define arena_ptr_small_binind_get JEMALLOC_N(arena_ptr_small_binind_get)
#define arena_purge JEMALLOC_N(arena_purge)
#define arena_quarantine_junk_small JEMALLOC_N(arena_quarantine_junk_small)
#define arena_prefork4 JEMALLOC_N(arena_prefork4)
#define arena_prefork5 JEMALLOC_N(arena_prefork5)
#define arena_prefork6 JEMALLOC_N(arena_prefork6)
#define arena_prefork7 JEMALLOC_N(arena_prefork7)
#define arena_prof_promote JEMALLOC_N(arena_prof_promote)
#define arena_ralloc JEMALLOC_N(arena_ralloc)
#define arena_ralloc_junk_large JEMALLOC_N(arena_ralloc_junk_large)
#define arena_ralloc_no_move JEMALLOC_N(arena_ralloc_no_move)
#define arena_rd_to_miscelm JEMALLOC_N(arena_rd_to_miscelm)
#define arena_redzone_corruption JEMALLOC_N(arena_redzone_corruption)
#define arena_reset JEMALLOC_N(arena_reset)
#define arena_run_regind JEMALLOC_N(arena_run_regind)
#define arena_run_to_miscelm JEMALLOC_N(arena_run_to_miscelm)
#define arena_salloc JEMALLOC_N(arena_salloc)
#define arena_sdalloc JEMALLOC_N(arena_sdalloc)
#define arena_retain_grow_limit_get_set JEMALLOC_N(arena_retain_grow_limit_get_set)
#define arena_stats_merge JEMALLOC_N(arena_stats_merge)
#define arena_tcache_fill_small JEMALLOC_N(arena_tcache_fill_small)
#define arena_tdata_get JEMALLOC_N(arena_tdata_get)
#define arena_tdata_get_hard JEMALLOC_N(arena_tdata_get_hard)
#define arenas JEMALLOC_N(arenas)
#define arenas_tdata_bypass_cleanup JEMALLOC_N(arenas_tdata_bypass_cleanup)
#define arenas_tdata_cleanup JEMALLOC_N(arenas_tdata_cleanup)
#define atomic_add_p JEMALLOC_N(atomic_add_p)
#define atomic_add_u JEMALLOC_N(atomic_add_u)
#define atomic_add_uint32 JEMALLOC_N(atomic_add_uint32)
#define atomic_add_uint64 JEMALLOC_N(atomic_add_uint64)
#define atomic_add_z JEMALLOC_N(atomic_add_z)
#define atomic_cas_p JEMALLOC_N(atomic_cas_p)
#define atomic_cas_u JEMALLOC_N(atomic_cas_u)
#define atomic_cas_uint32 JEMALLOC_N(atomic_cas_uint32)
#define atomic_cas_uint64 JEMALLOC_N(atomic_cas_uint64)
#define atomic_cas_z JEMALLOC_N(atomic_cas_z)
#define atomic_sub_p JEMALLOC_N(atomic_sub_p)
#define atomic_sub_u JEMALLOC_N(atomic_sub_u)
#define atomic_sub_uint32 JEMALLOC_N(atomic_sub_uint32)
#define atomic_sub_uint64 JEMALLOC_N(atomic_sub_uint64)
#define atomic_sub_z JEMALLOC_N(atomic_sub_z)
#define atomic_write_p JEMALLOC_N(atomic_write_p)
#define atomic_write_u JEMALLOC_N(atomic_write_u)
#define atomic_write_uint32 JEMALLOC_N(atomic_write_uint32)
#define atomic_write_uint64 JEMALLOC_N(atomic_write_uint64)
#define atomic_write_z JEMALLOC_N(atomic_write_z)
#define h_steps JEMALLOC_N(h_steps)
#define opt_dirty_decay_ms JEMALLOC_N(opt_dirty_decay_ms)
#define opt_muzzy_decay_ms JEMALLOC_N(opt_muzzy_decay_ms)
#define opt_oversize_threshold JEMALLOC_N(opt_oversize_threshold)
#define opt_percpu_arena JEMALLOC_N(opt_percpu_arena)
#define oversize_threshold JEMALLOC_N(oversize_threshold)
#define percpu_arena_mode_names JEMALLOC_N(percpu_arena_mode_names)
#define background_thread_boot0 JEMALLOC_N(background_thread_boot0)
#define background_thread_boot1 JEMALLOC_N(background_thread_boot1)
#define background_thread_create JEMALLOC_N(background_thread_create)
#define background_thread_ctl_init JEMALLOC_N(background_thread_ctl_init)
#define background_thread_enabled_state JEMALLOC_N(background_thread_enabled_state)
#define background_thread_info JEMALLOC_N(background_thread_info)
#define background_thread_interval_check JEMALLOC_N(background_thread_interval_check)
#define background_thread_lock JEMALLOC_N(background_thread_lock)
#define background_thread_postfork_child JEMALLOC_N(background_thread_postfork_child)
#define background_thread_postfork_parent JEMALLOC_N(background_thread_postfork_parent)
#define background_thread_prefork0 JEMALLOC_N(background_thread_prefork0)
#define background_thread_prefork1 JEMALLOC_N(background_thread_prefork1)
#define background_threads_disable JEMALLOC_N(background_threads_disable)
#define background_threads_enable JEMALLOC_N(background_threads_enable)
#define background_thread_stats_read JEMALLOC_N(background_thread_stats_read)
#define max_background_threads JEMALLOC_N(max_background_threads)
#define n_background_threads JEMALLOC_N(n_background_threads)
#define opt_background_thread JEMALLOC_N(opt_background_thread)
#define opt_max_background_threads JEMALLOC_N(opt_max_background_threads)
#define pthread_create_wrapper JEMALLOC_N(pthread_create_wrapper)
#define b0get JEMALLOC_N(b0get)
#define base_alloc JEMALLOC_N(base_alloc)
#define base_alloc_extent JEMALLOC_N(base_alloc_extent)
#define base_boot JEMALLOC_N(base_boot)
#define base_delete JEMALLOC_N(base_delete)
#define base_extent_hooks_get JEMALLOC_N(base_extent_hooks_get)
#define base_extent_hooks_set JEMALLOC_N(base_extent_hooks_set)
#define base_new JEMALLOC_N(base_new)
#define base_postfork_child JEMALLOC_N(base_postfork_child)
#define base_postfork_parent JEMALLOC_N(base_postfork_parent)
#define base_prefork JEMALLOC_N(base_prefork)
#define base_stats_get JEMALLOC_N(base_stats_get)
#define bitmap_full JEMALLOC_N(bitmap_full)
#define bitmap_get JEMALLOC_N(bitmap_get)
#define metadata_thp_mode_names JEMALLOC_N(metadata_thp_mode_names)
#define opt_metadata_thp JEMALLOC_N(opt_metadata_thp)
#define bin_boot JEMALLOC_N(bin_boot)
#define bin_infos JEMALLOC_N(bin_infos)
#define bin_init JEMALLOC_N(bin_init)
#define bin_postfork_child JEMALLOC_N(bin_postfork_child)
#define bin_postfork_parent JEMALLOC_N(bin_postfork_parent)
#define bin_prefork JEMALLOC_N(bin_prefork)
#define bin_shard_sizes_boot JEMALLOC_N(bin_shard_sizes_boot)
#define bin_update_shard_size JEMALLOC_N(bin_update_shard_size)
#define bitmap_info_init JEMALLOC_N(bitmap_info_init)
#define bitmap_init JEMALLOC_N(bitmap_init)
#define bitmap_set JEMALLOC_N(bitmap_set)
#define bitmap_sfu JEMALLOC_N(bitmap_sfu)
#define bitmap_size JEMALLOC_N(bitmap_size)
#define bitmap_unset JEMALLOC_N(bitmap_unset)
#define bootstrap_calloc JEMALLOC_N(bootstrap_calloc)
#define bootstrap_free JEMALLOC_N(bootstrap_free)
#define bootstrap_malloc JEMALLOC_N(bootstrap_malloc)
#define bt_init JEMALLOC_N(bt_init)
#define buferror JEMALLOC_N(buferror)
#define chunk_alloc_base JEMALLOC_N(chunk_alloc_base)
#define chunk_alloc_cache JEMALLOC_N(chunk_alloc_cache)
#define chunk_alloc_dss JEMALLOC_N(chunk_alloc_dss)
#define chunk_alloc_mmap JEMALLOC_N(chunk_alloc_mmap)
#define chunk_alloc_wrapper JEMALLOC_N(chunk_alloc_wrapper)
#define chunk_boot JEMALLOC_N(chunk_boot)
#define chunk_dalloc_cache JEMALLOC_N(chunk_dalloc_cache)
#define chunk_dalloc_mmap JEMALLOC_N(chunk_dalloc_mmap)
#define chunk_dalloc_wrapper JEMALLOC_N(chunk_dalloc_wrapper)
#define chunk_deregister JEMALLOC_N(chunk_deregister)
#define chunk_dss_boot JEMALLOC_N(chunk_dss_boot)
#define chunk_dss_mergeable JEMALLOC_N(chunk_dss_mergeable)
#define chunk_dss_prec_get JEMALLOC_N(chunk_dss_prec_get)
#define chunk_dss_prec_set JEMALLOC_N(chunk_dss_prec_set)
#define chunk_hooks_default JEMALLOC_N(chunk_hooks_default)
#define chunk_hooks_get JEMALLOC_N(chunk_hooks_get)
#define chunk_hooks_set JEMALLOC_N(chunk_hooks_set)
#define chunk_in_dss JEMALLOC_N(chunk_in_dss)
#define chunk_lookup JEMALLOC_N(chunk_lookup)
#define chunk_npages JEMALLOC_N(chunk_npages)
#define chunk_purge_wrapper JEMALLOC_N(chunk_purge_wrapper)
#define chunk_register JEMALLOC_N(chunk_register)
#define chunks_rtree JEMALLOC_N(chunks_rtree)
#define chunksize JEMALLOC_N(chunksize)
#define chunksize_mask JEMALLOC_N(chunksize_mask)
#define ckh_count JEMALLOC_N(ckh_count)
#define ckh_delete JEMALLOC_N(ckh_delete)
#define ckh_insert JEMALLOC_N(ckh_insert)
@@ -201,267 +158,183 @@
#define ctl_postfork_child JEMALLOC_N(ctl_postfork_child)
#define ctl_postfork_parent JEMALLOC_N(ctl_postfork_parent)
#define ctl_prefork JEMALLOC_N(ctl_prefork)
#define decay_ticker_get JEMALLOC_N(decay_ticker_get)
#define div_init JEMALLOC_N(div_init)
#define extent_alloc JEMALLOC_N(extent_alloc)
#define extent_alloc_wrapper JEMALLOC_N(extent_alloc_wrapper)
#define extent_avail_any JEMALLOC_N(extent_avail_any)
#define extent_avail_empty JEMALLOC_N(extent_avail_empty)
#define extent_avail_first JEMALLOC_N(extent_avail_first)
#define extent_avail_insert JEMALLOC_N(extent_avail_insert)
#define extent_avail_new JEMALLOC_N(extent_avail_new)
#define extent_avail_remove JEMALLOC_N(extent_avail_remove)
#define extent_avail_remove_any JEMALLOC_N(extent_avail_remove_any)
#define extent_avail_remove_first JEMALLOC_N(extent_avail_remove_first)
#define extent_boot JEMALLOC_N(extent_boot)
#define extent_commit_wrapper JEMALLOC_N(extent_commit_wrapper)
#define extent_dalloc JEMALLOC_N(extent_dalloc)
#define extent_dalloc_gap JEMALLOC_N(extent_dalloc_gap)
#define extent_dalloc_wrapper JEMALLOC_N(extent_dalloc_wrapper)
#define extent_decommit_wrapper JEMALLOC_N(extent_decommit_wrapper)
#define extent_destroy_wrapper JEMALLOC_N(extent_destroy_wrapper)
#define extent_heap_any JEMALLOC_N(extent_heap_any)
#define extent_heap_empty JEMALLOC_N(extent_heap_empty)
#define extent_heap_first JEMALLOC_N(extent_heap_first)
#define extent_heap_insert JEMALLOC_N(extent_heap_insert)
#define extent_heap_new JEMALLOC_N(extent_heap_new)
#define extent_heap_remove JEMALLOC_N(extent_heap_remove)
#define extent_heap_remove_any JEMALLOC_N(extent_heap_remove_any)
#define extent_heap_remove_first JEMALLOC_N(extent_heap_remove_first)
#define extent_hooks_default JEMALLOC_N(extent_hooks_default)
#define extent_hooks_get JEMALLOC_N(extent_hooks_get)
#define extent_hooks_set JEMALLOC_N(extent_hooks_set)
#define extent_merge_wrapper JEMALLOC_N(extent_merge_wrapper)
#define extent_mutex_pool JEMALLOC_N(extent_mutex_pool)
#define extent_purge_forced_wrapper JEMALLOC_N(extent_purge_forced_wrapper)
#define extent_purge_lazy_wrapper JEMALLOC_N(extent_purge_lazy_wrapper)
#define extents_alloc JEMALLOC_N(extents_alloc)
#define extents_dalloc JEMALLOC_N(extents_dalloc)
#define extents_evict JEMALLOC_N(extents_evict)
#define extents_init JEMALLOC_N(extents_init)
#define extents_nbytes_get JEMALLOC_N(extents_nbytes_get)
#define extents_nextents_get JEMALLOC_N(extents_nextents_get)
#define extents_npages_get JEMALLOC_N(extents_npages_get)
#define extent_split_wrapper JEMALLOC_N(extent_split_wrapper)
#define extents_postfork_child JEMALLOC_N(extents_postfork_child)
#define extents_postfork_parent JEMALLOC_N(extents_postfork_parent)
#define extents_prefork JEMALLOC_N(extents_prefork)
#define extents_rtree JEMALLOC_N(extents_rtree)
#define extents_state_get JEMALLOC_N(extents_state_get)
#define extent_util_stats_get JEMALLOC_N(extent_util_stats_get)
#define extent_util_stats_verbose_get JEMALLOC_N(extent_util_stats_verbose_get)
#define opt_lg_extent_max_active_fit JEMALLOC_N(opt_lg_extent_max_active_fit)
#define dss_prec_names JEMALLOC_N(dss_prec_names)
#define extent_node_achunk_get JEMALLOC_N(extent_node_achunk_get)
#define extent_node_achunk_set JEMALLOC_N(extent_node_achunk_set)
#define extent_node_addr_get JEMALLOC_N(extent_node_addr_get)
#define extent_node_addr_set JEMALLOC_N(extent_node_addr_set)
#define extent_node_arena_get JEMALLOC_N(extent_node_arena_get)
#define extent_node_arena_set JEMALLOC_N(extent_node_arena_set)
#define extent_node_committed_get JEMALLOC_N(extent_node_committed_get)
#define extent_node_committed_set JEMALLOC_N(extent_node_committed_set)
#define extent_node_dirty_insert JEMALLOC_N(extent_node_dirty_insert)
#define extent_node_dirty_linkage_init JEMALLOC_N(extent_node_dirty_linkage_init)
#define extent_node_dirty_remove JEMALLOC_N(extent_node_dirty_remove)
#define extent_node_init JEMALLOC_N(extent_node_init)
#define extent_node_prof_tctx_get JEMALLOC_N(extent_node_prof_tctx_get)
#define extent_node_prof_tctx_set JEMALLOC_N(extent_node_prof_tctx_set)
#define extent_node_size_get JEMALLOC_N(extent_node_size_get)
#define extent_node_size_set JEMALLOC_N(extent_node_size_set)
#define extent_node_sn_get JEMALLOC_N(extent_node_sn_get)
#define extent_node_sn_set JEMALLOC_N(extent_node_sn_set)
#define extent_node_zeroed_get JEMALLOC_N(extent_node_zeroed_get)
#define extent_node_zeroed_set JEMALLOC_N(extent_node_zeroed_set)
#define extent_size_quantize_ceil JEMALLOC_N(extent_size_quantize_ceil)
#define extent_size_quantize_floor JEMALLOC_N(extent_size_quantize_floor)
#define extent_tree_ad_destroy JEMALLOC_N(extent_tree_ad_destroy)
#define extent_tree_ad_destroy_recurse JEMALLOC_N(extent_tree_ad_destroy_recurse)
#define extent_tree_ad_empty JEMALLOC_N(extent_tree_ad_empty)
#define extent_tree_ad_first JEMALLOC_N(extent_tree_ad_first)
#define extent_tree_ad_insert JEMALLOC_N(extent_tree_ad_insert)
#define extent_tree_ad_iter JEMALLOC_N(extent_tree_ad_iter)
#define extent_tree_ad_iter_recurse JEMALLOC_N(extent_tree_ad_iter_recurse)
#define extent_tree_ad_iter_start JEMALLOC_N(extent_tree_ad_iter_start)
#define extent_tree_ad_last JEMALLOC_N(extent_tree_ad_last)
#define extent_tree_ad_new JEMALLOC_N(extent_tree_ad_new)
#define extent_tree_ad_next JEMALLOC_N(extent_tree_ad_next)
#define extent_tree_ad_nsearch JEMALLOC_N(extent_tree_ad_nsearch)
#define extent_tree_ad_prev JEMALLOC_N(extent_tree_ad_prev)
#define extent_tree_ad_psearch JEMALLOC_N(extent_tree_ad_psearch)
#define extent_tree_ad_remove JEMALLOC_N(extent_tree_ad_remove)
#define extent_tree_ad_reverse_iter JEMALLOC_N(extent_tree_ad_reverse_iter)
#define extent_tree_ad_reverse_iter_recurse JEMALLOC_N(extent_tree_ad_reverse_iter_recurse)
#define extent_tree_ad_reverse_iter_start JEMALLOC_N(extent_tree_ad_reverse_iter_start)
#define extent_tree_ad_search JEMALLOC_N(extent_tree_ad_search)
#define extent_tree_szsnad_destroy JEMALLOC_N(extent_tree_szsnad_destroy)
#define extent_tree_szsnad_destroy_recurse JEMALLOC_N(extent_tree_szsnad_destroy_recurse)
#define extent_tree_szsnad_empty JEMALLOC_N(extent_tree_szsnad_empty)
#define extent_tree_szsnad_first JEMALLOC_N(extent_tree_szsnad_first)
#define extent_tree_szsnad_insert JEMALLOC_N(extent_tree_szsnad_insert)
#define extent_tree_szsnad_iter JEMALLOC_N(extent_tree_szsnad_iter)
#define extent_tree_szsnad_iter_recurse JEMALLOC_N(extent_tree_szsnad_iter_recurse)
#define extent_tree_szsnad_iter_start JEMALLOC_N(extent_tree_szsnad_iter_start)
#define extent_tree_szsnad_last JEMALLOC_N(extent_tree_szsnad_last)
#define extent_tree_szsnad_new JEMALLOC_N(extent_tree_szsnad_new)
#define extent_tree_szsnad_next JEMALLOC_N(extent_tree_szsnad_next)
#define extent_tree_szsnad_nsearch JEMALLOC_N(extent_tree_szsnad_nsearch)
#define extent_tree_szsnad_prev JEMALLOC_N(extent_tree_szsnad_prev)
#define extent_tree_szsnad_psearch JEMALLOC_N(extent_tree_szsnad_psearch)
#define extent_tree_szsnad_remove JEMALLOC_N(extent_tree_szsnad_remove)
#define extent_tree_szsnad_reverse_iter JEMALLOC_N(extent_tree_szsnad_reverse_iter)
#define extent_tree_szsnad_reverse_iter_recurse JEMALLOC_N(extent_tree_szsnad_reverse_iter_recurse)
#define extent_tree_szsnad_reverse_iter_start JEMALLOC_N(extent_tree_szsnad_reverse_iter_start)
#define extent_tree_szsnad_search JEMALLOC_N(extent_tree_szsnad_search)
#define ffs_llu JEMALLOC_N(ffs_llu)
#define ffs_lu JEMALLOC_N(ffs_lu)
#define ffs_u JEMALLOC_N(ffs_u)
#define ffs_u32 JEMALLOC_N(ffs_u32)
#define ffs_u64 JEMALLOC_N(ffs_u64)
#define ffs_zu JEMALLOC_N(ffs_zu)
#define get_errno JEMALLOC_N(get_errno)
#define hash JEMALLOC_N(hash)
#define hash_fmix_32 JEMALLOC_N(hash_fmix_32)
#define hash_fmix_64 JEMALLOC_N(hash_fmix_64)
#define hash_get_block_32 JEMALLOC_N(hash_get_block_32)
#define hash_get_block_64 JEMALLOC_N(hash_get_block_64)
#define hash_rotl_32 JEMALLOC_N(hash_rotl_32)
#define hash_rotl_64 JEMALLOC_N(hash_rotl_64)
#define hash_x64_128 JEMALLOC_N(hash_x64_128)
#define hash_x86_128 JEMALLOC_N(hash_x86_128)
#define hash_x86_32 JEMALLOC_N(hash_x86_32)
#define huge_aalloc JEMALLOC_N(huge_aalloc)
#define huge_dalloc JEMALLOC_N(huge_dalloc)
#define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk)
#define huge_malloc JEMALLOC_N(huge_malloc)
#define huge_palloc JEMALLOC_N(huge_palloc)
#define huge_prof_tctx_get JEMALLOC_N(huge_prof_tctx_get)
#define huge_prof_tctx_reset JEMALLOC_N(huge_prof_tctx_reset)
#define huge_prof_tctx_set JEMALLOC_N(huge_prof_tctx_set)
#define huge_ralloc JEMALLOC_N(huge_ralloc)
#define huge_ralloc_no_move JEMALLOC_N(huge_ralloc_no_move)
#define huge_salloc JEMALLOC_N(huge_salloc)
#define iaalloc JEMALLOC_N(iaalloc)
#define ialloc JEMALLOC_N(ialloc)
#define iallocztm JEMALLOC_N(iallocztm)
#define iarena_cleanup JEMALLOC_N(iarena_cleanup)
#define idalloc JEMALLOC_N(idalloc)
#define idalloctm JEMALLOC_N(idalloctm)
#define in_valgrind JEMALLOC_N(in_valgrind)
#define index2size JEMALLOC_N(index2size)
#define index2size_compute JEMALLOC_N(index2size_compute)
#define index2size_lookup JEMALLOC_N(index2size_lookup)
#define index2size_tab JEMALLOC_N(index2size_tab)
#define ipalloc JEMALLOC_N(ipalloc)
#define ipalloct JEMALLOC_N(ipalloct)
#define ipallocztm JEMALLOC_N(ipallocztm)
#define iqalloc JEMALLOC_N(iqalloc)
#define iralloc JEMALLOC_N(iralloc)
#define iralloct JEMALLOC_N(iralloct)
#define iralloct_realign JEMALLOC_N(iralloct_realign)
#define isalloc JEMALLOC_N(isalloc)
#define isdalloct JEMALLOC_N(isdalloct)
#define isqalloc JEMALLOC_N(isqalloc)
#define isthreaded JEMALLOC_N(isthreaded)
#define ivsalloc JEMALLOC_N(ivsalloc)
#define ixalloc JEMALLOC_N(ixalloc)
#define jemalloc_postfork_child JEMALLOC_N(jemalloc_postfork_child)
#define jemalloc_postfork_parent JEMALLOC_N(jemalloc_postfork_parent)
#define jemalloc_prefork JEMALLOC_N(jemalloc_prefork)
#define large_maxclass JEMALLOC_N(large_maxclass)
#define lg_floor JEMALLOC_N(lg_floor)
#define lg_prof_sample JEMALLOC_N(lg_prof_sample)
#define extent_alloc_dss JEMALLOC_N(extent_alloc_dss)
#define extent_dss_boot JEMALLOC_N(extent_dss_boot)
#define extent_dss_mergeable JEMALLOC_N(extent_dss_mergeable)
#define extent_dss_prec_get JEMALLOC_N(extent_dss_prec_get)
#define extent_dss_prec_set JEMALLOC_N(extent_dss_prec_set)
#define extent_in_dss JEMALLOC_N(extent_in_dss)
#define opt_dss JEMALLOC_N(opt_dss)
#define extent_alloc_mmap JEMALLOC_N(extent_alloc_mmap)
#define extent_dalloc_mmap JEMALLOC_N(extent_dalloc_mmap)
#define opt_retain JEMALLOC_N(opt_retain)
#define hook_boot JEMALLOC_N(hook_boot)
#define hook_install JEMALLOC_N(hook_install)
#define hook_invoke_alloc JEMALLOC_N(hook_invoke_alloc)
#define hook_invoke_dalloc JEMALLOC_N(hook_invoke_dalloc)
#define hook_invoke_expand JEMALLOC_N(hook_invoke_expand)
#define hook_remove JEMALLOC_N(hook_remove)
#define large_dalloc JEMALLOC_N(large_dalloc)
#define large_dalloc_finish JEMALLOC_N(large_dalloc_finish)
#define large_dalloc_junk JEMALLOC_N(large_dalloc_junk)
#define large_dalloc_maybe_junk JEMALLOC_N(large_dalloc_maybe_junk)
#define large_dalloc_prep_junked_locked JEMALLOC_N(large_dalloc_prep_junked_locked)
#define large_malloc JEMALLOC_N(large_malloc)
#define large_palloc JEMALLOC_N(large_palloc)
#define large_prof_alloc_time_get JEMALLOC_N(large_prof_alloc_time_get)
#define large_prof_alloc_time_set JEMALLOC_N(large_prof_alloc_time_set)
#define large_prof_tctx_get JEMALLOC_N(large_prof_tctx_get)
#define large_prof_tctx_reset JEMALLOC_N(large_prof_tctx_reset)
#define large_prof_tctx_set JEMALLOC_N(large_prof_tctx_set)
#define large_ralloc JEMALLOC_N(large_ralloc)
#define large_ralloc_no_move JEMALLOC_N(large_ralloc_no_move)
#define large_salloc JEMALLOC_N(large_salloc)
#define log_init_done JEMALLOC_N(log_init_done)
#define log_var_names JEMALLOC_N(log_var_names)
#define log_var_update_state JEMALLOC_N(log_var_update_state)
#define buferror JEMALLOC_N(buferror)
#define malloc_cprintf JEMALLOC_N(malloc_cprintf)
#define malloc_mutex_assert_not_owner JEMALLOC_N(malloc_mutex_assert_not_owner)
#define malloc_mutex_assert_owner JEMALLOC_N(malloc_mutex_assert_owner)
#define malloc_mutex_boot JEMALLOC_N(malloc_mutex_boot)
#define malloc_mutex_init JEMALLOC_N(malloc_mutex_init)
#define malloc_mutex_lock JEMALLOC_N(malloc_mutex_lock)
#define malloc_mutex_postfork_child JEMALLOC_N(malloc_mutex_postfork_child)
#define malloc_mutex_postfork_parent JEMALLOC_N(malloc_mutex_postfork_parent)
#define malloc_mutex_prefork JEMALLOC_N(malloc_mutex_prefork)
#define malloc_mutex_unlock JEMALLOC_N(malloc_mutex_unlock)
#define malloc_printf JEMALLOC_N(malloc_printf)
#define malloc_snprintf JEMALLOC_N(malloc_snprintf)
#define malloc_strtoumax JEMALLOC_N(malloc_strtoumax)
#define malloc_tsd_boot0 JEMALLOC_N(malloc_tsd_boot0)
#define malloc_tsd_boot1 JEMALLOC_N(malloc_tsd_boot1)
#define malloc_tsd_cleanup_register JEMALLOC_N(malloc_tsd_cleanup_register)
#define malloc_tsd_dalloc JEMALLOC_N(malloc_tsd_dalloc)
#define malloc_tsd_malloc JEMALLOC_N(malloc_tsd_malloc)
#define malloc_tsd_no_cleanup JEMALLOC_N(malloc_tsd_no_cleanup)
#define malloc_vcprintf JEMALLOC_N(malloc_vcprintf)
#define malloc_vsnprintf JEMALLOC_N(malloc_vsnprintf)
#define malloc_write JEMALLOC_N(malloc_write)
#define map_bias JEMALLOC_N(map_bias)
#define map_misc_offset JEMALLOC_N(map_misc_offset)
#define mb_write JEMALLOC_N(mb_write)
#define narenas_auto JEMALLOC_N(narenas_auto)
#define narenas_tdata_cleanup JEMALLOC_N(narenas_tdata_cleanup)
#define narenas_total_get JEMALLOC_N(narenas_total_get)
#define ncpus JEMALLOC_N(ncpus)
#define nhbins JEMALLOC_N(nhbins)
#define nhclasses JEMALLOC_N(nhclasses)
#define nlclasses JEMALLOC_N(nlclasses)
#define malloc_mutex_boot JEMALLOC_N(malloc_mutex_boot)
#define malloc_mutex_init JEMALLOC_N(malloc_mutex_init)
#define malloc_mutex_lock_slow JEMALLOC_N(malloc_mutex_lock_slow)
#define malloc_mutex_postfork_child JEMALLOC_N(malloc_mutex_postfork_child)
#define malloc_mutex_postfork_parent JEMALLOC_N(malloc_mutex_postfork_parent)
#define malloc_mutex_prefork JEMALLOC_N(malloc_mutex_prefork)
#define malloc_mutex_prof_data_reset JEMALLOC_N(malloc_mutex_prof_data_reset)
#define mutex_pool_init JEMALLOC_N(mutex_pool_init)
#define nstime_add JEMALLOC_N(nstime_add)
#define nstime_compare JEMALLOC_N(nstime_compare)
#define nstime_copy JEMALLOC_N(nstime_copy)
#define nstime_divide JEMALLOC_N(nstime_divide)
#define nstime_iadd JEMALLOC_N(nstime_iadd)
#define nstime_idivide JEMALLOC_N(nstime_idivide)
#define nstime_imultiply JEMALLOC_N(nstime_imultiply)
#define nstime_init JEMALLOC_N(nstime_init)
#define nstime_init2 JEMALLOC_N(nstime_init2)
#define nstime_isubtract JEMALLOC_N(nstime_isubtract)
#define nstime_monotonic JEMALLOC_N(nstime_monotonic)
#define nstime_msec JEMALLOC_N(nstime_msec)
#define nstime_ns JEMALLOC_N(nstime_ns)
#define nstime_nsec JEMALLOC_N(nstime_nsec)
#define nstime_sec JEMALLOC_N(nstime_sec)
#define nstime_subtract JEMALLOC_N(nstime_subtract)
#define nstime_update JEMALLOC_N(nstime_update)
#define opt_abort JEMALLOC_N(opt_abort)
#define opt_decay_time JEMALLOC_N(opt_decay_time)
#define opt_dss JEMALLOC_N(opt_dss)
#define opt_junk JEMALLOC_N(opt_junk)
#define opt_junk_alloc JEMALLOC_N(opt_junk_alloc)
#define opt_junk_free JEMALLOC_N(opt_junk_free)
#define opt_lg_chunk JEMALLOC_N(opt_lg_chunk)
#define opt_lg_dirty_mult JEMALLOC_N(opt_lg_dirty_mult)
#define init_system_thp_mode JEMALLOC_N(init_system_thp_mode)
#define opt_thp JEMALLOC_N(opt_thp)
#define pages_boot JEMALLOC_N(pages_boot)
#define pages_commit JEMALLOC_N(pages_commit)
#define pages_decommit JEMALLOC_N(pages_decommit)
#define pages_dodump JEMALLOC_N(pages_dodump)
#define pages_dontdump JEMALLOC_N(pages_dontdump)
#define pages_huge JEMALLOC_N(pages_huge)
#define pages_map JEMALLOC_N(pages_map)
#define pages_nohuge JEMALLOC_N(pages_nohuge)
#define pages_purge_forced JEMALLOC_N(pages_purge_forced)
#define pages_purge_lazy JEMALLOC_N(pages_purge_lazy)
#define pages_set_thp_state JEMALLOC_N(pages_set_thp_state)
#define pages_unmap JEMALLOC_N(pages_unmap)
#define thp_mode_names JEMALLOC_N(thp_mode_names)
#define bt2gctx_mtx JEMALLOC_N(bt2gctx_mtx)
#define bt_init JEMALLOC_N(bt_init)
#define lg_prof_sample JEMALLOC_N(lg_prof_sample)
#define opt_lg_prof_interval JEMALLOC_N(opt_lg_prof_interval)
#define opt_lg_prof_sample JEMALLOC_N(opt_lg_prof_sample)
#define opt_lg_tcache_max JEMALLOC_N(opt_lg_tcache_max)
#define opt_narenas JEMALLOC_N(opt_narenas)
#define opt_prof JEMALLOC_N(opt_prof)
#define opt_prof_accum JEMALLOC_N(opt_prof_accum)
#define opt_prof_active JEMALLOC_N(opt_prof_active)
#define opt_prof_final JEMALLOC_N(opt_prof_final)
#define opt_prof_gdump JEMALLOC_N(opt_prof_gdump)
#define opt_prof_leak JEMALLOC_N(opt_prof_leak)
#define opt_prof_log JEMALLOC_N(opt_prof_log)
#define opt_prof_prefix JEMALLOC_N(opt_prof_prefix)
#define opt_prof_thread_active_init JEMALLOC_N(opt_prof_thread_active_init)
#define opt_purge JEMALLOC_N(opt_purge)
#define opt_quarantine JEMALLOC_N(opt_quarantine)
#define opt_redzone JEMALLOC_N(opt_redzone)
#define opt_stats_print JEMALLOC_N(opt_stats_print)
#define opt_tcache JEMALLOC_N(opt_tcache)
#define opt_thp JEMALLOC_N(opt_thp)
#define opt_utrace JEMALLOC_N(opt_utrace)
#define opt_xmalloc JEMALLOC_N(opt_xmalloc)
#define opt_zero JEMALLOC_N(opt_zero)
#define p2rz JEMALLOC_N(p2rz)
#define pages_boot JEMALLOC_N(pages_boot)
#define pages_commit JEMALLOC_N(pages_commit)
#define pages_decommit JEMALLOC_N(pages_decommit)
#define pages_huge JEMALLOC_N(pages_huge)
#define pages_map JEMALLOC_N(pages_map)
#define pages_nohuge JEMALLOC_N(pages_nohuge)
#define pages_purge JEMALLOC_N(pages_purge)
#define pages_trim JEMALLOC_N(pages_trim)
#define pages_unmap JEMALLOC_N(pages_unmap)
#define pind2sz JEMALLOC_N(pind2sz)
#define pind2sz_compute JEMALLOC_N(pind2sz_compute)
#define pind2sz_lookup JEMALLOC_N(pind2sz_lookup)
#define pind2sz_tab JEMALLOC_N(pind2sz_tab)
#define pow2_ceil_u32 JEMALLOC_N(pow2_ceil_u32)
#define pow2_ceil_u64 JEMALLOC_N(pow2_ceil_u64)
#define pow2_ceil_zu JEMALLOC_N(pow2_ceil_zu)
#define prng_lg_range_u32 JEMALLOC_N(prng_lg_range_u32)
#define prng_lg_range_u64 JEMALLOC_N(prng_lg_range_u64)
#define prng_lg_range_zu JEMALLOC_N(prng_lg_range_zu)
#define prng_range_u32 JEMALLOC_N(prng_range_u32)
#define prng_range_u64 JEMALLOC_N(prng_range_u64)
#define prng_range_zu JEMALLOC_N(prng_range_zu)
#define prng_state_next_u32 JEMALLOC_N(prng_state_next_u32)
#define prng_state_next_u64 JEMALLOC_N(prng_state_next_u64)
#define prng_state_next_zu JEMALLOC_N(prng_state_next_zu)
#define prof_accum_init JEMALLOC_N(prof_accum_init)
#define prof_active JEMALLOC_N(prof_active)
#define prof_active_get JEMALLOC_N(prof_active_get)
#define prof_active_get_unlocked JEMALLOC_N(prof_active_get_unlocked)
#define prof_active_set JEMALLOC_N(prof_active_set)
#define prof_alloc_prep JEMALLOC_N(prof_alloc_prep)
#define prof_alloc_rollback JEMALLOC_N(prof_alloc_rollback)
#define prof_backtrace JEMALLOC_N(prof_backtrace)
#define prof_boot0 JEMALLOC_N(prof_boot0)
#define prof_boot1 JEMALLOC_N(prof_boot1)
#define prof_boot2 JEMALLOC_N(prof_boot2)
#define prof_bt_count JEMALLOC_N(prof_bt_count)
#define prof_dump_header JEMALLOC_N(prof_dump_header)
#define prof_dump_open JEMALLOC_N(prof_dump_open)
#define prof_free JEMALLOC_N(prof_free)
#define prof_free_sampled_object JEMALLOC_N(prof_free_sampled_object)
#define prof_gdump JEMALLOC_N(prof_gdump)
#define prof_gdump_get JEMALLOC_N(prof_gdump_get)
#define prof_gdump_get_unlocked JEMALLOC_N(prof_gdump_get_unlocked)
#define prof_gdump_set JEMALLOC_N(prof_gdump_set)
#define prof_gdump_val JEMALLOC_N(prof_gdump_val)
#define prof_idump JEMALLOC_N(prof_idump)
#define prof_interval JEMALLOC_N(prof_interval)
#define prof_logging_state JEMALLOC_N(prof_logging_state)
#define prof_log_start JEMALLOC_N(prof_log_start)
#define prof_log_stop JEMALLOC_N(prof_log_stop)
#define prof_lookup JEMALLOC_N(prof_lookup)
#define prof_malloc JEMALLOC_N(prof_malloc)
#define prof_malloc_sample_object JEMALLOC_N(prof_malloc_sample_object)
#define prof_mdump JEMALLOC_N(prof_mdump)
#define prof_postfork_child JEMALLOC_N(prof_postfork_child)
#define prof_postfork_parent JEMALLOC_N(prof_postfork_parent)
#define prof_prefork0 JEMALLOC_N(prof_prefork0)
#define prof_prefork1 JEMALLOC_N(prof_prefork1)
#define prof_realloc JEMALLOC_N(prof_realloc)
#define prof_reset JEMALLOC_N(prof_reset)
#define prof_sample_accum_update JEMALLOC_N(prof_sample_accum_update)
#define prof_sample_threshold_update JEMALLOC_N(prof_sample_threshold_update)
#define prof_tctx_get JEMALLOC_N(prof_tctx_get)
#define prof_tctx_reset JEMALLOC_N(prof_tctx_reset)
#define prof_tctx_set JEMALLOC_N(prof_tctx_set)
#define prof_tdata_cleanup JEMALLOC_N(prof_tdata_cleanup)
#define prof_tdata_count JEMALLOC_N(prof_tdata_count)
#define prof_tdata_get JEMALLOC_N(prof_tdata_get)
#define prof_tdata_init JEMALLOC_N(prof_tdata_init)
#define prof_tdata_reinit JEMALLOC_N(prof_tdata_reinit)
#define prof_thread_active_get JEMALLOC_N(prof_thread_active_get)
@@ -470,170 +343,80 @@
#define prof_thread_active_set JEMALLOC_N(prof_thread_active_set)
#define prof_thread_name_get JEMALLOC_N(prof_thread_name_get)
#define prof_thread_name_set JEMALLOC_N(prof_thread_name_set)
#define psz2ind JEMALLOC_N(psz2ind)
#define psz2u JEMALLOC_N(psz2u)
#define purge_mode_names JEMALLOC_N(purge_mode_names)
#define quarantine JEMALLOC_N(quarantine)
#define quarantine_alloc_hook JEMALLOC_N(quarantine_alloc_hook)
#define quarantine_alloc_hook_work JEMALLOC_N(quarantine_alloc_hook_work)
#define quarantine_cleanup JEMALLOC_N(quarantine_cleanup)
#define rtree_child_read JEMALLOC_N(rtree_child_read)
#define rtree_child_read_hard JEMALLOC_N(rtree_child_read_hard)
#define rtree_child_tryread JEMALLOC_N(rtree_child_tryread)
#define rtree_delete JEMALLOC_N(rtree_delete)
#define rtree_get JEMALLOC_N(rtree_get)
#define rtree_ctx_data_init JEMALLOC_N(rtree_ctx_data_init)
#define rtree_leaf_alloc JEMALLOC_N(rtree_leaf_alloc)
#define rtree_leaf_dalloc JEMALLOC_N(rtree_leaf_dalloc)
#define rtree_leaf_elm_lookup_hard JEMALLOC_N(rtree_leaf_elm_lookup_hard)
#define rtree_new JEMALLOC_N(rtree_new)
#define rtree_node_valid JEMALLOC_N(rtree_node_valid)
#define rtree_set JEMALLOC_N(rtree_set)
#define rtree_start_level JEMALLOC_N(rtree_start_level)
#define rtree_subkey JEMALLOC_N(rtree_subkey)
#define rtree_subtree_read JEMALLOC_N(rtree_subtree_read)
#define rtree_subtree_read_hard JEMALLOC_N(rtree_subtree_read_hard)
#define rtree_subtree_tryread JEMALLOC_N(rtree_subtree_tryread)
#define rtree_val_read JEMALLOC_N(rtree_val_read)
#define rtree_val_write JEMALLOC_N(rtree_val_write)
#define run_quantize_ceil JEMALLOC_N(run_quantize_ceil)
#define run_quantize_floor JEMALLOC_N(run_quantize_floor)
#define s2u JEMALLOC_N(s2u)
#define s2u_compute JEMALLOC_N(s2u_compute)
#define s2u_lookup JEMALLOC_N(s2u_lookup)
#define sa2u JEMALLOC_N(sa2u)
#define set_errno JEMALLOC_N(set_errno)
#define size2index JEMALLOC_N(size2index)
#define size2index_compute JEMALLOC_N(size2index_compute)
#define size2index_lookup JEMALLOC_N(size2index_lookup)
#define size2index_tab JEMALLOC_N(size2index_tab)
#define spin_adaptive JEMALLOC_N(spin_adaptive)
#define spin_init JEMALLOC_N(spin_init)
#define stats_cactive JEMALLOC_N(stats_cactive)
#define stats_cactive_add JEMALLOC_N(stats_cactive_add)
#define stats_cactive_get JEMALLOC_N(stats_cactive_get)
#define stats_cactive_sub JEMALLOC_N(stats_cactive_sub)
#define rtree_node_alloc JEMALLOC_N(rtree_node_alloc)
#define rtree_node_dalloc JEMALLOC_N(rtree_node_dalloc)
#define safety_check_fail JEMALLOC_N(safety_check_fail)
#define safety_check_set_abort JEMALLOC_N(safety_check_set_abort)
#define arena_mutex_names JEMALLOC_N(arena_mutex_names)
#define global_mutex_names JEMALLOC_N(global_mutex_names)
#define opt_stats_print JEMALLOC_N(opt_stats_print)
#define opt_stats_print_opts JEMALLOC_N(opt_stats_print_opts)
#define stats_print JEMALLOC_N(stats_print)
#define tcache_alloc_easy JEMALLOC_N(tcache_alloc_easy)
#define tcache_alloc_large JEMALLOC_N(tcache_alloc_large)
#define tcache_alloc_small JEMALLOC_N(tcache_alloc_small)
#define sc_boot JEMALLOC_N(sc_boot)
#define sc_data_global JEMALLOC_N(sc_data_global)
#define sc_data_init JEMALLOC_N(sc_data_init)
#define sc_data_update_slab_size JEMALLOC_N(sc_data_update_slab_size)
#define sz_boot JEMALLOC_N(sz_boot)
#define sz_index2size_tab JEMALLOC_N(sz_index2size_tab)
#define sz_pind2sz_tab JEMALLOC_N(sz_pind2sz_tab)
#define sz_size2index_tab JEMALLOC_N(sz_size2index_tab)
#define nhbins JEMALLOC_N(nhbins)
#define opt_lg_tcache_max JEMALLOC_N(opt_lg_tcache_max)
#define opt_tcache JEMALLOC_N(opt_tcache)
#define tcache_alloc_small_hard JEMALLOC_N(tcache_alloc_small_hard)
#define tcache_arena_associate JEMALLOC_N(tcache_arena_associate)
#define tcache_arena_reassociate JEMALLOC_N(tcache_arena_reassociate)
#define tcache_bin_flush_large JEMALLOC_N(tcache_bin_flush_large)
#define tcache_bin_flush_small JEMALLOC_N(tcache_bin_flush_small)
#define tcache_bin_info JEMALLOC_N(tcache_bin_info)
#define tcache_boot JEMALLOC_N(tcache_boot)
#define tcache_cleanup JEMALLOC_N(tcache_cleanup)
#define tcache_create JEMALLOC_N(tcache_create)
#define tcache_dalloc_large JEMALLOC_N(tcache_dalloc_large)
#define tcache_dalloc_small JEMALLOC_N(tcache_dalloc_small)
#define tcache_enabled_cleanup JEMALLOC_N(tcache_enabled_cleanup)
#define tcache_enabled_get JEMALLOC_N(tcache_enabled_get)
#define tcache_enabled_set JEMALLOC_N(tcache_enabled_set)
#define tcache_event JEMALLOC_N(tcache_event)
#define tcache_create_explicit JEMALLOC_N(tcache_create_explicit)
#define tcache_event_hard JEMALLOC_N(tcache_event_hard)
#define tcache_flush JEMALLOC_N(tcache_flush)
#define tcache_get JEMALLOC_N(tcache_get)
#define tcache_get_hard JEMALLOC_N(tcache_get_hard)
#define tcache_maxclass JEMALLOC_N(tcache_maxclass)
#define tcache_postfork_child JEMALLOC_N(tcache_postfork_child)
#define tcache_postfork_parent JEMALLOC_N(tcache_postfork_parent)
#define tcache_prefork JEMALLOC_N(tcache_prefork)
#define tcache_salloc JEMALLOC_N(tcache_salloc)
#define tcache_stats_merge JEMALLOC_N(tcache_stats_merge)
#define tcaches JEMALLOC_N(tcaches)
#define tcache_salloc JEMALLOC_N(tcache_salloc)
#define tcaches_create JEMALLOC_N(tcaches_create)
#define tcaches_destroy JEMALLOC_N(tcaches_destroy)
#define tcaches_flush JEMALLOC_N(tcaches_flush)
#define tcaches_get JEMALLOC_N(tcaches_get)
#define thread_allocated_cleanup JEMALLOC_N(thread_allocated_cleanup)
#define thread_deallocated_cleanup JEMALLOC_N(thread_deallocated_cleanup)
#define ticker_copy JEMALLOC_N(ticker_copy)
#define ticker_init JEMALLOC_N(ticker_init)
#define ticker_read JEMALLOC_N(ticker_read)
#define ticker_tick JEMALLOC_N(ticker_tick)
#define ticker_ticks JEMALLOC_N(ticker_ticks)
#define tsd_arena_get JEMALLOC_N(tsd_arena_get)
#define tsd_arena_set JEMALLOC_N(tsd_arena_set)
#define tsd_arenap_get JEMALLOC_N(tsd_arenap_get)
#define tsd_arenas_tdata_bypass_get JEMALLOC_N(tsd_arenas_tdata_bypass_get)
#define tsd_arenas_tdata_bypass_set JEMALLOC_N(tsd_arenas_tdata_bypass_set)
#define tsd_arenas_tdata_bypassp_get JEMALLOC_N(tsd_arenas_tdata_bypassp_get)
#define tsd_arenas_tdata_get JEMALLOC_N(tsd_arenas_tdata_get)
#define tsd_arenas_tdata_set JEMALLOC_N(tsd_arenas_tdata_set)
#define tsd_arenas_tdatap_get JEMALLOC_N(tsd_arenas_tdatap_get)
#define tsd_boot JEMALLOC_N(tsd_boot)
#define tsd_boot0 JEMALLOC_N(tsd_boot0)
#define tsd_boot1 JEMALLOC_N(tsd_boot1)
#define tcache_stats_merge JEMALLOC_N(tcache_stats_merge)
#define tsd_tcache_data_init JEMALLOC_N(tsd_tcache_data_init)
#define tsd_tcache_enabled_data_init JEMALLOC_N(tsd_tcache_enabled_data_init)
#define test_hooks_arena_new_hook JEMALLOC_N(test_hooks_arena_new_hook)
#define test_hooks_libc_hook JEMALLOC_N(test_hooks_libc_hook)
#define malloc_tsd_boot0 JEMALLOC_N(malloc_tsd_boot0)
#define malloc_tsd_boot1 JEMALLOC_N(malloc_tsd_boot1)
#define malloc_tsd_cleanup_register JEMALLOC_N(malloc_tsd_cleanup_register)
#define malloc_tsd_dalloc JEMALLOC_N(malloc_tsd_dalloc)
#define malloc_tsd_malloc JEMALLOC_N(malloc_tsd_malloc)
#define tsd_booted JEMALLOC_N(tsd_booted)
#define tsd_booted_get JEMALLOC_N(tsd_booted_get)
#define tsd_cleanup JEMALLOC_N(tsd_cleanup)
#define tsd_cleanup_wrapper JEMALLOC_N(tsd_cleanup_wrapper)
#define tsd_fetch JEMALLOC_N(tsd_fetch)
#define tsd_fetch_impl JEMALLOC_N(tsd_fetch_impl)
#define tsd_get JEMALLOC_N(tsd_get)
#define tsd_get_allocates JEMALLOC_N(tsd_get_allocates)
#define tsd_iarena_get JEMALLOC_N(tsd_iarena_get)
#define tsd_iarena_set JEMALLOC_N(tsd_iarena_set)
#define tsd_iarenap_get JEMALLOC_N(tsd_iarenap_get)
#define tsd_initialized JEMALLOC_N(tsd_initialized)
#define tsd_init_check_recursion JEMALLOC_N(tsd_init_check_recursion)
#define tsd_init_finish JEMALLOC_N(tsd_init_finish)
#define tsd_init_head JEMALLOC_N(tsd_init_head)
#define tsd_narenas_tdata_get JEMALLOC_N(tsd_narenas_tdata_get)
#define tsd_narenas_tdata_set JEMALLOC_N(tsd_narenas_tdata_set)
#define tsd_narenas_tdatap_get JEMALLOC_N(tsd_narenas_tdatap_get)
#define tsd_wrapper_get JEMALLOC_N(tsd_wrapper_get)
#define tsd_wrapper_set JEMALLOC_N(tsd_wrapper_set)
#define tsd_nominal JEMALLOC_N(tsd_nominal)
#define tsd_prof_tdata_get JEMALLOC_N(tsd_prof_tdata_get)
#define tsd_prof_tdata_set JEMALLOC_N(tsd_prof_tdata_set)
#define tsd_prof_tdatap_get JEMALLOC_N(tsd_prof_tdatap_get)
#define tsd_quarantine_get JEMALLOC_N(tsd_quarantine_get)
#define tsd_quarantine_set JEMALLOC_N(tsd_quarantine_set)
#define tsd_quarantinep_get JEMALLOC_N(tsd_quarantinep_get)
#define tsd_set JEMALLOC_N(tsd_set)
#define tsd_tcache_enabled_get JEMALLOC_N(tsd_tcache_enabled_get)
#define tsd_tcache_enabled_set JEMALLOC_N(tsd_tcache_enabled_set)
#define tsd_tcache_enabledp_get JEMALLOC_N(tsd_tcache_enabledp_get)
#define tsd_tcache_get JEMALLOC_N(tsd_tcache_get)
#define tsd_tcache_set JEMALLOC_N(tsd_tcache_set)
#define tsd_tcachep_get JEMALLOC_N(tsd_tcachep_get)
#define tsd_thread_allocated_get JEMALLOC_N(tsd_thread_allocated_get)
#define tsd_thread_allocated_set JEMALLOC_N(tsd_thread_allocated_set)
#define tsd_thread_allocatedp_get JEMALLOC_N(tsd_thread_allocatedp_get)
#define tsd_thread_deallocated_get JEMALLOC_N(tsd_thread_deallocated_get)
#define tsd_thread_deallocated_set JEMALLOC_N(tsd_thread_deallocated_set)
#define tsd_thread_deallocatedp_get JEMALLOC_N(tsd_thread_deallocatedp_get)
#define tsd_fetch_slow JEMALLOC_N(tsd_fetch_slow)
#define tsd_global_slow JEMALLOC_N(tsd_global_slow)
#define tsd_global_slow_dec JEMALLOC_N(tsd_global_slow_dec)
#define tsd_global_slow_inc JEMALLOC_N(tsd_global_slow_inc)
#define tsd_postfork_child JEMALLOC_N(tsd_postfork_child)
#define tsd_postfork_parent JEMALLOC_N(tsd_postfork_parent)
#define tsd_prefork JEMALLOC_N(tsd_prefork)
#define tsd_slow_update JEMALLOC_N(tsd_slow_update)
#define tsd_state_set JEMALLOC_N(tsd_state_set)
#define tsd_tls JEMALLOC_N(tsd_tls)
#define tsd_tsd JEMALLOC_N(tsd_tsd)
#define tsd_tsdn JEMALLOC_N(tsd_tsdn)
#define tsd_witness_fork_get JEMALLOC_N(tsd_witness_fork_get)
#define tsd_witness_fork_set JEMALLOC_N(tsd_witness_fork_set)
#define tsd_witness_forkp_get JEMALLOC_N(tsd_witness_forkp_get)
#define tsd_witnesses_get JEMALLOC_N(tsd_witnesses_get)
#define tsd_witnesses_set JEMALLOC_N(tsd_witnesses_set)
#define tsd_witnessesp_get JEMALLOC_N(tsd_witnessesp_get)
#define tsdn_fetch JEMALLOC_N(tsdn_fetch)
#define tsdn_null JEMALLOC_N(tsdn_null)
#define tsdn_tsd JEMALLOC_N(tsdn_tsd)
#define u2rz JEMALLOC_N(u2rz)
#define valgrind_freelike_block JEMALLOC_N(valgrind_freelike_block)
#define valgrind_make_mem_defined JEMALLOC_N(valgrind_make_mem_defined)
#define valgrind_make_mem_noaccess JEMALLOC_N(valgrind_make_mem_noaccess)
#define valgrind_make_mem_undefined JEMALLOC_N(valgrind_make_mem_undefined)
#define witness_assert_depth JEMALLOC_N(witness_assert_depth)
#define witness_assert_depth_to_rank JEMALLOC_N(witness_assert_depth_to_rank)
#define witness_assert_lockless JEMALLOC_N(witness_assert_lockless)
#define witness_assert_not_owner JEMALLOC_N(witness_assert_not_owner)
#define witness_assert_owner JEMALLOC_N(witness_assert_owner)
#define witness_depth_error JEMALLOC_N(witness_depth_error)
#define witness_fork_cleanup JEMALLOC_N(witness_fork_cleanup)
#define witnesses_cleanup JEMALLOC_N(witnesses_cleanup)
#define witness_init JEMALLOC_N(witness_init)
#define witness_lock JEMALLOC_N(witness_lock)
#define witness_lock_error JEMALLOC_N(witness_lock_error)
#define witness_not_owner_error JEMALLOC_N(witness_not_owner_error)
#define witness_owner JEMALLOC_N(witness_owner)
#define witness_owner_error JEMALLOC_N(witness_owner_error)
#define witness_postfork_child JEMALLOC_N(witness_postfork_child)
#define witness_postfork_parent JEMALLOC_N(witness_postfork_parent)
#define witness_prefork JEMALLOC_N(witness_prefork)
#define witness_unlock JEMALLOC_N(witness_unlock)
#define witnesses_cleanup JEMALLOC_N(witnesses_cleanup)
#define zone_register JEMALLOC_N(zone_register)

View File

@@ -14,6 +14,7 @@ extern bool opt_prof_gdump; /* High-water memory dumping. */
extern bool opt_prof_final; /* Final profile dumping. */
extern bool opt_prof_leak; /* Dump leak summary at exit. */
extern bool opt_prof_accum; /* Report cumulative bytes. */
extern bool opt_prof_log; /* Turn logging on at boot. */
extern char opt_prof_prefix[
/* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
@@ -45,7 +46,8 @@ extern size_t lg_prof_sample;
void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated);
void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize,
prof_tctx_t *tctx);
void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx);
void prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize,
prof_tctx_t *tctx);
void bt_init(prof_bt_t *bt, void **vec);
void prof_backtrace(prof_bt_t *bt);
prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt);
@@ -89,4 +91,15 @@ void prof_postfork_parent(tsdn_t *tsdn);
void prof_postfork_child(tsdn_t *tsdn);
void prof_sample_threshold_update(prof_tdata_t *tdata);
bool prof_log_start(tsdn_t *tsdn, const char *filename);
bool prof_log_stop(tsdn_t *tsdn);
/*
 * The declarations below are only visible when JEMALLOC_JET is defined.
 * NOTE(review): presumably these expose prof-log internals to the test
 * harness -- confirm against the definitions in the prof implementation.
 */
#ifdef JEMALLOC_JET
size_t prof_log_bt_count(void);
size_t prof_log_alloc_count(void);
size_t prof_log_thr_count(void);
bool prof_log_is_logging(void);
bool prof_log_rep_check(void);
void prof_log_dummy_set(bool new_value);
#endif
#endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */

View File

@@ -4,7 +4,8 @@
#include "jemalloc/internal/mutex.h"
static inline bool
prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum, uint64_t accumbytes) {
prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum,
uint64_t accumbytes) {
cassert(config_prof);
bool overflow;
@@ -42,7 +43,8 @@ prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum, uint64_t accumbytes) {
}
static inline void
prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, size_t usize) {
prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum,
size_t usize) {
cassert(config_prof);
/*
@@ -55,18 +57,29 @@ prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, size_t usize) {
#ifdef JEMALLOC_ATOMIC_U64
a0 = atomic_load_u64(&prof_accum->accumbytes, ATOMIC_RELAXED);
do {
a1 = (a0 >= LARGE_MINCLASS - usize) ? a0 - (LARGE_MINCLASS -
usize) : 0;
a1 = (a0 >= SC_LARGE_MINCLASS - usize)
? a0 - (SC_LARGE_MINCLASS - usize) : 0;
} while (!atomic_compare_exchange_weak_u64(&prof_accum->accumbytes, &a0,
a1, ATOMIC_RELAXED, ATOMIC_RELAXED));
#else
malloc_mutex_lock(tsdn, &prof_accum->mtx);
a0 = prof_accum->accumbytes;
a1 = (a0 >= LARGE_MINCLASS - usize) ? a0 - (LARGE_MINCLASS - usize) :
0;
a1 = (a0 >= SC_LARGE_MINCLASS - usize)
? a0 - (SC_LARGE_MINCLASS - usize) : 0;
prof_accum->accumbytes = a1;
malloc_mutex_unlock(tsdn, &prof_accum->mtx);
#endif
}
/*
 * Return the current value of prof_active, read directly without taking any
 * lock.  Takes no arguments and has no side effects.
 */
JEMALLOC_ALWAYS_INLINE bool
prof_active_get_unlocked(void) {
/*
* Even if opt_prof is true, sampling can be temporarily disabled by
* setting prof_active to false. No locking is used when reading
* prof_active in the fast path, so there are no guarantees regarding
* how long it will take for all threads to notice state changes.
*/
return prof_active;
}
#endif /* JEMALLOC_INTERNAL_PROF_INLINES_A_H */

View File

@@ -1,19 +1,9 @@
#ifndef JEMALLOC_INTERNAL_PROF_INLINES_B_H
#define JEMALLOC_INTERNAL_PROF_INLINES_B_H
#include "jemalloc/internal/safety_check.h"
#include "jemalloc/internal/sz.h"
/*
 * Return the current value of prof_active, read directly without taking any
 * lock.  Takes no arguments and has no side effects.
 */
JEMALLOC_ALWAYS_INLINE bool
prof_active_get_unlocked(void) {
/*
* Even if opt_prof is true, sampling can be temporarily disabled by
* setting prof_active to false. No locking is used when reading
* prof_active in the fast path, so there are no guarantees regarding
* how long it will take for all threads to notice state changes.
*/
return prof_active;
}
JEMALLOC_ALWAYS_INLINE bool
prof_gdump_get_unlocked(void) {
/*
@@ -72,6 +62,41 @@ prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) {
arena_prof_tctx_reset(tsdn, ptr, tctx);
}
/*
 * Return the nstime_t that arena_prof_alloc_time_get() reports for ptr.
 *
 * tsdn, ptr and alloc_ctx are forwarded unchanged to the arena-level getter.
 * Checks config_prof via cassert() and asserts that ptr is non-NULL.
 */
JEMALLOC_ALWAYS_INLINE nstime_t
prof_alloc_time_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) {
cassert(config_prof);
assert(ptr != NULL);
return arena_prof_alloc_time_get(tsdn, ptr, alloc_ctx);
}
/*
 * Store the nstime_t value t for ptr by forwarding to
 * arena_prof_alloc_time_set().
 *
 * tsdn, ptr, alloc_ctx and t are passed through unchanged.  Checks
 * config_prof via cassert() and asserts that ptr is non-NULL.
 */
JEMALLOC_ALWAYS_INLINE void
prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx,
nstime_t t) {
cassert(config_prof);
assert(ptr != NULL);
arena_prof_alloc_time_set(tsdn, ptr, alloc_ctx, t);
}
/*
 * Compare the thread's bytes-until-sample counter against usize.
 *
 * With update == false the counter is only read, and the result is true when
 * it is at least usize.  With update == true, usize is first subtracted from
 * the counter (and written back through tsd_bytes_until_sample_set() only if
 * tsd_nominal(tsd) holds); the result is then true when the decremented
 * counter is still non-negative.
 */
JEMALLOC_ALWAYS_INLINE bool
prof_sample_check(tsd_t *tsd, size_t usize, bool update) {
	int64_t remaining = tsd_bytes_until_sample_get(tsd);
	ssize_t threshold;

	if (!update) {
		/* Read-only query: the counter must cover this request. */
		threshold = usize;
	} else {
		/* Consume usize; afterwards the counter must not be negative. */
		threshold = 0;
		remaining -= usize;
		if (tsd_nominal(tsd)) {
			tsd_bytes_until_sample_set(tsd, remaining);
		}
	}

	return likely(remaining >= threshold) ? true : false;
}
JEMALLOC_ALWAYS_INLINE bool
prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
prof_tdata_t **tdata_out) {
@@ -79,6 +104,12 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
cassert(config_prof);
/* Fastpath: no need to load tdata */
if (likely(prof_sample_check(tsd, usize, update))) {
return true;
}
bool booted = tsd_prof_tdata_get(tsd);
tdata = prof_tdata_get(tsd, true);
if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) {
tdata = NULL;
@@ -92,12 +123,15 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
return true;
}
if (likely(tdata->bytes_until_sample >= usize)) {
if (update) {
tdata->bytes_until_sample -= usize;
}
/*
* If this was the first creation of tdata, then
* prof_tdata_get() reset bytes_until_sample, so decrement and
* check it again
*/
if (!booted && prof_sample_check(tsd, usize, update)) {
return true;
} else {
}
if (tsd_reentrancy_level_get(tsd) > 0) {
return true;
}
@@ -106,7 +140,6 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
prof_sample_threshold_update(tdata);
}
return !tdata->active;
}
}
JEMALLOC_ALWAYS_INLINE prof_tctx_t *
@@ -198,7 +231,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx,
* counters.
*/
if (unlikely(old_sampled)) {
prof_free_sampled_object(tsd, old_usize, old_tctx);
prof_free_sampled_object(tsd, ptr, old_usize, old_tctx);
}
}
@@ -210,7 +243,7 @@ prof_free(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx) {
assert(usize == isalloc(tsd_tsdn(tsd), ptr));
if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) {
prof_free_sampled_object(tsd, usize, tctx);
prof_free_sampled_object(tsd, ptr, usize, tctx);
}
}

View File

@@ -169,7 +169,6 @@ struct prof_tdata_s {
/* Sampling state. */
uint64_t prng_state;
uint64_t bytes_until_sample;
/* State used to avoid dumping while operating on prof internals. */
bool enq;

View File

@@ -0,0 +1,77 @@
#ifndef JEMALLOC_INTERNAL_QUANTUM_H
#define JEMALLOC_INTERNAL_QUANTUM_H
/*
* Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size
* classes).
*/
/*
 * Pick LG_QUANTUM from the target architecture unless the build already
 * supplied one (e.g. via --with-lg-quantum).
 */
#ifndef LG_QUANTUM
#  if (defined(__i386__) || defined(_M_IX86))
#    define LG_QUANTUM		4
#  endif
#  ifdef __ia64__
#    define LG_QUANTUM		4
#  endif
#  ifdef __alpha__
#    define LG_QUANTUM		4
#  endif
#  if (defined(__sparc64__) || defined(__sparcv9) || defined(__sparc_v9__))
#    define LG_QUANTUM		4
#  endif
#  if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64))
#    define LG_QUANTUM		4
#  endif
#  ifdef __arm__
#    define LG_QUANTUM		3
#  endif
#  ifdef __aarch64__
#    define LG_QUANTUM		4
#  endif
#  ifdef __hppa__
#    define LG_QUANTUM		4
#  endif
#  ifdef __m68k__
#    define LG_QUANTUM		3
#  endif
#  ifdef __mips__
#    define LG_QUANTUM		3
#  endif
#  ifdef __nios2__
#    define LG_QUANTUM		3
#  endif
#  ifdef __or1k__
#    define LG_QUANTUM		3
#  endif
#  ifdef __powerpc__
#    define LG_QUANTUM		4
#  endif
#  if defined(__riscv) || defined(__riscv__)
#    define LG_QUANTUM		4
#  endif
#  ifdef __s390__
#    define LG_QUANTUM		4
#  endif
#  if (defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || \
	defined(__SH4_SINGLE_ONLY__))
#    define LG_QUANTUM		4
#  endif
#  ifdef __tile__
#    define LG_QUANTUM		4
#  endif
#  ifdef __le32__
#    define LG_QUANTUM		4
#  endif
#  ifndef LG_QUANTUM
/*
 * The trailing backslash keeps both string literals inside the #error
 * directive, so the full hint is printed and no stray token follows it.
 */
#    error "Unknown minimum alignment for architecture; specify via " \
	"--with-lg-quantum"
#  endif
#endif
/* Quantum size in bytes, 2^LG_QUANTUM, as a size_t. */
#define QUANTUM ((size_t)(1U << LG_QUANTUM))
/* Mask of the low LG_QUANTUM bits (QUANTUM is a power of two). */
#define QUANTUM_MASK (QUANTUM - 1)
/*
 * Return the smallest quantum multiple that is >= a.  The argument is
 * expanded exactly once; the addition wraps if a + QUANTUM_MASK overflows.
 */
#define QUANTUM_CEILING(a) \
(((a) + QUANTUM_MASK) & ~QUANTUM_MASK)
#endif /* JEMALLOC_INTERNAL_QUANTUM_H */

View File

@@ -4,7 +4,7 @@
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/rtree_tsd.h"
#include "jemalloc/internal/size_classes.h"
#include "jemalloc/internal/sc.h"
#include "jemalloc/internal/tsd.h"
/*
@@ -31,7 +31,7 @@
# error Unsupported number of significant virtual address bits
#endif
/* Use compact leaf representation if virtual address encoding allows. */
#if RTREE_NHIB >= LG_CEIL_NSIZES
#if RTREE_NHIB >= LG_CEIL(SC_NSIZES)
# define RTREE_LEAF_COMPACT
#endif
@@ -170,17 +170,29 @@ rtree_subkey(uintptr_t key, unsigned level) {
*/
# ifdef RTREE_LEAF_COMPACT
JEMALLOC_ALWAYS_INLINE uintptr_t
rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
bool dependent) {
rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree,
rtree_leaf_elm_t *elm, bool dependent) {
return (uintptr_t)atomic_load_p(&elm->le_bits, dependent
? ATOMIC_RELAXED : ATOMIC_ACQUIRE);
}
/*
 * Extract the extent pointer from a packed leaf-element word.  The slab flag
 * lives in bit 0 and is always cleared from the result; the bits above
 * LG_VADDR are reconstructed in an architecture-specific way.
 */
JEMALLOC_ALWAYS_INLINE extent_t *
rtree_leaf_elm_bits_extent_get(uintptr_t bits) {
#    ifdef __aarch64__
	/*
	 * On aarch64 the upper address bits are not a sign extension of the
	 * top virtual-address bit; they are zero.  Keep only the low LG_VADDR
	 * bits, minus the slab bit.
	 */
	uintptr_t addr_mask = (((uintptr_t)1 << LG_VADDR) - 1) &
	    ~(uintptr_t)1;
	return (extent_t *)(bits & addr_mask);
#    else
	/*
	 * Shift the non-address high bits out and arithmetic-shift back so the
	 * top address bit is sign-extended, then drop the slab bit.
	 */
	uintptr_t sign_extended = (uintptr_t)((intptr_t)(bits << RTREE_NHIB) >>
	    RTREE_NHIB);
	return (extent_t *)(sign_extended & ~((uintptr_t)0x1));
#    endif
}
JEMALLOC_ALWAYS_INLINE szind_t
@@ -196,8 +208,8 @@ rtree_leaf_elm_bits_slab_get(uintptr_t bits) {
# endif
JEMALLOC_ALWAYS_INLINE extent_t *
rtree_leaf_elm_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
bool dependent) {
rtree_leaf_elm_extent_read(tsdn_t *tsdn, rtree_t *rtree,
rtree_leaf_elm_t *elm, bool dependent) {
#ifdef RTREE_LEAF_COMPACT
uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent);
return rtree_leaf_elm_bits_extent_get(bits);
@@ -209,8 +221,8 @@ rtree_leaf_elm_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
}
JEMALLOC_ALWAYS_INLINE szind_t
rtree_leaf_elm_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
bool dependent) {
rtree_leaf_elm_szind_read(tsdn_t *tsdn, rtree_t *rtree,
rtree_leaf_elm_t *elm, bool dependent) {
#ifdef RTREE_LEAF_COMPACT
uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent);
return rtree_leaf_elm_bits_szind_get(bits);
@@ -221,8 +233,8 @@ rtree_leaf_elm_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
}
JEMALLOC_ALWAYS_INLINE bool
rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
bool dependent) {
rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree,
rtree_leaf_elm_t *elm, bool dependent) {
#ifdef RTREE_LEAF_COMPACT
uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent);
return rtree_leaf_elm_bits_slab_get(bits);
@@ -233,8 +245,8 @@ rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
}
static inline void
rtree_leaf_elm_extent_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
extent_t *extent) {
rtree_leaf_elm_extent_write(tsdn_t *tsdn, rtree_t *rtree,
rtree_leaf_elm_t *elm, extent_t *extent) {
#ifdef RTREE_LEAF_COMPACT
uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true);
uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) <<
@@ -247,9 +259,9 @@ rtree_leaf_elm_extent_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
}
static inline void
rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
szind_t szind) {
assert(szind <= NSIZES);
rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree,
rtree_leaf_elm_t *elm, szind_t szind) {
assert(szind <= SC_NSIZES);
#ifdef RTREE_LEAF_COMPACT
uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm,
@@ -265,8 +277,8 @@ rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
}
static inline void
rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
bool slab) {
rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree,
rtree_leaf_elm_t *elm, bool slab) {
#ifdef RTREE_LEAF_COMPACT
uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm,
true);
@@ -280,8 +292,8 @@ rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
}
static inline void
rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
extent_t *extent, szind_t szind, bool slab) {
rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree,
rtree_leaf_elm_t *elm, extent_t *extent, szind_t szind, bool slab) {
#ifdef RTREE_LEAF_COMPACT
uintptr_t bits = ((uintptr_t)szind << LG_VADDR) |
((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1)) |
@@ -301,7 +313,7 @@ rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm,
static inline void
rtree_leaf_elm_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree,
rtree_leaf_elm_t *elm, szind_t szind, bool slab) {
assert(!slab || szind < NBINS);
assert(!slab || szind < SC_NBINS);
/*
* The caller implicitly assures that it is the only writer to the szind
@@ -417,7 +429,7 @@ rtree_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key,
dependent);
if (!dependent && elm == NULL) {
return NSIZES;
return SC_NSIZES;
}
return rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent);
}
@@ -440,6 +452,42 @@ rtree_extent_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
return false;
}
/*
 * Fast-path lookup of (szind, slab) for key that consults only the
 * direct-mapped L1 slot of rtree_ctx's cache.
 *
 * On a hit, *r_szind and *r_slab are filled in and true is returned.  On a
 * miss, false is returned and the outputs are left untouched.
 *
 * key may be NULL so that callers can avoid an extra branch on the fast path;
 * that case is reported as a miss (this presumably relies on how the cache
 * slots are initialized, which is not visible here).
 */
JEMALLOC_ALWAYS_INLINE bool
rtree_szind_slab_read_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
    uintptr_t key, szind_t *r_szind, bool *r_slab) {
	size_t slot = rtree_cache_direct_map(key);
	uintptr_t leafkey = rtree_leafkey(key);
	assert(leafkey != RTREE_LEAFKEY_INVALID);

	/* Miss: the cached slot belongs to a different leaf. */
	if (!likely(rtree_ctx->cache[slot].leafkey == leafkey)) {
		return false;
	}

	rtree_leaf_elm_t *leaf = rtree_ctx->cache[slot].leaf;
	assert(leaf != NULL);
	rtree_leaf_elm_t *elm = &leaf[rtree_subkey(key, RTREE_HEIGHT-1)];
#ifdef RTREE_LEAF_COMPACT
	/* Both fields are packed in one word; read it once and decode. */
	uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true);
	*r_szind = rtree_leaf_elm_bits_szind_get(bits);
	*r_slab = rtree_leaf_elm_bits_slab_get(bits);
#else
	*r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, true);
	*r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, true);
#endif
	return true;
}
JEMALLOC_ALWAYS_INLINE bool
rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
uintptr_t key, bool dependent, szind_t *r_szind, bool *r_slab) {
@@ -448,15 +496,21 @@ rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
if (!dependent && elm == NULL) {
return true;
}
#ifdef RTREE_LEAF_COMPACT
uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent);
*r_szind = rtree_leaf_elm_bits_szind_get(bits);
*r_slab = rtree_leaf_elm_bits_slab_get(bits);
#else
*r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent);
*r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, dependent);
#endif
return false;
}
static inline void
rtree_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
uintptr_t key, szind_t szind, bool slab) {
assert(!slab || szind < NBINS);
assert(!slab || szind < SC_NBINS);
rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true);
rtree_leaf_elm_szind_slab_update(tsdn, rtree, elm, szind, slab);
@@ -468,7 +522,7 @@ rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx,
rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true);
assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false) !=
NULL);
rtree_leaf_elm_write(tsdn, rtree, elm, NULL, NSIZES, false);
rtree_leaf_elm_write(tsdn, rtree, elm, NULL, SC_NSIZES, false);
}
#endif /* JEMALLOC_INTERNAL_RTREE_H */

View File

@@ -26,7 +26,7 @@
* Zero initializer required for tsd initialization only. Proper initialization
* done via rtree_ctx_data_init().
*/
#define RTREE_CTX_ZERO_INITIALIZER {{{0}}}
#define RTREE_CTX_ZERO_INITIALIZER {{{0, 0}}, {{0, 0}}}
typedef struct rtree_leaf_elm_s rtree_leaf_elm_t;

View File

@@ -0,0 +1,26 @@
#ifndef JEMALLOC_INTERNAL_SAFETY_CHECK_H
#define JEMALLOC_INTERNAL_SAFETY_CHECK_H
void safety_check_fail(const char *format, ...);
/* Can set to NULL for a default. */
void safety_check_set_abort(void (*abort_fn)());
/*
 * Fill the redzone that follows a usize-byte allocation with the 0xBC marker.
 * The redzone covers offsets [usize, bumped_usize), capped at 32 bytes.
 * usize must be strictly smaller than bumped_usize.
 */
JEMALLOC_ALWAYS_INLINE void
safety_check_set_redzone(void *ptr, size_t usize, size_t bumped_usize) {
	assert(usize < bumped_usize);
	unsigned char *bytes = (unsigned char *)ptr;
	size_t off = usize;
	while (off < bumped_usize && off < usize + 32) {
		bytes[off] = 0xBC;
		++off;
	}
}
/*
 * Check that the redzone after a usize-byte allocation still holds the 0xBC
 * marker.  The same offsets are examined as safety_check_set_redzone() writes:
 * [usize, bumped_usize), capped at 32 bytes.  safety_check_fail() is invoked
 * once for every byte that does not match.
 */
JEMALLOC_ALWAYS_INLINE void
safety_check_verify_redzone(const void *ptr, size_t usize, size_t bumped_usize)
{
	const unsigned char *bytes = (const unsigned char *)ptr;
	size_t off = usize;
	while (off < bumped_usize && off < usize + 32) {
		if (unlikely(bytes[off] != 0xBC)) {
			safety_check_fail("Use after free error\n");
		}
		++off;
	}
}
#endif /*JEMALLOC_INTERNAL_SAFETY_CHECK_H */

View File

@@ -0,0 +1,333 @@
#ifndef JEMALLOC_INTERNAL_SC_H
#define JEMALLOC_INTERNAL_SC_H
#include "jemalloc/internal/jemalloc_internal_types.h"
/*
* Size class computations:
*
* These are a little tricky; we'll first start by describing how things
* generally work, and then describe some of the details.
*
* Ignore the first few size classes for a moment. We can then split all the
* remaining size classes into groups. The size classes in a group are spaced
* such that they cover allocation request sizes in a power-of-2 range. The
* power of two is called the base of the group, and the size classes in it
* satisfy allocations in the half-open range (base, base * 2]. There are
* SC_NGROUP size classes in each group, equally spaced in the range, so that
* each one covers allocations for base / SC_NGROUP possible allocation sizes.
* We call that value (base / SC_NGROUP) the delta of the group. Each size class
* is delta larger than the one before it (including the initial size class in a
* group, which is delta larger than base, the largest size class in the
* previous group).
* To make the math all work out nicely, we require that SC_NGROUP is a power of
* two, and define it in terms of SC_LG_NGROUP. We'll often talk in terms of
* lg_base and lg_delta. For each of these groups then, we have that
* lg_delta == lg_base - SC_LG_NGROUP.
* The size classes in a group with a given lg_base and lg_delta (which, recall,
* can be computed from lg_base for these groups) are therefore:
* base + 1 * delta
* which covers allocations in (base, base + 1 * delta]
* base + 2 * delta
* which covers allocations in (base + 1 * delta, base + 2 * delta].
* base + 3 * delta
* which covers allocations in (base + 2 * delta, base + 3 * delta].
* ...
* base + SC_NGROUP * delta ( == 2 * base)
* which covers allocations in (base + (SC_NGROUP - 1) * delta, 2 * base].
* (Note that currently SC_NGROUP is always 4, so the "..." is empty in
* practice.)
* Note that the last size class in the group is the next power of two (after
* base), so that we've set up the induction correctly for the next group's
* selection of delta.
*
* Now, let's start considering the first few size classes. Two extra constants
* come into play here: LG_QUANTUM and SC_LG_TINY_MIN. LG_QUANTUM ensures
* correct platform alignment; all objects of size (1 << LG_QUANTUM) or larger
* are at least (1 << LG_QUANTUM) aligned; this can be used to ensure that we
* never return improperly aligned memory, by making (1 << LG_QUANTUM) equal the
* highest required alignment of a platform. For allocation sizes smaller than
* (1 << LG_QUANTUM) though, we can be more relaxed (since we don't support
* platforms with types with alignment larger than their size). To allow such
* allocations (without wasting space unnecessarily), we introduce tiny size
* classes; one per power of two, up until we hit the quantum size. There are
* therefore LG_QUANTUM - SC_LG_TINY_MIN such size classes.
*
* Next, we have a size class of size (1 << LG_QUANTUM). This can't be the
* start of a group in the sense we described above (covering a power of two
* range) since, if we divided into it to pick a value of delta, we'd get a
* delta smaller than (1 << LG_QUANTUM) for sizes >= (1 << LG_QUANTUM), which
* is against the rules.
*
* The first base we can divide by SC_NGROUP while still being at least
* (1 << LG_QUANTUM) is SC_NGROUP * (1 << LG_QUANTUM). We can get there by
* having SC_NGROUP size classes, spaced (1 << LG_QUANTUM) apart. These size
* classes are:
* 1 * (1 << LG_QUANTUM)
* 2 * (1 << LG_QUANTUM)
* 3 * (1 << LG_QUANTUM)
* ... (although, as above, this "..." is empty in practice)
* SC_NGROUP * (1 << LG_QUANTUM).
*
* There are SC_NGROUP of these size classes, so we can regard it as a sort of
* pseudo-group, even though it spans multiple powers of 2, is divided
* differently, and both starts and ends on a power of 2 (as opposed to just
* ending). SC_NGROUP is itself a power of two, so the first group after the
* pseudo-group has the power-of-two base SC_NGROUP * (1 << LG_QUANTUM), for a
* lg_base of LG_QUANTUM + SC_LG_NGROUP. We can divide this base into SC_NGROUP
* sizes without violating our LG_QUANTUM requirements, so we can safely set
* lg_delta = lg_base - SC_LG_NGROUP (== LG_QUANTUM).
*
* So, in order, the size classes are:
*
* Tiny size classes:
* - Count: LG_QUANTUM - SC_LG_TINY_MIN.
* - Sizes:
* 1 << SC_LG_TINY_MIN
* 1 << (SC_LG_TINY_MIN + 1)
* 1 << (SC_LG_TINY_MIN + 2)
* ...
* 1 << (LG_QUANTUM - 1)
*
* Initial pseudo-group:
* - Count: SC_NGROUP
* - Sizes:
* 1 * (1 << LG_QUANTUM)
* 2 * (1 << LG_QUANTUM)
* 3 * (1 << LG_QUANTUM)
* ...
* SC_NGROUP * (1 << LG_QUANTUM)
*
* Regular group 0:
* - Count: SC_NGROUP
* - Sizes:
* (relative to lg_base of LG_QUANTUM + SC_LG_NGROUP and lg_delta of
* lg_base - SC_LG_NGROUP)
* (1 << lg_base) + 1 * (1 << lg_delta)
* (1 << lg_base) + 2 * (1 << lg_delta)
* (1 << lg_base) + 3 * (1 << lg_delta)
* ...
* (1 << lg_base) + SC_NGROUP * (1 << lg_delta) [ == (1 << (lg_base + 1)) ]
*
* Regular group 1:
* - Count: SC_NGROUP
* - Sizes:
* (relative to lg_base of LG_QUANTUM + SC_LG_NGROUP + 1 and lg_delta of
* lg_base - SC_LG_NGROUP)
* (1 << lg_base) + 1 * (1 << lg_delta)
* (1 << lg_base) + 2 * (1 << lg_delta)
* (1 << lg_base) + 3 * (1 << lg_delta)
* ...
* (1 << lg_base) + SC_NGROUP * (1 << lg_delta) [ == (1 << (lg_base + 1)) ]
*
* ...
*
* Regular group N:
* - Count: SC_NGROUP
* - Sizes:
* (relative to lg_base of LG_QUANTUM + SC_LG_NGROUP + N and lg_delta of
* lg_base - SC_LG_NGROUP)
* (1 << lg_base) + 1 * (1 << lg_delta)
* (1 << lg_base) + 2 * (1 << lg_delta)
* (1 << lg_base) + 3 * (1 << lg_delta)
* ...
* (1 << lg_base) + SC_NGROUP * (1 << lg_delta) [ == (1 << (lg_base + 1)) ]
*
*
* Representation of metadata:
* To make the math easy, we'll mostly work in lg quantities. We record lg_base,
* lg_delta, and ndelta (i.e. number of deltas above the base) on a
* per-size-class basis, and maintain the invariant that, across all size
* classes, size == (1 << lg_base) + ndelta * (1 << lg_delta).
*
* For regular groups (i.e. those with lg_base >= LG_QUANTUM + SC_LG_NGROUP),
* lg_delta is lg_base - SC_LG_NGROUP, and ndelta goes from 1 to SC_NGROUP.
*
* For the initial tiny size classes (if any), lg_base is lg(size class size).
* lg_delta is lg_base for the first size class, and lg_base - 1 for all
* subsequent ones. ndelta is always 0.
*
* For the pseudo-group, if there are no tiny size classes, then we set
* lg_base == LG_QUANTUM, lg_delta == LG_QUANTUM, and have ndelta range from 0
* to SC_NGROUP - 1. (Note that delta == base, so base + (SC_NGROUP - 1) * delta
* is just SC_NGROUP * base, or (1 << (SC_LG_NGROUP + LG_QUANTUM)), so we do
* indeed get a power of two that way). If there *are* tiny size classes, then
* the first size class needs to have lg_delta relative to the largest tiny size
* class. We therefore set lg_base == LG_QUANTUM - 1,
* lg_delta == LG_QUANTUM - 1, and ndelta == 1, keeping the rest of the
* pseudo-group the same.
*
*
* Other terminology:
* "Small" size classes mean those that are allocated out of bins, which is the
* same as those that are slab allocated.
* "Large" size classes are those that are not small. The cutoff for counting as
* large is page size * group size.
*/
/*
* Size class N + (1 << SC_LG_NGROUP) is twice the size of size class N.
*/
#define SC_LG_NGROUP 2
#define SC_LG_TINY_MIN 3
#if SC_LG_TINY_MIN == 0
/* The div module doesn't support division by 1, which this would require. */
#error "Unsupported LG_TINY_MIN"
#endif
/*
* The definitions below are all determined by the above settings and system
* characteristics.
*/
#define SC_NGROUP (1ULL << SC_LG_NGROUP)
#define SC_PTR_BITS ((1ULL << LG_SIZEOF_PTR) * 8)
#define SC_NTINY (LG_QUANTUM - SC_LG_TINY_MIN)
#define SC_LG_TINY_MAXCLASS (LG_QUANTUM > SC_LG_TINY_MIN ? LG_QUANTUM - 1 : -1)
#define SC_NPSEUDO SC_NGROUP
#define SC_LG_FIRST_REGULAR_BASE (LG_QUANTUM + SC_LG_NGROUP)
/*
* We cap allocations to be less than 2 ** (ptr_bits - 1), so the highest base
* we need is 2 ** (ptr_bits - 2). (This also means that the last group is 1
* size class shorter than the others).
* We could probably save some space in arenas by capping this at LG_VADDR size.
*/
#define SC_LG_BASE_MAX (SC_PTR_BITS - 2)
#define SC_NREGULAR (SC_NGROUP * \
(SC_LG_BASE_MAX - SC_LG_FIRST_REGULAR_BASE + 1) - 1)
#define SC_NSIZES (SC_NTINY + SC_NPSEUDO + SC_NREGULAR)
/* The number of size classes that are a multiple of the page size. */
#define SC_NPSIZES ( \
/* Start with all the size classes. */ \
SC_NSIZES \
/* Subtract out those groups with too small a base. */ \
- (LG_PAGE - 1 - SC_LG_FIRST_REGULAR_BASE) * SC_NGROUP \
/* And the pseudo-group. */ \
- SC_NPSEUDO \
/* And the tiny group. */ \
- SC_NTINY \
/* Sizes where ndelta*delta is not a multiple of the page size. */ \
- (SC_LG_NGROUP * SC_NGROUP))
/*
* Note that the last line is computed as the sum of the second column in the
* following table:
* lg(base) | count of sizes to exclude
* ------------------------------|-----------------------------
* LG_PAGE - 1 | SC_NGROUP - 1
* LG_PAGE | SC_NGROUP - 1
* LG_PAGE + 1 | SC_NGROUP - 2
* LG_PAGE + 2 | SC_NGROUP - 4
* ... | ...
* LG_PAGE + (SC_LG_NGROUP - 1) | SC_NGROUP - (SC_NGROUP / 2)
*/
/*
* We declare a size class is binnable if size < page size * group. Or, in other
* words, lg(size) < lg(page size) + lg(group size).
*/
#define SC_NBINS ( \
/* Sub-regular size classes. */ \
SC_NTINY + SC_NPSEUDO \
/* Groups with lg_regular_min_base <= lg_base <= lg_base_max */ \
+ SC_NGROUP * (LG_PAGE + SC_LG_NGROUP - SC_LG_FIRST_REGULAR_BASE) \
/* Last SC of the last group hits the bound exactly; exclude it. */ \
- 1)
/*
* The size2index_tab lookup table uses uint8_t to encode each bin index, so we
* cannot support more than 256 small size classes.
*/
#if (SC_NBINS > 256)
# error "Too many small size classes"
#endif
/* The largest size class in the lookup table. */
#define SC_LOOKUP_MAXCLASS ((size_t)1 << 12)
/* Internal, only used for the definition of SC_SMALL_MAXCLASS. */
#define SC_SMALL_MAX_BASE ((size_t)1 << (LG_PAGE + SC_LG_NGROUP - 1))
#define SC_SMALL_MAX_DELTA ((size_t)1 << (LG_PAGE - 1))
/* The largest size class allocated out of a slab. */
#define SC_SMALL_MAXCLASS (SC_SMALL_MAX_BASE \
+ (SC_NGROUP - 1) * SC_SMALL_MAX_DELTA)
/* The smallest size class not allocated out of a slab. */
#define SC_LARGE_MINCLASS ((size_t)1ULL << (LG_PAGE + SC_LG_NGROUP))
#define SC_LG_LARGE_MINCLASS (LG_PAGE + SC_LG_NGROUP)
/* Internal; only used for the definition of SC_LARGE_MAXCLASS. */
#define SC_MAX_BASE ((size_t)1 << (SC_PTR_BITS - 2))
#define SC_MAX_DELTA ((size_t)1 << (SC_PTR_BITS - 2 - SC_LG_NGROUP))
/* The largest size class supported. */
#define SC_LARGE_MAXCLASS (SC_MAX_BASE + (SC_NGROUP - 1) * SC_MAX_DELTA)
/*
 * Metadata describing a single size class.  Across all size classes the
 * invariant size == (1 << lg_base) + ndelta * (1 << lg_delta) holds.
 */
typedef struct sc_s sc_t;
struct sc_s {
/* Size class index, or -1 if not a valid size class. */
int index;
/* Lg group base size (no deltas added). */
int lg_base;
/* Lg delta to previous size class. */
int lg_delta;
/* Delta multiplier. size == (1 << lg_base) + (ndelta << lg_delta) */
int ndelta;
/*
* True if the size class is a multiple of the page size, false
* otherwise.
*/
bool psz;
/*
* True if the size class is a small, bin, size class. False otherwise.
*/
bool bin;
/* The slab page count if a small bin size class, 0 otherwise. */
int pgs;
/* Same as lg_delta if a lookup table size class, 0 otherwise. */
int lg_delta_lookup;
};
/*
 * Complete size-class description: summary counts and limits, followed by one
 * sc_t entry per size class (SC_NSIZES entries in total).
 */
typedef struct sc_data_s sc_data_t;
struct sc_data_s {
/* Number of tiny size classes. */
unsigned ntiny;
/* Number of bins supported by the lookup table. */
int nlbins;
/* Number of small size class bins. */
int nbins;
/* Number of size classes. */
int nsizes;
/* Number of bits required to store NSIZES. */
int lg_ceil_nsizes;
/* Number of size classes that are a multiple of (1U << LG_PAGE). */
unsigned npsizes;
/* Lg of maximum tiny size class (or -1, if none). */
int lg_tiny_maxclass;
/* Maximum size class included in lookup table. */
size_t lookup_maxclass;
/* Maximum small size class. */
size_t small_maxclass;
/* Lg of minimum large size class. */
int lg_large_minclass;
/* The minimum large size class. */
size_t large_minclass;
/* Maximum (large) size class. */
size_t large_maxclass;
/* True if the sc_data_t has been initialized (for debugging only). */
bool initialized;
/* Per-size-class metadata. */
sc_t sc[SC_NSIZES];
};
void sc_data_init(sc_data_t *data);
/*
* Updates slab sizes in [begin, end] to be pgs pages in length, if possible.
* Otherwise, does its best to accommodate the request.
*/
void sc_data_update_slab_size(sc_data_t *data, size_t begin, size_t end,
int pgs);
void sc_boot(sc_data_t *data);
#endif /* JEMALLOC_INTERNAL_SC_H */

View File

@@ -0,0 +1,55 @@
#ifndef JEMALLOC_INTERNAL_SEQ_H
#define JEMALLOC_INTERNAL_SEQ_H
#include "jemalloc/internal/atomic.h"
/*
* A simple seqlock implementation.
*/
/*
 * seq_define(type, short_type) generates:
 *   - seq_<short_type>_t: a sequence counter plus enough size_t-sized atomic
 *     words to hold one object of `type`;
 *   - seq_store_<short_type>(dst, src): copy *src into dst;
 *   - seq_try_load_<short_type>(dst, src): try to copy a snapshot out of src,
 *     returning false if the snapshot may be inconsistent.
 *
 * The writer bumps seq by one, stores the data words, then bumps seq by one
 * again, so an odd seq marks a store in progress (assuming seq starts out
 * even, e.g. zero-initialized -- confirm at the definition sites).  The
 * reader rejects odd values and any read during which seq changed.  Callers
 * presumably retry or fall back when seq_try_load_* returns false.
 */
#define seq_define(type, short_type) \
typedef struct { \
atomic_zu_t seq; \
atomic_zu_t data[ \
(sizeof(type) + sizeof(size_t) - 1) / sizeof(size_t)]; \
} seq_##short_type##_t; \
\
/* \
* No internal synchronization -- the caller must ensure that there's \
* only a single writer at a time. \
*/ \
static inline void \
seq_store_##short_type(seq_##short_type##_t *dst, type *src) { \
size_t buf[sizeof(dst->data) / sizeof(size_t)]; \
buf[sizeof(buf) / sizeof(size_t) - 1] = 0; \
memcpy(buf, src, sizeof(type)); \
size_t old_seq = atomic_load_zu(&dst->seq, ATOMIC_RELAXED); \
atomic_store_zu(&dst->seq, old_seq + 1, ATOMIC_RELAXED); \
atomic_fence(ATOMIC_RELEASE); \
for (size_t i = 0; i < sizeof(buf) / sizeof(size_t); i++) { \
atomic_store_zu(&dst->data[i], buf[i], ATOMIC_RELAXED); \
} \
atomic_store_zu(&dst->seq, old_seq + 2, ATOMIC_RELEASE); \
} \
\
/* Returns whether or not the read was consistent. */ \
static inline bool \
seq_try_load_##short_type(type *dst, seq_##short_type##_t *src) { \
size_t buf[sizeof(src->data) / sizeof(size_t)]; \
size_t seq1 = atomic_load_zu(&src->seq, ATOMIC_ACQUIRE); \
if (seq1 % 2 != 0) { \
return false; \
} \
for (size_t i = 0; i < sizeof(buf) / sizeof(size_t); i++) { \
buf[i] = atomic_load_zu(&src->data[i], ATOMIC_RELAXED); \
} \
atomic_fence(ATOMIC_ACQUIRE); \
size_t seq2 = atomic_load_zu(&src->seq, ATOMIC_RELAXED); \
if (seq1 != seq2) { \
return false; \
} \
memcpy(dst, buf, sizeof(type)); \
return true; \
}
#endif /* JEMALLOC_INTERNAL_SEQ_H */

View File

@@ -1,25 +1,29 @@
#ifndef JEMALLOC_INTERNAL_SPIN_H
#define JEMALLOC_INTERNAL_SPIN_H
#ifdef JEMALLOC_SPIN_C_
# define SPIN_INLINE extern inline
#else
# define SPIN_INLINE inline
#endif
#define SPIN_INITIALIZER {0U}
typedef struct {
unsigned iteration;
} spin_t;
SPIN_INLINE void
/*
 * Perform one spin-wait step.  When HAVE_CPU_SPINWAIT is non-zero this expands
 * the platform's CPU_SPINWAIT hint; otherwise it falls back to a volatile
 * self-assignment, which the compiler may not optimize away.
 *
 * Declared with an explicit (void) parameter list so that, in C, this is a
 * real prototype rather than an unspecified-arguments declaration.
 */
static inline void
spin_cpu_spinwait(void) {
#  if HAVE_CPU_SPINWAIT
	CPU_SPINWAIT;
#  else
	/* No pause-style instruction available: touch a volatile instead. */
	volatile int x = 0;
	x = x;
#  endif
}
static inline void
spin_adaptive(spin_t *spin) {
volatile uint32_t i;
if (spin->iteration < 5) {
for (i = 0; i < (1U << spin->iteration); i++) {
CPU_SPINWAIT;
spin_cpu_spinwait();
}
spin->iteration++;
} else {

View File

@@ -1,12 +1,6 @@
#ifndef JEMALLOC_INTERNAL_STATS_H
#define JEMALLOC_INTERNAL_STATS_H
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/mutex_prof.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/size_classes.h"
#include "jemalloc/internal/stats_tsd.h"
/* OPTION(opt, var_name, default, set_value_to) */
#define STATS_PRINT_OPTIONS \
OPTION('J', json, false, true) \
@@ -16,7 +10,8 @@
OPTION('a', unmerged, config_stats, false) \
OPTION('b', bins, true, false) \
OPTION('l', large, true, false) \
OPTION('x', mutex, true, false)
OPTION('x', mutex, true, false) \
OPTION('e', extents, true, false)
enum {
#define OPTION(o, v, d, s) stats_print_option_num_##v,
@@ -33,132 +28,4 @@ extern char opt_stats_print_opts[stats_print_tot_num_options+1];
void stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
const char *opts);
/*
* In those architectures that support 64-bit atomics, we use atomic updates for
* our 64-bit values. Otherwise, we use a plain uint64_t and synchronize
* externally.
*/
#ifdef JEMALLOC_ATOMIC_U64
typedef atomic_u64_t arena_stats_u64_t;
#else
/* Must hold the arena stats mutex while reading atomically. */
typedef uint64_t arena_stats_u64_t;
#endif
typedef struct malloc_bin_stats_s {
/*
* Total number of allocation/deallocation requests served directly by
* the bin. Note that tcache may allocate an object, then recycle it
* many times, resulting many increments to nrequests, but only one
* each to nmalloc and ndalloc.
*/
uint64_t nmalloc;
uint64_t ndalloc;
/*
* Number of allocation requests that correspond to the size of this
* bin. This includes requests served by tcache, though tcache only
* periodically merges into this counter.
*/
uint64_t nrequests;
/*
* Current number of regions of this size class, including regions
* currently cached by tcache.
*/
size_t curregs;
/* Number of tcache fills from this bin. */
uint64_t nfills;
/* Number of tcache flushes to this bin. */
uint64_t nflushes;
/* Total number of slabs created for this bin's size class. */
uint64_t nslabs;
/*
* Total number of slabs reused by extracting them from the slabs heap
* for this bin's size class.
*/
uint64_t reslabs;
/* Current number of slabs in this bin. */
size_t curslabs;
mutex_prof_data_t mutex_data;
} malloc_bin_stats_t;
typedef struct malloc_large_stats_s {
/*
* Total number of allocation/deallocation requests served directly by
* the arena.
*/
arena_stats_u64_t nmalloc;
arena_stats_u64_t ndalloc;
/*
* Number of allocation requests that correspond to this size class.
* This includes requests served by tcache, though tcache only
* periodically merges into this counter.
*/
arena_stats_u64_t nrequests; /* Partially derived. */
/* Current number of allocations of this size class. */
size_t curlextents; /* Derived. */
} malloc_large_stats_t;
typedef struct decay_stats_s {
/* Total number of purge sweeps. */
arena_stats_u64_t npurge;
/* Total number of madvise calls made. */
arena_stats_u64_t nmadvise;
/* Total number of pages purged. */
arena_stats_u64_t purged;
} decay_stats_t;
/*
* Arena stats. Note that fields marked "derived" are not directly maintained
* within the arena code; rather their values are derived during stats merge
* requests.
*/
typedef struct arena_stats_s {
#ifndef JEMALLOC_ATOMIC_U64
malloc_mutex_t mtx;
#endif
/* Number of bytes currently mapped, excluding retained memory. */
atomic_zu_t mapped; /* Partially derived. */
/*
* Number of unused virtual memory bytes currently retained. Retained
* bytes are technically mapped (though always decommitted or purged),
* but they are excluded from the mapped statistic (above).
*/
atomic_zu_t retained; /* Derived. */
decay_stats_t decay_dirty;
decay_stats_t decay_muzzy;
atomic_zu_t base; /* Derived. */
atomic_zu_t internal;
atomic_zu_t resident; /* Derived. */
atomic_zu_t allocated_large; /* Derived. */
arena_stats_u64_t nmalloc_large; /* Derived. */
arena_stats_u64_t ndalloc_large; /* Derived. */
arena_stats_u64_t nrequests_large; /* Derived. */
/* Number of bytes cached in tcache associated with this arena. */
atomic_zu_t tcache_bytes; /* Derived. */
mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes];
/* One element for each large size class. */
malloc_large_stats_t lstats[NSIZES - NBINS];
/* Arena uptime. */
nstime_t uptime;
} arena_stats_t;
#endif /* JEMALLOC_INTERNAL_STATS_H */

View File

@@ -3,7 +3,7 @@
#include "jemalloc/internal/bit_util.h"
#include "jemalloc/internal/pages.h"
#include "jemalloc/internal/size_classes.h"
#include "jemalloc/internal/sc.h"
#include "jemalloc/internal/util.h"
/*
@@ -26,18 +26,18 @@
* sz_pind2sz_tab encodes the same information as could be computed by
* sz_pind2sz_compute().
*/
extern size_t const sz_pind2sz_tab[NPSIZES+1];
extern size_t sz_pind2sz_tab[SC_NPSIZES + 1];
/*
* sz_index2size_tab encodes the same information as could be computed (at
* unacceptable cost in some code paths) by sz_index2size_compute().
*/
extern size_t const sz_index2size_tab[NSIZES];
extern size_t sz_index2size_tab[SC_NSIZES];
/*
* sz_size2index_tab is a compact lookup table that rounds request sizes up to
* size classes. In order to reduce cache footprint, the table is compressed,
* and all accesses are via sz_size2index().
*/
extern uint8_t const sz_size2index_tab[];
extern uint8_t sz_size2index_tab[];
static const size_t sz_large_pad =
#ifdef JEMALLOC_CACHE_OBLIVIOUS
@@ -47,41 +47,40 @@ static const size_t sz_large_pad =
#endif
;
extern void sz_boot(const sc_data_t *sc_data);
JEMALLOC_ALWAYS_INLINE pszind_t
sz_psz2ind(size_t psz) {
if (unlikely(psz > LARGE_MAXCLASS)) {
return NPSIZES;
if (unlikely(psz > SC_LARGE_MAXCLASS)) {
return SC_NPSIZES;
}
{
pszind_t x = lg_floor((psz<<1)-1);
pszind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_PAGE) ? 0 : x -
(LG_SIZE_CLASS_GROUP + LG_PAGE);
pszind_t grp = shift << LG_SIZE_CLASS_GROUP;
pszind_t shift = (x < SC_LG_NGROUP + LG_PAGE) ?
0 : x - (SC_LG_NGROUP + LG_PAGE);
pszind_t grp = shift << SC_LG_NGROUP;
pszind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ?
LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1;
pszind_t lg_delta = (x < SC_LG_NGROUP + LG_PAGE + 1) ?
LG_PAGE : x - SC_LG_NGROUP - 1;
size_t delta_inverse_mask = ZD(-1) << lg_delta;
size_t delta_inverse_mask = ZU(-1) << lg_delta;
pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) &
((ZU(1) << LG_SIZE_CLASS_GROUP) - 1);
((ZU(1) << SC_LG_NGROUP) - 1);
pszind_t ind = grp + mod;
return ind;
}
}
static inline size_t
sz_pind2sz_compute(pszind_t pind) {
if (unlikely(pind == NPSIZES)) {
return LARGE_MAXCLASS + PAGE;
if (unlikely(pind == SC_NPSIZES)) {
return SC_LARGE_MAXCLASS + PAGE;
}
{
size_t grp = pind >> LG_SIZE_CLASS_GROUP;
size_t mod = pind & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1);
size_t grp = pind >> SC_LG_NGROUP;
size_t mod = pind & ((ZU(1) << SC_LG_NGROUP) - 1);
size_t grp_size_mask = ~((!!grp)-1);
size_t grp_size = ((ZU(1) << (LG_PAGE +
(LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask;
size_t grp_size = ((ZU(1) << (LG_PAGE + (SC_LG_NGROUP-1))) << grp)
& grp_size_mask;
size_t shift = (grp == 0) ? 1 : grp;
size_t lg_delta = shift + (LG_PAGE-1);
@@ -89,7 +88,6 @@ sz_pind2sz_compute(pszind_t pind) {
size_t sz = grp_size + mod_size;
return sz;
}
}
static inline size_t
@@ -101,70 +99,70 @@ sz_pind2sz_lookup(pszind_t pind) {
static inline size_t
sz_pind2sz(pszind_t pind) {
assert(pind < NPSIZES+1);
assert(pind < SC_NPSIZES + 1);
return sz_pind2sz_lookup(pind);
}
static inline size_t
sz_psz2u(size_t psz) {
if (unlikely(psz > LARGE_MAXCLASS)) {
return LARGE_MAXCLASS + PAGE;
if (unlikely(psz > SC_LARGE_MAXCLASS)) {
return SC_LARGE_MAXCLASS + PAGE;
}
{
size_t x = lg_floor((psz<<1)-1);
size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ?
LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1;
size_t lg_delta = (x < SC_LG_NGROUP + LG_PAGE + 1) ?
LG_PAGE : x - SC_LG_NGROUP - 1;
size_t delta = ZU(1) << lg_delta;
size_t delta_mask = delta - 1;
size_t usize = (psz + delta_mask) & ~delta_mask;
return usize;
}
}
static inline szind_t
sz_size2index_compute(size_t size) {
if (unlikely(size > LARGE_MAXCLASS)) {
return NSIZES;
if (unlikely(size > SC_LARGE_MAXCLASS)) {
return SC_NSIZES;
}
#if (NTBINS != 0)
if (size <= (ZU(1) << LG_TINY_MAXCLASS)) {
szind_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1;
if (size == 0) {
return 0;
}
#if (SC_NTINY != 0)
if (size <= (ZU(1) << SC_LG_TINY_MAXCLASS)) {
szind_t lg_tmin = SC_LG_TINY_MAXCLASS - SC_NTINY + 1;
szind_t lg_ceil = lg_floor(pow2_ceil_zu(size));
return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin);
}
#endif
{
szind_t x = lg_floor((size<<1)-1);
szind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 :
x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM);
szind_t grp = shift << LG_SIZE_CLASS_GROUP;
szind_t shift = (x < SC_LG_NGROUP + LG_QUANTUM) ? 0 :
x - (SC_LG_NGROUP + LG_QUANTUM);
szind_t grp = shift << SC_LG_NGROUP;
szind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1)
? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1;
szind_t lg_delta = (x < SC_LG_NGROUP + LG_QUANTUM + 1)
? LG_QUANTUM : x - SC_LG_NGROUP - 1;
size_t delta_inverse_mask = ZD(-1) << lg_delta;
size_t delta_inverse_mask = ZU(-1) << lg_delta;
szind_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) &
((ZU(1) << LG_SIZE_CLASS_GROUP) - 1);
((ZU(1) << SC_LG_NGROUP) - 1);
szind_t index = NTBINS + grp + mod;
szind_t index = SC_NTINY + grp + mod;
return index;
}
}
JEMALLOC_ALWAYS_INLINE szind_t
sz_size2index_lookup(size_t size) {
assert(size <= LOOKUP_MAXCLASS);
{
szind_t ret = (sz_size2index_tab[(size-1) >> LG_TINY_MIN]);
assert(size <= SC_LOOKUP_MAXCLASS);
szind_t ret = (sz_size2index_tab[(size + (ZU(1) << SC_LG_TINY_MIN) - 1)
>> SC_LG_TINY_MIN]);
assert(ret == sz_size2index_compute(size));
return ret;
}
}
JEMALLOC_ALWAYS_INLINE szind_t
sz_size2index(size_t size) {
assert(size > 0);
if (likely(size <= LOOKUP_MAXCLASS)) {
if (likely(size <= SC_LOOKUP_MAXCLASS)) {
return sz_size2index_lookup(size);
}
return sz_size2index_compute(size);
@@ -172,20 +170,20 @@ sz_size2index(size_t size) {
static inline size_t
sz_index2size_compute(szind_t index) {
#if (NTBINS > 0)
if (index < NTBINS) {
return (ZU(1) << (LG_TINY_MAXCLASS - NTBINS + 1 + index));
#if (SC_NTINY > 0)
if (index < SC_NTINY) {
return (ZU(1) << (SC_LG_TINY_MAXCLASS - SC_NTINY + 1 + index));
}
#endif
{
size_t reduced_index = index - NTBINS;
size_t grp = reduced_index >> LG_SIZE_CLASS_GROUP;
size_t mod = reduced_index & ((ZU(1) << LG_SIZE_CLASS_GROUP) -
size_t reduced_index = index - SC_NTINY;
size_t grp = reduced_index >> SC_LG_NGROUP;
size_t mod = reduced_index & ((ZU(1) << SC_LG_NGROUP) -
1);
size_t grp_size_mask = ~((!!grp)-1);
size_t grp_size = ((ZU(1) << (LG_QUANTUM +
(LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask;
(SC_LG_NGROUP-1))) << grp) & grp_size_mask;
size_t shift = (grp == 0) ? 1 : grp;
size_t lg_delta = shift + (LG_QUANTUM-1);
@@ -205,18 +203,22 @@ sz_index2size_lookup(szind_t index) {
JEMALLOC_ALWAYS_INLINE size_t
sz_index2size(szind_t index) {
assert(index < NSIZES);
assert(index < SC_NSIZES);
return sz_index2size_lookup(index);
}
JEMALLOC_ALWAYS_INLINE size_t
sz_s2u_compute(size_t size) {
if (unlikely(size > LARGE_MAXCLASS)) {
if (unlikely(size > SC_LARGE_MAXCLASS)) {
return 0;
}
#if (NTBINS > 0)
if (size <= (ZU(1) << LG_TINY_MAXCLASS)) {
size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1;
if (size == 0) {
size++;
}
#if (SC_NTINY > 0)
if (size <= (ZU(1) << SC_LG_TINY_MAXCLASS)) {
size_t lg_tmin = SC_LG_TINY_MAXCLASS - SC_NTINY + 1;
size_t lg_ceil = lg_floor(pow2_ceil_zu(size));
return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) :
(ZU(1) << lg_ceil));
@@ -224,8 +226,8 @@ sz_s2u_compute(size_t size) {
#endif
{
size_t x = lg_floor((size<<1)-1);
size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1)
? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1;
size_t lg_delta = (x < SC_LG_NGROUP + LG_QUANTUM + 1)
? LG_QUANTUM : x - SC_LG_NGROUP - 1;
size_t delta = ZU(1) << lg_delta;
size_t delta_mask = delta - 1;
size_t usize = (size + delta_mask) & ~delta_mask;
@@ -247,8 +249,7 @@ sz_s2u_lookup(size_t size) {
*/
JEMALLOC_ALWAYS_INLINE size_t
sz_s2u(size_t size) {
assert(size > 0);
if (likely(size <= LOOKUP_MAXCLASS)) {
if (likely(size <= SC_LOOKUP_MAXCLASS)) {
return sz_s2u_lookup(size);
}
return sz_s2u_compute(size);
@@ -265,7 +266,7 @@ sz_sa2u(size_t size, size_t alignment) {
assert(alignment != 0 && ((alignment - 1) & alignment) == 0);
/* Try for a small size class. */
if (size <= SMALL_MAXCLASS && alignment < PAGE) {
if (size <= SC_SMALL_MAXCLASS && alignment < PAGE) {
/*
* Round size up to the nearest multiple of alignment.
*
@@ -281,20 +282,20 @@ sz_sa2u(size_t size, size_t alignment) {
* 192 | 11000000 | 64
*/
usize = sz_s2u(ALIGNMENT_CEILING(size, alignment));
if (usize < LARGE_MINCLASS) {
if (usize < SC_LARGE_MINCLASS) {
return usize;
}
}
/* Large size class. Beware of overflow. */
if (unlikely(alignment > LARGE_MAXCLASS)) {
if (unlikely(alignment > SC_LARGE_MAXCLASS)) {
return 0;
}
/* Make sure result is a large size class. */
if (size <= LARGE_MINCLASS) {
usize = LARGE_MINCLASS;
if (size <= SC_LARGE_MINCLASS) {
usize = SC_LARGE_MINCLASS;
} else {
usize = sz_s2u(size);
if (usize < size) {

View File

@@ -1,15 +1,13 @@
#ifndef JEMALLOC_INTERNAL_TCACHE_EXTERNS_H
#define JEMALLOC_INTERNAL_TCACHE_EXTERNS_H
#include "jemalloc/internal/size_classes.h"
extern bool opt_tcache;
extern ssize_t opt_lg_tcache_max;
extern tcache_bin_info_t *tcache_bin_info;
extern cache_bin_info_t *tcache_bin_info;
/*
* Number of tcache bins. There are NBINS small-object bins, plus 0 or more
* Number of tcache bins. There are SC_NBINS small-object bins, plus 0 or more
* large-object bins.
*/
extern unsigned nhbins;
@@ -30,10 +28,10 @@ extern tcaches_t *tcaches;
size_t tcache_salloc(tsdn_t *tsdn, const void *ptr);
void tcache_event_hard(tsd_t *tsd, tcache_t *tcache);
void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
tcache_bin_t *tbin, szind_t binind, bool *tcache_success);
void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
cache_bin_t *tbin, szind_t binind, bool *tcache_success);
void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
szind_t binind, unsigned rem);
void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
void tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind,
unsigned rem, tcache_t *tcache);
void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache,
arena_t *arena);

View File

@@ -1,8 +1,9 @@
#ifndef JEMALLOC_INTERNAL_TCACHE_INLINES_H
#define JEMALLOC_INTERNAL_TCACHE_INLINES_H
#include "jemalloc/internal/bin.h"
#include "jemalloc/internal/jemalloc_internal_types.h"
#include "jemalloc/internal/size_classes.h"
#include "jemalloc/internal/sc.h"
#include "jemalloc/internal/sz.h"
#include "jemalloc/internal/ticker.h"
#include "jemalloc/internal/util.h"
@@ -38,43 +39,16 @@ tcache_event(tsd_t *tsd, tcache_t *tcache) {
}
JEMALLOC_ALWAYS_INLINE void *
tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success) {
tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache,
size_t size, szind_t binind, bool zero, bool slow_path) {
void *ret;
if (unlikely(tbin->ncached == 0)) {
tbin->low_water = -1;
*tcache_success = false;
return NULL;
}
/*
* tcache_success (instead of ret) should be checked upon the return of
* this function. We avoid checking (ret == NULL) because there is
* never a null stored on the avail stack (which is unknown to the
* compiler), and eagerly checking ret would cause pipeline stall
* (waiting for the cacheline).
*/
*tcache_success = true;
ret = *(tbin->avail - tbin->ncached);
tbin->ncached--;
if (unlikely((low_water_t)tbin->ncached < tbin->low_water)) {
tbin->low_water = tbin->ncached;
}
return ret;
}
JEMALLOC_ALWAYS_INLINE void *
tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
szind_t binind, bool zero, bool slow_path) {
void *ret;
tcache_bin_t *tbin;
cache_bin_t *bin;
bool tcache_success;
size_t usize JEMALLOC_CC_SILENCE_INIT(0);
assert(binind < NBINS);
tbin = tcache_small_bin_get(tcache, binind);
ret = tcache_alloc_easy(tbin, &tcache_success);
assert(binind < SC_NBINS);
bin = tcache_small_bin_get(tcache, binind);
ret = cache_bin_alloc_easy(bin, &tcache_success);
assert(tcache_success == (ret != NULL));
if (unlikely(!tcache_success)) {
bool tcache_hard_success;
@@ -84,7 +58,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
}
ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache,
tbin, binind, &tcache_hard_success);
bin, binind, &tcache_hard_success);
if (tcache_hard_success == false) {
return NULL;
}
@@ -103,22 +77,21 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
if (likely(!zero)) {
if (slow_path && config_fill) {
if (unlikely(opt_junk_alloc)) {
arena_alloc_junk_small(ret,
&arena_bin_info[binind], false);
arena_alloc_junk_small(ret, &bin_infos[binind],
false);
} else if (unlikely(opt_zero)) {
memset(ret, 0, usize);
}
}
} else {
if (slow_path && config_fill && unlikely(opt_junk_alloc)) {
arena_alloc_junk_small(ret, &arena_bin_info[binind],
true);
arena_alloc_junk_small(ret, &bin_infos[binind], true);
}
memset(ret, 0, usize);
}
if (config_stats) {
tbin->tstats.nrequests++;
bin->tstats.nrequests++;
}
if (config_prof) {
tcache->prof_accumbytes += usize;
@@ -131,12 +104,12 @@ JEMALLOC_ALWAYS_INLINE void *
tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
szind_t binind, bool zero, bool slow_path) {
void *ret;
tcache_bin_t *tbin;
cache_bin_t *bin;
bool tcache_success;
assert(binind >= NBINS &&binind < nhbins);
tbin = tcache_large_bin_get(tcache, binind);
ret = tcache_alloc_easy(tbin, &tcache_success);
assert(binind >= SC_NBINS &&binind < nhbins);
bin = tcache_large_bin_get(tcache, binind);
ret = cache_bin_alloc_easy(bin, &tcache_success);
assert(tcache_success == (ret != NULL));
if (unlikely(!tcache_success)) {
/*
@@ -176,7 +149,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
}
if (config_stats) {
tbin->tstats.nrequests++;
bin->tstats.nrequests++;
}
if (config_prof) {
tcache->prof_accumbytes += usize;
@@ -190,24 +163,24 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size,
JEMALLOC_ALWAYS_INLINE void
tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
bool slow_path) {
tcache_bin_t *tbin;
tcache_bin_info_t *tbin_info;
cache_bin_t *bin;
cache_bin_info_t *bin_info;
assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SMALL_MAXCLASS);
assert(tcache_salloc(tsd_tsdn(tsd), ptr)
<= SC_SMALL_MAXCLASS);
if (slow_path && config_fill && unlikely(opt_junk_free)) {
arena_dalloc_junk_small(ptr, &arena_bin_info[binind]);
arena_dalloc_junk_small(ptr, &bin_infos[binind]);
}
tbin = tcache_small_bin_get(tcache, binind);
tbin_info = &tcache_bin_info[binind];
if (unlikely(tbin->ncached == tbin_info->ncached_max)) {
tcache_bin_flush_small(tsd, tcache, tbin, binind,
(tbin_info->ncached_max >> 1));
bin = tcache_small_bin_get(tcache, binind);
bin_info = &tcache_bin_info[binind];
if (unlikely(!cache_bin_dalloc_easy(bin, bin_info, ptr))) {
tcache_bin_flush_small(tsd, tcache, bin, binind,
(bin_info->ncached_max >> 1));
bool ret = cache_bin_dalloc_easy(bin, bin_info, ptr);
assert(ret);
}
assert(tbin->ncached < tbin_info->ncached_max);
tbin->ncached++;
*(tbin->avail - tbin->ncached) = ptr;
tcache_event(tsd, tcache);
}
@@ -215,25 +188,26 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
JEMALLOC_ALWAYS_INLINE void
tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind,
bool slow_path) {
tcache_bin_t *tbin;
tcache_bin_info_t *tbin_info;
cache_bin_t *bin;
cache_bin_info_t *bin_info;
assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SMALL_MAXCLASS);
assert(tcache_salloc(tsd_tsdn(tsd), ptr)
> SC_SMALL_MAXCLASS);
assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass);
if (slow_path && config_fill && unlikely(opt_junk_free)) {
large_dalloc_junk(ptr, sz_index2size(binind));
}
tbin = tcache_large_bin_get(tcache, binind);
tbin_info = &tcache_bin_info[binind];
if (unlikely(tbin->ncached == tbin_info->ncached_max)) {
tcache_bin_flush_large(tsd, tbin, binind,
(tbin_info->ncached_max >> 1), tcache);
bin = tcache_large_bin_get(tcache, binind);
bin_info = &tcache_bin_info[binind];
if (unlikely(bin->ncached == bin_info->ncached_max)) {
tcache_bin_flush_large(tsd, bin, binind,
(bin_info->ncached_max >> 1), tcache);
}
assert(tbin->ncached < tbin_info->ncached_max);
tbin->ncached++;
*(tbin->avail - tbin->ncached) = ptr;
assert(bin->ncached < bin_info->ncached_max);
bin->ncached++;
*(bin->avail - bin->ncached) = ptr;
tcache_event(tsd, tcache);
}
@@ -242,6 +216,9 @@ JEMALLOC_ALWAYS_INLINE tcache_t *
tcaches_get(tsd_t *tsd, unsigned ind) {
tcaches_t *elm = &tcaches[ind];
if (unlikely(elm->tcache == NULL)) {
malloc_printf("<jemalloc>: invalid tcache id (%u).\n", ind);
abort();
} else if (unlikely(elm->tcache == TCACHES_ELM_NEED_REINIT)) {
elm->tcache = tcache_create_explicit(tsd);
}
return elm->tcache;

View File

@@ -1,56 +1,62 @@
#ifndef JEMALLOC_INTERNAL_TCACHE_STRUCTS_H
#define JEMALLOC_INTERNAL_TCACHE_STRUCTS_H
#include "jemalloc/internal/cache_bin.h"
#include "jemalloc/internal/ql.h"
#include "jemalloc/internal/size_classes.h"
#include "jemalloc/internal/stats_tsd.h"
#include "jemalloc/internal/sc.h"
#include "jemalloc/internal/ticker.h"
#include "jemalloc/internal/tsd_types.h"
/*
* Read-only information associated with each element of tcache_t's tbins array
* is stored separately, mainly to reduce memory usage.
*/
struct tcache_bin_info_s {
unsigned ncached_max; /* Upper limit on ncached. */
};
struct tcache_bin_s {
low_water_t low_water; /* Min # cached since last GC. */
uint32_t ncached; /* # of cached objects. */
/*
* ncached and stats are both modified frequently. Let's keep them
* close so that they have a higher chance of being on the same
* cacheline, thus less write-backs.
*/
tcache_bin_stats_t tstats;
/*
* To make use of adjacent cacheline prefetch, the items in the avail
* stack goes to higher address for newer allocations. avail points
* just above the available space, which means that
* avail[-ncached, ... -1] are available items and the lowest item will
* be allocated first.
*/
void **avail; /* Stack of available objects. */
};
/* Various uses of this struct need it to be a named type. */
typedef ql_elm(tsd_t) tsd_link_t;
struct tcache_s {
/* Data accessed frequently first: prof, ticker and small bins. */
uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */
ticker_t gc_ticker; /* Drives incremental GC. */
/*
* The pointer stacks associated with tbins follow as a contiguous
* array. During tcache initialization, the avail pointer in each
* element of tbins is initialized to point to the proper offset within
* this array.
* To minimize our cache-footprint, we put the frequently accessed data
* together at the start of this struct.
*/
tcache_bin_t tbins_small[NBINS];
/* Data accessed less often below. */
ql_elm(tcache_t) link; /* Used for aggregating stats. */
arena_t *arena; /* Associated arena. */
szind_t next_gc_bin; /* Next bin to GC. */
/* Cleared after arena_prof_accum(). */
uint64_t prof_accumbytes;
/* Drives incremental GC. */
ticker_t gc_ticker;
/*
* The pointer stacks associated with bins follow as a contiguous array.
* During tcache initialization, the avail pointer in each element of
* tbins is initialized to point to the proper offset within this array.
*/
cache_bin_t bins_small[SC_NBINS];
/*
* This data is less hot; we can be a little less careful with our
* footprint here.
*/
/* Lets us track all the tcaches in an arena. */
ql_elm(tcache_t) link;
/* Logically scoped to tsd, but put here for cache layout reasons. */
ql_elm(tsd_t) tsd_link;
bool in_hook;
/*
* The descriptor lets the arena find our cache bins without seeing the
* tcache definition. This enables arenas to aggregate stats across
* tcaches without having a tcache dependency.
*/
cache_bin_array_descriptor_t cache_bin_array_descriptor;
/* The arena this tcache is associated with. */
arena_t *arena;
/* Next bin to GC. */
szind_t next_gc_bin;
/* For small bins, fill (ncached_max >> lg_fill_div). */
uint8_t lg_fill_div[NBINS];
tcache_bin_t tbins_large[NSIZES-NBINS];
uint8_t lg_fill_div[SC_NBINS];
/*
* We put the cache bins for large size classes at the end of the
* struct, since some of them might not get used. This might end up
* letting us avoid touching an extra page if we don't have to.
*/
cache_bin_t bins_large[SC_NSIZES-SC_NBINS];
};
/* Linkage for list of available (previously used) explicit tcache IDs. */

View File

@@ -1,16 +1,11 @@
#ifndef JEMALLOC_INTERNAL_TCACHE_TYPES_H
#define JEMALLOC_INTERNAL_TCACHE_TYPES_H
#include "jemalloc/internal/size_classes.h"
#include "jemalloc/internal/sc.h"
typedef struct tcache_bin_info_s tcache_bin_info_t;
typedef struct tcache_bin_s tcache_bin_t;
typedef struct tcache_s tcache_t;
typedef struct tcaches_s tcaches_t;
/* ncached is cast to this type for comparison. */
typedef int32_t low_water_t;
/*
* tcache pointers close to NULL are used to encode state information that is
* used for two purposes: preventing thread caching on a per thread basis and
@@ -50,7 +45,7 @@ typedef int32_t low_water_t;
/* Number of tcache allocation/deallocation events between incremental GCs. */
#define TCACHE_GC_INCR \
((TCACHE_GC_SWEEP / NBINS) + ((TCACHE_GC_SWEEP / NBINS == 0) ? 0 : 1))
((TCACHE_GC_SWEEP / SC_NBINS) + ((TCACHE_GC_SWEEP / SC_NBINS == 0) ? 0 : 1))
/* Used in TSD static initializer only. Real init in tcache_data_init(). */
#define TCACHE_ZERO_INITIALIZER {0}
@@ -58,4 +53,7 @@ typedef int32_t low_water_t;
/* Used in TSD static initializer only. Will be initialized to opt_tcache. */
#define TCACHE_ENABLED_ZERO_INITIALIZER false
/* Used for explicit tcache only. Means flushed but not destroyed. */
#define TCACHES_ELM_NEED_REINIT ((tcache_t *)(uintptr_t)1)
#endif /* JEMALLOC_INTERNAL_TCACHE_TYPES_H */

View File

@@ -0,0 +1,19 @@
#ifndef JEMALLOC_INTERNAL_TEST_HOOKS_H
#define JEMALLOC_INTERNAL_TEST_HOOKS_H
/*
 * Exported hook function pointers.  test_hooks_libc_hook is invoked (when
 * non-NULL) by every wrapped libc name below; test_hooks_arena_new_hook is
 * only declared here and is used elsewhere.
 */
extern JEMALLOC_EXPORT void (*test_hooks_arena_new_hook)();
extern JEMALLOC_EXPORT void (*test_hooks_libc_hook)();
/*
 * Expands to a comma expression whose value is fn: if hook is non-NULL it is
 * called first (its result discarded), then fn is yielded so the surrounding
 * call syntax, e.g. JEMALLOC_HOOK(open, h)(path, flags), still applies to fn.
 */
#define JEMALLOC_HOOK(fn, hook) ((void)(hook != NULL && (hook(), 0)), fn)
/*
 * Redirect the following names through test_hooks_libc_hook.  Each macro is
 * self-referential; the preprocessor does not re-expand the inner name, so
 * it still refers to the real function.
 */
#define open JEMALLOC_HOOK(open, test_hooks_libc_hook)
#define read JEMALLOC_HOOK(read, test_hooks_libc_hook)
#define write JEMALLOC_HOOK(write, test_hooks_libc_hook)
#define readlink JEMALLOC_HOOK(readlink, test_hooks_libc_hook)
#define close JEMALLOC_HOOK(close, test_hooks_libc_hook)
#define creat JEMALLOC_HOOK(creat, test_hooks_libc_hook)
#define secure_getenv JEMALLOC_HOOK(secure_getenv, test_hooks_libc_hook)
/* Note that this is undef'd and re-define'd in src/prof.c. */
#define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, test_hooks_libc_hook)
#endif /* JEMALLOC_INTERNAL_TEST_HOOKS_H */

View File

@@ -32,14 +32,42 @@ ticker_read(const ticker_t *ticker) {
return ticker->tick;
}
static inline bool
ticker_ticks(ticker_t *ticker, int32_t nticks) {
if (unlikely(ticker->tick < nticks)) {
/*
* Not intended to be a public API. Unfortunately, on x86, neither gcc nor
* clang seems smart enough to turn
* ticker->tick -= nticks;
* if (unlikely(ticker->tick < 0)) {
* fixup ticker
* return true;
* }
* return false;
* into
* subq %nticks_reg, (%ticker_reg)
* js fixup ticker
*
* unless we force "fixup ticker" out of line. In that case, gcc gets it right,
* but clang now does worse than before. So, on x86 with gcc, we force it out
* of line, but otherwise let the inlining occur. Ordinarily this wouldn't be
* worth the hassle, but this is on the fast path of both malloc and free (via
* tcache_event).
*/
#if defined(__GNUC__) && !defined(__clang__) \
&& (defined(__x86_64__) || defined(__i386__))
JEMALLOC_NOINLINE
#endif
static bool
ticker_fixup(ticker_t *ticker) {
ticker->tick = ticker->nticks;
return true;
}
}
static inline bool
ticker_ticks(ticker_t *ticker, int32_t nticks) {
ticker->tick -= nticks;
return(false);
if (unlikely(ticker->tick < 0)) {
return ticker_fixup(ticker);
}
return false;
}
static inline bool
@@ -47,4 +75,17 @@ ticker_tick(ticker_t *ticker) {
return ticker_ticks(ticker, 1);
}
/*
 * Consume a single tick without ever resetting the counter.  Returns true
 * once the counter has gone negative (i.e. the ticker would fire); resetting
 * the ticker is left to the caller's slow path.
 */
static inline bool
ticker_trytick(ticker_t *ticker) {
	ticker->tick -= 1;
	return unlikely(ticker->tick < 0) ? true : false;
}
#endif /* JEMALLOC_INTERNAL_TICKER_H */

View File

@@ -3,6 +3,7 @@
#include "jemalloc/internal/arena_types.h"
#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/bin_types.h"
#include "jemalloc/internal/jemalloc_internal_externs.h"
#include "jemalloc/internal/prof_types.h"
#include "jemalloc/internal/ql.h"
@@ -65,48 +66,120 @@ typedef void (*test_callback_t)(int *);
O(arenas_tdata_bypass, bool, bool) \
O(reentrancy_level, int8_t, int8_t) \
O(narenas_tdata, uint32_t, uint32_t) \
O(offset_state, uint64_t, uint64_t) \
O(thread_allocated, uint64_t, uint64_t) \
O(thread_deallocated, uint64_t, uint64_t) \
O(bytes_until_sample, int64_t, int64_t) \
O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \
O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) \
O(iarena, arena_t *, arena_t *) \
O(arena, arena_t *, arena_t *) \
O(arenas_tdata, arena_tdata_t *, arena_tdata_t *)\
O(binshards, tsd_binshards_t, tsd_binshards_t)\
O(tcache, tcache_t, tcache_t) \
O(witness_tsd, witness_tsd_t, witness_tsdn_t) \
MALLOC_TEST_TSD
#define TSD_INITIALIZER { \
tsd_state_uninitialized, \
ATOMIC_INIT(tsd_state_uninitialized), \
TCACHE_ENABLED_ZERO_INITIALIZER, \
false, \
0, \
0, \
0, \
0, \
0, \
0, \
NULL, \
RTREE_CTX_ZERO_INITIALIZER, \
NULL, \
NULL, \
NULL, \
TSD_BINSHARDS_ZERO_INITIALIZER, \
TCACHE_ZERO_INITIALIZER, \
WITNESS_TSD_INITIALIZER \
MALLOC_TEST_TSD_INITIALIZER \
}
void *malloc_tsd_malloc(size_t size);
void malloc_tsd_dalloc(void *wrapper);
void malloc_tsd_cleanup_register(bool (*f)(void));
tsd_t *malloc_tsd_boot0(void);
void malloc_tsd_boot1(void);
void tsd_cleanup(void *arg);
tsd_t *tsd_fetch_slow(tsd_t *tsd, bool internal);
void tsd_state_set(tsd_t *tsd, uint8_t new_state);
void tsd_slow_update(tsd_t *tsd);
void tsd_prefork(tsd_t *tsd);
void tsd_postfork_parent(tsd_t *tsd);
void tsd_postfork_child(tsd_t *tsd);
/*
* Call ..._inc when your module wants to take all threads down the slow paths,
* and ..._dec when it no longer needs to.
*/
void tsd_global_slow_inc(tsdn_t *tsdn);
void tsd_global_slow_dec(tsdn_t *tsdn);
bool tsd_global_slow();
enum {
tsd_state_nominal = 0, /* Common case --> jnz. */
tsd_state_nominal_slow = 1, /* Initialized but on slow path. */
/* the above 2 nominal states should be lower values. */
tsd_state_nominal_max = 1, /* used for comparison only. */
tsd_state_minimal_initialized = 2,
tsd_state_purgatory = 3,
tsd_state_reincarnated = 4,
tsd_state_uninitialized = 5
/* Common case --> jnz. */
tsd_state_nominal = 0,
/* Initialized but on slow path. */
tsd_state_nominal_slow = 1,
/*
* Some thread has changed global state in such a way that all nominal
* threads need to recompute their fast / slow status the next time they
* get a chance.
*
* Any thread can change another thread's status *to* recompute, but
* threads are the only ones who can change their status *from*
* recompute.
*/
tsd_state_nominal_recompute = 2,
/*
* The above nominal states should be lower values. We use
* tsd_state_nominal_max to separate nominal states from threads in the
* process of being born / dying.
*/
tsd_state_nominal_max = 2,
/*
* A thread might free() during its death as its only allocator action;
* in such scenarios, we need tsd, but set up in such a way that no
* cleanup is necessary.
*/
tsd_state_minimal_initialized = 3,
/* States during which we know we're in thread death. */
tsd_state_purgatory = 4,
tsd_state_reincarnated = 5,
/*
* What it says on the tin; tsd that hasn't been initialized. Note
* that even when the tsd struct lives in TLS, we need to keep track
* of stuff like whether or not our pthread destructors have been
* scheduled, so this really truly is different than the nominal state.
*/
tsd_state_uninitialized = 6
};
/* Manually limit tsd_state_t to a single byte. */
typedef uint8_t tsd_state_t;
/*
* Some TSD accesses can only be done in a nominal state. To enforce this, we
* wrap TSD member access in a function that asserts on TSD state, and mangle
* field names to prevent touching them accidentally.
*/
#define TSD_MANGLE(n) cant_access_tsd_items_directly_use_a_getter_or_setter_##n
#ifdef JEMALLOC_U8_ATOMICS
# define tsd_state_t atomic_u8_t
# define tsd_atomic_load atomic_load_u8
# define tsd_atomic_store atomic_store_u8
# define tsd_atomic_exchange atomic_exchange_u8
#else
# define tsd_state_t atomic_u32_t
# define tsd_atomic_load atomic_load_u32
# define tsd_atomic_store atomic_store_u32
# define tsd_atomic_exchange atomic_exchange_u32
#endif
/* The actual tsd. */
struct tsd_s {
@@ -115,13 +188,29 @@ struct tsd_s {
* module. Access any thread-local state through the getters and
* setters below.
*/
/*
* We manually limit the state to just a single byte. Unless the 8-bit
* atomics are unavailable (which is rare).
*/
tsd_state_t state;
#define O(n, t, nt) \
t use_a_getter_or_setter_instead_##n;
t TSD_MANGLE(n);
MALLOC_TSD
#undef O
};
/* Return the current state of tsd as a raw byte. */
JEMALLOC_ALWAYS_INLINE uint8_t
tsd_state_get(tsd_t *tsd) {
	/*
	 * Deliberately a plain byte read rather than
	 *     atomic_load_u8(&tsd->state, ATOMIC_RELAXED)
	 * Compilers currently cannot turn the atomic load into a memory
	 * comparison; they force a load into a register, which hurts
	 * fast-path performance.
	 */
	uint8_t *state_byte = (uint8_t *)&tsd->state;
	return *state_byte;
}
/*
* Wrapper around tsd_t that makes it possible to avoid implicit conversion
* between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be
@@ -148,15 +237,6 @@ tsdn_tsd(tsdn_t *tsdn) {
return &tsdn->tsd;
}
void *malloc_tsd_malloc(size_t size);
void malloc_tsd_dalloc(void *wrapper);
void malloc_tsd_cleanup_register(bool (*f)(void));
tsd_t *malloc_tsd_boot0(void);
void malloc_tsd_boot1(void);
void tsd_cleanup(void *arg);
tsd_t *tsd_fetch_slow(tsd_t *tsd, bool internal);
void tsd_slow_update(tsd_t *tsd);
/*
* We put the platform-specific data declarations and inlines into their own
* header files to avoid cluttering this file. They define tsd_boot0,
@@ -180,7 +260,7 @@ void tsd_slow_update(tsd_t *tsd);
#define O(n, t, nt) \
JEMALLOC_ALWAYS_INLINE t * \
tsd_##n##p_get_unsafe(tsd_t *tsd) { \
return &tsd->use_a_getter_or_setter_instead_##n; \
return &tsd->TSD_MANGLE(n); \
}
MALLOC_TSD
#undef O
@@ -189,10 +269,16 @@ MALLOC_TSD
#define O(n, t, nt) \
JEMALLOC_ALWAYS_INLINE t * \
tsd_##n##p_get(tsd_t *tsd) { \
assert(tsd->state == tsd_state_nominal || \
tsd->state == tsd_state_nominal_slow || \
tsd->state == tsd_state_reincarnated || \
tsd->state == tsd_state_minimal_initialized); \
/* \
* Because the state might change asynchronously if it's \
* nominal, we need to make sure that we only read it once. \
*/ \
uint8_t state = tsd_state_get(tsd); \
assert(state == tsd_state_nominal || \
state == tsd_state_nominal_slow || \
state == tsd_state_nominal_recompute || \
state == tsd_state_reincarnated || \
state == tsd_state_minimal_initialized); \
return tsd_##n##p_get_unsafe(tsd); \
}
MALLOC_TSD
@@ -227,8 +313,8 @@ MALLOC_TSD
#define O(n, t, nt) \
JEMALLOC_ALWAYS_INLINE void \
tsd_##n##_set(tsd_t *tsd, t val) { \
assert(tsd->state != tsd_state_reincarnated && \
tsd->state != tsd_state_minimal_initialized); \
assert(tsd_state_get(tsd) != tsd_state_reincarnated && \
tsd_state_get(tsd) != tsd_state_minimal_initialized); \
*tsd_##n##p_get(tsd) = val; \
}
MALLOC_TSD
@@ -236,13 +322,18 @@ MALLOC_TSD
/*
 * Assert the per-thread fast-path preconditions: malloc_slow is clear, the
 * thread's tcache is enabled, and its reentrancy level is zero.
 */
JEMALLOC_ALWAYS_INLINE void
tsd_assert_fast(tsd_t *tsd) {
/*
* Note that our fastness assertion does *not* include global slowness
* counters; it's not in general possible to ensure that they won't
* change asynchronously from underneath us.
*/
assert(!malloc_slow && tsd_tcache_enabled_get(tsd) &&
tsd_reentrancy_level_get(tsd) == 0);
}
JEMALLOC_ALWAYS_INLINE bool
tsd_fast(tsd_t *tsd) {
bool fast = (tsd->state == tsd_state_nominal);
bool fast = (tsd_state_get(tsd) == tsd_state_nominal);
if (fast) {
tsd_assert_fast(tsd);
}
@@ -259,7 +350,7 @@ tsd_fetch_impl(bool init, bool minimal) {
}
assert(tsd != NULL);
if (unlikely(tsd->state != tsd_state_nominal)) {
if (unlikely(tsd_state_get(tsd) != tsd_state_nominal)) {
return tsd_fetch_slow(tsd, minimal);
}
assert(tsd_fast(tsd));
@@ -279,7 +370,7 @@ JEMALLOC_ALWAYS_INLINE tsd_t *
tsd_internal_fetch(void) {
tsd_t *tsd = tsd_fetch_min();
/* Use reincarnated state to prevent full initialization. */
tsd->state = tsd_state_reincarnated;
tsd_state_set(tsd, tsd_state_reincarnated);
return tsd;
}
@@ -291,7 +382,7 @@ tsd_fetch(void) {
static inline bool
tsd_nominal(tsd_t *tsd) {
return (tsd->state <= tsd_state_nominal_max);
return (tsd_state_get(tsd) <= tsd_state_nominal_max);
}
JEMALLOC_ALWAYS_INLINE tsdn_t *

View File

@@ -77,7 +77,10 @@ tsd_wrapper_get(bool init) {
abort();
} else {
wrapper->initialized = false;
JEMALLOC_DIAGNOSTIC_PUSH
JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS
tsd_t initializer = TSD_INITIALIZER;
JEMALLOC_DIAGNOSTIC_POP
wrapper->val = initializer;
}
tsd_wrapper_set(wrapper);
@@ -107,7 +110,10 @@ tsd_boot1(void) {
tsd_boot_wrapper.initialized = false;
tsd_cleanup(&tsd_boot_wrapper.val);
wrapper->initialized = false;
JEMALLOC_DIAGNOSTIC_PUSH
JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS
tsd_t initializer = TSD_INITIALIZER;
JEMALLOC_DIAGNOSTIC_POP
wrapper->val = initializer;
tsd_wrapper_set(wrapper);
}

View File

@@ -3,8 +3,10 @@
#endif
#define JEMALLOC_INTERNAL_TSD_MALLOC_THREAD_CLEANUP_H
extern __thread tsd_t tsd_tls;
extern __thread bool tsd_initialized;
#define JEMALLOC_TSD_TYPE_ATTR(type) __thread type JEMALLOC_TLS_MODEL
extern JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls;
extern JEMALLOC_TSD_TYPE_ATTR(bool) tsd_initialized;
extern bool tsd_booted;
/* Initialization/cleanup. */
@@ -47,7 +49,6 @@ tsd_get_allocates(void) {
/* Get/set. */
JEMALLOC_ALWAYS_INLINE tsd_t *
tsd_get(bool init) {
assert(tsd_booted);
return &tsd_tls;
}
JEMALLOC_ALWAYS_INLINE void

View File

@@ -3,7 +3,9 @@
#endif
#define JEMALLOC_INTERNAL_TSD_TLS_H
extern __thread tsd_t tsd_tls;
#define JEMALLOC_TSD_TYPE_ATTR(type) __thread type JEMALLOC_TLS_MODEL
extern JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls;
extern pthread_key_t tsd_tsd;
extern bool tsd_booted;
@@ -40,7 +42,6 @@ tsd_get_allocates(void) {
/* Get/set. */
JEMALLOC_ALWAYS_INLINE tsd_t *
tsd_get(bool init) {
assert(tsd_booted);
return &tsd_tls;
}

View File

@@ -27,9 +27,9 @@
#define WITNESS_RANK_PROF_BT2GCTX 6U
#define WITNESS_RANK_PROF_TDATAS 7U
#define WITNESS_RANK_PROF_TDATA 8U
#define WITNESS_RANK_PROF_GCTX 9U
#define WITNESS_RANK_BACKGROUND_THREAD 10U
#define WITNESS_RANK_PROF_LOG 9U
#define WITNESS_RANK_PROF_GCTX 10U
#define WITNESS_RANK_BACKGROUND_THREAD 11U
/*
* Used as an argument to witness_assert_depth_to_rank() in order to validate
@@ -37,21 +37,22 @@
* witness_assert_depth_to_rank() is inclusive rather than exclusive, this
* definition can have the same value as the minimally ranked core lock.
*/
#define WITNESS_RANK_CORE 11U
#define WITNESS_RANK_CORE 12U
#define WITNESS_RANK_DECAY 11U
#define WITNESS_RANK_TCACHE_QL 12U
#define WITNESS_RANK_EXTENT_GROW 13U
#define WITNESS_RANK_EXTENTS 14U
#define WITNESS_RANK_EXTENT_AVAIL 15U
#define WITNESS_RANK_DECAY 12U
#define WITNESS_RANK_TCACHE_QL 13U
#define WITNESS_RANK_EXTENT_GROW 14U
#define WITNESS_RANK_EXTENTS 15U
#define WITNESS_RANK_EXTENT_AVAIL 16U
#define WITNESS_RANK_EXTENT_POOL 16U
#define WITNESS_RANK_RTREE 17U
#define WITNESS_RANK_BASE 18U
#define WITNESS_RANK_ARENA_LARGE 19U
#define WITNESS_RANK_EXTENT_POOL 17U
#define WITNESS_RANK_RTREE 18U
#define WITNESS_RANK_BASE 19U
#define WITNESS_RANK_ARENA_LARGE 20U
#define WITNESS_RANK_HOOK 21U
#define WITNESS_RANK_LEAF 0xffffffffU
#define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF
#define WITNESS_RANK_BIN WITNESS_RANK_LEAF
#define WITNESS_RANK_ARENA_STATS WITNESS_RANK_LEAF
#define WITNESS_RANK_DSS WITNESS_RANK_LEAF
#define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF

View File

@@ -10,6 +10,9 @@ extern "C" {
/* Defined if alloc_size attribute is supported. */
#define JEMALLOC_HAVE_ATTR_ALLOC_SIZE
/* Defined if format_arg(...) attribute is supported. */
#define JEMALLOC_HAVE_ATTR_FORMAT_ARG
/* Defined if format(gnu_printf, ...) attribute is supported. */
#define JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF
@@ -66,6 +69,7 @@ extern "C" {
# define je_malloc_stats_print malloc_stats_print
# define je_malloc_usable_size malloc_usable_size
# define je_mallocx mallocx
# define je_smallocx_ea6b3e973b477b8061e0076bb257dbd7f3faa756 smallocx_ea6b3e973b477b8061e0076bb257dbd7f3faa756
# define je_nallocx nallocx
# define je_posix_memalign posix_memalign
# define je_rallocx rallocx
@@ -83,12 +87,13 @@ extern "C" {
#include <limits.h>
#include <strings.h>
#define JEMALLOC_VERSION "5.0.1-0-g896ed3a8b3f41998d4fb4d625d30ac63ef2d51fb"
#define JEMALLOC_VERSION "5.2.1-0-gea6b3e973b477b8061e0076bb257dbd7f3faa756"
#define JEMALLOC_VERSION_MAJOR 5
#define JEMALLOC_VERSION_MINOR 0
#define JEMALLOC_VERSION_MINOR 2
#define JEMALLOC_VERSION_BUGFIX 1
#define JEMALLOC_VERSION_NREV 0
#define JEMALLOC_VERSION_GID "896ed3a8b3f41998d4fb4d625d30ac63ef2d51fb"
#define JEMALLOC_VERSION_GID "ea6b3e973b477b8061e0076bb257dbd7f3faa756"
#define JEMALLOC_VERSION_GID_IDENT ea6b3e973b477b8061e0076bb257dbd7f3faa756
#define MALLOCX_LG_ALIGN(la) ((int)(la))
#if LG_SIZEOF_PTR == 2
@@ -147,6 +152,7 @@ extern "C" {
# define JEMALLOC_EXPORT __declspec(dllimport)
# endif
# endif
# define JEMALLOC_FORMAT_ARG(i)
# define JEMALLOC_FORMAT_PRINTF(s, i)
# define JEMALLOC_NOINLINE __declspec(noinline)
# ifdef __cplusplus
@@ -174,6 +180,11 @@ extern "C" {
# ifndef JEMALLOC_EXPORT
# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default"))
# endif
/*
 * JEMALLOC_FORMAT_ARG(i) expands to the format_arg attribute for parameter
 * index i when the compiler supports it, and to nothing otherwise.  The index
 * is forwarded from the macro argument rather than hard-coded, so the
 * attribute refers to the parameter the caller actually named.
 */
# ifdef JEMALLOC_HAVE_ATTR_FORMAT_ARG
# define JEMALLOC_FORMAT_ARG(i) JEMALLOC_ATTR(__format_arg__(i))
# else
# define JEMALLOC_FORMAT_ARG(i)
# endif
# ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF
# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i))
# elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF)
@@ -369,6 +380,7 @@ struct extent_hooks_s {
# define malloc_stats_print je_malloc_stats_print
# define malloc_usable_size je_malloc_usable_size
# define mallocx je_mallocx
# define smallocx_ea6b3e973b477b8061e0076bb257dbd7f3faa756 je_smallocx_ea6b3e973b477b8061e0076bb257dbd7f3faa756
# define nallocx je_nallocx
# define posix_memalign je_posix_memalign
# define rallocx je_rallocx
@@ -401,6 +413,7 @@ struct extent_hooks_s {
# undef je_malloc_stats_print
# undef je_malloc_usable_size
# undef je_mallocx
# undef je_smallocx_ea6b3e973b477b8061e0076bb257dbd7f3faa756
# undef je_nallocx
# undef je_posix_memalign
# undef je_rallocx

View File

@@ -33,7 +33,9 @@
* Hyper-threaded CPUs may need a special instruction inside spin loops in
* order to yield to another virtual CPU.
*/
#define CPU_SPINWAIT __asm__ volatile("pause")
#define CPU_SPINWAIT @JEM_CPU_SPINWAIT@
/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */
#define HAVE_CPU_SPINWAIT @JEM_HAVE_CPU_SPINWAIT@
/*
* Number of significant bits in virtual addresses. This may be less than the
@@ -47,25 +49,13 @@
/* Defined if GCC __atomic atomics are available. */
#define JEMALLOC_GCC_ATOMIC_ATOMICS 1
/* and the 8-bit variant support. */
#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS 1
/* Defined if GCC __sync atomics are available. */
#define JEMALLOC_GCC_SYNC_ATOMICS 1
/*
* Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and
* __sync_sub_and_fetch(uint32_t *, uint32_t) are available, despite
* __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 not being defined (which means the
* functions are defined in libgcc instead of being inlines).
*/
/* #undef JE_FORCE_SYNC_COMPARE_AND_SWAP_4 */
/*
* Defined if __sync_add_and_fetch(uint64_t *, uint64_t) and
* __sync_sub_and_fetch(uint64_t *, uint64_t) are available, despite
* __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 not being defined (which means the
* functions are defined in libgcc instead of being inlines).
*/
/* #undef JE_FORCE_SYNC_COMPARE_AND_SWAP_8 */
/* and the 8-bit variant support. */
#define JEMALLOC_GCC_U8_SYNC_ATOMICS 1
/*
* Defined if __builtin_clz() and __builtin_clzl() are available.
@@ -77,12 +67,6 @@
*/
/* #undef JEMALLOC_OS_UNFAIR_LOCK */
/*
* Defined if OSSpin*() functions are available, as provided by Darwin, and
* documented in the spinlock(3) manual page.
*/
/* #undef JEMALLOC_OSSPIN */
/* Defined if syscall(2) is usable. */
#define JEMALLOC_USE_SYSCALL
@@ -152,6 +136,9 @@
/* JEMALLOC_STATS enables statistics calculation. */
/* #undef JEMALLOC_STATS */
/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */
/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */
/* JEMALLOC_PROF enables allocation profiling. */
/* #undef JEMALLOC_PROF */
@@ -232,12 +219,30 @@
#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl
#define JEMALLOC_INTERNAL_FFS __builtin_ffs
/*
* popcount*() functions to use for bitmapping.
*/
#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl
#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount
/*
* If defined, explicitly attempt to more uniformly distribute large allocation
* pointer alignments across all cache indices.
*/
#define JEMALLOC_CACHE_OBLIVIOUS
/*
* If defined, enable logging facilities. We make this a configure option to
* avoid taking extra branches everywhere.
*/
/* #undef JEMALLOC_LOG */
/*
* If defined, use readlinkat() (instead of readlink()) to follow
* /etc/malloc_conf.
*/
/* #undef JEMALLOC_READLINKAT */
/*
* Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
*/
@@ -255,6 +260,12 @@
/* Defined if madvise(2) is available. */
#define JEMALLOC_HAVE_MADVISE
/*
* Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE
* arguments to madvise(2).
*/
#define JEMALLOC_HAVE_MADVISE_HUGE
/*
* Methods for purging unused pages differ between operating systems.
*
@@ -270,13 +281,21 @@
*/
@JEM_MADFREE_DEF@ JEMALLOC_PURGE_MADVISE_FREE
#define JEMALLOC_PURGE_MADVISE_DONTNEED
#define JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS 1
#define JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS
/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */
/* #undef JEMALLOC_DEFINE_MADVISE_FREE */
/*
* Defined if MADV_DO[NT]DUMP is supported as an argument to madvise.
*/
#define JEMALLOC_MADVISE_DONTDUMP
/*
* Defined if transparent huge pages (THPs) are supported via the
* MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled.
*/
#define JEMALLOC_THP
/* #undef JEMALLOC_THP */
/* Define if operating system has alloca.h header. */
#define JEMALLOC_HAS_ALLOCA_H 1
@@ -337,8 +356,15 @@
/* If defined, jemalloc takes the malloc/free/etc. symbol names. */
#define JEMALLOC_IS_MALLOC 1
/*
* Defined if strerror_r returns char * if _GNU_SOURCE is defined.
*/
#define JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE
/* Performs additional safety checks when defined. */
/* #undef JEMALLOC_OPT_SAFETY_CHECKS */
/* sizeof(void *) == 2^LG_SIZEOF_PTR. */
#define LG_SIZEOF_PTR @JEM_SIZEDEF@
#endif /* JEMALLOC_INTERNAL_DEFS_H_ */

File diff suppressed because it is too large Load Diff

View File

@@ -4,6 +4,8 @@
#include "jemalloc/internal/assert.h"
JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS
/******************************************************************************/
/* Data. */
@@ -11,38 +13,37 @@
#define BACKGROUND_THREAD_DEFAULT false
/* Read-only after initialization. */
bool opt_background_thread = BACKGROUND_THREAD_DEFAULT;
size_t opt_max_background_threads = MAX_BACKGROUND_THREAD_LIMIT + 1;
/* Used for thread creation, termination and stats. */
malloc_mutex_t background_thread_lock;
/* Indicates global state. Atomic because decay reads this w/o locking. */
atomic_b_t background_thread_enabled_state;
size_t n_background_threads;
size_t max_background_threads;
/* Thread info per-index. */
background_thread_info_t *background_thread_info;
/* False if no necessary runtime support. */
bool can_enable_background_thread;
/******************************************************************************/
#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
#include <dlfcn.h>
static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *,
void *(*)(void *), void *__restrict);
static pthread_once_t once_control = PTHREAD_ONCE_INIT;
static void
pthread_create_wrapper_once(void) {
pthread_create_wrapper_init(void) {
#ifdef JEMALLOC_LAZY_LOCK
if (!isthreaded) {
isthreaded = true;
}
#endif
}
int
pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr,
void *(*start_routine)(void *), void *__restrict arg) {
pthread_once(&once_control, pthread_create_wrapper_once);
pthread_create_wrapper_init();
return pthread_create_fptr(thread, attr, start_routine, arg);
}
@@ -78,7 +79,7 @@ background_thread_info_init(tsdn_t *tsdn, background_thread_info_t *info) {
}
static inline bool
set_current_thread_affinity(UNUSED int cpu) {
set_current_thread_affinity(int cpu) {
#if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY)
cpu_set_t cpuset;
CPU_ZERO(&cpuset);
@@ -286,7 +287,7 @@ background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, unsigne
uint64_t min_interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
unsigned narenas = narenas_total_get();
for (unsigned i = ind; i < narenas; i += ncpus) {
for (unsigned i = ind; i < narenas; i += max_background_threads) {
arena_t *arena = arena_get(tsdn, i, false);
if (!arena) {
continue;
@@ -379,35 +380,32 @@ background_thread_create_signals_masked(pthread_t *thread,
return create_err;
}
static void
static bool
check_background_thread_creation(tsd_t *tsd, unsigned *n_created,
bool *created_threads) {
bool ret = false;
if (likely(*n_created == n_background_threads)) {
return;
return ret;
}
malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_info[0].mtx);
label_restart:
malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock);
for (unsigned i = 1; i < ncpus; i++) {
tsdn_t *tsdn = tsd_tsdn(tsd);
malloc_mutex_unlock(tsdn, &background_thread_info[0].mtx);
for (unsigned i = 1; i < max_background_threads; i++) {
if (created_threads[i]) {
continue;
}
background_thread_info_t *info = &background_thread_info[i];
malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
assert(info->state != background_thread_paused);
malloc_mutex_lock(tsdn, &info->mtx);
/*
* In case of the background_thread_paused state because of
* arena reset, delay the creation.
*/
bool create = (info->state == background_thread_started);
malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
malloc_mutex_unlock(tsdn, &info->mtx);
if (!create) {
continue;
}
/*
* To avoid deadlock with prefork handlers (which waits for the
* mutex held here), unlock before calling pthread_create().
*/
malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock);
pre_reentrancy(tsd, NULL);
int err = background_thread_create_signals_masked(&info->thread,
NULL, background_thread_entry, (void *)(uintptr_t)i);
@@ -423,19 +421,21 @@ label_restart:
abort();
}
}
/* Restart since we unlocked. */
goto label_restart;
/* Return to restart the loop since we unlocked. */
ret = true;
break;
}
malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_info[0].mtx);
malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock);
malloc_mutex_lock(tsdn, &background_thread_info[0].mtx);
return ret;
}
static void
background_thread0_work(tsd_t *tsd) {
/* Thread0 is also responsible for launching / terminating threads. */
VARIABLE_ARRAY(bool, created_threads, ncpus);
VARIABLE_ARRAY(bool, created_threads, max_background_threads);
unsigned i;
for (i = 1; i < ncpus; i++) {
for (i = 1; i < max_background_threads; i++) {
created_threads[i] = false;
}
/* Start working, and create more threads when asked. */
@@ -445,8 +445,10 @@ background_thread0_work(tsd_t *tsd) {
&background_thread_info[0])) {
continue;
}
check_background_thread_creation(tsd, &n_created,
(bool *)&created_threads);
if (check_background_thread_creation(tsd, &n_created,
(bool *)&created_threads)) {
continue;
}
background_work_sleep_once(tsd_tsdn(tsd),
&background_thread_info[0], 0);
}
@@ -456,15 +458,20 @@ background_thread0_work(tsd_t *tsd) {
* the global background_thread mutex (and is waiting) for us.
*/
assert(!background_thread_enabled());
for (i = 1; i < ncpus; i++) {
for (i = 1; i < max_background_threads; i++) {
background_thread_info_t *info = &background_thread_info[i];
assert(info->state != background_thread_paused);
if (created_threads[i]) {
background_threads_disable_single(tsd, info);
} else {
malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
/* Clear in case the thread wasn't created. */
if (info->state != background_thread_stopped) {
/* The thread was not created. */
assert(info->state ==
background_thread_started);
n_background_threads--;
info->state = background_thread_stopped;
}
malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
}
}
@@ -498,9 +505,11 @@ background_work(tsd_t *tsd, unsigned ind) {
static void *
background_thread_entry(void *ind_arg) {
unsigned thread_ind = (unsigned)(uintptr_t)ind_arg;
assert(thread_ind < ncpus);
assert(thread_ind < max_background_threads);
#ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP
pthread_setname_np(pthread_self(), "jemalloc_bg_thd");
#elif defined(__FreeBSD__)
pthread_set_name_np(pthread_self(), "jemalloc_bg_thd");
#endif
if (opt_percpu_arena != percpu_arena_disabled) {
set_current_thread_affinity((int)thread_ind);
@@ -525,14 +534,13 @@ background_thread_init(tsd_t *tsd, background_thread_info_t *info) {
n_background_threads++;
}
/* Create a new background thread if needed. */
bool
background_thread_create(tsd_t *tsd, unsigned arena_ind) {
static bool
background_thread_create_locked(tsd_t *tsd, unsigned arena_ind) {
assert(have_background_thread);
malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
/* We create at most NCPUs threads. */
size_t thread_ind = arena_ind % ncpus;
size_t thread_ind = arena_ind % max_background_threads;
background_thread_info_t *info = &background_thread_info[thread_ind];
bool need_new_thread;
@@ -580,37 +588,53 @@ background_thread_create(tsd_t *tsd, unsigned arena_ind) {
return false;
}
/*
 * Create a new background thread if needed.
 *
 * Locking wrapper: acquires background_thread_lock, delegates to
 * background_thread_create_locked() for arena_ind, releases the lock, and
 * hands back whatever the locked variant returned.
 */
bool
background_thread_create(tsd_t *tsd, unsigned arena_ind) {
	assert(have_background_thread);

	malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock);
	bool result = background_thread_create_locked(tsd, arena_ind);
	malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock);

	return result;
}
bool
background_threads_enable(tsd_t *tsd) {
assert(n_background_threads == 0);
assert(background_thread_enabled());
malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
VARIABLE_ARRAY(bool, marked, ncpus);
VARIABLE_ARRAY(bool, marked, max_background_threads);
unsigned i, nmarked;
for (i = 0; i < ncpus; i++) {
for (i = 0; i < max_background_threads; i++) {
marked[i] = false;
}
nmarked = 0;
/* Thread 0 is required and created at the end. */
marked[0] = true;
/* Mark the threads we need to create for thread 0. */
unsigned n = narenas_total_get();
for (i = 1; i < n; i++) {
if (marked[i % ncpus] ||
if (marked[i % max_background_threads] ||
arena_get(tsd_tsdn(tsd), i, false) == NULL) {
continue;
}
background_thread_info_t *info = &background_thread_info[i];
background_thread_info_t *info = &background_thread_info[
i % max_background_threads];
malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
assert(info->state == background_thread_stopped);
background_thread_init(tsd, info);
malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
marked[i % ncpus] = true;
if (++nmarked == ncpus) {
marked[i % max_background_threads] = true;
if (++nmarked == max_background_threads) {
break;
}
}
return background_thread_create(tsd, 0);
return background_thread_create_locked(tsd, 0);
}
bool
@@ -720,14 +744,14 @@ background_thread_prefork0(tsdn_t *tsdn) {
void
background_thread_prefork1(tsdn_t *tsdn) {
for (unsigned i = 0; i < ncpus; i++) {
for (unsigned i = 0; i < max_background_threads; i++) {
malloc_mutex_prefork(tsdn, &background_thread_info[i].mtx);
}
}
void
background_thread_postfork_parent(tsdn_t *tsdn) {
for (unsigned i = 0; i < ncpus; i++) {
for (unsigned i = 0; i < max_background_threads; i++) {
malloc_mutex_postfork_parent(tsdn,
&background_thread_info[i].mtx);
}
@@ -736,7 +760,7 @@ background_thread_postfork_parent(tsdn_t *tsdn) {
void
background_thread_postfork_child(tsdn_t *tsdn) {
for (unsigned i = 0; i < ncpus; i++) {
for (unsigned i = 0; i < max_background_threads; i++) {
malloc_mutex_postfork_child(tsdn,
&background_thread_info[i].mtx);
}
@@ -749,7 +773,7 @@ background_thread_postfork_child(tsdn_t *tsdn) {
malloc_mutex_lock(tsdn, &background_thread_lock);
n_background_threads = 0;
background_thread_enabled_set(tsdn, false);
for (unsigned i = 0; i < ncpus; i++) {
for (unsigned i = 0; i < max_background_threads; i++) {
background_thread_info_t *info = &background_thread_info[i];
malloc_mutex_lock(tsdn, &info->mtx);
info->state = background_thread_stopped;
@@ -773,9 +797,15 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) {
stats->num_threads = n_background_threads;
uint64_t num_runs = 0;
nstime_init(&stats->run_interval, 0);
for (unsigned i = 0; i < ncpus; i++) {
for (unsigned i = 0; i < max_background_threads; i++) {
background_thread_info_t *info = &background_thread_info[i];
malloc_mutex_lock(tsdn, &info->mtx);
if (malloc_mutex_trylock(tsdn, &info->mtx)) {
/*
* Each background thread run may take a long time;
* avoid waiting on the stats if the thread is active.
*/
continue;
}
if (info->state != background_thread_stopped) {
num_runs += info->tot_n_runs;
nstime_add(&stats->run_interval, &info->tot_sleep_time);
@@ -795,6 +825,39 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) {
#undef BILLION
#undef BACKGROUND_THREAD_MIN_INTERVAL_NS
#ifdef JEMALLOC_HAVE_DLSYM
#include <dlfcn.h>
#endif
/*
 * Resolve pthread_create_fptr the first time it is needed; a no-op once the
 * pointer is set.  Always returns false: when the symbol cannot be resolved
 * and lazy locking is configured the process aborts, otherwise the plain
 * pthread_create symbol is used.
 */
static bool
pthread_create_fptr_init(void) {
	if (pthread_create_fptr == NULL) {
		/*
		 * Look up the next definition first, because 1) with lazy_lock
		 * we provide our own pthread_create wrapper; and 2) the
		 * application may define a wrapper too (and may call malloc
		 * inside it).  Without dlsym support the pointer simply stays
		 * NULL here.
		 */
#ifdef JEMALLOC_HAVE_DLSYM
		pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create");
#endif
		if (pthread_create_fptr == NULL) {
			if (!config_lazy_lock) {
				/* Fall back to the default symbol. */
				pthread_create_fptr = pthread_create;
			} else {
				malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, "
				    "\"pthread_create\")\n");
				abort();
			}
		}
	}
	return false;
}
/*
* When lazy lock is enabled, we need to make sure setting isthreaded before
* taking any background_thread locks. This is called early in ctl (instead of
@@ -805,7 +868,8 @@ void
background_thread_ctl_init(tsdn_t *tsdn) {
malloc_mutex_assert_not_owner(tsdn, &background_thread_lock);
#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
pthread_once(&once_control, pthread_create_wrapper_once);
pthread_create_fptr_init();
pthread_create_wrapper_init();
#endif
}
@@ -818,18 +882,10 @@ background_thread_boot0(void) {
"supports pthread only\n");
return true;
}
#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create");
if (pthread_create_fptr == NULL) {
can_enable_background_thread = false;
if (config_lazy_lock || opt_background_thread) {
malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, "
"\"pthread_create\")\n");
abort();
}
} else {
can_enable_background_thread = true;
if ((config_lazy_lock || opt_background_thread) &&
pthread_create_fptr_init()) {
return true;
}
#endif
return false;
@@ -841,6 +897,11 @@ background_thread_boot1(tsdn_t *tsdn) {
assert(have_background_thread);
assert(narenas_total_get() > 0);
if (opt_max_background_threads > MAX_BACKGROUND_THREAD_LIMIT) {
opt_max_background_threads = DEFAULT_NUM_BACKGROUND_THREAD;
}
max_background_threads = opt_max_background_threads;
background_thread_enabled_set(tsdn, opt_background_thread);
if (malloc_mutex_init(&background_thread_lock,
"background_thread_global",
@@ -848,17 +909,15 @@ background_thread_boot1(tsdn_t *tsdn) {
malloc_mutex_rank_exclusive)) {
return true;
}
if (opt_background_thread) {
background_thread_ctl_init(tsdn);
}
background_thread_info = (background_thread_info_t *)base_alloc(tsdn,
b0get(), ncpus * sizeof(background_thread_info_t), CACHELINE);
b0get(), opt_max_background_threads *
sizeof(background_thread_info_t), CACHELINE);
if (background_thread_info == NULL) {
return true;
}
for (unsigned i = 0; i < ncpus; i++) {
for (unsigned i = 0; i < max_background_threads; i++) {
background_thread_info_t *info = &background_thread_info[i];
/* Thread mutex is rank_inclusive because of thread0. */
if (malloc_mutex_init(&info->mtx, "background_thread",

View File

@@ -12,23 +12,38 @@
static base_t *b0;
metadata_thp_mode_t opt_metadata_thp = METADATA_THP_DEFAULT;
/*
 * Printable names for the metadata THP modes.
 * NOTE(review): presumably indexed by metadata_thp_mode_t, in which case the
 * entry order must match that enum -- confirm against its declaration.
 */
const char *metadata_thp_mode_names[] = {
"disabled",
"auto",
"always"
};
/******************************************************************************/
/*
 * Report whether metadata THP is enabled and the system THP mode recorded at
 * init is thp_mode_default.  The second condition is only evaluated when the
 * first holds.
 */
static inline bool
metadata_thp_madvise(void) {
	if (!metadata_thp_enabled()) {
		return false;
	}
	return init_system_thp_mode == thp_mode_default;
}
static void *
base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) {
void *addr;
bool zero = true;
bool commit = true;
/* Use huge page sizes and alignment regardless of opt_metadata_thp. */
assert(size == HUGEPAGE_CEILING(size));
size_t alignment = HUGEPAGE;
if (extent_hooks == &extent_hooks_default) {
addr = extent_alloc_mmap(NULL, size, PAGE, &zero, &commit);
addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit);
} else {
/* No arena context as we are creating new arenas. */
tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn);
pre_reentrancy(tsd, NULL);
addr = extent_hooks->alloc(extent_hooks, NULL, size, PAGE,
addr = extent_hooks->alloc(extent_hooks, NULL, size, alignment,
&zero, &commit, ind);
post_reentrancy(tsd);
}
@@ -51,16 +66,16 @@ base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr,
*/
if (extent_hooks == &extent_hooks_default) {
if (!extent_dalloc_mmap(addr, size)) {
return;
goto label_done;
}
if (!pages_decommit(addr, size)) {
return;
goto label_done;
}
if (!pages_purge_forced(addr, size)) {
return;
goto label_done;
}
if (!pages_purge_lazy(addr, size)) {
return;
goto label_done;
}
/* Nothing worked. This should never happen. */
not_reached();
@@ -70,27 +85,33 @@ base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr,
if (extent_hooks->dalloc != NULL &&
!extent_hooks->dalloc(extent_hooks, addr, size, true,
ind)) {
goto label_done;
goto label_post_reentrancy;
}
if (extent_hooks->decommit != NULL &&
!extent_hooks->decommit(extent_hooks, addr, size, 0, size,
ind)) {
goto label_done;
goto label_post_reentrancy;
}
if (extent_hooks->purge_forced != NULL &&
!extent_hooks->purge_forced(extent_hooks, addr, size, 0,
size, ind)) {
goto label_done;
goto label_post_reentrancy;
}
if (extent_hooks->purge_lazy != NULL &&
!extent_hooks->purge_lazy(extent_hooks, addr, size, 0, size,
ind)) {
goto label_done;
goto label_post_reentrancy;
}
/* Nothing worked. That's the application's problem. */
label_done:
label_post_reentrancy:
post_reentrancy(tsd);
return;
}
label_done:
if (metadata_thp_madvise()) {
/* Set NOHUGEPAGE after unmap to avoid kernel defrag. */
assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
(size & HUGEPAGE_MASK) == 0);
pages_nohuge(addr, size);
}
}
@@ -105,6 +126,56 @@ base_extent_init(size_t *extent_sn_next, extent_t *extent, void *addr,
extent_binit(extent, addr, size, sn);
}
/*
 * Count the blocks chained from base->blocks.  The list must be non-empty.
 * When with_new_block is true the result is one larger, accounting for a
 * block that is not yet linked into the list.
 */
static size_t
base_get_num_blocks(base_t *base, bool with_new_block) {
	base_block_t *head = base->blocks;
	assert(head != NULL);

	/* Start at one for the head block itself. */
	size_t count = 1;
	if (with_new_block) {
		count++;
	}
	for (base_block_t *cur = head->next; cur != NULL; cur = cur->next) {
		count++;
	}
	return count;
}
/*
 * One-time switch of a base to huge pages under metadata_thp_auto.
 *
 * Must be called with base->mtx held, when a new block is about to be added.
 * Once the block count (including the new block) equals the threshold for
 * this base, auto_thp_switched is set and pages_huge() is applied to every
 * block already on the list.  Subsequent calls return immediately.
 */
static void
base_auto_thp_switch(tsdn_t *tsdn, base_t *base) {
	assert(opt_metadata_thp == metadata_thp_auto);
	malloc_mutex_assert_owner(tsdn, &base->mtx);
	if (base->auto_thp_switched) {
		return;
	}

	/* Base index 0 uses its own threshold. */
	bool reached;
	if (base_ind_get(base) == 0) {
		reached = (base_get_num_blocks(base, true) ==
		    BASE_AUTO_THP_THRESHOLD_A0);
	} else {
		reached = (base_get_num_blocks(base, true) ==
		    BASE_AUTO_THP_THRESHOLD);
	}
	if (!reached) {
		return;
	}

	base->auto_thp_switched = true;
	assert(!config_stats || base->n_thp == 0);

	/* Make the initial blocks THP lazily. */
	for (base_block_t *cur = base->blocks; cur != NULL;) {
		assert((cur->size & HUGEPAGE_MASK) == 0);
		pages_huge(cur, cur->size);
		if (config_stats) {
			/* Count the huge pages covering the used part of the block. */
			base->n_thp += HUGEPAGE_CEILING(cur->size -
			    extent_bsize_get(&cur->extent)) >> LG_HUGEPAGE;
		}
		cur = cur->next;
		/* Only base index 0 may have more than one block here. */
		assert(cur == NULL || (base_ind_get(base) == 0));
	}
}
static void *
base_extent_bump_alloc_helper(extent_t *extent, size_t *gap_size, size_t size,
size_t alignment) {
@@ -124,8 +195,8 @@ base_extent_bump_alloc_helper(extent_t *extent, size_t *gap_size, size_t size,
}
static void
base_extent_bump_alloc_post(tsdn_t *tsdn, base_t *base, extent_t *extent,
size_t gap_size, void *addr, size_t size) {
base_extent_bump_alloc_post(base_t *base, extent_t *extent, size_t gap_size,
void *addr, size_t size) {
if (extent_bsize_get(extent) > 0) {
/*
* Compute the index for the largest size class that does not
@@ -140,23 +211,31 @@ base_extent_bump_alloc_post(tsdn_t *tsdn, base_t *base, extent_t *extent,
base->allocated += size;
/*
* Add one PAGE to base_resident for every page boundary that is
* crossed by the new allocation.
* crossed by the new allocation. Adjust n_thp similarly when
* metadata_thp is enabled.
*/
base->resident += PAGE_CEILING((uintptr_t)addr + size) -
PAGE_CEILING((uintptr_t)addr - gap_size);
assert(base->allocated <= base->resident);
assert(base->resident <= base->mapped);
if (metadata_thp_madvise() && (opt_metadata_thp ==
metadata_thp_always || base->auto_thp_switched)) {
base->n_thp += (HUGEPAGE_CEILING((uintptr_t)addr + size)
- HUGEPAGE_CEILING((uintptr_t)addr - gap_size)) >>
LG_HUGEPAGE;
assert(base->mapped >= base->n_thp << LG_HUGEPAGE);
}
}
}
static void *
base_extent_bump_alloc(tsdn_t *tsdn, base_t *base, extent_t *extent,
size_t size, size_t alignment) {
base_extent_bump_alloc(base_t *base, extent_t *extent, size_t size,
size_t alignment) {
void *ret;
size_t gap_size;
ret = base_extent_bump_alloc_helper(extent, &gap_size, size, alignment);
base_extent_bump_alloc_post(tsdn, base, extent, gap_size, ret, size);
base_extent_bump_alloc_post(base, extent, gap_size, ret, size);
return ret;
}
@@ -166,8 +245,8 @@ base_extent_bump_alloc(tsdn_t *tsdn, base_t *base, extent_t *extent,
* On success a pointer to the initialized base_block_t header is returned.
*/
static base_block_t *
base_block_alloc(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind,
pszind_t *pind_last, size_t *extent_sn_next, size_t size,
base_block_alloc(tsdn_t *tsdn, base_t *base, extent_hooks_t *extent_hooks,
unsigned ind, pszind_t *pind_last, size_t *extent_sn_next, size_t size,
size_t alignment) {
alignment = ALIGNMENT_CEILING(alignment, QUANTUM);
size_t usize = ALIGNMENT_CEILING(size, alignment);
@@ -183,8 +262,8 @@ base_block_alloc(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind,
*/
size_t min_block_size = HUGEPAGE_CEILING(sz_psz2u(header_size + gap_size
+ usize));
pszind_t pind_next = (*pind_last + 1 < NPSIZES) ? *pind_last + 1 :
*pind_last;
pszind_t pind_next = (*pind_last + 1 < sz_psz2ind(SC_LARGE_MAXCLASS)) ?
*pind_last + 1 : *pind_last;
size_t next_block_size = HUGEPAGE_CEILING(sz_pind2sz(pind_next));
size_t block_size = (min_block_size > next_block_size) ? min_block_size
: next_block_size;
@@ -193,6 +272,25 @@ base_block_alloc(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind,
if (block == NULL) {
return NULL;
}
if (metadata_thp_madvise()) {
void *addr = (void *)block;
assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
(block_size & HUGEPAGE_MASK) == 0);
if (opt_metadata_thp == metadata_thp_always) {
pages_huge(addr, block_size);
} else if (opt_metadata_thp == metadata_thp_auto &&
base != NULL) {
/* base != NULL indicates this is not a new base. */
malloc_mutex_lock(tsdn, &base->mtx);
base_auto_thp_switch(tsdn, base);
if (base->auto_thp_switched) {
pages_huge(addr, block_size);
}
malloc_mutex_unlock(tsdn, &base->mtx);
}
}
*pind_last = sz_psz2ind(block_size);
block->size = block_size;
block->next = NULL;
@@ -216,7 +314,7 @@ base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) {
* called.
*/
malloc_mutex_unlock(tsdn, &base->mtx);
base_block_t *block = base_block_alloc(tsdn, extent_hooks,
base_block_t *block = base_block_alloc(tsdn, base, extent_hooks,
base_ind_get(base), &base->pind_last, &base->extent_sn_next, size,
alignment);
malloc_mutex_lock(tsdn, &base->mtx);
@@ -229,8 +327,16 @@ base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) {
base->allocated += sizeof(base_block_t);
base->resident += PAGE_CEILING(sizeof(base_block_t));
base->mapped += block->size;
if (metadata_thp_madvise() &&
!(opt_metadata_thp == metadata_thp_auto
&& !base->auto_thp_switched)) {
assert(base->n_thp > 0);
base->n_thp += HUGEPAGE_CEILING(sizeof(base_block_t)) >>
LG_HUGEPAGE;
}
assert(base->allocated <= base->resident);
assert(base->resident <= base->mapped);
assert(base->n_thp << LG_HUGEPAGE <= base->mapped);
}
return &block->extent;
}
@@ -244,7 +350,7 @@ base_t *
base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
pszind_t pind_last = 0;
size_t extent_sn_next = 0;
base_block_t *block = base_block_alloc(tsdn, extent_hooks, ind,
base_block_t *block = base_block_alloc(tsdn, NULL, extent_hooks, ind,
&pind_last, &extent_sn_next, sizeof(base_t), QUANTUM);
if (block == NULL) {
return NULL;
@@ -265,17 +371,22 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
base->pind_last = pind_last;
base->extent_sn_next = extent_sn_next;
base->blocks = block;
for (szind_t i = 0; i < NSIZES; i++) {
base->auto_thp_switched = false;
for (szind_t i = 0; i < SC_NSIZES; i++) {
extent_heap_new(&base->avail[i]);
}
if (config_stats) {
base->allocated = sizeof(base_block_t);
base->resident = PAGE_CEILING(sizeof(base_block_t));
base->mapped = block->size;
base->n_thp = (opt_metadata_thp == metadata_thp_always) &&
metadata_thp_madvise() ? HUGEPAGE_CEILING(sizeof(base_block_t))
>> LG_HUGEPAGE : 0;
assert(base->allocated <= base->resident);
assert(base->resident <= base->mapped);
assert(base->n_thp << LG_HUGEPAGE <= base->mapped);
}
base_extent_bump_alloc_post(tsdn, base, &block->extent, gap_size, base,
base_extent_bump_alloc_post(base, &block->extent, gap_size, base,
base_size);
return base;
@@ -315,7 +426,7 @@ base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment,
extent_t *extent = NULL;
malloc_mutex_lock(tsdn, &base->mtx);
for (szind_t i = sz_size2index(asize); i < NSIZES; i++) {
for (szind_t i = sz_size2index(asize); i < SC_NSIZES; i++) {
extent = extent_heap_remove_first(&base->avail[i]);
if (extent != NULL) {
/* Use existing space. */
@@ -332,7 +443,7 @@ base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment,
goto label_return;
}
ret = base_extent_bump_alloc(tsdn, base, extent, usize, alignment);
ret = base_extent_bump_alloc(base, extent, usize, alignment);
if (esn != NULL) {
*esn = extent_sn_get(extent);
}
@@ -368,7 +479,7 @@ base_alloc_extent(tsdn_t *tsdn, base_t *base) {
void
base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident,
size_t *mapped) {
size_t *mapped, size_t *n_thp) {
cassert(config_stats);
malloc_mutex_lock(tsdn, &base->mtx);
@@ -377,6 +488,7 @@ base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident,
*allocated = base->allocated;
*resident = base->resident;
*mapped = base->mapped;
*n_thp = base->n_thp;
malloc_mutex_unlock(tsdn, &base->mtx);
}

95
deps/jemalloc/src/bin.c vendored Normal file
View File

@@ -0,0 +1,95 @@
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"
#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/bin.h"
#include "jemalloc/internal/sc.h"
#include "jemalloc/internal/witness.h"
/*
 * Global table of per-bin metadata, one entry per small size class
 * (SC_NBINS entries).  bin_boot() hands it to bin_infos_init() to be filled.
 */
bin_info_t bin_infos[SC_NBINS];
/*
 * Fill in one bin_info_t per small size class.
 *
 * sc_data:         size-class table; entry i describes bin i.
 * bin_shard_sizes: number of shards to record for each bin.
 * bin_infos:       output table (note: the parameter shadows the global of the
 *                  same name).
 *
 * For each bin the region size is 2^lg_base + ndelta * 2^lg_delta, the slab
 * size is pgs pages, and nregs is how many whole regions fit in one slab.
 */
static void
bin_infos_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS],
    bin_info_t bin_infos[SC_NBINS]) {
	for (unsigned binind = 0; binind < SC_NBINS; binind++) {
		sc_t *size_class = &sc_data->sc[binind];
		bin_info_t *info = &bin_infos[binind];

		size_t base_part = (size_t)1U << size_class->lg_base;
		size_t delta_part = (size_t)size_class->ndelta
		    << size_class->lg_delta;

		info->reg_size = base_part + delta_part;
		info->slab_size = (size_class->pgs << LG_PAGE);
		info->nregs = (uint32_t)(info->slab_size / info->reg_size);
		info->n_shards = bin_shard_sizes[binind];

		/*
		 * BITMAP_INFO_INITIALIZER is used as an initializer, so build
		 * a local first and then copy it into the table entry.
		 */
		bitmap_info_t regs_bitmap = BITMAP_INFO_INITIALIZER(
		    info->nregs);
		info->bitmap_info = regs_bitmap;
	}
}
/*
 * Set the shard count for every small bin whose size class lies in
 * [start_size, end_size].
 *
 * Returns true (error) when nshards is 0 or exceeds BIN_SHARDS_MAX.
 * Returns false otherwise -- including when start_size is beyond the small
 * size classes, in which case nothing is modified.  An end_size beyond
 * SC_SMALL_MAXCLASS is clamped to it.
 */
bool
bin_update_shard_size(unsigned bin_shard_sizes[SC_NBINS], size_t start_size,
    size_t end_size, size_t nshards) {
	if (nshards == 0 || nshards > BIN_SHARDS_MAX) {
		return true;
	}
	if (start_size > SC_SMALL_MAXCLASS) {
		/* Range is entirely outside the small classes: no-op. */
		return false;
	}

	size_t clamped_end = (end_size > SC_SMALL_MAXCLASS)
	    ? SC_SMALL_MAXCLASS : end_size;

	/* Compute the index since this may happen before sz init. */
	szind_t first = sz_size2index_compute(start_size);
	szind_t last = sz_size2index_compute(clamped_end);

	unsigned binind = first;
	while (binind <= last) {
		bin_shard_sizes[binind] = (unsigned)nshards;
		binind++;
	}
	return false;
}
/*
 * Reset every entry of bin_shard_sizes (SC_NBINS entries) to the default
 * shard count, N_BIN_SHARDS_DEFAULT.
 */
void
bin_shard_sizes_boot(unsigned bin_shard_sizes[SC_NBINS]) {
	unsigned *slot = bin_shard_sizes;
	unsigned *const end = bin_shard_sizes + SC_NBINS;

	for (; slot != end; slot++) {
		*slot = N_BIN_SHARDS_DEFAULT;
	}
}
/*
 * Populate the global bin_infos table from the size-class data and the
 * per-bin shard counts.  sc_data must already be initialized (asserted).
 */
void
bin_boot(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) {
assert(sc_data->initialized);
bin_infos_init(sc_data, bin_shard_sizes, bin_infos);
}
/*
 * Put a bin into its empty initial state: lock initialized, no current slab,
 * empty non-full heap and full list, and (when stats are compiled in) zeroed
 * statistics.
 *
 * Returns true if the lock could not be initialized (the bin is then left
 * otherwise untouched), false on success.
 */
bool
bin_init(bin_t *bin) {
	bool lock_failed = malloc_mutex_init(&bin->lock, "bin",
	    WITNESS_RANK_BIN, malloc_mutex_rank_exclusive);
	if (lock_failed) {
		return true;
	}

	extent_list_init(&bin->slabs_full);
	extent_heap_new(&bin->slabs_nonfull);
	bin->slabcur = NULL;

	if (config_stats) {
		memset(&bin->stats, 0, sizeof(bin_stats_t));
	}
	return false;
}
/* Pre-fork handler for a bin: forwards the bin's lock to malloc_mutex_prefork(). */
void
bin_prefork(tsdn_t *tsdn, bin_t *bin) {
malloc_mutex_prefork(tsdn, &bin->lock);
}
/*
 * Post-fork handler (parent side) for a bin: forwards the bin's lock to
 * malloc_mutex_postfork_parent().
 */
void
bin_postfork_parent(tsdn_t *tsdn, bin_t *bin) {
malloc_mutex_postfork_parent(tsdn, &bin->lock);
}
/*
 * Post-fork handler (child side) for a bin: forwards the bin's lock to
 * malloc_mutex_postfork_child().
 */
void
bin_postfork_child(tsdn_t *tsdn, bin_t *bin) {
malloc_mutex_postfork_child(tsdn, &bin->lock);
}

View File

@@ -275,7 +275,8 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) {
lg_curcells++;
usize = sz_sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE);
if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) {
if (unlikely(usize == 0
|| usize > SC_LARGE_MAXCLASS)) {
ret = true;
goto label_return;
}
@@ -320,7 +321,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) {
lg_prevbuckets = ckh->lg_curbuckets;
lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1;
usize = sz_sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE);
if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) {
if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) {
return;
}
tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, true, NULL,
@@ -396,7 +397,7 @@ ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash,
ckh->keycomp = keycomp;
usize = sz_sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE);
if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) {
if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) {
ret = true;
goto label_return;
}

1023
deps/jemalloc/src/ctl.c vendored

File diff suppressed because it is too large Load Diff

55
deps/jemalloc/src/div.c vendored Normal file
View File

@@ -0,0 +1,55 @@
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/div.h"
#include "jemalloc/internal/assert.h"
/*
* Suppose we have n = q * d, all integers. We know n and d, and want q = n / d.
*
* For any k, we have (here, all division is exact; not C-style rounding):
* floor(ceil(2^k / d) * n / 2^k) = floor((2^k + r) / d * n / 2^k), where
* r = (-2^k) mod d.
*
* Expanding this out:
* ... = floor(2^k / d * n / 2^k + r / d * n / 2^k)
* = floor(n / d + (r / d) * (n / 2^k)).
*
* The fractional part of n / d is 0 (because of the assumption that d divides n
* exactly), so we have:
* ... = n / d + floor((r / d) * (n / 2^k))
*
* So that our initial expression is equal to the quantity we seek, so long as
* (r / d) * (n / 2^k) < 1.
*
* r is a remainder mod d, so r < d and r / d < 1 always. We can make
* n / 2 ^ k < 1 by setting k = 32. This gets us a value of magic that works.
*/
/*
 * Precompute the multiplier used to divide by d without a division
 * instruction: magic = ceil(2^32 / d).
 *
 * d must be neither 0 nor 1 (both asserted).  In JEMALLOC_DEBUG builds the
 * divisor itself is also recorded in div_info.
 */
void
div_init(div_info_t *div_info, size_t d) {
	/* Dividing by zero is nonsensical. */
	assert(d != 0);
	/*
	 * d == 1 would require magic == 2^32 exactly, which does not fit in a
	 * uint32_t; widening magic would mess with code gen on 32-bit
	 * machines.
	 */
	assert(d != 1);

	const uint64_t pow2_32 = ((uint64_t)1 << 32);

	/*
	 * C division yields floor(2^32 / d); we want the ceiling, so add one
	 * whenever the division left a remainder (i.e. unless d is a power of
	 * two).
	 */
	uint32_t floor_quotient = (uint32_t)(pow2_32 / d);
	uint32_t round_up = (pow2_32 % d != 0) ? 1 : 0;

	div_info->magic = floor_quotient + round_up;
#ifdef JEMALLOC_DEBUG
	div_info->d = d;
#endif
}

File diff suppressed because it is too large Load Diff

View File

@@ -113,7 +113,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size,
cassert(have_dss);
assert(size > 0);
assert(alignment > 0);
assert(alignment == ALIGNMENT_CEILING(alignment, PAGE));
/*
* sbrk() uses a signed increment argument, so take care not to
@@ -154,9 +154,10 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size,
(uintptr_t)gap_addr_page;
if (gap_size_page != 0) {
extent_init(gap, arena, gap_addr_page,
gap_size_page, false, NSIZES,
gap_size_page, false, SC_NSIZES,
arena_extent_sn_next(arena),
extent_state_active, false, true);
extent_state_active, false, true, true,
EXTENT_NOT_HEAD);
}
/*
* Compute the address just past the end of the desired
@@ -198,8 +199,9 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size,
extent_t extent;
extent_init(&extent, arena, ret, size,
size, false, NSIZES,
extent_state_active, false, true);
size, false, SC_NSIZES,
extent_state_active, false, true,
true, EXTENT_NOT_HEAD);
if (extent_purge_forced_wrapper(tsdn,
arena, &extent_hooks, &extent, 0,
size)) {

View File

@@ -21,8 +21,8 @@ bool opt_retain =
void *
extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero,
bool *commit) {
void *ret = pages_map(new_addr, size, ALIGNMENT_CEILING(alignment,
PAGE), commit);
assert(alignment == ALIGNMENT_CEILING(alignment, PAGE));
void *ret = pages_map(new_addr, size, alignment, commit);
if (ret == NULL) {
return NULL;
}

195
deps/jemalloc/src/hook.c vendored Normal file
View File

@@ -0,0 +1,195 @@
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/hook.h"
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/seq.h"
/*
 * One hook slot: the user-supplied hooks plus a flag telling whether the slot
 * is currently occupied.
 */
typedef struct hooks_internal_s hooks_internal_t;
struct hooks_internal_s {
/* The installed callbacks (copied from the caller on install). */
hooks_t hooks;
/* Set to true by hook_install_locked(), cleared by hook_remove_locked(). */
bool in_use;
};
/*
 * NOTE(review): presumably generates seq_hooks_t together with
 * seq_try_load_hooks() / seq_store_hooks(), which are used throughout this
 * file -- confirm against seq.h.
 */
seq_define(hooks_internal_t, hooks)
/* Number of occupied slots; bumped on install, dropped on remove. */
static atomic_u_t nhooks = ATOMIC_INIT(0);
/* The hook slots themselves (HOOK_MAX of them). */
static seq_hooks_t hooks[HOOK_MAX];
/* Held by hook_install() and hook_remove() while they modify the slots. */
static malloc_mutex_t hooks_mu;
/*
 * Initialize hooks_mu, the mutex taken by hook_install() and hook_remove().
 * Returns whatever malloc_mutex_init() returns.
 */
bool
hook_boot() {
return malloc_mutex_init(&hooks_mu, "hooks", WITNESS_RANK_HOOK,
malloc_mutex_rank_exclusive);
}
/*
 * Copy *to_install into the first free hook slot and mark it in use.
 *
 * Returns the address of the slot that was filled (used as an opaque handle
 * by hook_remove()), or NULL when all HOOK_MAX slots are occupied.  The
 * caller is expected to hold hooks_mu.
 */
static void *
hook_install_locked(hooks_t *to_install) {
	hooks_internal_t slot_copy;

	for (int slot = 0; slot < HOOK_MAX; slot++) {
		bool loaded = seq_try_load_hooks(&slot_copy, &hooks[slot]);
		/* We hold mu; no concurrent access. */
		assert(loaded);

		if (slot_copy.in_use) {
			continue;
		}

		slot_copy.hooks = *to_install;
		slot_copy.in_use = true;
		seq_store_hooks(&hooks[slot], &slot_copy);

		unsigned installed = atomic_load_u(&nhooks, ATOMIC_RELAXED);
		atomic_store_u(&nhooks, installed + 1, ATOMIC_RELAXED);
		return &hooks[slot];
	}
	return NULL;
}
/*
 * Install a set of hooks.  Takes hooks_mu around the slot update and, when a
 * slot was obtained, calls tsd_global_slow_inc() before releasing the mutex.
 *
 * Returns an opaque handle to pass to hook_remove(), or NULL if no slot was
 * free.
 */
void *
hook_install(tsdn_t *tsdn, hooks_t *to_install) {
	void *handle;

	malloc_mutex_lock(tsdn, &hooks_mu);
	handle = hook_install_locked(to_install);
	if (handle != NULL) {
		tsd_global_slow_inc(tsdn);
	}
	malloc_mutex_unlock(tsdn, &hooks_mu);

	return handle;
}
/*
 * Mark the given slot as free and decrement the installed-hook count.  The
 * slot must currently be in use (asserted).  The caller is expected to hold
 * hooks_mu.
 */
static void
hook_remove_locked(seq_hooks_t *to_remove) {
	hooks_internal_t slot_copy;

	bool loaded = seq_try_load_hooks(&slot_copy, to_remove);
	/* We hold mu; no concurrent access. */
	assert(loaded);
	/* Should only remove hooks that were added. */
	assert(slot_copy.in_use);

	slot_copy.in_use = false;
	seq_store_hooks(to_remove, &slot_copy);

	unsigned remaining = atomic_load_u(&nhooks, ATOMIC_RELAXED) - 1;
	atomic_store_u(&nhooks, remaining, ATOMIC_RELAXED);
}
/*
 * Remove previously installed hooks.  opaque is the handle returned by
 * hook_install(); in debug builds it is checked to point exactly at one of
 * the entries of the hooks array.  The slot is released and
 * tsd_global_slow_dec() is called while hooks_mu is held.
 */
void
hook_remove(tsdn_t *tsdn, void *opaque) {
	if (config_debug) {
		char *slots_first = (char *)&hooks[0];
		char *slots_limit = (char *)&hooks[HOOK_MAX];
		char *target = (char *)opaque;

		/* In range, and on a slot boundary. */
		assert(target >= slots_first && target < slots_limit
		    && (target - slots_first) % sizeof(seq_hooks_t) == 0);
	}

	seq_hooks_t *slot = (seq_hooks_t *)opaque;

	malloc_mutex_lock(tsdn, &hooks_mu);
	hook_remove_locked(slot);
	tsd_global_slow_dec(tsdn);
	malloc_mutex_unlock(tsdn, &hooks_mu);
}
/*
 * Opens a loop over all HOOK_MAX slots.  On each iteration the slot is loaded
 * into *hooks_internal_ptr; slots whose load does not succeed, or which are
 * not in use, are skipped.  The statements placed between this macro and
 * FOR_EACH_HOOK_END form the loop body and therefore run once per slot that
 * was loaded successfully and is in use.
 */
#define FOR_EACH_HOOK_BEGIN(hooks_internal_ptr) \
for (int for_each_hook_counter = 0; \
for_each_hook_counter < HOOK_MAX; \
for_each_hook_counter++) { \
bool for_each_hook_success = seq_try_load_hooks( \
(hooks_internal_ptr), &hooks[for_each_hook_counter]); \
if (!for_each_hook_success) { \
continue; \
} \
if (!(hooks_internal_ptr)->in_use) { \
continue; \
}
/* Closes the loop opened by FOR_EACH_HOOK_BEGIN. */
#define FOR_EACH_HOOK_END \
}
/*
 * Returns a pointer to the "currently inside a hook" flag for the calling
 * thread: the tcache's in_hook field when a tcache is available, otherwise a
 * shared fallback flag that is permanently true.
 */
static bool *
hook_reentrantp() {
	/*
	 * User reentrancy within hooks is prevented with what is essentially a
	 * thread-local bool that triggers an early exit.
	 *
	 * in_hook is deliberately kept separate from the general reentrancy
	 * machinery, for two reasons:
	 * - Reentrancy is currently switched on during things like extent
	 *   hook execution.  Allocating inside extent hooks is not officially
	 *   supported, but we don't want to break it for the time being, and
	 *   such allocations should probably still be hooked.
	 * - A hook that allocates may need to be reasonably fast (it runs on
	 *   every allocator operation), while reentrant mode is fairly
	 *   heavyweight (tcache disabled, redirection to arena 0, etc.).  We
	 *   may one day turn on reentrant mode here if this proves too hard
	 *   to keep working; that would be easy for us to notice, whereas
	 *   people avoiding hooks because they are too slow would be easy to
	 *   miss.
	 *
	 * The tricky part is that this code can run without access to tsd.
	 * So this function mimics fetching a pointer to thread-local data,
	 * but may instead hand back a pointer to global data when we know the
	 * caller will take the early-exit path: the global reads as "already
	 * in a hook", so the caller bails out and never writes to it.
	 */
	static bool in_hook_global = true;

	tsdn_t *tsdn = tsdn_fetch();
	tcache_t *tcache = tsdn_tcachep_get(tsdn);

	return (tcache != NULL) ? &tcache->in_hook : &in_hook_global;
}
/*
 * Entry guard for the hook_invoke_* functions (usable only in functions
 * returning void, since it expands to bare `return;` statements).  Returns
 * immediately when no hooks are installed or when the flag obtained from
 * hook_reentrantp() is already set; otherwise sets that flag and leaves the
 * local `in_hook` pointer in scope for HOOK_EPILOGUE.
 */
#define HOOK_PROLOGUE \
if (likely(atomic_load_u(&nhooks, ATOMIC_RELAXED) == 0)) { \
return; \
} \
bool *in_hook = hook_reentrantp(); \
if (*in_hook) { \
return; \
} \
*in_hook = true;
/* Clears the flag set by HOOK_PROLOGUE; must follow it in the same scope. */
#define HOOK_EPILOGUE \
*in_hook = false;
/*
 * Call the alloc_hook of every installed hook set that provides one, passing
 * along the hook's `extra` pointer followed by type, result, result_raw and
 * args_raw.  Does nothing when no hooks are installed or when the calling
 * context is already inside a hook (see HOOK_PROLOGUE).
 */
void
hook_invoke_alloc(hook_alloc_t type, void *result, uintptr_t result_raw,
    uintptr_t args_raw[3]) {
	HOOK_PROLOGUE

	hooks_internal_t current;
	FOR_EACH_HOOK_BEGIN(&current)
		hook_alloc callback = current.hooks.alloc_hook;
		if (callback != NULL) {
			callback(current.hooks.extra, type, result, result_raw,
			    args_raw);
		}
	FOR_EACH_HOOK_END

	HOOK_EPILOGUE
}
/*
 * Call the dalloc_hook of every installed hook set that provides one, passing
 * along the hook's `extra` pointer followed by type, address and args_raw.
 * Does nothing when no hooks are installed or when the calling context is
 * already inside a hook (see HOOK_PROLOGUE).
 */
void
hook_invoke_dalloc(hook_dalloc_t type, void *address, uintptr_t args_raw[3]) {
	HOOK_PROLOGUE

	hooks_internal_t current;
	FOR_EACH_HOOK_BEGIN(&current)
		hook_dalloc callback = current.hooks.dalloc_hook;
		if (callback != NULL) {
			callback(current.hooks.extra, type, address, args_raw);
		}
	FOR_EACH_HOOK_END

	HOOK_EPILOGUE
}
/*
 * Call the expand_hook of every installed hook set that provides one, passing
 * along the hook's `extra` pointer followed by type, address, old_usize,
 * new_usize, result_raw and args_raw.  Does nothing when no hooks are
 * installed or when the calling context is already inside a hook (see
 * HOOK_PROLOGUE).
 */
void
hook_invoke_expand(hook_expand_t type, void *address, size_t old_usize,
    size_t new_usize, uintptr_t result_raw, uintptr_t args_raw[4]) {
	HOOK_PROLOGUE

	hooks_internal_t current;
	FOR_EACH_HOOK_BEGIN(&current)
		hook_expand callback = current.hooks.expand_hook;
		if (callback != NULL) {
			callback(current.hooks.extra, type, address, old_usize,
			    new_usize, result_raw, args_raw);
		}
	FOR_EACH_HOOK_END

	HOOK_EPILOGUE
}

File diff suppressed because it is too large Load Diff

View File

@@ -39,12 +39,10 @@ void operator delete(void *ptr, std::size_t size) noexcept;
void operator delete[](void *ptr, std::size_t size) noexcept;
#endif
template <bool IsNoExcept>
void *
newImpl(std::size_t size) noexcept(IsNoExcept) {
void *ptr = je_malloc(size);
if (likely(ptr != nullptr))
return ptr;
JEMALLOC_NOINLINE
static void *
handleOOM(std::size_t size, bool nothrow) {
void *ptr = nullptr;
while (ptr == nullptr) {
std::new_handler handler;
@@ -68,11 +66,22 @@ newImpl(std::size_t size) noexcept(IsNoExcept) {
ptr = je_malloc(size);
}
if (ptr == nullptr && !IsNoExcept)
if (ptr == nullptr && !nothrow)
std::__throw_bad_alloc();
return ptr;
}
/*
 * Shared body of the operator new overloads.  Tries je_malloc() first and, on
 * failure only, defers to the out-of-line handleOOM() with nothrow set to
 * IsNoExcept.
 */
template <bool IsNoExcept>
JEMALLOC_ALWAYS_INLINE
void *
newImpl(std::size_t size) noexcept(IsNoExcept) {
	void *ptr = je_malloc(size);
	if (unlikely(ptr == nullptr)) {
		/* Slow path: allocation failed. */
		return handleOOM(size, IsNoExcept);
	}
	return ptr;
}
void *
operator new(std::size_t size) {
return newImpl<false>(size);
@@ -119,14 +128,14 @@ operator delete(void *ptr, std::size_t size) noexcept {
if (unlikely(ptr == nullptr)) {
return;
}
je_sdallocx(ptr, size, /*flags=*/0);
je_sdallocx_noflags(ptr, size);
}
void operator delete[](void *ptr, std::size_t size) noexcept {
if (unlikely(ptr == nullptr)) {
return;
}
je_sdallocx(ptr, size, /*flags=*/0);
je_sdallocx_noflags(ptr, size);
}
#endif // __cpp_sized_deallocation

View File

@@ -28,7 +28,7 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
assert(!tsdn_null(tsdn) || arena != NULL);
ausize = sz_sa2u(usize, alignment);
if (unlikely(ausize == 0 || ausize > LARGE_MAXCLASS)) {
if (unlikely(ausize == 0 || ausize > SC_LARGE_MAXCLASS)) {
return NULL;
}
@@ -42,7 +42,7 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
*/
is_zeroed = zero;
if (likely(!tsdn_null(tsdn))) {
arena = arena_choose(tsdn_tsd(tsdn), arena);
arena = arena_choose_maybe_huge(tsdn_tsd(tsdn), arena, usize);
}
if (unlikely(arena == NULL) || (extent = arena_extent_alloc_large(tsdn,
arena, usize, alignment, &is_zeroed)) == NULL) {
@@ -109,7 +109,7 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) {
if (diff != 0) {
extent_t *trail = extent_split_wrapper(tsdn, arena,
&extent_hooks, extent, usize + sz_large_pad,
sz_size2index(usize), false, diff, NSIZES, false);
sz_size2index(usize), false, diff, SC_NSIZES, false);
if (trail == NULL) {
return true;
}
@@ -154,17 +154,17 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize,
bool new_mapping;
if ((trail = extents_alloc(tsdn, arena, &extent_hooks,
&arena->extents_dirty, extent_past_get(extent), trailsize, 0,
CACHELINE, false, NSIZES, &is_zeroed_trail, &commit)) != NULL
CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) != NULL
|| (trail = extents_alloc(tsdn, arena, &extent_hooks,
&arena->extents_muzzy, extent_past_get(extent), trailsize, 0,
CACHELINE, false, NSIZES, &is_zeroed_trail, &commit)) != NULL) {
CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) != NULL) {
if (config_stats) {
new_mapping = false;
}
} else {
if ((trail = extent_alloc_wrapper(tsdn, arena, &extent_hooks,
extent_past_get(extent), trailsize, 0, CACHELINE, false,
NSIZES, &is_zeroed_trail, &commit)) == NULL) {
SC_NSIZES, &is_zeroed_trail, &commit)) == NULL) {
return true;
}
if (config_stats) {
@@ -221,9 +221,10 @@ large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min,
size_t oldusize = extent_usize_get(extent);
/* The following should have been caught by callers. */
assert(usize_min > 0 && usize_max <= LARGE_MAXCLASS);
assert(usize_min > 0 && usize_max <= SC_LARGE_MAXCLASS);
/* Both allocation sizes must be large to avoid a move. */
assert(oldusize >= LARGE_MINCLASS && usize_max >= LARGE_MINCLASS);
assert(oldusize >= SC_LARGE_MINCLASS
&& usize_max >= SC_LARGE_MINCLASS);
if (usize_max > oldusize) {
/* Attempt to expand the allocation in-place. */
@@ -270,17 +271,23 @@ large_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize,
}
void *
large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize,
size_t alignment, bool zero, tcache_t *tcache) {
size_t oldusize = extent_usize_get(extent);
large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize,
size_t alignment, bool zero, tcache_t *tcache,
hook_ralloc_args_t *hook_args) {
extent_t *extent = iealloc(tsdn, ptr);
size_t oldusize = extent_usize_get(extent);
/* The following should have been caught by callers. */
assert(usize > 0 && usize <= LARGE_MAXCLASS);
assert(usize > 0 && usize <= SC_LARGE_MAXCLASS);
/* Both allocation sizes must be large to avoid a move. */
assert(oldusize >= LARGE_MINCLASS && usize >= LARGE_MINCLASS);
assert(oldusize >= SC_LARGE_MINCLASS
&& usize >= SC_LARGE_MINCLASS);
/* Try to avoid moving the allocation. */
if (!large_ralloc_no_move(tsdn, extent, usize, usize, zero)) {
hook_invoke_expand(hook_args->is_realloc
? hook_expand_realloc : hook_expand_rallocx, ptr, oldusize,
usize, (uintptr_t)ptr, hook_args->args);
return extent_addr_get(extent);
}
@@ -295,6 +302,12 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize,
return NULL;
}
hook_invoke_alloc(hook_args->is_realloc
? hook_alloc_realloc : hook_alloc_rallocx, ret, (uintptr_t)ret,
hook_args->args);
hook_invoke_dalloc(hook_args->is_realloc
? hook_dalloc_realloc : hook_dalloc_rallocx, ptr, hook_args->args);
size_t copysize = (usize < oldusize) ? usize : oldusize;
memcpy(ret, extent_addr_get(extent), copysize);
isdalloct(tsdn, extent_addr_get(extent), oldusize, tcache, NULL, true);
@@ -318,8 +331,9 @@ large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, extent_t *extent,
large_dalloc_maybe_junk(extent_addr_get(extent),
extent_usize_get(extent));
} else {
malloc_mutex_assert_owner(tsdn, &arena->large_mtx);
/* Only hold the large_mtx if necessary. */
if (!arena_is_auto(arena)) {
malloc_mutex_assert_owner(tsdn, &arena->large_mtx);
extent_list_remove(&arena->large, extent);
}
}
@@ -369,3 +383,13 @@ void
large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent) {
large_prof_tctx_set(tsdn, extent, (prof_tctx_t *)(uintptr_t)1U);
}
/*
 * Returns the profiling allocation time recorded for a large extent; simply
 * forwards to extent_prof_alloc_time_get().
 */
nstime_t
large_prof_alloc_time_get(const extent_t *extent) {
return extent_prof_alloc_time_get(extent);
}
/*
 * Records t as the profiling allocation time of a large extent; simply
 * forwards to extent_prof_alloc_time_set().
 */
void
large_prof_alloc_time_set(extent_t *extent, nstime_t t) {
extent_prof_alloc_time_set(extent, t);
}

78
deps/jemalloc/src/log.c vendored Normal file
View File

@@ -0,0 +1,78 @@
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"
#include "jemalloc/internal/log.h"
/*
 * NUL-terminated list of enabled log-variable name segments, separated by
 * '|'; scanned by log_var_update_state().
 */
char log_var_names[JEMALLOC_LOG_VAR_BUFSIZE];
/*
 * While this is false, log_var_update_state() reports every variable as not
 * enabled without consulting log_var_names.
 */
atomic_b_t log_init_done = ATOMIC_INIT(false);
/*
 * Finds the end of the segment starting at segment_begin.  Returns a pointer
 * to the first '|' delimiter or to the terminating NUL, whichever comes
 * first; for an empty segment this is segment_begin itself.
 */
static const char *
log_var_extract_segment(const char* segment_begin) {
	const char *cursor = segment_begin;

	while (*cursor != '\0' && *cursor != '|') {
		cursor++;
	}
	return cursor;
}
/*
 * Decides whether the segment [segment_begin, segment_end) enables the log
 * variable named [log_var_begin, log_var_end).
 *
 * A match is any of:
 * - the segment is the single character '.', which matches everything;
 * - the segment equals the whole variable name;
 * - the segment is a proper prefix of the name and the next character of the
 *   name is '.', i.e. the segment names an ancestor component.
 *
 * The variable name must be non-empty; the segment may be empty.
 */
static bool
log_var_matches_segment(const char *segment_begin, const char *segment_end,
    const char *log_var_begin, const char *log_var_end) {
	assert(segment_begin <= segment_end);
	assert(log_var_begin < log_var_end);

	ptrdiff_t seg_len = segment_end - segment_begin;
	ptrdiff_t name_len = log_var_end - log_var_begin;

	/* The special '.' segment matches everything. */
	if (seg_len == 1 && *segment_begin == '.') {
		return true;
	}
	/* A segment longer than the name cannot be a prefix of it. */
	if (seg_len > name_len) {
		return false;
	}
	if (strncmp(segment_begin, log_var_begin, seg_len) != 0) {
		return false;
	}
	/*
	 * Prefix matches: accept an exact match, or a prefix that stops right
	 * before a '.' component separator in the name.
	 */
	return seg_len == name_len || log_var_begin[seg_len] == '.';
}
/*
 * Works out whether log_var is enabled by the '|'-separated segments in
 * log_var_names, caches the answer in log_var->state, and returns it
 * (LOG_ENABLED or LOG_INITIALIZED_NOT_ENABLED).
 *
 * If log_init_done is still false, LOG_INITIALIZED_NOT_ENABLED is returned
 * without touching log_var->state.
 */
unsigned
log_var_update_state(log_var_t *log_var) {
	const char *name_begin = log_var->name;
	const char *name_end = log_var->name + strlen(log_var->name);

	/*
	 * If log_init_done is false, the malloc conf has not been parsed yet.
	 * To avoid log-spew, default to not displaying anything.
	 */
	if (!atomic_load_b(&log_init_done, ATOMIC_ACQUIRE)) {
		return LOG_INITIALIZED_NOT_ENABLED;
	}

	/* Start of the segment currently being examined. */
	const char *segment = log_var_names;

	for (;;) {
		const char *segment_end = log_var_extract_segment(segment);
		assert(segment_end < log_var_names + JEMALLOC_LOG_VAR_BUFSIZE);

		bool matched = log_var_matches_segment(segment, segment_end,
		    name_begin, name_end);
		/* Reaching the NUL means this was the last segment. */
		bool last_segment = (*segment_end == '\0');

		if (matched || last_segment) {
			unsigned state = matched ? LOG_ENABLED
			    : LOG_INITIALIZED_NOT_ENABLED;
			atomic_store_u(&log_var->state, state, ATOMIC_RELAXED);
			return state;
		}

		/* Otherwise, skip the delimiter and continue. */
		segment = segment_end + 1;
	}
}

View File

@@ -70,20 +70,7 @@ static char *x2s(uintmax_t x, bool alt_form, bool uppercase, char *s,
/* malloc_message() setup. */
static void
wrtmessage(void *cbopaque, const char *s) {
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_write)
/*
* Use syscall(2) rather than write(2) when possible in order to avoid
* the possibility of memory allocation within libc. This is necessary
* on FreeBSD; most operating systems do not have this problem though.
*
* syscall() returns long or int, depending on platform, so capture the
* unused result in the widest plausible type to avoid compiler
* warnings.
*/
UNUSED long result = syscall(SYS_write, STDERR_FILENO, s, strlen(s));
#else
UNUSED ssize_t result = write(STDERR_FILENO, s, strlen(s));
#endif
malloc_write_fd(STDERR_FILENO, s, strlen(s));
}
JEMALLOC_EXPORT void (*je_malloc_message)(void *, const char *s);
@@ -111,7 +98,7 @@ buferror(int err, char *buf, size_t buflen) {
FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, err, 0,
(LPSTR)buf, (DWORD)buflen, NULL);
return 0;
#elif defined(__GLIBC__) && defined(_GNU_SOURCE)
#elif defined(JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE) && defined(_GNU_SOURCE)
char *b = strerror_r(err, buf, buflen);
if (b != buf) {
strncpy(buf, b, buflen);
@@ -375,7 +362,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) {
} \
} while (0)
#define GET_ARG_NUMERIC(val, len) do { \
switch (len) { \
switch ((unsigned char)len) { \
case '?': \
val = va_arg(ap, int); \
break; \
@@ -645,7 +632,6 @@ malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque,
*/
write_cb = (je_malloc_message != NULL) ? je_malloc_message :
wrtmessage;
cbopaque = NULL;
}
malloc_vsnprintf(buf, sizeof(buf), format, ap);

View File

@@ -4,6 +4,7 @@
#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/malloc_io.h"
#include "jemalloc/internal/spin.h"
#ifndef _CRT_SPINCOUNT
#define _CRT_SPINCOUNT 4000
@@ -45,7 +46,7 @@ JEMALLOC_EXPORT int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
void
malloc_mutex_lock_slow(malloc_mutex_t *mutex) {
mutex_prof_data_t *data = &mutex->prof_data;
UNUSED nstime_t before = NSTIME_ZERO_INITIALIZER;
nstime_t before = NSTIME_ZERO_INITIALIZER;
if (ncpus == 1) {
goto label_spin_done;
@@ -53,8 +54,9 @@ malloc_mutex_lock_slow(malloc_mutex_t *mutex) {
int cnt = 0, max_cnt = MALLOC_MUTEX_MAX_SPIN;
do {
CPU_SPINWAIT;
if (!malloc_mutex_trylock_final(mutex)) {
spin_cpu_spinwait();
if (!atomic_load_b(&mutex->locked, ATOMIC_RELAXED)
&& !malloc_mutex_trylock_final(mutex)) {
data->n_spin_acquired++;
return;
}
@@ -144,8 +146,6 @@ malloc_mutex_init(malloc_mutex_t *mutex, const char *name,
# endif
#elif (defined(JEMALLOC_OS_UNFAIR_LOCK))
mutex->lock = OS_UNFAIR_LOCK_INIT;
#elif (defined(JEMALLOC_OSSPIN))
mutex->lock = 0;
#elif (defined(JEMALLOC_MUTEX_INIT_CB))
if (postpone_init) {
mutex->postponed_next = postponed_mutexes;
@@ -173,7 +173,7 @@ malloc_mutex_init(malloc_mutex_t *mutex, const char *name,
mutex->lock_order = lock_order;
if (lock_order == malloc_mutex_address_ordered) {
witness_init(&mutex->witness, name, rank,
mutex_addr_comp, &mutex);
mutex_addr_comp, mutex);
} else {
witness_init(&mutex->witness, name, rank, NULL, NULL);
}

View File

@@ -10,6 +10,9 @@
#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
#include <sys/sysctl.h>
#ifdef __FreeBSD__
#include <vm/vm_param.h>
#endif
#endif
/******************************************************************************/
@@ -25,6 +28,18 @@ static int mmap_flags;
#endif
static bool os_overcommits;
const char *thp_mode_names[] = {
"default",
"always",
"never",
"not supported"
};
thp_mode_t opt_thp = THP_MODE_DEFAULT;
thp_mode_t init_system_thp_mode;
/* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. */
static bool pages_can_purge_lazy_runtime = true;
/******************************************************************************/
/*
* Function prototypes for static functions that are referenced prior to
@@ -165,6 +180,35 @@ pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
assert(alignment >= PAGE);
assert(ALIGNMENT_ADDR2BASE(addr, alignment) == addr);
#if defined(__FreeBSD__) && defined(MAP_EXCL)
/*
* FreeBSD has mechanisms both to mmap at specific address without
* touching existing mappings, and to mmap with specific alignment.
*/
{
if (os_overcommits) {
*commit = true;
}
int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
int flags = mmap_flags;
if (addr != NULL) {
flags |= MAP_FIXED | MAP_EXCL;
} else {
unsigned alignment_bits = ffs_zu(alignment);
assert(alignment_bits > 1);
flags |= MAP_ALIGNED(alignment_bits - 1);
}
void *ret = mmap(addr, size, prot, flags, -1, 0);
if (ret == MAP_FAILED) {
ret = NULL;
}
return ret;
}
#endif
/*
* Ideally, there would be a way to specify alignment to mmap() (like
* NetBSD has), but in the absence of such a feature, we have to work
@@ -246,19 +290,31 @@ pages_decommit(void *addr, size_t size) {
bool
pages_purge_lazy(void *addr, size_t size) {
assert(PAGE_ADDR2BASE(addr) == addr);
assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
assert(PAGE_CEILING(size) == size);
if (!pages_can_purge_lazy) {
return true;
}
if (!pages_can_purge_lazy_runtime) {
/*
* Built with lazy purge enabled, but detected it was not
* supported on the current system.
*/
return true;
}
#ifdef _WIN32
VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE);
return false;
#elif defined(JEMALLOC_PURGE_MADVISE_FREE) && \
!defined(PAGES_CAN_PURGE_LAZY)
return (madvise(addr, size, MADV_FREE) != 0);
#elif defined(JEMALLOC_PURGE_MADVISE_FREE)
return (madvise(addr, size,
# ifdef MADV_FREE
MADV_FREE
# else
JEMALLOC_MADV_FREE
# endif
) != 0);
#elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
!defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
return (madvise(addr, size, MADV_DONTNEED) != 0);
@@ -287,12 +343,13 @@ pages_purge_forced(void *addr, size_t size) {
#endif
}
bool
pages_huge(void *addr, size_t size) {
static bool
pages_huge_impl(void *addr, size_t size, bool aligned) {
if (aligned) {
assert(HUGEPAGE_ADDR2BASE(addr) == addr);
assert(HUGEPAGE_CEILING(size) == size);
#ifdef JEMALLOC_THP
}
#ifdef JEMALLOC_HAVE_MADVISE_HUGE
return (madvise(addr, size, MADV_HUGEPAGE) != 0);
#else
return true;
@@ -300,23 +357,74 @@ pages_huge(void *addr, size_t size) {
}
bool
pages_nohuge(void *addr, size_t size) {
pages_huge(void *addr, size_t size) {
return pages_huge_impl(addr, size, true);
}
/*
 * Same as pages_huge(), but passes aligned=false so that pages_huge_impl()
 * skips its hugepage-alignment assertions on addr and size.
 */
static bool
pages_huge_unaligned(void *addr, size_t size) {
return pages_huge_impl(addr, size, false);
}
static bool
pages_nohuge_impl(void *addr, size_t size, bool aligned) {
if (aligned) {
assert(HUGEPAGE_ADDR2BASE(addr) == addr);
assert(HUGEPAGE_CEILING(size) == size);
}
#ifdef JEMALLOC_THP
#ifdef JEMALLOC_HAVE_MADVISE_HUGE
return (madvise(addr, size, MADV_NOHUGEPAGE) != 0);
#else
return false;
#endif
}
/*
 * Public entry point: calls pages_nohuge_impl() with aligned=true, so addr
 * and size are asserted to be hugepage-aligned.  Returns that call's result.
 */
bool
pages_nohuge(void *addr, size_t size) {
return pages_nohuge_impl(addr, size, true);
}
/*
 * Same as pages_nohuge(), but passes aligned=false so that
 * pages_nohuge_impl() skips its hugepage-alignment assertions.
 */
static bool
pages_nohuge_unaligned(void *addr, size_t size) {
return pages_nohuge_impl(addr, size, false);
}
/*
 * Applies MADV_DONTDUMP to the size bytes starting at addr when
 * JEMALLOC_MADVISE_DONTDUMP is defined.  addr and size must be page-aligned
 * (asserted).
 *
 * Returns true when madvise() returns nonzero; returns false otherwise,
 * including when the feature is not compiled in (nothing is done then).
 */
bool
pages_dontdump(void *addr, size_t size) {
assert(PAGE_ADDR2BASE(addr) == addr);
assert(PAGE_CEILING(size) == size);
#ifdef JEMALLOC_MADVISE_DONTDUMP
return madvise(addr, size, MADV_DONTDUMP) != 0;
#else
return false;
#endif
}
/*
 * Applies MADV_DODUMP to the size bytes starting at addr when
 * JEMALLOC_MADVISE_DONTDUMP is defined (the same macro gates both
 * directions).  addr and size must be page-aligned (asserted).
 *
 * Returns true when madvise() returns nonzero; returns false otherwise,
 * including when the feature is not compiled in (nothing is done then).
 */
bool
pages_dodump(void *addr, size_t size) {
assert(PAGE_ADDR2BASE(addr) == addr);
assert(PAGE_CEILING(size) == size);
#ifdef JEMALLOC_MADVISE_DONTDUMP
return madvise(addr, size, MADV_DODUMP) != 0;
#else
return false;
#endif
}
static size_t
os_page_detect(void) {
#ifdef _WIN32
SYSTEM_INFO si;
GetSystemInfo(&si);
return si.dwPageSize;
#elif defined(__FreeBSD__)
/*
* This returns the value obtained from
* the auxv vector, avoiding a syscall.
*/
return getpagesize();
#else
long result = sysconf(_SC_PAGESIZE);
if (result == -1) {
@@ -333,9 +441,19 @@ os_overcommits_sysctl(void) {
size_t sz;
sz = sizeof(vm_overcommit);
#if defined(__FreeBSD__) && defined(VM_OVERCOMMIT)
int mib[2];
mib[0] = CTL_VM;
mib[1] = VM_OVERCOMMIT;
if (sysctl(mib, 2, &vm_overcommit, &sz, NULL, 0) != 0) {
return false; /* Error. */
}
#else
if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) {
return false; /* Error. */
}
#endif
return ((vm_overcommit & 0x3) == 0);
}
@@ -351,27 +469,44 @@ static bool
os_overcommits_proc(void) {
int fd;
char buf[1];
ssize_t nread;
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
#if defined(O_CLOEXEC)
fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY |
O_CLOEXEC);
#else
fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY);
if (fd != -1) {
fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
}
#endif
#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat)
#if defined(O_CLOEXEC)
fd = (int)syscall(SYS_openat,
AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
#else
fd = (int)syscall(SYS_openat,
AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY);
if (fd != -1) {
fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
}
#endif
#else
#if defined(O_CLOEXEC)
fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
#else
fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY);
if (fd != -1) {
fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
}
#endif
#endif
if (fd == -1) {
return false; /* Error. */
}
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read)
nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf));
#else
nread = read(fd, &buf, sizeof(buf));
#endif
ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
syscall(SYS_close, fd);
#else
@@ -391,6 +526,75 @@ os_overcommits_proc(void) {
}
#endif
/*
 * Bring the THP advice for [ptr, ptr+size) in line with opt_thp when that
 * differs from the mode detected at startup (init_system_thp_mode).
 *
 * Nothing is done when opt_thp is thp_mode_default or already equals the
 * system mode.  The range does not have to be huge-page aligned: the
 * *_unaligned helpers are used.  Their return values are ignored.
 */
void
pages_set_thp_state(void *ptr, size_t size) {
	const bool nothing_to_do = (opt_thp == thp_mode_default) ||
	    (opt_thp == init_system_thp_mode);
	if (nothing_to_do) {
		return;
	}
	assert(opt_thp != thp_mode_not_supported &&
	    init_system_thp_mode != thp_mode_not_supported);

	if (opt_thp == thp_mode_never) {
		/* Opt this range out of huge pages. */
		assert(init_system_thp_mode == thp_mode_default ||
		    init_system_thp_mode == thp_mode_always);
		pages_nohuge_unaligned(ptr, size);
		return;
	}

	if (opt_thp == thp_mode_always
	    && init_system_thp_mode != thp_mode_never) {
		/* Opt this range in to huge pages. */
		assert(init_system_thp_mode == thp_mode_default);
		pages_huge_unaligned(ptr, size);
	}
}
/*
 * Detect the system-wide transparent huge page mode and record it in
 * init_system_thp_mode.
 *
 * The mode is read from /sys/kernel/mm/transparent_hugepage/enabled and
 * compared against the three known spellings of that file.  On any failure
 * (no MADV_HUGEPAGE support, file cannot be opened or read, empty or
 * unrecognised contents) both opt_thp and init_system_thp_mode are set to
 * thp_mode_not_supported.  If MADV_HUGEPAGE is unavailable while metadata THP
 * is enabled and opt_abort is set, the process aborts.
 */
static void
init_thp_state(void) {
	if (!have_madvise_huge) {
		if (metadata_thp_enabled() && opt_abort) {
			malloc_write("<jemalloc>: no MADV_HUGEPAGE support\n");
			abort();
		}
		goto label_error;
	}

	static const char sys_state_madvise[] = "always [madvise] never\n";
	static const char sys_state_always[] = "[always] madvise never\n";
	static const char sys_state_never[] = "always madvise [never]\n";
	char buf[sizeof(sys_state_madvise)];

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
	int fd = (int)syscall(SYS_open,
	    "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#else
	int fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#endif
	if (fd == -1) {
		goto label_error;
	}

	ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
	syscall(SYS_close, fd);
#else
	close(fd);
#endif

	/*
	 * Reject an empty read as well as an error: with nread == 0 the
	 * zero-length strncmp() below would compare equal to the first
	 * pattern and misreport the mode as "madvise".
	 */
	if (nread <= 0) {
		goto label_error;
	}

	if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_default;
	} else if (strncmp(buf, sys_state_always, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_always;
	} else if (strncmp(buf, sys_state_never, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_never;
	} else {
		goto label_error;
	}
	return;
label_error:
	opt_thp = init_system_thp_mode = thp_mode_not_supported;
}
bool
pages_boot(void) {
os_page = os_page_detect();
@@ -419,5 +623,27 @@ pages_boot(void) {
os_overcommits = false;
#endif
init_thp_state();
#ifdef __FreeBSD__
/*
* FreeBSD doesn't need the check; madvise(2) is known to work.
*/
#else
/* Detect lazy purge runtime support. */
if (pages_can_purge_lazy) {
bool committed = false;
void *madv_free_page = os_pages_map(NULL, PAGE, PAGE, &committed);
if (madv_free_page == NULL) {
return true;
}
assert(pages_can_purge_lazy_runtime);
if (pages_purge_lazy(madv_free_page, PAGE)) {
pages_can_purge_lazy_runtime = false;
}
os_pages_unmap(madv_free_page, PAGE);
}
#endif
return false;
}

View File

@@ -7,6 +7,7 @@
#include "jemalloc/internal/hash.h"
#include "jemalloc/internal/malloc_io.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/emitter.h"
/******************************************************************************/
@@ -23,7 +24,7 @@
*/
#undef _Unwind_Backtrace
#include <unwind.h>
#define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, hooks_libc_hook)
#define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, test_hooks_libc_hook)
#endif
/******************************************************************************/
@@ -38,6 +39,7 @@ bool opt_prof_gdump = false;
bool opt_prof_final = false;
bool opt_prof_leak = false;
bool opt_prof_accum = false;
bool opt_prof_log = false;
char opt_prof_prefix[
/* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
@@ -70,6 +72,100 @@ uint64_t prof_interval = 0;
size_t lg_prof_sample;
/* Life-cycle states of the sampled-allocation log. */
typedef enum prof_logging_state_e prof_logging_state_t;
enum prof_logging_state_e {
prof_logging_state_stopped,
prof_logging_state_started,
prof_logging_state_dumping
};
/*
 * - stopped: log_start never called, or previous log_stop has completed.
 * - started: log_start called, log_stop not called yet. Allocations are logged.
 * - dumping: log_stop called but not finished; samples are not logged anymore.
 *
 * prof_log_start() and prof_log_stop() change this value while holding
 * log_mtx.
 */
prof_logging_state_t prof_logging_state = prof_logging_state_stopped;
#ifdef JEMALLOC_JET
/*
 * Test-only switch, set through prof_log_dummy_set().  While true,
 * prof_log_stop() does not create the log file and the emitter write callback
 * discards its output.
 */
static bool prof_log_dummy = false;
#endif
/*
 * Sequence number embedded in default log file names; incremented each time
 * prof_log_start() generates such a name.
 */
static uint64_t log_seq = 0;
/* Path of the current log file, filled in by prof_log_start(). */
static char log_filename[
/* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
PATH_MAX +
#endif
1];
/* Timestamp for most recent call to log_start(). */
static nstime_t log_start_timestamp = NSTIME_ZERO_INITIALIZER;
/*
 * Next index to hand out in the log_bt and log_thr linked lists; increment
 * these when adding a node to the corresponding list.
 */
static size_t log_bt_index = 0;
static size_t log_thr_index = 0;
/* Linked list node definitions. These are only used in prof.c. */
/*
 * One logged backtrace.  Nodes are allocated with room for bt.len frames in
 * the trailing vec array.
 */
typedef struct prof_bt_node_s prof_bt_node_t;
struct prof_bt_node_s {
/* Next node in the log_bt list; NULL for the last node. */
prof_bt_node_t *next;
/* Position in the list, taken from log_bt_index when the node is created. */
size_t index;
/* Backtrace header; its vec member is pointed at vec below. */
prof_bt_t bt;
/* Variable size backtrace vector pointed to by bt. */
void *vec[1];
};
/*
 * One logged thread.  Nodes are allocated with room for the thread name and
 * its terminating NUL in the trailing name array.
 */
typedef struct prof_thr_node_s prof_thr_node_t;
struct prof_thr_node_s {
/* Next node in the log_thr list; NULL for the last node. */
prof_thr_node_t *next;
/* Position in the list, taken from log_thr_index when the node is created. */
size_t index;
/* Thread uid; the only field used for hashing and comparison. */
uint64_t thr_uid;
/* Variable size: holds strlen(name) + 1 bytes of the thread name. */
char name[1];
};
typedef struct prof_alloc_node_s prof_alloc_node_t;
/*
 * This is output when logging sampled allocations.  One node is appended by
 * prof_try_log() for each sampled object that is freed while logging is
 * started.
 */
struct prof_alloc_node_s {
/* Next node in the log_alloc list; NULL for the last node. */
prof_alloc_node_t *next;
/* Indices into an array of thread data. */
size_t alloc_thr_ind;
size_t free_thr_ind;
/* Indices into an array of backtraces. */
size_t alloc_bt_ind;
size_t free_bt_ind;
/* Allocation and free times, in nanoseconds. */
uint64_t alloc_time_ns;
uint64_t free_time_ns;
/* The usize value passed to prof_try_log() for this object. */
size_t usize;
};
/*
 * Hash sets of the backtraces and threads that have already been logged by an
 * allocation.  They are created in prof_boot2(), or lazily by prof_try_log()
 * when log_tables_initialized is false, and deleted by prof_log_stop().
 */
static bool log_tables_initialized = false;
static ckh_t log_bt_node_set;
static ckh_t log_thr_node_set;
/*
 * Store linked lists for logged data.  Each list keeps a head and a tail
 * pointer; nodes are appended at the tail and freed by prof_log_stop() as
 * they are emitted.
 */
static prof_bt_node_t *log_bt_first = NULL;
static prof_bt_node_t *log_bt_last = NULL;
static prof_thr_node_t *log_thr_first = NULL;
static prof_thr_node_t *log_thr_last = NULL;
static prof_alloc_node_t *log_alloc_first = NULL;
static prof_alloc_node_t *log_alloc_last = NULL;
/* Protects the prof_logging_state and any log_{...} variable. */
static malloc_mutex_t log_mtx;
/*
* Table of mutexes that are shared among gctx's. These are leaf locks, so
* there is no problem with using them for more than one gctx at the same time.
@@ -145,6 +241,12 @@ static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata,
bool even_if_attached);
static char *prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name);
/* Hashtable functions for log_bt_node_set and log_thr_node_set. */
static void prof_thr_node_hash(const void *key, size_t r_hash[2]);
static bool prof_thr_node_keycomp(const void *k1, const void *k2);
static void prof_bt_node_hash(const void *key, size_t r_hash[2]);
static bool prof_bt_node_keycomp(const void *k1, const void *k2);
/******************************************************************************/
/* Red-black trees. */
@@ -242,6 +344,12 @@ prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize,
prof_tctx_t *tctx) {
prof_tctx_set(tsdn, ptr, usize, NULL, tctx);
/* Get the current time and set this in the extent_t. We'll read this
* when free() is called. */
nstime_t t = NSTIME_ZERO_INITIALIZER;
nstime_update(&t);
prof_alloc_time_set(tsdn, ptr, NULL, t);
malloc_mutex_lock(tsdn, tctx->tdata->lock);
tctx->cnts.curobjs++;
tctx->cnts.curbytes += usize;
@@ -253,14 +361,174 @@ prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize,
malloc_mutex_unlock(tsdn, tctx->tdata->lock);
}
/*
 * Return the log index of a backtrace, appending a private copy of it to the
 * log_bt list (and to log_bt_node_set) the first time it is seen.
 *
 * The caller must hold log_mtx and logging must be in the started state; both
 * are asserted.
 *
 * NOTE(review): the node allocation is used without a NULL check.
 */
static size_t
prof_log_bt_index(tsd_t *tsd, prof_bt_t *bt) {
	assert(prof_logging_state == prof_logging_state_started);
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &log_mtx);

	/* Stack probe used as the lookup key; only its bt member is set. */
	prof_bt_node_t probe;
	probe.bt = *bt;

	prof_bt_node_t *cached;
	if (!ckh_search(&log_bt_node_set, (void *)(&probe),
	    (void **)(&cached), NULL)) {
		/* Already in the table: reuse the index handed out before. */
		return cached->index;
	}

	/* First sighting: allocate a node with room for bt->len frames. */
	size_t node_sz = offsetof(prof_bt_node_t, vec) +
	    (bt->len * sizeof(void *));
	prof_bt_node_t *fresh = (prof_bt_node_t *)iallocztm(tsd_tsdn(tsd),
	    node_sz, sz_size2index(node_sz), false, NULL, true,
	    arena_get(TSDN_NULL, 0, true), true);

	/* Append at the tail of the list. */
	if (log_bt_first != NULL) {
		log_bt_last->next = fresh;
	} else {
		log_bt_first = fresh;
	}
	log_bt_last = fresh;

	fresh->next = NULL;
	fresh->index = log_bt_index;
	/*
	 * Take a copy of the frames: bt lives inside a tdata or gctx, which
	 * might be gone by the time prof_log_stop runs.
	 */
	fresh->bt.len = bt->len;
	memcpy(fresh->vec, bt->vec, bt->len * sizeof(void *));
	fresh->bt.vec = fresh->vec;

	log_bt_index++;
	ckh_insert(tsd, &log_bt_node_set, (void *)fresh, NULL);
	return fresh->index;
}
/*
 * Return the log index of the thread identified by thr_uid, appending a node
 * holding a copy of name to the log_thr list (and to log_thr_node_set) the
 * first time that uid is seen.
 *
 * The caller must hold log_mtx and logging must be in the started state; both
 * are asserted.
 *
 * NOTE(review): the node allocation is used without a NULL check.
 */
static size_t
prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) {
	assert(prof_logging_state == prof_logging_state_started);
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &log_mtx);

	/* Stack probe used as the lookup key; only thr_uid is set. */
	prof_thr_node_t probe;
	probe.thr_uid = thr_uid;

	prof_thr_node_t *cached;
	if (!ckh_search(&log_thr_node_set, (void *)(&probe),
	    (void **)(&cached), NULL)) {
		/* Already in the table: reuse the index handed out before. */
		return cached->index;
	}

	/* First sighting: allocate a node sized for the name plus its NUL. */
	size_t node_sz = offsetof(prof_thr_node_t, name) + strlen(name) + 1;
	prof_thr_node_t *fresh = (prof_thr_node_t *)iallocztm(tsd_tsdn(tsd),
	    node_sz, sz_size2index(node_sz), false, NULL, true,
	    arena_get(TSDN_NULL, 0, true), true);

	/* Append at the tail of the list. */
	if (log_thr_first != NULL) {
		log_thr_last->next = fresh;
	} else {
		log_thr_first = fresh;
	}
	log_thr_last = fresh;

	fresh->next = NULL;
	fresh->index = log_thr_index;
	fresh->thr_uid = thr_uid;
	strcpy(fresh->name, name);

	log_thr_index++;
	ckh_insert(tsd, &log_thr_node_set, (void *)fresh, NULL);
	return fresh->index;
}
/*
 * Record the freeing of a sampled object in the allocation log, if logging is
 * currently started.
 *
 * tsd   - the freeing ("consumer") thread.
 * ptr   - the object being freed; its allocation time is read back from it.
 * usize - the usable size recorded for the object.
 * tctx  - the profiling context of the allocating ("producer") thread; the
 *         caller must hold tctx->tdata->lock (asserted).
 *
 * The record is silently dropped when the freeing thread has no tdata, when
 * logging is not in the started state, when the lookup tables cannot be
 * created, or when the record itself cannot be allocated.
 */
static void
prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);

	prof_tdata_t *cons_tdata = prof_tdata_get(tsd, false);
	if (cons_tdata == NULL) {
		/*
		 * We decide not to log these allocations. cons_tdata will be
		 * NULL only when the current thread is in a weird state (e.g.
		 * it's being destroyed).
		 */
		return;
	}

	malloc_mutex_lock(tsd_tsdn(tsd), &log_mtx);

	if (prof_logging_state != prof_logging_state_started) {
		goto label_done;
	}

	if (!log_tables_initialized) {
		bool err1 = ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS,
		    prof_bt_node_hash, prof_bt_node_keycomp);
		bool err2 = ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS,
		    prof_thr_node_hash, prof_thr_node_keycomp);
		if (err1 || err2) {
			goto label_done;
		}
		log_tables_initialized = true;
	}

	nstime_t alloc_time = prof_alloc_time_get(tsd_tsdn(tsd), ptr,
	    (alloc_ctx_t *)NULL);
	nstime_t free_time = NSTIME_ZERO_INITIALIZER;
	nstime_update(&free_time);

	size_t sz = sizeof(prof_alloc_node_t);
	prof_alloc_node_t *new_node = (prof_alloc_node_t *)
	    iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, true,
	    arena_get(TSDN_NULL, 0, true), true);
	if (new_node == NULL) {
		/*
		 * Out of memory: drop this record instead of dereferencing a
		 * NULL node below.  Nothing has been linked in yet.
		 */
		goto label_done;
	}

	const char *prod_thr_name = (tctx->tdata->thread_name == NULL)?
	    "" : tctx->tdata->thread_name;
	const char *cons_thr_name = prof_thread_name_get(tsd);

	prof_bt_t bt;
	/* Initialize the backtrace, using the buffer in tdata to store it. */
	bt_init(&bt, cons_tdata->vec);
	prof_backtrace(&bt);
	prof_bt_t *cons_bt = &bt;

	/* We haven't destroyed tctx yet, so gctx should be good to read. */
	prof_bt_t *prod_bt = &tctx->gctx->bt;

	new_node->next = NULL;
	new_node->alloc_thr_ind = prof_log_thr_index(tsd, tctx->tdata->thr_uid,
	    prod_thr_name);
	new_node->free_thr_ind = prof_log_thr_index(tsd, cons_tdata->thr_uid,
	    cons_thr_name);
	new_node->alloc_bt_ind = prof_log_bt_index(tsd, prod_bt);
	new_node->free_bt_ind = prof_log_bt_index(tsd, cons_bt);
	new_node->alloc_time_ns = nstime_ns(&alloc_time);
	new_node->free_time_ns = nstime_ns(&free_time);
	new_node->usize = usize;

	/* Append the record at the tail of the allocation list. */
	if (log_alloc_first == NULL) {
		log_alloc_first = new_node;
		log_alloc_last = new_node;
	} else {
		log_alloc_last->next = new_node;
		log_alloc_last = new_node;
	}

label_done:
	malloc_mutex_unlock(tsd_tsdn(tsd), &log_mtx);
}
void
prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) {
prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize,
prof_tctx_t *tctx) {
malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
assert(tctx->cnts.curobjs > 0);
assert(tctx->cnts.curbytes >= usize);
tctx->cnts.curobjs--;
tctx->cnts.curbytes -= usize;
prof_try_log(tsd, ptr, usize, tctx);
if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) {
prof_tctx_destroy(tsd, tctx);
} else {
@@ -871,15 +1139,12 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) {
void
prof_sample_threshold_update(prof_tdata_t *tdata) {
#ifdef JEMALLOC_PROF
uint64_t r;
double u;
if (!config_prof) {
return;
}
if (lg_prof_sample == 0) {
tdata->bytes_until_sample = 0;
tsd_bytes_until_sample_set(tsd_fetch(), 0);
return;
}
@@ -901,11 +1166,16 @@ prof_sample_threshold_update(prof_tdata_t *tdata) {
* pp 500
* (http://luc.devroye.org/rnbookindex.html)
*/
r = prng_lg_range_u64(&tdata->prng_state, 53);
u = (double)r * (1.0/9007199254740992.0L);
tdata->bytes_until_sample = (uint64_t)(log(u) /
uint64_t r = prng_lg_range_u64(&tdata->prng_state, 53);
double u = (double)r * (1.0/9007199254740992.0L);
uint64_t bytes_until_sample = (uint64_t)(log(u) /
log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample))))
+ (uint64_t)1U;
if (bytes_until_sample > SSIZE_MAX) {
bytes_until_sample = SSIZE_MAX;
}
tsd_bytes_until_sample_set(tsd_fetch(), bytes_until_sample);
#endif
}
@@ -978,7 +1248,7 @@ prof_dump_flush(bool propagate_err) {
cassert(config_prof);
err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
err = malloc_write_fd(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
if (err == -1) {
if (!propagate_err) {
malloc_write("<jemalloc>: write() failed during heap "
@@ -1022,7 +1292,7 @@ prof_dump_write(bool propagate_err, const char *s) {
}
}
if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) {
if (prof_dump_buf_end + slen - i <= PROF_DUMP_BUFSIZE) {
/* Finish writing. */
n = slen - i;
} else {
@@ -1033,6 +1303,7 @@ prof_dump_write(bool propagate_err, const char *s) {
prof_dump_buf_end += n;
i += n;
}
assert(i == slen);
return false;
}
@@ -1409,7 +1680,15 @@ prof_open_maps(const char *format, ...) {
va_start(ap, format);
malloc_vsnprintf(filename, sizeof(filename), format, ap);
va_end(ap);
#if defined(O_CLOEXEC)
mfd = open(filename, O_RDONLY | O_CLOEXEC);
#else
mfd = open(filename, O_RDONLY);
if (mfd != -1) {
fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC);
}
#endif
return mfd;
}
@@ -1463,8 +1742,9 @@ prof_dump_maps(bool propagate_err) {
goto label_return;
}
}
nread = read(mfd, &prof_dump_buf[prof_dump_buf_end],
PROF_DUMP_BUFSIZE - prof_dump_buf_end);
nread = malloc_read_fd(mfd,
&prof_dump_buf[prof_dump_buf_end], PROF_DUMP_BUFSIZE
- prof_dump_buf_end);
} while (nread > 0);
} else {
ret = true;
@@ -1772,7 +2052,7 @@ prof_idump(tsdn_t *tsdn) {
cassert(config_prof);
if (!prof_booted || tsdn_null(tsdn)) {
if (!prof_booted || tsdn_null(tsdn) || !prof_active_get_unlocked()) {
return;
}
tsd = tsdn_tsd(tsdn);
@@ -1829,7 +2109,7 @@ prof_gdump(tsdn_t *tsdn) {
cassert(config_prof);
if (!prof_booted || tsdn_null(tsdn)) {
if (!prof_booted || tsdn_null(tsdn) || !prof_active_get_unlocked()) {
return;
}
tsd = tsdn_tsd(tsdn);
@@ -1878,6 +2158,33 @@ prof_bt_keycomp(const void *k1, const void *k2) {
return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
}
static void
prof_bt_node_hash(const void *key, size_t r_hash[2]) {
const prof_bt_node_t *bt_node = (prof_bt_node_t *)key;
prof_bt_hash((void *)(&bt_node->bt), r_hash);
}
static bool
prof_bt_node_keycomp(const void *k1, const void *k2) {
const prof_bt_node_t *bt_node1 = (prof_bt_node_t *)k1;
const prof_bt_node_t *bt_node2 = (prof_bt_node_t *)k2;
return prof_bt_keycomp((void *)(&bt_node1->bt),
(void *)(&bt_node2->bt));
}
/*
 * Hash callback for log_thr_node_set: hashes only the node's thr_uid, using a
 * fixed seed.
 */
static void
prof_thr_node_hash(const void *key, size_t r_hash[2]) {
	const uint64_t *uid = &((const prof_thr_node_t *)key)->thr_uid;

	hash(uid, sizeof(uint64_t), 0x94122f35U, r_hash);
}
/*
 * Key-comparison callback for log_thr_node_set: two nodes are equal when
 * their thr_uid fields are equal.
 */
static bool
prof_thr_node_keycomp(const void *k1, const void *k2) {
	const uint64_t lhs_uid = ((const prof_thr_node_t *)k1)->thr_uid;
	const uint64_t rhs_uid = ((const prof_thr_node_t *)k2)->thr_uid;

	return lhs_uid == rhs_uid;
}
static uint64_t
prof_thr_uid_alloc(tsdn_t *tsdn) {
uint64_t thr_uid;
@@ -2110,6 +2417,368 @@ prof_active_set(tsdn_t *tsdn, bool active) {
return prof_active_old;
}
#ifdef JEMALLOC_JET
/* Test helper: number of nodes currently on the log_bt list. */
size_t
prof_log_bt_count(void) {
	size_t total = 0;

	for (prof_bt_node_t *cur = log_bt_first; cur != NULL;
	    cur = cur->next) {
		total++;
	}
	return total;
}
/* Test helper: number of nodes currently on the log_alloc list. */
size_t
prof_log_alloc_count(void) {
	size_t total = 0;

	for (prof_alloc_node_t *cur = log_alloc_first; cur != NULL;
	    cur = cur->next) {
		total++;
	}
	return total;
}
/* Test helper: number of nodes currently on the log_thr list. */
size_t
prof_log_thr_count(void) {
	size_t total = 0;

	for (prof_thr_node_t *cur = log_thr_first; cur != NULL;
	    cur = cur->next) {
		total++;
	}
	return total;
}
/* Test helper: true while the log is in the started state. */
bool
prof_log_is_logging(void) {
	const bool started =
	    (prof_logging_state == prof_logging_state_started);

	return started;
}
/*
 * Test helper: consistency check of the logging data structures.
 *
 * Returns true when an inconsistency is found, false when everything checks
 * out.  The following are verified:
 *  - the lookup tables do not exist while logging is stopped;
 *  - each list's tail pointer really is the last node;
 *  - all three lists are empty while logging is stopped;
 *  - every allocation record refers to existing thread/backtrace indices and
 *    was not freed before it was allocated.
 */
bool
prof_log_rep_check(void) {
	if (prof_logging_state == prof_logging_state_stopped
	    && log_tables_initialized) {
		return true;
	}

	if (log_bt_last != NULL && log_bt_last->next != NULL) {
		return true;
	}
	if (log_thr_last != NULL && log_thr_last->next != NULL) {
		return true;
	}
	if (log_alloc_last != NULL && log_alloc_last->next != NULL) {
		return true;
	}

	size_t bt_count = prof_log_bt_count();
	size_t thr_count = prof_log_thr_count();
	size_t alloc_count = prof_log_alloc_count();

	if (prof_logging_state == prof_logging_state_stopped) {
		if (bt_count != 0 || thr_count != 0 || alloc_count != 0) {
			return true;
		}
	}

	prof_alloc_node_t *node = log_alloc_first;
	while (node != NULL) {
		if (node->alloc_bt_ind >= bt_count) {
			return true;
		}
		if (node->free_bt_ind >= bt_count) {
			return true;
		}
		if (node->alloc_thr_ind >= thr_count) {
			return true;
		}
		if (node->free_thr_ind >= thr_count) {
			return true;
		}
		if (node->alloc_time_ns > node->free_time_ns) {
			return true;
		}
		node = node->next;
	}

	return false;
}
/* Test helper: set the prof_log_dummy switch to the given value. */
void
prof_log_dummy_set(bool new_value) {
	const bool requested = new_value;

	prof_log_dummy = requested;
}
#endif
/*
 * Begin logging sampled allocations.
 *
 * filename - destination path for the log, or NULL to generate a default
 *            name of the form <prefix>.<pid>.<seq>.json.
 *
 * Returns true on failure: profiling is disabled or not booted, the log is
 * not in the stopped state, or filename does not fit in log_filename.
 * Returns false once the state has been switched to started and the start
 * timestamp recorded.
 */
bool
prof_log_start(tsdn_t *tsdn, const char *filename) {
	if (!opt_prof || !prof_booted) {
		return true;
	}

	const size_t buf_size = PATH_MAX + 1;
	bool failed;

	malloc_mutex_lock(tsdn, &log_mtx);

	if (prof_logging_state != prof_logging_state_stopped) {
		/* Already started, or a dump is still in progress. */
		failed = true;
	} else if (filename != NULL && strlen(filename) >= buf_size) {
		/* Caller-supplied path is too long for the buffer. */
		failed = true;
	} else {
		if (filename == NULL) {
			/* Make default name. */
			malloc_snprintf(log_filename, buf_size,
			    "%s.%d.%"FMTu64".json", opt_prof_prefix,
			    prof_getpid(), log_seq);
			log_seq++;
		} else {
			strcpy(log_filename, filename);
		}
		prof_logging_state = prof_logging_state_started;
		nstime_update(&log_start_timestamp);
		failed = false;
	}

	malloc_mutex_unlock(tsdn, &log_mtx);

	return failed;
}
/* Used as an atexit function to stop logging on exit. */
static void
prof_log_stop_final(void) {
tsd_t *tsd = tsd_fetch();
prof_log_stop(tsd_tsdn(tsd));
}
/* Opaque state handed to prof_emitter_write_cb() through the emitter. */
struct prof_emitter_cb_arg_s {
/* File descriptor the emitted text is written to. */
int fd;
/* Result of the most recent write performed by the callback. */
ssize_t ret;
};
/*
 * Emitter output callback: writes to_write to the descriptor stored in the
 * prof_emitter_cb_arg_s passed as opaque and records the result in its ret
 * field.
 *
 * The write goes through malloc_write_fd(), the same wrapper the heap-dump
 * path in this file uses, instead of calling write() directly.  In test
 * builds with prof_log_dummy set, nothing is written and ret is left
 * untouched.
 */
static void
prof_emitter_write_cb(void *opaque, const char *to_write) {
	struct prof_emitter_cb_arg_s *arg =
	    (struct prof_emitter_cb_arg_s *)opaque;
	size_t bytes = strlen(to_write);
#ifdef JEMALLOC_JET
	if (prof_log_dummy) {
		return;
	}
#endif
	arg->ret = malloc_write_fd(arg->fd, to_write, bytes);
}
/*
 * Each prof_log_emit_{...} helper walks one of the log's linked lists, writes
 * every node to the json output, and frees the node once it has been written.
 */
static void
prof_log_emit_threads(tsd_t *tsd, emitter_t *emitter) {
	emitter_json_array_kv_begin(emitter, "threads");

	prof_thr_node_t *following;
	for (prof_thr_node_t *cur = log_thr_first; cur != NULL;
	    cur = following) {
		/* Remember the successor before the node is freed. */
		following = cur->next;

		emitter_json_object_begin(emitter);

		emitter_json_kv(emitter, "thr_uid", emitter_type_uint64,
		    &cur->thr_uid);

		/* The string emitter takes the address of a char pointer. */
		char *name_ptr = cur->name;
		emitter_json_kv(emitter, "thr_name", emitter_type_string,
		    &name_ptr);

		emitter_json_object_end(emitter);

		idalloc(tsd, cur);
	}

	emitter_json_array_end(emitter);
}
/*
 * Emit the "stack_traces" array: one json array of "%p"-formatted frame
 * addresses per logged backtrace.  Every node is freed after it is written.
 */
static void
prof_log_emit_traces(tsd_t *tsd, emitter_t *emitter) {
	emitter_json_array_kv_begin(emitter, "stack_traces");

	/*
	 * Room for one formatted pointer: two hex digits per byte, two more
	 * characters for "0x", and one for the terminating '\0'.
	 */
	char frame_buf[2 * sizeof(intptr_t) + 3];
	const size_t frame_buf_sz = sizeof(frame_buf);

	prof_bt_node_t *following;
	for (prof_bt_node_t *cur = log_bt_first; cur != NULL;
	    cur = following) {
		/* Remember the successor before the node is freed. */
		following = cur->next;

		emitter_json_array_begin(emitter);
		for (size_t frame = 0; frame < cur->bt.len; frame++) {
			malloc_snprintf(frame_buf, frame_buf_sz, "%p",
			    cur->bt.vec[frame]);
			char *frame_str = frame_buf;
			emitter_json_value(emitter, emitter_type_string,
			    &frame_str);
		}
		emitter_json_array_end(emitter);

		idalloc(tsd, cur);
	}

	emitter_json_array_end(emitter);
}
/*
 * Emit the "allocations" array: one json object per logged allocation record,
 * holding the thread and backtrace indices, the allocation and free
 * timestamps, and the usable size.  Every node is freed after it is written.
 */
static void
prof_log_emit_allocs(tsd_t *tsd, emitter_t *emitter) {
	emitter_json_array_kv_begin(emitter, "allocations");
	prof_alloc_node_t *alloc_node = log_alloc_first;
	prof_alloc_node_t *alloc_old_node;
	while (alloc_node != NULL) {
		emitter_json_object_begin(emitter);

		emitter_json_kv(emitter, "alloc_thread", emitter_type_size,
		    &alloc_node->alloc_thr_ind);

		emitter_json_kv(emitter, "free_thread", emitter_type_size,
		    &alloc_node->free_thr_ind);

		emitter_json_kv(emitter, "alloc_trace", emitter_type_size,
		    &alloc_node->alloc_bt_ind);

		emitter_json_kv(emitter, "free_trace", emitter_type_size,
		    &alloc_node->free_bt_ind);

		emitter_json_kv(emitter, "alloc_timestamp",
		    emitter_type_uint64, &alloc_node->alloc_time_ns);

		emitter_json_kv(emitter, "free_timestamp", emitter_type_uint64,
		    &alloc_node->free_time_ns);

		/*
		 * usize is a size_t, so it is emitted with the size_t type
		 * tag like the index fields above; tagging it as uint64 would
		 * mismatch the field's width where size_t is 32 bits.
		 */
		emitter_json_kv(emitter, "usize", emitter_type_size,
		    &alloc_node->usize);

		emitter_json_object_end(emitter);

		/* Advance before freeing the node just written. */
		alloc_old_node = alloc_node;
		alloc_node = alloc_node->next;
		idalloc(tsd, alloc_old_node);
	}
	emitter_json_array_end(emitter);
}
/*
 * Emit the "info" object: time elapsed since prof_log_start() in
 * nanoseconds, the jemalloc version string, lg of the sample rate, and the
 * process id.
 */
static void
prof_log_emit_metadata(emitter_t *emitter) {
	emitter_json_object_kv_begin(emitter, "info");

	nstime_t now = NSTIME_ZERO_INITIALIZER;

	nstime_update(&now);
	uint64_t ns = nstime_ns(&now) - nstime_ns(&log_start_timestamp);
	emitter_json_kv(emitter, "duration", emitter_type_uint64, &ns);

	char *vers = JEMALLOC_VERSION;
	emitter_json_kv(emitter, "version",
	    emitter_type_string, &vers);

	/*
	 * lg_prof_sample is a size_t.  Copy it into an int so the pointer
	 * handed to the emitter refers to an object of the type named by
	 * emitter_type_int, rather than to part of a wider variable.
	 */
	int lg_sample_rate = (int)lg_prof_sample;
	emitter_json_kv(emitter, "lg_sample_rate",
	    emitter_type_int, &lg_sample_rate);

	int pid = prof_getpid();
	emitter_json_kv(emitter, "pid", emitter_type_int, &pid);

	emitter_json_object_end(emitter);
}
/*
 * Stop logging, write the collected log to log_filename as json, and release
 * all logged data.
 *
 * Returns true on failure: profiling is disabled or not booted, the log is
 * not in the started state, the log file cannot be created, or closing it
 * fails.  Returns false on success.
 *
 * If the log file cannot be created (and opt_abort is off), the state is
 * rolled back to started before returning, so the collected data is kept and
 * a later stop can retry.  Without the rollback the state would stay at
 * dumping forever and every later start/stop would fail.
 */
bool
prof_log_stop(tsdn_t *tsdn) {
	if (!opt_prof || !prof_booted) {
		return true;
	}

	tsd_t *tsd = tsdn_tsd(tsdn);
	malloc_mutex_lock(tsdn, &log_mtx);

	if (prof_logging_state != prof_logging_state_started) {
		malloc_mutex_unlock(tsdn, &log_mtx);
		return true;
	}

	/*
	 * Set the state to dumping. We'll set it to stopped when we're done.
	 * Since other threads won't be able to start/stop/log when the state is
	 * dumping, we don't have to hold the lock during the whole method.
	 */
	prof_logging_state = prof_logging_state_dumping;
	malloc_mutex_unlock(tsdn, &log_mtx);

	emitter_t emitter;

	/* Create a file. */
	int fd;
#ifdef JEMALLOC_JET
	if (prof_log_dummy) {
		fd = 0;
	} else {
		fd = creat(log_filename, 0644);
	}
#else
	fd = creat(log_filename, 0644);
#endif

	if (fd == -1) {
		malloc_printf("<jemalloc>: creat() for log file \"%s\" "
			      " failed with %d\n", log_filename, errno);
		if (opt_abort) {
			abort();
		}
		/* Nothing was emitted or freed yet: undo the transition. */
		malloc_mutex_lock(tsdn, &log_mtx);
		prof_logging_state = prof_logging_state_started;
		malloc_mutex_unlock(tsdn, &log_mtx);
		return true;
	}

	/* Emit to json. */
	struct prof_emitter_cb_arg_s arg;
	arg.fd = fd;
	arg.ret = 0;
	emitter_init(&emitter, emitter_output_json, &prof_emitter_write_cb,
	    (void *)(&arg));

	emitter_begin(&emitter);
	prof_log_emit_metadata(&emitter);
	prof_log_emit_threads(tsd, &emitter);
	prof_log_emit_traces(tsd, &emitter);
	prof_log_emit_allocs(tsd, &emitter);
	emitter_end(&emitter);

	/* Reset global state. */
	if (log_tables_initialized) {
		ckh_delete(tsd, &log_bt_node_set);
		ckh_delete(tsd, &log_thr_node_set);
	}
	log_tables_initialized = false;
	log_bt_index = 0;
	log_thr_index = 0;
	log_bt_first = NULL;
	log_bt_last = NULL;
	log_thr_first = NULL;
	log_thr_last = NULL;
	log_alloc_first = NULL;
	log_alloc_last = NULL;

	malloc_mutex_lock(tsdn, &log_mtx);
	prof_logging_state = prof_logging_state_stopped;
	malloc_mutex_unlock(tsdn, &log_mtx);

#ifdef JEMALLOC_JET
	if (prof_log_dummy) {
		/* No file was opened in dummy mode, so nothing to close. */
		return false;
	}
#endif
	/* A non-zero result from close() is reported as failure. */
	return close(fd) != 0;
}
const char *
prof_thread_name_get(tsd_t *tsd) {
prof_tdata_t *tdata;
@@ -2346,6 +3015,35 @@ prof_boot2(tsd_t *tsd) {
}
}
if (opt_prof_log) {
prof_log_start(tsd_tsdn(tsd), NULL);
}
if (atexit(prof_log_stop_final) != 0) {
malloc_write("<jemalloc>: Error in atexit() "
"for logging\n");
if (opt_abort) {
abort();
}
}
if (malloc_mutex_init(&log_mtx, "prof_log",
WITNESS_RANK_PROF_LOG, malloc_mutex_rank_exclusive)) {
return true;
}
if (ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS,
prof_bt_node_hash, prof_bt_node_keycomp)) {
return true;
}
if (ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS,
prof_thr_node_hash, prof_thr_node_keycomp)) {
return true;
}
log_tables_initialized = true;
gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd),
b0get(), PROF_NCTX_LOCKS * sizeof(malloc_mutex_t),
CACHELINE);
@@ -2373,16 +3071,14 @@ prof_boot2(tsd_t *tsd) {
return true;
}
}
}
#ifdef JEMALLOC_PROF_LIBGCC
/*
* Cause the backtracing machinery to allocate its internal state
* before enabling profiling.
* Cause the backtracing machinery to allocate its internal
* state before enabling profiling.
*/
_Unwind_Backtrace(prof_unwind_init_callback, NULL);
#endif
}
prof_booted = true;
return false;

View File

@@ -39,7 +39,7 @@ rtree_node_dalloc_impl(tsdn_t *tsdn, rtree_t *rtree, rtree_node_elm_t *node) {
/* Nodes are never deleted during normal operation. */
not_reached();
}
UNUSED rtree_node_dalloc_t *JET_MUTABLE rtree_node_dalloc =
rtree_node_dalloc_t *JET_MUTABLE rtree_node_dalloc =
rtree_node_dalloc_impl;
static rtree_leaf_elm_t *
@@ -54,7 +54,7 @@ rtree_leaf_dalloc_impl(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *leaf) {
/* Leaves are never deleted during normal operation. */
not_reached();
}
UNUSED rtree_leaf_dalloc_t *JET_MUTABLE rtree_leaf_dalloc =
rtree_leaf_dalloc_t *JET_MUTABLE rtree_leaf_dalloc =
rtree_leaf_dalloc_impl;
#ifdef JEMALLOC_JET

24
deps/jemalloc/src/safety_check.c vendored Normal file
View File

@@ -0,0 +1,24 @@
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"
static void (*safety_check_abort)(const char *message);
/*
 * Install the handler that safety_check_fail() calls with the formatted
 * message.  Passing NULL restores the built-in behaviour (write the message,
 * then abort()).
 */
void
safety_check_set_abort(void (*abort_fn)(const char *)) {
	safety_check_abort = abort_fn;
}
/*
 * Report a failed safety check.  The printf-style arguments are formatted
 * into a local buffer of MALLOC_PRINTF_BUFSIZE bytes.  If a handler was
 * installed with safety_check_set_abort() it receives the message; otherwise
 * the message is written with malloc_write() and the process aborts.
 */
void
safety_check_fail(const char *format, ...) {
	char msg[MALLOC_PRINTF_BUFSIZE];

	va_list args;
	va_start(args, format);
	malloc_vsnprintf(msg, sizeof(msg), format, args);
	va_end(args);

	if (safety_check_abort != NULL) {
		safety_check_abort(msg);
		return;
	}

	malloc_write(msg);
	abort();
}

313
deps/jemalloc/src/sc.c vendored Normal file
View File

@@ -0,0 +1,313 @@
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/bit_util.h"
#include "jemalloc/internal/bitmap.h"
#include "jemalloc/internal/pages.h"
#include "jemalloc/internal/sc.h"
/*
* This module computes the size classes used to satisfy allocations. The logic
* here was ported more or less line-by-line from a shell script, and because of
* that is not the most idiomatic C. Eventually we should fix this, but for now
* at least the damage is compartmentalized to this file.
*/
sc_data_t sc_data_global;
/* Size of a class, in bytes: 2^lg_base + ndelta * 2^lg_delta. */
static size_t
reg_size_compute(int lg_base, int lg_delta, int ndelta) {
	const size_t base = ZU(1) << lg_base;
	const size_t offset = ZU(ndelta) << lg_delta;

	return base + offset;
}
/*
 * Returns the number of pages in the slab for the given size class: the
 * smallest whole number of pages whose total size is an exact multiple of
 * the region size.
 */
static int
slab_size(int lg_page, int lg_base, int lg_delta, int ndelta) {
	const size_t page = (ZU(1) << lg_page);
	const size_t reg_size = reg_size_compute(lg_base, lg_delta, ndelta);

	/*
	 * Step through multiples of the page size until one is divisible by
	 * the region size, i.e. until the least common multiple of the two is
	 * reached.  Size classes are all of the form
	 * base + ndelta * delta == (ndelta + base/delta) * delta, which is
	 * (ndelta + ngroup) * delta.  The way slabbing strategies are chosen
	 * means that delta is at most the page size and ndelta < ngroup, so
	 * at most 2 * ngroup - 1 candidates are tried; that is also the bound
	 * on the number of pages in a slab chosen by default.  With the
	 * current default settings, this is at most 7.
	 */
	size_t candidate = page;
	while (candidate % reg_size != 0) {
		candidate += page;
	}

	return (int)(candidate / page);
}
/*
 * Fill in one size class descriptor.
 *
 * sc is the output.  lg_max_lookup, lg_page and lg_ngroup are configuration
 * decisions shared by all classes; index, lg_base, lg_delta and ndelta are
 * specific to this class, whose size is 2^lg_base + ndelta * 2^lg_delta.
 */
static void
size_class(
    /* Output. */
    sc_t *sc,
    /* Configuration decisions. */
    int lg_max_lookup, int lg_page, int lg_ngroup,
    /* Inputs specific to the size class. */
    int index, int lg_base, int lg_delta, int ndelta) {
	const size_t size = reg_size_compute(lg_base, lg_delta, ndelta);
	const size_t page = (ZU(1) << lg_page);

	sc->index = index;
	sc->lg_base = lg_base;
	sc->lg_delta = lg_delta;
	sc->ndelta = ndelta;

	/* psz: the size is a whole number of pages. */
	sc->psz = (size % page == 0);
	if (index == 0) {
		assert(!sc->psz);
	}

	/* Classes below 2^(lg_page + lg_ngroup) bytes are slab-backed bins. */
	const bool is_bin = (size < (ZU(1) << (lg_page + lg_ngroup)));
	sc->bin = is_bin;
	sc->pgs = is_bin ? slab_size(lg_page, lg_base, lg_delta, ndelta) : 0;

	/* lg_delta_lookup is non-zero only for sizes up to 2^lg_max_lookup. */
	const bool in_lookup = (size <= (ZU(1) << lg_max_lookup));
	sc->lg_delta_lookup = in_lookup ? lg_delta : 0;
}
/*
 * Build the complete size-class table in sc_data->sc[] and record the
 * summary values derived from it (class counts and the lookup/small/large
 * boundary sizes).
 *
 * Classes are generated in increasing index order by calling size_class()
 * once per class:
 *   1. tiny classes, for lg_base in [lg_tiny_min, lg_quantum);
 *   2. the first non-tiny pseudo-group (with its specially encoded first
 *      class when tiny classes exist);
 *   3. regular groups of ngroup classes each, until lg_base reaches
 *      ptr_bits - 1.
 * The size of a class is (1 << lg_base) + (ndelta << lg_delta).
 *
 * sc_data        Output; every summary field assigned below is overwritten.
 * lg_ptr_size    Base-2 log of the pointer size in bytes (used only to
 *                derive ptr_bits).
 * lg_quantum     Base-2 log of the quantum; tiny classes end below it.
 * lg_tiny_min    Base-2 log of the smallest class.
 * lg_max_lookup, lg_page, lg_ngroup
 *                Forwarded to size_class(); lg_ngroup also fixes the number
 *                of classes per group (1 << lg_ngroup).
 */
static void
size_classes(
/* Output. */
sc_data_t *sc_data,
/* Determined by the system. */
size_t lg_ptr_size, int lg_quantum,
/* Configuration decisions. */
int lg_tiny_min, int lg_max_lookup, int lg_page, int lg_ngroup) {
/* Pointer width in bits; bounds the largest representable class. */
int ptr_bits = (1 << lg_ptr_size) * 8;
int ngroup = (1 << lg_ngroup);
int ntiny = 0;
int nlbins = 0;
/* Stays at this sentinel unless the tiny loop below runs at least once. */
int lg_tiny_maxclass = (unsigned)-1;
int nbins = 0;
int npsizes = 0;
/* Next free slot in sc_data->sc[]; equals the class count at the end. */
int index = 0;
int ndelta = 0;
int lg_base = lg_tiny_min;
int lg_delta = lg_base;
/* Outputs that we update as we go. */
size_t lookup_maxclass = 0;
size_t small_maxclass = 0;
int lg_large_minclass = 0;
size_t large_maxclass = 0;
/* Tiny size classes. */
while (lg_base < lg_quantum) {
sc_t *sc = &sc_data->sc[index];
size_class(sc, lg_max_lookup, lg_page, lg_ngroup, index,
lg_base, lg_delta, ndelta);
/* nlbins tracks one past the last class with a lookup delta. */
if (sc->lg_delta_lookup != 0) {
nlbins = index + 1;
}
if (sc->psz) {
npsizes++;
}
if (sc->bin) {
nbins++;
}
ntiny++;
/* Final written value is correct. */
lg_tiny_maxclass = lg_base;
index++;
lg_delta = lg_base;
lg_base++;
}
/* First non-tiny (pseudo) group. */
if (ntiny != 0) {
sc_t *sc = &sc_data->sc[index];
/*
 * See the note in sc.h; the first non-tiny size class has an
 * unusual encoding.
 */
/* Temporarily step lg_base back so this class is base + 1 * delta. */
lg_base--;
ndelta = 1;
size_class(sc, lg_max_lookup, lg_page, lg_ngroup, index,
lg_base, lg_delta, ndelta);
index++;
lg_base++;
lg_delta++;
if (sc->psz) {
npsizes++;
}
if (sc->bin) {
nbins++;
}
}
/* Remaining classes of the first group, continuing from the current ndelta. */
while (ndelta < ngroup) {
sc_t *sc = &sc_data->sc[index];
size_class(sc, lg_max_lookup, lg_page, lg_ngroup, index,
lg_base, lg_delta, ndelta);
index++;
ndelta++;
if (sc->psz) {
npsizes++;
}
if (sc->bin) {
nbins++;
}
}
/* All remaining groups. */
lg_base = lg_base + lg_ngroup;
while (lg_base < ptr_bits - 1) {
ndelta = 1;
int ndelta_limit;
/*
 * The last group omits its final class: given that lg_delta equals
 * lg_base - lg_ngroup here, that class would be 1 << (ptr_bits - 1).
 */
if (lg_base == ptr_bits - 2) {
ndelta_limit = ngroup - 1;
} else {
ndelta_limit = ngroup;
}
while (ndelta <= ndelta_limit) {
sc_t *sc = &sc_data->sc[index];
size_class(sc, lg_max_lookup, lg_page, lg_ngroup, index,
lg_base, lg_delta, ndelta);
if (sc->lg_delta_lookup != 0) {
nlbins = index + 1;
/* Final written value is correct. */
lookup_maxclass = (ZU(1) << lg_base)
+ (ZU(ndelta) << lg_delta);
}
if (sc->psz) {
npsizes++;
}
if (sc->bin) {
nbins++;
/* Final written value is correct. */
small_maxclass = (ZU(1) << lg_base)
+ (ZU(ndelta) << lg_delta);
if (lg_ngroup > 0) {
lg_large_minclass = lg_base + 1;
} else {
lg_large_minclass = lg_base + 2;
}
}
/* Overwritten for every class, so it ends as the size of the last one. */
large_maxclass = (ZU(1) << lg_base)
+ (ZU(ndelta) << lg_delta);
index++;
ndelta++;
}
lg_base++;
lg_delta++;
}
/* Additional outputs. */
int nsizes = index;
unsigned lg_ceil_nsizes = lg_ceil(nsizes);
/* Fill in the output data. */
sc_data->ntiny = ntiny;
sc_data->nlbins = nlbins;
sc_data->nbins = nbins;
sc_data->nsizes = nsizes;
sc_data->lg_ceil_nsizes = lg_ceil_nsizes;
sc_data->npsizes = npsizes;
sc_data->lg_tiny_maxclass = lg_tiny_maxclass;
sc_data->lookup_maxclass = lookup_maxclass;
sc_data->small_maxclass = small_maxclass;
sc_data->lg_large_minclass = lg_large_minclass;
sc_data->large_minclass = (ZU(1) << lg_large_minclass);
sc_data->large_maxclass = large_maxclass;
/*
 * We compute these values in two ways:
 * - Incrementally, as above.
 * - In macros, in sc.h.
 * The computation is easier when done incrementally, but putting it in
 * a constant makes it available to the fast paths without having to
 * touch the extra global cacheline. We assert, however, that the two
 * computations are equivalent.
 */
assert(sc_data->npsizes == SC_NPSIZES);
assert(sc_data->lg_tiny_maxclass == SC_LG_TINY_MAXCLASS);
assert(sc_data->small_maxclass == SC_SMALL_MAXCLASS);
assert(sc_data->large_minclass == SC_LARGE_MINCLASS);
assert(sc_data->lg_large_minclass == SC_LG_LARGE_MINCLASS);
assert(sc_data->large_maxclass == SC_LARGE_MAXCLASS);
/*
 * In the allocation fastpath, we want to assume that we can
 * unconditionally subtract the requested allocation size from
 * a ssize_t, and detect passing through 0 correctly. This
 * results in optimal generated code. For this to work, the
 * maximum allocation size must be less than SSIZE_MAX.
 */
assert(SC_LARGE_MAXCLASS < SSIZE_MAX);
}
/*
 * One-time initialization of a size-class table.  The table must not have
 * been initialized already; on return it is populated by size_classes() and
 * flagged as initialized.
 */
void
sc_data_init(sc_data_t *sc_data) {
	/* Configuration decisions handed to size_classes(). */
	const int lg_max_lookup = 12;
	const int lg_ngroup = 2;

	assert(!sc_data->initialized);

	size_classes(sc_data, LG_SIZEOF_PTR, LG_QUANTUM, SC_LG_TINY_MIN,
	    lg_max_lookup, LG_PAGE, lg_ngroup);

	sc_data->initialized = true;
}
/*
 * Set sc->pgs to pgs_guess clamped into the range of slab page counts that
 * are valid for regions of reg_size bytes.
 *
 * The lower bound is the number of pages needed to hold a single region
 * (reg_size rounded up to whole pages).  The upper bound is
 * BITMAP_MAXBITS * reg_size / PAGE.
 */
static void
sc_data_update_sc_slab_size(sc_t *sc, size_t reg_size, size_t pgs_guess) {
	/* ceil(reg_size / PAGE). */
	size_t pgs_lo = reg_size / PAGE + ((reg_size % PAGE != 0) ? 1 : 0);
	/*
	 * BITMAP_MAXBITS is actually determined by putting the smallest
	 * possible size-class on one page, so this can never be 0.
	 */
	size_t pgs_hi = BITMAP_MAXBITS * reg_size / PAGE;

	assert(pgs_lo <= pgs_hi);
	assert(pgs_lo > 0);
	assert(pgs_hi >= 1);

	size_t pgs = pgs_guess;
	if (pgs < pgs_lo) {
		pgs = pgs_lo;
	} else if (pgs > pgs_hi) {
		pgs = pgs_hi;
	}
	sc->pgs = (int)pgs;
}
/*
 * Apply a slab page-count request to every bin size class whose region size
 * lies in the inclusive range [begin, end].  The requested count is clamped
 * per class by sc_data_update_sc_slab_size().
 *
 * The scan walks classes in index order and ends at the first class that is
 * not a bin class.
 */
void
sc_data_update_slab_size(sc_data_t *data, size_t begin, size_t end, int pgs) {
	assert(data->initialized);

	for (int i = 0; i < data->nsizes && data->sc[i].bin; i++) {
		sc_t *cls = &data->sc[i];
		size_t reg_size = reg_size_compute(cls->lg_base, cls->lg_delta,
		    cls->ndelta);
		if (reg_size < begin || reg_size > end) {
			/* Outside the requested range; leave this class alone. */
			continue;
		}
		sc_data_update_sc_slab_size(cls, reg_size, pgs);
	}
}
/*
 * Boot-time entry point for the size-class module: initializes *data by
 * delegating to sc_data_init().
 */
void
sc_boot(sc_data_t *data) {
sc_data_init(data);
}

File diff suppressed because it is too large Load Diff

154
deps/jemalloc/src/sz.c vendored
View File

@@ -2,105 +2,63 @@
#include "jemalloc/internal/sz.h"
JEMALLOC_ALIGNED(CACHELINE)
const size_t sz_pind2sz_tab[NPSIZES+1] = {
#define PSZ_yes(lg_grp, ndelta, lg_delta) \
(((ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta))),
#define PSZ_no(lg_grp, ndelta, lg_delta)
#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, lg_delta_lookup) \
PSZ_##psz(lg_grp, ndelta, lg_delta)
SIZE_CLASSES
#undef PSZ_yes
#undef PSZ_no
#undef SC
(LARGE_MAXCLASS + PAGE)
};
size_t sz_pind2sz_tab[SC_NPSIZES+1];
/*
 * Populate sz_pind2sz_tab from the size-class table.
 *
 * Walking the classes in index order, the k-th class flagged `psz` has its
 * size, (1 << lg_base) + (ndelta << lg_delta), stored at slot k.  Every
 * remaining slot, up to and including index SC_NPSIZES, is then filled with
 * the sentinel value large_maxclass + PAGE.
 */
static void
sz_boot_pind2sz_tab(const sc_data_t *sc_data) {
	int pind = 0;
	for (unsigned i = 0; i < SC_NSIZES; i++) {
		const sc_t *sc = &sc_data->sc[i];
		if (sc->psz) {
			sz_pind2sz_tab[pind] = (ZU(1) << sc->lg_base)
			    + (ZU(sc->ndelta) << sc->lg_delta);
			pind++;
		}
	}
	/*
	 * Pad the tail with the sentinel.  The slot must be indexed by the
	 * loop variable: indexing by pind would rewrite the same slot on each
	 * iteration and leave any later slots unfilled.
	 */
	for (int i = pind; i <= (int)SC_NPSIZES; i++) {
		sz_pind2sz_tab[i] = sc_data->large_maxclass + PAGE;
	}
}
JEMALLOC_ALIGNED(CACHELINE)
const size_t sz_index2size_tab[NSIZES] = {
#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, lg_delta_lookup) \
((ZU(1)<<lg_grp) + (ZU(ndelta)<<lg_delta)),
SIZE_CLASSES
#undef SC
};
size_t sz_index2size_tab[SC_NSIZES];
/*
 * Populate sz_index2size_tab: slot i receives the size of size class i,
 * i.e. (1 << lg_base) + (ndelta << lg_delta), for all SC_NSIZES classes.
 */
static void
sz_boot_index2size_tab(const sc_data_t *sc_data) {
	for (unsigned ind = 0; ind < SC_NSIZES; ind++) {
		const sc_t *cls = &sc_data->sc[ind];
		size_t group_base = ZU(1) << cls->lg_base;
		size_t group_offset = ZU(cls->ndelta) << (cls->lg_delta);
		sz_index2size_tab[ind] = group_base + group_offset;
	}
}
/*
* To keep this table small, we divide sizes by the tiny min size, which gives
* the smallest interval for which the result can change.
*/
JEMALLOC_ALIGNED(CACHELINE)
const uint8_t sz_size2index_tab[] = {
#if LG_TINY_MIN == 0
#warning "Dangerous LG_TINY_MIN"
#define S2B_0(i) i,
#elif LG_TINY_MIN == 1
#warning "Dangerous LG_TINY_MIN"
#define S2B_1(i) i,
#elif LG_TINY_MIN == 2
#warning "Dangerous LG_TINY_MIN"
#define S2B_2(i) i,
#elif LG_TINY_MIN == 3
#define S2B_3(i) i,
#elif LG_TINY_MIN == 4
#define S2B_4(i) i,
#elif LG_TINY_MIN == 5
#define S2B_5(i) i,
#elif LG_TINY_MIN == 6
#define S2B_6(i) i,
#elif LG_TINY_MIN == 7
#define S2B_7(i) i,
#elif LG_TINY_MIN == 8
#define S2B_8(i) i,
#elif LG_TINY_MIN == 9
#define S2B_9(i) i,
#elif LG_TINY_MIN == 10
#define S2B_10(i) i,
#elif LG_TINY_MIN == 11
#define S2B_11(i) i,
#else
#error "Unsupported LG_TINY_MIN"
#endif
#if LG_TINY_MIN < 1
#define S2B_1(i) S2B_0(i) S2B_0(i)
#endif
#if LG_TINY_MIN < 2
#define S2B_2(i) S2B_1(i) S2B_1(i)
#endif
#if LG_TINY_MIN < 3
#define S2B_3(i) S2B_2(i) S2B_2(i)
#endif
#if LG_TINY_MIN < 4
#define S2B_4(i) S2B_3(i) S2B_3(i)
#endif
#if LG_TINY_MIN < 5
#define S2B_5(i) S2B_4(i) S2B_4(i)
#endif
#if LG_TINY_MIN < 6
#define S2B_6(i) S2B_5(i) S2B_5(i)
#endif
#if LG_TINY_MIN < 7
#define S2B_7(i) S2B_6(i) S2B_6(i)
#endif
#if LG_TINY_MIN < 8
#define S2B_8(i) S2B_7(i) S2B_7(i)
#endif
#if LG_TINY_MIN < 9
#define S2B_9(i) S2B_8(i) S2B_8(i)
#endif
#if LG_TINY_MIN < 10
#define S2B_10(i) S2B_9(i) S2B_9(i)
#endif
#if LG_TINY_MIN < 11
#define S2B_11(i) S2B_10(i) S2B_10(i)
#endif
#define S2B_no(i)
#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, lg_delta_lookup) \
S2B_##lg_delta_lookup(index)
SIZE_CLASSES
#undef S2B_3
#undef S2B_4
#undef S2B_5
#undef S2B_6
#undef S2B_7
#undef S2B_8
#undef S2B_9
#undef S2B_10
#undef S2B_11
#undef S2B_no
#undef SC
};
uint8_t sz_size2index_tab[(SC_LOOKUP_MAXCLASS >> SC_LG_TINY_MIN) + 1];
/*
 * Populate sz_size2index_tab, which is indexed by a size expressed in units
 * of the tiny minimum size (1 << SC_LG_TINY_MIN).
 *
 * Size classes are visited in index order.  For each class, every table slot
 * not yet written, up to and including the class size rounded up to tiny-min
 * units, is assigned that class's index.  Filling stops once the table
 * (SC_LOOKUP_MAXCLASS in tiny-min units, plus one slot) is full or the
 * classes run out.
 */
static void
sz_boot_size2index_tab(const sc_data_t *sc_data) {
	const size_t nslots = (SC_LOOKUP_MAXCLASS >> SC_LG_TINY_MIN) + 1;
	size_t slot = 0;
	unsigned sc_ind = 0;

	while (sc_ind < SC_NSIZES && slot < nslots) {
		const sc_t *cls = &sc_data->sc[sc_ind];
		size_t cls_size = (ZU(1) << cls->lg_base)
		    + (ZU(cls->ndelta) << cls->lg_delta);
		/* Class size in tiny-min units, rounded up. */
		size_t last_slot = (cls_size + (ZU(1) << SC_LG_TINY_MIN) - 1)
		    >> SC_LG_TINY_MIN;
		while (slot <= last_slot && slot < nslots) {
			sz_size2index_tab[slot] = sc_ind;
			slot++;
		}
		sc_ind++;
	}
}
/*
 * Boot-time entry point for the size lookup tables: fills the pind-to-size,
 * index-to-size and size-to-index tables from the given size-class data.
 */
void
sz_boot(const sc_data_t *sc_data) {
sz_boot_pind2sz_tab(sc_data);
sz_boot_index2size_tab(sc_data);
sz_boot_size2index_tab(sc_data);
}

View File

@@ -4,7 +4,8 @@
#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/size_classes.h"
#include "jemalloc/internal/safety_check.h"
#include "jemalloc/internal/sc.h"
/******************************************************************************/
/* Data. */
@@ -12,7 +13,7 @@
bool opt_tcache = true;
ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;
tcache_bin_info_t *tcache_bin_info;
cache_bin_info_t *tcache_bin_info;
static unsigned stack_nelms; /* Total stack elms per tcache. */
unsigned nhbins;
@@ -40,8 +41,8 @@ void
tcache_event_hard(tsd_t *tsd, tcache_t *tcache) {
szind_t binind = tcache->next_gc_bin;
tcache_bin_t *tbin;
if (binind < NBINS) {
cache_bin_t *tbin;
if (binind < SC_NBINS) {
tbin = tcache_small_bin_get(tcache, binind);
} else {
tbin = tcache_large_bin_get(tcache, binind);
@@ -50,7 +51,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) {
/*
* Flush (ceiling) 3/4 of the objects below the low water mark.
*/
if (binind < NBINS) {
if (binind < SC_NBINS) {
tcache_bin_flush_small(tsd, tcache, tbin, binind,
tbin->ncached - tbin->low_water + (tbin->low_water
>> 2));
@@ -58,7 +59,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) {
* Reduce fill count by 2X. Limit lg_fill_div such that
* the fill count is always at least 1.
*/
tcache_bin_info_t *tbin_info = &tcache_bin_info[binind];
cache_bin_info_t *tbin_info = &tcache_bin_info[binind];
if ((tbin_info->ncached_max >>
(tcache->lg_fill_div[binind] + 1)) >= 1) {
tcache->lg_fill_div[binind]++;
@@ -72,7 +73,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) {
* Increase fill count by 2X for small bins. Make sure
* lg_fill_div stays greater than 0.
*/
if (binind < NBINS && tcache->lg_fill_div[binind] > 1) {
if (binind < SC_NBINS && tcache->lg_fill_div[binind] > 1) {
tcache->lg_fill_div[binind]--;
}
}
@@ -86,7 +87,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) {
void *
tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
tcache_bin_t *tbin, szind_t binind, bool *tcache_success) {
cache_bin_t *tbin, szind_t binind, bool *tcache_success) {
void *ret;
assert(tcache->arena != NULL);
@@ -95,33 +96,72 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
if (config_prof) {
tcache->prof_accumbytes = 0;
}
ret = tcache_alloc_easy(tbin, tcache_success);
ret = cache_bin_alloc_easy(tbin, tcache_success);
return ret;
}
/* Enabled with --enable-extra-size-check. */
/*
 * Look up the extent of each of the nflush items at the top of tbin, storing
 * them into extents[0..nflush), and check that the size-class indices read
 * back for those items are consistent with binind.
 *
 * The check compares the sum of the looked-up indices with binind * nflush
 * rather than testing each item, so the lookup loop stays branch-free.  On a
 * mismatch, safety_check_fail() is invoked and the process aborts.
 */
static void
tbin_extents_lookup_size_check(tsdn_t *tsdn, cache_bin_t *tbin, szind_t binind,
    size_t nflush, extent_t **extents) {
	rtree_ctx_t ctx_fallback;
	rtree_ctx_t *ctx = tsdn_rtree_ctx(tsdn, &ctx_fallback);

	/*
	 * Catching a wrong size here fails early on sized deallocation bugs
	 * instead of letting them corrupt metadata.
	 */
	const size_t expected_sum = binind * nflush;
	size_t observed_sum = 0;
	szind_t item_szind;
	for (unsigned i = 0; i < nflush; i++) {
		void *item = *(tbin->avail - 1 - i);
		rtree_extent_szind_read(tsdn, &extents_rtree, ctx,
		    (uintptr_t)item, true, &extents[i], &item_szind);
		observed_sum += item_szind;
	}

	if (observed_sum == expected_sum) {
		return;
	}
	safety_check_fail("<jemalloc>: size mismatch in thread cache "
	    "detected, likely caused by sized deallocation bugs by "
	    "application. Abort.\n");
	abort();
}
void
tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
szind_t binind, unsigned rem) {
bool merged_stats = false;
assert(binind < NBINS);
assert(rem <= tbin->ncached);
assert(binind < SC_NBINS);
assert((cache_bin_sz_t)rem <= tbin->ncached);
arena_t *arena = tcache->arena;
assert(arena != NULL);
unsigned nflush = tbin->ncached - rem;
VARIABLE_ARRAY(extent_t *, item_extent, nflush);
/* Look up extent once per item. */
for (unsigned i = 0 ; i < nflush; i++) {
item_extent[i] = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1 - i));
}
/* Look up extent once per item. */
if (config_opt_safety_checks) {
tbin_extents_lookup_size_check(tsd_tsdn(tsd), tbin, binind,
nflush, item_extent);
} else {
for (unsigned i = 0 ; i < nflush; i++) {
item_extent[i] = iealloc(tsd_tsdn(tsd),
*(tbin->avail - 1 - i));
}
}
while (nflush > 0) {
/* Lock the arena bin associated with the first object. */
extent_t *extent = item_extent[0];
arena_t *bin_arena = extent_arena_get(extent);
arena_bin_t *bin = &bin_arena->bins[binind];
unsigned bin_arena_ind = extent_arena_ind_get(extent);
arena_t *bin_arena = arena_get(tsd_tsdn(tsd), bin_arena_ind,
false);
unsigned binshard = extent_binshard_get(extent);
assert(binshard < bin_infos[binind].n_shards);
bin_t *bin = &bin_arena->bins[binind].bin_shards[binshard];
if (config_prof && bin_arena == arena) {
if (arena_prof_accum(tsd_tsdn(tsd), arena,
@@ -132,8 +172,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
}
malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
if (config_stats && bin_arena == arena) {
assert(!merged_stats);
if (config_stats && bin_arena == arena && !merged_stats) {
merged_stats = true;
bin->stats.nflushes++;
bin->stats.nrequests += tbin->tstats.nrequests;
@@ -145,9 +184,10 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
extent = item_extent[i];
assert(ptr != NULL && extent != NULL);
if (extent_arena_get(extent) == bin_arena) {
if (extent_arena_ind_get(extent) == bin_arena_ind
&& extent_binshard_get(extent) == binshard) {
arena_dalloc_bin_junked_locked(tsd_tsdn(tsd),
bin_arena, extent, ptr);
bin_arena, bin, binind, extent, ptr);
} else {
/*
* This object was allocated via a different
@@ -169,8 +209,9 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
* The flush loop didn't happen to flush to this thread's
* arena, so the stats didn't get merged. Manually do so now.
*/
arena_bin_t *bin = &arena->bins[binind];
malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
unsigned binshard;
bin_t *bin = arena_bin_choose_lock(tsd_tsdn(tsd), arena, binind,
&binshard);
bin->stats.nflushes++;
bin->stats.nrequests += tbin->tstats.nrequests;
tbin->tstats.nrequests = 0;
@@ -180,63 +221,76 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem *
sizeof(void *));
tbin->ncached = rem;
if ((low_water_t)tbin->ncached < tbin->low_water) {
if (tbin->ncached < tbin->low_water) {
tbin->low_water = tbin->ncached;
}
}
void
tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind,
unsigned rem, tcache_t *tcache) {
bool merged_stats = false;
assert(binind < nhbins);
assert(rem <= tbin->ncached);
assert((cache_bin_sz_t)rem <= tbin->ncached);
arena_t *arena = tcache->arena;
assert(arena != NULL);
arena_t *tcache_arena = tcache->arena;
assert(tcache_arena != NULL);
unsigned nflush = tbin->ncached - rem;
VARIABLE_ARRAY(extent_t *, item_extent, nflush);
#ifndef JEMALLOC_EXTRA_SIZE_CHECK
/* Look up extent once per item. */
for (unsigned i = 0 ; i < nflush; i++) {
item_extent[i] = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1 - i));
}
#else
tbin_extents_lookup_size_check(tsd_tsdn(tsd), tbin, binind, nflush,
item_extent);
#endif
while (nflush > 0) {
/* Lock the arena associated with the first object. */
extent_t *extent = item_extent[0];
arena_t *locked_arena = extent_arena_get(extent);
UNUSED bool idump;
unsigned locked_arena_ind = extent_arena_ind_get(extent);
arena_t *locked_arena = arena_get(tsd_tsdn(tsd),
locked_arena_ind, false);
bool idump;
if (config_prof) {
idump = false;
}
bool lock_large = !arena_is_auto(locked_arena);
if (lock_large) {
malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->large_mtx);
}
for (unsigned i = 0; i < nflush; i++) {
void *ptr = *(tbin->avail - 1 - i);
assert(ptr != NULL);
extent = item_extent[i];
if (extent_arena_get(extent) == locked_arena) {
if (extent_arena_ind_get(extent) == locked_arena_ind) {
large_dalloc_prep_junked_locked(tsd_tsdn(tsd),
extent);
}
}
if ((config_prof || config_stats) && locked_arena == arena) {
if ((config_prof || config_stats) &&
(locked_arena == tcache_arena)) {
if (config_prof) {
idump = arena_prof_accum(tsd_tsdn(tsd), arena,
tcache->prof_accumbytes);
idump = arena_prof_accum(tsd_tsdn(tsd),
tcache_arena, tcache->prof_accumbytes);
tcache->prof_accumbytes = 0;
}
if (config_stats) {
merged_stats = true;
arena_stats_large_nrequests_add(tsd_tsdn(tsd),
&arena->stats, binind,
arena_stats_large_flush_nrequests_add(
tsd_tsdn(tsd), &tcache_arena->stats, binind,
tbin->tstats.nrequests);
tbin->tstats.nrequests = 0;
}
}
if (lock_large) {
malloc_mutex_unlock(tsd_tsdn(tsd), &locked_arena->large_mtx);
}
unsigned ndeferred = 0;
for (unsigned i = 0; i < nflush; i++) {
@@ -244,7 +298,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
extent = item_extent[i];
assert(ptr != NULL && extent != NULL);
if (extent_arena_get(extent) == locked_arena) {
if (extent_arena_ind_get(extent) == locked_arena_ind) {
large_dalloc_finish(tsd_tsdn(tsd), extent);
} else {
/*
@@ -270,15 +324,15 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
* The flush loop didn't happen to flush to this thread's
* arena, so the stats didn't get merged. Manually do so now.
*/
arena_stats_large_nrequests_add(tsd_tsdn(tsd), &arena->stats,
binind, tbin->tstats.nrequests);
arena_stats_large_flush_nrequests_add(tsd_tsdn(tsd),
&tcache_arena->stats, binind, tbin->tstats.nrequests);
tbin->tstats.nrequests = 0;
}
memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem *
sizeof(void *));
tbin->ncached = rem;
if ((low_water_t)tbin->ncached < tbin->low_water) {
if (tbin->ncached < tbin->low_water) {
tbin->low_water = tbin->ncached;
}
}
@@ -291,8 +345,15 @@ tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) {
if (config_stats) {
/* Link into list of extant tcaches. */
malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx);
ql_elm_new(tcache, link);
ql_tail_insert(&arena->tcache_ql, tcache, link);
cache_bin_array_descriptor_init(
&tcache->cache_bin_array_descriptor, tcache->bins_small,
tcache->bins_large);
ql_tail_insert(&arena->cache_bin_array_descriptor_ql,
&tcache->cache_bin_array_descriptor, link);
malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx);
}
}
@@ -316,6 +377,8 @@ tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache) {
assert(in_ql);
}
ql_remove(&arena->tcache_ql, tcache, link);
ql_remove(&arena->cache_bin_array_descriptor_ql,
&tcache->cache_bin_array_descriptor, link);
tcache_stats_merge(tsdn, tcache, arena);
malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx);
}
@@ -354,10 +417,10 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) {
size_t stack_offset = 0;
assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
memset(tcache->tbins_small, 0, sizeof(tcache_bin_t) * NBINS);
memset(tcache->tbins_large, 0, sizeof(tcache_bin_t) * (nhbins - NBINS));
memset(tcache->bins_small, 0, sizeof(cache_bin_t) * SC_NBINS);
memset(tcache->bins_large, 0, sizeof(cache_bin_t) * (nhbins - SC_NBINS));
unsigned i = 0;
for (; i < NBINS; i++) {
for (; i < SC_NBINS; i++) {
tcache->lg_fill_div[i] = 1;
stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
/*
@@ -449,16 +512,16 @@ static void
tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) {
assert(tcache->arena != NULL);
for (unsigned i = 0; i < NBINS; i++) {
tcache_bin_t *tbin = tcache_small_bin_get(tcache, i);
for (unsigned i = 0; i < SC_NBINS; i++) {
cache_bin_t *tbin = tcache_small_bin_get(tcache, i);
tcache_bin_flush_small(tsd, tcache, tbin, i, 0);
if (config_stats) {
assert(tbin->tstats.nrequests == 0);
}
}
for (unsigned i = NBINS; i < nhbins; i++) {
tcache_bin_t *tbin = tcache_large_bin_get(tcache, i);
for (unsigned i = SC_NBINS; i < nhbins; i++) {
cache_bin_t *tbin = tcache_large_bin_get(tcache, i);
tcache_bin_flush_large(tsd, tbin, i, 0, tcache);
if (config_stats) {
@@ -482,6 +545,7 @@ tcache_flush(tsd_t *tsd) {
static void
tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) {
tcache_flush_cache(tsd, tcache);
arena_t *arena = tcache->arena;
tcache_arena_dissociate(tsd_tsdn(tsd), tcache);
if (tsd_tcache) {
@@ -494,6 +558,23 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) {
/* Release both the tcache struct and avail array. */
idalloctm(tsd_tsdn(tsd), tcache, NULL, NULL, true, true);
}
/*
* The deallocation and tcache flush above may not trigger decay since
* we are on the tcache shutdown path (potentially with non-nominal
* tsd). Manually trigger decay to avoid pathological cases. Also
* include arena 0 because the tcache array is allocated from it.
*/
arena_decay(tsd_tsdn(tsd), arena_get(tsd_tsdn(tsd), 0, false),
false, false);
if (arena_nthreads_get(arena, false) == 0 &&
!background_thread_enabled()) {
/* Force purging when no threads assigned to the arena anymore. */
arena_decay(tsd_tsdn(tsd), arena, false, true);
} else {
arena_decay(tsd_tsdn(tsd), arena, false, false);
}
}
/* For auto tcache (embedded in TSD) only. */
@@ -523,18 +604,18 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) {
cassert(config_stats);
/* Merge and reset tcache stats. */
for (i = 0; i < NBINS; i++) {
arena_bin_t *bin = &arena->bins[i];
tcache_bin_t *tbin = tcache_small_bin_get(tcache, i);
malloc_mutex_lock(tsdn, &bin->lock);
for (i = 0; i < SC_NBINS; i++) {
cache_bin_t *tbin = tcache_small_bin_get(tcache, i);
unsigned binshard;
bin_t *bin = arena_bin_choose_lock(tsdn, arena, i, &binshard);
bin->stats.nrequests += tbin->tstats.nrequests;
malloc_mutex_unlock(tsdn, &bin->lock);
tbin->tstats.nrequests = 0;
}
for (; i < nhbins; i++) {
tcache_bin_t *tbin = tcache_large_bin_get(tcache, i);
arena_stats_large_nrequests_add(tsdn, &arena->stats, i,
cache_bin_t *tbin = tcache_large_bin_get(tcache, i);
arena_stats_large_flush_nrequests_add(tsdn, &arena->stats, i,
tbin->tstats.nrequests);
tbin->tstats.nrequests = 0;
}
@@ -605,23 +686,32 @@ label_return:
}
static tcache_t *
tcaches_elm_remove(tsd_t *tsd, tcaches_t *elm) {
tcaches_elm_remove(tsd_t *tsd, tcaches_t *elm, bool allow_reinit) {
malloc_mutex_assert_owner(tsd_tsdn(tsd), &tcaches_mtx);
if (elm->tcache == NULL) {
return NULL;
}
tcache_t *tcache = elm->tcache;
if (allow_reinit) {
elm->tcache = TCACHES_ELM_NEED_REINIT;
} else {
elm->tcache = NULL;
}
if (tcache == TCACHES_ELM_NEED_REINIT) {
return NULL;
}
return tcache;
}
void
tcaches_flush(tsd_t *tsd, unsigned ind) {
malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
tcache_t *tcache = tcaches_elm_remove(tsd, &tcaches[ind]);
tcache_t *tcache = tcaches_elm_remove(tsd, &tcaches[ind], true);
malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
if (tcache != NULL) {
/* Destroy the tcache; recreate in tcaches_get() if needed. */
tcache_destroy(tsd, tcache, false);
}
}
@@ -630,7 +720,7 @@ void
tcaches_destroy(tsd_t *tsd, unsigned ind) {
malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
tcaches_t *elm = &tcaches[ind];
tcache_t *tcache = tcaches_elm_remove(tsd, elm);
tcache_t *tcache = tcaches_elm_remove(tsd, elm, false);
elm->next = tcaches_avail;
tcaches_avail = elm;
malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
@@ -643,8 +733,8 @@ bool
tcache_boot(tsdn_t *tsdn) {
/* If necessary, clamp opt_lg_tcache_max. */
if (opt_lg_tcache_max < 0 || (ZU(1) << opt_lg_tcache_max) <
SMALL_MAXCLASS) {
tcache_maxclass = SMALL_MAXCLASS;
SC_SMALL_MAXCLASS) {
tcache_maxclass = SC_SMALL_MAXCLASS;
} else {
tcache_maxclass = (ZU(1) << opt_lg_tcache_max);
}
@@ -657,21 +747,21 @@ tcache_boot(tsdn_t *tsdn) {
nhbins = sz_size2index(tcache_maxclass) + 1;
/* Initialize tcache_bin_info. */
tcache_bin_info = (tcache_bin_info_t *)base_alloc(tsdn, b0get(), nhbins
* sizeof(tcache_bin_info_t), CACHELINE);
tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, b0get(), nhbins
* sizeof(cache_bin_info_t), CACHELINE);
if (tcache_bin_info == NULL) {
return true;
}
stack_nelms = 0;
unsigned i;
for (i = 0; i < NBINS; i++) {
if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) {
for (i = 0; i < SC_NBINS; i++) {
if ((bin_infos[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) {
tcache_bin_info[i].ncached_max =
TCACHE_NSLOTS_SMALL_MIN;
} else if ((arena_bin_info[i].nregs << 1) <=
} else if ((bin_infos[i].nregs << 1) <=
TCACHE_NSLOTS_SMALL_MAX) {
tcache_bin_info[i].ncached_max =
(arena_bin_info[i].nregs << 1);
(bin_infos[i].nregs << 1);
} else {
tcache_bin_info[i].ncached_max =
TCACHE_NSLOTS_SMALL_MAX;

Some files were not shown because too many files have changed in this diff Show More