diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h
index 0dc27327b..27da69680 100644
--- a/include/mimalloc/prim.h
+++ b/include/mimalloc/prim.h
@@ -347,6 +347,16 @@ We have 4 models:
 static inline mi_theap_t* _mi_theap_default(void);
 static inline mi_theap_t* _mi_theap_cached(void);
 
+// On Windows, accessing `__declspec(thread)` storage can trigger on-demand TLS
+// initialization (`__dyn_tls_init`) which may run user TLS constructors that
+// allocate memory. During mimalloc process initialization this can cause
+// recursive allocations before global state is ready.
+// Guard against this by avoiding TLS access before `mi_process_init` completes
+// (used by the MI_TLS_MODEL_THREAD_LOCAL path).
+#if defined(_WIN32) && !defined(MI_TLS_RECURSE_GUARD)
+#define MI_TLS_RECURSE_GUARD 1
+#endif
+
 #if defined(_WIN32)
 #define MI_TLS_MODEL_DYNAMIC_WIN32  1
 #elif defined(__APPLE__)   // macOS
diff --git a/src/init.c b/src/init.c
index a100becad..08b852c34 100644
--- a/src/init.c
+++ b/src/init.c
@@ -887,13 +887,46 @@ static void mi_detect_cpu_features(void) {
 
 // Initialize the process; called by thread_init or the process loader
 void mi_process_init(void) mi_attr_noexcept {
-  // ensure we are called once
-  static mi_atomic_once_t process_init;
-  // #if _MSC_VER < 1920
-  // mi_heap_main_init(); // vs2017 can dynamically re-initialize theap_main
-  // #endif
-  if (!mi_atomic_once(&process_init)) return;
-  _mi_process_is_initialized = true;
+  // Ensure initialization runs exactly once.
+  //
+  // Note: we cannot use `mi_atomic_once` directly because it is non-blocking:
+  // other threads may observe initialization as "done" while it is still in
+  // progress, which can trip assertions such as `_mi_page_map != NULL`.
+  //
+  // At the same time, `mi_process_init()` is re-entrant on the initializing
+  // thread (via `mi_thread_init()` calling back into `mi_process_init()`), so
+  // we must not block the owner thread while it is initializing.
+  //
+  // State: 0 = not started, 1 = in progress, 2 = done.
+  static mi_atomic_once_t process_init_state;
+  static _Atomic(uintptr_t) process_init_owner_tid;
+
+  const uintptr_t tid = (uintptr_t)_mi_thread_id();
+  uintptr_t state = mi_atomic_load_acquire(&process_init_state);
+  if (state == 2) return;
+
+  if (state == 1) {
+    // If we re-enter on the initializing thread, return immediately.
+    if (mi_atomic_load_relaxed(&process_init_owner_tid) == tid) return;
+    // Otherwise wait until the initialization completes.
+    do {
+      mi_atomic_yield();
+      state = mi_atomic_load_acquire(&process_init_state);
+    } while (state != 2);
+    return;
+  }
+
+  uintptr_t expected = 0;
+  if (!mi_atomic_cas_strong_acq_rel(&process_init_state, &expected, (uintptr_t)1)) {
+    // Someone else raced us; follow the waiting path.
+    do {
+      mi_atomic_yield();
+      state = mi_atomic_load_acquire(&process_init_state);
+    } while (state != 2);
+    return;
+  }
+
+  mi_atomic_store_release(&process_init_owner_tid, tid);
   _mi_verbose_message("process init: 0x%zx\n", _mi_thread_id());
   mi_detect_cpu_features();
 
@@ -930,6 +963,9 @@ void mi_process_init(void) mi_attr_noexcept {
       mi_reserve_os_memory((size_t)ksize*MI_KiB, true, true);
     }
   }
+
+  _mi_process_is_initialized = true;
+  mi_atomic_store_release(&process_init_state, (uintptr_t)2);
 }
 
 // Called when the process is done (cdecl as it is used with `at_exit` on some platforms)
diff --git a/src/page-map.c b/src/page-map.c
index f0ca4459e..2ecbfbb07 100644
--- a/src/page-map.c
+++ b/src/page-map.c
@@ -378,10 +378,10 @@ static size_t mi_page_map_get_idx(mi_page_t* page, size_t* sub_idx, size_t* slic
 bool _mi_page_map_register(mi_page_t* page) {
   mi_assert_internal(page != NULL);
   mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
-  mi_assert_internal(_mi_page_map != NULL);  // should be initialized before multi-thread access!
   if mi_unlikely(_mi_page_map == NULL) {
     if (!_mi_page_map_init()) return false;
   }
+  mi_assert_internal(_mi_page_map != NULL);  // should be initialized before multi-thread access!
   mi_assert(_mi_page_map!=NULL);
   size_t slice_count;
   size_t sub_idx;
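
Illustrative sketch (not part of the patch): how an `MI_TLS_RECURSE_GUARD`-style check is typically consulted before touching `__declspec(thread)` storage, in the spirit of the prim.h comment above. This is not mimalloc's actual implementation: `mi_theap_t` is reduced to a dummy struct, and `_mi_theap_main`/`_mi_theap` are hypothetical stand-ins for the statically allocated main heap and the thread-local heap pointer.

  // Sketch only; compiles as MSVC C. All names below are illustrative stand-ins.
  #include <stdbool.h>

  typedef struct mi_theap_s { int dummy; } mi_theap_t;   // reduced dummy type

  static bool _mi_process_is_initialized = false;        // set by mi_process_init()
  static mi_theap_t _mi_theap_main;                      // hypothetical static main heap
  static __declspec(thread) mi_theap_t* _mi_theap = &_mi_theap_main;

  #define MI_TLS_RECURSE_GUARD 1

  static mi_theap_t* _mi_theap_default(void) {
  #if MI_TLS_RECURSE_GUARD
    // Before process initialization completes, reading `_mi_theap` could trigger
    // on-demand TLS initialization (`__dyn_tls_init`) and recurse back into the
    // allocator, so fall back to the statically allocated main heap instead.
    if (!_mi_process_is_initialized) return &_mi_theap_main;
  #endif
    return _mi_theap;
  }

  int main(void) {
    return (_mi_theap_default() == &_mi_theap_main) ? 0 : 1;
  }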
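And a minimal standalone sketch of the blocking once-guard that the init.c hunk implements, using C11 `stdatomic.h` in place of mimalloc's `mi_atomic_*` wrappers, and POSIX `pthread_self()`/`sched_yield()` as assumed equivalents of `_mi_thread_id()`/`mi_atomic_yield()`. Unlike a non-blocking once, racing threads here wait for the winner, while the winner itself may safely re-enter:

  // Sketch only; build with: cc -std=c11 -pthread once_guard.c
  #include <pthread.h>
  #include <sched.h>
  #include <stdatomic.h>
  #include <stdint.h>
  #include <stdio.h>

  static _Atomic(uintptr_t) init_state;      // 0 = not started, 1 = in progress, 2 = done
  static _Atomic(uintptr_t) init_owner_tid;  // thread that won the CAS below

  static void do_init(void);

  static void process_init(void) {
    const uintptr_t tid = (uintptr_t)pthread_self();
    uintptr_t state = atomic_load_explicit(&init_state, memory_order_acquire);
    if (state == 2) return;                  // fast path: already done
    if (state == 1) {
      // A re-entrant call on the initializing thread must return, not deadlock.
      if (atomic_load_explicit(&init_owner_tid, memory_order_relaxed) == tid) return;
      // Any other thread blocks until initialization completes.
      while (atomic_load_explicit(&init_state, memory_order_acquire) != 2) sched_yield();
      return;
    }
    uintptr_t expected = 0;
    if (!atomic_compare_exchange_strong_explicit(&init_state, &expected, 1,
          memory_order_acq_rel, memory_order_acquire)) {
      // Lost the race to another initializer; wait for it to finish.
      while (atomic_load_explicit(&init_state, memory_order_acquire) != 2) sched_yield();
      return;
    }
    atomic_store_explicit(&init_owner_tid, tid, memory_order_release);
    do_init();                               // may re-enter process_init() on this thread
    atomic_store_explicit(&init_state, 2, memory_order_release);
  }

  static void do_init(void) {
    process_init();                          // re-entrant: owner check returns immediately
    puts("initialized exactly once");
  }

  int main(void) {
    process_init();
    process_init();                          // second call takes the fast path
    return 0;
  }

Note the ordering mirrors the patch: the owner tid is published before `do_init()` runs (so re-entrant calls see it), and the final release store of state 2 makes all initialization writes visible to the acquire loads in the waiting paths.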