diff --git a/ddprof-lib/src/main/cpp/libraryPatcher_linux.cpp b/ddprof-lib/src/main/cpp/libraryPatcher_linux.cpp index f6d09c6d7..42c25cc69 100644 --- a/ddprof-lib/src/main/cpp/libraryPatcher_linux.cpp +++ b/ddprof-lib/src/main/cpp/libraryPatcher_linux.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -73,6 +74,161 @@ static void unregister_and_release(int tid) { ProfiledThread::release(); } +// pthread_cleanup_push callback for thread wrappers. +// Fires when the wrapped routine calls pthread_exit() or the thread is +// canceled. Kept noinline so its stack frame (which may hold a SignalBlocker +// via unregister_and_release) lives outside the DEOPT-corruption zone of the +// caller on musl/aarch64, and so that the SignalBlocker's sigset_t does not +// appear in the caller's frame on platforms with stack-protector canaries. +__attribute__((noinline)) +static void cleanup_unregister(void*) { + unregister_and_release(ProfiledThread::currentTid()); +} + +// Thread-cleanup wrapper that avoids the static-libgcc / forced-unwind crash. +// +// The crash: on glibc, pthread_cleanup_push in C++ mode expands to +// __pthread_cleanup_class (RAII), which adds a cleanup entry to the LSDA of +// this frame. When libjavaProfiler.so is built with -static-libgcc, the +// embedded __gxx_personality_v0 is called by the dynamic libgcc_s.so.1's +// _Unwind_ForcedUnwind. The two libgcc versions have incompatible +// _Unwind_Context layouts; calling _Unwind_SetGR (which happens when the +// personality finds a cleanup action) with a cross-version context triggers +// the cold/error path, which calls abort(). +// +// The fix: use __pthread_register_cancel / __pthread_unregister_cancel +// directly — the same thing the C macro form of pthread_cleanup_push does. +// This registers cleanup via a setjmp buffer in a runtime linked-list, NOT +// via an LSDA destructor. _Unwind_ForcedUnwind's stop function +// (__pthread_unwind_stop) handles the cleanup without ever calling +// __gxx_personality_v0 for this frame, so _Unwind_SetGR is never called and +// the cross-version incompatibility is never triggered. +// +// On musl: pthread_cleanup_push already uses the C/setjmp form (no RAII), +// and pthread_exit does not use _Unwind_ForcedUnwind, so there is no issue. +// The __GLIBC__ guard keeps the musl path unchanged. +#ifdef __GLIBC__ +// On glibc, declares __pthread_register_cancel etc. only inside +// the C (non-C++) conditional, so they're invisible in C++ code. Redeclare +// them with extern "C" so we can call them directly without the header guard. +extern "C" { + extern void __pthread_register_cancel(__pthread_unwind_buf_t*); + extern void __pthread_unregister_cancel(__pthread_unwind_buf_t*); + [[noreturn]] extern void __pthread_unwind_next(__pthread_unwind_buf_t*); +} +#endif + +__attribute__((visibility("hidden"), noinline, no_stack_protector)) +void run_with_cleanup(func_start_routine routine, void* params, + void (*cleanup_fn)(void*), void* cleanup_arg) { +#ifdef __GLIBC__ + __pthread_unwind_buf_t cancel_buf = {}; + // With savemask=0, __sigsetjmp only writes __jmp_buf + int __mask_was_saved; + // it never touches __saved_mask. The inner struct of __pthread_unwind_buf_t + // must cover exactly that writable prefix of struct __jmp_buf_tag. + static_assert(offsetof(__pthread_unwind_buf_t, __cancel_jmp_buf) == 0 && + sizeof(cancel_buf.__cancel_jmp_buf[0]) == offsetof(struct __jmp_buf_tag, __saved_mask), + "glibc __pthread_unwind_buf_t inner layout incompatible with struct __jmp_buf_tag"); + // __sigsetjmp/longjmp only intercepts _Unwind_ForcedUnwind (pthread_exit / + // cancellation). routine(params) must NOT throw a regular C++ exception + // across this boundary: an escaping exception would skip both + // __pthread_unregister_cancel and cleanup_fn below, leaking the thread + // registration and leaving cancel_buf linked against this (unwound) frame. + // We cannot defend with a try/catch here — a handler frame adds an LSDA + // action, which is exactly what triggers the static-libgcc abort this + // function exists to avoid. Production routines are JVM/native start + // routines that handle their own exceptions and do not throw across here. + if (__builtin_expect( + // set __sigsetjmp's savemask=0 (the second parameter, noting that the signal mask is NOT + // saved/restored, which is correct because the cancel mechanism does not depend on signal mask state. + __sigsetjmp((struct __jmp_buf_tag*)(void*)cancel_buf.__cancel_jmp_buf, 0), 0)) { + // Reached via longjmp from glibc's stop function when pthread_exit + // (or cancellation) fires. Run cleanup and continue unwinding. + cleanup_fn(cleanup_arg); + __pthread_unwind_next(&cancel_buf); + // __pthread_unwind_next is [[noreturn]]; this fails loudly rather than + // falling through into __pthread_register_cancel on a torn-down frame + // should a future/variant glibc ever return from it. + __builtin_unreachable(); + } + __pthread_register_cancel(&cancel_buf); + routine(params); + __pthread_unregister_cancel(&cancel_buf); + cleanup_fn(cleanup_arg); +#else + // musl / non-glibc: pthread_cleanup_push uses the C/setjmp form, no RAII. + pthread_cleanup_push(cleanup_fn, cleanup_arg); + routine(params); + pthread_cleanup_pop(1); +#endif +} + +#ifdef UNIT_TEST +// Integration test entry point: exercises the full start_routine_wrapper → +// run_with_cleanup chain without calling Profiler::registerThread or +// Profiler::unregisterThread, which dereference _cpu_engine/_wall_engine and +// crash when the profiler is not started (as in gtest). +// +// The caller supplies cleanup_fn/cleanup_arg so the test can verify cleanup +// fires and observe ProfiledThread::release() without coupling to Profiler state. +// +// Thread lifecycle: +// pthread_create_wrapped_for_test → start_routine_for_test +// → ProfiledThread::initCurrentThread() +// → run_with_cleanup(routine, params, cleanup_fn, cleanup_arg) +// → pthread_exit(nullptr) +struct WrapperTestCtx { + func_start_routine routine; + void* params; + void (*cleanup_fn)(void*); + void* cleanup_arg; +}; + +__attribute__((visibility("hidden"), noinline, no_stack_protector)) +static void* start_routine_for_test(void* raw) { + auto* ctx = static_cast(raw); + func_start_routine routine = ctx->routine; + void* params = ctx->params; + void (*cleanup_fn)(void*) = ctx->cleanup_fn; + void* cleanup_arg = ctx->cleanup_arg; + { + SignalBlocker blocker; + delete ctx; + ProfiledThread::initCurrentThread(); + } + run_with_cleanup(routine, params, cleanup_fn, cleanup_arg); + pthread_exit(nullptr); + __builtin_unreachable(); +} + +int pthread_create_wrapped_for_test(pthread_t* thread, + func_start_routine routine, void* params, + void (*cleanup_fn)(void*), void* cleanup_arg) { + WrapperTestCtx* ctx; + { + SignalBlocker blocker; + ctx = new WrapperTestCtx{routine, params, cleanup_fn, cleanup_arg}; + } + int ret = pthread_create(thread, nullptr, start_routine_for_test, ctx); + if (ret != 0) { + SignalBlocker blocker; + delete ctx; + } + return ret; +} + +// Variant that passes the production cleanup_unregister as the cleanup function. +// Exercises the full chain: start_routine_for_test → run_with_cleanup → +// cleanup_unregister → Profiler::unregisterThread + ProfiledThread::release. +// Profiler::unregisterThread is null-safe under UNIT_TEST (see profiler.cpp). +int pthread_create_with_cleanup_unregister_for_test(pthread_t* thread, + func_start_routine routine, + void* params) { + return pthread_create_wrapped_for_test(thread, routine, params, + cleanup_unregister, nullptr); +} +#endif // UNIT_TEST + #ifdef __aarch64__ // Delete RoutineInfo with profiling signals blocked to prevent ASAN // allocator lock reentrancy. Kept noinline so SignalBlocker's sigset_t @@ -99,29 +255,6 @@ static void init_tls_and_register() { Profiler::registerThread(ProfiledThread::currentTid()); } -// pthread_cleanup_push callback for start_routine_wrapper_spec. -// Fires when the wrapped routine calls pthread_exit() or the thread is -// canceled. Kept noinline so its stack frame (which may hold a SignalBlocker -// via unregister_and_release) lives outside the DEOPT-corruption zone of -// start_routine_wrapper_spec. -__attribute__((noinline)) -static void cleanup_unregister(void*) { - unregister_and_release(ProfiledThread::currentTid()); -} - -// pthread_cleanup_push declares `struct __ptcb` in the caller's frame. If that -// frame is start_routine_wrapper_spec, the structure sits inside the ~224-byte -// DEOPT-corruption zone and pthread_cleanup_pop(1) would invoke a clobbered -// function pointer. This noinline + no_stack_protector helper hoists the -// cleanup-handler frame out of the corruption zone — its own frame lives -// safely above start_routine_wrapper_spec's. -__attribute__((noinline, no_stack_protector)) -static void run_with_musl_cleanup(func_start_routine routine, void* params) { - pthread_cleanup_push(cleanup_unregister, nullptr); - routine(params); - pthread_cleanup_pop(1); -} - // Wrapper around the real start routine. // The wrapper: // 1. Register the newly created thread to profiler @@ -172,11 +305,20 @@ static void* start_routine_wrapper_spec(void* args) { delete_routine_info(thr); init_tls_and_register(); // cleanup_unregister fires on pthread_exit() or cancellation from within - // routine(params). The push/pop pair lives inside run_with_musl_cleanup so - // that `struct __ptcb` does not land in this frame's DEOPT-corruption zone. - run_with_musl_cleanup(routine, params); + // routine(params). The push/pop pair lives inside run_with_cleanup so + // that __pthread_unwind_buf_t (glibc) / struct __ptcb (musl) does not land + // in this frame's DEOPT-corruption zone. + run_with_cleanup(routine, params, cleanup_unregister, nullptr); // pthread_exit instead of 'return': the saved LR in this frame is corrupted // by DEOPT PACKING; returning would jump to a garbage address. + // cleanup_unregister has already run via run_with_cleanup's normal return + // path, so there is no registered cancel handler left. The forced unwind + // raised by pthread_exit walks this frame, but it is safe because no + // destructor-bearing local (and hence no LSDA cleanup/handler action) is + // live at this call site: __gxx_personality_v0 returns continue-unwind + // without ever calling _Unwind_SetGR, avoiding the static-libgcc abort. + // WARNING: adding any RAII local with a destructor between run_with_cleanup + // and pthread_exit would reintroduce that crash. pthread_exit(nullptr); __builtin_unreachable(); } @@ -227,14 +369,14 @@ static void* start_routine_wrapper(void* args) { ProfiledThread::currentSignalSafe()->startInitWindow(); Profiler::registerThread(ProfiledThread::currentTid()); } - // RAII cleanup: reads tid from TLS in the destructor (same rationale as - // start_routine_wrapper_spec: avoids storing state on a potentially corruptible frame). - // unregister_and_release() wraps the two calls under SignalBlocker (PROF-14603). - struct Cleanup { - ~Cleanup() { unregister_and_release(ProfiledThread::currentTid()); } - } cleanup; - routine(params); - return nullptr; + // Use POSIX cleanup instead of C++ RAII to handle pthread_exit(): see run_with_cleanup. + // cleanup_unregister has already run on run_with_cleanup's normal return path. + // The pthread_exit forced unwind is safe here for the same reason as in + // start_routine_wrapper_spec: no destructor-bearing local is live at this + // call site, so __gxx_personality_v0 never calls _Unwind_SetGR. + run_with_cleanup(routine, params, cleanup_unregister, nullptr); + pthread_exit(nullptr); + __builtin_unreachable(); } static int pthread_create_hook(pthread_t* thread, diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index 41c3c71a6..6aed19568 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -134,7 +134,28 @@ int Profiler::registerThread(int tid) { return _instance->_cpu_engine->registerThread(tid) | _instance->_wall_engine->registerThread(tid); } +#ifdef UNIT_TEST +static std::atomic g_test_last_unregistered_tid{-1}; + +int Profiler::lastUnregisteredTidForTest() { + return g_test_last_unregistered_tid.load(std::memory_order_relaxed); +} +void Profiler::resetUnregisterObservableForTest() { + g_test_last_unregistered_tid.store(-1, std::memory_order_relaxed); +} +#endif + void Profiler::unregisterThread(int tid) { +#ifdef UNIT_TEST + // In gtest, _cpu_engine/_wall_engine are null (profiler not started). + // Record the tid so integration tests can verify the call happened without + // crashing on the null engine dereference. This bypasses the real engine + // unregister path entirely, so that path is covered only by JVM-level tests, + // not by these gtests. UNIT_TEST is defined solely for the gtest binaries + // (see GtestTaskBuilder); the shipped library never compiles this branch. + g_test_last_unregistered_tid.store(tid, std::memory_order_relaxed); + return; +#endif _instance->_cpu_engine->unregisterThread(tid); _instance->_wall_engine->unregisterThread(tid); } diff --git a/ddprof-lib/src/main/cpp/profiler.h b/ddprof-lib/src/main/cpp/profiler.h index 7938e3d26..c5818373b 100644 --- a/ddprof-lib/src/main/cpp/profiler.h +++ b/ddprof-lib/src/main/cpp/profiler.h @@ -403,6 +403,15 @@ class alignas(alignof(SpinLock)) Profiler { static int registerThread(int tid); static void unregisterThread(int tid); +#ifdef UNIT_TEST + // Returns the tid most recently passed to unregisterThread(), or -1 if it + // has never been called (or since the last resetUnregisterObservableForTest). + // Used by integration tests to assert that cleanup_unregister wired + // Profiler::unregisterThread correctly without needing live engine instances. + static int lastUnregisteredTidForTest(); + static void resetUnregisterObservableForTest(); +#endif + static void JNICALL ThreadStart(jvmtiEnv *jvmti, JNIEnv *jni, jthread thread) { diff --git a/ddprof-lib/src/test/cpp/forced_unwind_ut.cpp b/ddprof-lib/src/test/cpp/forced_unwind_ut.cpp index df8d56f9e..d33cb7611 100644 --- a/ddprof-lib/src/test/cpp/forced_unwind_ut.cpp +++ b/ddprof-lib/src/test/cpp/forced_unwind_ut.cpp @@ -2,26 +2,39 @@ * Copyright 2026, Datadog, Inc. * SPDX-License-Identifier: Apache-2.0 * - * Regression test for SCP-1154: IBM J9 JVM crash in start_routine_wrapper. + * Regression test for the static-libgcc / forced-unwind incompatibility in + * start_routine_wrapper (libraryPatcher_linux.cpp). * - * Root cause: pthread_cleanup_push in C++ mode on glibc creates - * __pthread_cleanup_class with an implicitly-noexcept destructor. When IBM - * J9's thread teardown raises _Unwind_ForcedUnwind (via libgcc), the C++ - * runtime calls std::terminate() because the forced-unwind exception exits a - * noexcept-bounded destructor. + * Root cause: when libjavaProfiler.so is built with -static-libgcc, its + * embedded __gxx_personality_v0 is called by the dynamic libgcc_s.so.1's + * _Unwind_ForcedUnwind (triggered by pthread_exit / pthread_cancel). The two + * libgcc versions have incompatible _Unwind_Context layouts; calling + * _Unwind_SetGR with a cross-version context triggers _Unwind_SetGR.cold, + * which calls abort(). The same crash affects any gtest binary that is also + * built with -static-libgcc. + * + * C++ RAII destructors (struct Cleanup { ~Cleanup() {...} }) add cleanup + * entries to the LSDA, which cause __gxx_personality_v0 to call _Unwind_SetGR + * → crash. The pthread_cleanup_push C++ macro (__pthread_cleanup_class RAII) + * has the same problem. * * Cleanup strategy (matches libraryPatcher_linux.cpp): - * - glibc: RAII struct destructor. Glibc's personality function handles - * forced-unwind correctly for user-defined destructors; IBM J9's forced- - * unwind also invokes personality functions. - * - musl: pthread_cleanup_push/pop (C-style callback). musl's signal-based - * thread cancellation invokes registered C callbacks but does NOT run C++ - * destructors. + * - glibc: Use __pthread_register_cancel / __pthread_unregister_cancel + * directly (the same mechanism the C form of pthread_cleanup_push + * uses). This registers cleanup via a setjmp buffer in a runtime + * linked-list, NOT in the LSDA. _Unwind_ForcedUnwind's stop + * function (__pthread_unwind_stop) handles the cleanup without + * ever calling __gxx_personality_v0 for the registered frame, so + * _Unwind_SetGR is never called and no crash occurs. + * - musl: pthread_cleanup_push already uses the C/setjmp form (no RAII). + * pthread_exit on musl does not use _Unwind_ForcedUnwind at all, + * so there is no incompatibility issue. * * These tests verify the per-platform cleanup path: - * glibc – RAII destructor fires on pthread_cancel and pthread_exit. - * musl – pthread_cleanup_push callback fires on pthread_cancel and - * pthread_exit. + * glibc – __pthread_register_cancel callback fires on pthread_cancel, + * pthread_exit, and normal routine return. + * musl – run_with_cleanup (pthread_cleanup_push/pop) callback fires on + * pthread_cancel, pthread_exit, and normal routine return. * A shared test verifies ProfiledThread::release() is safe to call from the * chosen cleanup path. */ @@ -34,31 +47,44 @@ #include #include +#include #include +// Production function under test — defined in libraryPatcher_linux.cpp. +// Declared here (outside any platform guard) because run_with_cleanup is built +// on both glibc and musl; each platform's tests call it directly. +extern "C++" void run_with_cleanup(void* (*)(void*), void*, void(*)(void*), void*); + // =========================================================================== -// glibc path: RAII destructor tests +// glibc path: __pthread_register_cancel / __pthread_unregister_cancel tests // =========================================================================== #ifdef __GLIBC__ +// --------------------------------------------------------------------------- + static std::atomic g_bare_cleanup_called{false}; -static void* bare_raii_cancel_thread(void*) { - g_bare_cleanup_called.store(false, std::memory_order_relaxed); - struct Cleanup { - ~Cleanup() { g_bare_cleanup_called.store(true, std::memory_order_relaxed); } - } cleanup; +static void bare_cleanup(void*) { + g_bare_cleanup_called.store(true, std::memory_order_relaxed); +} + +static void* bare_spin(void*) { while (true) { pthread_testcancel(); usleep(100); } +} + +static void* bare_cancel_thread(void*) { + g_bare_cleanup_called.store(false, std::memory_order_relaxed); + run_with_cleanup(bare_spin, nullptr, bare_cleanup, nullptr); return nullptr; } -// Regression (glibc): RAII destructor fires on pthread_cancel. -TEST(ForcedUnwindTest, RaiiDestructorRunsOnPthreadCancel) { +// Regression (glibc): __pthread_register_cancel callback fires on pthread_cancel. +TEST(ForcedUnwindTest, CleanupCallbackRunsOnPthreadCancel) { pthread_t t; - ASSERT_EQ(0, pthread_create(&t, nullptr, bare_raii_cancel_thread, nullptr)); + ASSERT_EQ(0, pthread_create(&t, nullptr, bare_cancel_thread, nullptr)); usleep(5000); pthread_cancel(t); @@ -67,7 +93,7 @@ TEST(ForcedUnwindTest, RaiiDestructorRunsOnPthreadCancel) { ASSERT_EQ(0, pthread_join(t, &retval)); EXPECT_TRUE(g_bare_cleanup_called.load()) - << "RAII destructor must run during pthread_cancel forced unwind"; + << "__pthread_register_cancel callback must run during pthread_cancel"; EXPECT_EQ(PTHREAD_CANCELED, retval); } @@ -76,32 +102,32 @@ TEST(ForcedUnwindTest, RaiiDestructorRunsOnPthreadCancel) { static std::atomic g_pt_cleanup_called{false}; static std::atomic g_pt_release_called{false}; -static void* profiled_raii_cancel_thread(void*) { - g_pt_cleanup_called.store(false, std::memory_order_relaxed); - g_pt_release_called.store(false, std::memory_order_relaxed); - - ProfiledThread::initCurrentThread(); - - struct Cleanup { - ~Cleanup() { - g_pt_cleanup_called.store(true, std::memory_order_relaxed); - ProfiledThread::release(); - g_pt_release_called.store(true, std::memory_order_relaxed); - } - } cleanup; +static void profiled_cleanup(void*) { + g_pt_cleanup_called.store(true, std::memory_order_relaxed); + ProfiledThread::release(); + g_pt_release_called.store(true, std::memory_order_relaxed); +} +static void* profiled_spin(void*) { while (true) { pthread_testcancel(); usleep(100); } +} + +static void* profiled_cancel_thread(void*) { + g_pt_cleanup_called.store(false, std::memory_order_relaxed); + g_pt_release_called.store(false, std::memory_order_relaxed); + ProfiledThread::initCurrentThread(); + run_with_cleanup(profiled_spin, nullptr, profiled_cleanup, nullptr); return nullptr; } -// Regression (glibc): RAII pattern with ProfiledThread lifecycle survives -// pthread_cancel without terminate(). +// Regression (glibc): ProfiledThread lifecycle survives pthread_cancel without +// abort() from _Unwind_SetGR.cold (the static-libgcc / libgcc_s.so.1 clash). TEST(ForcedUnwindTest, ProfiledThreadReleasedOnForcedUnwind) { pthread_t t; - ASSERT_EQ(0, pthread_create(&t, nullptr, profiled_raii_cancel_thread, nullptr)); + ASSERT_EQ(0, pthread_create(&t, nullptr, profiled_cancel_thread, nullptr)); usleep(5000); pthread_cancel(t); @@ -110,9 +136,9 @@ TEST(ForcedUnwindTest, ProfiledThreadReleasedOnForcedUnwind) { ASSERT_EQ(0, pthread_join(t, &retval)); EXPECT_TRUE(g_pt_cleanup_called.load()) - << "RAII destructor must run when thread is cancelled"; + << "Cleanup callback must run when thread is cancelled"; EXPECT_TRUE(g_pt_release_called.load()) - << "ProfiledThread::release() must complete inside the RAII destructor"; + << "ProfiledThread::release() must complete inside the cleanup callback"; EXPECT_EQ(PTHREAD_CANCELED, retval); } @@ -120,56 +146,164 @@ TEST(ForcedUnwindTest, ProfiledThreadReleasedOnForcedUnwind) { static std::atomic g_exit_cleanup_called{false}; -static void* raii_pthread_exit_thread(void*) { - g_exit_cleanup_called.store(false, std::memory_order_relaxed); - struct Cleanup { - ~Cleanup() { g_exit_cleanup_called.store(true, std::memory_order_relaxed); } - } cleanup; +static void exit_cleanup(void*) { + g_exit_cleanup_called.store(true, std::memory_order_relaxed); +} + +static void* exit_fn(void*) { pthread_exit(reinterpret_cast(42)); +} + +static void* exit_thread(void*) { + g_exit_cleanup_called.store(false, std::memory_order_relaxed); + run_with_cleanup(exit_fn, nullptr, exit_cleanup, nullptr); return nullptr; } -// Regression (glibc): RAII destructor fires on pthread_exit. -TEST(ForcedUnwindTest, RaiiDestructorRunsOnPthreadExit) { +// Regression (glibc): __pthread_register_cancel callback fires on pthread_exit. +TEST(ForcedUnwindTest, CleanupCallbackRunsOnPthreadExit) { pthread_t t; - ASSERT_EQ(0, pthread_create(&t, nullptr, raii_pthread_exit_thread, nullptr)); + ASSERT_EQ(0, pthread_create(&t, nullptr, exit_thread, nullptr)); void* retval; ASSERT_EQ(0, pthread_join(t, &retval)); EXPECT_TRUE(g_exit_cleanup_called.load()) - << "RAII destructor must also run during pthread_exit"; + << "__pthread_register_cancel callback must also run during pthread_exit"; EXPECT_EQ(reinterpret_cast(42), retval); } +// --------------------------------------------------------------------------- + +static std::atomic g_nr_cleanup_count{0}; +static std::atomic g_nr_past_run{false}; + +static void* nr_routine(void*) { return nullptr; } + +static void nr_cleanup(void*) { + g_nr_cleanup_count.fetch_add(1, std::memory_order_relaxed); +} + +static void* nr_thread(void*) { + g_nr_cleanup_count.store(0, std::memory_order_relaxed); + g_nr_past_run.store(false, std::memory_order_relaxed); + run_with_cleanup(nr_routine, nullptr, nr_cleanup, nullptr); + g_nr_past_run.store(true, std::memory_order_relaxed); + // Spin with a cancellation point so the main thread can probe whether + // __pthread_unregister_cancel took effect after run_with_cleanup returned. + while (true) { + pthread_testcancel(); + usleep(50); + } + __builtin_unreachable(); +} + +// Normal-return path (matches start_routine_wrapper on non-aarch64 glibc): +// cleanup_fn is called once AND __pthread_unregister_cancel removes the buf. +// - no cleanup_fn → g_nr_cleanup_count stays 0 (deterministically detected +// by the count==1 assertion below). +// - no __pthread_unregister_cancel → post-return cancel longjmps into the +// freed cancel_buf. This is best-effort detection only: the resulting UB +// may crash, re-fire cleanup (count==2), or — depending on stack contents, +// and especially under ASAN/MSAN — do neither and pass. Do NOT rely on +// this as a deterministic mutation detector for the unregister call; the +// guaranteed coverage here is the positive count==1 assertion. +TEST(ForcedUnwindTest, CleanupCalledExactlyOnceOnNormalReturn) { + pthread_t t; + ASSERT_EQ(0, pthread_create(&t, nullptr, nr_thread, nullptr)); + + while (!g_nr_past_run.load(std::memory_order_relaxed)) { + usleep(100); + } + EXPECT_EQ(1, g_nr_cleanup_count.load()) + << "cleanup_fn must be called once on the normal-return path of run_with_cleanup"; + + // Cancel the spinning thread. If __pthread_unregister_cancel was not + // called, glibc still holds a pointer to run_with_cleanup's freed + // cancel_buf; the cancel would longjmp into garbage and either crash or + // fire nr_cleanup a second time (count == 2). + pthread_cancel(t); + void* retval; + ASSERT_EQ(0, pthread_join(t, &retval)); + + EXPECT_EQ(1, g_nr_cleanup_count.load()) + << "__pthread_unregister_cancel must remove the buf so the " + "post-return cancel does not re-fire cleanup_fn"; + EXPECT_EQ(PTHREAD_CANCELED, retval); +} + +// --------------------------------------------------------------------------- + +static std::atomic g_pt_nr_called{false}; +static std::atomic g_pt_nr_release_called{false}; + +static void* pt_nr_routine(void*) { return nullptr; } + +static void pt_nr_cleanup(void*) { + g_pt_nr_called.store(true, std::memory_order_relaxed); + ProfiledThread::release(); + g_pt_nr_release_called.store(true, std::memory_order_relaxed); +} + +static void* pt_nr_thread(void*) { + g_pt_nr_called.store(false, std::memory_order_relaxed); + g_pt_nr_release_called.store(false, std::memory_order_relaxed); + ProfiledThread::initCurrentThread(); + run_with_cleanup(pt_nr_routine, nullptr, pt_nr_cleanup, nullptr); + return reinterpret_cast(77); +} + +// Regression (glibc): cleanup_fn fires and ProfiledThread is released when +// routine returns normally. This is the start_routine_wrapper path for +// threads that finish naturally (no cancel, no pthread_exit). +// Removing cleanup_fn(cleanup_arg) after __pthread_unregister_cancel leaves +// g_pt_nr_called false and leaks the ProfiledThread entry. +TEST(ForcedUnwindTest, ProfiledThreadReleasedOnNormalReturn) { + pthread_t t; + ASSERT_EQ(0, pthread_create(&t, nullptr, pt_nr_thread, nullptr)); + + void* retval; + ASSERT_EQ(0, pthread_join(t, &retval)); + + EXPECT_TRUE(g_pt_nr_called.load()) + << "cleanup_fn must be called on the normal-return path"; + EXPECT_TRUE(g_pt_nr_release_called.load()) + << "ProfiledThread::release() must complete on the normal-return path"; + EXPECT_EQ(reinterpret_cast(77), retval); +} + #endif // __GLIBC__ // =========================================================================== -// musl (non-glibc) path: pthread_cleanup_push/pop callback tests +// musl (non-glibc) path: run_with_cleanup (pthread_cleanup_push/pop) tests // =========================================================================== #ifndef __GLIBC__ -static std::atomic g_c_cancel_called{false}; +// --------------------------------------------------------------------------- + +static std::atomic g_m_cancel_called{false}; -static void c_cleanup_cancel(void*) { - g_c_cancel_called.store(true, std::memory_order_relaxed); +static void m_cancel_cleanup(void*) { + g_m_cancel_called.store(true, std::memory_order_relaxed); } -static void* c_cleanup_cancel_thread(void*) { - g_c_cancel_called.store(false, std::memory_order_relaxed); - pthread_cleanup_push(c_cleanup_cancel, nullptr); +static void* m_cancel_spin(void*) { while (true) { pthread_testcancel(); usleep(100); } - pthread_cleanup_pop(0); +} + +static void* m_cancel_thread(void*) { + g_m_cancel_called.store(false, std::memory_order_relaxed); + run_with_cleanup(m_cancel_spin, nullptr, m_cancel_cleanup, nullptr); return nullptr; } -// Regression (musl): C-style pthread_cleanup_push callback fires on pthread_cancel. +// musl: run_with_cleanup cleanup fires on pthread_cancel. TEST(ForcedUnwindTest, CleanupCallbackRunsOnPthreadCancel) { pthread_t t; - ASSERT_EQ(0, pthread_create(&t, nullptr, c_cleanup_cancel_thread, nullptr)); + ASSERT_EQ(0, pthread_create(&t, nullptr, m_cancel_thread, nullptr)); usleep(5000); pthread_cancel(t); @@ -177,76 +311,72 @@ TEST(ForcedUnwindTest, CleanupCallbackRunsOnPthreadCancel) { void* retval; ASSERT_EQ(0, pthread_join(t, &retval)); - EXPECT_TRUE(g_c_cancel_called.load()) - << "pthread_cleanup_push callback must run during pthread_cancel on musl"; + EXPECT_TRUE(g_m_cancel_called.load()) + << "run_with_cleanup cleanup must fire during pthread_cancel on musl"; EXPECT_EQ(PTHREAD_CANCELED, retval); } // --------------------------------------------------------------------------- -static std::atomic g_c_exit_called{false}; +static std::atomic g_m_exit_called{false}; -static void c_cleanup_exit(void*) { - g_c_exit_called.store(true, std::memory_order_relaxed); +static void m_exit_cleanup(void*) { + g_m_exit_called.store(true, std::memory_order_relaxed); } -static void* c_cleanup_exit_thread(void*) { - g_c_exit_called.store(false, std::memory_order_relaxed); - pthread_cleanup_push(c_cleanup_exit, nullptr); +static void* m_exit_fn(void*) { pthread_exit(reinterpret_cast(42)); - pthread_cleanup_pop(0); +} + +static void* m_exit_thread(void*) { + g_m_exit_called.store(false, std::memory_order_relaxed); + run_with_cleanup(m_exit_fn, nullptr, m_exit_cleanup, nullptr); return nullptr; } -// Regression (musl): C-style pthread_cleanup_push callback fires on pthread_exit. +// musl: run_with_cleanup cleanup fires on pthread_exit. TEST(ForcedUnwindTest, CleanupCallbackRunsOnPthreadExit) { pthread_t t; - ASSERT_EQ(0, pthread_create(&t, nullptr, c_cleanup_exit_thread, nullptr)); + ASSERT_EQ(0, pthread_create(&t, nullptr, m_exit_thread, nullptr)); void* retval; ASSERT_EQ(0, pthread_join(t, &retval)); - EXPECT_TRUE(g_c_exit_called.load()) - << "pthread_cleanup_push callback must run during pthread_exit on musl"; + EXPECT_TRUE(g_m_exit_called.load()) + << "run_with_cleanup cleanup must fire during pthread_exit on musl"; EXPECT_EQ(reinterpret_cast(42), retval); } // --------------------------------------------------------------------------- -static std::atomic g_c_pt_called{false}; -static std::atomic g_c_pt_release_called{false}; +static std::atomic g_m_pt_called{false}; +static std::atomic g_m_pt_release_called{false}; -static void profiled_c_cleanup(void* arg) { - int tid = *static_cast(arg); - (void)tid; - g_c_pt_called.store(true, std::memory_order_relaxed); +static void m_profiled_cleanup(void*) { + g_m_pt_called.store(true, std::memory_order_relaxed); ProfiledThread::release(); - g_c_pt_release_called.store(true, std::memory_order_relaxed); + g_m_pt_release_called.store(true, std::memory_order_relaxed); } -static int g_c_pt_tid; - -static void* profiled_c_cancel_thread(void*) { - g_c_pt_called.store(false, std::memory_order_relaxed); - g_c_pt_release_called.store(false, std::memory_order_relaxed); - - ProfiledThread::initCurrentThread(); - g_c_pt_tid = ProfiledThread::currentTid(); - - pthread_cleanup_push(profiled_c_cleanup, &g_c_pt_tid); +static void* m_profiled_spin(void*) { while (true) { pthread_testcancel(); usleep(100); } - pthread_cleanup_pop(0); +} + +static void* m_profiled_cancel_thread(void*) { + g_m_pt_called.store(false, std::memory_order_relaxed); + g_m_pt_release_called.store(false, std::memory_order_relaxed); + ProfiledThread::initCurrentThread(); + run_with_cleanup(m_profiled_spin, nullptr, m_profiled_cleanup, nullptr); return nullptr; } -// Regression (musl): pthread_cleanup_push pattern with ProfiledThread lifecycle -// survives pthread_cancel. +// musl: ProfiledThread lifecycle survives pthread_cancel via run_with_cleanup. TEST(ForcedUnwindTest, ProfiledThreadReleasedOnForcedUnwind) { pthread_t t; - ASSERT_EQ(0, pthread_create(&t, nullptr, profiled_c_cancel_thread, nullptr)); + ASSERT_EQ(0, pthread_create(&t, nullptr, m_profiled_cancel_thread, nullptr)); usleep(5000); pthread_cancel(t); @@ -254,13 +384,208 @@ TEST(ForcedUnwindTest, ProfiledThreadReleasedOnForcedUnwind) { void* retval; ASSERT_EQ(0, pthread_join(t, &retval)); - EXPECT_TRUE(g_c_pt_called.load()) - << "C cleanup callback must run when thread is cancelled"; - EXPECT_TRUE(g_c_pt_release_called.load()) - << "ProfiledThread::release() must complete inside the C cleanup callback"; + EXPECT_TRUE(g_m_pt_called.load()) + << "Cleanup callback must run when thread is cancelled"; + EXPECT_TRUE(g_m_pt_release_called.load()) + << "ProfiledThread::release() must complete inside the cleanup callback"; EXPECT_EQ(PTHREAD_CANCELED, retval); } +// --------------------------------------------------------------------------- + +static std::atomic g_m_nr_count{0}; + +static void* m_nr_routine(void*) { return nullptr; } + +static void m_nr_cleanup(void*) { + g_m_nr_count.fetch_add(1, std::memory_order_relaxed); +} + +static void* m_nr_thread(void*) { + g_m_nr_count.store(0, std::memory_order_relaxed); + run_with_cleanup(m_nr_routine, nullptr, m_nr_cleanup, nullptr); + return reinterpret_cast(77); +} + +// musl: normal-return path calls cleanup exactly once via pthread_cleanup_pop(1). +// Removing the pop or changing pop(1) to pop(0) leaves count at 0. +TEST(ForcedUnwindTest, CleanupCalledExactlyOnceOnNormalReturn) { + pthread_t t; + ASSERT_EQ(0, pthread_create(&t, nullptr, m_nr_thread, nullptr)); + + void* retval; + ASSERT_EQ(0, pthread_join(t, &retval)); + + EXPECT_EQ(1, g_m_nr_count.load()) + << "cleanup_fn must be called once via pthread_cleanup_pop(1) on the normal-return path"; + EXPECT_EQ(reinterpret_cast(77), retval); +} + #endif // !__GLIBC__ +// =========================================================================== +// Integration tests: start_routine_for_test → run_with_cleanup chain +// +// These tests exercise the full path that start_routine_wrapper takes in +// production (init TLS → run_with_cleanup → cleanup on cancel/exit), without +// the Profiler::registerThread/unregisterThread calls that require a live +// profiler instance. Profiler registration is tested separately; these tests +// cover the wiring between the wrapper and run_with_cleanup. +// +// Removing run_with_cleanup from start_routine_wrapper, or wiring it to a +// no-op cleanup, would leave the unit tests passing but fail these tests. +// =========================================================================== + +// Test entry points declared in libraryPatcher_linux.cpp under #ifdef UNIT_TEST. +extern "C++" int pthread_create_wrapped_for_test( + pthread_t*, void* (*)(void*), void*, void (*)(void*), void*); +extern "C++" int pthread_create_with_cleanup_unregister_for_test( + pthread_t*, void* (*)(void*), void*); + +// --------------------------------------------------------------------------- + +static std::atomic g_int_cancel_called{false}; +static std::atomic g_int_cancel_released{false}; + +static void int_cancel_cleanup(void*) { + g_int_cancel_called.store(true, std::memory_order_relaxed); + ProfiledThread::release(); + g_int_cancel_released.store(true, std::memory_order_relaxed); +} + +static void* int_cancel_spin(void*) { + while (true) { + pthread_testcancel(); + usleep(100); + } +} + +// Integration: start_routine_for_test → run_with_cleanup → cleanup fires and +// ProfiledThread is released on pthread_cancel. +TEST(ForcedUnwindTest, WrapperReleasesProfiledThreadOnCancel) { + g_int_cancel_called.store(false, std::memory_order_relaxed); + g_int_cancel_released.store(false, std::memory_order_relaxed); + + pthread_t t; + ASSERT_EQ(0, pthread_create_wrapped_for_test( + &t, int_cancel_spin, nullptr, int_cancel_cleanup, nullptr)); + + usleep(5000); + pthread_cancel(t); + + void* retval; + ASSERT_EQ(0, pthread_join(t, &retval)); + + EXPECT_TRUE(g_int_cancel_called.load()) + << "run_with_cleanup cleanup must fire when wrapped thread is cancelled"; + EXPECT_TRUE(g_int_cancel_released.load()) + << "ProfiledThread::release() must complete inside the wrapper cleanup"; + EXPECT_EQ(PTHREAD_CANCELED, retval); +} + +// --------------------------------------------------------------------------- + +static std::atomic g_int_exit_called{false}; +static std::atomic g_int_exit_released{false}; + +static void int_exit_cleanup(void*) { + g_int_exit_called.store(true, std::memory_order_relaxed); + ProfiledThread::release(); + g_int_exit_released.store(true, std::memory_order_relaxed); +} + +static void* int_exit_fn(void*) { + pthread_exit(reinterpret_cast(99)); +} + +// Integration: start_routine_for_test → run_with_cleanup → cleanup fires and +// ProfiledThread is released on pthread_exit. +TEST(ForcedUnwindTest, WrapperReleasesProfiledThreadOnPthreadExit) { + g_int_exit_called.store(false, std::memory_order_relaxed); + g_int_exit_released.store(false, std::memory_order_relaxed); + + pthread_t t; + ASSERT_EQ(0, pthread_create_wrapped_for_test( + &t, int_exit_fn, nullptr, int_exit_cleanup, nullptr)); + + void* retval; + ASSERT_EQ(0, pthread_join(t, &retval)); + + EXPECT_TRUE(g_int_exit_called.load()) + << "run_with_cleanup cleanup must fire when wrapped thread calls pthread_exit"; + EXPECT_TRUE(g_int_exit_released.load()) + << "ProfiledThread::release() must complete inside the wrapper cleanup"; + EXPECT_EQ(reinterpret_cast(99), retval); +} + +// --------------------------------------------------------------------------- +// Production-cleanup integration: cleanup_unregister calls +// Profiler::unregisterThread with the wrapped thread's TID. +// Replacing unregister_and_release with a bare ProfiledThread::release() would +// leave WrapperReleasesProfiledThreadOnCancel/PthreadExit passing but fail here. + +#include "profiler.h" + +static std::atomic g_int_prod_cancel_tid{-1}; + +static void* int_prod_cancel_spin(void*) { + g_int_prod_cancel_tid.store(ProfiledThread::currentTid(), + std::memory_order_relaxed); + while (true) { + pthread_testcancel(); + usleep(100); + } +} + +// Integration: cleanup_unregister calls Profiler::unregisterThread(tid) on cancel. +TEST(ForcedUnwindTest, WrapperCallsProfilerUnregisterOnCancel) { + g_int_prod_cancel_tid.store(-1, std::memory_order_relaxed); + Profiler::resetUnregisterObservableForTest(); + + pthread_t t; + ASSERT_EQ(0, pthread_create_with_cleanup_unregister_for_test( + &t, int_prod_cancel_spin, nullptr)); + + while (g_int_prod_cancel_tid.load(std::memory_order_relaxed) == -1) { + usleep(100); + } + const int expected_tid = g_int_prod_cancel_tid.load(std::memory_order_relaxed); + + pthread_cancel(t); + void* retval; + ASSERT_EQ(0, pthread_join(t, &retval)); + + EXPECT_EQ(expected_tid, Profiler::lastUnregisteredTidForTest()) + << "cleanup_unregister must call Profiler::unregisterThread(tid) on cancel"; + EXPECT_EQ(PTHREAD_CANCELED, retval); +} + +// --------------------------------------------------------------------------- + +static std::atomic g_int_prod_exit_tid{-1}; + +static void* int_prod_exit_fn(void*) { + g_int_prod_exit_tid.store(ProfiledThread::currentTid(), + std::memory_order_relaxed); + pthread_exit(reinterpret_cast(55)); +} + +// Integration: cleanup_unregister calls Profiler::unregisterThread(tid) on pthread_exit. +TEST(ForcedUnwindTest, WrapperCallsProfilerUnregisterOnPthreadExit) { + g_int_prod_exit_tid.store(-1, std::memory_order_relaxed); + Profiler::resetUnregisterObservableForTest(); + + pthread_t t; + ASSERT_EQ(0, pthread_create_with_cleanup_unregister_for_test( + &t, int_prod_exit_fn, nullptr)); + + void* retval; + ASSERT_EQ(0, pthread_join(t, &retval)); + + EXPECT_EQ(g_int_prod_exit_tid.load(std::memory_order_relaxed), + Profiler::lastUnregisteredTidForTest()) + << "cleanup_unregister must call Profiler::unregisterThread(tid) on pthread_exit"; + EXPECT_EQ(reinterpret_cast(55), retval); +} + #endif // __linux__ diff --git a/ddprof-lib/src/test/cpp/thread_teardown_safety_ut.cpp b/ddprof-lib/src/test/cpp/thread_teardown_safety_ut.cpp index 545ee172f..16d9c2a65 100644 --- a/ddprof-lib/src/test/cpp/thread_teardown_safety_ut.cpp +++ b/ddprof-lib/src/test/cpp/thread_teardown_safety_ut.cpp @@ -22,12 +22,22 @@ #include "thread.h" #include -#include #include #include +#include #include #include +#ifdef __GLIBC__ +// declares these only inside the C (non-C++) conditional. +// Redeclare with extern "C" so we can call them directly from C++ code. +extern "C" { + extern void __pthread_register_cancel(__pthread_unwind_buf_t*); + extern void __pthread_unregister_cancel(__pthread_unwind_buf_t*); + [[noreturn]] extern void __pthread_unwind_next(__pthread_unwind_buf_t*); +} +#endif + // Sentinel value meaning "handler has not run yet" — distinct from both nullptr // (not registered) and any real ProfiledThread address. static ProfiledThread* const kNotYetRun = reinterpret_cast(1); @@ -255,29 +265,54 @@ static std::atomic g_t07_release_ran{false}; #ifdef __GLIBC__ -static void *t07_body(void *) { +// t07 uses __pthread_register_cancel for cleanup (same as run_with_cleanup in +// libraryPatcher_linux.cpp). Two constraints follow from the static-libgcc / +// libgcc_s.so.1 incompatibility: +// +// 1. No C++ RAII objects with destructors in this frame. After the longjmp +// from glibc's stop function, __pthread_unwind_next calls _Unwind_ForcedUnwind +// again to continue unwinding through this frame. Any LSDA cleanup entry +// (e.g. SigGuard, std::unique_ptr) would make __gxx_personality_v0 call +// _Unwind_SetGR with a cross-version context → abort(). +// +// 2. No try/catch: handler frames also add LSDA entries, same problem. +// +// Signal handler save/restore is done manually (plain struct sigaction, no +// destructor) so there is nothing in the LSDA for this frame. +__attribute__((noinline, no_stack_protector)) static void *t07_body(void *) { g_t07_cleanup_ran.store(false, std::memory_order_relaxed); g_t07_release_ran.store(false, std::memory_order_relaxed); - SigGuard guard(SIGVTALRM); + // Save and install the signal handler WITHOUT RAII (no destructor = no LSDA + // cleanup entry for this frame). + struct sigaction old_sa_vtalrm; + sigaction(SIGVTALRM, nullptr, &old_sa_vtalrm); install_handler(SIGVTALRM, SIG_IGN); + ProfiledThread::initCurrentThread(); - try { - // Inject a signal before the cancellation point to exercise the combined path. - pthread_kill(pthread_self(), SIGVTALRM); - while (true) { - pthread_testcancel(); - usleep(100); - } - } catch (abi::__forced_unwind &) { + __pthread_unwind_buf_t cancel_buf = {}; + if (__builtin_expect( + __sigsetjmp((struct __jmp_buf_tag*)(void*)cancel_buf.__cancel_jmp_buf, 0), 0)) { + // Restore handler before continuing forced unwind (no RAII to do it for us). + sigaction(SIGVTALRM, &old_sa_vtalrm, nullptr); g_t07_cleanup_ran.store(true, std::memory_order_relaxed); ProfiledThread::release(); g_t07_release_ran.store(true, std::memory_order_relaxed); - throw; + __pthread_unwind_next(&cancel_buf); + } + __pthread_register_cancel(&cancel_buf); + // No matching __pthread_unregister_cancel: the only legal exit from this body + // is the forced unwind raised by pthread_cancel, which longjmps into the + // __sigsetjmp branch above and continues via __pthread_unwind_next. The loop + // below never returns normally; if it ever did, cancel_buf would be left + // registered against a destroyed frame. + // Inject a signal before the cancellation point to exercise the combined path. + pthread_kill(pthread_self(), SIGVTALRM); + while (true) { + pthread_testcancel(); + usleep(100); } - ProfiledThread::release(); - return nullptr; } #else // !__GLIBC__ — musl: cancellation runs C cleanup callbacks