diff --git a/ci/python-gate.libsonnet b/ci/python-gate.libsonnet index 0ae29d7123..0c560dce1f 100644 --- a/ci/python-gate.libsonnet +++ b/ci/python-gate.libsonnet @@ -142,6 +142,7 @@ BISECT_EMAIL_TO_PATTERN: ".*@oracle.com", TRUFFLE_STRICT_OPTION_DEPRECATION: "true", npm_config_registry: $.overlay_imports.npm_config_registry, + CFLAGS: "-ggdb", }, linux: { common: ENV_POSIX + {}, @@ -241,6 +242,8 @@ "graal_dumps/*/*", "bench-results.json", "raw-results.json", + "mxbuild/*/libpythonvm/libpythonvm.so.debug", + "mxbuild/*/GRAALPY_STANDALONE_COMMON/lib/graalpy*/libpython-native.so", ], //------------------------------------------------------------------------------------------------------------------ diff --git a/docs/contributor/CONTRIBUTING.md b/docs/contributor/CONTRIBUTING.md index 9b509d92ad..84aba00220 100644 --- a/docs/contributor/CONTRIBUTING.md +++ b/docs/contributor/CONTRIBUTING.md @@ -287,6 +287,9 @@ mx benchmark meso:nbody3 \ -Dgraal.MethodFilter=*measure* ``` +For debugging native problems in benchmark runs, there's `BENCHMARK_DEBUG_ARGS` in `mx_graalpython_benchmark.py` to enable additional debug logging and native-memory diagnostics, at the cost of performance. +This is intended for focused reproducer runs on a branch. + ### A note on terminology Note that there may be a little confusion about the configuration names of benchmarks. 
diff --git a/graalpython/com.oracle.graal.python.cext/CMakeLists.txt b/graalpython/com.oracle.graal.python.cext/CMakeLists.txt index 8a26d9d95e..27df0cb568 100644 --- a/graalpython/com.oracle.graal.python.cext/CMakeLists.txt +++ b/graalpython/com.oracle.graal.python.cext/CMakeLists.txt @@ -162,7 +162,7 @@ set(SRC_FILES ${CAPI_SRC}/codecs.c ${CAPI_SRC}/setobject.c ${CAPI_SRC}/compile.c ${CAPI_SRC}/fileobject.c ${CAPI_SRC}/pystrcmp.c ${CAPI_SRC}/getversion.c ${CAPI_SRC}/genobject.c ${CAPI_SRC}/methodobject.c ${CAPI_SRC}/boolobject.c ${CAPI_SRC}/pylifecycle.c ${CAPI_SRC}/errors.c ${CAPI_SRC}/signals.c ${CAPI_SRC}/datetime.c ${CAPI_SRC}/call.c - ${CAPI_SRC}/getargs.c ${CAPI_SRC}/tracemalloc.c ${CAPI_SRC}/initconfig.c + ${CAPI_SRC}/getargs.c ${CAPI_SRC}/tracemalloc.c ${CAPI_SRC}/initconfig.c ${CAPI_SRC}/graalpy_stacktrace.c ) file(GLOB_RECURSE ACTUAL_SRC_FILES @@ -454,6 +454,7 @@ if(WIN32) if (NOT MSVC) target_compile_options(${TARGET_LIBPYTHON} PRIVATE "-fmsc-version=1920") endif() + target_link_libraries(${TARGET_LIBPYTHON} dbghelp) else() # Link to math library; required for functions like 'hypot' or similar target_link_libraries(${TARGET_LIBPYTHON} m) diff --git a/graalpython/com.oracle.graal.python.cext/src/capi.h b/graalpython/com.oracle.graal.python.cext/src/capi.h index 4323916fdb..e20945ced8 100644 --- a/graalpython/com.oracle.graal.python.cext/src/capi.h +++ b/graalpython/com.oracle.graal.python.cext/src/capi.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * The Universal Permissive License (UPL), Version 1.0 @@ -96,6 +96,8 @@ #define PY_TRUFFLE_LOG_FINEST 0x20 #define PY_TRUFFLE_DEBUG_CAPI 0x40 #define PY_TRUFFLE_PYTHON_GC 0x80 +#define PY_TRUFFLE_POISON_NATIVE_MEMORY_ON_FREE 0x100 +#define PY_TRUFFLE_SAMPLE_NATIVE_MEMORY_ALLOC_SITES 0x200 typedef struct mmap_object mmap_object; typedef struct _gc_runtime_state GCState; // originally in 'gcmodule.c' @@ -169,16 +171,7 @@ extern Py_LOCAL_SYMBOL int8_t *_graalpy_finalizing; #if (__linux__ && __GNU_LIBRARY__) #include #include -#include #include -static void print_c_stacktrace() { - fprintf(stderr, "Native stacktrace:\n"); - intptr_t stack[16]; - size_t stack_size = backtrace((void *)stack, sizeof(stack) / sizeof(stack[0])); - backtrace_symbols_fd((void *)stack, stack_size, STDERR_FILENO); - fflush(stderr); -} - static void attach_gdb() { pid_t my_pid = getpid(); char* pathname = "/bin/sh"; @@ -197,15 +190,20 @@ static void attach_gdb() { } } #else -static void print_c_stacktrace() { - // not supported -} - static void attach_gdb() { // not supported } #endif +size_t GraalPyPrivate_CaptureStacktrace(void **frames, size_t max_depth, size_t skip); +void GraalPyPrivate_PrintCapturedStacktrace(FILE *file, const char *header, void *const *frames, size_t depth); +void GraalPyPrivate_PrintCurrentStacktrace(FILE *file, const char *header, size_t max_depth, size_t skip); +void GraalPyPrivate_LogCapturedStacktrace(int level, const char *prefix, void *const *frames, size_t depth); + +static void print_c_stacktrace() { + GraalPyPrivate_PrintCurrentStacktrace(stderr, "Native stacktrace:\n", 16, 0); +} + /* Flags definitions representing global (debug) options. 
*/ static MUST_INLINE int GraalPyPrivate_Trace_Memory() { return Py_Truffle_Options & PY_TRUFFLE_TRACE_MEM; @@ -232,6 +230,12 @@ static MUST_INLINE int GraalPyPrivate_Debug_CAPI() { static MUST_INLINE int GraalPyPrivate_PythonGC() { return Py_Truffle_Options & PY_TRUFFLE_PYTHON_GC; } +static MUST_INLINE int GraalPyPrivate_PoisonNativeMemoryOnFree() { + return Py_Truffle_Options & PY_TRUFFLE_POISON_NATIVE_MEMORY_ON_FREE; +} +static MUST_INLINE int GraalPyPrivate_SampleNativeMemoryAllocSites() { + return Py_Truffle_Options & PY_TRUFFLE_SAMPLE_NATIVE_MEMORY_ALLOC_SITES; +} static void GraalPyPrivate_Log(int level, const char *format, ...) diff --git a/graalpython/com.oracle.graal.python.cext/src/graalpy_stacktrace.c b/graalpython/com.oracle.graal.python.cext/src/graalpy_stacktrace.c new file mode 100644 index 0000000000..817a0cc2d3 --- /dev/null +++ b/graalpython/com.oracle.graal.python.cext/src/graalpy_stacktrace.c @@ -0,0 +1,279 @@ +/* + * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * The Universal Permissive License (UPL), Version 1.0 + * + * Subject to the condition set forth below, permission is hereby granted to any + * person obtaining a copy of this software, associated documentation and/or + * data (collectively the "Software"), free of charge and under any and all + * copyright rights in the Software, and any and all patent rights owned or + * freely licensable by each licensor hereunder covering either (i) the + * unmodified Software as contributed to or provided by such licensor, or (ii) + * the Larger Works (as defined below), to deal in both + * + * (a) the Software, and + * + * (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if + * one is included with the Software each a "Larger Work" to which the Software + * is contributed by such licensors), + * + * without restriction, including without limitation the rights to copy, create + * derivative works of, display, perform, and distribute the Software and make, + * use, sell, offer for sale, import, export, have made, and have sold the + * Software and the Larger Work(s), and to sublicense the foregoing rights on + * either these or other terms. + * + * This license is subject to the following condition: + * + * The above copyright notice and either this complete permission notice or at a + * minimum a reference to the UPL must be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "capi.h" + +#include +#include +#include +#include + +#if defined(MS_WINDOWS) +#include +#include +#elif (defined(__linux__) && defined(__GNU_LIBRARY__)) || defined(__APPLE__) +#include +#endif + +#define GRAALPY_NATIVE_STACK_MAX_NAME 1024 +#define GRAALPY_NATIVE_STACK_LINE_BUFFER 2048 + +typedef void (*GraalPyStacktraceWriter)(void *ctx, const char *line); + +static void +render_unavailable_stacktrace(GraalPyStacktraceWriter writer, void *ctx) +{ + writer(ctx, ""); +} + +#if defined(MS_WINDOWS) + +static int +ensure_windows_symbols_initialized(void) +{ + static int initialized = 0; + if (!initialized) { + HANDLE process = GetCurrentProcess(); + SymSetOptions(SymGetOptions() | SYMOPT_LOAD_LINES | SYMOPT_UNDNAME); + if (!SymInitialize(process, NULL, TRUE)) { + return 0; + } + initialized = 1; + } + return 1; +} + +static const char * +windows_basename(const char *path) +{ + const char *slash = strrchr(path, '\\'); + const char *alt = strrchr(path, '/'); + const char *base = slash != NULL ? 
slash + 1 : path; + if (alt != NULL && (slash == NULL || alt > slash)) { + base = alt + 1; + } + return base; +} + +static void +render_windows_stacktrace(GraalPyStacktraceWriter writer, void *ctx, void *const *frames, size_t depth) +{ + HANDLE process = GetCurrentProcess(); + char line[GRAALPY_NATIVE_STACK_LINE_BUFFER]; + char symbol_buffer[sizeof(SYMBOL_INFO) + GRAALPY_NATIVE_STACK_MAX_NAME]; + PSYMBOL_INFO symbol = (PSYMBOL_INFO) symbol_buffer; + + memset(symbol_buffer, 0, sizeof(symbol_buffer)); + symbol->SizeOfStruct = sizeof(SYMBOL_INFO); + symbol->MaxNameLen = GRAALPY_NATIVE_STACK_MAX_NAME - 1; + + if (!ensure_windows_symbols_initialized()) { + for (size_t i = 0; i < depth; i++) { + snprintf(line, sizeof(line), "frame[%lu]: %p", + (unsigned long) i, (void *) frames[i]); + writer(ctx, line); + } + return; + } + + for (size_t i = 0; i < depth; i++) { + DWORD64 address = (DWORD64) (uintptr_t) frames[i]; + DWORD64 displacement = 0; + IMAGEHLP_LINE64 source_line; + DWORD source_displacement = 0; + char module_path[MAX_PATH] = {'\0'}; + const char *module_name = NULL; + HMODULE module = NULL; + + memset(&source_line, 0, sizeof(source_line)); + source_line.SizeOfStruct = sizeof(source_line); + + if (GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, + (LPCSTR) frames[i], &module) && GetModuleFileNameA(module, module_path, MAX_PATH) > 0) { + module_name = windows_basename(module_path); + } + + if (SymFromAddr(process, address, &displacement, symbol)) { + if (SymGetLineFromAddr64(process, address, &source_displacement, &source_line)) { + if (module_name != NULL) { + snprintf(line, sizeof(line), "frame[%lu]: %s!%s+0x%llx (%s:%lu) [%p]", + (unsigned long) i, module_name, symbol->Name, (unsigned long long) displacement, + source_line.FileName, (unsigned long) source_line.LineNumber, (void *) frames[i]); + } else { + snprintf(line, sizeof(line), "frame[%lu]: %s+0x%llx (%s:%lu) [%p]", + (unsigned long) i, 
symbol->Name, (unsigned long long) displacement, + source_line.FileName, (unsigned long) source_line.LineNumber, (void *) frames[i]); + } + } else if (module_name != NULL) { + snprintf(line, sizeof(line), "frame[%lu]: %s!%s+0x%llx [%p]", + (unsigned long) i, module_name, symbol->Name, (unsigned long long) displacement, (void *) frames[i]); + } else { + snprintf(line, sizeof(line), "frame[%lu]: %s+0x%llx [%p]", + (unsigned long) i, symbol->Name, (unsigned long long) displacement, (void *) frames[i]); + } + } else if (module_name != NULL) { + snprintf(line, sizeof(line), "frame[%lu]: %s [%p]", + (unsigned long) i, module_name, (void *) frames[i]); + } else { + snprintf(line, sizeof(line), "frame[%lu]: %p", + (unsigned long) i, (void *) frames[i]); + } + writer(ctx, line); + } +} + +#elif (defined(__linux__) && defined(__GNU_LIBRARY__)) || defined(__APPLE__) + +static void +render_execinfo_stacktrace(GraalPyStacktraceWriter writer, void *ctx, void *const *frames, size_t depth) +{ + char **symbols = backtrace_symbols((void *const *) frames, (int) depth); + char line[GRAALPY_NATIVE_STACK_LINE_BUFFER]; + if (symbols == NULL) { + for (size_t i = 0; i < depth; i++) { + snprintf(line, sizeof(line), "frame[%lu]: %p", + (unsigned long) i, (void *) frames[i]); + writer(ctx, line); + } + return; + } + + for (size_t i = 0; i < depth; i++) { + snprintf(line, sizeof(line), "frame[%lu]: %s", (unsigned long) i, symbols[i]); + writer(ctx, line); + } + free(symbols); +} + +#endif + +size_t +GraalPyPrivate_CaptureStacktrace(void **frames, size_t max_depth, size_t skip) +{ + if (frames == NULL || max_depth == 0) { + return 0; + } +#if defined(MS_WINDOWS) + return (size_t) CaptureStackBackTrace((ULONG) (skip + 1), (ULONG) max_depth, frames, NULL); +#elif (defined(__linux__) && defined(__GNU_LIBRARY__)) || defined(__APPLE__) + int raw_depth = backtrace(frames, (int) max_depth); + size_t depth = raw_depth > 0 ? (size_t) raw_depth : 0; + size_t start = depth > (skip + 1) ? 
(skip + 1) : depth; + size_t usable_depth = depth - start; + if (usable_depth > 0) { + memmove(frames, frames + start, usable_depth * sizeof(void *)); + } + return usable_depth; +#else + return 0; +#endif +} + +static void +render_stacktrace(GraalPyStacktraceWriter writer, void *ctx, void *const *frames, size_t depth) +{ + if (depth == 0) { + render_unavailable_stacktrace(writer, ctx); + return; + } +#if defined(MS_WINDOWS) + render_windows_stacktrace(writer, ctx, frames, depth); +#elif (defined(__linux__) && defined(__GNU_LIBRARY__)) || defined(__APPLE__) + render_execinfo_stacktrace(writer, ctx, frames, depth); +#else + (void) frames; + render_unavailable_stacktrace(writer, ctx); +#endif +} + +static void +file_writer(void *ctx, const char *line) +{ + fprintf((FILE *) ctx, "%s\n", line); +} + +void +GraalPyPrivate_PrintCapturedStacktrace(FILE *file, const char *header, void *const *frames, size_t depth) +{ + if (header != NULL) { + fputs(header, file); + } + render_stacktrace(file_writer, file, frames, depth); + fflush(file); +} + +void +GraalPyPrivate_PrintCurrentStacktrace(FILE *file, const char *header, size_t max_depth, size_t skip) +{ + void *frames[64]; + size_t depth = max_depth; + if (depth > (sizeof(frames) / sizeof(frames[0]))) { + depth = sizeof(frames) / sizeof(frames[0]); + } + depth = GraalPyPrivate_CaptureStacktrace(frames, depth, skip + 1); + GraalPyPrivate_PrintCapturedStacktrace(file, header, frames, depth); +} + +typedef struct { + int level; + const char *prefix; +} LogWriterCtx; + +static void +log_writer(void *ctx, const char *line) +{ + LogWriterCtx *log_ctx = (LogWriterCtx *) ctx; + if (log_ctx->prefix != NULL) { + GraalPyPrivate_Log(log_ctx->level, "%s%s\n", log_ctx->prefix, line); + } else { + GraalPyPrivate_Log(log_ctx->level, "%s\n", line); + } +} + +void +GraalPyPrivate_LogCapturedStacktrace(int level, const char *prefix, void *const *frames, size_t depth) +{ + if ((Py_Truffle_Options & level) == 0) { + return; + } + LogWriterCtx 
log_ctx = {level, prefix}; + render_stacktrace(log_writer, &log_ctx, frames, depth); +} diff --git a/graalpython/com.oracle.graal.python.cext/src/obmalloc.c b/graalpython/com.oracle.graal.python.cext/src/obmalloc.c index ae39e1adac..e03c961bac 100644 --- a/graalpython/com.oracle.graal.python.cext/src/obmalloc.c +++ b/graalpython/com.oracle.graal.python.cext/src/obmalloc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0 @@ -50,12 +50,27 @@ typedef struct { size_t dummy; } mem_head_t; +typedef struct { + void *ptr; + void *stack[12]; + size_t size; + size_t depth; + unsigned long long serial; + char operation; +} GraalPyMemSample_t; + /* Get an object's GC head */ #define AS_MEM_HEAD(o) ((mem_head_t *)(o)-1) /* Get the object given the GC head */ #define FROM_MEM_HEAD(g) ((void *)(((mem_head_t *)g)+1)) +#define GRAALPY_MEM_HEAD_MAGIC ((size_t)0x47505241574D454DULL) +#define GRAALPY_MEM_HEAD_POISON ((size_t)0xDDDDBAD0DDDDBAD0ULL) +#define GRAALPY_MEM_SAMPLE_RING_SIZE (4096) +#define GRAALPY_MEM_SAMPLE_HISTORY (8) +#define GRAALPY_MEM_SAMPLE_STACK_SKIP (2) +#define GRAALPY_MEM_SAMPLE_USEFUL_DEPTH (10) #define MAX_COLLECTION_RETRIES (7) #define COLLECTION_DELAY_INCREMENT (50) @@ -72,6 +87,113 @@ typedef struct { } GraalPyMem_t; static GraalPyMem_t _GraalPyMem_State = { 0, 0, 0 }; +static GraalPyMemSample_t _GraalPyMem_Samples[GRAALPY_MEM_SAMPLE_RING_SIZE] = {{0}}; +static unsigned long long _GraalPyMem_SampleSerial = 0; +static size_t _GraalPyMem_SampleIndex = 0; + +static MUST_INLINE int +_GraalPyMem_PoisonOnFreeEnabled(void) +{ + return GraalPyPrivate_PoisonNativeMemoryOnFree(); +} + +static MUST_INLINE int +_GraalPyMem_SampleAllocSitesEnabled(void) +{ + return GraalPyPrivate_SampleNativeMemoryAllocSites(); +} + +static 
void +_GraalPyMem_CaptureSampleStack(GraalPyMemSample_t *sample) +{ + sample->depth = GraalPyPrivate_CaptureStacktrace(sample->stack, GRAALPY_MEM_SAMPLE_USEFUL_DEPTH, + GRAALPY_MEM_SAMPLE_STACK_SKIP); +} + +static void +_GraalPyMem_RecordSample(char operation, void *ptr, size_t size) +{ + if (UNLIKELY(ptr == NULL)) { + return; + } + if (LIKELY(!_GraalPyMem_SampleAllocSitesEnabled())) { + return; + } + + size_t index = _GraalPyMem_SampleIndex++ % GRAALPY_MEM_SAMPLE_RING_SIZE; + GraalPyMemSample_t *sample = &_GraalPyMem_Samples[index]; + sample->ptr = ptr; + sample->size = size; + sample->serial = ++_GraalPyMem_SampleSerial; + sample->operation = operation; + _GraalPyMem_CaptureSampleStack(sample); +} + +static void +_GraalPyMem_LogRecentSamples(const char *func, void *ptr) +{ + if (LIKELY(!_GraalPyMem_SampleAllocSitesEnabled())) { + return; + } + + size_t next_index = _GraalPyMem_SampleIndex; + int printed = 0; + for (size_t offset = 0; offset < GRAALPY_MEM_SAMPLE_RING_SIZE && printed < GRAALPY_MEM_SAMPLE_HISTORY; offset++) { + size_t index = (next_index + GRAALPY_MEM_SAMPLE_RING_SIZE - offset - 1) % GRAALPY_MEM_SAMPLE_RING_SIZE; + const GraalPyMemSample_t *sample = &_GraalPyMem_Samples[index]; + if (sample->ptr == ptr && sample->serial != 0) { + char prefix[128]; + GraalPyPrivate_Log(PY_TRUFFLE_LOG_INFO, + "%s: recent raw memory sample #%llu op=%c ptr=%p size=%lu depth=%lu\n", + func, sample->serial, sample->operation, sample->ptr, (unsigned long) sample->size, (unsigned long) sample->depth); + snprintf(prefix, sizeof(prefix), "%s: sample #%llu ", func, sample->serial); + GraalPyPrivate_LogCapturedStacktrace(PY_TRUFFLE_LOG_INFO, prefix, sample->stack, sample->depth); + printed++; + } + } +} + +static void +_GraalPyMem_InitHeader(mem_head_t *ptr_with_head, size_t size) +{ + ptr_with_head->size = size; + ptr_with_head->dummy = GRAALPY_MEM_HEAD_MAGIC; +} + +static void +_GraalPyMem_PoisonBlock(mem_head_t *ptr_with_head, size_t size) +{ + if 
(LIKELY(!_GraalPyMem_PoisonOnFreeEnabled())) { + return; + } + + memset(ptr_with_head, 0xDB, sizeof(mem_head_t) + size); + ptr_with_head->size = GRAALPY_MEM_HEAD_POISON; + ptr_with_head->dummy = GRAALPY_MEM_HEAD_POISON; +} + +static void +_GraalPyMem_FatalInvalidHeader(const char *func, void *ptr, const mem_head_t *ptr_with_head) +{ + const char *reason = (ptr_with_head->size == GRAALPY_MEM_HEAD_POISON && ptr_with_head->dummy == GRAALPY_MEM_HEAD_POISON) + ? "poisoned raw allocation header" + : "invalid raw allocation header"; + GraalPyPrivate_Log(PY_TRUFFLE_LOG_INFO, + "%s: %s for ptr=%p head=%p size=%lu dummy=0x%lx\n", + func, reason, ptr, ptr_with_head, (unsigned long) ptr_with_head->size, (unsigned long) ptr_with_head->dummy); + _GraalPyMem_LogRecentSamples(func, ptr); + Py_FatalError("invalid GraalPy raw allocation header"); +} + +static mem_head_t * +_GraalPyMem_GetValidatedHead(const char *func, void *ptr) +{ + mem_head_t *ptr_with_head = AS_MEM_HEAD(ptr); + if (UNLIKELY(ptr_with_head->dummy != GRAALPY_MEM_HEAD_MAGIC)) { + _GraalPyMem_FatalInvalidHeader(func, ptr, ptr_with_head); + } + return ptr_with_head; +} #if 0 // GraalPy change /* bpo-35053: Declare tracemalloc configuration here rather than @@ -325,7 +447,9 @@ PyMem_RawMalloc(size_t size) */ if (size > (size_t)PY_SSIZE_T_MAX) return NULL; - return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size); + void *ptr = _PyMem_Raw.malloc(_PyMem_Raw.ctx, size); + _GraalPyMem_RecordSample('m', ptr, size == 0 ? 1 : size); + return ptr; } void * @@ -334,7 +458,10 @@ PyMem_RawCalloc(size_t nelem, size_t elsize) /* see PyMem_RawMalloc() */ if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize) return NULL; - return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize); + void *ptr = _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize); + size_t nbytes = (nelem == 0 || elsize == 0) ? 
1 : nelem * elsize; + _GraalPyMem_RecordSample('c', ptr, nbytes); + return ptr; } void* @@ -343,11 +470,14 @@ PyMem_RawRealloc(void *ptr, size_t new_size) /* see PyMem_RawMalloc() */ if (new_size > (size_t)PY_SSIZE_T_MAX) return NULL; - return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size); + void *new_ptr = _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size); + _GraalPyMem_RecordSample('r', new_ptr, new_size == 0 ? 1 : new_size); + return new_ptr; } void PyMem_RawFree(void *ptr) { + _GraalPyMem_RecordSample('f', ptr, 0); _PyMem_Raw.free(_PyMem_Raw.ctx, ptr); } @@ -426,11 +556,16 @@ _GraalPyMem_RawMalloc(void *ctx, size_t size) To solve these problems, allocate an extra byte. */ if (size == 0) size = 1; - if (_GraalPyMem_PrepareAlloc((GraalPyMem_t*) ctx, size)) { + GraalPyMem_t *state = (GraalPyMem_t *)ctx; + if (_GraalPyMem_PrepareAlloc(state, size)) { return NULL; } mem_head_t *ptr_with_head = (mem_head_t *)malloc(size + sizeof(mem_head_t)); - ptr_with_head->size = size; + if (ptr_with_head == NULL) { + state->allocated_memory -= size; + return NULL; + } + _GraalPyMem_InitHeader(ptr_with_head, size); return FROM_MEM_HEAD(ptr_with_head); } @@ -450,15 +585,20 @@ _GraalPyMem_RawCalloc(void *ctx, size_t nelem, size_t elsize) elsize = 1; } size_t nbytes = nelem * elsize; - if (_GraalPyMem_PrepareAlloc((GraalPyMem_t*) ctx, nbytes)) { + GraalPyMem_t *state = (GraalPyMem_t *)ctx; + if (_GraalPyMem_PrepareAlloc(state, nbytes)) { return NULL; } /* We cannot use 'calloc' because we need to allocate following layout: [ mem_head_t ] [ e_0 ] [ e_1 ] [ e_2 ] ... 
[ n_nelem ] */ size_t total = nbytes + sizeof(mem_head_t); mem_head_t *ptr_with_head = (mem_head_t *)malloc(total); + if (ptr_with_head == NULL) { + state->allocated_memory -= nbytes; + return NULL; + } memset(ptr_with_head, 0, total); - ptr_with_head->size = nbytes; + _GraalPyMem_InitHeader(ptr_with_head, nbytes); return FROM_MEM_HEAD(ptr_with_head); } @@ -474,25 +614,33 @@ _GraalPyMem_RawRealloc(void *ctx, void *ptr, size_t size) size = 1; if (ptr != NULL) { - old = AS_MEM_HEAD(ptr); + old = _GraalPyMem_GetValidatedHead(__func__, ptr); old_size = old->size; } else { old = NULL; old_size = 0; } - // account for the difference in size - if (old_size >= size) { - /* In case of "shrinking", just subtract the counter but don't trigger - the Java GC. */ - state->allocated_memory -= size; - } else if (_GraalPyMem_PrepareAlloc(state, size - old_size)) { + if (old_size < size && _GraalPyMem_PrepareAlloc(state, size - old_size)) { return NULL; } mem_head_t *ptr_with_head = (mem_head_t *)realloc(old, size + sizeof(mem_head_t)); - ptr_with_head->size = size; + if (ptr_with_head == NULL) { + if (old_size < size) { + state->allocated_memory -= size - old_size; + } + return NULL; + } + + if (old_size > size) { + /* In case of "shrinking", just subtract the difference but don't + trigger the Java GC. 
*/ + state->allocated_memory -= old_size - size; + } + + _GraalPyMem_InitHeader(ptr_with_head, size); return FROM_MEM_HEAD(ptr_with_head); } @@ -506,7 +654,7 @@ _GraalPyMem_RawFree(void *ctx, void *ptr) if (ptr == NULL) return; GraalPyMem_t *state = (GraalPyMem_t *)ctx; - mem_head_t *ptr_with_head = AS_MEM_HEAD(ptr); + mem_head_t *ptr_with_head = _GraalPyMem_GetValidatedHead(__func__, ptr); const size_t size = ptr_with_head->size; if (state->allocated_memory < size) { GraalPyPrivate_Log(PY_TRUFFLE_LOG_INFO, @@ -515,5 +663,6 @@ _GraalPyMem_RawFree(void *ctx, void *ptr) state->allocated_memory = size; } state->allocated_memory -= size; + _GraalPyMem_PoisonBlock(ptr_with_head, size); free(ptr_with_head); } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextBuiltins.java index c0bd0d172f..631acc4906 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextBuiltins.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * The Universal Permissive License (UPL), Version 1.0 @@ -1643,6 +1643,8 @@ static int doGeneric(@Bind Node inliningTarget) { private static final int LOG_FINEST = 0x20; private static final int DEBUG_CAPI = 0x40; private static final int PYTHON_GC = 0x80; + private static final int POISON_NATIVE_MEMORY_ON_FREE = 0x100; + private static final int SAMPLE_NATIVE_MEMORY_ALLOC_SITES = 0x200; /* * These should be kept so they can be shared across multiple contexts in the same engine, if @@ -1682,6 +1684,12 @@ int getNativeOptions() { if (language.getEngineOption(PythonOptions.PythonGC)) { options |= PYTHON_GC; } + if (language.getEngineOption(PythonOptions.PoisonNativeMemoryOnFree)) { + options |= POISON_NATIVE_MEMORY_ON_FREE; + } + if (language.getEngineOption(PythonOptions.SampleNativeMemoryAllocSites)) { + options |= SAMPLE_NATIVE_MEMORY_ALLOC_SITES; + } return options; } } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/PythonOptions.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/PythonOptions.java index 10f6a8bb03..7928d53441 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/PythonOptions.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/PythonOptions.java @@ -399,6 +399,12 @@ public static void checkBytecodeDSLEnv() { @Option(category = OptionCategory.EXPERT, usageSyntax = "", help = "Initial native memory heap size that triggers a GC (default: 256 MB).") // public static final OptionKey InitialNativeMemory = new OptionKey<>(1L << 28); + @EngineOption @Option(category = OptionCategory.EXPERT, usageSyntax = "true|false", help = "Poison GraalPy raw allocator headers and payloads before freeing native memory blocks.", stability = OptionStability.EXPERIMENTAL) // + public static final OptionKey PoisonNativeMemoryOnFree = new OptionKey<>(false); + + @EngineOption @Option(category = OptionCategory.EXPERT, usageSyntax = "true|false", 
help = "Record a lightweight rolling history of GraalPy raw native memory allocation sites for allocator debugging.", stability = OptionStability.EXPERIMENTAL) // + public static final OptionKey SampleNativeMemoryAllocSites = new OptionKey<>(false); + @Option(category = OptionCategory.EXPERT, usageSyntax = "true|false", help = "Use the panama backend for NFI.", stability = OptionStability.EXPERIMENTAL) // public static final OptionKey UsePanama = new OptionKey<>(false); // see [GR-67358] diff --git a/mx.graalpython/mx_graalpython_benchmark.py b/mx.graalpython/mx_graalpython_benchmark.py index 9c1b412c8f..4877be192f 100644 --- a/mx.graalpython/mx_graalpython_benchmark.py +++ b/mx.graalpython/mx_graalpython_benchmark.py @@ -86,6 +86,17 @@ BENCH_BGV = 'benchmarks-bgv' +BENCHMARK_DEBUG_ARGS = ( + # These first two are not /too/ bad for runtime + # '--python.PoisonNativeMemoryOnFree=true', + # '--python.SampleNativeMemoryAllocSites=true', + + # These below can be *extremely* heavy + # '--python.TraceNativeMemory=true', + # '--python.TraceNativeMemoryCalls=true', + # '--log.python.level=FINER', +) + # ---------------------------------------------------------------------------------------------------------------------- # # utils @@ -1062,7 +1073,7 @@ def register_vms(suite, sandboxed_options): def add_graalpy_vm(name, *extra_polyglot_args): graalpy_vms.append((name, extra_polyglot_args)) - python_vm_registry.add_vm(GraalPythonVm(config_name=name, extra_polyglot_args=extra_polyglot_args), suite, 10) + python_vm_registry.add_vm(GraalPythonVm(config_name=name, extra_polyglot_args=BENCHMARK_DEBUG_ARGS + extra_polyglot_args), suite, 10) # GraalPy VMs: add_graalpy_vm(CONFIGURATION_DEFAULT)