diff --git a/ci/python-gate.libsonnet b/ci/python-gate.libsonnet index 0ae29d7123..0c560dce1f 100644 --- a/ci/python-gate.libsonnet +++ b/ci/python-gate.libsonnet @@ -142,6 +142,7 @@ BISECT_EMAIL_TO_PATTERN: ".*@oracle.com", TRUFFLE_STRICT_OPTION_DEPRECATION: "true", npm_config_registry: $.overlay_imports.npm_config_registry, + CFLAGS: "-ggdb", }, linux: { common: ENV_POSIX + {}, @@ -241,6 +242,8 @@ "graal_dumps/*/*", "bench-results.json", "raw-results.json", + "mxbuild/*/libpythonvm/libpythonvm.so.debug", + "mxbuild/*/GRAALPY_STANDALONE_COMMON/lib/graalpy*/libpython-native.so", ], //------------------------------------------------------------------------------------------------------------------ diff --git a/docs/contributor/CONTRIBUTING.md b/docs/contributor/CONTRIBUTING.md index 9b509d92ad..84aba00220 100644 --- a/docs/contributor/CONTRIBUTING.md +++ b/docs/contributor/CONTRIBUTING.md @@ -287,6 +287,9 @@ mx benchmark meso:nbody3 \ -Dgraal.MethodFilter=*measure* ``` +For debugging native problems in benchmark runs, there's `BENCHMARK_DEBUG_ARGS` in `mx_graalpython_benchmark.py` to enable additional debug logging and native-memory diagnostics, at the cost of performance. +This is intended for focused reproducer runs on a branch. + ### A note on terminology Note that there may be a little confusion about the configuration names of benchmarks. 
diff --git a/graalpython/com.oracle.graal.python.cext/CMakeLists.txt b/graalpython/com.oracle.graal.python.cext/CMakeLists.txt index 8a26d9d95e..27df0cb568 100644 --- a/graalpython/com.oracle.graal.python.cext/CMakeLists.txt +++ b/graalpython/com.oracle.graal.python.cext/CMakeLists.txt @@ -162,7 +162,7 @@ set(SRC_FILES ${CAPI_SRC}/codecs.c ${CAPI_SRC}/setobject.c ${CAPI_SRC}/compile.c ${CAPI_SRC}/fileobject.c ${CAPI_SRC}/pystrcmp.c ${CAPI_SRC}/getversion.c ${CAPI_SRC}/genobject.c ${CAPI_SRC}/methodobject.c ${CAPI_SRC}/boolobject.c ${CAPI_SRC}/pylifecycle.c ${CAPI_SRC}/errors.c ${CAPI_SRC}/signals.c ${CAPI_SRC}/datetime.c ${CAPI_SRC}/call.c - ${CAPI_SRC}/getargs.c ${CAPI_SRC}/tracemalloc.c ${CAPI_SRC}/initconfig.c + ${CAPI_SRC}/getargs.c ${CAPI_SRC}/tracemalloc.c ${CAPI_SRC}/initconfig.c ${CAPI_SRC}/graalpy_stacktrace.c ) file(GLOB_RECURSE ACTUAL_SRC_FILES @@ -454,6 +454,7 @@ if(WIN32) if (NOT MSVC) target_compile_options(${TARGET_LIBPYTHON} PRIVATE "-fmsc-version=1920") endif() + target_link_libraries(${TARGET_LIBPYTHON} dbghelp) else() # Link to math library; required for functions like 'hypot' or similar target_link_libraries(${TARGET_LIBPYTHON} m) diff --git a/graalpython/com.oracle.graal.python.cext/src/capi.h b/graalpython/com.oracle.graal.python.cext/src/capi.h index 4323916fdb..e20945ced8 100644 --- a/graalpython/com.oracle.graal.python.cext/src/capi.h +++ b/graalpython/com.oracle.graal.python.cext/src/capi.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * The Universal Permissive License (UPL), Version 1.0 @@ -96,6 +96,8 @@ #define PY_TRUFFLE_LOG_FINEST 0x20 #define PY_TRUFFLE_DEBUG_CAPI 0x40 #define PY_TRUFFLE_PYTHON_GC 0x80 +#define PY_TRUFFLE_POISON_NATIVE_MEMORY_ON_FREE 0x100 +#define PY_TRUFFLE_SAMPLE_NATIVE_MEMORY_ALLOC_SITES 0x200 typedef struct mmap_object mmap_object; typedef struct _gc_runtime_state GCState; // originally in 'gcmodule.c' @@ -169,16 +171,7 @@ extern Py_LOCAL_SYMBOL int8_t *_graalpy_finalizing; #if (__linux__ && __GNU_LIBRARY__) #include #include -#include #include -static void print_c_stacktrace() { - fprintf(stderr, "Native stacktrace:\n"); - intptr_t stack[16]; - size_t stack_size = backtrace((void *)stack, sizeof(stack) / sizeof(stack[0])); - backtrace_symbols_fd((void *)stack, stack_size, STDERR_FILENO); - fflush(stderr); -} - static void attach_gdb() { pid_t my_pid = getpid(); char* pathname = "/bin/sh"; @@ -197,15 +190,20 @@ static void attach_gdb() { } } #else -static void print_c_stacktrace() { - // not supported -} - static void attach_gdb() { // not supported } #endif +size_t GraalPyPrivate_CaptureStacktrace(void **frames, size_t max_depth, size_t skip); +void GraalPyPrivate_PrintCapturedStacktrace(FILE *file, const char *header, void *const *frames, size_t depth); +void GraalPyPrivate_PrintCurrentStacktrace(FILE *file, const char *header, size_t max_depth, size_t skip); +void GraalPyPrivate_LogCapturedStacktrace(int level, const char *prefix, void *const *frames, size_t depth); + +static void print_c_stacktrace() { + GraalPyPrivate_PrintCurrentStacktrace(stderr, "Native stacktrace:\n", 16, 0); +} + /* Flags definitions representing global (debug) options. 
*/ static MUST_INLINE int GraalPyPrivate_Trace_Memory() { return Py_Truffle_Options & PY_TRUFFLE_TRACE_MEM; @@ -232,6 +230,12 @@ static MUST_INLINE int GraalPyPrivate_Debug_CAPI() { static MUST_INLINE int GraalPyPrivate_PythonGC() { return Py_Truffle_Options & PY_TRUFFLE_PYTHON_GC; } +static MUST_INLINE int GraalPyPrivate_PoisonNativeMemoryOnFree() { + return Py_Truffle_Options & PY_TRUFFLE_POISON_NATIVE_MEMORY_ON_FREE; +} +static MUST_INLINE int GraalPyPrivate_SampleNativeMemoryAllocSites() { + return Py_Truffle_Options & PY_TRUFFLE_SAMPLE_NATIVE_MEMORY_ALLOC_SITES; +} static void GraalPyPrivate_Log(int level, const char *format, ...) diff --git a/graalpython/com.oracle.graal.python.cext/src/graalpy_stacktrace.c b/graalpython/com.oracle.graal.python.cext/src/graalpy_stacktrace.c new file mode 100644 index 0000000000..817a0cc2d3 --- /dev/null +++ b/graalpython/com.oracle.graal.python.cext/src/graalpy_stacktrace.c @@ -0,0 +1,279 @@ +/* + * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * The Universal Permissive License (UPL), Version 1.0 + * + * Subject to the condition set forth below, permission is hereby granted to any + * person obtaining a copy of this software, associated documentation and/or + * data (collectively the "Software"), free of charge and under any and all + * copyright rights in the Software, and any and all patent rights owned or + * freely licensable by each licensor hereunder covering either (i) the + * unmodified Software as contributed to or provided by such licensor, or (ii) + * the Larger Works (as defined below), to deal in both + * + * (a) the Software, and + * + * (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if + * one is included with the Software each a "Larger Work" to which the Software + * is contributed by such licensors), + * + * without restriction, including without limitation the rights to copy, create + * derivative works of, display, perform, and distribute the Software and make, + * use, sell, offer for sale, import, export, have made, and have sold the + * Software and the Larger Work(s), and to sublicense the foregoing rights on + * either these or other terms. + * + * This license is subject to the following condition: + * + * The above copyright notice and either this complete permission notice or at a + * minimum a reference to the UPL must be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "capi.h" + +#include +#include +#include +#include + +#if defined(MS_WINDOWS) +#include +#include +#elif (defined(__linux__) && defined(__GNU_LIBRARY__)) || defined(__APPLE__) +#include +#endif + +#define GRAALPY_NATIVE_STACK_MAX_NAME 1024 +#define GRAALPY_NATIVE_STACK_LINE_BUFFER 2048 + +typedef void (*GraalPyStacktraceWriter)(void *ctx, const char *line); + +static void +render_unavailable_stacktrace(GraalPyStacktraceWriter writer, void *ctx) +{ + writer(ctx, ""); +} + +#if defined(MS_WINDOWS) + +static int +ensure_windows_symbols_initialized(void) +{ + static int initialized = 0; + if (!initialized) { + HANDLE process = GetCurrentProcess(); + SymSetOptions(SymGetOptions() | SYMOPT_LOAD_LINES | SYMOPT_UNDNAME); + if (!SymInitialize(process, NULL, TRUE)) { + return 0; + } + initialized = 1; + } + return 1; +} + +static const char * +windows_basename(const char *path) +{ + const char *slash = strrchr(path, '\\'); + const char *alt = strrchr(path, '/'); + const char *base = slash != NULL ? 
slash + 1 : path; + if (alt != NULL && (slash == NULL || alt > slash)) { + base = alt + 1; + } + return base; +} + +static void +render_windows_stacktrace(GraalPyStacktraceWriter writer, void *ctx, void *const *frames, size_t depth) +{ + HANDLE process = GetCurrentProcess(); + char line[GRAALPY_NATIVE_STACK_LINE_BUFFER]; + char symbol_buffer[sizeof(SYMBOL_INFO) + GRAALPY_NATIVE_STACK_MAX_NAME]; + PSYMBOL_INFO symbol = (PSYMBOL_INFO) symbol_buffer; + + memset(symbol_buffer, 0, sizeof(symbol_buffer)); + symbol->SizeOfStruct = sizeof(SYMBOL_INFO); + symbol->MaxNameLen = GRAALPY_NATIVE_STACK_MAX_NAME - 1; + + if (!ensure_windows_symbols_initialized()) { + for (size_t i = 0; i < depth; i++) { + snprintf(line, sizeof(line), "frame[%lu]: %p", + (unsigned long) i, (void *) frames[i]); + writer(ctx, line); + } + return; + } + + for (size_t i = 0; i < depth; i++) { + DWORD64 address = (DWORD64) (uintptr_t) frames[i]; + DWORD64 displacement = 0; + IMAGEHLP_LINE64 source_line; + DWORD source_displacement = 0; + char module_path[MAX_PATH] = {'\0'}; + const char *module_name = NULL; + HMODULE module = NULL; + + memset(&source_line, 0, sizeof(source_line)); + source_line.SizeOfStruct = sizeof(source_line); + + if (GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, + (LPCSTR) frames[i], &module) && GetModuleFileNameA(module, module_path, MAX_PATH) > 0) { + module_name = windows_basename(module_path); + } + + if (SymFromAddr(process, address, &displacement, symbol)) { + if (SymGetLineFromAddr64(process, address, &source_displacement, &source_line)) { + if (module_name != NULL) { + snprintf(line, sizeof(line), "frame[%lu]: %s!%s+0x%llx (%s:%lu) [%p]", + (unsigned long) i, module_name, symbol->Name, (unsigned long long) displacement, + source_line.FileName, (unsigned long) source_line.LineNumber, (void *) frames[i]); + } else { + snprintf(line, sizeof(line), "frame[%lu]: %s+0x%llx (%s:%lu) [%p]", + (unsigned long) i, 
symbol->Name, (unsigned long long) displacement, + source_line.FileName, (unsigned long) source_line.LineNumber, (void *) frames[i]); + } + } else if (module_name != NULL) { + snprintf(line, sizeof(line), "frame[%lu]: %s!%s+0x%llx [%p]", + (unsigned long) i, module_name, symbol->Name, (unsigned long long) displacement, (void *) frames[i]); + } else { + snprintf(line, sizeof(line), "frame[%lu]: %s+0x%llx [%p]", + (unsigned long) i, symbol->Name, (unsigned long long) displacement, (void *) frames[i]); + } + } else if (module_name != NULL) { + snprintf(line, sizeof(line), "frame[%lu]: %s [%p]", + (unsigned long) i, module_name, (void *) frames[i]); + } else { + snprintf(line, sizeof(line), "frame[%lu]: %p", + (unsigned long) i, (void *) frames[i]); + } + writer(ctx, line); + } +} + +#elif (defined(__linux__) && defined(__GNU_LIBRARY__)) || defined(__APPLE__) + +static void +render_execinfo_stacktrace(GraalPyStacktraceWriter writer, void *ctx, void *const *frames, size_t depth) +{ + char **symbols = backtrace_symbols((void *const *) frames, (int) depth); + char line[GRAALPY_NATIVE_STACK_LINE_BUFFER]; + if (symbols == NULL) { + for (size_t i = 0; i < depth; i++) { + snprintf(line, sizeof(line), "frame[%lu]: %p", + (unsigned long) i, (void *) frames[i]); + writer(ctx, line); + } + return; + } + + for (size_t i = 0; i < depth; i++) { + snprintf(line, sizeof(line), "frame[%lu]: %s", (unsigned long) i, symbols[i]); + writer(ctx, line); + } + free(symbols); +} + +#endif + +size_t +GraalPyPrivate_CaptureStacktrace(void **frames, size_t max_depth, size_t skip) +{ + if (frames == NULL || max_depth == 0) { + return 0; + } +#if defined(MS_WINDOWS) + return (size_t) CaptureStackBackTrace((ULONG) (skip + 1), (ULONG) max_depth, frames, NULL); +#elif (defined(__linux__) && defined(__GNU_LIBRARY__)) || defined(__APPLE__) + int raw_depth = backtrace(frames, (int) max_depth); + size_t depth = raw_depth > 0 ? (size_t) raw_depth : 0; + size_t start = depth > (skip + 1) ? 
(skip + 1) : depth; + size_t usable_depth = depth - start; + if (usable_depth > 0) { + memmove(frames, frames + start, usable_depth * sizeof(void *)); + } + return usable_depth; +#else + return 0; +#endif +} + +static void +render_stacktrace(GraalPyStacktraceWriter writer, void *ctx, void *const *frames, size_t depth) +{ + if (depth == 0) { + render_unavailable_stacktrace(writer, ctx); + return; + } +#if defined(MS_WINDOWS) + render_windows_stacktrace(writer, ctx, frames, depth); +#elif (defined(__linux__) && defined(__GNU_LIBRARY__)) || defined(__APPLE__) + render_execinfo_stacktrace(writer, ctx, frames, depth); +#else + (void) frames; + render_unavailable_stacktrace(writer, ctx); +#endif +} + +static void +file_writer(void *ctx, const char *line) +{ + fprintf((FILE *) ctx, "%s\n", line); +} + +void +GraalPyPrivate_PrintCapturedStacktrace(FILE *file, const char *header, void *const *frames, size_t depth) +{ + if (header != NULL) { + fputs(header, file); + } + render_stacktrace(file_writer, file, frames, depth); + fflush(file); +} + +void +GraalPyPrivate_PrintCurrentStacktrace(FILE *file, const char *header, size_t max_depth, size_t skip) +{ + void *frames[64]; + size_t depth = max_depth; + if (depth > (sizeof(frames) / sizeof(frames[0]))) { + depth = sizeof(frames) / sizeof(frames[0]); + } + depth = GraalPyPrivate_CaptureStacktrace(frames, depth, skip + 1); + GraalPyPrivate_PrintCapturedStacktrace(file, header, frames, depth); +} + +typedef struct { + int level; + const char *prefix; +} LogWriterCtx; + +static void +log_writer(void *ctx, const char *line) +{ + LogWriterCtx *log_ctx = (LogWriterCtx *) ctx; + if (log_ctx->prefix != NULL) { + GraalPyPrivate_Log(log_ctx->level, "%s%s\n", log_ctx->prefix, line); + } else { + GraalPyPrivate_Log(log_ctx->level, "%s\n", line); + } +} + +void +GraalPyPrivate_LogCapturedStacktrace(int level, const char *prefix, void *const *frames, size_t depth) +{ + if ((Py_Truffle_Options & level) == 0) { + return; + } + LogWriterCtx 
log_ctx = {level, prefix}; + render_stacktrace(log_writer, &log_ctx, frames, depth); +} diff --git a/graalpython/com.oracle.graal.python.cext/src/obmalloc.c b/graalpython/com.oracle.graal.python.cext/src/obmalloc.c index ae39e1adac..e03c961bac 100644 --- a/graalpython/com.oracle.graal.python.cext/src/obmalloc.c +++ b/graalpython/com.oracle.graal.python.cext/src/obmalloc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0 @@ -50,12 +50,27 @@ typedef struct { size_t dummy; } mem_head_t; +typedef struct { + void *ptr; + void *stack[12]; + size_t size; + size_t depth; + unsigned long long serial; + char operation; +} GraalPyMemSample_t; + /* Get an object's GC head */ #define AS_MEM_HEAD(o) ((mem_head_t *)(o)-1) /* Get the object given the GC head */ #define FROM_MEM_HEAD(g) ((void *)(((mem_head_t *)g)+1)) +#define GRAALPY_MEM_HEAD_MAGIC ((size_t)0x47505241574D454DULL) +#define GRAALPY_MEM_HEAD_POISON ((size_t)0xDDDDBAD0DDDDBAD0ULL) +#define GRAALPY_MEM_SAMPLE_RING_SIZE (4096) +#define GRAALPY_MEM_SAMPLE_HISTORY (8) +#define GRAALPY_MEM_SAMPLE_STACK_SKIP (2) +#define GRAALPY_MEM_SAMPLE_USEFUL_DEPTH (10) #define MAX_COLLECTION_RETRIES (7) #define COLLECTION_DELAY_INCREMENT (50) @@ -72,6 +87,113 @@ typedef struct { } GraalPyMem_t; static GraalPyMem_t _GraalPyMem_State = { 0, 0, 0 }; +static GraalPyMemSample_t _GraalPyMem_Samples[GRAALPY_MEM_SAMPLE_RING_SIZE] = {{0}}; +static unsigned long long _GraalPyMem_SampleSerial = 0; +static size_t _GraalPyMem_SampleIndex = 0; + +static MUST_INLINE int +_GraalPyMem_PoisonOnFreeEnabled(void) +{ + return GraalPyPrivate_PoisonNativeMemoryOnFree(); +} + +static MUST_INLINE int +_GraalPyMem_SampleAllocSitesEnabled(void) +{ + return GraalPyPrivate_SampleNativeMemoryAllocSites(); +} + +static 
void +_GraalPyMem_CaptureSampleStack(GraalPyMemSample_t *sample) +{ + sample->depth = GraalPyPrivate_CaptureStacktrace(sample->stack, GRAALPY_MEM_SAMPLE_USEFUL_DEPTH, + GRAALPY_MEM_SAMPLE_STACK_SKIP); +} + +static void +_GraalPyMem_RecordSample(char operation, void *ptr, size_t size) +{ + if (UNLIKELY(ptr == NULL)) { + return; + } + if (LIKELY(!_GraalPyMem_SampleAllocSitesEnabled())) { + return; + } + + size_t index = _GraalPyMem_SampleIndex++ % GRAALPY_MEM_SAMPLE_RING_SIZE; + GraalPyMemSample_t *sample = &_GraalPyMem_Samples[index]; + sample->ptr = ptr; + sample->size = size; + sample->serial = ++_GraalPyMem_SampleSerial; + sample->operation = operation; + _GraalPyMem_CaptureSampleStack(sample); +} + +static void +_GraalPyMem_LogRecentSamples(const char *func, void *ptr) +{ + if (LIKELY(!_GraalPyMem_SampleAllocSitesEnabled())) { + return; + } + + size_t next_index = _GraalPyMem_SampleIndex; + int printed = 0; + for (size_t offset = 0; offset < GRAALPY_MEM_SAMPLE_RING_SIZE && printed < GRAALPY_MEM_SAMPLE_HISTORY; offset++) { + size_t index = (next_index + GRAALPY_MEM_SAMPLE_RING_SIZE - offset - 1) % GRAALPY_MEM_SAMPLE_RING_SIZE; + const GraalPyMemSample_t *sample = &_GraalPyMem_Samples[index]; + if (sample->ptr == ptr && sample->serial != 0) { + char prefix[128]; + GraalPyPrivate_Log(PY_TRUFFLE_LOG_INFO, + "%s: recent raw memory sample #%llu op=%c ptr=%p size=%lu depth=%lu\n", + func, sample->serial, sample->operation, sample->ptr, (unsigned long) sample->size, (unsigned long) sample->depth); + snprintf(prefix, sizeof(prefix), "%s: sample #%llu ", func, sample->serial); + GraalPyPrivate_LogCapturedStacktrace(PY_TRUFFLE_LOG_INFO, prefix, sample->stack, sample->depth); + printed++; + } + } +} + +static void +_GraalPyMem_InitHeader(mem_head_t *ptr_with_head, size_t size) +{ + ptr_with_head->size = size; + ptr_with_head->dummy = GRAALPY_MEM_HEAD_MAGIC; +} + +static void +_GraalPyMem_PoisonBlock(mem_head_t *ptr_with_head, size_t size) +{ + if 
(LIKELY(!_GraalPyMem_PoisonOnFreeEnabled())) { + return; + } + + memset(ptr_with_head, 0xDB, sizeof(mem_head_t) + size); + ptr_with_head->size = GRAALPY_MEM_HEAD_POISON; + ptr_with_head->dummy = GRAALPY_MEM_HEAD_POISON; +} + +static void +_GraalPyMem_FatalInvalidHeader(const char *func, void *ptr, const mem_head_t *ptr_with_head) +{ + const char *reason = (ptr_with_head->size == GRAALPY_MEM_HEAD_POISON && ptr_with_head->dummy == GRAALPY_MEM_HEAD_POISON) + ? "poisoned raw allocation header" + : "invalid raw allocation header"; + GraalPyPrivate_Log(PY_TRUFFLE_LOG_INFO, + "%s: %s for ptr=%p head=%p size=%lu dummy=0x%lx\n", + func, reason, ptr, ptr_with_head, (unsigned long) ptr_with_head->size, (unsigned long) ptr_with_head->dummy); + _GraalPyMem_LogRecentSamples(func, ptr); + Py_FatalError("invalid GraalPy raw allocation header"); +} + +static mem_head_t * +_GraalPyMem_GetValidatedHead(const char *func, void *ptr) +{ + mem_head_t *ptr_with_head = AS_MEM_HEAD(ptr); + if (UNLIKELY(ptr_with_head->dummy != GRAALPY_MEM_HEAD_MAGIC)) { + _GraalPyMem_FatalInvalidHeader(func, ptr, ptr_with_head); + } + return ptr_with_head; +} #if 0 // GraalPy change /* bpo-35053: Declare tracemalloc configuration here rather than @@ -325,7 +447,9 @@ PyMem_RawMalloc(size_t size) */ if (size > (size_t)PY_SSIZE_T_MAX) return NULL; - return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size); + void *ptr = _PyMem_Raw.malloc(_PyMem_Raw.ctx, size); + _GraalPyMem_RecordSample('m', ptr, size == 0 ? 1 : size); + return ptr; } void * @@ -334,7 +458,10 @@ PyMem_RawCalloc(size_t nelem, size_t elsize) /* see PyMem_RawMalloc() */ if (elsize != 0 && nelem > (size_t)PY_SSIZE_T_MAX / elsize) return NULL; - return _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize); + void *ptr = _PyMem_Raw.calloc(_PyMem_Raw.ctx, nelem, elsize); + size_t nbytes = (nelem == 0 || elsize == 0) ? 
1 : nelem * elsize; + _GraalPyMem_RecordSample('c', ptr, nbytes); + return ptr; } void* @@ -343,11 +470,14 @@ PyMem_RawRealloc(void *ptr, size_t new_size) /* see PyMem_RawMalloc() */ if (new_size > (size_t)PY_SSIZE_T_MAX) return NULL; - return _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size); + void *new_ptr = _PyMem_Raw.realloc(_PyMem_Raw.ctx, ptr, new_size); + _GraalPyMem_RecordSample('r', new_ptr, new_size == 0 ? 1 : new_size); + return new_ptr; } void PyMem_RawFree(void *ptr) { + _GraalPyMem_RecordSample('f', ptr, 0); _PyMem_Raw.free(_PyMem_Raw.ctx, ptr); } @@ -426,11 +556,16 @@ _GraalPyMem_RawMalloc(void *ctx, size_t size) To solve these problems, allocate an extra byte. */ if (size == 0) size = 1; - if (_GraalPyMem_PrepareAlloc((GraalPyMem_t*) ctx, size)) { + GraalPyMem_t *state = (GraalPyMem_t *)ctx; + if (_GraalPyMem_PrepareAlloc(state, size)) { return NULL; } mem_head_t *ptr_with_head = (mem_head_t *)malloc(size + sizeof(mem_head_t)); - ptr_with_head->size = size; + if (ptr_with_head == NULL) { + state->allocated_memory -= size; + return NULL; + } + _GraalPyMem_InitHeader(ptr_with_head, size); return FROM_MEM_HEAD(ptr_with_head); } @@ -450,15 +585,20 @@ _GraalPyMem_RawCalloc(void *ctx, size_t nelem, size_t elsize) elsize = 1; } size_t nbytes = nelem * elsize; - if (_GraalPyMem_PrepareAlloc((GraalPyMem_t*) ctx, nbytes)) { + GraalPyMem_t *state = (GraalPyMem_t *)ctx; + if (_GraalPyMem_PrepareAlloc(state, nbytes)) { return NULL; } /* We cannot use 'calloc' because we need to allocate following layout: [ mem_head_t ] [ e_0 ] [ e_1 ] [ e_2 ] ... 
[ n_nelem ] */ size_t total = nbytes + sizeof(mem_head_t); mem_head_t *ptr_with_head = (mem_head_t *)malloc(total); + if (ptr_with_head == NULL) { + state->allocated_memory -= nbytes; + return NULL; + } memset(ptr_with_head, 0, total); - ptr_with_head->size = nbytes; + _GraalPyMem_InitHeader(ptr_with_head, nbytes); return FROM_MEM_HEAD(ptr_with_head); } @@ -474,25 +614,33 @@ _GraalPyMem_RawRealloc(void *ctx, void *ptr, size_t size) size = 1; if (ptr != NULL) { - old = AS_MEM_HEAD(ptr); + old = _GraalPyMem_GetValidatedHead(__func__, ptr); old_size = old->size; } else { old = NULL; old_size = 0; } - // account for the difference in size - if (old_size >= size) { - /* In case of "shrinking", just subtract the counter but don't trigger - the Java GC. */ - state->allocated_memory -= size; - } else if (_GraalPyMem_PrepareAlloc(state, size - old_size)) { + if (old_size < size && _GraalPyMem_PrepareAlloc(state, size - old_size)) { return NULL; } mem_head_t *ptr_with_head = (mem_head_t *)realloc(old, size + sizeof(mem_head_t)); - ptr_with_head->size = size; + if (ptr_with_head == NULL) { + if (old_size < size) { + state->allocated_memory -= size - old_size; + } + return NULL; + } + + if (old_size > size) { + /* In case of "shrinking", just subtract the difference but don't + trigger the Java GC. 
*/ + state->allocated_memory -= old_size - size; + } + + _GraalPyMem_InitHeader(ptr_with_head, size); return FROM_MEM_HEAD(ptr_with_head); } @@ -506,7 +654,7 @@ _GraalPyMem_RawFree(void *ctx, void *ptr) if (ptr == NULL) return; GraalPyMem_t *state = (GraalPyMem_t *)ctx; - mem_head_t *ptr_with_head = AS_MEM_HEAD(ptr); + mem_head_t *ptr_with_head = _GraalPyMem_GetValidatedHead(__func__, ptr); const size_t size = ptr_with_head->size; if (state->allocated_memory < size) { GraalPyPrivate_Log(PY_TRUFFLE_LOG_INFO, @@ -515,5 +663,6 @@ _GraalPyMem_RawFree(void *ctx, void *ptr) state->allocated_memory = size; } state->allocated_memory -= size; + _GraalPyMem_PoisonBlock(ptr_with_head, size); free(ptr_with_head); } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextBuiltins.java index c0bd0d172f..631acc4906 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextBuiltins.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * The Universal Permissive License (UPL), Version 1.0 @@ -1643,6 +1643,8 @@ static int doGeneric(@Bind Node inliningTarget) { private static final int LOG_FINEST = 0x20; private static final int DEBUG_CAPI = 0x40; private static final int PYTHON_GC = 0x80; + private static final int POISON_NATIVE_MEMORY_ON_FREE = 0x100; + private static final int SAMPLE_NATIVE_MEMORY_ALLOC_SITES = 0x200; /* * These should be kept so they can be shared across multiple contexts in the same engine, if @@ -1682,6 +1684,12 @@ int getNativeOptions() { if (language.getEngineOption(PythonOptions.PythonGC)) { options |= PYTHON_GC; } + if (language.getEngineOption(PythonOptions.PoisonNativeMemoryOnFree)) { + options |= POISON_NATIVE_MEMORY_ON_FREE; + } + if (language.getEngineOption(PythonOptions.SampleNativeMemoryAllocSites)) { + options |= SAMPLE_NATIVE_MEMORY_ALLOC_SITES; + } return options; } } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/PythonOptions.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/PythonOptions.java index 10f6a8bb03..7928d53441 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/PythonOptions.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/PythonOptions.java @@ -399,6 +399,12 @@ public static void checkBytecodeDSLEnv() { @Option(category = OptionCategory.EXPERT, usageSyntax = "", help = "Initial native memory heap size that triggers a GC (default: 256 MB).") // public static final OptionKey InitialNativeMemory = new OptionKey<>(1L << 28); + @EngineOption @Option(category = OptionCategory.EXPERT, usageSyntax = "true|false", help = "Poison GraalPy raw allocator headers and payloads before freeing native memory blocks.", stability = OptionStability.EXPERIMENTAL) // + public static final OptionKey PoisonNativeMemoryOnFree = new OptionKey<>(false); + + @EngineOption @Option(category = OptionCategory.EXPERT, usageSyntax = "true|false", 
help = "Record a lightweight rolling history of GraalPy raw native memory allocation sites for allocator debugging.", stability = OptionStability.EXPERIMENTAL) // + public static final OptionKey SampleNativeMemoryAllocSites = new OptionKey<>(false); + @Option(category = OptionCategory.EXPERT, usageSyntax = "true|false", help = "Use the panama backend for NFI.", stability = OptionStability.EXPERIMENTAL) // public static final OptionKey UsePanama = new OptionKey<>(false); // see [GR-67358] diff --git a/mx.graalpython/mx_graalpython_benchmark.py b/mx.graalpython/mx_graalpython_benchmark.py index 9c1b412c8f..4877be192f 100644 --- a/mx.graalpython/mx_graalpython_benchmark.py +++ b/mx.graalpython/mx_graalpython_benchmark.py @@ -86,6 +86,17 @@ BENCH_BGV = 'benchmarks-bgv' +BENCHMARK_DEBUG_ARGS = ( + # These first two are not /too/ bad for runtime + # '--python.PoisonNativeMemoryOnFree=true', + # '--python.SampleNativeMemoryAllocSites=true', + + # These below can be *extremely* heavy + # '--python.TraceNativeMemory=true', + # '--python.TraceNativeMemoryCalls=true', + # '--log.python.level=FINER', +) + # ---------------------------------------------------------------------------------------------------------------------- # # utils @@ -1062,7 +1073,7 @@ def register_vms(suite, sandboxed_options): def add_graalpy_vm(name, *extra_polyglot_args): graalpy_vms.append((name, extra_polyglot_args)) - python_vm_registry.add_vm(GraalPythonVm(config_name=name, extra_polyglot_args=extra_polyglot_args), suite, 10) + python_vm_registry.add_vm(GraalPythonVm(config_name=name, extra_polyglot_args=BENCHMARK_DEBUG_ARGS + extra_polyglot_args), suite, 10) # GraalPy VMs: add_graalpy_vm(CONFIGURATION_DEFAULT)