From 9d28e994ed2a0065f74663b3488a32e3a491c59a Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 26 Sep 2024 14:50:28 -0700 Subject: [PATCH 1/3] Attempt to match 3.12 in work done in GC --- Include/internal/pycore_gc.h | 1 + Lib/test/test_gc.py | 22 +++++----------------- Modules/_testinternalcapi.c | 6 ++++++ Python/gc.c | 32 ++++++++++++++++---------------- 4 files changed, 28 insertions(+), 33 deletions(-) diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index cf96f661e6cd7e..063d469eb4d9c3 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -351,6 +351,7 @@ struct _gc_runtime_state { <0: suppressed; don't immortalize objects */ int immortalize; #endif + Py_ssize_t prior_heap_size; }; #ifdef Py_GIL_DISABLED diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index bb7df1f5cfa7f7..9a1e60fcf2c3fd 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -22,6 +22,7 @@ import _testcapi from _testcapi import with_tp_del from _testcapi import ContainerNoGC + import _testinternalcapi except ImportError: _testcapi = None def with_tp_del(cls): @@ -1101,32 +1102,19 @@ def make_ll(depth): return head head = make_ll(1000) - count = 1000 - - # There will be some objects we aren't counting, - # e.g. the gc stats dicts. This test checks - # that the counts don't grow, so we try to - # correct for the uncounted objects - # This is just an estimate. - CORRECTION = 20 enabled = gc.isenabled() gc.enable() olds = [] + gc.collect() + baseline_live = _testinternalcapi.get_heap_size() for i in range(20_000): newhead = make_ll(20) - count += 20 newhead.surprise = head olds.append(newhead) if len(olds) == 20: - stats = gc.get_stats() - young = stats[0] - incremental = stats[1] - old = stats[2] - collected = young['collected'] + incremental['collected'] + old['collected'] - count += CORRECTION - live = count - collected - self.assertLess(live, 25000) + live = _testinternalcapi.get_heap_size() + self.assertLess(live-baseline_live, 25000) del olds[:] if not enabled: gc.disable() diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index c403075fbb2501..776fcf4948527a 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -2048,6 +2048,11 @@ identify_type_slot_wrappers(PyObject *self, PyObject *Py_UNUSED(ignored)) return _PyType_GetSlotWrapperNames(); } +static PyObject * +get_heap_size(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + return PyLong_FromSsize_t(PyInterpreterState_Get()->gc.heap_size); +} static PyMethodDef module_functions[] = { {"get_configs", get_configs, METH_NOARGS}, @@ -2145,6 +2150,7 @@ static PyMethodDef module_functions[] = { GH_119213_GETARGS_METHODDEF {"get_static_builtin_types", get_static_builtin_types, METH_NOARGS}, {"identify_type_slot_wrappers", identify_type_slot_wrappers, METH_NOARGS}, + {"get_heap_size", get_heap_size, METH_NOARGS, NULL}, {NULL, NULL} /* sentinel */ }; diff --git a/Python/gc.c b/Python/gc.c index 024d041437be4a..b18601d093afa7 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -182,6 +182,7 @@ _PyGC_Init(PyInterpreterState *interp) return _PyStatus_NO_MEMORY(); } gcstate->heap_size = 0; + gcstate->prior_heap_size = 0; return _PyStatus_OK(); } @@ -1278,19 +1279,16 @@ gc_list_set_space(PyGC_Head *list, int space) * the incremental collector must progress through the old * space faster than objects are added to the old space. * - * Each young or incremental collection adds a number of - * objects, S (for survivors) to the old space, and - * incremental collectors scan I objects from the old space. - * I > S must be true. We also want I > S * N to be where - * N > 1. Higher values of N mean that the old space is - * scanned more rapidly. - * The default incremental threshold of 10 translates to - * N == 1.4 (1 + 4/threshold) + * To do this we maintain a prior heap size, so the + * change in heap size can easily be computed. + * + * Each increment scans twice the delta (if increasing) + * plus half the size of the young generation. */ -/* Divide by 10, so that the default incremental threshold of 10 - * scans objects at 1% of the heap size */ -#define SCAN_RATE_DIVISOR 10 +/* Multiply by 5, so that the default incremental threshold of 10 + * scans objects at half the rate as the young generation */ +#define SCAN_RATE_MULTIPLIER 20 static void add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats) @@ -1344,7 +1342,6 @@ gc_collect_young(PyThreadState *tstate, if (scale_factor < 1) { scale_factor = 1; } - gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; add_stats(gcstate, 0, stats); } @@ -1446,9 +1443,6 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) if (scale_factor < 1) { scale_factor = 1; } - gc_list_merge(&gcstate->young.head, &increment); - gcstate->young.count = 0; - gc_list_validate_space(&increment, gcstate->visited_space); Py_ssize_t increment_size = 0; while (increment_size < gcstate->work_to_do) { if (gc_list_is_empty(not_visited)) { @@ -1467,7 +1461,12 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) gc_list_validate_space(&survivors, gcstate->visited_space); gc_list_merge(&survivors, visited); assert(gc_list_is_empty(&increment)); - gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; + Py_ssize_t delta = gcstate->heap_size - gcstate->prior_heap_size; + delta += gcstate->young.threshold * SCAN_RATE_MULTIPLIER / scale_factor; + if (delta > 0) { + gcstate->work_to_do += delta; + } + gcstate->prior_heap_size = gcstate->heap_size; gcstate->work_to_do -= increment_size; validate_old(gcstate); @@ -1856,6 +1855,7 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) gc_collect_young(tstate, &stats); break; case 1: + gc_collect_young(tstate, &stats); gc_collect_increment(tstate, &stats); break; case 2: From d451ace86265f50f115d9327ef9bbdac857eaf38 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 27 Sep 2024 13:28:21 -0700 Subject: [PATCH 2/3] Lower scaling factor --- Lib/test/test_gc.py | 5 +++-- Python/gc.c | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 9a1e60fcf2c3fd..98a38b1ab3faaa 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1108,13 +1108,14 @@ def make_ll(depth): olds = [] gc.collect() baseline_live = _testinternalcapi.get_heap_size() - for i in range(20_000): + iterations = 200_000 if support.is_resource_enabled('cpu') else 20_000 + for i in range(iterations): newhead = make_ll(20) newhead.surprise = head olds.append(newhead) if len(olds) == 20: live = _testinternalcapi.get_heap_size() - self.assertLess(live-baseline_live, 25000) + self.assertLess(live, baseline_live*2) del olds[:] if not enabled: gc.disable() diff --git a/Python/gc.c b/Python/gc.c index b18601d093afa7..ea8f58fa78c864 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1287,8 +1287,8 @@ gc_list_set_space(PyGC_Head *list, int space) */ /* Multiply by 5, so that the default incremental threshold of 10 - * scans objects at half the rate as the young generation */ -#define SCAN_RATE_MULTIPLIER 20 + * scans objects at the same rate as the young generation */ +#define SCAN_RATE_MULTIPLIER 10 static void add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats) @@ -1461,7 +1461,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) gc_list_validate_space(&survivors, gcstate->visited_space); gc_list_merge(&survivors, visited); assert(gc_list_is_empty(&increment)); - Py_ssize_t delta = gcstate->heap_size - gcstate->prior_heap_size; + Py_ssize_t delta = (gcstate->heap_size - gcstate->prior_heap_size)*2; delta += gcstate->young.threshold * SCAN_RATE_MULTIPLIER / scale_factor; if (delta > 0) { gcstate->work_to_do += delta; From 615a96efb9fe66d8c2bdb9603b29247ba36ac5a1 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 27 Sep 2024 14:25:38 -0700 Subject: [PATCH 3/3] Pre-scan sys.modules --- Include/internal/pycore_gc.h | 3 ++- Lib/test/test_gc.py | 1 + Python/gc.c | 32 +++++++++++++++++++++++++++++--- 3 files changed, 32 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 063d469eb4d9c3..44a935aef17150 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -328,7 +328,8 @@ struct _gc_runtime_state { Py_ssize_t heap_size; Py_ssize_t work_to_do; /* Which of the old spaces is the visited space */ - int visited_space; + uint8_t visited_space; + uint8_t scan_reachable; #ifdef Py_GIL_DISABLED /* This is the number of objects that survived the last full diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 98a38b1ab3faaa..7f7319292d5235 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1115,6 +1115,7 @@ def make_ll(depth): olds.append(newhead) if len(olds) == 20: live = _testinternalcapi.get_heap_size() + print(i, live, baseline_live) self.assertLess(live, baseline_live*2) del olds[:] if not enabled: diff --git a/Python/gc.c b/Python/gc.c index ea8f58fa78c864..4e5dc6896605f2 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1287,8 +1287,8 @@ gc_list_set_space(PyGC_Head *list, int space) */ /* Multiply by 5, so that the default incremental threshold of 10 - * scans objects at the same rate as the young generation */ -#define SCAN_RATE_MULTIPLIER 10 + * scans objects at half the rate of the young generation */ +#define SCAN_RATE_MULTIPLIER 5 static void add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats) @@ -1428,8 +1428,30 @@ completed_cycle(GCState *gcstate) gc = next; } gcstate->work_to_do = 0; + gcstate->scan_reachable = 1; } + +static void +gc_mark_reachable(PyThreadState *tstate) +{ + GCState *gcstate = &tstate->interp->gc; + PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; + PyObject *sysdict = tstate->interp->sysdict; + PyObject *sysmod = PyDict_GetItemString(sysdict, "modules"); + if (sysmod == NULL) { + return; + } + PyGC_Head reachable; + gc_list_init(&reachable); + PyGC_Head *gc = _Py_AS_GC(sysmod); + gc_list_move(gc, &reachable); + gc_set_old_space(gc, gcstate->visited_space); + gcstate->work_to_do -= expand_region_transitively_reachable(&reachable, gc, gcstate); + gc_list_merge(&reachable, visited); +} + + static void gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) { @@ -1439,6 +1461,10 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; PyGC_Head increment; gc_list_init(&increment); + if (gcstate->scan_reachable) { + gc_mark_reachable(tstate); + gcstate->scan_reachable = 0; + } Py_ssize_t scale_factor = gcstate->old[0].threshold; if (scale_factor < 1) { scale_factor = 1; @@ -1461,7 +1487,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) gc_list_validate_space(&survivors, gcstate->visited_space); gc_list_merge(&survivors, visited); assert(gc_list_is_empty(&increment)); - Py_ssize_t delta = (gcstate->heap_size - gcstate->prior_heap_size)*2; + Py_ssize_t delta = (gcstate->heap_size - gcstate->prior_heap_size)*3; delta += gcstate->young.threshold * SCAN_RATE_MULTIPLIER / scale_factor; if (delta > 0) { gcstate->work_to_do += delta;