diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h
index cf96f661e6cd7e..44a935aef17150 100644
--- a/Include/internal/pycore_gc.h
+++ b/Include/internal/pycore_gc.h
@@ -328,7 +328,8 @@ struct _gc_runtime_state {
     Py_ssize_t heap_size;
     Py_ssize_t work_to_do;
     /* Which of the old spaces is the visited space */
-    int visited_space;
+    uint8_t visited_space;
+    uint8_t scan_reachable;
 
 #ifdef Py_GIL_DISABLED
     /* This is the number of objects that survived the last full
@@ -351,6 +352,7 @@ struct _gc_runtime_state {
         <0: suppressed; don't immortalize objects */
     int immortalize;
 #endif
+    Py_ssize_t prior_heap_size;
 };
 
 #ifdef Py_GIL_DISABLED
diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py
index bb7df1f5cfa7f7..7f7319292d5235 100644
--- a/Lib/test/test_gc.py
+++ b/Lib/test/test_gc.py
@@ -22,6 +22,7 @@
     import _testcapi
     from _testcapi import with_tp_del
     from _testcapi import ContainerNoGC
+    import _testinternalcapi
 except ImportError:
     _testcapi = None
     def with_tp_del(cls):
@@ -1101,32 +1102,20 @@ def make_ll(depth):
             return head
 
         head = make_ll(1000)
-        count = 1000
-
-        # There will be some objects we aren't counting,
-        # e.g. the gc stats dicts. This test checks
-        # that the counts don't grow, so we try to
-        # correct for the uncounted objects
-        # This is just an estimate.
-        CORRECTION = 20
 
         enabled = gc.isenabled()
         gc.enable()
         olds = []
-        for i in range(20_000):
+        gc.collect()
+        baseline_live = _testinternalcapi.get_heap_size()
+        iterations = 200_000 if support.is_resource_enabled('cpu') else 20_000
+        for i in range(iterations):
             newhead = make_ll(20)
-            count += 20
             newhead.surprise = head
             olds.append(newhead)
             if len(olds) == 20:
-                stats = gc.get_stats()
-                young = stats[0]
-                incremental = stats[1]
-                old = stats[2]
-                collected = young['collected'] + incremental['collected'] + old['collected']
-                count += CORRECTION
-                live = count - collected
-                self.assertLess(live, 25000)
+                live = _testinternalcapi.get_heap_size()
+                self.assertLess(live, baseline_live * 2)
                 del olds[:]
         if not enabled:
             gc.disable()
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index c403075fbb2501..776fcf4948527a 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -2048,6 +2048,11 @@ identify_type_slot_wrappers(PyObject *self, PyObject *Py_UNUSED(ignored))
     return _PyType_GetSlotWrapperNames();
 }
 
+static PyObject *
+get_heap_size(PyObject *self, PyObject *Py_UNUSED(ignored))
+{
+    return PyLong_FromSsize_t(PyInterpreterState_Get()->gc.heap_size);
+}
 
 static PyMethodDef module_functions[] = {
     {"get_configs", get_configs, METH_NOARGS},
@@ -2145,6 +2150,7 @@ static PyMethodDef module_functions[] = {
     GH_119213_GETARGS_METHODDEF
     {"get_static_builtin_types", get_static_builtin_types, METH_NOARGS},
     {"identify_type_slot_wrappers", identify_type_slot_wrappers, METH_NOARGS},
+    {"get_heap_size", get_heap_size, METH_NOARGS},
     {NULL, NULL} /* sentinel */
 };
 
diff --git a/Python/gc.c b/Python/gc.c
index 024d041437be4a..4e5dc6896605f2 100644
--- a/Python/gc.c
+++ b/Python/gc.c
@@ -182,6 +182,7 @@ _PyGC_Init(PyInterpreterState *interp)
         return _PyStatus_NO_MEMORY();
     }
     gcstate->heap_size = 0;
+    gcstate->prior_heap_size = 0;
     return _PyStatus_OK();
 }
 
@@ -1278,19 +1279,16 @@ gc_list_set_space(PyGC_Head *list, int space)
  * the incremental collector must progress through the old
  * space faster than objects are added to the old space.
  *
- * Each young or incremental collection adds a number of
- * objects, S (for survivors) to the old space, and
- * incremental collectors scan I objects from the old space.
- * I > S must be true. We also want I > S * N to be where
- * N > 1. Higher values of N mean that the old space is
- * scanned more rapidly.
- * The default incremental threshold of 10 translates to
- * N == 1.4 (1 + 4/threshold)
+ * To do this we record the heap size at the previous
+ * increment, so the change in heap size can be computed.
+ *
+ * Each increment scans three times that change (if the heap
+ * is growing) plus half the size of the young generation.
  */
 
-/* Divide by 10, so that the default incremental threshold of 10
- * scans objects at 1% of the heap size */
-#define SCAN_RATE_DIVISOR 10
+/* Multiply by 5, so that with the default incremental threshold of 10
+ * each increment scans half a young generation's worth of objects */
+#define SCAN_RATE_MULTIPLIER 5
 
 static void
 add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats)
@@ -1344,7 +1342,6 @@ gc_collect_young(PyThreadState *tstate,
     if (scale_factor < 1) {
         scale_factor = 1;
     }
-    gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
     add_stats(gcstate, 0, stats);
 }
 
@@ -1431,8 +1428,30 @@ completed_cycle(GCState *gcstate)
         gc = next;
     }
     gcstate->work_to_do = 0;
+    gcstate->scan_reachable = 1;
+}
+
+
+static void
+gc_mark_reachable(PyThreadState *tstate)
+{
+    GCState *gcstate = &tstate->interp->gc;
+    PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
+    PyObject *sysdict = tstate->interp->sysdict;
+    PyObject *sysmod = PyDict_GetItemString(sysdict, "modules");
+    if (sysmod == NULL) {
+        return;
+    }
+    PyGC_Head reachable;
+    gc_list_init(&reachable);
+    PyGC_Head *gc = _Py_AS_GC(sysmod);
+    gc_list_move(gc, &reachable);
+    gc_set_old_space(gc, gcstate->visited_space);
+    gcstate->work_to_do -= expand_region_transitively_reachable(&reachable, gc, gcstate);
+    gc_list_merge(&reachable, visited);
 }
 
+
 static void
 gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
 {
@@ -1442,13 +1461,14 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
     PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
     PyGC_Head increment;
     gc_list_init(&increment);
+    if (gcstate->scan_reachable) {
+        gc_mark_reachable(tstate);
+        gcstate->scan_reachable = 0;
+    }
     Py_ssize_t scale_factor = gcstate->old[0].threshold;
     if (scale_factor < 1) {
         scale_factor = 1;
     }
-    gc_list_merge(&gcstate->young.head, &increment);
-    gcstate->young.count = 0;
-    gc_list_validate_space(&increment, gcstate->visited_space);
     Py_ssize_t increment_size = 0;
     while (increment_size < gcstate->work_to_do) {
         if (gc_list_is_empty(not_visited)) {
@@ -1467,7 +1487,12 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
     gc_list_validate_space(&survivors, gcstate->visited_space);
     gc_list_merge(&survivors, visited);
     assert(gc_list_is_empty(&increment));
-    gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
+    Py_ssize_t delta = (gcstate->heap_size - gcstate->prior_heap_size) * 3;
+    delta += gcstate->young.threshold * SCAN_RATE_MULTIPLIER / scale_factor;
+    if (delta > 0) {
+        gcstate->work_to_do += delta;
+    }
+    gcstate->prior_heap_size = gcstate->heap_size;
     gcstate->work_to_do -= increment_size;
 
     validate_old(gcstate);
@@ -1856,6 +1881,7 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason)
         gc_collect_young(tstate, &stats);
         break;
     case 1:
+        gc_collect_young(tstate, &stats);
         gc_collect_increment(tstate, &stats);
         break;
     case 2:
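
For reference, the pacing rule changed above is easy to see in isolation. The sketch below is a rough Python model of the arithmetic now done at the end of gc_collect_increment, assuming the default thresholds (2000 for the young generation, 10 for the incremental collector); it only mirrors the fields touched by this patch and is not the C implementation itself:

    SCAN_RATE_MULTIPLIER = 5  # mirrors the #define in Python/gc.c

    def work_added_per_increment(heap_size, prior_heap_size,
                                 young_threshold=2000, incremental_threshold=10):
        """Scanning budget (in objects) added by one increment."""
        scale_factor = max(incremental_threshold, 1)
        # Three times the heap growth since the previous increment...
        delta = (heap_size - prior_heap_size) * 3
        # ...plus, with default thresholds, half a young generation.
        delta += young_threshold * SCAN_RATE_MULTIPLIER // scale_factor
        return delta if delta > 0 else 0

    # A growing heap is scanned faster than it grows:
    print(work_added_per_increment(110_000, 100_000))  # 31000
    # A steady-state heap still gets the base amount of work:
    print(work_added_per_increment(100_000, 100_000))  # 1000

Because the budget grows three times as fast as the heap, the visited space eventually overtakes new allocations, which is what keeps the live heap bounded in the modified test_gc.py test.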
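The gc_mark_reachable pass added above treats everything transitively reachable from sys.modules as already visited at the start of a collection cycle, so long-lived module state is not rescanned on every increment. A rough user-level approximation of that reachability walk is shown below; the real code operates on the GC's internal lists in C and credits the moved objects against work_to_do, so this is only an illustration:

    import gc
    import sys

    def objects_reachable_from_modules():
        """Approximate the set the marking pass would move to the visited space."""
        seen = {id(sys.modules)}
        stack = [sys.modules]
        reachable = []
        while stack:
            obj = stack.pop()
            reachable.append(obj)
            for referent in gc.get_referents(obj):
                if gc.is_tracked(referent) and id(referent) not in seen:
                    seen.add(id(referent))
                    stack.append(referent)
        return reachable

    print(len(objects_reachable_from_modules()))  # typically tens of thousands of objects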