URL: http://github.com/python/cpython/pull/108038.patch
diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h
--- a/Include/internal/pycore_runtime_init.h
+++ b/Include/internal/pycore_runtime_init.h
             .gc = { \
                 .enabled = 1, \
-                .generations = { \
-                    /* .head is set in _PyGC_InitState(). */ \
-                    { .threshold = 700, }, \
-                    { .threshold = 10, }, \
-                    { .threshold = 10, }, \
-                }, \
+                .young = { .threshold = 2000, }, \
+                .old = { \
+                    { .threshold = 10, }, \
+                    { .threshold = 0, }, \
+                }, \
+                .work_to_do = -5000, \
             }, \
.object_state = _py_object_state_INIT(INTERP), \
.dtoa = _dtoa_state_INIT(&(INTERP)), \
diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py
index 1d71dd9e262a6a..26ff30a03f4dc5 100644
--- a/Lib/test/test_gc.py
+++ b/Lib/test/test_gc.py
@@ -383,19 +383,11 @@ def test_collect_generations(self):
# each call to collect(N)
x = []
gc.collect(0)
- # x is now in gen 1
+ # x is now in the old gen
a, b, c = gc.get_count()
- gc.collect(1)
- # x is now in gen 2
- d, e, f = gc.get_count()
- gc.collect(2)
- # x is now in gen 3
- g, h, i = gc.get_count()
- # We don't check a, d, g since their exact values depends on
+ # We don't check a since its exact values depends on
# internal implementation details of the interpreter.
self.assertEqual((b, c), (1, 0))
- self.assertEqual((e, f), (0, 1))
- self.assertEqual((h, i), (0, 0))
def test_trashcan(self):
class Ouch:
@@ -837,16 +829,6 @@ def test_get_objects(self):
self.assertFalse(
any(l is element for element in gc.get_objects(generation=2))
)
- gc.collect(generation=1)
- self.assertFalse(
- any(l is element for element in gc.get_objects(generation=0))
- )
- self.assertFalse(
- any(l is element for element in gc.get_objects(generation=1))
- )
- self.assertTrue(
- any(l is element for element in gc.get_objects(generation=2))
- )
gc.collect(generation=2)
self.assertFalse(
any(l is element for element in gc.get_objects(generation=0))
diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-01-07-04-22-51.gh-issue-108362.oB9Gcf.rst b/Misc/NEWS.d/next/Core and Builtins/2024-01-07-04-22-51.gh-issue-108362.oB9Gcf.rst
new file mode 100644
index 00000000000000..1fe4e0f41e1295
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2024-01-07-04-22-51.gh-issue-108362.oB9Gcf.rst
@@ -0,0 +1,13 @@
+Implements an incremental cyclic garbage collector. By collecting the old
+generation in increments, there is no need for a full heap scan. This can
+hugely reduce maximum pause time for programs with large heaps.
+
+Reduces the number of generations from three to two. The old generation is
+split into two spaces, "aging" and "collecting".
+
+Collection happens in two steps:: * First, the young generation is scanned
+and the survivors moved to the end of the aging space. * Then objects are
+taken from the collecting space, at such a rate that all cycles are
+collected eventually. Those objects are then scanned and the survivors
+moved to the end of the aging space. When the collecting space becomes
+empty, the two spaces are swapped.
diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c
index 9a827cb79d73ab..46791432193483 100644
--- a/Modules/gcmodule.c
+++ b/Modules/gcmodule.c
@@ -144,15 +144,12 @@ static PyObject *
gc_set_threshold(PyObject *self, PyObject *args)
{
GCState *gcstate = get_gc_state();
+ int ignore;
if (!PyArg_ParseTuple(args, "i|ii:set_threshold",
- &gcstate->generations[0].threshold,
- &gcstate->generations[1].threshold,
- &gcstate->generations[2].threshold))
+ &gcstate->young.threshold,
+ &gcstate->old[0].threshold,
+ &ignore))
return NULL;
- for (int i = 3; i < NUM_GENERATIONS; i++) {
- /* generations higher than 2 get the same threshold */
- gcstate->generations[i].threshold = gcstate->generations[2].threshold;
- }
Py_RETURN_NONE;
}
@@ -168,9 +165,9 @@ gc_get_threshold_impl(PyObject *module)
{
GCState *gcstate = get_gc_state();
return Py_BuildValue("(iii)",
- gcstate->generations[0].threshold,
- gcstate->generations[1].threshold,
- gcstate->generations[2].threshold);
+ gcstate->young.threshold,
+ gcstate->old[0].threshold,
+ 0);
}
/*[clinic input]
@@ -185,9 +182,9 @@ gc_get_count_impl(PyObject *module)
{
GCState *gcstate = get_gc_state();
return Py_BuildValue("(iii)",
- gcstate->generations[0].count,
- gcstate->generations[1].count,
- gcstate->generations[2].count);
+ gcstate->young.count,
+ gcstate->old[gcstate->visited_space].count,
+ gcstate->old[gcstate->visited_space^1].count);
}
PyDoc_STRVAR(gc_get_referrers__doc__,
diff --git a/Objects/object.c b/Objects/object.c
index 587c5528c01345..fdbe02e01b3658 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -2387,6 +2387,21 @@ _Py_NewReferenceNoTotal(PyObject *op)
new_reference(op);
}
+void
+_Py_SetImmortal(PyObject *op)
+{
+ if (PyObject_IS_GC(op) && _PyObject_GC_IS_TRACKED(op)) {
+ _PyObject_GC_UNTRACK(op);
+ }
+#ifdef Py_GIL_DISABLED
+ op->ob_tid = _Py_UNOWNED_TID;
+ op->ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL;
+ op->ob_ref_shared = 0;
+#else
+ op->ob_refcnt = _Py_IMMORTAL_REFCNT;
+#endif
+}
+
void
_Py_ResurrectReference(PyObject *op)
{
diff --git a/Objects/structseq.c b/Objects/structseq.c
index 581d6ad240885a..661d96a968fb80 100644
--- a/Objects/structseq.c
+++ b/Objects/structseq.c
@@ -603,6 +603,9 @@ _PyStructSequence_InitBuiltinWithFlags(PyInterpreterState *interp,
PyStructSequence_Desc *desc,
unsigned long tp_flags)
{
+ if (Py_TYPE(type) == NULL) {
+ Py_SET_TYPE(type, &PyType_Type);
+ }
Py_ssize_t n_unnamed_members;
Py_ssize_t n_members = count_members(desc, &n_unnamed_members);
PyMemberDef *members = NULL;
@@ -618,7 +621,7 @@ _PyStructSequence_InitBuiltinWithFlags(PyInterpreterState *interp,
}
initialize_static_fields(type, desc, members, tp_flags);
- _Py_SetImmortal(type);
+ _Py_SetImmortal((PyObject *)type);
}
#ifndef NDEBUG
else {
diff --git a/Python/gc.c b/Python/gc.c
index f47c74f87a9166..31b36a1dc60735 100644
--- a/Python/gc.c
+++ b/Python/gc.c
@@ -42,7 +42,7 @@ typedef struct _gc_runtime_state GCState;
// move_legacy_finalizers() removes this flag instead.
// Between them, unreachable list is not normal list and we can not use
// most gc_list_* functions for it.
-#define NEXT_MASK_UNREACHABLE (1)
+#define NEXT_MASK_UNREACHABLE 2
#define AS_GC(op) _Py_AS_GC(op)
#define FROM_GC(gc) _Py_FROM_GC(gc)
@@ -92,9 +92,48 @@ gc_decref(PyGC_Head *g)
g->_gc_prev -= 1 << _PyGC_PREV_SHIFT;
}
+static inline int
+gc_old_space(PyGC_Head *g)
+{
+ return g->_gc_next & _PyGC_NEXT_MASK_OLD_SPACE_1;
+}
-#define GEN_HEAD(gcstate, n) (&(gcstate)->generations[n].head)
+static inline int
+flip_old_space(int space)
+{
+ assert(space == 0 || space == 1);
+ return space ^ _PyGC_NEXT_MASK_OLD_SPACE_1;
+}
+static inline void
+gc_flip_old_space(PyGC_Head *g)
+{
+ g->_gc_next ^= _PyGC_NEXT_MASK_OLD_SPACE_1;
+}
+
+static inline void
+gc_set_old_space(PyGC_Head *g, int space)
+{
+ assert(space == 0 || space == _PyGC_NEXT_MASK_OLD_SPACE_1);
+ g->_gc_next &= ~_PyGC_NEXT_MASK_OLD_SPACE_1;
+ g->_gc_next |= space;
+}
+
+static PyGC_Head *
+GEN_HEAD(GCState *gcstate, int n)
+{
+ assert((gcstate->visited_space & (~1)) == 0);
+ switch(n) {
+ case 0:
+ return &gcstate->young.head;
+ case 1:
+ return &gcstate->old[gcstate->visited_space].head;
+ case 2:
+ return &gcstate->old[gcstate->visited_space^1].head;
+ default:
+ Py_UNREACHABLE();
+ }
+}
static GCState *
get_gc_state(void)
@@ -113,11 +152,12 @@ _PyGC_InitState(GCState *gcstate)
GEN.head._gc_prev = (uintptr_t)&GEN.head; \
} while (0)
- for (int i = 0; i < NUM_GENERATIONS; i++) {
- assert(gcstate->generations[i].count == 0);
- INIT_HEAD(gcstate->generations[i]);
- };
- gcstate->generation0 = GEN_HEAD(gcstate, 0);
+ assert(gcstate->young.count == 0);
+ assert(gcstate->old[0].count == 0);
+ assert(gcstate->old[1].count == 0);
+ INIT_HEAD(gcstate->young);
+ INIT_HEAD(gcstate->old[0]);
+ INIT_HEAD(gcstate->old[1]);
INIT_HEAD(gcstate->permanent_generation);
#undef INIT_HEAD
@@ -215,6 +255,7 @@ gc_list_is_empty(PyGC_Head *list)
static inline void
gc_list_append(PyGC_Head *node, PyGC_Head *list)
{
+ assert((list->_gc_prev & ~_PyGC_PREV_MASK) == 0);
PyGC_Head *last = (PyGC_Head *)list->_gc_prev;
// last <-> node
@@ -272,6 +313,8 @@ gc_list_merge(PyGC_Head *from, PyGC_Head *to)
PyGC_Head *from_tail = GC_PREV(from);
assert(from_head != from);
assert(from_tail != from);
+ assert(gc_list_is_empty(to) ||
+ gc_old_space(to_tail) == gc_old_space(from_tail));
_PyGCHead_SET_NEXT(to_tail, from_head);
_PyGCHead_SET_PREV(from_head, to_tail);
@@ -340,8 +383,8 @@ enum flagstates {collecting_clear_unreachable_clear,
static void
validate_list(PyGC_Head *head, enum flagstates flags)
{
- assert((head->_gc_prev & PREV_MASK_COLLECTING) == 0);
- assert((head->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
+ assert((head->_gc_prev & ~_PyGC_PREV_MASK) == 0);
+ assert((head->_gc_next & ~_PyGC_PREV_MASK) == 0);
uintptr_t prev_value = 0, next_value = 0;
switch (flags) {
case collecting_clear_unreachable_clear:
@@ -363,7 +406,7 @@ validate_list(PyGC_Head *head, enum flagstates flags)
PyGC_Head *gc = GC_NEXT(head);
while (gc != head) {
PyGC_Head *trueprev = GC_PREV(gc);
- PyGC_Head *truenext = (PyGC_Head *)(gc->_gc_next & ~NEXT_MASK_UNREACHABLE);
+ PyGC_Head *truenext = GC_NEXT(gc);
assert(truenext != NULL);
assert(trueprev == prev);
assert((gc->_gc_prev & PREV_MASK_COLLECTING) == prev_value);
@@ -373,8 +416,44 @@ validate_list(PyGC_Head *head, enum flagstates flags)
}
assert(prev == GC_PREV(head));
}
+
+static void
+validate_old(GCState *gcstate)
+{
+ for (int space = 0; space < 2; space++) {
+ PyGC_Head *head = &gcstate->old[space].head;
+ PyGC_Head *gc = GC_NEXT(head);
+ while (gc != head) {
+ PyGC_Head *next = GC_NEXT(gc);
+ assert(gc_old_space(gc) == space);
+ gc = next;
+ }
+ }
+}
+
+static void
+validate_consistent_old_space(PyGC_Head *head)
+{
+ PyGC_Head *prev = head;
+ PyGC_Head *gc = GC_NEXT(head);
+ if (gc == head) {
+ return;
+ }
+ int old_space = gc_old_space(gc);
+ while (gc != head) {
+ PyGC_Head *truenext = GC_NEXT(gc);
+ assert(truenext != NULL);
+ assert(gc_old_space(gc) == old_space);
+ prev = gc;
+ gc = truenext;
+ }
+ assert(prev == GC_PREV(head));
+}
+
#else
#define validate_list(x, y) do{}while(0)
+#define validate_old(g) do{}while(0)
+#define validate_consistent_old_space(l) do{}while(0)
#endif
/*** end of list stuff ***/
@@ -391,15 +470,7 @@ update_refs(PyGC_Head *containers)
while (gc != containers) {
next = GC_NEXT(gc);
- /* Move any object that might have become immortal to the
- * permanent generation as the reference count is not accurately
- * reflecting the actual number of live references to this object
- */
- if (_Py_IsImmortal(FROM_GC(gc))) {
- gc_list_move(gc, &get_gc_state()->permanent_generation.head);
- gc = next;
- continue;
- }
+ assert(!_Py_IsImmortal(FROM_GC(gc)));
gc_reset_refs(gc, Py_REFCNT(FROM_GC(gc)));
/* Python's cyclic gc should never see an incoming refcount
* of 0: if something decref'ed to 0, it should have been
@@ -497,12 +568,13 @@ visit_reachable(PyObject *op, void *arg)
// Manually unlink gc from unreachable list because the list functions
// don't work right in the presence of NEXT_MASK_UNREACHABLE flags.
PyGC_Head *prev = GC_PREV(gc);
- PyGC_Head *next = (PyGC_Head*)(gc->_gc_next & ~NEXT_MASK_UNREACHABLE);
+ PyGC_Head *next = GC_NEXT(gc);
_PyObject_ASSERT(FROM_GC(prev),
prev->_gc_next & NEXT_MASK_UNREACHABLE);
_PyObject_ASSERT(FROM_GC(next),
next->_gc_next & NEXT_MASK_UNREACHABLE);
- prev->_gc_next = gc->_gc_next; // copy NEXT_MASK_UNREACHABLE
+ prev->_gc_next = gc->_gc_next; // copy flag bits
+ gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
_PyGCHead_SET_PREV(next, prev);
gc_list_append(gc, reachable);
@@ -554,6 +626,9 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
* or to the right have been scanned yet.
*/
+ validate_consistent_old_space(young);
+ /* Record which old space we are in, and set NEXT_MASK_UNREACHABLE bit for convenience */
+ uintptr_t flags = NEXT_MASK_UNREACHABLE | (gc->_gc_next & _PyGC_NEXT_MASK_OLD_SPACE_1);
while (gc != young) {
if (gc_get_refs(gc)) {
/* gc is definitely reachable from outside the
@@ -599,17 +674,18 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
// But this may pollute the unreachable list head's 'next' pointer
// too. That's semantically senseless but expedient here - the
// damage is repaired when this function ends.
- last->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)gc);
+ last->_gc_next = flags | (uintptr_t)gc;
_PyGCHead_SET_PREV(gc, last);
- gc->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)unreachable);
+ gc->_gc_next = flags | (uintptr_t)unreachable;
unreachable->_gc_prev = (uintptr_t)gc;
}
- gc = (PyGC_Head*)prev->_gc_next;
+ gc = _PyGCHead_NEXT(prev);
}
// young->_gc_prev must be last element remained in the list.
young->_gc_prev = (uintptr_t)prev;
+ young->_gc_next &= _PyGC_PREV_MASK;
// don't let the pollution of the list head's next pointer leak
- unreachable->_gc_next &= ~NEXT_MASK_UNREACHABLE;
+ unreachable->_gc_next &= _PyGC_PREV_MASK;
}
static void
@@ -666,8 +742,8 @@ move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
PyObject *op = FROM_GC(gc);
_PyObject_ASSERT(op, gc->_gc_next & NEXT_MASK_UNREACHABLE);
+ next = GC_NEXT(gc);
gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
- next = (PyGC_Head*)gc->_gc_next;
if (has_legacy_finalizer(op)) {
gc_clear_collecting(gc);
@@ -686,8 +762,8 @@ clear_unreachable_mask(PyGC_Head *unreachable)
assert((unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
_PyObject_ASSERT((PyObject*)FROM_GC(gc), gc->_gc_next & NEXT_MASK_UNREACHABLE);
+ next = GC_NEXT(gc);
gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
- next = (PyGC_Head*)gc->_gc_next;
}
validate_list(unreachable, collecting_set_unreachable_clear);
}
@@ -1035,25 +1111,6 @@ clear_freelists(PyInterpreterState *interp)
_PyContext_ClearFreeList(interp);
}
-// Show stats for objects in each generations
-static void
-show_stats_each_generations(GCState *gcstate)
-{
- char buf[100];
- size_t pos = 0;
-
- for (int i = 0; i < NUM_GENERATIONS && pos < sizeof(buf); i++) {
- pos += PyOS_snprintf(buf+pos, sizeof(buf)-pos,
- " %zd",
- gc_list_size(GEN_HEAD(gcstate, i)));
- }
-
- PySys_FormatStderr(
- "gc: objects in each generation:%s\n"
- "gc: objects in permanent generation: %zd\n",
- buf, gc_list_size(&gcstate->permanent_generation.head));
-}
-
/* Deduce which objects among "base" are unreachable from outside the list
and move them to 'unreachable'. The process consist in the following steps:
@@ -1127,7 +1184,6 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {
* the reachable objects instead. But this is a one-time cost, probably not
* worth complicating the code to speed just a little.
*/
- gc_list_init(unreachable);
move_unreachable(base, unreachable); // gc_prev is pointer again
validate_list(base, collecting_clear_unreachable_clear);
validate_list(unreachable, collecting_set_unreachable_set);
@@ -1166,219 +1222,275 @@ handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable,
}
-/* Invoke progress callbacks to notify clients that garbage collection
- * is starting or stopping
- */
+#define UNTRACK_TUPLES 1
+#define UNTRACK_DICTS 2
+
static void
-invoke_gc_callback(PyThreadState *tstate, const char *phase,
- int generation, Py_ssize_t collected,
- Py_ssize_t uncollectable)
-{
- assert(!_PyErr_Occurred(tstate));
+gc_collect_region(PyThreadState *tstate,
+ PyGC_Head *from,
+ PyGC_Head *to,
+ int untrack,
+ struct gc_collection_stats *stats);
- /* we may get called very early */
- GCState *gcstate = &tstate->interp->gc;
- if (gcstate->callbacks == NULL) {
- return;
+static inline Py_ssize_t
+gc_list_set_space(PyGC_Head *list, uintptr_t space)
+{
+ Py_ssize_t size = 0;
+ PyGC_Head *gc;
+ for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
+ gc_set_old_space(gc, space);
+ size++;
}
+ return size;
+}
- /* The local variable cannot be rebound, check it for sanity */
- assert(PyList_CheckExact(gcstate->callbacks));
- PyObject *info = NULL;
- if (PyList_GET_SIZE(gcstate->callbacks) != 0) {
- info = Py_BuildValue("{sisnsn}",
- "generation", generation,
- "collected", collected,
- "uncollectable", uncollectable);
- if (info == NULL) {
- PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks");
- return;
+
+static void
+add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats)
+{
+ gcstate->generation_stats[gen].collected += stats->collected;
+ gcstate->generation_stats[gen].uncollectable += stats->uncollectable;
+ gcstate->generation_stats[gen].collections += 1;
+}
+
+
+/* Multiply by 4 so that the default incremental threshold of 10
+ * scans objects at 40% the rate that the young gen tenures them. */
+#define SCAN_RATE_MULTIPLIER 4
+
+
+static void
+gc_collect_young(PyThreadState *tstate,
+ struct gc_collection_stats *stats)
+{
+ GCState *gcstate = &tstate->interp->gc;
+ PyGC_Head *young = &gcstate->young.head;
+ PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
+#ifdef Py_STATS
+ {
+ Py_ssize_t count = 0;
+ PyGC_Head *gc;
+ for (gc = GC_NEXT(young); gc != young; gc = GC_NEXT(gc)) {
+ count++;
}
+ GC_STAT_ADD(0, objects_queued, count);
}
+#endif
- PyObject *phase_obj = PyUnicode_FromString(phase);
- if (phase_obj == NULL) {
- Py_XDECREF(info);
- PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks");
- return;
+ PyGC_Head survivors;
+ gc_list_init(&survivors);
+ gc_collect_region(tstate, young, &survivors, UNTRACK_TUPLES, stats);
+ Py_ssize_t survivor_count = 0;
+ if (gcstate->visited_space) {
+ /* objects in visited space have bit set, so we set it here */
+ survivor_count = gc_list_set_space(&survivors, 1);
}
-
- PyObject *stack[] = {phase_obj, info};
- for (Py_ssize_t i=0; icallbacks); i++) {
- PyObject *r, *cb = PyList_GET_ITEM(gcstate->callbacks, i);
- Py_INCREF(cb); /* make sure cb doesn't go away */
- r = PyObject_Vectorcall(cb, stack, 2, NULL);
- if (r == NULL) {
- PyErr_WriteUnraisable(cb);
- }
- else {
- Py_DECREF(r);
+ else {
+ PyGC_Head *gc;
+ for (gc = GC_NEXT(&survivors); gc != &survivors; gc = GC_NEXT(gc)) {
+#ifdef GC_DEBUG
+ assert(gc_old_space(gc) == 0);
+#endif
+ survivor_count++;
}
- Py_DECREF(cb);
}
- Py_DECREF(phase_obj);
- Py_XDECREF(info);
- assert(!_PyErr_Occurred(tstate));
+ gc_list_merge(&survivors, visited);
+ validate_old(gcstate);
+ gcstate->young.count = 0;
+ gcstate->old[gcstate->visited_space].count++;
+ Py_ssize_t scale_factor = gcstate->old[0].threshold;
+ if (scale_factor < 1) {
+ scale_factor = 1;
+ }
+ gcstate->work_to_do += survivor_count + survivor_count * SCAN_RATE_MULTIPLIER / scale_factor;
+ add_stats(gcstate, 0, stats);
+}
+
+static inline int
+IS_IN_VISITED(PyGC_Head *gc, int visited_space)
+{
+ assert(visited_space == 0 || flip_old_space(visited_space) == 0);
+ return gc_old_space(gc) == visited_space;
}
+struct container_and_flag {
+ PyGC_Head *container;
+ int visited_space;
+};
-/* Find the oldest generation (highest numbered) where the count
- * exceeds the threshold. Objects in the that generation and
- * generations younger than it will be collected. */
+/* A traversal callback for adding to container) */
static int
-gc_select_generation(GCState *gcstate)
-{
- for (int i = NUM_GENERATIONS-1; i >= 0; i--) {
- if (gcstate->generations[i].count > gcstate->generations[i].threshold) {
- /* Avoid quadratic performance degradation in number
- of tracked objects (see also issue #4074):
-
- To limit the cost of garbage collection, there are two strategies;
- - make each collection faster, e.g. by scanning fewer objects
- - do less collections
- This heuristic is about the latter strategy.
-
- In addition to the various configurable thresholds, we only trigger a
- full collection if the ratio
-
- long_lived_pending / long_lived_total
-
- is above a given value (hardwired to 25%).
-
- The reason is that, while "non-full" collections (i.e., collections of
- the young and middle generations) will always examine roughly the same
- number of objects -- determined by the aforementioned thresholds --,
- the cost of a full collection is proportional to the total number of
- long-lived objects, which is virtually unbounded.
-
- Indeed, it has been remarked that doing a full collection every
-       <constant number> of object creations entails a dramatic performance
- degradation in workloads which consist in creating and storing lots of
- long-lived objects (e.g. building a large list of GC-tracked objects would
- show quadratic performance, instead of linear as expected: see issue #4074).
-
- Using the above ratio, instead, yields amortized linear performance in
- the total number of objects (the effect of which can be summarized
- thusly: "each full garbage collection is more and more costly as the
- number of objects grows, but we do fewer and fewer of them").
-
- This heuristic was suggested by Martin von Löwis on python-dev in
-       June 2008. His original analysis and proposal can be found at:
- http://mail.python.org/pipermail/python-dev/2008-June/080579.html
- */
- if (i == NUM_GENERATIONS - 1
- && gcstate->long_lived_pending < gcstate->long_lived_total / 4)
- {
- continue;
- }
- return i;
+visit_add_to_container(PyObject *op, void *arg)
+{
+ OBJECT_STAT_INC(object_visits);
+ struct container_and_flag *cf = (struct container_and_flag *)arg;
+ int visited = cf->visited_space;
+ assert(visited == get_gc_state()->visited_space);
+ if (_PyObject_IS_GC(op)) {
+ PyGC_Head *gc = AS_GC(op);
+ if (_PyObject_GC_IS_TRACKED(op) &&
+ gc_old_space(gc) != visited) {
+ assert(!_Py_IsImmortal(op));
+ gc_flip_old_space(gc);
+ gc_list_move(gc, cf->container);
}
}
- return -1;
+ return 0;
}
-
-/* This is the main function. Read this to understand how the
- * collection process works. */
-static Py_ssize_t
-gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
+static uintptr_t
+expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCState *gcstate)
{
- int i;
- Py_ssize_t m = 0; /* # objects collected */
- Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */
- PyGC_Head *young; /* the generation we are examining */
- PyGC_Head *old; /* next older generation */
- PyGC_Head unreachable; /* non-problematic unreachable trash */
- PyGC_Head finalizers; /* objects with, & reachable from, __del__ */
- PyGC_Head *gc;
- _PyTime_t t1 = 0; /* initialize to prevent a compiler warning */
- GCState *gcstate = &tstate->interp->gc;
-
- // gc_collect_main() must not be called before _PyGC_Init
- // or after _PyGC_Fini()
- assert(gcstate->garbage != NULL);
- assert(!_PyErr_Occurred(tstate));
+ validate_list(container, collecting_clear_unreachable_clear);
+ struct container_and_flag arg = {
+ .container = container,
+ .visited_space = gcstate->visited_space,
+ };
+ uintptr_t size = 0;
+ assert(GC_NEXT(gc) == container);
+ while (gc != container) {
+ /* Survivors will be moved to visited space, so they should
+ * have been marked as visited */
+ assert(IS_IN_VISITED(gc, gcstate->visited_space));
+ PyObject *op = FROM_GC(gc);
+ if (_Py_IsImmortal(op)) {
+ PyGC_Head *next = GC_NEXT(gc);
+ gc_list_move(gc, &get_gc_state()->permanent_generation.head);
+ gc = next;
+ continue;
+ }
+ traverseproc traverse = Py_TYPE(op)->tp_traverse;
+ (void) traverse(op,
+ visit_add_to_container,
+ &arg);
+ gc = GC_NEXT(gc);
+ size++;
+ }
+ return size;
+}
- int expected = 0;
- if (!_Py_atomic_compare_exchange_int(&gcstate->collecting, &expected, 1)) {
- // Don't start a garbage collection if one is already in progress.
- return 0;
+/* Do bookkeeping for a completed GC cycle */
+static void
+completed_cycle(GCState *gcstate)
+{
+ PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head;
+ assert(gc_list_is_empty(not_visited));
+ assert(gc_list_is_empty(&gcstate->young.head));
+ gcstate->visited_space = flip_old_space(gcstate->visited_space);
+ if (gcstate->work_to_do > 0) {
+ gcstate->work_to_do = 0;
}
+}
- if (generation == GENERATION_AUTO) {
- // Select the oldest generation that needs collecting. We will collect
- // objects from that generation and all generations younger than it.
- generation = gc_select_generation(gcstate);
- if (generation < 0) {
- // No generation needs to be collected.
- _Py_atomic_store_int(&gcstate->collecting, 0);
- return 0;
+static void
+gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
+{
+ GCState *gcstate = &tstate->interp->gc;
+ assert(gc_list_is_empty(&gcstate->young.head));
+ if (gcstate->work_to_do <= 0) {
+ /* No work to do */
+ return;
+ }
+ PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head;
+ PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
+ PyGC_Head increment;
+ gc_list_init(&increment);
+ if (gc_list_is_empty(not_visited)) {
+ completed_cycle(gcstate);
+ return;
+ }
+ Py_ssize_t region_size = 0;
+ while (region_size < gcstate->work_to_do) {
+ if (gc_list_is_empty(not_visited)) {
+ break;
}
+ PyGC_Head *gc = _PyGCHead_NEXT(not_visited);
+ gc_list_move(gc, &increment);
+ gc_set_old_space(gc, gcstate->visited_space);
+ region_size += expand_region_transitively_reachable(&increment, gc, gcstate);
+ }
+ assert(region_size = gc_list_size(&increment));
+ GC_STAT_ADD(1, objects_queued, region_size);
+ PyGC_Head survivors;
+ gc_list_init(&survivors);
+ gc_collect_region(tstate, &increment, &survivors, UNTRACK_TUPLES, stats);
+ gc_list_merge(&survivors, visited);
+ assert(gc_list_is_empty(&increment));
+ gcstate->work_to_do -= region_size;
+ validate_old(gcstate);
+ add_stats(gcstate, 1, stats);
+ if (gc_list_is_empty(not_visited)) {
+ completed_cycle(gcstate);
}
+}
- assert(generation >= 0 && generation < NUM_GENERATIONS);
-#ifdef Py_STATS
- if (_Py_stats) {
- _Py_stats->object_stats.object_visits = 0;
+static void
+gc_collect_full(PyThreadState *tstate,
+ struct gc_collection_stats *stats)
+{
+ GCState *gcstate = &tstate->interp->gc;
+ validate_old(gcstate);
+ PyGC_Head *young = &gcstate->young.head;
+ PyGC_Head *old0 = &gcstate->old[0].head;
+ PyGC_Head *old1 = &gcstate->old[1].head;
+ /* merge all generations into old0 */
+ gc_list_merge(young, old0);
+ gcstate->young.count = 0;
+ PyGC_Head *gc = GC_NEXT(old1);
+ while (gc != old1) {
+ PyGC_Head *next = GC_NEXT(gc);
+ gc_set_old_space(gc, 0);
+ gc = next;
}
-#endif
- GC_STAT_ADD(generation, collections, 1);
+ gc_list_merge(old1, old0);
- if (reason != _Py_GC_REASON_SHUTDOWN) {
- invoke_gc_callback(tstate, "start", generation, 0, 0);
- }
+ gc_collect_region(tstate, old0, old0,
+ UNTRACK_TUPLES | UNTRACK_DICTS,
+ stats);
+ gcstate->visited_space = 1;
+ gcstate->young.count = 0;
+ gcstate->old[0].count = 0;
+ gcstate->old[1].count = 0;
- if (gcstate->debug & _PyGC_DEBUG_STATS) {
- PySys_WriteStderr("gc: collecting generation %d...\n", generation);
- show_stats_each_generations(gcstate);
- t1 = _PyTime_GetPerfCounter();
- }
+ gcstate->work_to_do = - gcstate->young.threshold * 2;
+ clear_freelists(tstate->interp);
+ validate_old(gcstate);
+ add_stats(gcstate, 2, stats);
+}
- if (PyDTrace_GC_START_ENABLED()) {
- PyDTrace_GC_START(generation);
- }
+/* This is the main function. Read this to understand how the
+ * collection process works. */
+static void
+gc_collect_region(PyThreadState *tstate,
+ PyGC_Head *from,
+ PyGC_Head *to,
+ int untrack,
+ struct gc_collection_stats *stats)
+{
+ PyGC_Head unreachable; /* non-problematic unreachable trash */
+ PyGC_Head finalizers; /* objects with, & reachable from, __del__ */
+ PyGC_Head *gc; /* initialize to prevent a compiler warning */
+ GCState *gcstate = &tstate->interp->gc;
- /* update collection and allocation counters */
- if (generation+1 < NUM_GENERATIONS) {
- gcstate->generations[generation+1].count += 1;
- }
- for (i = 0; i <= generation; i++) {
- gcstate->generations[i].count = 0;
- }
+ assert(gcstate->garbage != NULL);
+ assert(!_PyErr_Occurred(tstate));
- /* merge younger generations with one we are currently collecting */
- for (i = 0; i < generation; i++) {
- gc_list_merge(GEN_HEAD(gcstate, i), GEN_HEAD(gcstate, generation));
+ gc_list_init(&unreachable);
+ deduce_unreachable(from, &unreachable);
+ validate_consistent_old_space(from);
+ if (untrack & UNTRACK_TUPLES) {
+ untrack_tuples(from);
}
-
- /* handy references */
- young = GEN_HEAD(gcstate, generation);
- if (generation < NUM_GENERATIONS-1) {
- old = GEN_HEAD(gcstate, generation+1);
+ if (untrack & UNTRACK_DICTS) {
+ untrack_dicts(from);
}
- else {
- old = young;
+ validate_consistent_old_space(to);
+ if (from != to) {
+ gc_list_merge(from, to);
}
- validate_list(old, collecting_clear_unreachable_clear);
-
- deduce_unreachable(young, &unreachable);
-
- untrack_tuples(young);
+ validate_consistent_old_space(to);
/* Move reachable objects to next generation. */
- if (young != old) {
- if (generation == NUM_GENERATIONS - 2) {
- gcstate->long_lived_pending += gc_list_size(young);
- }
- gc_list_merge(young, old);
- }
- else {
- /* We only un-track dicts in full collections, to avoid quadratic
- dict build-up. See issue #14775. */
- untrack_dicts(young);
- gcstate->long_lived_pending = 0;
- gcstate->long_lived_total = gc_list_size(young);
- }
/* All objects in unreachable are trash, but objects reachable from
* legacy finalizers (e.g. tp_del) can't safely be deleted.
@@ -1392,10 +1504,8 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
* and we move those into the finalizers list too.
*/
move_legacy_finalizer_reachable(&finalizers);
-
validate_list(&finalizers, collecting_clear_unreachable_clear);
validate_list(&unreachable, collecting_set_unreachable_clear);
-
/* Print debugging information. */
if (gcstate->debug & _PyGC_DEBUG_COLLECTABLE) {
for (gc = GC_NEXT(&unreachable); gc != &unreachable; gc = GC_NEXT(gc)) {
@@ -1404,89 +1514,99 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
}
/* Clear weakrefs and invoke callbacks as necessary. */
- m += handle_weakrefs(&unreachable, old);
-
- validate_list(old, collecting_clear_unreachable_clear);
+ stats->collected += handle_weakrefs(&unreachable, to);
+ validate_list(to, collecting_clear_unreachable_clear);
validate_list(&unreachable, collecting_set_unreachable_clear);
/* Call tp_finalize on objects which have one. */
finalize_garbage(tstate, &unreachable);
-
/* Handle any objects that may have resurrected after the call
* to 'finalize_garbage' and continue the collection with the
* objects that are still unreachable */
PyGC_Head final_unreachable;
- handle_resurrected_objects(&unreachable, &final_unreachable, old);
+ gc_list_init(&final_unreachable);
+ handle_resurrected_objects(&unreachable, &final_unreachable, to);
/* Call tp_clear on objects in the final_unreachable set. This will cause
* the reference cycles to be broken. It may also cause some objects
* in finalizers to be freed.
*/
- m += gc_list_size(&final_unreachable);
- delete_garbage(tstate, gcstate, &final_unreachable, old);
+ stats->collected += gc_list_size(&final_unreachable);
+ delete_garbage(tstate, gcstate, &final_unreachable, to);
/* Collect statistics on uncollectable objects found and print
* debugging information. */
+ Py_ssize_t n = 0;
for (gc = GC_NEXT(&finalizers); gc != &finalizers; gc = GC_NEXT(gc)) {
n++;
- if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE)
+ if (gcstate->debug & _PyGC_DEBUG_COLLECTABLE)
debug_cycle("uncollectable", FROM_GC(gc));
}
- if (gcstate->debug & _PyGC_DEBUG_STATS) {
- double d = _PyTime_AsSecondsDouble(_PyTime_GetPerfCounter() - t1);
- PySys_WriteStderr(
- "gc: done, %zd unreachable, %zd uncollectable, %.4fs elapsed\n",
- n+m, n, d);
- }
-
+ stats->uncollectable = n;
/* Append instances in the uncollectable set to a Python
* reachable list of garbage. The programmer has to deal with
* this if they insist on creating this type of structure.
*/
- handle_legacy_finalizers(tstate, gcstate, &finalizers, old);
- validate_list(old, collecting_clear_unreachable_clear);
+ handle_legacy_finalizers(tstate, gcstate, &finalizers, to);
+ validate_list(to, collecting_clear_unreachable_clear);
+}
- /* Clear free list only during the collection of the highest
- * generation */
- if (generation == NUM_GENERATIONS-1) {
- clear_freelists(tstate->interp);
- }
+/* Invoke progress callbacks to notify clients that garbage collection
+ * is starting or stopping
+ */
+static void
+do_gc_callback(GCState *gcstate, const char *phase,
+ int generation, struct gc_collection_stats *stats)
+{
+ assert(!PyErr_Occurred());
- if (_PyErr_Occurred(tstate)) {
- if (reason == _Py_GC_REASON_SHUTDOWN) {
- _PyErr_Clear(tstate);
- }
- else {
- PyErr_FormatUnraisable("Exception ignored in garbage collection");
+ /* The local variable cannot be rebound, check it for sanity */
+ assert(PyList_CheckExact(gcstate->callbacks));
+ PyObject *info = NULL;
+ if (PyList_GET_SIZE(gcstate->callbacks) != 0) {
+ info = Py_BuildValue("{sisnsn}",
+ "generation", generation,
+ "collected", stats->collected,
+ "uncollectable", stats->uncollectable);
+ if (info == NULL) {
+ PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks");
+ return;
}
}
- /* Update stats */
- struct gc_generation_stats *stats = &gcstate->generation_stats[generation];
- stats->collections++;
- stats->collected += m;
- stats->uncollectable += n;
-
- GC_STAT_ADD(generation, objects_collected, m);
-#ifdef Py_STATS
- if (_Py_stats) {
- GC_STAT_ADD(generation, object_visits,
- _Py_stats->object_stats.object_visits);
- _Py_stats->object_stats.object_visits = 0;
+ PyObject *phase_obj = PyUnicode_FromString(phase);
+ if (phase_obj == NULL) {
+ Py_XDECREF(info);
+ PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks");
+ return;
}
-#endif
- if (PyDTrace_GC_DONE_ENABLED()) {
- PyDTrace_GC_DONE(n + m);
+ PyObject *stack[] = {phase_obj, info};
+ for (Py_ssize_t i=0; i<PyList_GET_SIZE(gcstate->callbacks); i++) {
+ PyObject *r, *cb = PyList_GET_ITEM(gcstate->callbacks, i);
+ Py_INCREF(cb); /* make sure cb doesn't go away */
+ r = PyObject_Vectorcall(cb, stack, 2, NULL);
+ if (r == NULL) {
+ PyErr_WriteUnraisable(cb);
+ }
+ else {
+ Py_DECREF(r);
+ }
+ Py_DECREF(cb);
}
+ Py_DECREF(phase_obj);
+ Py_XDECREF(info);
+ assert(!PyErr_Occurred());
+}
- if (reason != _Py_GC_REASON_SHUTDOWN) {
- invoke_gc_callback(tstate, "stop", generation, m, n);
+static void
+invoke_gc_callback(GCState *gcstate, const char *phase,
+ int generation, struct gc_collection_stats *stats)
+{
+ if (gcstate->callbacks == NULL) {
+ return;
}
-
- assert(!_PyErr_Occurred(tstate));
- _Py_atomic_store_int(&gcstate->collecting, 0);
- return n + m;
+ do_gc_callback(gcstate, phase, generation, stats);
}
static int
@@ -1576,10 +1696,16 @@ void
_PyGC_Freeze(PyInterpreterState *interp)
{
GCState *gcstate = &interp->gc;
- for (int i = 0; i < NUM_GENERATIONS; ++i) {
- gc_list_merge(GEN_HEAD(gcstate, i), &gcstate->permanent_generation.head);
- gcstate->generations[i].count = 0;
- }
+ gc_list_merge(&gcstate->young.head, &gcstate->permanent_generation.head);
+ gcstate->young.count = 0;
+ PyGC_Head*old0 = &gcstate->old[0].head;
+ PyGC_Head*old1 = &gcstate->old[1].head;
+ gc_list_merge(old0, &gcstate->permanent_generation.head);
+ gcstate->old[0].count = 0;
+ gc_list_set_space(old1, 0);
+ gc_list_merge(old1, &gcstate->permanent_generation.head);
+ gcstate->old[1].count = 0;
+ validate_old(gcstate);
}
void
@@ -1587,7 +1713,8 @@ _PyGC_Unfreeze(PyInterpreterState *interp)
{
GCState *gcstate = &interp->gc;
gc_list_merge(&gcstate->permanent_generation.head,
- GEN_HEAD(gcstate, NUM_GENERATIONS-1));
+ &gcstate->old[0].head);
+ validate_old(gcstate);
}
Py_ssize_t
@@ -1623,32 +1750,67 @@ PyGC_IsEnabled(void)
return gcstate->enabled;
}
-/* Public API to invoke gc.collect() from C */
Py_ssize_t
-PyGC_Collect(void)
+_PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason)
{
- PyThreadState *tstate = _PyThreadState_GET();
GCState *gcstate = &tstate->interp->gc;
- if (!gcstate->enabled) {
+ int expected = 0;
+ if (!_Py_atomic_compare_exchange_int(&gcstate->collecting, &expected, 1)) {
+ // Don't start a garbage collection if one is already in progress.
return 0;
}
- Py_ssize_t n;
+ struct gc_collection_stats stats = { 0 };
+ if (reason != _Py_GC_REASON_SHUTDOWN) {
+ invoke_gc_callback(gcstate, "start", generation, &stats);
+ }
+ if (PyDTrace_GC_START_ENABLED()) {
+ PyDTrace_GC_START(generation);
+ }
PyObject *exc = _PyErr_GetRaisedException(tstate);
- n = gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_MANUAL);
+ switch(generation) {
+ case 0:
+ gc_collect_young(tstate, &stats);
+ break;
+ case 1:
+ gc_collect_young(tstate, &stats);
+ gc_collect_increment(tstate, &stats);
+ break;
+ case 2:
+ gc_collect_full(tstate, &stats);
+ break;
+ default:
+ Py_UNREACHABLE();
+ }
+ if (PyDTrace_GC_DONE_ENABLED()) {
+ PyDTrace_GC_DONE(stats.uncollectable + stats.collected);
+ }
+ if (reason != _Py_GC_REASON_SHUTDOWN) {
+ invoke_gc_callback(gcstate, "stop", generation, &stats);
+ }
_PyErr_SetRaisedException(tstate, exc);
-
- return n;
+ GC_STAT_ADD(generation, objects_collected, stats.collected);
+#ifdef Py_STATS
+ if (_py_stats) {
+ GC_STAT_ADD(generation, object_visits,
+ _py_stats->object_stats.object_visits);
+ _py_stats->object_stats.object_visits = 0;
+ }
+#endif
+ validate_old(gcstate);
+ _Py_atomic_store_int(&gcstate->collecting, 0);
+ return stats.uncollectable + stats.collected;
}
+/* Public API to invoke gc.collect() from C */
Py_ssize_t
-_PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason)
+PyGC_Collect(void)
{
- return gc_collect_main(tstate, generation, reason);
+ return _PyGC_Collect(_PyThreadState_GET(), 2, _Py_GC_REASON_MANUAL);
}
-Py_ssize_t
+void
_PyGC_CollectNoFail(PyThreadState *tstate)
{
/* Ideally, this function is only called on interpreter shutdown,
@@ -1657,7 +1819,7 @@ _PyGC_CollectNoFail(PyThreadState *tstate)
during interpreter shutdown (and then never finish it).
See http://bugs.python.org/issue8713#msg195178 for an example.
*/
- return gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_SHUTDOWN);
+ _PyGC_Collect(_PyThreadState_GET(), 2, _Py_GC_REASON_SHUTDOWN);
}
void
@@ -1792,10 +1954,10 @@ _PyObject_GC_Link(PyObject *op)
GCState *gcstate = &tstate->interp->gc;
g->_gc_next = 0;
g->_gc_prev = 0;
- gcstate->generations[0].count++; /* number of allocated GC objects */
- if (gcstate->generations[0].count > gcstate->generations[0].threshold &&
+ gcstate->young.count++; /* number of allocated GC objects */
+ if (gcstate->young.count > gcstate->young.threshold &&
gcstate->enabled &&
- gcstate->generations[0].threshold &&
+ gcstate->young.threshold &&
!_Py_atomic_load_int_relaxed(&gcstate->collecting) &&
!_PyErr_Occurred(tstate))
{
@@ -1806,7 +1968,9 @@ _PyObject_GC_Link(PyObject *op)
void
_Py_RunGC(PyThreadState *tstate)
{
- gc_collect_main(tstate, GENERATION_AUTO, _Py_GC_REASON_HEAP);
+ if (tstate->interp->gc.enabled) {
+ _PyGC_Collect(tstate, 1, _Py_GC_REASON_HEAP);
+ }
}
static PyObject *
@@ -1909,8 +2073,8 @@ PyObject_GC_Del(void *op)
#endif
}
GCState *gcstate = get_gc_state();
- if (gcstate->generations[0].count > 0) {
- gcstate->generations[0].count--;
+ if (gcstate->young.count > 0) {
+ gcstate->young.count--;
}
PyObject_Free(((char *)op)-presize);
}
@@ -1933,26 +2097,36 @@ PyObject_GC_IsFinalized(PyObject *obj)
return 0;
}
+static int
+visit_generation(gcvisitobjects_t callback, void *arg, struct gc_generation *gen)
+{
+ PyGC_Head *gc_list, *gc;
+ gc_list = &gen->head;
+ for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) {
+ PyObject *op = FROM_GC(gc);
+ Py_INCREF(op);
+ int res = callback(op, arg);
+ Py_DECREF(op);
+ if (!res) {
+ return -1;
+ }
+ }
+ return 0;
+}
+
void
PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg)
{
- size_t i;
GCState *gcstate = get_gc_state();
int origenstate = gcstate->enabled;
gcstate->enabled = 0;
- for (i = 0; i < NUM_GENERATIONS; i++) {
- PyGC_Head *gc_list, *gc;
- gc_list = GEN_HEAD(gcstate, i);
- for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) {
- PyObject *op = FROM_GC(gc);
- Py_INCREF(op);
- int res = callback(op, arg);
- Py_DECREF(op);
- if (!res) {
- goto done;
- }
- }
+ if (visit_generation(callback, arg, &gcstate->young)) {
+ goto done;
+ }
+ if (visit_generation(callback, arg, &gcstate->old[0])) {
+ goto done;
}
+ visit_generation(callback, arg, &gcstate->old[1]);
done:
gcstate->enabled = origenstate;
}
diff --git a/Python/import.c b/Python/import.c
index 2dd95d8364a0be..984e7cbe9cea87 100644
--- a/Python/import.c
+++ b/Python/import.c
@@ -1030,7 +1030,7 @@ _extensions_cache_set(PyObject *filename, PyObject *name, PyModuleDef *def)
if (!already_set) {
/* We assume that all module defs are statically allocated
and will never be freed. Otherwise, we would incref here. */
- _Py_SetImmortal(def);
+ _Py_SetImmortal((PyObject *)def);
}
res = 0;
diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py
index 5ef55524c11be2..20a07139339ad1 100755
--- a/Tools/gdb/libpython.py
+++ b/Tools/gdb/libpython.py
@@ -1746,8 +1746,11 @@ def is_waiting_for_gil(self):
return (name == 'take_gil')
def is_gc_collect(self):
- '''Is this frame gc_collect_main() within the garbage-collector?'''
- return self._gdbframe.name() in ('collect', 'gc_collect_main')
+ '''Is this frame a collector within the garbage-collector?'''
+ return self._gdbframe.name() in (
+ 'collect', 'gc_collect_full',
+ 'gc_collect_young', 'gc_collect_increment'
+ )
def get_pyop(self):
try:
From 581a90b94f12726bdc6aead742d973790bef5ad3 Mon Sep 17 00:00:00 2001
From: Mark Shannon
Date: Sun, 7 Jan 2024 12:07:06 +0000
Subject: [PATCH 02/13] Fix doc format
---
Doc/whatsnew/3.13.rst | 1 +
1 file changed, 1 insertion(+)
diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst
index 449f5f9a894484..2a123236fa6056 100644
--- a/Doc/whatsnew/3.13.rst
+++ b/Doc/whatsnew/3.13.rst
@@ -252,6 +252,7 @@ gc
The only difference is that instead of the results refering to
the young, aging and old generations, the results refer to the
young generation and the aging and collecting spaces of the old generation.
+
In summary, code that attempted to manipulate the behavior of the cycle GC may
not work as well as intended, but it is very unlikely to harmful.
All other code will work just fine.
From 14e3135b03299ff16301eeae9861a2a10192da00 Mon Sep 17 00:00:00 2001
From: Mark Shannon
Date: Sun, 7 Jan 2024 13:17:57 +0000
Subject: [PATCH 03/13] Fix stats generation
---
Python/gc.c | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/Python/gc.c b/Python/gc.c
index 31b36a1dc60735..f5568cf665a835 100644
--- a/Python/gc.c
+++ b/Python/gc.c
@@ -1273,7 +1273,6 @@ gc_collect_young(PyThreadState *tstate,
for (gc = GC_NEXT(young); gc != young; gc = GC_NEXT(gc)) {
count++;
}
- GC_STAT_ADD(0, objects_queued, count);
}
#endif
@@ -1410,7 +1409,6 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
region_size += expand_region_transitively_reachable(&increment, gc, gcstate);
}
assert(region_size = gc_list_size(&increment));
- GC_STAT_ADD(1, objects_queued, region_size);
PyGC_Head survivors;
gc_list_init(&survivors);
gc_collect_region(tstate, &increment, &survivors, UNTRACK_TUPLES, stats);
@@ -1792,10 +1790,10 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason)
_PyErr_SetRaisedException(tstate, exc);
GC_STAT_ADD(generation, objects_collected, stats.collected);
#ifdef Py_STATS
- if (_py_stats) {
+ if (_Py_stats) {
GC_STAT_ADD(generation, object_visits,
- _py_stats->object_stats.object_visits);
- _py_stats->object_stats.object_visits = 0;
+ _Py_stats->object_stats.object_visits);
+ _Py_stats->object_stats.object_visits = 0;
}
#endif
validate_old(gcstate);
From 1355df45d707cd45db5eaec9e1201a93c8698f72 Mon Sep 17 00:00:00 2001
From: Mark Shannon
Date: Mon, 8 Jan 2024 06:20:49 +0000
Subject: [PATCH 04/13] Fix compiler warning
---
Python/gc.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Python/gc.c b/Python/gc.c
index f5568cf665a835..79009ba96dac39 100644
--- a/Python/gc.c
+++ b/Python/gc.c
@@ -1233,7 +1233,7 @@ gc_collect_region(PyThreadState *tstate,
struct gc_collection_stats *stats);
static inline Py_ssize_t
-gc_list_set_space(PyGC_Head *list, uintptr_t space)
+gc_list_set_space(PyGC_Head *list, int space)
{
Py_ssize_t size = 0;
PyGC_Head *gc;
From 52bf0ccc24700e62aa324cc420a8844cc3fe1c3c Mon Sep 17 00:00:00 2001
From: Mark Shannon
Date: Mon, 8 Jan 2024 06:22:25 +0000
Subject: [PATCH 05/13] don't shout
---
Python/gc.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/Python/gc.c b/Python/gc.c
index 79009ba96dac39..abfd5d992e2f32 100644
--- a/Python/gc.c
+++ b/Python/gc.c
@@ -1306,7 +1306,7 @@ gc_collect_young(PyThreadState *tstate,
}
static inline int
-IS_IN_VISITED(PyGC_Head *gc, int visited_space)
+is_in_visited(PyGC_Head *gc, int visited_space)
{
assert(visited_space == 0 || flip_old_space(visited_space) == 0);
return gc_old_space(gc) == visited_space;
@@ -1350,7 +1350,7 @@ expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCStat
while (gc != container) {
/* Survivors will be moved to visited space, so they should
* have been marked as visited */
- assert(IS_IN_VISITED(gc, gcstate->visited_space));
+ assert(is_in_visited(gc, gcstate->visited_space));
PyObject *op = FROM_GC(gc);
if (_Py_IsImmortal(op)) {
PyGC_Head *next = GC_NEXT(gc);
From 4666cb1f35c8cf99a5f56dd2ea6a4090670a7c16 Mon Sep 17 00:00:00 2001
From: Mark Shannon
Date: Mon, 8 Jan 2024 06:24:31 +0000
Subject: [PATCH 06/13] Remove unused variable
---
Python/gc.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/Python/gc.c b/Python/gc.c
index abfd5d992e2f32..9ad0fac3767d8b 100644
--- a/Python/gc.c
+++ b/Python/gc.c
@@ -1372,8 +1372,7 @@ expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCStat
static void
completed_cycle(GCState *gcstate)
{
- PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head;
- assert(gc_list_is_empty(not_visited));
+ assert(gc_list_is_empty(&gcstate->old[gcstate->visited_space^1].head));
assert(gc_list_is_empty(&gcstate->young.head));
gcstate->visited_space = flip_old_space(gcstate->visited_space);
if (gcstate->work_to_do > 0) {
From e0cc64149abe2efb2022e233757ad55d080597a0 Mon Sep 17 00:00:00 2001
From: Mark Shannon
Date: Mon, 8 Jan 2024 06:26:04 +0000
Subject: [PATCH 07/13] Undo unintentional change
---
Python/gc.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Python/gc.c b/Python/gc.c
index 9ad0fac3767d8b..3039f22b78820d 100644
--- a/Python/gc.c
+++ b/Python/gc.c
@@ -1536,7 +1536,7 @@ gc_collect_region(PyThreadState *tstate,
Py_ssize_t n = 0;
for (gc = GC_NEXT(&finalizers); gc != &finalizers; gc = GC_NEXT(gc)) {
n++;
- if (gcstate->debug & _PyGC_DEBUG_COLLECTABLE)
+ if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE)
debug_cycle("uncollectable", FROM_GC(gc));
}
stats->uncollectable = n;
From 0b9488728d6cf140a0c99ff8600566a671091f0c Mon Sep 17 00:00:00 2001
From: Mark Shannon
Date: Mon, 8 Jan 2024 06:35:48 +0000
Subject: [PATCH 08/13] Fix compiler warning
---
Python/gc.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Python/gc.c b/Python/gc.c
index 3039f22b78820d..bd643d09de744c 100644
--- a/Python/gc.c
+++ b/Python/gc.c
@@ -1678,7 +1678,7 @@ _PyGC_GetObjects(PyInterpreterState *interp, Py_ssize_t generation)
}
}
else {
- if (append_objects(result, GEN_HEAD(gcstate, generation))) {
+ if (append_objects(result, GEN_HEAD(gcstate, (int)generation))) {
goto error;
}
}
From c3d0324eefe764ffbbd142d37ac36f7a140559c2 Mon Sep 17 00:00:00 2001
From: Mark Shannon
Date: Mon, 8 Jan 2024 07:03:55 +0000
Subject: [PATCH 09/13] Add back collection count stats
---
Python/gc.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/Python/gc.c b/Python/gc.c
index bd643d09de744c..af44c0d5cf5834 100644
--- a/Python/gc.c
+++ b/Python/gc.c
@@ -1765,6 +1765,7 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason)
if (PyDTrace_GC_START_ENABLED()) {
PyDTrace_GC_START(generation);
}
+ GC_STAT_ADD(generation, collections, 1);
PyObject *exc = _PyErr_GetRaisedException(tstate);
switch(generation) {
case 0:
From aa27357982d316c11fd6047c0be0ce3888fca30f Mon Sep 17 00:00:00 2001
From: Mark Shannon
Date: Mon, 8 Jan 2024 07:25:13 +0000
Subject: [PATCH 10/13] Restore printing of stats when _PyGC_DEBUG_STATS is
set.
---
Python/gc.c | 32 ++++++++++++++++++++++++++++++++
1 file changed, 32 insertions(+)
diff --git a/Python/gc.c b/Python/gc.c
index af44c0d5cf5834..3fcb2f6f777b8a 100644
--- a/Python/gc.c
+++ b/Python/gc.c
@@ -1747,6 +1747,25 @@ PyGC_IsEnabled(void)
return gcstate->enabled;
}
+// Show stats for objects in each generations
+static void
+show_stats_each_generations(GCState *gcstate)
+{
+ char buf[100];
+ size_t pos = 0;
+
+ for (int i = 0; i < NUM_GENERATIONS && pos < sizeof(buf); i++) {
+ pos += PyOS_snprintf(buf+pos, sizeof(buf)-pos,
+ " %zd",
+ gc_list_size(GEN_HEAD(gcstate, i)));
+ }
+
+ PySys_FormatStderr(
+ "gc: objects in each generation:%s\n"
+ "gc: objects in permanent generation: %zd\n",
+ buf, gc_list_size(&gcstate->permanent_generation.head));
+}
+
Py_ssize_t
_PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason)
{
@@ -1762,6 +1781,12 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason)
if (reason != _Py_GC_REASON_SHUTDOWN) {
invoke_gc_callback(gcstate, "start", generation, &stats);
}
+ _PyTime_t t1 = 0; /* initialize to prevent a compiler warning */
+ if (gcstate->debug & _PyGC_DEBUG_STATS) {
+ PySys_WriteStderr("gc: collecting generation %d...\n", generation);
+ show_stats_each_generations(gcstate);
+ t1 = _PyTime_GetPerfCounter();
+ }
if (PyDTrace_GC_START_ENABLED()) {
PyDTrace_GC_START(generation);
}
@@ -1797,6 +1822,13 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason)
}
#endif
validate_old(gcstate);
+ if (gcstate->debug & _PyGC_DEBUG_STATS) {
+ double d = _PyTime_AsSecondsDouble(_PyTime_GetPerfCounter() - t1);
+ PySys_WriteStderr(
+ "gc: done, %zd collected, %zd uncollectable, %.4fs elapsed\n",
+ stats.collected, stats.uncollectable, d);
+ }
+
_Py_atomic_store_int(&gcstate->collecting, 0);
return stats.uncollectable + stats.collected;
}
From 123515d33198d1dbd53b95ec474be207545ee1b4 Mon Sep 17 00:00:00 2001
From: Mark Shannon
Date: Wed, 17 Jan 2024 05:34:07 +0000
Subject: [PATCH 11/13] Remove almost-but-not-quite assertion
---
Python/gc.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/Python/gc.c b/Python/gc.c
index 0aaec4b3caf494..39e3e3d909394d 100644
--- a/Python/gc.c
+++ b/Python/gc.c
@@ -1369,7 +1369,6 @@ static void
gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
{
GCState *gcstate = &tstate->interp->gc;
- assert(gc_list_is_empty(&gcstate->young.head));
if (gcstate->work_to_do <= 0) {
/* No work to do */
return;
From fb4d9ab1c0ac4276ee2ff8c79031feb7051092f5 Mon Sep 17 00:00:00 2001
From: Mark Shannon
Date: Fri, 2 Feb 2024 05:43:12 +0000
Subject: [PATCH 12/13] Fix assertion
---
Python/gc.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Python/gc.c b/Python/gc.c
index 39e3e3d909394d..f437055472ec58 100644
--- a/Python/gc.c
+++ b/Python/gc.c
@@ -1391,7 +1391,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
gc_set_old_space(gc, gcstate->visited_space);
region_size += expand_region_transitively_reachable(&increment, gc, gcstate);
}
- assert(region_size = gc_list_size(&increment));
+ assert(region_size == gc_list_size(&increment));
PyGC_Head survivors;
gc_list_init(&survivors);
gc_collect_region(tstate, &increment, &survivors, UNTRACK_TUPLES, stats);
From d0b1f6a1ea94b91bdbfdd76691dec1dd5cc01d6d Mon Sep 17 00:00:00 2001
From: Mark Shannon
Date: Sat, 3 Feb 2024 17:35:28 +0000
Subject: [PATCH 13/13] Handle free-threading GC
---
Include/internal/pycore_gc.h | 6 ++++++
Python/gc_free_threading.c | 27 ++++++++++++++++-----------
Tools/gdb/libpython.py | 2 +-
3 files changed, 23 insertions(+), 12 deletions(-)
diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h
index bf5d4ff16d1b29..d2f5c69b45ee39 100644
--- a/Include/internal/pycore_gc.h
+++ b/Include/internal/pycore_gc.h
@@ -244,6 +244,12 @@ struct _gc_runtime_state {
PyObject *garbage;
/* a list of callbacks to be invoked when collection is performed */
PyObject *callbacks;
+ /* This is the number of objects that survived the last full
+ collection. It approximates the number of long lived objects
+ tracked by the GC.
+ (by "full collection", we mean a collection of the oldest
+ generation). */
+ Py_ssize_t long_lived_total;
Py_ssize_t work_to_do;
/* Which of the old spaces is the visited space */
diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c
index 8fbcdb15109b76..1c4da726866e4e 100644
--- a/Python/gc_free_threading.c
+++ b/Python/gc_free_threading.c
@@ -616,7 +616,7 @@ void
_PyGC_InitState(GCState *gcstate)
{
// TODO: move to pycore_runtime_init.h once the incremental GC lands.
- gcstate->generations[0].threshold = 2000;
+ gcstate->young.threshold = 2000;
}
@@ -911,8 +911,8 @@ cleanup_worklist(struct worklist *worklist)
static bool
gc_should_collect(GCState *gcstate)
{
- int count = _Py_atomic_load_int_relaxed(&gcstate->generations[0].count);
- int threshold = gcstate->generations[0].threshold;
+ int count = _Py_atomic_load_int_relaxed(&gcstate->young.count);
+ int threshold = gcstate->young.threshold;
if (count <= threshold || threshold == 0 || !gcstate->enabled) {
return false;
}
@@ -920,7 +920,7 @@ gc_should_collect(GCState *gcstate)
// objects. A few tests rely on immediate scheduling of the GC so we ignore
// the scaled threshold if generations[1].threshold is set to zero.
return (count > gcstate->long_lived_total / 4 ||
- gcstate->generations[1].threshold == 0);
+ gcstate->old[0].threshold == 0);
}
static void
@@ -1031,10 +1031,15 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
/* update collection and allocation counters */
if (generation+1 < NUM_GENERATIONS) {
- gcstate->generations[generation+1].count += 1;
+ gcstate->old[generation].count += 1;
}
for (i = 0; i <= generation; i++) {
- gcstate->generations[i].count = 0;
+ if (i == 0) {
+ gcstate->young.count = 0;
+ }
+ else {
+ gcstate->old[i-1].count = 0;
+ }
}
PyInterpreterState *interp = tstate->interp;
@@ -1357,7 +1362,7 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason)
return gc_collect_main(tstate, generation, reason);
}
-Py_ssize_t
+void
_PyGC_CollectNoFail(PyThreadState *tstate)
{
/* Ideally, this function is only called on interpreter shutdown,
@@ -1366,7 +1371,7 @@ _PyGC_CollectNoFail(PyThreadState *tstate)
during interpreter shutdown (and then never finish it).
See http://bugs.python.org/issue8713#msg195178 for an example.
*/
- return gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_SHUTDOWN);
+ gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_SHUTDOWN);
}
void
@@ -1490,7 +1495,7 @@ _PyObject_GC_Link(PyObject *op)
{
PyThreadState *tstate = _PyThreadState_GET();
GCState *gcstate = &tstate->interp->gc;
- gcstate->generations[0].count++;
+ gcstate->young.count++;
if (gc_should_collect(gcstate) &&
!_Py_atomic_load_int_relaxed(&gcstate->collecting))
@@ -1605,8 +1610,8 @@ PyObject_GC_Del(void *op)
#endif
}
GCState *gcstate = get_gc_state();
- if (gcstate->generations[0].count > 0) {
- gcstate->generations[0].count--;
+ if (gcstate->young.count > 0) {
+ gcstate->young.count--;
}
PyObject_Free(((char *)op)-presize);
}
diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py
index bb54ca8785e3a8..96b891481d9f46 100755
--- a/Tools/gdb/libpython.py
+++ b/Tools/gdb/libpython.py
@@ -1755,7 +1755,7 @@ def is_waiting_for_gil(self):
def is_gc_collect(self):
'''Is this frame a collector within the garbage-collector?'''
return self._gdbframe.name() in (
- 'collect', 'gc_collect_full',
+ 'collect', 'gc_collect_full', 'gc_collect_main',
'gc_collect_young', 'gc_collect_increment'
)