From a7b41c8711a7746a2c046dc935ffc7313712024d Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Wed, 4 Dec 2024 19:01:42 +0000 Subject: [PATCH 1/4] gh-127582: Make object resurrection thread-safe for free threading. Objects may be temporarily "resurrected" in destructors when calling finalizers or watcher callbacks. We previously undid the resurrection by decrementing the reference count using `Py_SET_REFCNT`. This was not thread-safe because other threads might be accessing the object (modifying its reference count) if it was exposed by the finalizer, watcher callback, or temporarily accessed by a racy dictionary or list access. This adds internal-only thread-safe functions for temporary object resurrection during destructors. --- Include/internal/pycore_object.h | 44 ++++++++++++++++++++++++++++++++ Objects/codeobject.c | 7 ++--- Objects/dictobject.c | 7 ++--- Objects/funcobject.c | 7 ++--- Objects/object.c | 38 +++++++++++++++++++++++---- 5 files changed, 83 insertions(+), 20 deletions(-) diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index ce876b093b2522..bf72089ea02e9a 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -697,8 +697,52 @@ _PyObject_SetMaybeWeakref(PyObject *op) } } +extern int _PyObject_ResurrectEndSlow(PyObject *op); #endif +// Temporarily resurrects an object during deallocation. The refcount is set +// to one. +static inline void +_PyObject_Resurrect(PyObject *op) +{ + assert(Py_REFCNT(op) == 0); +#ifdef Py_REF_DEBUG + _Py_IncRefTotal(_PyThreadState_GET()); +#endif +#ifdef Py_GIL_DISABLED + _Py_atomic_store_uintptr_relaxed(&op->ob_tid, _Py_ThreadId()); + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, 1); + _Py_atomic_store_ssize_relaxed(&op->ob_ref_shared, 0); +#else + Py_SET_REFCNT(op, 1); +#endif +} + +// Undoes an object resurrection by decrementing the refcount without calling +// _Py_Dealloc(). Returns 1 if the object is dead, and deallocation +// should continue. Returns 0 if the object is still alive. +static inline int +_PyObject_ResurrectEnd(PyObject *op) +{ +#ifdef Py_REF_DEBUG + _Py_DecRefTotal(_PyThreadState_GET()); +#endif +#ifndef Py_GIL_DISABLED + Py_SET_REFCNT(op, Py_REFCNT(op) - 1); + return Py_REFCNT(op) == 0; +#else + uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local); + Py_ssize_t shared = _Py_atomic_load_ssize_acquire(&op->ob_ref_shared); + if (_Py_IsOwnedByCurrentThread(op) && local == 1 && shared == 0) { + // Fast-path: object has a single refcount and is owned by this thread + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, 0); + return 1; + } + // Slow-path: object has a shared refcount or is not owned by this thread + return _PyObject_ResurrectEndSlow(op); +#endif +} + /* Tries to incref op and returns 1 if successful or 0 otherwise. */ static inline int _Py_TryIncref(PyObject *op) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 148350cc4b9195..5e42e78f3eac40 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1867,14 +1867,11 @@ free_monitoring_data(_PyCoMonitoringData *data) static void code_dealloc(PyCodeObject *co) { - assert(Py_REFCNT(co) == 0); - Py_SET_REFCNT(co, 1); + _PyObject_Resurrect((PyObject *)co); notify_code_watchers(PY_CODE_EVENT_DESTROY, co); - if (Py_REFCNT(co) > 1) { - Py_SET_REFCNT(co, Py_REFCNT(co) - 1); + if (!_PyObject_ResurrectEnd((PyObject *)co)) { return; } - Py_SET_REFCNT(co, 0); #ifdef Py_GIL_DISABLED PyObject_GC_UnTrack(co); diff --git a/Objects/dictobject.c b/Objects/dictobject.c index a13d8084d14d66..3fe6414f1714a4 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -3162,14 +3162,11 @@ dict_dealloc(PyObject *self) { PyDictObject *mp = (PyDictObject *)self; PyInterpreterState *interp = _PyInterpreterState_GET(); - assert(Py_REFCNT(mp) == 0); - Py_SET_REFCNT(mp, 1); + _PyObject_Resurrect(self); _PyDict_NotifyEvent(interp, PyDict_EVENT_DEALLOCATED, mp, NULL, NULL); - if (Py_REFCNT(mp) > 1) { - Py_SET_REFCNT(mp, Py_REFCNT(mp) - 1); + if (!_PyObject_ResurrectEnd(self)) { return; } - Py_SET_REFCNT(mp, 0); PyDictValues *values = mp->ma_values; PyDictKeysObject *keys = mp->ma_keys; Py_ssize_t i, n; diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 4ba47285f7152f..aa4d0154724a41 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -1092,14 +1092,11 @@ static void func_dealloc(PyObject *self) { PyFunctionObject *op = _PyFunction_CAST(self); - assert(Py_REFCNT(op) == 0); - Py_SET_REFCNT(op, 1); + _PyObject_Resurrect(self); handle_func_event(PyFunction_EVENT_DESTROY, op, NULL); - if (Py_REFCNT(op) > 1) { - Py_SET_REFCNT(op, Py_REFCNT(op) - 1); + if (!_PyObject_ResurrectEnd(self)) { return; } - Py_SET_REFCNT(op, 0); _PyObject_GC_UNTRACK(op); if (op->func_weakreflist != NULL) { PyObject_ClearWeakRefs((PyObject *) op); diff --git a/Objects/object.c b/Objects/object.c index 8868fa29066404..afcb7de36f3581 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -362,8 +362,10 @@ is_dead(PyObject *o) } # endif -void -_Py_DecRefSharedDebug(PyObject *o, const char *filename, int lineno) +// Decrement the shared reference count of an object. Return 1 if the object +// is dead and should be deallocated, 0 otherwise. +static int +_Py_DecRefSharedIsDead(PyObject *o, const char *filename, int lineno) { // Should we queue the object for the owning thread to merge? int should_queue; @@ -404,6 +406,15 @@ _Py_DecRefSharedDebug(PyObject *o, const char *filename, int lineno) } else if (new_shared == _Py_REF_MERGED) { // refcount is zero AND merged + return 1; + } + return 0; +} + +void +_Py_DecRefSharedDebug(PyObject *o, const char *filename, int lineno) +{ + if (_Py_DecRefSharedIsDead(o, filename, lineno)) { _Py_Dealloc(o); } } @@ -472,6 +483,24 @@ _Py_ExplicitMergeRefcount(PyObject *op, Py_ssize_t extra) &shared, new_shared)); return refcnt; } + +// The more complicated "slow" path for undoing the resurrection of an object. +int +_PyObject_ResurrectEndSlow(PyObject *op) +{ + if (_Py_IsImmortal(op)) { + return 0; + } + if (_Py_IsOwnedByCurrentThread(op)) { + // If the object is owned by the current thread, give up ownership and + // merge the refcount. This isn't necessary in all cases, but it + // simplifies the implementation. + return _Py_ExplicitMergeRefcount(op, -1) == 0; + } + return _Py_DecRefSharedIsDead(op, NULL, 0); +} + + #endif /* Py_GIL_DISABLED */ @@ -550,7 +579,7 @@ PyObject_CallFinalizerFromDealloc(PyObject *self) } /* Temporarily resurrect the object. */ - Py_SET_REFCNT(self, 1); + _PyObject_Resurrect(self); PyObject_CallFinalizer(self); @@ -560,8 +589,7 @@ PyObject_CallFinalizerFromDealloc(PyObject *self) /* Undo the temporary resurrection; can't use DECREF here, it would * cause a recursive call. */ - Py_SET_REFCNT(self, Py_REFCNT(self) - 1); - if (Py_REFCNT(self) == 0) { + if (_PyObject_ResurrectEnd(self)) { return 0; /* this is the normal path out */ } From f56bbbb50701bfbad052ad771e9518bc276f31b1 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 5 Dec 2024 10:31:02 -0500 Subject: [PATCH 2/4] Switch to dead=0 and alive=1 --- Include/internal/pycore_object.h | 8 ++++---- Objects/codeobject.c | 2 +- Objects/dictobject.c | 2 +- Objects/funcobject.c | 2 +- Objects/object.c | 10 ++++++---- 5 files changed, 13 insertions(+), 11 deletions(-) diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index bf72089ea02e9a..9592873a7a75d0 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -719,8 +719,8 @@ _PyObject_Resurrect(PyObject *op) } // Undoes an object resurrection by decrementing the refcount without calling -// _Py_Dealloc(). Returns 1 if the object is dead, and deallocation -// should continue. Returns 0 if the object is still alive. +// _Py_Dealloc(). Returns 0 if the object is dead (the normal case), and +// deallocation should continue. Returns 1 if the object is still alive. static inline int _PyObject_ResurrectEnd(PyObject *op) { @@ -729,14 +729,14 @@ _PyObject_ResurrectEnd(PyObject *op) #endif #ifndef Py_GIL_DISABLED Py_SET_REFCNT(op, Py_REFCNT(op) - 1); - return Py_REFCNT(op) == 0; + return Py_REFCNT(op) != 0; #else uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local); Py_ssize_t shared = _Py_atomic_load_ssize_acquire(&op->ob_ref_shared); if (_Py_IsOwnedByCurrentThread(op) && local == 1 && shared == 0) { // Fast-path: object has a single refcount and is owned by this thread _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, 0); - return 1; + return 0; } // Slow-path: object has a shared refcount or is not owned by this thread return _PyObject_ResurrectEndSlow(op); diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 5e42e78f3eac40..b8b233ae124350 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1869,7 +1869,7 @@ code_dealloc(PyCodeObject *co) { _PyObject_Resurrect((PyObject *)co); notify_code_watchers(PY_CODE_EVENT_DESTROY, co); - if (!_PyObject_ResurrectEnd((PyObject *)co)) { + if (_PyObject_ResurrectEnd((PyObject *)co)) { return; } diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 3fe6414f1714a4..d6392174410344 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -3164,7 +3164,7 @@ dict_dealloc(PyObject *self) PyInterpreterState *interp = _PyInterpreterState_GET(); _PyObject_Resurrect(self); _PyDict_NotifyEvent(interp, PyDict_EVENT_DEALLOCATED, mp, NULL, NULL); - if (!_PyObject_ResurrectEnd(self)) { + if (_PyObject_ResurrectEnd(self)) { return; } PyDictValues *values = mp->ma_values; diff --git a/Objects/funcobject.c b/Objects/funcobject.c index aa4d0154724a41..607103490f6107 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -1094,7 +1094,7 @@ func_dealloc(PyObject *self) PyFunctionObject *op = _PyFunction_CAST(self); _PyObject_Resurrect(self); handle_func_event(PyFunction_EVENT_DESTROY, op, NULL); - if (!_PyObject_ResurrectEnd(self)) { + if (_PyObject_ResurrectEnd(self)) { return; } _PyObject_GC_UNTRACK(op); diff --git a/Objects/object.c b/Objects/object.c index afcb7de36f3581..55cc9b66422610 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -489,15 +489,17 @@ int _PyObject_ResurrectEndSlow(PyObject *op) { if (_Py_IsImmortal(op)) { - return 0; + return 1; } if (_Py_IsOwnedByCurrentThread(op)) { // If the object is owned by the current thread, give up ownership and // merge the refcount. This isn't necessary in all cases, but it // simplifies the implementation. - return _Py_ExplicitMergeRefcount(op, -1) == 0; + Py_ssize_t refcount = _Py_ExplicitMergeRefcount(op, -1); + return refcount != 0; } - return _Py_DecRefSharedIsDead(op, NULL, 0); + int is_dead = _Py_DecRefSharedIsDead(op, NULL, 0); + return !is_dead; } @@ -589,7 +591,7 @@ PyObject_CallFinalizerFromDealloc(PyObject *self) /* Undo the temporary resurrection; can't use DECREF here, it would * cause a recursive call. */ - if (_PyObject_ResurrectEnd(self)) { + if (!_PyObject_ResurrectEnd(self)) { return 0; /* this is the normal path out */ } From 2b1b67330e9b0abc9ffb0999772a41a83109d712 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 5 Dec 2024 19:23:08 +0000 Subject: [PATCH 3/4] Rename _PyObject_Resurrect to _PyObject_ResurrectStart --- Include/internal/pycore_object.h | 2 +- Objects/codeobject.c | 2 +- Objects/dictobject.c | 2 +- Objects/funcobject.c | 2 +- Objects/object.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 9592873a7a75d0..6b0b464a6fdb96 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -703,7 +703,7 @@ extern int _PyObject_ResurrectEndSlow(PyObject *op); // Temporarily resurrects an object during deallocation. The refcount is set // to one. static inline void -_PyObject_Resurrect(PyObject *op) +_PyObject_ResurrectStart(PyObject *op) { assert(Py_REFCNT(op) == 0); #ifdef Py_REF_DEBUG diff --git a/Objects/codeobject.c b/Objects/codeobject.c index b8b233ae124350..eb8de136ee6432 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1867,7 +1867,7 @@ free_monitoring_data(_PyCoMonitoringData *data) static void code_dealloc(PyCodeObject *co) { - _PyObject_Resurrect((PyObject *)co); + _PyObject_ResurrectStart((PyObject *)co); notify_code_watchers(PY_CODE_EVENT_DESTROY, co); if (_PyObject_ResurrectEnd((PyObject *)co)) { return; diff --git a/Objects/dictobject.c b/Objects/dictobject.c index d6392174410344..1c9f86438dadc3 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -3162,7 +3162,7 @@ dict_dealloc(PyObject *self) { PyDictObject *mp = (PyDictObject *)self; PyInterpreterState *interp = _PyInterpreterState_GET(); - _PyObject_Resurrect(self); + _PyObject_ResurrectStart(self); _PyDict_NotifyEvent(interp, PyDict_EVENT_DEALLOCATED, mp, NULL, NULL); if (_PyObject_ResurrectEnd(self)) { return; diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 607103490f6107..cca7f01498013e 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -1092,7 +1092,7 @@ static void func_dealloc(PyObject *self) { PyFunctionObject *op = _PyFunction_CAST(self); - _PyObject_Resurrect(self); + _PyObject_ResurrectStart(self); handle_func_event(PyFunction_EVENT_DESTROY, op, NULL); if (_PyObject_ResurrectEnd(self)) { return; diff --git a/Objects/object.c b/Objects/object.c index 55cc9b66422610..74f47fa4239032 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -581,7 +581,7 @@ PyObject_CallFinalizerFromDealloc(PyObject *self) } /* Temporarily resurrect the object. */ - _PyObject_Resurrect(self); + _PyObject_ResurrectStart(self); PyObject_CallFinalizer(self); From d35b992bb79a2613854acffd93e000474a11c169 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 5 Dec 2024 19:26:06 +0000 Subject: [PATCH 4/4] Add blurb --- .../2024-12-05-19-25-00.gh-issue-127582.ogUY2a.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-12-05-19-25-00.gh-issue-127582.ogUY2a.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-12-05-19-25-00.gh-issue-127582.ogUY2a.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-05-19-25-00.gh-issue-127582.ogUY2a.rst new file mode 100644 index 00000000000000..59491feeb9bcfa --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-05-19-25-00.gh-issue-127582.ogUY2a.rst @@ -0,0 +1,2 @@ +Fix non-thread-safe object resurrection when calling finalizers and watcher +callbacks in the free threading build.