Skip to content

Commit d760e05

Browse files
committed
Fix use-after-free in tl_pending_args across subinterpreters
Clear tl_pending_args to NULL whenever tl_pending_callback is set to false. Previously, the thread-local pointer was left dangling after callback completion. When a dirty scheduler thread later handled a different subinterpreter's code, Py_XDECREF on the stale pointer would attempt to free memory from the wrong allocator.
1 parent 47c4a5c commit d760e05

File tree

3 files changed

+61
-3
lines changed

3 files changed

+61
-3
lines changed

c_src/py_callback.c

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -563,6 +563,7 @@ static PyObject *build_pending_callback_exc_args(void) {
563563
PyObject *exc_args = PyTuple_New(3);
564564
if (exc_args == NULL) {
565565
tl_pending_callback = false;
566+
Py_CLEAR(tl_pending_args);
566567
return NULL;
567568
}
568569

@@ -575,6 +576,7 @@ static PyObject *build_pending_callback_exc_args(void) {
575576
Py_XDECREF(func_name_obj);
576577
Py_DECREF(exc_args);
577578
tl_pending_callback = false;
579+
Py_CLEAR(tl_pending_args);
578580
return NULL;
579581
}
580582

@@ -610,6 +612,7 @@ static ERL_NIF_TERM build_suspended_result(ErlNifEnv *env, suspended_state_t *su
610612
ERL_NIF_TERM args_term = py_to_term(env, tl_pending_args);
611613

612614
tl_pending_callback = false;
615+
Py_CLEAR(tl_pending_args);
613616

614617
return enif_make_tuple4(env,
615618
ATOM_SUSPENDED,
@@ -811,6 +814,7 @@ static ERL_NIF_TERM build_suspended_context_result(ErlNifEnv *env, suspended_con
811814
ERL_NIF_TERM args_term = py_to_term(env, tl_pending_args);
812815

813816
tl_pending_callback = false;
817+
Py_CLEAR(tl_pending_args);
814818

815819
return enif_make_tuple4(env,
816820
ATOM_SUSPENDED,
@@ -1290,6 +1294,19 @@ PyTypeObject ErlangPidType = {
12901294
static PyObject *erlang_call_impl(PyObject *self, PyObject *args) {
12911295
(void)self;
12921296

1297+
/*
1298+
* Invariant check: pending callback TLS must be clear when entering.
1299+
* If any state is still set, it's leaked from a prior context that didn't
1300+
* properly clean up - fail loudly rather than risk cross-interpreter corruption.
1301+
*/
1302+
if (tl_pending_callback || tl_pending_args != NULL ||
1303+
tl_pending_func_name != NULL || tl_pending_callback_id != 0) {
1304+
PyErr_SetString(PyExc_RuntimeError,
1305+
"erlang.call: stale pending callback TLS detected - "
1306+
"prior context did not clean up properly");
1307+
return NULL;
1308+
}
1309+
12931310
/*
12941311
* Check if we have a callback handler available.
12951312
* Priority:
@@ -1553,6 +1570,7 @@ static PyObject *erlang_call_impl(PyObject *self, PyObject *args) {
15531570
tl_pending_func_name = enif_alloc(func_name_len + 1);
15541571
if (tl_pending_func_name == NULL) {
15551572
tl_pending_callback = false;
1573+
Py_CLEAR(tl_pending_args);
15561574
Py_DECREF(call_args);
15571575
PyErr_SetString(PyExc_MemoryError, "Failed to allocate function name");
15581576
return NULL;
@@ -1561,9 +1579,12 @@ static PyObject *erlang_call_impl(PyObject *self, PyObject *args) {
15611579
tl_pending_func_name[func_name_len] = '\0';
15621580
tl_pending_func_name_len = func_name_len;
15631581

1564-
/* Store args (take ownership) */
1565-
Py_XDECREF(tl_pending_args);
1566-
tl_pending_args = call_args; /* Takes ownership, don't decref */
1582+
/* Store args (take ownership)
1583+
* Use Py_XSETREF for swap-first pattern: sets tl_pending_args to new value
1584+
* BEFORE decref'ing old value. This prevents re-entrancy issues if the old
1585+
* object's finalizer triggers another erlang.call() during decref.
1586+
*/
1587+
Py_XSETREF(tl_pending_args, call_args);
15671588

15681589
/* Raise exception to abort Python execution */
15691590
PyErr_SetString(SuspensionRequiredException, "callback pending");
@@ -2649,6 +2670,7 @@ static ERL_NIF_TERM nif_resume_callback_dirty(ErlNifEnv *env, int argc, const ER
26492670
Py_DECREF(exc_args);
26502671
if (new_suspended == NULL) {
26512672
tl_pending_callback = false;
2673+
Py_CLEAR(tl_pending_args);
26522674
result = make_error(env, "create_nested_suspended_state_failed");
26532675
} else {
26542676
result = build_suspended_result(env, new_suspended);
@@ -2717,6 +2739,7 @@ static ERL_NIF_TERM nif_resume_callback_dirty(ErlNifEnv *env, int argc, const ER
27172739
Py_DECREF(exc_args);
27182740
if (new_suspended == NULL) {
27192741
tl_pending_callback = false;
2742+
Py_CLEAR(tl_pending_args);
27202743
result = make_error(env, "create_nested_suspended_state_failed");
27212744
} else {
27222745
result = build_suspended_result(env, new_suspended);

c_src/py_exec.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,7 @@ static void process_request(py_request_t *req) {
306306
Py_DECREF(exc_args);
307307
if (suspended == NULL) {
308308
tl_pending_callback = false;
309+
Py_CLEAR(tl_pending_args);
309310
req->result = make_error(env, "create_suspended_state_failed");
310311
} else {
311312
req->result = build_suspended_result(env, suspended);
@@ -393,6 +394,7 @@ static void process_request(py_request_t *req) {
393394
Py_DECREF(exc_args);
394395
if (suspended == NULL) {
395396
tl_pending_callback = false;
397+
Py_CLEAR(tl_pending_args);
396398
req->result = make_error(env, "create_suspended_state_failed");
397399
} else {
398400
req->result = build_suspended_result(env, suspended);

c_src/py_nif.c

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,24 @@ __thread char *tl_pending_func_name = NULL;
116116
__thread size_t tl_pending_func_name_len = 0;
117117
__thread PyObject *tl_pending_args = NULL;
118118

119+
/**
120+
* Clear all pending callback thread-local state.
121+
*
122+
* Must be called at context boundaries while still in the correct interpreter
123+
* context, to prevent cross-interpreter contamination if Python code caught
124+
* and swallowed SuspensionRequiredException.
125+
*/
126+
static inline void clear_pending_callback_tls(void) {
127+
tl_pending_callback = false;
128+
tl_pending_callback_id = 0;
129+
if (tl_pending_func_name != NULL) {
130+
enif_free(tl_pending_func_name);
131+
tl_pending_func_name = NULL;
132+
}
133+
tl_pending_func_name_len = 0;
134+
Py_CLEAR(tl_pending_args);
135+
}
136+
119137
/* Thread-local timeout state */
120138
__thread uint64_t tl_timeout_deadline = 0;
121139
__thread bool tl_timeout_enabled = false;
@@ -2280,6 +2298,7 @@ static ERL_NIF_TERM nif_context_call(ErlNifEnv *env, int argc, const ERL_NIF_TER
22802298

22812299
if (suspended == NULL) {
22822300
tl_pending_callback = false;
2301+
Py_CLEAR(tl_pending_args);
22832302
result = make_error(env, "create_suspended_state_failed");
22842303
} else {
22852304
result = build_suspended_context_result(env, suspended);
@@ -2298,6 +2317,9 @@ static ERL_NIF_TERM nif_context_call(ErlNifEnv *env, int argc, const ERL_NIF_TER
22982317
tl_allow_suspension = prev_allow_suspension;
22992318
tl_current_context = prev_context;
23002319

2320+
/* Clear pending callback TLS before releasing context */
2321+
clear_pending_callback_tls();
2322+
23012323
enif_free(module_name);
23022324
enif_free(func_name);
23032325

@@ -2382,6 +2404,7 @@ static ERL_NIF_TERM nif_context_eval(ErlNifEnv *env, int argc, const ERL_NIF_TER
23822404

23832405
if (suspended == NULL) {
23842406
tl_pending_callback = false;
2407+
Py_CLEAR(tl_pending_args);
23852408
result = make_error(env, "create_suspended_state_failed");
23862409
} else {
23872410
result = build_suspended_context_result(env, suspended);
@@ -2399,6 +2422,9 @@ static ERL_NIF_TERM nif_context_eval(ErlNifEnv *env, int argc, const ERL_NIF_TER
23992422
tl_allow_suspension = prev_allow_suspension;
24002423
tl_current_context = prev_context;
24012424

2425+
/* Clear pending callback TLS before releasing context */
2426+
clear_pending_callback_tls();
2427+
24022428
enif_free(code);
24032429

24042430
/* Release thread state using centralized guard */
@@ -2867,12 +2893,14 @@ static ERL_NIF_TERM nif_context_resume(ErlNifEnv *env, int argc, const ERL_NIF_T
28672893

28682894
if (nested == NULL) {
28692895
tl_pending_callback = false;
2896+
Py_CLEAR(tl_pending_args);
28702897
result = make_error(env, "create_nested_suspended_state_failed");
28712898
} else {
28722899
/* Copy accumulated callback results from parent to nested state */
28732900
if (copy_callback_results_to_nested(nested, state) != 0) {
28742901
enif_release_resource(nested);
28752902
tl_pending_callback = false;
2903+
Py_CLEAR(tl_pending_args);
28762904
result = make_error(env, "copy_callback_results_failed");
28772905
} else {
28782906
result = build_suspended_context_result(env, nested);
@@ -2921,12 +2949,14 @@ static ERL_NIF_TERM nif_context_resume(ErlNifEnv *env, int argc, const ERL_NIF_T
29212949

29222950
if (nested == NULL) {
29232951
tl_pending_callback = false;
2952+
Py_CLEAR(tl_pending_args);
29242953
result = make_error(env, "create_nested_suspended_state_failed");
29252954
} else {
29262955
/* Copy accumulated callback results from parent to nested state */
29272956
if (copy_callback_results_to_nested(nested, state) != 0) {
29282957
enif_release_resource(nested);
29292958
tl_pending_callback = false;
2959+
Py_CLEAR(tl_pending_args);
29302960
result = make_error(env, "copy_callback_results_failed");
29312961
} else {
29322962
result = build_suspended_context_result(env, nested);
@@ -2951,6 +2981,9 @@ static ERL_NIF_TERM nif_context_resume(ErlNifEnv *env, int argc, const ERL_NIF_T
29512981
tl_allow_suspension = prev_allow_suspension;
29522982
tl_current_context = prev_context;
29532983

2984+
/* Clear pending callback TLS before releasing context */
2985+
clear_pending_callback_tls();
2986+
29542987
/* Release thread state using centralized guard */
29552988
py_context_release(&guard);
29562989

0 commit comments

Comments
 (0)