Skip to content

Commit 1a9cdaf

Browse files
authored
gh-116738: Make _bz2 module thread-safe (gh-142756)
Make the attributes of the _bz2 module thread-safe on the free-threading build. The eof and needs_input flags are now stored atomically, and unused_data is now read through a mutex-protected getter.
1 parent a882ae1 commit 1a9cdaf

File tree

3 files changed

+38
-6
lines changed

3 files changed

+38
-6
lines changed

Lib/test/test_free_threading/test_bz2.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,22 @@ def worker():
4242
data = bz2d.decompress(compressed, chunk_size)
4343
self.assertEqual(len(data), chunk_size)
4444
output.append(data)
45+
# Read attributes concurrently with other threads decompressing
46+
self.assertIsInstance(bz2d.eof, bool)
47+
self.assertIsInstance(bz2d.needs_input, bool)
48+
self.assertIsInstance(bz2d.unused_data, bytes)
4549

4650
run_concurrently(worker_func=worker, nthreads=NTHREADS)
4751
self.assertEqual(len(output), NTHREADS)
4852
# Verify the expected chunks (order doesn't matter due to append race)
4953
self.assertEqual(set(output), set(chunks))
54+
self.assertTrue(bz2d.eof)
55+
self.assertFalse(bz2d.needs_input)
56+
# Each thread added full compressed data to the buffer, but only 1 copy
57+
# is consumed to produce the output. The rest remains as unused_data.
58+
self.assertEqual(
59+
len(bz2d.unused_data), len(compressed) * (NTHREADS - 1)
60+
)
5061

5162

5263
if __name__ == "__main__":
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Make the attributes in :mod:`bz2` thread-safe on the :term:`free threaded
2+
<free threading>` build.

Modules/_bz2module.c

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
// Blocks output buffer wrappers
1414
#include "pycore_blocks_output_buffer.h"
15+
#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_STORE_CHAR_RELAXED
1516

1617
#if OUTPUT_BUFFER_MAX_BLOCK_SIZE > UINT32_MAX
1718
#error "The maximum block size accepted by libbzip2 is UINT32_MAX."
@@ -437,7 +438,7 @@ decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length)
437438
if (catch_bz2_error(bzret))
438439
goto error;
439440
if (bzret == BZ_STREAM_END) {
440-
d->eof = 1;
441+
FT_ATOMIC_STORE_CHAR_RELAXED(d->eof, 1);
441442
break;
442443
} else if (d->bzs_avail_in_real == 0) {
443444
break;
@@ -521,7 +522,7 @@ decompress(BZ2Decompressor *d, char *data, size_t len, Py_ssize_t max_length)
521522
}
522523

523524
if (d->eof) {
524-
d->needs_input = 0;
525+
FT_ATOMIC_STORE_CHAR_RELAXED(d->needs_input, 0);
525526
if (d->bzs_avail_in_real > 0) {
526527
Py_XSETREF(d->unused_data,
527528
PyBytes_FromStringAndSize(bzs->next_in, d->bzs_avail_in_real));
@@ -531,10 +532,10 @@ decompress(BZ2Decompressor *d, char *data, size_t len, Py_ssize_t max_length)
531532
}
532533
else if (d->bzs_avail_in_real == 0) {
533534
bzs->next_in = NULL;
534-
d->needs_input = 1;
535+
FT_ATOMIC_STORE_CHAR_RELAXED(d->needs_input, 1);
535536
}
536537
else {
537-
d->needs_input = 0;
538+
FT_ATOMIC_STORE_CHAR_RELAXED(d->needs_input, 0);
538539

539540
/* If we did not use the input buffer, we now have
540541
to copy the tail from the caller's buffer into the
@@ -682,11 +683,28 @@ PyDoc_STRVAR(BZ2Decompressor_unused_data__doc__,
682683
PyDoc_STRVAR(BZ2Decompressor_needs_input_doc,
683684
"True if more input is needed before more decompressed data can be produced.");
684685

686+
static PyObject *
687+
BZ2Decompressor_unused_data_get(PyObject *op, void *Py_UNUSED(ignored))
688+
{
689+
BZ2Decompressor *self = _BZ2Decompressor_CAST(op);
690+
PyMutex_Lock(&self->mutex);
691+
PyObject *result = Py_XNewRef(self->unused_data);
692+
PyMutex_Unlock(&self->mutex);
693+
if (result == NULL) {
694+
PyErr_SetString(PyExc_AttributeError, "unused_data");
695+
}
696+
return result;
697+
}
698+
699+
static PyGetSetDef BZ2Decompressor_getset[] = {
700+
{"unused_data", BZ2Decompressor_unused_data_get, NULL,
701+
BZ2Decompressor_unused_data__doc__},
702+
{NULL},
703+
};
704+
685705
static PyMemberDef BZ2Decompressor_members[] = {
686706
{"eof", Py_T_BOOL, offsetof(BZ2Decompressor, eof),
687707
Py_READONLY, BZ2Decompressor_eof__doc__},
688-
{"unused_data", Py_T_OBJECT_EX, offsetof(BZ2Decompressor, unused_data),
689-
Py_READONLY, BZ2Decompressor_unused_data__doc__},
690708
{"needs_input", Py_T_BOOL, offsetof(BZ2Decompressor, needs_input), Py_READONLY,
691709
BZ2Decompressor_needs_input_doc},
692710
{NULL}
@@ -697,6 +715,7 @@ static PyType_Slot bz2_decompressor_type_slots[] = {
697715
{Py_tp_methods, BZ2Decompressor_methods},
698716
{Py_tp_doc, (char *)_bz2_BZ2Decompressor__doc__},
699717
{Py_tp_members, BZ2Decompressor_members},
718+
{Py_tp_getset, BZ2Decompressor_getset},
700719
{Py_tp_new, _bz2_BZ2Decompressor},
701720
{0, 0}
702721
};

0 commit comments

Comments
 (0)