Skip to content

Commit 14e6052

Browse files
authored
gh-139871: Optimize bytearray construction with encoding (#142243)
When a `str` is encoded in `bytearray.__init__`, the encoder tends to create a new, uniquely referenced bytes object. Rather than allocating new memory and copying the bytes, use the already-created bytes object as the bytearray's backing buffer. The bigger the `str`, the bigger the saving.

Mean +- std dev: [main_encoding] 497 us +- 9 us -> [encoding] 14.2 us +- 0.3 us: 34.97x faster

```python
import pyperf

runner = pyperf.Runner()

runner.timeit(
    name="encode",
    setup="a = 'a' * 1_000_000",
    stmt="bytearray(a, encoding='utf8')")
```
1 parent 850f95f commit 14e6052

File tree

1 file changed

+16
-1
lines changed

1 file changed

+16
-1
lines changed

Objects/bytearrayobject.c

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -914,6 +914,10 @@ bytearray___init___impl(PyByteArrayObject *self, PyObject *arg,
914914
return -1;
915915
}
916916

917+
/* Should be caused by first init or the resize to 0. */
918+
assert(self->ob_bytes_object == Py_GetConstantBorrowed(Py_CONSTANT_EMPTY_BYTES));
919+
assert(self->ob_exports == 0);
920+
917921
/* Make a quick exit if no first argument */
918922
if (arg == NULL) {
919923
if (encoding != NULL || errors != NULL) {
@@ -935,9 +939,20 @@ bytearray___init___impl(PyByteArrayObject *self, PyObject *arg,
935939
return -1;
936940
}
937941
encoded = PyUnicode_AsEncodedString(arg, encoding, errors);
938-
if (encoded == NULL)
942+
if (encoded == NULL) {
939943
return -1;
944+
}
940945
assert(PyBytes_Check(encoded));
946+
947+
/* Most encodes return a new unique bytes, just use it as buffer. */
948+
if (_PyObject_IsUniquelyReferenced(encoded)
949+
&& PyBytes_CheckExact(encoded))
950+
{
951+
Py_ssize_t size = Py_SIZE(encoded);
952+
self->ob_bytes_object = encoded;
953+
bytearray_reinit_from_bytes(self, size, size);
954+
return 0;
955+
}
941956
new = bytearray_iconcat((PyObject*)self, encoded);
942957
Py_DECREF(encoded);
943958
if (new == NULL)

0 commit comments

Comments
 (0)