From edb61db61e6822ebee632b94f6adb281cecdff8e Mon Sep 17 00:00:00 2001 From: Rodrigo Tobar Date: Tue, 18 Jul 2023 21:17:28 +0800 Subject: [PATCH 1/3] Allow users to specify Packer internal buf_size Giving this flexibility to users means that internal reallocations can be avoided if the buffer size is good enough, at the expense of potentially allocating more memory than needed. This, together with reusing a Packer object, means that multiple serialisations can end up requiring no memory allocations other than the initial buffer creation, which can be a big win in some situations. The default value is still 1MB, making this backwards compatible. Signed-off-by: Rodrigo Tobar --- msgpack/_packer.pyx | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 074b39fc..ceed0144 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -102,6 +102,11 @@ cdef class Packer(object): :param str unicode_errors: The error handler for encoding unicode. (default: 'strict') DO NOT USE THIS!! This option is kept for very specific usage. + + :param int buf_size: + The size of the internal buffer. (default: 1024 * 1024) + Useful if serialisation size can be correctly estimated, + avoid unnecessary reallocations. 
""" cdef msgpack_packer pk cdef object _default @@ -112,8 +117,7 @@ cdef class Packer(object): cdef bint autoreset cdef bint datetime - def __cinit__(self): - cdef int buf_size = 1024*1024 + def __cinit__(self, buf_size=1024*1024, **_kwargs): self.pk.buf = PyMem_Malloc(buf_size) if self.pk.buf == NULL: raise MemoryError("Unable to allocate internal buffer.") @@ -122,7 +126,7 @@ cdef class Packer(object): def __init__(self, *, default=None, bint use_single_float=False, bint autoreset=True, bint use_bin_type=True, - bint strict_types=False, bint datetime=False, unicode_errors=None): + bint strict_types=False, bint datetime=False, unicode_errors=None, buf_size=1024*1024): self.use_float = use_single_float self.strict_types = strict_types self.autoreset = autoreset From fdf5c1c5176c3d341450dbd052ad667436876db6 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sun, 5 May 2024 23:33:36 +0900 Subject: [PATCH 2/3] change default buffer size to 256KiB --- msgpack/_packer.pyx | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 7157669b..9c106474 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -105,7 +105,7 @@ cdef class Packer(object): DO NOT USE THIS!! This option is kept for very specific usage. :param int buf_size: - The size of the internal buffer. (default: 1024 * 1024) + The size of the internal buffer. (default: 256*1024) Useful if serialisation size can be correctly estimated, avoid unnecessary reallocations. 
""" @@ -118,7 +118,7 @@ cdef class Packer(object): cdef bint autoreset cdef bint datetime - def __cinit__(self, buf_size=1024*1024, **_kwargs): + def __cinit__(self, buf_size=256*1024, **_kwargs): self.pk.buf = PyMem_Malloc(buf_size) if self.pk.buf == NULL: raise MemoryError("Unable to allocate internal buffer.") @@ -127,7 +127,8 @@ cdef class Packer(object): def __init__(self, *, default=None, bint use_single_float=False, bint autoreset=True, bint use_bin_type=True, - bint strict_types=False, bint datetime=False, unicode_errors=None, buf_size=1024*1024): + bint strict_types=False, bint datetime=False, unicode_errors=None, + buf_size=256*1024): self.use_float = use_single_float self.strict_types = strict_types self.autoreset = autoreset From 09825cb5290e808f39f29f913f3be8b9c39421b7 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 6 May 2024 00:39:37 +0900 Subject: [PATCH 3/3] update fallback --- msgpack/_packer.pyx | 2 +- msgpack/_unpacker.pyx | 2 +- msgpack/fallback.py | 27 +++++---------------------- 3 files changed, 7 insertions(+), 24 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 4405a302..99557d37 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -53,7 +53,7 @@ cdef inline int PyBytesLike_CheckExact(object o): return PyBytes_CheckExact(o) or PyByteArray_CheckExact(o) -cdef class Packer(object): +cdef class Packer: """ MessagePack Packer diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 2771e7bf..34ff3304 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -210,7 +210,7 @@ def unpackb(object packed, *, object object_hook=None, object list_hook=None, raise ValueError("Unpack failed: error = %d" % (ret,)) -cdef class Unpacker(object): +cdef class Unpacker: """Streaming unpacker. 
Arguments: diff --git a/msgpack/fallback.py b/msgpack/fallback.py index ea4c4ced..cbf0d30e 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -232,6 +232,7 @@ class Unpacker: def __init__( self, file_like=None, + *, read_size=0, use_list=True, raw=False, @@ -650,32 +651,13 @@ class Packer: The error handler for encoding unicode. (default: 'strict') DO NOT USE THIS!! This option is kept for very specific usage. - Example of streaming deserialize from file-like object:: - - unpacker = Unpacker(file_like) - for o in unpacker: - process(o) - - Example of streaming deserialize from socket:: - - unpacker = Unpacker() - while True: - buf = sock.recv(1024**2) - if not buf: - break - unpacker.feed(buf) - for o in unpacker: - process(o) - - Raises ``ExtraData`` when *packed* contains extra bytes. - Raises ``OutOfData`` when *packed* is incomplete. - Raises ``FormatError`` when *packed* is not valid msgpack. - Raises ``StackError`` when *packed* contains too nested. - Other exceptions can be raised during unpacking. + :param int buf_size: + Internal buffer size. This option is used only for the C implementation. """ def __init__( self, + *, default=None, use_single_float=False, autoreset=True, @@ -683,6 +665,7 @@ def __init__( strict_types=False, datetime=False, unicode_errors=None, + buf_size=None, ): self._strict_types = strict_types self._use_float = use_single_float