From 5ce2dbcf386e5a2667b0d36a0d6d29f59ab6c423 Mon Sep 17 00:00:00 2001 From: isotr0py <2037008807@qq.com> Date: Thu, 31 Oct 2024 20:19:24 +0800 Subject: [PATCH 01/11] refactor gguf reader --- gguf-py/gguf/gguf_reader.py | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py index e8e61abf86ae4..92119640b8979 100644 --- a/gguf-py/gguf/gguf_reader.py +++ b/gguf-py/gguf/gguf_reader.py @@ -87,7 +87,8 @@ class GGUFReader: } def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] = 'r'): - self.data = np.memmap(path, mode = mode) + self.data = open(path, mode="rb") + self.mmap = np.memmap(path, mode = mode) offs = 0 # Check for GGUF magic @@ -127,7 +128,8 @@ def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] = if padding != 0: offs += self.alignment - padding self.data_offset = offs - self._build_tensors(offs, tensors_fields) + # self._build_tensors(offs, tensors_fields) + self.data.close() _DT = TypeVar('_DT', bound = npt.DTypeLike) @@ -140,16 +142,22 @@ def get_tensor(self, idx: int) -> ReaderTensor: return self.tensors[idx] def _get( - self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I', 'S', '<'] = None, + self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I', 'S', '<'] = None, lazy: bool = False, ) -> npt.NDArray[Any]: count = int(count) - itemsize = int(np.empty([], dtype = dtype).itemsize) - end_offs = offset + itemsize * count - return ( - self.data[offset:end_offs] - .view(dtype = dtype)[:count] - .newbyteorder(override_order or self.byte_order) - ) + itemsize = np.dtype(dtype).itemsize + if not lazy: + self.data.seek(offset) + return ( + np.frombuffer(self.data.read(itemsize * count), dtype = dtype, count = count) + .newbyteorder(override_order or self.byte_order) + ) + else: + return ( + self.mmap[offset:offset + itemsize * count] + .view(dtype = dtype)[:count] + .newbyteorder(override_order or self.byte_order) + ) def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int: if field.name in self.fields: @@ -311,7 +319,7 @@ def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None: n_elements = n_elems, n_bytes = n_bytes, data_offset = data_offs, - data = self._get(data_offs, item_type, item_count).reshape(np_dims), + data = self._get(data_offs, item_type, item_count, lazy=True).reshape(np_dims), field = field, )) self.tensors = tensors From bcef54e10a14388deb24a2884b829ca7d551321d Mon Sep 17 00:00:00 2001 From: isotr0py <2037008807@qq.com> Date: Thu, 31 Oct 2024 22:13:15 +0800 Subject: [PATCH 02/11] improve performance --- gguf-py/gguf/gguf_reader.py | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py index 92119640b8979..3dd004d6631af 100644 --- a/gguf-py/gguf/gguf_reader.py +++ b/gguf-py/gguf/gguf_reader.py @@ -6,6 +6,7 @@ import logging import os +import struct from collections import OrderedDict from typing import Any, Literal, NamedTuple, TypeVar, Union @@ -92,7 +93,7 @@ def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] = offs = 0 # Check for GGUF magic - if self._get(offs, np.uint32, override_order = '<')[0] != GGUF_MAGIC: + if struct.unpack(" int: self.fields[field.name] = field return 0 if skip_sum else sum(int(part.nbytes) for part in field.parts) - def _get_str(self, offset: int) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]: - slen = self._get(offset, np.uint64) - return slen, self._get(offset + 8, np.uint8, slen[0]) + def _get_str(self, offset: int, return_size=False) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]: + self.data.seek(offset) + slen = struct.unpack(' ReaderField: # Get Tensor Name name_len, name_data = self._get_str(offs) - offs += int(name_len.nbytes + name_data.nbytes) + offs = self.data.tell() # Get Tensor Dimensions Count n_dims = self._get(offs, np.uint32) - offs += int(n_dims.nbytes) + offs = self.data.tell() # Get Tensor Dimension Array dims = self._get(offs, np.uint64, n_dims[0]) - offs += int(dims.nbytes) + offs = self.data.tell() # Get Tensor Encoding Scheme Type raw_dtype = self._get(offs, np.uint32) - offs += int(raw_dtype.nbytes) + offs = self.data.tell() # Get Tensor Offset offset_tensor = self._get(offs, np.uint64) - offs += int(offset_tensor.nbytes) + offs = self.data.tell() return ReaderField( orig_offs, @@ -245,9 +251,9 @@ def _build_fields(self, offs: int, count: int) -> int: for _ in range(count): orig_offs = offs kv_klen, kv_kdata = self._get_str(offs) - offs += int(kv_klen.nbytes + kv_kdata.nbytes) + offs = self.data.tell() raw_kv_type = self._get(offs, np.uint32) - offs += int(raw_kv_type.nbytes) + offs = self.data.tell() parts: list[npt.NDArray[Any]] = [kv_klen, kv_kdata, raw_kv_type] idxs_offs = len(parts) field_size, field_parts, field_idxs, field_types = self._get_field_parts(offs, raw_kv_type[0]) @@ -266,7 +272,7 @@ def _build_tensor_info(self, offs: int, count: int) -> tuple[int, list[ReaderFie tensor_fields = [] for _ in range(count): field = self._get_tensor_info_field(offs) - offs += sum(int(part.nbytes) for part in field.parts) + offs = self.data.tell() tensor_fields.append(field) return offs, tensor_fields From 205676ceb7855342741242c9742dd2ae257bb812 Mon Sep 17 00:00:00 2001 From: isotr0py <2037008807@qq.com> Date: Mon, 4 Nov 2024 16:06:46 +0800 Subject: [PATCH 03/11] fix mode --- gguf-py/gguf/gguf_reader.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py index 3dd004d6631af..fd32a3d1fd991 100644 --- a/gguf-py/gguf/gguf_reader.py +++ b/gguf-py/gguf/gguf_reader.py @@ -88,8 +88,9 @@ class GGUFReader: } def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] = 'r'): - self.data = open(path, mode="rb") - self.mmap = np.memmap(path, mode = mode) + file_mode = "rb" if mode == 'r' else 'rb+' + self.data = open(path, mode=file_mode) + self.mmap = np.memmap(self.data, mode = mode) offs = 0 # Check for GGUF magic @@ -129,7 +130,7 @@ def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] = if padding != 0: offs += self.alignment - padding self.data_offset = offs - # self._build_tensors(offs, tensors_fields) + self._build_tensors(offs, tensors_fields) self.data.close() _DT = TypeVar('_DT', bound = npt.DTypeLike) From dd320df4b421531fad61609590f9145af9ac9499 Mon Sep 17 00:00:00 2001 From: isotr0py <2037008807@qq.com> Date: Mon, 4 Nov 2024 16:19:23 +0800 Subject: [PATCH 04/11] fix mode --- gguf-py/gguf/gguf_reader.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py index fd32a3d1fd991..fb51756da1b17 100644 --- a/gguf-py/gguf/gguf_reader.py +++ b/gguf-py/gguf/gguf_reader.py @@ -89,8 +89,9 @@ class GGUFReader: def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] = 'r'): file_mode = "rb" if mode == 'r' else 'rb+' + self.mode = mode self.data = open(path, mode=file_mode) - self.mmap = np.memmap(self.data, mode = mode) + self.mmap = np.memmap(path, mode = mode) offs = 0 # Check for GGUF magic @@ -150,10 +151,11 @@ def _get( itemsize = np.dtype(dtype).itemsize if not lazy: self.data.seek(offset) - return ( + data = ( np.frombuffer(self.data.read(itemsize * count), dtype = dtype, count = count) .newbyteorder(override_order or self.byte_order) ) + return data if self.mode == 'r' else data.copy() else: return ( self.mmap[offset:offset + itemsize * count] From 1dc02150bc56f195dea6b722b02ae507390a4628 Mon Sep 17 00:00:00 2001 From: isotr0py <2037008807@qq.com> Date: Mon, 4 Nov 2024 16:44:17 +0800 Subject: [PATCH 05/11] optimize offsets calculation --- gguf-py/gguf/gguf_reader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py index fb51756da1b17..754cb501c9532 100644 --- a/gguf-py/gguf/gguf_reader.py +++ b/gguf-py/gguf/gguf_reader.py @@ -203,9 +203,9 @@ def _get_field_parts( # Handle arrays. if gtype == GGUFValueType.ARRAY: raw_itype = self._get(offs, np.uint32) - offs += int(raw_itype.nbytes) + offs = self.data.tell() alen = self._get(offs, np.uint64) - offs += int(alen.nbytes) + offs = self.data.tell() aparts: list[npt.NDArray[Any]] = [raw_itype, alen] data_idxs: list[int] = [] for idx in range(alen[0]): From a92c920eec5d53a34f718c119d55631488651767 Mon Sep 17 00:00:00 2001 From: isotr0py <2037008807@qq.com> Date: Tue, 5 Nov 2024 01:03:38 +0800 Subject: [PATCH 06/11] revert unnecessary change --- gguf-py/gguf/gguf_reader.py | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py index 754cb501c9532..b6015d9bbfca7 100644 --- a/gguf-py/gguf/gguf_reader.py +++ b/gguf-py/gguf/gguf_reader.py @@ -6,6 +6,7 @@ import logging import os +import mmap import struct from collections import OrderedDict from typing import Any, Literal, NamedTuple, TypeVar, Union @@ -91,7 +92,7 @@ def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] = file_mode = "rb" if mode == 'r' else 'rb+' self.mode = mode self.data = open(path, mode=file_mode) - self.mmap = np.memmap(path, mode = mode) + self.mmap = np.memmap(self.data, mode = mode) offs = 0 # Check for GGUF magic @@ -132,6 +133,8 @@ def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] = offs += self.alignment - padding self.data_offset = offs self._build_tensors(offs, tensors_fields) + + def __del__(self) -> None: self.data.close() _DT = TypeVar('_DT', bound = npt.DTypeLike) @@ -145,23 +148,17 @@ def get_tensor(self, idx: int) -> ReaderTensor: return self.tensors[idx] def _get( - self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I', 'S', '<'] = None, lazy: bool = False, + self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I', 'S', '<'] = None, ) -> npt.NDArray[Any]: count = int(count) itemsize = np.dtype(dtype).itemsize - if not lazy: - self.data.seek(offset) - data = ( - np.frombuffer(self.data.read(itemsize * count), dtype = dtype, count = count) - .newbyteorder(override_order or self.byte_order) - ) - return data if self.mode == 'r' else data.copy() - else: - return ( - self.mmap[offset:offset + itemsize * count] - .view(dtype = dtype)[:count] - .newbyteorder(override_order or self.byte_order) - ) + new_offset = offset + itemsize * count + self.data.seek(new_offset) + return ( + self.mmap[offset:new_offset] + .view(dtype = dtype)[:count] + .newbyteorder(override_order or self.byte_order) + ) def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int: if field.name in self.fields: @@ -328,7 +325,7 @@ def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None: n_elements = n_elems, n_bytes = n_bytes, data_offset = data_offs, - data = self._get(data_offs, item_type, item_count, lazy=True).reshape(np_dims), + data = self._get(data_offs, item_type, item_count).reshape(np_dims), field = field, )) self.tensors = tensors From ad6fd8de25c6d2b682e7a63d5d93ad7538cf82b1 Mon Sep 17 00:00:00 2001 From: isotr0py <2037008807@qq.com> Date: Tue, 5 Nov 2024 01:48:31 +0800 Subject: [PATCH 07/11] revert unnecessary change --- gguf-py/gguf/gguf_reader.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py index b6015d9bbfca7..f0ccc030fc008 100644 --- a/gguf-py/gguf/gguf_reader.py +++ b/gguf-py/gguf/gguf_reader.py @@ -152,10 +152,10 @@ def _get( ) -> npt.NDArray[Any]: count = int(count) itemsize = np.dtype(dtype).itemsize - new_offset = offset + itemsize * count - self.data.seek(new_offset) + end_offs = offset + itemsize * count + self.data.seek(end_offs) return ( - self.mmap[offset:new_offset] + self.mmap[offset:end_offs] .view(dtype = dtype)[:count] .newbyteorder(override_order or self.byte_order) ) From 6a13722ca5c07c0b812bdc872c14949d10544234 Mon Sep 17 00:00:00 2001 From: isotr0py <2037008807@qq.com> Date: Tue, 5 Nov 2024 12:42:19 +0800 Subject: [PATCH 08/11] code format --- gguf-py/gguf/gguf_reader.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py index f0ccc030fc008..ee94ae056c6fc 100644 --- a/gguf-py/gguf/gguf_reader.py +++ b/gguf-py/gguf/gguf_reader.py @@ -6,7 +6,6 @@ import logging import os -import mmap import struct from collections import OrderedDict from typing import Any, Literal, NamedTuple, TypeVar, Union @@ -133,7 +132,7 @@ def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] = offs += self.alignment - padding self.data_offset = offs self._build_tensors(offs, tensors_fields) - + def __del__(self) -> None: self.data.close() @@ -174,7 +173,7 @@ def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int: def _get_str(self, offset: int, return_size=False) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]: self.data.seek(offset) slen = struct.unpack(' Date: Tue, 5 Nov 2024 14:01:38 +0800 Subject: [PATCH 09/11] make mode compatiable --- gguf-py/gguf/gguf_reader.py | 42 +++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py index ee94ae056c6fc..eb1068b7a5008 100644 --- a/gguf-py/gguf/gguf_reader.py +++ b/gguf-py/gguf/gguf_reader.py @@ -88,7 +88,7 @@ class GGUFReader: } def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] = 'r'): - file_mode = "rb" if mode == 'r' else 'rb+' + file_mode = "rb+" if mode == 'r+' else 'rb' self.mode = mode self.data = open(path, mode=file_mode) self.mmap = np.memmap(self.data, mode = mode) @@ -147,17 +147,22 @@ def get_tensor(self, idx: int) -> ReaderTensor: return self.tensors[idx] def _get( - self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I', 'S', '<'] = None, + self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I', 'S', '<'] = None, use_mmap: bool = False ) -> npt.NDArray[Any]: count = int(count) itemsize = np.dtype(dtype).itemsize end_offs = offset + itemsize * count - self.data.seek(end_offs) - return ( - self.mmap[offset:end_offs] - .view(dtype = dtype)[:count] - .newbyteorder(override_order or self.byte_order) - ) + if self.mode != "r" or use_mmap: + data = ( + self.mmap[offset:end_offs] + .view(dtype = dtype)[:count] + .newbyteorder(override_order or self.byte_order) + ) + self.data.seek(end_offs) + else: + self.data.seek(offset) + data = np.frombuffer(self.data.read(itemsize * count), dtype = dtype) + return data def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int: if field.name in self.fields: @@ -170,14 +175,15 @@ def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int: self.fields[field.name] = field return 0 if skip_sum else sum(int(part.nbytes) for part in field.parts) - def _get_str(self, offset: int, return_size=False) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]: + def _get_str(self, offset: int) -> list[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]: self.data.seek(offset) - slen = struct.unpack(' None: n_elements = n_elems, n_bytes = n_bytes, data_offset = data_offs, - data = self._get(data_offs, item_type, item_count).reshape(np_dims), + data = self._get(data_offs, item_type, item_count, use_mmap=True).reshape(np_dims), field = field, )) self.tensors = tensors From 810f06bd5bbab48a6f303db85216c8da237cbdee Mon Sep 17 00:00:00 2001 From: isotr0py <2037008807@qq.com> Date: Tue, 5 Nov 2024 15:15:19 +0800 Subject: [PATCH 10/11] revert --- gguf-py/gguf/gguf_reader.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py index eb1068b7a5008..56332646c7a5f 100644 --- a/gguf-py/gguf/gguf_reader.py +++ b/gguf-py/gguf/gguf_reader.py @@ -150,7 +150,8 @@ def _get( self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I', 'S', '<'] = None, use_mmap: bool = False ) -> npt.NDArray[Any]: count = int(count) - itemsize = np.dtype(dtype).itemsize + dtype = np.dtype(dtype) + itemsize = dtype.itemsize end_offs = offset + itemsize * count if self.mode != "r" or use_mmap: data = ( @@ -161,6 +162,7 @@ def _get( self.data.seek(end_offs) else: self.data.seek(offset) + dtype = dtype.newbyteorder(override_order or self.byte_order) data = np.frombuffer(self.data.read(itemsize * count), dtype = dtype) return data @@ -176,13 +178,16 @@ def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int: return 0 if skip_sum else sum(int(part.nbytes) for part in field.parts) def _get_str(self, offset: int) -> list[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]: - self.data.seek(offset) if self.mode != "r": slen = self._get(offset, np.uint64) - sdata = self._get(offset + 8, np.uint8, slen[0]) + sdata = self._get(offset + 8, np.uint8, slen.item()) else: - slen = np.frombuffer(self.data.read(8), dtype = np.uint64) - sdata = np.frombuffer(self.data.read(slen.item()), dtype = np.uint8) + # This is faster to return a read-only str structure with less seek calling. + self.data.seek(offset) + u64 = np.dtype(np.uint64).newbyteorder(self.byte_order) + u8 = np.dtype(np.uint8).newbyteorder(self.byte_order) + slen = np.frombuffer(self.data.read(8), dtype=u64) + sdata = np.frombuffer(self.data.read(slen.item()), dtype=u8) return [slen, sdata] def _get_field_parts( From 94d814c559a1afd77a8c63ffc8d2f4a34ae824e0 Mon Sep 17 00:00:00 2001 From: isotr0py <2037008807@qq.com> Date: Wed, 4 Dec 2024 15:25:19 +0800 Subject: [PATCH 11/11] fix reader on linux Signed-off-by: isotr0py <2037008807@qq.com> --- gguf-py/gguf/gguf_reader.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py index 56332646c7a5f..ed2f5ef3f7a9b 100644 --- a/gguf-py/gguf/gguf_reader.py +++ b/gguf-py/gguf/gguf_reader.py @@ -95,6 +95,7 @@ def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] = offs = 0 # Check for GGUF magic + self.data.seek(offs) if struct.unpack("