
gguf-py: Improve GGUFReader read-only mode performance #10159


Closed · wants to merge 11 commits
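At a high level, the diff below keeps np.memmap for tensor data but switches metadata parsing in read-only ('r') mode to buffered seek/read calls on a regular file object, wrapping the returned bytes with np.frombuffer. A minimal sketch of the two patterns, outside the GGUFReader class, with a hypothetical path and field offset:

import numpy as np

def read_u64_via_memmap(path: str, offset: int) -> int:
    # Old approach: slice a whole-file memmap for every small metadata field.
    mm = np.memmap(path, mode='r')
    return int(mm[offset:offset + 8].view(np.uint64)[0])

def read_u64_via_buffered_io(path: str, offset: int) -> int:
    # Pattern adopted for read-only mode: seek + read on a buffered file,
    # then wrap the bytes with np.frombuffer.
    with open(path, 'rb') as f:
        f.seek(offset)
        return int(np.frombuffer(f.read(8), dtype=np.uint64)[0])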
75 changes: 50 additions & 25 deletions gguf-py/gguf/gguf_reader.py
@@ -6,6 +6,7 @@

 import logging
 import os
+import struct
 from collections import OrderedDict
 from typing import Any, Literal, NamedTuple, TypeVar, Union

@@ -87,11 +88,15 @@
     }

     def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] = 'r'):
-        self.data = np.memmap(path, mode = mode)
+        file_mode = "rb+" if mode == 'r+' else 'rb'
+        self.mode = mode
+        self.data = open(path, mode=file_mode)
+        self.mmap = np.memmap(self.data, mode = mode)
         offs = 0

         # Check for GGUF magic
-        if self._get(offs, np.uint32, override_order = '<')[0] != GGUF_MAGIC:
+        self.data.seek(offs)
+        if struct.unpack("<I", self.data.read(4))[0] != GGUF_MAGIC:
             raise ValueError('GGUF magic invalid')
         offs += 4

@@ -129,6 +134,9 @@
         self.data_offset = offs
         self._build_tensors(offs, tensors_fields)

+    def __del__(self) -> None:
+        self.data.close()
+
     _DT = TypeVar('_DT', bound = npt.DTypeLike)

     # Fetch a key/value metadata field by key.
@@ -140,16 +148,24 @@
         return self.tensors[idx]

     def _get(
-        self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I', 'S', '<'] = None,
+        self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I', 'S', '<'] = None, use_mmap: bool = False
     ) -> npt.NDArray[Any]:
         count = int(count)
-        itemsize = int(np.empty([], dtype = dtype).itemsize)
+        dtype = np.dtype(dtype)
+        itemsize = dtype.itemsize
         end_offs = offset + itemsize * count
-        return (
-            self.data[offset:end_offs]
-            .view(dtype = dtype)[:count]
-            .newbyteorder(override_order or self.byte_order)
-        )
+        if self.mode != "r" or use_mmap:
+            data = (
+                self.mmap[offset:end_offs]
+                .view(dtype = dtype)[:count]
+                .newbyteorder(override_order or self.byte_order)
+            )
+            self.data.seek(end_offs)
+        else:
+            self.data.seek(offset)
+            dtype = dtype.newbyteorder(override_order or self.byte_order)
+            data = np.frombuffer(self.data.read(itemsize * count), dtype = dtype)
+        return data

     def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
         if field.name in self.fields:
@@ -162,9 +178,18 @@
         self.fields[field.name] = field
         return 0 if skip_sum else sum(int(part.nbytes) for part in field.parts)

-    def _get_str(self, offset: int) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]:
-        slen = self._get(offset, np.uint64)
-        return slen, self._get(offset + 8, np.uint8, slen[0])
+    def _get_str(self, offset: int) -> list[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]:
+        if self.mode != "r":
+            slen = self._get(offset, np.uint64)
+            sdata = self._get(offset + 8, np.uint8, slen.item())
+        else:
+            # It is faster to return a read-only str structure with fewer seek calls.
+            self.data.seek(offset)
+            u64 = np.dtype(np.uint64).newbyteorder(self.byte_order)
+            u8 = np.dtype(np.uint8).newbyteorder(self.byte_order)
+            slen = np.frombuffer(self.data.read(8), dtype=u64)
+            sdata = np.frombuffer(self.data.read(slen.item()), dtype=u8)
+        return [slen, sdata]

     def _get_field_parts(
         self, orig_offs: int, raw_type: int,

Check failure on line 181 in gguf-py/gguf/gguf_reader.py — GitHub Actions / pyright type-check: Too many type arguments provided for "list"; expected 1 but received 2 (reportInvalidTypeArguments)

Check failure on line 192 in gguf-py/gguf/gguf_reader.py — GitHub Actions / pyright type-check: Type "list[NDArray[Any] | NDArray[unsignedinteger[_64Bit]] | NDArray[unsignedinteger[_8Bit]]]" is not assignable to return type "list[NDArray[uint64]]" (reportReturnType)
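As an aside on the pyright failures above: list[...] is generic in a single element type, so the two-argument annotation on _get_str cannot type-check. A hedged sketch, not part of this PR, of two annotations that would satisfy pyright while keeping the two-element return:

from typing import Any

import numpy as np
import numpy.typing as npt

# One type argument per element: a tuple return mirrors the pre-PR signature.
def get_str_as_tuple(offset: int) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]: ...

# If a list return is kept, it needs a single common element type.
def get_str_as_list(offset: int) -> list[npt.NDArray[Any]]: ...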
@@ -175,8 +200,8 @@
         types.append(gtype)
         # Handle strings.
         if gtype == GGUFValueType.STRING:
-            sparts: list[npt.NDArray[Any]] = list(self._get_str(offs))
-            size = sum(int(part.nbytes) for part in sparts)
+            sparts: list[npt.NDArray[Any]] = self._get_str(offs)
+            size = 8 + sparts[0].item()
             return size, sparts, [1], types
         # Check if it's a simple scalar type.
         nptype = self.gguf_scalar_to_np.get(gtype)
@@ -186,9 +211,9 @@
         # Handle arrays.
         if gtype == GGUFValueType.ARRAY:
             raw_itype = self._get(offs, np.uint32)
-            offs += int(raw_itype.nbytes)
+            offs = self.data.tell()
             alen = self._get(offs, np.uint64)
-            offs += int(alen.nbytes)
+            offs = self.data.tell()
             aparts: list[npt.NDArray[Any]] = [raw_itype, alen]
             data_idxs: list[int] = []
             for idx in range(alen[0]):
@@ -208,23 +233,23 @@

         # Get Tensor Name
         name_len, name_data = self._get_str(offs)
-        offs += int(name_len.nbytes + name_data.nbytes)
+        offs = self.data.tell()

         # Get Tensor Dimensions Count
         n_dims = self._get(offs, np.uint32)
-        offs += int(n_dims.nbytes)
+        offs = self.data.tell()

         # Get Tensor Dimension Array
         dims = self._get(offs, np.uint64, n_dims[0])
-        offs += int(dims.nbytes)
+        offs = self.data.tell()

         # Get Tensor Encoding Scheme Type
         raw_dtype = self._get(offs, np.uint32)
-        offs += int(raw_dtype.nbytes)
+        offs = self.data.tell()

         # Get Tensor Offset
         offset_tensor = self._get(offs, np.uint64)
-        offs += int(offset_tensor.nbytes)
+        offs = self.data.tell()

         return ReaderField(
             orig_offs,
@@ -237,9 +262,9 @@
         for _ in range(count):
             orig_offs = offs
             kv_klen, kv_kdata = self._get_str(offs)
-            offs += int(kv_klen.nbytes + kv_kdata.nbytes)
+            offs = self.data.tell()
             raw_kv_type = self._get(offs, np.uint32)
-            offs += int(raw_kv_type.nbytes)
+            offs = self.data.tell()
             parts: list[npt.NDArray[Any]] = [kv_klen, kv_kdata, raw_kv_type]
             idxs_offs = len(parts)
             field_size, field_parts, field_idxs, field_types = self._get_field_parts(offs, raw_kv_type[0])
@@ -258,7 +283,7 @@
         tensor_fields = []
         for _ in range(count):
             field = self._get_tensor_info_field(offs)
-            offs += sum(int(part.nbytes) for part in field.parts)
+            offs = self.data.tell()
             tensor_fields.append(field)
         return offs, tensor_fields

@@ -311,7 +336,7 @@
                 n_elements = n_elems,
                 n_bytes = n_bytes,
                 data_offset = data_offs,
-                data = self._get(data_offs, item_type, item_count).reshape(np_dims),
+                data = self._get(data_offs, item_type, item_count, use_mmap=True).reshape(np_dims),
                 field = field,
             ))
         self.tensors = tensors
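For reference, a hedged usage sketch of the read-only path this PR targets; 'model.gguf' is a placeholder path, and the attributes used (fields, tensors) are the existing GGUFReader API:

import time

from gguf import GGUFReader

t0 = time.perf_counter()
reader = GGUFReader('model.gguf', 'r')  # 'r' selects the buffered metadata path
print(f"parsed {len(reader.fields)} KV fields and {len(reader.tensors)} tensors "
      f"in {time.perf_counter() - t0:.3f}s")

# Tensor payloads still go through the memmap (use_mmap=True above), so touching
# reader.tensors[0].data does not force an eager read of the whole file.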