Inline get_object_at.
jelmer committed Jan 17, 2023
1 parent a0d7981 commit f193985
Showing 3 changed files with 12 additions and 49 deletions.
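In short: the commit deletes the LRUSizeCache-backed get_object_at wrapper from PackData (and its Swift variant) and has callers use get_unpacked_object_at directly, reading pack_type_num and _obj() off the returned UnpackedObject. A minimal caller-side sketch of the migration, assuming an open dulwich Pack `p` and a valid object `offset` (these names are illustrative, not taken from the diff):

    # Before this commit: a cached (type_num, raw obj) pair.
    # obj_type, obj = p.data.get_object_at(offset)

    # After this commit: fetch the UnpackedObject and unpack it yourself,
    # mirroring what Pack.get_raw now does in dulwich/pack.py below.
    unpacked = p.data.get_unpacked_object_at(offset, include_comp=False)
    obj_type = unpacked.pack_type_num
    obj = unpacked._obj()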
12 changes: 2 additions & 10 deletions dulwich/contrib/swift.py
@@ -42,7 +42,6 @@
     GreenThreadsMissingObjectFinder,
 )
 
-from dulwich.lru_cache import LRUSizeCache
 from dulwich.objects import (
     Blob,
     Commit,
@@ -66,7 +65,6 @@
     write_pack_index_v2,
     load_pack_index_file,
     read_pack_header,
-    _compute_object_size,
     unpack_object,
     write_pack_object,
 )
@@ -585,20 +583,14 @@ def __init__(self, scon, filename):
         self.pack_length = int(headers["content-length"])
         pack_reader = SwiftPackReader(self.scon, self._filename, self.pack_length)
         (version, self._num_objects) = read_pack_header(pack_reader.read)
-        self._offset_cache = LRUSizeCache(
-            1024 * 1024 * self.scon.cache_length,
-            compute_size=_compute_object_size,
-        )
         self.pack = None
 
-    def get_object_at(self, offset):
-        if offset in self._offset_cache:
-            return self._offset_cache[offset]
+    def get_unpacked_object_at(self, offset):
         assert offset >= self._header_size
         pack_reader = SwiftPackReader(self.scon, self._filename, self.pack_length)
         pack_reader.seek(offset)
         unpacked, _ = unpack_object(pack_reader.read)
-        return (unpacked.pack_type_num, unpacked._obj())
+        return unpacked
 
     def get_stored_checksum(self):
         pack_reader = SwiftPackReader(self.scon, self._filename, self.pack_length)
47 changes: 9 additions & 38 deletions dulwich/pack.py
@@ -83,9 +83,6 @@
     ChecksumMismatch,
 )
 from dulwich.file import GitFile
-from dulwich.lru_cache import (
-    LRUSizeCache,
-)
 from dulwich.objects import (
     ShaFile,
     ObjectID,
@@ -894,14 +891,6 @@ def unpack_object(
     return unpacked, unused
 
 
-def _compute_object_size(value):
-    """Compute the size of a unresolved object for use with LRUSizeCache."""
-    (num, obj) = value
-    if num in DELTA_TYPES:
-        return chunks_length(obj[1])
-    return chunks_length(obj)
-
-
 class PackStreamReader:
     """Class to read a pack stream.
@@ -1169,9 +1158,6 @@ def __init__(self, filename, file=None, size=None):
         else:
             self._file = file
         (version, self._num_objects) = read_pack_header(self._file.read)
-        self._offset_cache = LRUSizeCache(
-            1024 * 1024 * 20, compute_size=_compute_object_size
-        )
 
     @property
     def filename(self):
@@ -1331,20 +1317,6 @@ def get_unpacked_object_at(self, offset: int, *, include_comp: bool = False) ->
         unpacked.offset = offset
         return unpacked
 
-    def get_object_at(self, offset: int) -> Tuple[int, OldUnpackedObject]:
-        """Given an offset in to the packfile return the object that is there.
-
-        Using the associated index the location of an object can be looked up,
-        and then the packfile can be asked directly for that object using this
-        function.
-        """
-        try:
-            return self._offset_cache[offset]
-        except KeyError:
-            pass
-        unpacked = self.get_unpacked_object_at(offset, include_comp=False)
-        return (unpacked.pack_type_num, unpacked._obj())
-
 
 T = TypeVar('T')
 
@@ -2391,7 +2363,9 @@ def __contains__(self, sha1: bytes) -> bool:
 
     def get_raw(self, sha1: bytes) -> Tuple[int, bytes]:
         offset = self.index.object_offset(sha1)
-        obj_type, obj = self.data.get_object_at(offset)
+        unpacked = self.data.get_unpacked_object_at(offset, include_comp=False)
+        obj = unpacked._obj()
+        obj_type = unpacked.pack_type_num
         type_num, chunks = self.resolve_object(offset, obj_type, obj)
         return type_num, b"".join(chunks)
 
@@ -2475,7 +2449,9 @@ def get_ref(self, sha: bytes) -> Tuple[Optional[int], int, OldUnpackedObject]:
         except KeyError:
             offset = None
         if offset:
-            type, obj = self.data.get_object_at(offset)
+            unpacked = self.data.get_unpacked_object_at(offset, include_comp=False)
+            type = unpacked.pack_type_num
+            obj = unpacked._obj()
         elif self.resolve_ext_ref:
             type, obj = self.resolve_ext_ref(sha)
         else:
@@ -2501,7 +2477,9 @@ def resolve_object(self, offset: int, type: int, obj, get_ref=None) -> Tuple[int
                 (delta_offset, delta) = base_obj
                 # TODO: clean up asserts and replace with nicer error messages
                 base_offset = base_offset - delta_offset
-                base_type, base_obj = self.data.get_object_at(base_offset)
+                unpacked = self.data.get_unpacked_object_at(base_offset, include_comp=False)
+                base_type = unpacked.pack_type_num
+                base_obj = unpacked._obj()
                 assert isinstance(base_type, int)
             elif base_type == REF_DELTA:
                 (basename, delta) = base_obj
@@ -2515,13 +2493,6 @@ def resolve_object(self, offset: int, type: int, obj, get_ref=None) -> Tuple[int
         chunks = base_obj
         for prev_offset, delta_type, delta in reversed(delta_stack):
             chunks = apply_delta(chunks, delta)
-            # TODO(dborowitz): This can result in poor performance if
-            # large base objects are separated from deltas in the pack.
-            # We should reorganize so that we apply deltas to all
-            # objects in a chain one after the other to optimize cache
-            # performance.
-            if prev_offset is not None:
-                self.data._offset_cache[prev_offset] = base_type, chunks
         return base_type, chunks
 
     def entries(self, progress: Optional[ProgressFn] = None) -> Iterator[PackIndexEntry]:
2 changes: 1 addition & 1 deletion dulwich/tests/test_pack.py
@@ -410,7 +410,7 @@ def test_pack_tuples(self):
         self.assertEqual(expected, set(list(tuples)))
         self.assertEqual(3, len(tuples))
 
-    def test_get_object_at(self):
+    def test_get_object(self):
         """Tests random access for non-delta objects"""
         with self.get_pack(pack1_sha) as p:
             obj = p[a_sha]