Skip to content

Commit ec8920e

Browse files
pks-tgitster
authored andcommitted
packfile: track packs via the MRU list exclusively
We track packfiles via two different lists: - `struct packfile_store::packs` is a list that sorts local packs first. In addition, these packs are sorted so that younger packs are sorted towards the front. - `struct packfile_store::mru` is a list that sorts packs so that most-recently used packs are at the front. The reasoning behind the ordering in the `packs` list is that younger objects stored in the local object store tend to be accessed more frequently, and that is certainly true for some cases. But there are going to be lots of cases where that isn't true. Especially when traversing history it is likely that one needs to access many older objects, and due to our housekeeping it is very likely that almost all of those older objects will be contained in one large pack that is oldest. So whether or not the ordering makes sense really depends on the use case at hand. A flexible approach like our MRU list addresses that need, as it will sort packs towards the front that are accessed all the time. Intuitively, this approach is thus able to satisfy more use cases more efficiently. This reasoning casts some doubt on whether or not it really makes sense to track packs via two different lists. It causes confusion, and it is not clear whether there are use cases where the `packs` list really is such an obvious choice. Merge these two lists into one most-recently-used list. Note that there is one important edge case: `for_each_packed_object()` uses the MRU list to iterate through packs, and then it lists each object in those packs. This would have the effect that we now sort the current pack towards the front, thus modifying the list of packfiles we are iterating over, with the consequence that we'll see an infinite loop. This edge case is worked around by introducing a new field that allows us to skip updating the MRU. Signed-off-by: Patrick Steinhardt <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 533cf93 commit ec8920e

File tree

3 files changed

+26
-32
lines changed

3 files changed

+26
-32
lines changed

builtin/pack-objects.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1748,11 +1748,11 @@ static int want_object_in_pack_mtime(const struct object_id *oid,
17481748
}
17491749
}
17501750

1751-
for (e = the_repository->objects->packfiles->mru.head; e; e = e->next) {
1751+
for (e = the_repository->objects->packfiles->packs.head; e; e = e->next) {
17521752
struct packed_git *p = e->pack;
17531753
want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime);
17541754
if (!exclude && want > 0)
1755-
packfile_list_prepend(&the_repository->objects->packfiles->mru, p);
1755+
packfile_list_prepend(&the_repository->objects->packfiles->packs, p);
17561756
if (want != -1)
17571757
return want;
17581758
}

packfile.c

Lines changed: 7 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -870,9 +870,7 @@ void packfile_store_add_pack(struct packfile_store *store,
870870
if (pack->pack_fd != -1)
871871
pack_open_fds++;
872872

873-
packfile_list_prepend(&store->packs, pack);
874-
packfile_list_append(&store->mru, pack);
875-
873+
packfile_list_append(&store->packs, pack);
876874
strmap_put(&store->packs_by_path, pack->pack_name, pack);
877875
}
878876

@@ -1077,14 +1075,6 @@ static int sort_pack(const struct packfile_list_entry *a,
10771075
return -1;
10781076
}
10791077

1080-
static void packfile_store_prepare_mru(struct packfile_store *store)
1081-
{
1082-
packfile_list_clear(&store->mru);
1083-
1084-
for (struct packfile_list_entry *e = store->packs.head; e; e = e->next)
1085-
packfile_list_append(&store->mru, e->pack);
1086-
}
1087-
10881078
void packfile_store_prepare(struct packfile_store *store)
10891079
{
10901080
struct odb_source *source;
@@ -1103,7 +1093,6 @@ void packfile_store_prepare(struct packfile_store *store)
11031093
if (!e->next)
11041094
store->packs.tail = e;
11051095

1106-
packfile_store_prepare_mru(store);
11071096
store->initialized = true;
11081097
}
11091098

@@ -1128,12 +1117,6 @@ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *stor
11281117
return store->packs.head;
11291118
}
11301119

1131-
struct packfile_list_entry *packfile_store_get_packs_mru(struct packfile_store *store)
1132-
{
1133-
packfile_store_prepare(store);
1134-
return store->mru.head;
1135-
}
1136-
11371120
/*
11381121
* Give a fast, rough count of the number of objects in the repository. This
11391122
* ignores loose objects completely. If you have a lot of them, then either
@@ -2134,11 +2117,12 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa
21342117
if (!r->objects->packfiles->packs.head)
21352118
return 0;
21362119

2137-
for (l = r->objects->packfiles->mru.head; l; l = l->next) {
2120+
for (l = r->objects->packfiles->packs.head; l; l = l->next) {
21382121
struct packed_git *p = l->pack;
21392122

21402123
if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) {
2141-
packfile_list_prepend(&r->objects->packfiles->mru, p);
2124+
if (!r->objects->packfiles->skip_mru_updates)
2125+
packfile_list_prepend(&r->objects->packfiles->packs, p);
21422126
return 1;
21432127
}
21442128
}
@@ -2270,6 +2254,7 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
22702254
int r = 0;
22712255
int pack_errors = 0;
22722256

2257+
repo->objects->packfiles->skip_mru_updates = true;
22732258
repo_for_each_pack(repo, p) {
22742259
if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
22752260
continue;
@@ -2290,6 +2275,8 @@ int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
22902275
if (r)
22912276
break;
22922277
}
2278+
repo->objects->packfiles->skip_mru_updates = false;
2279+
22932280
return r ? r : pack_errors;
22942281
}
22952282

packfile.h

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,8 @@ struct packfile_store {
7979
struct object_database *odb;
8080

8181
/*
82-
* The list of packfiles in the order in which they are being added to
83-
* the store.
82+
* The list of packfiles in the order in which they have been most
83+
* recently used.
8484
*/
8585
struct packfile_list packs;
8686

@@ -98,9 +98,6 @@ struct packfile_store {
9898
unsigned flags;
9999
} kept_cache;
100100

101-
/* A most-recently-used ordered version of the packs list. */
102-
struct packfile_list mru;
103-
104101
/*
105102
* A map of packfile names to packed_git structs for tracking which
106103
* packs have been loaded already.
@@ -112,6 +109,21 @@ struct packfile_store {
112109
* packs.
113110
*/
114111
bool initialized;
112+
113+
/*
114+
* Usually, packfiles will be reordered to the front of the `packs`
115+
* list whenever an object is looked up via them. This has the effect
116+
* that packs that contain a lot of accessed objects will be located
117+
* towards the front.
118+
*
119+
* This is usually desireable, but there are exceptions. One exception
120+
* is when the looking up multiple objects in a loop for each packfile.
121+
* In that case, we may easily end up with an infinite loop as the
122+
* packfiles get reordered to the front repeatedly.
123+
*
124+
* Setting this field to `true` thus disables these reorderings.
125+
*/
126+
bool skip_mru_updates;
115127
};
116128

117129
/*
@@ -171,11 +183,6 @@ void packfile_store_add_pack(struct packfile_store *store,
171183
*/
172184
struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *store);
173185

174-
/*
175-
* Get all packs in most-recently-used order.
176-
*/
177-
struct packfile_list_entry *packfile_store_get_packs_mru(struct packfile_store *store);
178-
179186
/*
180187
* Open the packfile and add it to the store if it isn't yet known. Returns
181188
* either the newly opened packfile or the preexisting packfile. Returns a

0 commit comments

Comments
 (0)