From 8a30a1efd11bcfb7b0f5b8543492a09fc495ae60 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Tue, 14 May 2019 14:10:55 -0700 Subject: index-pack: prefetch missing REF_DELTA bases When fetching, the client sends "have" commit IDs indicating that the server does not need to send any object referenced by those commits, reducing network I/O. When the client is a partial clone, the client still sends "have"s in this way, even if it does not have every object referenced by a commit it sent as "have". If a server omits such an object, it is fine: the client could lazily fetch that object before this fetch, and it can still do so after. The issue is when the server sends a thin pack containing an object that is a REF_DELTA against such a missing object: index-pack fails to fix the thin pack. When support for lazily fetching missing objects was added in 8b4c0103a9 ("sha1_file: support lazily fetching missing objects", 2017-12-08), support in index-pack was turned off in the belief that it accesses the repo only to do hash collision checks. However, this is not true: it also needs to access the repo to resolve REF_DELTA bases. Support for lazy fetching should still generally be turned off in index-pack because it is used as part of the lazy fetching process itself (if not, infinite loops may occur), but we do need to fetch the REF_DELTA bases. (When fetching REF_DELTA bases, it is unlikely that those are REF_DELTA themselves, because we do not send "have" when making such fetches.) To resolve this, prefetch all missing REF_DELTA bases before attempting to resolve them. This both ensures that all bases are attempted to be fetched, and ensures that we make only one request per index-pack invocation, and not one request per missing object. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- builtin/index-pack.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'builtin/index-pack.c') diff --git a/builtin/index-pack.c b/builtin/index-pack.c index ccf4eb7e9b..0d55f73b0b 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -14,6 +14,7 @@ #include "thread-utils.h" #include "packfile.h" #include "object-store.h" +#include "fetch-object.h" static const char index_pack_usage[] = "git index-pack [-v] [-o ] [--keep | --keep=] [--verify] [--strict] ( | --stdin [--fix-thin] [])"; @@ -1351,6 +1352,25 @@ static void fix_unresolved_deltas(struct hashfile *f) sorted_by_pos[i] = &ref_deltas[i]; QSORT(sorted_by_pos, nr_ref_deltas, delta_pos_compare); + if (repository_format_partial_clone) { + /* + * Prefetch the delta bases. + */ + struct oid_array to_fetch = OID_ARRAY_INIT; + for (i = 0; i < nr_ref_deltas; i++) { + struct ref_delta_entry *d = sorted_by_pos[i]; + if (!oid_object_info_extended(the_repository, &d->oid, + NULL, + OBJECT_INFO_FOR_PREFETCH)) + continue; + oid_array_append(&to_fetch, &d->oid); + } + if (to_fetch.nr) + fetch_objects(repository_format_partial_clone, + to_fetch.oid, to_fetch.nr); + oid_array_clear(&to_fetch); + } + for (i = 0; i < nr_ref_deltas; i++) { struct ref_delta_entry *d = sorted_by_pos[i]; enum object_type type; @@ -1650,8 +1670,10 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) int report_end_of_input = 0; /* - * index-pack never needs to fetch missing objects, since it only - * accesses the repo to do hash collision checks + * index-pack never needs to fetch missing objects except when + * REF_DELTA bases are missing (which are explicitly handled). It only + * accesses the repo to do hash collision checks and to check which + * REF_DELTA bases need to be fetched. */ fetch_if_missing = 0; -- cgit v1.2.3