From 50033772d50ef3c4023d63561d20bc61db96500e Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Sat, 11 Jan 2020 20:15:24 -0800 Subject: connected: verify promisor-ness of partial clone Commit dfa33a298d ("clone: do faster object check for partial clones", 2019-04-21) optimized the connectivity check done when cloning with --filter to check only the existence of objects directly pointed to by refs. But this is not sufficient: they also need to be promisor objects. Make this check more robust by instead checking that these objects are promisor objects, that is, they appear in a promisor pack. Signed-off-by: Jonathan Tan Reviewed-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- builtin/clone.c | 5 +++-- connected.c | 19 ++++++++++++++----- connected.h | 11 ++++++----- 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index 0fc89ae2b9..0516181052 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -673,7 +673,7 @@ static void update_remote_refs(const struct ref *refs, const char *msg, struct transport *transport, int check_connectivity, - int check_refs_only) + int check_refs_are_promisor_objects_only) { const struct ref *rm = mapped_refs; @@ -682,7 +682,8 @@ static void update_remote_refs(const struct ref *refs, opt.transport = transport; opt.progress = transport->progress; - opt.check_refs_only = !!check_refs_only; + opt.check_refs_are_promisor_objects_only = + !!check_refs_are_promisor_objects_only; if (check_connected(iterate_ref_map, &rm, &opt)) die(_("remote did not send all necessary objects")); diff --git a/connected.c b/connected.c index c337f5f7f4..7e9bd1bc62 100644 --- a/connected.c +++ b/connected.c @@ -52,19 +52,28 @@ int check_connected(oid_iterate_fn fn, void *cb_data, strbuf_release(&idx_file); } - if (opt->check_refs_only) { + if (opt->check_refs_are_promisor_objects_only) { /* * For partial clones, we don't want to have to do a regular * connectivity check because we have to enumerate and exclude * all promisor objects (slow), and then the connectivity check * itself becomes a no-op because in a partial clone every * object is a promisor object. Instead, just make sure we - * received the objects pointed to by each wanted ref. + * received, in a promisor packfile, the objects pointed to by + * each wanted ref. */ do { - if (!repo_has_object_file_with_flags(the_repository, &oid, - OBJECT_INFO_SKIP_FETCH_OBJECT)) - return 1; + struct packed_git *p; + + for (p = get_all_packs(the_repository); p; p = p->next) { + if (!p->pack_promisor) + continue; + if (find_pack_entry_one(oid.hash, p)) + goto promisor_pack_found; + } + return 1; +promisor_pack_found: + ; } while (!fn(cb_data, &oid)); return 0; } diff --git a/connected.h b/connected.h index ce2e7d8f2e..eba5c261ba 100644 --- a/connected.h +++ b/connected.h @@ -48,12 +48,13 @@ struct check_connected_options { unsigned is_deepening_fetch : 1; /* - * If non-zero, only check the top-level objects referenced by the - * wanted refs (passed in as cb_data). This is useful for partial - * clones, where enumerating and excluding all promisor objects is very - * slow and the commit-walk itself becomes a no-op. + * If non-zero, only check that the top-level objects referenced by the + * wanted refs (passed in as cb_data) are promisor objects. This is + * useful for partial clones, where enumerating and excluding all + * promisor objects is very slow and the commit-walk itself becomes a + * no-op. */ - unsigned check_refs_only : 1; + unsigned check_refs_are_promisor_objects_only : 1; }; #define CHECK_CONNECTED_INIT { 0 } -- cgit v1.2.3 From 2df1aa239cd50698ab3e0c2cb18b9c1d2f0e44d7 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Sat, 11 Jan 2020 20:15:25 -0800 Subject: fetch: forgo full connectivity check if --filter If a filter is specified, we do not need a full connectivity check on the contents of the packfile we just fetched; we only need to check that the objects referenced are promisor objects. This significantly speeds up fetches into repositories that have many promisor objects, because during the connectivity check, all promisor objects are enumerated (to mark them UNINTERESTING), and that takes a significant amount of time. Signed-off-by: Jonathan Tan Reviewed-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- builtin/fetch.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index b4c6d921d0..6fb50320eb 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -906,8 +906,17 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, url = xstrdup("foreign"); if (!connectivity_checked) { + struct check_connected_options opt = CHECK_CONNECTED_INIT; + + if (filter_options.choice) + /* + * Since a filter is specified, objects indirectly + * referenced by refs are allowed to be absent. + */ + opt.check_refs_are_promisor_objects_only = 1; + rm = ref_map; - if (check_connected(iterate_ref_map, &rm, NULL)) { + if (check_connected(iterate_ref_map, &rm, &opt)) { rc = error(_("%s did not send all necessary objects\n"), url); goto abort; } -- cgit v1.2.3