From 95acf11a3dc3d18ec999f4913ec6c6a54545c6b7 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Tue, 7 Apr 2020 15:11:43 -0700 Subject: diff: restrict when prefetching occurs Commit 7fbbcb21b1 ("diff: batch fetching of missing blobs", 2019-04-08) optimized "diff" by prefetching blobs in a partial clone, but there are some cases wherein blobs do not need to be prefetched. In these cases, any command that uses the diff machinery will unnecessarily fetch blobs. diffcore_std() may read blobs when it calls the following functions: (1) diffcore_skip_stat_unmatch() (controlled by the config variable diff.autorefreshindex) (2) diffcore_break() and diffcore_merge_broken() (for break-rewrite detection) (3) diffcore_rename() (for rename detection) (4) diffcore_pickaxe() (for detecting addition/deletion of specified string) Instead of always prefetching blobs, teach diffcore_skip_stat_unmatch(), diffcore_break(), and diffcore_rename() to prefetch blobs upon the first read of a missing object. This covers (1), (2), and (3): to cover the rest, teach diffcore_std() to prefetch if the output type is one that includes blob data (and hence blob data will be required later anyway), or if it knows that (4) will be run. Helped-by: Jeff King Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- diffcore-rename.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 4 deletions(-) (limited to 'diffcore-rename.c') diff --git a/diffcore-rename.c b/diffcore-rename.c index bf4c0b8740..99e63e90f8 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -1,4 +1,5 @@ /* + * * Copyright (C) 2005 Junio C Hamano */ #include "cache.h" @@ -7,6 +8,7 @@ #include "object-store.h" #include "hashmap.h" #include "progress.h" +#include "promisor-remote.h" /* Table of rename/copy destinations */ @@ -128,10 +130,46 @@ struct diff_score { short name_score; }; +struct prefetch_options { + struct repository *repo; + int skip_unmodified; +}; +static void prefetch(void *prefetch_options) +{ + struct prefetch_options *options = prefetch_options; + int i; + struct oid_array to_fetch = OID_ARRAY_INIT; + + for (i = 0; i < rename_dst_nr; i++) { + if (rename_dst[i].pair) + /* + * The loop in diffcore_rename() will not need these + * blobs, so skip prefetching. + */ + continue; /* already found exact match */ + diff_add_if_missing(options->repo, &to_fetch, + rename_dst[i].two); + } + for (i = 0; i < rename_src_nr; i++) { + if (options->skip_unmodified && + diff_unmodified_pair(rename_src[i].p)) + /* + * The loop in diffcore_rename() will not need these + * blobs, so skip prefetching. + */ + continue; + diff_add_if_missing(options->repo, &to_fetch, + rename_src[i].p->one); + } + promisor_remote_get_direct(options->repo, to_fetch.oid, to_fetch.nr); + oid_array_clear(&to_fetch); +} + static int estimate_similarity(struct repository *r, struct diff_filespec *src, struct diff_filespec *dst, - int minimum_score) + int minimum_score, + int skip_unmodified) { /* src points at a file that existed in the original tree (or * optionally a file in the destination tree) and dst points @@ -151,6 +189,12 @@ static int estimate_similarity(struct repository *r, struct diff_populate_filespec_options dpf_options = { .check_size_only = 1 }; + struct prefetch_options prefetch_options = {r, skip_unmodified}; + + if (r == the_repository && has_promisor_remote()) { + dpf_options.missing_object_cb = prefetch; + dpf_options.missing_object_data = &prefetch_options; + } /* We deal only with regular files. Symlink renames are handled * only when they are exact matches --- in other words, no edits @@ -190,9 +234,11 @@ static int estimate_similarity(struct repository *r, if (max_size * (MAX_SCORE-minimum_score) < delta_size * MAX_SCORE) return 0; - if (!src->cnt_data && diff_populate_filespec(r, src, NULL)) + dpf_options.check_size_only = 0; + + if (!src->cnt_data && diff_populate_filespec(r, src, &dpf_options)) return 0; - if (!dst->cnt_data && diff_populate_filespec(r, dst, NULL)) + if (!dst->cnt_data && diff_populate_filespec(r, dst, &dpf_options)) return 0; if (diffcore_count_changes(r, src, dst, @@ -569,7 +615,8 @@ void diffcore_rename(struct diff_options *options) this_src.score = estimate_similarity(options->repo, one, two, - minimum_score); + minimum_score, + skip_unmodified); this_src.name_score = basename_same(one, two); this_src.dst = i; this_src.src = j; -- cgit v1.2.3