diff options
author | Elijah Newren <newren@gmail.com> | 2021-06-22 08:04:39 +0000 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2021-06-28 07:58:25 -0700 |
commit | d331dd3b0c829fe9019f0113a095ed95bc06f227 (patch) | |
tree | 0ecfb755c3662041ec5a8087c237c31afdd546c5 | |
parent | t6421: add tests checking for excessive object downloads during merge (diff) | |
download | tgif-d331dd3b0c829fe9019f0113a095ed95bc06f227.tar.xz |
diffcore-rename: allow different missing_object_cb functions
estimate_similarity() was setting up a diff_populate_filespec_options
every time it was called, requiring the caller of estimate_similarity()
to pass in some data needed to set up this option. Currently the needed
data consists of a single variable (skip_unmodified), but we want to
also have the different estimate_similarity() callsites start using
different missing_object_cb functions as well. Rather than also passing
that data in, just have the caller pass in the whole
diff_populate_filespec_options, and reduce the number of times we need to
set it up.
As a side note, this also drops the number of calls to
has_promisor_remote() dramatically. If L is the number of basename
paths to compare, M is the number of inexact sources, and N is the
number of inexact destinations, then the number of calls to
has_promisor_remote() drops from L+M*N down to at most 2 -- one for each
of the sites that calls estimate_similarity(). has_promisor_remote() is
a very fast function so this almost certainly has no measurable
performance impact, but it seems cleaner to avoid calling that function
so many times.
Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
-rw-r--r-- | diffcore-rename.c | 58 |
1 file changed, 39 insertions, 19 deletions
diff --git a/diffcore-rename.c b/diffcore-rename.c index 3375e24659..8affa6130e 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -126,7 +126,7 @@ static int estimate_similarity(struct repository *r, struct diff_filespec *src, struct diff_filespec *dst, int minimum_score, - int skip_unmodified) + struct diff_populate_filespec_options *dpf_opt) { /* src points at a file that existed in the original tree (or * optionally a file in the destination tree) and dst points @@ -143,15 +143,6 @@ static int estimate_similarity(struct repository *r, */ unsigned long max_size, delta_size, base_size, src_copied, literal_added; int score; - struct diff_populate_filespec_options dpf_options = { - .check_size_only = 1 - }; - struct prefetch_options prefetch_options = {r, skip_unmodified}; - - if (r == the_repository && has_promisor_remote()) { - dpf_options.missing_object_cb = prefetch; - dpf_options.missing_object_data = &prefetch_options; - } /* We deal only with regular files. Symlink renames are handled * only when they are exact matches --- in other words, no edits @@ -169,11 +160,13 @@ static int estimate_similarity(struct repository *r, * is a possible size - we really should have a flag to * say whether the size is valid or not!) */ + dpf_opt->check_size_only = 1; + if (!src->cnt_data && - diff_populate_filespec(r, src, &dpf_options)) + diff_populate_filespec(r, src, dpf_opt)) return 0; if (!dst->cnt_data && - diff_populate_filespec(r, dst, &dpf_options)) + diff_populate_filespec(r, dst, dpf_opt)) return 0; max_size = ((src->size > dst->size) ? 
src->size : dst->size); @@ -191,11 +184,11 @@ static int estimate_similarity(struct repository *r, if (max_size * (MAX_SCORE-minimum_score) < delta_size * MAX_SCORE) return 0; - dpf_options.check_size_only = 0; + dpf_opt->check_size_only = 0; - if (!src->cnt_data && diff_populate_filespec(r, src, &dpf_options)) + if (!src->cnt_data && diff_populate_filespec(r, src, dpf_opt)) return 0; - if (!dst->cnt_data && diff_populate_filespec(r, dst, &dpf_options)) + if (!dst->cnt_data && diff_populate_filespec(r, dst, dpf_opt)) return 0; if (diffcore_count_changes(r, src, dst, @@ -862,7 +855,11 @@ static int find_basename_matches(struct diff_options *options, int i, renames = 0; struct strintmap sources; struct strintmap dests; - + struct diff_populate_filespec_options dpf_options = { + .check_binary = 0, + .missing_object_cb = NULL, + .missing_object_data = NULL + }; /* * The prefeteching stuff wants to know if it can skip prefetching * blobs that are unmodified...and will then do a little extra work @@ -873,7 +870,10 @@ static int find_basename_matches(struct diff_options *options, * the extra work necessary to check if rename_src entries are * unmodified would be a small waste. 
*/ - int skip_unmodified = 0; + struct prefetch_options prefetch_options = { + .repo = options->repo, + .skip_unmodified = 0 + }; /* * Create maps of basename -> fullname(s) for remaining sources and @@ -910,6 +910,11 @@ static int find_basename_matches(struct diff_options *options, strintmap_set(&dests, base, i); } + if (options->repo == the_repository && has_promisor_remote()) { + dpf_options.missing_object_cb = prefetch; + dpf_options.missing_object_data = &prefetch_options; + } + /* Now look for basename matchups and do similarity estimation */ for (i = 0; i < rename_src_nr; ++i) { char *filename = rename_src[i].p->one->path; @@ -953,7 +958,7 @@ static int find_basename_matches(struct diff_options *options, one = rename_src[src_index].p->one; two = rename_dst[dst_index].p->two; score = estimate_similarity(options->repo, one, two, - minimum_score, skip_unmodified); + minimum_score, &dpf_options); /* If sufficiently similar, record as rename pair */ if (score < minimum_score) @@ -1272,6 +1277,14 @@ void diffcore_rename_extended(struct diff_options *options, int num_sources, want_copies; struct progress *progress = NULL; struct dir_rename_info info; + struct diff_populate_filespec_options dpf_options = { + .check_binary = 0, + .missing_object_cb = NULL, + .missing_object_data = NULL + }; + struct prefetch_options prefetch_options = { + .repo = options->repo + }; trace2_region_enter("diff", "setup", options->repo); info.setup = 0; @@ -1433,6 +1446,13 @@ void diffcore_rename_extended(struct diff_options *options, (uint64_t)num_destinations * (uint64_t)num_sources); } + /* Finish setting up dpf_options */ + prefetch_options.skip_unmodified = skip_unmodified; + if (options->repo == the_repository && has_promisor_remote()) { + dpf_options.missing_object_cb = prefetch; + dpf_options.missing_object_data = &prefetch_options; + } + CALLOC_ARRAY(mx, st_mult(NUM_CANDIDATE_PER_DST, num_destinations)); for (dst_cnt = i = 0; i < rename_dst_nr; i++) { struct diff_filespec *two = 
rename_dst[i].p->two; @@ -1458,7 +1478,7 @@ void diffcore_rename_extended(struct diff_options *options, this_src.score = estimate_similarity(options->repo, one, two, minimum_score, - skip_unmodified); + &dpf_options); this_src.name_score = basename_same(one, two); this_src.dst = i; this_src.src = j; |