From db7ed7418b3702ab2b2df755764c3f452917a890 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Thu, 2 Apr 2020 12:19:16 -0700 Subject: promisor-remote: accept 0 as oid_nr in function There are 3 callers to promisor_remote_get_direct() that first check if the number of objects to be fetched is equal to 0. Fold that check into promisor_remote_get_direct(), and in doing so, be explicit as to what promisor_remote_get_direct() does if oid_nr is 0 (it returns 0, success, immediately). Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- diff.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'diff.c') diff --git a/diff.c b/diff.c index f2cfbf2214..d0f8032456 100644 --- a/diff.c +++ b/diff.c @@ -6517,12 +6517,11 @@ void diffcore_std(struct diff_options *options) add_if_missing(options->repo, &to_fetch, p->one); add_if_missing(options->repo, &to_fetch, p->two); } - if (to_fetch.nr) - /* - * NEEDSWORK: Consider deduplicating the OIDs sent. - */ - promisor_remote_get_direct(options->repo, - to_fetch.oid, to_fetch.nr); + /* + * NEEDSWORK: Consider deduplicating the OIDs sent. + */ + promisor_remote_get_direct(options->repo, + to_fetch.oid, to_fetch.nr); oid_array_clear(&to_fetch); } -- cgit v1.2.3 From 1c37e86ab2834dfca311799e799568794bc474ce Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Tue, 7 Apr 2020 15:11:41 -0700 Subject: diff: make diff_populate_filespec_options struct The behavior of diff_populate_filespec() currently can be customized through a bitflag, but a subsequent patch requires it to support a non-boolean option. Replace the bitflag with an options struct. 
Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- diff.c | 54 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 19 deletions(-) (limited to 'diff.c') diff --git a/diff.c b/diff.c index d0f8032456..c7457aa4a1 100644 --- a/diff.c +++ b/diff.c @@ -573,7 +573,7 @@ static int fill_mmfile(struct repository *r, mmfile_t *mf, mf->size = 0; return 0; } - else if (diff_populate_filespec(r, one, 0)) + else if (diff_populate_filespec(r, one, NULL)) return -1; mf->ptr = one->data; @@ -585,9 +585,13 @@ static int fill_mmfile(struct repository *r, mmfile_t *mf, static unsigned long diff_filespec_size(struct repository *r, struct diff_filespec *one) { + struct diff_populate_filespec_options dpf_options = { + .check_size_only = 1, + }; + if (!DIFF_FILE_VALID(one)) return 0; - diff_populate_filespec(r, one, CHECK_SIZE_ONLY); + diff_populate_filespec(r, one, &dpf_options); return one->size; } @@ -3020,6 +3024,9 @@ static void show_dirstat(struct diff_options *options) struct diff_filepair *p = q->queue[i]; const char *name; unsigned long copied, added, damage; + struct diff_populate_filespec_options dpf_options = { + .check_size_only = 1, + }; name = p->two->path ? 
p->two->path : p->one->path; @@ -3047,19 +3054,19 @@ static void show_dirstat(struct diff_options *options) } if (DIFF_FILE_VALID(p->one) && DIFF_FILE_VALID(p->two)) { - diff_populate_filespec(options->repo, p->one, 0); - diff_populate_filespec(options->repo, p->two, 0); + diff_populate_filespec(options->repo, p->one, NULL); + diff_populate_filespec(options->repo, p->two, NULL); diffcore_count_changes(options->repo, p->one, p->two, NULL, NULL, &copied, &added); diff_free_filespec_data(p->one); diff_free_filespec_data(p->two); } else if (DIFF_FILE_VALID(p->one)) { - diff_populate_filespec(options->repo, p->one, CHECK_SIZE_ONLY); + diff_populate_filespec(options->repo, p->one, &dpf_options); copied = added = 0; diff_free_filespec_data(p->one); } else if (DIFF_FILE_VALID(p->two)) { - diff_populate_filespec(options->repo, p->two, CHECK_SIZE_ONLY); + diff_populate_filespec(options->repo, p->two, &dpf_options); copied = 0; added = p->two->size; diff_free_filespec_data(p->two); @@ -3339,13 +3346,17 @@ static void emit_binary_diff(struct diff_options *o, int diff_filespec_is_binary(struct repository *r, struct diff_filespec *one) { + struct diff_populate_filespec_options dpf_options = { + .check_binary = 1, + }; + if (one->is_binary == -1) { diff_filespec_load_driver(one, r->index); if (one->driver->binary != -1) one->is_binary = one->driver->binary; else { if (!one->data && DIFF_FILE_VALID(one)) - diff_populate_filespec(r, one, CHECK_BINARY); + diff_populate_filespec(r, one, &dpf_options); if (one->is_binary == -1 && one->data) one->is_binary = buffer_is_binary(one->data, one->size); @@ -3677,8 +3688,8 @@ static void builtin_diffstat(const char *name_a, const char *name_b, } else if (complete_rewrite) { - diff_populate_filespec(o->repo, one, 0); - diff_populate_filespec(o->repo, two, 0); + diff_populate_filespec(o->repo, one, NULL); + diff_populate_filespec(o->repo, two, NULL); data->deleted = count_lines(one->data, one->size); data->added = count_lines(two->data, 
two->size); } @@ -3914,9 +3925,10 @@ static int diff_populate_gitlink(struct diff_filespec *s, int size_only) */ int diff_populate_filespec(struct repository *r, struct diff_filespec *s, - unsigned int flags) + const struct diff_populate_filespec_options *options) { - int size_only = flags & CHECK_SIZE_ONLY; + int size_only = options ? options->check_size_only : 0; + int check_binary = options ? options->check_binary : 0; int err = 0; int conv_flags = global_conv_flags_eol; /* @@ -3986,7 +3998,7 @@ int diff_populate_filespec(struct repository *r, * opening the file and inspecting the contents, this * is probably fine. */ - if ((flags & CHECK_BINARY) && + if (check_binary && s->size > big_file_threshold && s->is_binary == -1) { s->is_binary = 1; return 0; @@ -4012,7 +4024,7 @@ int diff_populate_filespec(struct repository *r, } else { enum object_type type; - if (size_only || (flags & CHECK_BINARY)) { + if (size_only || check_binary) { type = oid_object_info(r, &s->oid, &s->size); if (type < 0) die("unable to read %s", @@ -4141,7 +4153,7 @@ static struct diff_tempfile *prepare_temp_file(struct repository *r, return temp; } else { - if (diff_populate_filespec(r, one, 0)) + if (diff_populate_filespec(r, one, NULL)) die("cannot read data blob for %s", one->path); prep_temp_blob(r->index, name, temp, one->data, one->size, @@ -6407,9 +6419,9 @@ static int diff_filespec_is_identical(struct repository *r, { if (S_ISGITLINK(one->mode)) return 0; - if (diff_populate_filespec(r, one, 0)) + if (diff_populate_filespec(r, one, NULL)) return 0; - if (diff_populate_filespec(r, two, 0)) + if (diff_populate_filespec(r, two, NULL)) return 0; return !memcmp(one->data, two->data, one->size); } @@ -6417,6 +6429,10 @@ static int diff_filespec_is_identical(struct repository *r, static int diff_filespec_check_stat_unmatch(struct repository *r, struct diff_filepair *p) { + struct diff_populate_filespec_options dpf_options = { + .check_size_only = 1, + }; + if (p->done_skip_stat_unmatch) 
return p->skip_stat_unmatch_result; @@ -6439,8 +6455,8 @@ static int diff_filespec_check_stat_unmatch(struct repository *r, !DIFF_FILE_VALID(p->two) || (p->one->oid_valid && p->two->oid_valid) || (p->one->mode != p->two->mode) || - diff_populate_filespec(r, p->one, CHECK_SIZE_ONLY) || - diff_populate_filespec(r, p->two, CHECK_SIZE_ONLY) || + diff_populate_filespec(r, p->one, &dpf_options) || + diff_populate_filespec(r, p->two, &dpf_options) || (p->one->size != p->two->size) || !diff_filespec_is_identical(r, p->one, p->two)) /* (2) */ p->skip_stat_unmatch_result = 1; @@ -6770,7 +6786,7 @@ size_t fill_textconv(struct repository *r, *outbuf = ""; return 0; } - if (diff_populate_filespec(r, df, 0)) + if (diff_populate_filespec(r, df, NULL)) die("unable to read files to diff"); *outbuf = df->data; return df->size; -- cgit v1.2.3 From c14b6f83ec7453d2a93bba04f45caf26905f2bff Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Tue, 7 Apr 2020 15:11:42 -0700 Subject: diff: refactor object read Refactor the object reads in diff_populate_filespec() to have the first object read not be in an if/else branch, because in a future patch, a retry will be added to that first object read. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- diff.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) (limited to 'diff.c') diff --git a/diff.c b/diff.c index c7457aa4a1..61ce05d219 100644 --- a/diff.c +++ b/diff.c @@ -4023,12 +4023,22 @@ int diff_populate_filespec(struct repository *r, } } else { - enum object_type type; + struct object_info info = { + .sizep = &s->size + }; + + if (!(size_only || check_binary)) + /* + * Set contentp, since there is no chance that merely + * the size is sufficient. 
+ */ + info.contentp = &s->data; + + if (oid_object_info_extended(r, &s->oid, &info, + OBJECT_INFO_LOOKUP_REPLACE)) + die("unable to read %s", oid_to_hex(&s->oid)); + if (size_only || check_binary) { - type = oid_object_info(r, &s->oid, &s->size); - if (type < 0) - die("unable to read %s", - oid_to_hex(&s->oid)); if (size_only) return 0; if (s->size > big_file_threshold && s->is_binary == -1) { @@ -4036,9 +4046,12 @@ int diff_populate_filespec(struct repository *r, return 0; } } - s->data = repo_read_object_file(r, &s->oid, &type, &s->size); - if (!s->data) - die("unable to read %s", oid_to_hex(&s->oid)); + if (!info.contentp) { + info.contentp = &s->data; + if (oid_object_info_extended(r, &s->oid, &info, + OBJECT_INFO_LOOKUP_REPLACE)) + die("unable to read %s", oid_to_hex(&s->oid)); + } s->should_free = 1; } return 0; -- cgit v1.2.3 From 95acf11a3dc3d18ec999f4913ec6c6a54545c6b7 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Tue, 7 Apr 2020 15:11:43 -0700 Subject: diff: restrict when prefetching occurs Commit 7fbbcb21b1 ("diff: batch fetching of missing blobs", 2019-04-08) optimized "diff" by prefetching blobs in a partial clone, but there are some cases wherein blobs do not need to be prefetched. In these cases, any command that uses the diff machinery will unnecessarily fetch blobs. diffcore_std() may read blobs when it calls the following functions: (1) diffcore_skip_stat_unmatch() (controlled by the config variable diff.autorefreshindex) (2) diffcore_break() and diffcore_merge_broken() (for break-rewrite detection) (3) diffcore_rename() (for rename detection) (4) diffcore_pickaxe() (for detecting addition/deletion of specified string) Instead of always prefetching blobs, teach diffcore_skip_stat_unmatch(), diffcore_break(), and diffcore_rename() to prefetch blobs upon the first read of a missing object. 
This covers (1), (2), and (3): to cover the rest, teach diffcore_std() to prefetch if the output type is one that includes blob data (and hence blob data will be required later anyway), or if it knows that (4) will be run. Helped-by: Jeff King Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- diff.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 51 insertions(+), 22 deletions(-) (limited to 'diff.c') diff --git a/diff.c b/diff.c index 61ce05d219..b061f5bc70 100644 --- a/diff.c +++ b/diff.c @@ -4034,10 +4034,18 @@ int diff_populate_filespec(struct repository *r, */ info.contentp = &s->data; + if (options && options->missing_object_cb) { + if (!oid_object_info_extended(r, &s->oid, &info, + OBJECT_INFO_LOOKUP_REPLACE | + OBJECT_INFO_SKIP_FETCH_OBJECT)) + goto object_read; + options->missing_object_cb(options->missing_object_data); + } if (oid_object_info_extended(r, &s->oid, &info, OBJECT_INFO_LOOKUP_REPLACE)) die("unable to read %s", oid_to_hex(&s->oid)); +object_read: if (size_only || check_binary) { if (size_only) return 0; @@ -6444,6 +6452,8 @@ static int diff_filespec_check_stat_unmatch(struct repository *r, { struct diff_populate_filespec_options dpf_options = { .check_size_only = 1, + .missing_object_cb = diff_queued_diff_prefetch, + .missing_object_data = r, }; if (p->done_skip_stat_unmatch) @@ -6520,9 +6530,9 @@ void diffcore_fix_diff_index(void) QSORT(q->queue, q->nr, diffnamecmp); } -static void add_if_missing(struct repository *r, - struct oid_array *to_fetch, - const struct diff_filespec *filespec) +void diff_add_if_missing(struct repository *r, + struct oid_array *to_fetch, + const struct diff_filespec *filespec) { if (filespec && filespec->oid_valid && !S_ISGITLINK(filespec->mode) && @@ -6531,29 +6541,48 @@ static void add_if_missing(struct repository *r, oid_array_append(to_fetch, &filespec->oid); } -void diffcore_std(struct diff_options *options) +void diff_queued_diff_prefetch(void *repository) { - 
if (options->repo == the_repository && has_promisor_remote()) { - /* - * Prefetch the diff pairs that are about to be flushed. - */ - int i; - struct diff_queue_struct *q = &diff_queued_diff; - struct oid_array to_fetch = OID_ARRAY_INIT; + struct repository *repo = repository; + int i; + struct diff_queue_struct *q = &diff_queued_diff; + struct oid_array to_fetch = OID_ARRAY_INIT; - for (i = 0; i < q->nr; i++) { - struct diff_filepair *p = q->queue[i]; - add_if_missing(options->repo, &to_fetch, p->one); - add_if_missing(options->repo, &to_fetch, p->two); - } - /* - * NEEDSWORK: Consider deduplicating the OIDs sent. - */ - promisor_remote_get_direct(options->repo, - to_fetch.oid, to_fetch.nr); - oid_array_clear(&to_fetch); + for (i = 0; i < q->nr; i++) { + struct diff_filepair *p = q->queue[i]; + diff_add_if_missing(repo, &to_fetch, p->one); + diff_add_if_missing(repo, &to_fetch, p->two); } + /* + * NEEDSWORK: Consider deduplicating the OIDs sent. + */ + promisor_remote_get_direct(repo, to_fetch.oid, to_fetch.nr); + + oid_array_clear(&to_fetch); +} + +void diffcore_std(struct diff_options *options) +{ + int output_formats_to_prefetch = DIFF_FORMAT_DIFFSTAT | + DIFF_FORMAT_NUMSTAT | + DIFF_FORMAT_PATCH | + DIFF_FORMAT_SHORTSTAT | + DIFF_FORMAT_DIRSTAT; + + /* + * Check if the user requested a blob-data-requiring diff output and/or + * pickaxe, i.e. diffcore_pickaxe() (which requires blob data). If yes, prefetch + * the diff pairs. + * + * If no prefetching occurs, diffcore_rename() will prefetch if it + * decides that it needs inexact rename detection. + */ + if (options->repo == the_repository && has_promisor_remote() && + (options->output_format & output_formats_to_prefetch || + options->pickaxe_opts & DIFF_PICKAXE_KINDS_MASK)) + diff_queued_diff_prefetch(options->repo); + /* NOTE please keep the following in sync with diff_tree_combined() */ if (options->skip_stat_unmatch) diffcore_skip_stat_unmatch(options); -- cgit v1.2.3