From ded0abc73c67c6a9b9dbc2a22755fab01ae17e41 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sat, 19 Feb 2011 03:04:56 -0500 Subject: diff: handle diffstat of rewritten binary files The logic in builtin_diffstat assumes that a complete_rewrite pair should have its lines counted. This is nonsensical for binary files and leads to confusing things like: $ git diff --stat --summary HEAD^ HEAD foo.rand | Bin 4096 -> 4096 bytes 1 files changed, 0 insertions(+), 0 deletions(-) $ git diff --stat --summary -B HEAD^ HEAD foo.rand | 34 +++++++++++++++------------------- 1 files changed, 15 insertions(+), 19 deletions(-) rewrite foo.rand (100%) So let's reorder the function to handle binary files first (which from diffstat's perspective look like complete rewrites anyway), then rewrites, then actual diffstats. There are two bonus prizes to this reorder: 1. It gets rid of a now-superfluous goto. 2. The binary case is at the top, which means we can further optimize it in the next patch. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- diff.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'diff.c') diff --git a/diff.c b/diff.c index 381cc8d4fd..14a354147c 100644 --- a/diff.c +++ b/diff.c @@ -1811,26 +1811,31 @@ static void builtin_diffstat(const char *name_a, const char *name_b, data->is_unmerged = 1; return; } - if (complete_rewrite) { + + if (diff_filespec_is_binary(one) || diff_filespec_is_binary(two)) { + if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0) + die("unable to read files to diff"); + data->is_binary = 1; + data->added = mf2.size; + data->deleted = mf1.size; + } + + else if (complete_rewrite) { diff_populate_filespec(one, 0); diff_populate_filespec(two, 0); data->deleted = count_lines(one->data, one->size); data->added = count_lines(two->data, two->size); - goto free_and_return; } - if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0) - die("unable to read files to diff"); - if (diff_filespec_is_binary(one) || diff_filespec_is_binary(two)) { - data->is_binary = 1; - data->added = mf2.size; - data->deleted = mf1.size; - } else { + else { /* Crazy xdl interfaces.. */ xpparam_t xpp; xdemitconf_t xecfg; xdemitcb_t ecb; + if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0) + die("unable to read files to diff"); + memset(&xpp, 0, sizeof(xpp)); memset(&xecfg, 0, sizeof(xecfg)); xpp.flags = XDF_NEED_MINIMAL | o->xdl_opts; @@ -1838,7 +1843,6 @@ static void builtin_diffstat(const char *name_a, const char *name_b, &xpp, &xecfg, &ecb); } - free_and_return: diff_free_filespec_data(one); diff_free_filespec_data(two); } -- cgit v1.2.3 From abb371a1ef5bddee904551afdec38854f95e99bb Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sat, 19 Feb 2011 03:16:32 -0500 Subject: diff: don't retrieve binary blobs for diffstat We only need the size, which is much cheaper to get, especially if it is a big binary file. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- diff.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'diff.c') diff --git a/diff.c b/diff.c index 14a354147c..0d2ed00dad 100644 --- a/diff.c +++ b/diff.c @@ -235,6 +235,15 @@ static int fill_mmfile(mmfile_t *mf, struct diff_filespec *one) return 0; } +/* like fill_mmfile, but only for size, so we can avoid retrieving blob */ +static unsigned long diff_filespec_size(struct diff_filespec *one) +{ + if (!DIFF_FILE_VALID(one)) + return 0; + diff_populate_filespec(one, 1); + return one->size; +} + static int count_trailing_blank(mmfile_t *mf, unsigned ws_rule) { char *ptr = mf->ptr; @@ -1813,11 +1822,9 @@ static void builtin_diffstat(const char *name_a, const char *name_b, } if (diff_filespec_is_binary(one) || diff_filespec_is_binary(two)) { - if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0) - die("unable to read files to diff"); data->is_binary = 1; - data->added = mf2.size; - data->deleted = mf1.size; + data->added = diff_filespec_size(two); + data->deleted = diff_filespec_size(one); } else if (complete_rewrite) { -- cgit v1.2.3