diff options
author | Junio C Hamano <gitster@pobox.com> | 2021-07-13 16:52:50 -0700 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2021-07-13 16:52:50 -0700 |
commit | 4da281e84d87b099ba8d0a255534dc1251be968e (patch) | |
tree | 5919bdb367772fbbbe822f72f33ea2256b94d33a | |
parent | Merge branch 'hn/prep-tests-for-reftable' (diff) | |
parent | xdiff-interface: replace discard_hunk_line() with a flag (diff) | |
download | tgif-4da281e84d87b099ba8d0a255534dc1251be968e.tar.xz |
Merge branch 'ab/pickaxe-pcre2'
Rewrite the backend for "diff -G/-S" to use pcre2 engine when
available.
* ab/pickaxe-pcre2: (22 commits)
xdiff-interface: replace discard_hunk_line() with a flag
xdiff users: use designated initializers for out_line
pickaxe -G: don't special-case create/delete
pickaxe -G: terminate early on matching lines
xdiff-interface: allow early return from xdiff_emit_line_fn
xdiff-interface: prepare for allowing early return
pickaxe -S: slightly optimize contains()
pickaxe: rename variables in has_changes() for brevity
pickaxe -S: support content with NULs under --pickaxe-regex
pickaxe: assert that we must have a needle under -G or -S
pickaxe: refactor function selection in diffcore-pickaxe()
perf: add performance test for pickaxe
pickaxe/style: consolidate declarations and assignments
diff.h: move pickaxe fields together again
pickaxe: die when --find-object and --pickaxe-all are combined
pickaxe: die when -G and --pickaxe-regex are combined
pickaxe tests: add missing test for --no-pickaxe-regex being an error
pickaxe tests: test for -G, -S and --find-object incompatibility
pickaxe tests: add test for "log -S" not being a regex
pickaxe tests: add test for diffgrep_consume() internals
...
-rw-r--r-- | builtin/merge-tree.c | 5 | ||||
-rw-r--r-- | builtin/rerere.c | 4 | ||||
-rw-r--r-- | combine-diff.c | 5 | ||||
-rw-r--r-- | diff.c | 39 | ||||
-rw-r--r-- | diff.h | 7 | ||||
-rw-r--r-- | diffcore-pickaxe.c | 106 | ||||
-rw-r--r-- | range-diff.c | 3 | ||||
-rwxr-xr-x | t/perf/p4209-pickaxe.sh | 70 | ||||
-rwxr-xr-x | t/t4209-log-pickaxe.sh | 114 | ||||
-rwxr-xr-x | t/t7816-grep-binary-pattern.sh | 4 | ||||
-rw-r--r-- | xdiff-interface.c | 27 | ||||
-rw-r--r-- | xdiff-interface.h | 31 | ||||
-rw-r--r-- | xdiff/xdiff.h | 1 | ||||
-rw-r--r-- | xdiff/xemit.c | 3 |
14 files changed, 312 insertions, 107 deletions
diff --git a/builtin/merge-tree.c b/builtin/merge-tree.c index de8520778d..5dc94d6f88 100644 --- a/builtin/merge-tree.c +++ b/builtin/merge-tree.c @@ -107,15 +107,12 @@ static void show_diff(struct merge_list *entry) mmfile_t src, dst; xpparam_t xpp; xdemitconf_t xecfg; - xdemitcb_t ecb; + xdemitcb_t ecb = { .out_line = show_outf }; memset(&xpp, 0, sizeof(xpp)); xpp.flags = 0; memset(&xecfg, 0, sizeof(xecfg)); xecfg.ctxlen = 3; - ecb.out_hunk = NULL; - ecb.out_line = show_outf; - ecb.priv = NULL; src.ptr = origin(entry, &size); if (!src.ptr) diff --git a/builtin/rerere.c b/builtin/rerere.c index fd3be17b97..83d7a778e3 100644 --- a/builtin/rerere.c +++ b/builtin/rerere.c @@ -28,7 +28,7 @@ static int diff_two(const char *file1, const char *label1, { xpparam_t xpp; xdemitconf_t xecfg; - xdemitcb_t ecb; + xdemitcb_t ecb = { .out_line = outf }; mmfile_t minus, plus; int ret; @@ -41,8 +41,6 @@ static int diff_two(const char *file1, const char *label1, xpp.flags = 0; memset(&xecfg, 0, sizeof(xecfg)); xecfg.ctxlen = 3; - ecb.out_hunk = NULL; - ecb.out_line = outf; ret = xdi_diff(&minus, &plus, &xpp, &xecfg, &ecb); free(minus.ptr); diff --git a/combine-diff.c b/combine-diff.c index 7d925ce9ce..d93782daeb 100644 --- a/combine-diff.c +++ b/combine-diff.c @@ -403,11 +403,11 @@ static void consume_hunk(void *state_, state->sline[state->nb-1].p_lno[state->n] = state->ob; } -static void consume_line(void *state_, char *line, unsigned long len) +static int consume_line(void *state_, char *line, unsigned long len) { struct combine_diff_state *state = state_; if (!state->lost_bucket) - return; /* not in any hunk yet */ + return 0; /* not in any hunk yet */ switch (line[0]) { case '-': append_lost(state->lost_bucket, state->n, line+1, len-1); @@ -417,6 +417,7 @@ static void consume_line(void *state_, char *line, unsigned long len) state->lno++; break; } + return 0; } static void combine_diff(struct repository *r, @@ -2340,7 +2340,7 @@ static void find_lno(const char *line, struct emit_callback *ecbdata) ecbdata->lno_in_postimage = strtol(p + 1, NULL, 10); } -static void fn_out_consume(void *priv, char *line, unsigned long len) +static int fn_out_consume(void *priv, char *line, unsigned long len) { struct emit_callback *ecbdata = priv; struct diff_options *o = ecbdata->opt; @@ -2376,7 +2376,7 @@ static void fn_out_consume(void *priv, char *line, unsigned long len) len = sane_truncate_line(line, len); find_lno(line, ecbdata); emit_hunk_header(ecbdata, line, len); - return; + return 0; } if (ecbdata->diff_words) { @@ -2386,11 +2386,11 @@ static void fn_out_consume(void *priv, char *line, unsigned long len) if (line[0] == '-') { diff_words_append(line, len, &ecbdata->diff_words->minus); - return; + return 0; } else if (line[0] == '+') { diff_words_append(line, len, &ecbdata->diff_words->plus); - return; + return 0; } else if (starts_with(line, "\\ ")) { /* * Eat the "no newline at eof" marker as if we @@ -2399,11 +2399,11 @@ static void fn_out_consume(void *priv, char *line, unsigned long len) * defer processing. If this is the end of * preimage, more "+" lines may come after it. */ - return; + return 0; } diff_words_flush(ecbdata); emit_diff_symbol(o, s, line, len, 0); - return; + return 0; } switch (line[0]) { @@ -2427,6 +2427,7 @@ static void fn_out_consume(void *priv, char *line, unsigned long len) line, len, 0); break; } + return 0; } static void pprint_rename(struct strbuf *name, const char *a, const char *b) @@ -2526,7 +2527,7 @@ static struct diffstat_file *diffstat_add(struct diffstat_t *diffstat, return x; } -static void diffstat_consume(void *priv, char *line, unsigned long len) +static int diffstat_consume(void *priv, char *line, unsigned long len) { struct diffstat_t *diffstat = priv; struct diffstat_file *x = diffstat->files[diffstat->nr - 1]; @@ -2535,6 +2536,7 @@ static void diffstat_consume(void *priv, char *line, unsigned long len) x->added++; else if (line[0] == '-') x->deleted++; + return 0; } const char mime_boundary_leader[] = "------------"; @@ -3212,7 +3214,7 @@ static void checkdiff_consume_hunk(void *priv, data->lineno = nb - 1; } -static void checkdiff_consume(void *priv, char *line, unsigned long len) +static int checkdiff_consume(void *priv, char *line, unsigned long len) { struct checkdiff_t *data = priv; int marker_size = data->conflict_marker_size; @@ -3236,7 +3238,7 @@ static void checkdiff_consume(void *priv, char *line, unsigned long len) } bad = ws_check(line + 1, len - 1, data->ws_rule); if (!bad) - return; + return 0; data->status |= bad; err = whitespace_error_string(bad); fprintf(data->o->file, "%s%s:%d: %s.\n", @@ -3248,6 +3250,7 @@ static void checkdiff_consume(void *priv, char *line, unsigned long len) } else if (line[0] == ' ') { data->lineno++; } + return 0; } static unsigned char *deflate_it(char *data, @@ -3726,7 +3729,8 @@ static void builtin_diffstat(const char *name_a, const char *name_b, xpp.anchors_nr = o->anchors_nr; xecfg.ctxlen = o->context; xecfg.interhunkctxlen = o->interhunkcontext; - if (xdi_diff_outf(&mf1, &mf2, discard_hunk_line, + xecfg.flags = XDL_EMIT_NO_HUNK_HDR; + if (xdi_diff_outf(&mf1, &mf2, NULL, diffstat_consume, diffstat, &xpp, &xecfg)) die("unable to generate diffstat for %s", one->path); @@ -4632,6 +4636,12 @@ void diff_setup_done(struct diff_options *options) if (HAS_MULTI_BITS(options->pickaxe_opts & DIFF_PICKAXE_KINDS_MASK)) die(_("-G, -S and --find-object are mutually exclusive")); + if (HAS_MULTI_BITS(options->pickaxe_opts & DIFF_PICKAXE_KINDS_G_REGEX_MASK)) + die(_("-G and --pickaxe-regex are mutually exclusive, use --pickaxe-regex with -S")); + + if (HAS_MULTI_BITS(options->pickaxe_opts & DIFF_PICKAXE_KINDS_ALL_OBJFIND_MASK)) + die(_("---pickaxe-all and --find-object are mutually exclusive, use --pickaxe-all with -G and -S")); + /* * Most of the time we can say "there are changes" * only by checking if there are changed paths, but @@ -6119,17 +6129,18 @@ void flush_one_hunk(struct object_id *result, git_hash_ctx *ctx) } } -static void patch_id_consume(void *priv, char *line, unsigned long len) +static int patch_id_consume(void *priv, char *line, unsigned long len) { struct patch_id_t *data = priv; int new_len; if (len > 12 && starts_with(line, "\\ ")) - return; + return 0; new_len = remove_space(line, len); the_hash_algo->update_fn(data->ctx, line, new_len); data->patchlen += new_len; + return 0; } static void patch_id_add_string(git_hash_ctx *ctx, const char *str) @@ -6227,8 +6238,8 @@ static int diff_get_patch_id(struct diff_options *options, struct object_id *oid xpp.flags = 0; xecfg.ctxlen = 3; - xecfg.flags = 0; - if (xdi_diff_outf(&mf1, &mf2, discard_hunk_line, + xecfg.flags = XDL_EMIT_NO_HUNK_HDR; + if (xdi_diff_outf(&mf1, &mf2, NULL, patch_id_consume, &data, &xpp, &xecfg)) return error("unable to generate patch-id diff for %s", p->one->path); @@ -265,6 +265,7 @@ struct diff_options { * postimage of the diff_queue. */ const char *pickaxe; + unsigned pickaxe_opts; /* -I<regex> */ regex_t **ignore_regex; @@ -304,8 +305,6 @@ struct diff_options { /* The output format used when `diff_flush()` is run. */ int output_format; - unsigned pickaxe_opts; - /* Affects the way detection logic for complete rewrites, renames and * copies. */ @@ -556,6 +555,10 @@ int git_config_rename(const char *var, const char *value); #define DIFF_PICKAXE_KINDS_MASK (DIFF_PICKAXE_KIND_S | \ DIFF_PICKAXE_KIND_G | \ DIFF_PICKAXE_KIND_OBJFIND) +#define DIFF_PICKAXE_KINDS_G_REGEX_MASK (DIFF_PICKAXE_KIND_G | \ + DIFF_PICKAXE_REGEX) +#define DIFF_PICKAXE_KINDS_ALL_OBJFIND_MASK (DIFF_PICKAXE_ALL | \ + DIFF_PICKAXE_KIND_OBJFIND) #define DIFF_PICKAXE_IGNORE_CASE 32 diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c index a9c6d60df2..c88e50c632 100644 --- a/diffcore-pickaxe.c +++ b/diffcore-pickaxe.c @@ -19,38 +19,31 @@ struct diffgrep_cb { int hit; }; -static void diffgrep_consume(void *priv, char *line, unsigned long len) +static int diffgrep_consume(void *priv, char *line, unsigned long len) { struct diffgrep_cb *data = priv; regmatch_t regmatch; if (line[0] != '+' && line[0] != '-') - return; + return 0; if (data->hit) - /* - * NEEDSWORK: we should have a way to terminate the - * caller early. - */ - return; - data->hit = !regexec_buf(data->regexp, line + 1, len - 1, 1, - ®match, 0); + BUG("Already matched in diffgrep_consume! Broken xdiff_emit_line_fn?"); + if (!regexec_buf(data->regexp, line + 1, len - 1, 1, + ®match, 0)) { + data->hit = 1; + return 1; + } + return 0; } static int diff_grep(mmfile_t *one, mmfile_t *two, struct diff_options *o, regex_t *regexp, kwset_t kws) { - regmatch_t regmatch; struct diffgrep_cb ecbdata; xpparam_t xpp; xdemitconf_t xecfg; - - if (!one) - return !regexec_buf(regexp, two->ptr, two->size, - 1, ®match, 0); - if (!two) - return !regexec_buf(regexp, one->ptr, one->size, - 1, ®match, 0); + int ret; /* * We have both sides; need to run textual diff and see if @@ -60,38 +53,47 @@ static int diff_grep(mmfile_t *one, mmfile_t *two, memset(&xecfg, 0, sizeof(xecfg)); ecbdata.regexp = regexp; ecbdata.hit = 0; + xecfg.flags = XDL_EMIT_NO_HUNK_HDR; xecfg.ctxlen = o->context; xecfg.interhunkctxlen = o->interhunkcontext; - if (xdi_diff_outf(one, two, discard_hunk_line, diffgrep_consume, - &ecbdata, &xpp, &xecfg)) - return 0; - return ecbdata.hit; + + /* + * An xdiff error might be our "data->hit" from above. See the + * comment for xdiff_emit_line_fn in xdiff-interface.h + */ + ret = xdi_diff_outf(one, two, NULL, diffgrep_consume, + &ecbdata, &xpp, &xecfg); + if (ecbdata.hit) + return 1; + if (ret) + return ret; + return 0; } -static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws) +static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws, + unsigned int limit) { - unsigned int cnt; - unsigned long sz; - const char *data; - - sz = mf->size; - data = mf->ptr; - cnt = 0; + unsigned int cnt = 0; + unsigned long sz = mf->size; + const char *data = mf->ptr; if (regexp) { regmatch_t regmatch; int flags = 0; - while (sz && *data && + while (sz && !regexec_buf(regexp, data, sz, 1, ®match, flags)) { flags |= REG_NOTBOL; data += regmatch.rm_eo; sz -= regmatch.rm_eo; - if (sz && *data && regmatch.rm_so == regmatch.rm_eo) { + if (sz && regmatch.rm_so == regmatch.rm_eo) { data++; sz--; } cnt++; + + if (limit && cnt == limit) + return cnt; } } else { /* Classic exact string match */ @@ -103,6 +105,9 @@ static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws) sz -= offset + kwsm.size[0]; data += offset + kwsm.size[0]; cnt++; + + if (limit && cnt == limit) + return cnt; } } return cnt; @@ -112,9 +117,9 @@ static int has_changes(mmfile_t *one, mmfile_t *two, struct diff_options *o, regex_t *regexp, kwset_t kws) { - unsigned int one_contains = one ? contains(one, regexp, kws) : 0; - unsigned int two_contains = two ? contains(two, regexp, kws) : 0; - return one_contains != two_contains; + unsigned int c1 = one ? contains(one, regexp, kws, 0) : 0; + unsigned int c2 = two ? contains(two, regexp, kws, c1 + 1) : 0; + return c1 != c2; } static int pickaxe_match(struct diff_filepair *p, struct diff_options *o, @@ -136,9 +141,6 @@ static int pickaxe_match(struct diff_filepair *p, struct diff_options *o, oidset_contains(o->objfind, &p->two->oid)); } - if (!o->pickaxe[0]) - return 0; - if (o->flags.allow_textconv) { textconv_one = get_textconv(o->repo, p->one); textconv_two = get_textconv(o->repo, p->two); @@ -163,9 +165,7 @@ static int pickaxe_match(struct diff_filepair *p, struct diff_options *o, mf1.size = fill_textconv(o->repo, textconv_one, p->one, &mf1.ptr); mf2.size = fill_textconv(o->repo, textconv_two, p->two, &mf2.ptr); - ret = fn(DIFF_FILE_VALID(p->one) ? &mf1 : NULL, - DIFF_FILE_VALID(p->two) ? &mf2 : NULL, - o, regexp, kws); + ret = fn(&mf1, &mf2, o, regexp, kws); if (textconv_one) free(mf1.ptr); @@ -232,13 +232,31 @@ void diffcore_pickaxe(struct diff_options *o) int opts = o->pickaxe_opts; regex_t regex, *regexp = NULL; kwset_t kws = NULL; + pickaxe_fn fn; + if (opts & ~DIFF_PICKAXE_KIND_OBJFIND && + (!needle || !*needle)) + BUG("should have needle under -G or -S"); if (opts & (DIFF_PICKAXE_REGEX | DIFF_PICKAXE_KIND_G)) { int cflags = REG_EXTENDED | REG_NEWLINE; if (o->pickaxe_opts & DIFF_PICKAXE_IGNORE_CASE) cflags |= REG_ICASE; regcomp_or_die(®ex, needle, cflags); regexp = ®ex; + + if (opts & DIFF_PICKAXE_KIND_G) + fn = diff_grep; + else if (opts & DIFF_PICKAXE_REGEX) + fn = has_changes; + else + /* + * We don't need to check the combination of + * -G and --pickaxe-regex, by the time we get + * here diff.c has already died if they're + * combined. See the usage tests in + * t4209-log-pickaxe.sh. + */ + BUG("unreachable"); } else if (opts & DIFF_PICKAXE_KIND_S) { if (o->pickaxe_opts & DIFF_PICKAXE_IGNORE_CASE && has_non_ascii(needle)) { @@ -255,10 +273,14 @@ void diffcore_pickaxe(struct diff_options *o) kwsincr(kws, needle, strlen(needle)); kwsprep(kws); } + fn = has_changes; + } else if (opts & DIFF_PICKAXE_KIND_OBJFIND) { + fn = NULL; + } else { + BUG("unknown pickaxe_opts flag"); } - pickaxe(&diff_queued_diff, o, regexp, kws, - (opts & DIFF_PICKAXE_KIND_G) ? diff_grep : has_changes); + pickaxe(&diff_queued_diff, o, regexp, kws, fn); if (regexp) regfree(regexp); diff --git a/range-diff.c b/range-diff.c index 1a4471fe4c..e9479794b4 100644 --- a/range-diff.c +++ b/range-diff.c @@ -274,9 +274,10 @@ static void find_exact_matches(struct string_list *a, struct string_list *b) hashmap_clear(&map); } -static void diffsize_consume(void *data, char *line, unsigned long len) +static int diffsize_consume(void *data, char *line, unsigned long len) { (*(int *)data)++; + return 0; } static void diffsize_hunk(void *data, long ob, long on, long nb, long nn, diff --git a/t/perf/p4209-pickaxe.sh b/t/perf/p4209-pickaxe.sh new file mode 100755 index 0000000000..f585a4465a --- /dev/null +++ b/t/perf/p4209-pickaxe.sh @@ -0,0 +1,70 @@ +#!/bin/sh + +test_description="Test pickaxe performance" + +. ./perf-lib.sh + +test_perf_default_repo + +# Not --max-count, as that's the number of matching commit, so it's +# unbounded. We want to limit our revision walk here. +from_rev_desc= +from_rev= +max_count=1000 +if test_have_prereq EXPENSIVE +then + max_count=10000 +fi +from_rev=" $(git rev-list HEAD | head -n $max_count | tail -n 1).." +from_rev_desc=" <limit-rev>.." + +for icase in \ + '' \ + '-i ' +do + # -S (no regex) + for pattern in \ + 'int main' \ + 'æ' + do + for opts in \ + '-S' + do + test_perf "git log $icase$opts'$pattern'$from_rev_desc" " + git log --pretty=format:%H $icase$opts'$pattern'$from_rev + " + done + done + + # -S (regex) + for pattern in \ + '(int|void|null)' \ + 'if *\([^ ]+ & ' \ + '[àáâãäåæñøùúûüýþ]' + do + for opts in \ + '--pickaxe-regex -S' + do + test_perf "git log $icase$opts'$pattern'$from_rev_desc" " + git log --pretty=format:%H $icase$opts'$pattern'$from_rev + " + done + done + + # -G + for pattern in \ + '(int|void|null)' \ + 'if *\([^ ]+ & ' \ + '[àáâãäåæñøùúûüýþ]' + do + for opts in \ + '-G' + do + test_perf "git log $icase$opts'$pattern'$from_rev_desc" " + git log --pretty=format:%H $icase$opts'$pattern'$from_rev + " + done + done +done + +test_done diff --git a/t/t4209-log-pickaxe.sh b/t/t4209-log-pickaxe.sh index 5d06f5f45e..75795d0b49 100755 --- a/t/t4209-log-pickaxe.sh +++ b/t/t4209-log-pickaxe.sh @@ -55,6 +55,43 @@ test_expect_success setup ' git rev-parse --verify HEAD >expect_second ' +test_expect_success 'usage' ' + test_expect_code 129 git log -S 2>err && + test_i18ngrep "switch.*requires a value" err && + + test_expect_code 129 git log -G 2>err && + test_i18ngrep "switch.*requires a value" err && + + test_expect_code 128 git log -Gregex -Sstring 2>err && + grep "mutually exclusive" err && + + test_expect_code 128 git log -Gregex --find-object=HEAD 2>err && + grep "mutually exclusive" err && + + test_expect_code 128 git log -Sstring --find-object=HEAD 2>err && + grep "mutually exclusive" err && + + test_expect_code 128 git log --pickaxe-all --find-object=HEAD 2>err && + grep "mutually exclusive" err +' + +test_expect_success 'usage: --pickaxe-regex' ' + test_expect_code 128 git log -Gregex --pickaxe-regex 2>err && + grep "mutually exclusive" err +' + +test_expect_success 'usage: --no-pickaxe-regex' ' + cat >expect <<-\EOF && + fatal: unrecognized argument: --no-pickaxe-regex + EOF + + test_expect_code 128 git log -Sstring --no-pickaxe-regex 2>actual && + test_cmp expect actual && + + test_expect_code 128 git log -Gstring --no-pickaxe-regex 2>err && + test_cmp expect actual +' + test_log expect_initial --grep initial test_log expect_nomatch --grep InItial test_log_icase expect_initial --grep InItial @@ -106,38 +143,83 @@ test_expect_success 'log -S --no-textconv (missing textconv tool)' ' rm .gitattributes ' +test_expect_success 'setup log -[GS] plain & regex' ' + test_create_repo GS-plain && + test_commit -C GS-plain --append A data.txt "a" && + test_commit -C GS-plain --append B data.txt "a a" && + test_commit -C GS-plain --append C data.txt "b" && + test_commit -C GS-plain --append D data.txt "[b]" && + test_commit -C GS-plain E data.txt "" && + + # We also include E, the deletion commit + git -C GS-plain log --grep="[ABE]" >A-to-B-then-E-log && + git -C GS-plain log --grep="[CDE]" >C-to-D-then-E-log && + git -C GS-plain log --grep="[DE]" >D-then-E-log && + git -C GS-plain log >full-log +' + +test_expect_success 'log -G trims diff new/old [-+]' ' + git -C GS-plain log -G"[+-]a" >log && + test_must_be_empty log && + git -C GS-plain log -G"^a" >log && + test_cmp log A-to-B-then-E-log +' + +test_expect_success 'log -S<pat> is not a regex, but -S<pat> --pickaxe-regex is' ' + git -C GS-plain log -S"a" >log && + test_cmp log A-to-B-then-E-log && + + git -C GS-plain log -S"[a]" >log && + test_must_be_empty log && + + git -C GS-plain log -S"[a]" --pickaxe-regex >log && + test_cmp log A-to-B-then-E-log && + + git -C GS-plain log -S"[b]" >log && + test_cmp log D-then-E-log && + + git -C GS-plain log -S"[b]" --pickaxe-regex >log && + test_cmp log C-to-D-then-E-log +' + test_expect_success 'setup log -[GS] binary & --text' ' - git checkout --orphan GS-binary-and-text && - git read-tree --empty && - printf "a\na\0a\n" >data.bin && - git add data.bin && - git commit -m "create binary file" data.bin && - printf "a\na\0a\n" >>data.bin && - git commit -m "modify binary file" data.bin && - git rm data.bin && - git commit -m "delete binary file" data.bin && - git log >full-log + test_create_repo GS-bin-txt && + test_commit -C GS-bin-txt --printf A data.bin "a\na\0a\n" && + test_commit -C GS-bin-txt --append --printf B data.bin "a\na\0a\n" && + test_commit -C GS-bin-txt C data.bin "" && + git -C GS-bin-txt log >full-log ' test_expect_success 'log -G ignores binary files' ' - git log -Ga >log && + git -C GS-bin-txt log -Ga >log && test_must_be_empty log ' test_expect_success 'log -G looks into binary files with -a' ' - git log -a -Ga >log && + git -C GS-bin-txt log -a -Ga >log && test_cmp log full-log ' test_expect_success 'log -G looks into binary files with textconv filter' ' - test_when_finished "rm .gitattributes" && - echo "* diff=bin" >.gitattributes && - git -c diff.bin.textconv=cat log -Ga >log && + test_when_finished "rm GS-bin-txt/.gitattributes" && + ( + cd GS-bin-txt && + echo "* diff=bin" >.gitattributes && + git -c diff.bin.textconv=cat log -Ga >../log + ) && test_cmp log full-log ' test_expect_success 'log -S looks into binary files' ' - git log -Sa >log && + git -C GS-bin-txt log -Sa >log && + test_cmp log full-log +' + +test_expect_success 'log -S --pickaxe-regex looks into binary files' ' + git -C GS-bin-txt log --pickaxe-regex -Sa >log && + test_cmp log full-log && + + git -C GS-bin-txt log --pickaxe-regex -S"[a]" >log && test_cmp log full-log ' diff --git a/t/t7816-grep-binary-pattern.sh b/t/t7816-grep-binary-pattern.sh index 60bab291e4..9d67a5fc4c 100755 --- a/t/t7816-grep-binary-pattern.sh +++ b/t/t7816-grep-binary-pattern.sh @@ -59,7 +59,7 @@ test_expect_success 'setup' " git commit -m. " -# Simple fixed-string matching that can use kwset (no -i && non-ASCII) +# Simple fixed-string matching nul_match P P P '-F' 'yQf' nul_match P P P '-F' 'yQx' nul_match P P P '-Fi' 'YQf' @@ -78,7 +78,7 @@ nul_match P P P '-Fi' '[Y]QF' nul_match P P P '-F' 'æQ[ð]' nul_match P P P '-F' '[æ]Qð' -# The -F kwset codepath can't handle -i && non-ASCII... +# Matching pattern and subject case with -i nul_match P 1 1 '-i' '[æ]Qð' # ...PCRE v2 only matches non-ASCII with -i casefolding under UTF-8 diff --git a/xdiff-interface.c b/xdiff-interface.c index 609615db2c..75b32aef51 100644 --- a/xdiff-interface.c +++ b/xdiff-interface.c @@ -31,29 +31,36 @@ static int xdiff_out_hunk(void *priv_, return 0; } -static void consume_one(void *priv_, char *s, unsigned long size) +static int consume_one(void *priv_, char *s, unsigned long size) { struct xdiff_emit_state *priv = priv_; char *ep; while (size) { unsigned long this_size; + int ret; ep = memchr(s, '\n', size); this_size = (ep == NULL) ? size : (ep - s + 1); - priv->line_fn(priv->consume_callback_data, s, this_size); + ret = priv->line_fn(priv->consume_callback_data, s, this_size); + if (ret) + return ret; size -= this_size; s += this_size; } + return 0; } static int xdiff_outf(void *priv_, mmbuffer_t *mb, int nbuf) { struct xdiff_emit_state *priv = priv_; int i; + int stop = 0; if (!priv->line_fn) return 0; for (i = 0; i < nbuf; i++) { + if (stop) + return 1; if (mb[i].ptr[mb[i].size-1] != '\n') { /* Incomplete line */ strbuf_add(&priv->remainder, mb[i].ptr, mb[i].size); @@ -62,17 +69,21 @@ static int xdiff_outf(void *priv_, mmbuffer_t *mb, int nbuf) /* we have a complete line */ if (!priv->remainder.len) { - consume_one(priv, mb[i].ptr, mb[i].size); + stop = consume_one(priv, mb[i].ptr, mb[i].size); continue; } strbuf_add(&priv->remainder, mb[i].ptr, mb[i].size); - consume_one(priv, priv->remainder.buf, priv->remainder.len); + stop = consume_one(priv, priv->remainder.buf, priv->remainder.len); strbuf_reset(&priv->remainder); } + if (stop) + return -1; if (priv->remainder.len) { - consume_one(priv, priv->remainder.buf, priv->remainder.len); + stop = consume_one(priv, priv->remainder.buf, priv->remainder.len); strbuf_reset(&priv->remainder); } + if (stop) + return -1; return 0; } @@ -115,12 +126,6 @@ int xdi_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, xdemitconf_t co return xdl_diff(&a, &b, xpp, xecfg, xecb); } -void discard_hunk_line(void *priv, - long ob, long on, long nb, long nn, - const char *func, long funclen) -{ -} - int xdi_diff_outf(mmfile_t *mf1, mmfile_t *mf2, xdiff_emit_hunk_fn hunk_fn, xdiff_emit_line_fn line_fn, diff --git a/xdiff-interface.h b/xdiff-interface.h index 93df26900c..4301a7eef2 100644 --- a/xdiff-interface.h +++ b/xdiff-interface.h @@ -11,7 +11,28 @@ */ #define MAX_XDIFF_SIZE (1024UL * 1024 * 1023) -typedef void (*xdiff_emit_line_fn)(void *, char *, unsigned long); +/** + * The `xdiff_emit_line_fn` function can return 1 to abort early, or 0 + * to continue processing. Note that doing so is an all-or-nothing + * affair, as returning 1 will return all the way to the top-level, + * e.g. the xdi_diff_outf() call to generate the diff. + * + * Thus returning 1 means you won't be getting any more diff lines. If + * you need something in-between those two options you'll to use + * `xdl_emit_hunk_consume_func_t` and implement your own version of + * xdl_emit_diff(). + * + * We may extend the interface in the future to understand other more + * granular return values. While you should return 1 to exit early, + * doing so will currently make your early return indistinguishable + * from an error internal to xdiff, xdiff itself will see that + * non-zero return and translate it to -1. + * + * See "diff_grep" in diffcore-pickaxe.c for a trick to work around + * this, i.e. using the "consume_callback_data" to note the desired + * early return. + */ +typedef int (*xdiff_emit_line_fn)(void *, char *, unsigned long); typedef void (*xdiff_emit_hunk_fn)(void *data, long old_begin, long old_nr, long new_begin, long new_nr, @@ -33,14 +54,6 @@ int git_xmerge_config(const char *var, const char *value, void *cb); extern int git_xmerge_style; /* - * Can be used as a no-op hunk_fn for xdi_diff_outf(), since a NULL - * one just sends the hunk line to the line_fn callback). - */ -void discard_hunk_line(void *priv, - long ob, long on, long nb, long nn, - const char *func, long funclen); - -/* * Compare the strings l1 with l2 which are of size s1 and s2 respectively. * Returns 1 if the strings are deemed equal, 0 otherwise. * The `flags` given as XDF_WHITESPACE_FLAGS determine how white spaces diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h index 7a04605146..b29deca5de 100644 --- a/xdiff/xdiff.h +++ b/xdiff/xdiff.h @@ -50,6 +50,7 @@ extern "C" { /* xdemitconf_t.flags */ #define XDL_EMIT_FUNCNAMES (1 << 0) +#define XDL_EMIT_NO_HUNK_HDR (1 << 1) #define XDL_EMIT_FUNCCONTEXT (1 << 2) /* merge simplification levels */ diff --git a/xdiff/xemit.c b/xdiff/xemit.c index 9d7d6c5087..1cbf2b9829 100644 --- a/xdiff/xemit.c +++ b/xdiff/xemit.c @@ -278,7 +278,8 @@ pre_context_calculation: s1 - 1, funclineprev); funclineprev = s1 - 1; } - if (xdl_emit_hunk_hdr(s1 + 1, e1 - s1, s2 + 1, e2 - s2, + if (!(xecfg->flags & XDL_EMIT_NO_HUNK_HDR) && + xdl_emit_hunk_hdr(s1 + 1, e1 - s1, s2 + 1, e2 - s2, func_line.buf, func_line.len, ecb) < 0) return -1; |