summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLibravatar Junio C Hamano <gitster@pobox.com>2021-07-13 16:52:50 -0700
committerLibravatar Junio C Hamano <gitster@pobox.com>2021-07-13 16:52:50 -0700
commit4da281e84d87b099ba8d0a255534dc1251be968e (patch)
tree5919bdb367772fbbbe822f72f33ea2256b94d33a
parentMerge branch 'hn/prep-tests-for-reftable' (diff)
parentxdiff-interface: replace discard_hunk_line() with a flag (diff)
downloadtgif-4da281e84d87b099ba8d0a255534dc1251be968e.tar.xz
Merge branch 'ab/pickaxe-pcre2'
Rewrite the backend for "diff -G/-S" to use pcre2 engine when available. * ab/pickaxe-pcre2: (22 commits) xdiff-interface: replace discard_hunk_line() with a flag xdiff users: use designated initializers for out_line pickaxe -G: don't special-case create/delete pickaxe -G: terminate early on matching lines xdiff-interface: allow early return from xdiff_emit_line_fn xdiff-interface: prepare for allowing early return pickaxe -S: slightly optimize contains() pickaxe: rename variables in has_changes() for brevity pickaxe -S: support content with NULs under --pickaxe-regex pickaxe: assert that we must have a needle under -G or -S pickaxe: refactor function selection in diffcore-pickaxe() perf: add performance test for pickaxe pickaxe/style: consolidate declarations and assignments diff.h: move pickaxe fields together again pickaxe: die when --find-object and --pickaxe-all are combined pickaxe: die when -G and --pickaxe-regex are combined pickaxe tests: add missing test for --no-pickaxe-regex being an error pickaxe tests: test for -G, -S and --find-object incompatibility pickaxe tests: add test for "log -S" not being a regex pickaxe tests: add test for diffgrep_consume() internals ...
-rw-r--r--builtin/merge-tree.c5
-rw-r--r--builtin/rerere.c4
-rw-r--r--combine-diff.c5
-rw-r--r--diff.c39
-rw-r--r--diff.h7
-rw-r--r--diffcore-pickaxe.c106
-rw-r--r--range-diff.c3
-rwxr-xr-xt/perf/p4209-pickaxe.sh70
-rwxr-xr-xt/t4209-log-pickaxe.sh114
-rwxr-xr-xt/t7816-grep-binary-pattern.sh4
-rw-r--r--xdiff-interface.c27
-rw-r--r--xdiff-interface.h31
-rw-r--r--xdiff/xdiff.h1
-rw-r--r--xdiff/xemit.c3
14 files changed, 312 insertions, 107 deletions
diff --git a/builtin/merge-tree.c b/builtin/merge-tree.c
index de8520778d..5dc94d6f88 100644
--- a/builtin/merge-tree.c
+++ b/builtin/merge-tree.c
@@ -107,15 +107,12 @@ static void show_diff(struct merge_list *entry)
mmfile_t src, dst;
xpparam_t xpp;
xdemitconf_t xecfg;
- xdemitcb_t ecb;
+ xdemitcb_t ecb = { .out_line = show_outf };
memset(&xpp, 0, sizeof(xpp));
xpp.flags = 0;
memset(&xecfg, 0, sizeof(xecfg));
xecfg.ctxlen = 3;
- ecb.out_hunk = NULL;
- ecb.out_line = show_outf;
- ecb.priv = NULL;
src.ptr = origin(entry, &size);
if (!src.ptr)
diff --git a/builtin/rerere.c b/builtin/rerere.c
index fd3be17b97..83d7a778e3 100644
--- a/builtin/rerere.c
+++ b/builtin/rerere.c
@@ -28,7 +28,7 @@ static int diff_two(const char *file1, const char *label1,
{
xpparam_t xpp;
xdemitconf_t xecfg;
- xdemitcb_t ecb;
+ xdemitcb_t ecb = { .out_line = outf };
mmfile_t minus, plus;
int ret;
@@ -41,8 +41,6 @@ static int diff_two(const char *file1, const char *label1,
xpp.flags = 0;
memset(&xecfg, 0, sizeof(xecfg));
xecfg.ctxlen = 3;
- ecb.out_hunk = NULL;
- ecb.out_line = outf;
ret = xdi_diff(&minus, &plus, &xpp, &xecfg, &ecb);
free(minus.ptr);
diff --git a/combine-diff.c b/combine-diff.c
index 7d925ce9ce..d93782daeb 100644
--- a/combine-diff.c
+++ b/combine-diff.c
@@ -403,11 +403,11 @@ static void consume_hunk(void *state_,
state->sline[state->nb-1].p_lno[state->n] = state->ob;
}
-static void consume_line(void *state_, char *line, unsigned long len)
+static int consume_line(void *state_, char *line, unsigned long len)
{
struct combine_diff_state *state = state_;
if (!state->lost_bucket)
- return; /* not in any hunk yet */
+ return 0; /* not in any hunk yet */
switch (line[0]) {
case '-':
append_lost(state->lost_bucket, state->n, line+1, len-1);
@@ -417,6 +417,7 @@ static void consume_line(void *state_, char *line, unsigned long len)
state->lno++;
break;
}
+ return 0;
}
static void combine_diff(struct repository *r,
diff --git a/diff.c b/diff.c
index 52c791574b..260dc37d4b 100644
--- a/diff.c
+++ b/diff.c
@@ -2340,7 +2340,7 @@ static void find_lno(const char *line, struct emit_callback *ecbdata)
ecbdata->lno_in_postimage = strtol(p + 1, NULL, 10);
}
-static void fn_out_consume(void *priv, char *line, unsigned long len)
+static int fn_out_consume(void *priv, char *line, unsigned long len)
{
struct emit_callback *ecbdata = priv;
struct diff_options *o = ecbdata->opt;
@@ -2376,7 +2376,7 @@ static void fn_out_consume(void *priv, char *line, unsigned long len)
len = sane_truncate_line(line, len);
find_lno(line, ecbdata);
emit_hunk_header(ecbdata, line, len);
- return;
+ return 0;
}
if (ecbdata->diff_words) {
@@ -2386,11 +2386,11 @@ static void fn_out_consume(void *priv, char *line, unsigned long len)
if (line[0] == '-') {
diff_words_append(line, len,
&ecbdata->diff_words->minus);
- return;
+ return 0;
} else if (line[0] == '+') {
diff_words_append(line, len,
&ecbdata->diff_words->plus);
- return;
+ return 0;
} else if (starts_with(line, "\\ ")) {
/*
* Eat the "no newline at eof" marker as if we
@@ -2399,11 +2399,11 @@ static void fn_out_consume(void *priv, char *line, unsigned long len)
* defer processing. If this is the end of
* preimage, more "+" lines may come after it.
*/
- return;
+ return 0;
}
diff_words_flush(ecbdata);
emit_diff_symbol(o, s, line, len, 0);
- return;
+ return 0;
}
switch (line[0]) {
@@ -2427,6 +2427,7 @@ static void fn_out_consume(void *priv, char *line, unsigned long len)
line, len, 0);
break;
}
+ return 0;
}
static void pprint_rename(struct strbuf *name, const char *a, const char *b)
@@ -2526,7 +2527,7 @@ static struct diffstat_file *diffstat_add(struct diffstat_t *diffstat,
return x;
}
-static void diffstat_consume(void *priv, char *line, unsigned long len)
+static int diffstat_consume(void *priv, char *line, unsigned long len)
{
struct diffstat_t *diffstat = priv;
struct diffstat_file *x = diffstat->files[diffstat->nr - 1];
@@ -2535,6 +2536,7 @@ static void diffstat_consume(void *priv, char *line, unsigned long len)
x->added++;
else if (line[0] == '-')
x->deleted++;
+ return 0;
}
const char mime_boundary_leader[] = "------------";
@@ -3212,7 +3214,7 @@ static void checkdiff_consume_hunk(void *priv,
data->lineno = nb - 1;
}
-static void checkdiff_consume(void *priv, char *line, unsigned long len)
+static int checkdiff_consume(void *priv, char *line, unsigned long len)
{
struct checkdiff_t *data = priv;
int marker_size = data->conflict_marker_size;
@@ -3236,7 +3238,7 @@ static void checkdiff_consume(void *priv, char *line, unsigned long len)
}
bad = ws_check(line + 1, len - 1, data->ws_rule);
if (!bad)
- return;
+ return 0;
data->status |= bad;
err = whitespace_error_string(bad);
fprintf(data->o->file, "%s%s:%d: %s.\n",
@@ -3248,6 +3250,7 @@ static void checkdiff_consume(void *priv, char *line, unsigned long len)
} else if (line[0] == ' ') {
data->lineno++;
}
+ return 0;
}
static unsigned char *deflate_it(char *data,
@@ -3726,7 +3729,8 @@ static void builtin_diffstat(const char *name_a, const char *name_b,
xpp.anchors_nr = o->anchors_nr;
xecfg.ctxlen = o->context;
xecfg.interhunkctxlen = o->interhunkcontext;
- if (xdi_diff_outf(&mf1, &mf2, discard_hunk_line,
+ xecfg.flags = XDL_EMIT_NO_HUNK_HDR;
+ if (xdi_diff_outf(&mf1, &mf2, NULL,
diffstat_consume, diffstat, &xpp, &xecfg))
die("unable to generate diffstat for %s", one->path);
@@ -4632,6 +4636,12 @@ void diff_setup_done(struct diff_options *options)
if (HAS_MULTI_BITS(options->pickaxe_opts & DIFF_PICKAXE_KINDS_MASK))
die(_("-G, -S and --find-object are mutually exclusive"));
+ if (HAS_MULTI_BITS(options->pickaxe_opts & DIFF_PICKAXE_KINDS_G_REGEX_MASK))
+ die(_("-G and --pickaxe-regex are mutually exclusive, use --pickaxe-regex with -S"));
+
+ if (HAS_MULTI_BITS(options->pickaxe_opts & DIFF_PICKAXE_KINDS_ALL_OBJFIND_MASK))
+ die(_("---pickaxe-all and --find-object are mutually exclusive, use --pickaxe-all with -G and -S"));
+
/*
* Most of the time we can say "there are changes"
* only by checking if there are changed paths, but
@@ -6119,17 +6129,18 @@ void flush_one_hunk(struct object_id *result, git_hash_ctx *ctx)
}
}
-static void patch_id_consume(void *priv, char *line, unsigned long len)
+static int patch_id_consume(void *priv, char *line, unsigned long len)
{
struct patch_id_t *data = priv;
int new_len;
if (len > 12 && starts_with(line, "\\ "))
- return;
+ return 0;
new_len = remove_space(line, len);
the_hash_algo->update_fn(data->ctx, line, new_len);
data->patchlen += new_len;
+ return 0;
}
static void patch_id_add_string(git_hash_ctx *ctx, const char *str)
@@ -6227,8 +6238,8 @@ static int diff_get_patch_id(struct diff_options *options, struct object_id *oid
xpp.flags = 0;
xecfg.ctxlen = 3;
- xecfg.flags = 0;
- if (xdi_diff_outf(&mf1, &mf2, discard_hunk_line,
+ xecfg.flags = XDL_EMIT_NO_HUNK_HDR;
+ if (xdi_diff_outf(&mf1, &mf2, NULL,
patch_id_consume, &data, &xpp, &xecfg))
return error("unable to generate patch-id diff for %s",
p->one->path);
diff --git a/diff.h b/diff.h
index c8f3faea8a..8ba85c5e60 100644
--- a/diff.h
+++ b/diff.h
@@ -265,6 +265,7 @@ struct diff_options {
* postimage of the diff_queue.
*/
const char *pickaxe;
+ unsigned pickaxe_opts;
/* -I<regex> */
regex_t **ignore_regex;
@@ -304,8 +305,6 @@ struct diff_options {
/* The output format used when `diff_flush()` is run. */
int output_format;
- unsigned pickaxe_opts;
-
/* Affects the way detection logic for complete rewrites, renames and
* copies.
*/
@@ -556,6 +555,10 @@ int git_config_rename(const char *var, const char *value);
#define DIFF_PICKAXE_KINDS_MASK (DIFF_PICKAXE_KIND_S | \
DIFF_PICKAXE_KIND_G | \
DIFF_PICKAXE_KIND_OBJFIND)
+#define DIFF_PICKAXE_KINDS_G_REGEX_MASK (DIFF_PICKAXE_KIND_G | \
+ DIFF_PICKAXE_REGEX)
+#define DIFF_PICKAXE_KINDS_ALL_OBJFIND_MASK (DIFF_PICKAXE_ALL | \
+ DIFF_PICKAXE_KIND_OBJFIND)
#define DIFF_PICKAXE_IGNORE_CASE 32
diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c
index a9c6d60df2..c88e50c632 100644
--- a/diffcore-pickaxe.c
+++ b/diffcore-pickaxe.c
@@ -19,38 +19,31 @@ struct diffgrep_cb {
int hit;
};
-static void diffgrep_consume(void *priv, char *line, unsigned long len)
+static int diffgrep_consume(void *priv, char *line, unsigned long len)
{
struct diffgrep_cb *data = priv;
regmatch_t regmatch;
if (line[0] != '+' && line[0] != '-')
- return;
+ return 0;
if (data->hit)
- /*
- * NEEDSWORK: we should have a way to terminate the
- * caller early.
- */
- return;
- data->hit = !regexec_buf(data->regexp, line + 1, len - 1, 1,
- &regmatch, 0);
+ BUG("Already matched in diffgrep_consume! Broken xdiff_emit_line_fn?");
+ if (!regexec_buf(data->regexp, line + 1, len - 1, 1,
+ &regmatch, 0)) {
+ data->hit = 1;
+ return 1;
+ }
+ return 0;
}
static int diff_grep(mmfile_t *one, mmfile_t *two,
struct diff_options *o,
regex_t *regexp, kwset_t kws)
{
- regmatch_t regmatch;
struct diffgrep_cb ecbdata;
xpparam_t xpp;
xdemitconf_t xecfg;
-
- if (!one)
- return !regexec_buf(regexp, two->ptr, two->size,
- 1, &regmatch, 0);
- if (!two)
- return !regexec_buf(regexp, one->ptr, one->size,
- 1, &regmatch, 0);
+ int ret;
/*
* We have both sides; need to run textual diff and see if
@@ -60,38 +53,47 @@ static int diff_grep(mmfile_t *one, mmfile_t *two,
memset(&xecfg, 0, sizeof(xecfg));
ecbdata.regexp = regexp;
ecbdata.hit = 0;
+ xecfg.flags = XDL_EMIT_NO_HUNK_HDR;
xecfg.ctxlen = o->context;
xecfg.interhunkctxlen = o->interhunkcontext;
- if (xdi_diff_outf(one, two, discard_hunk_line, diffgrep_consume,
- &ecbdata, &xpp, &xecfg))
- return 0;
- return ecbdata.hit;
+
+ /*
+ * An xdiff error might be our "data->hit" from above. See the
+ * comment for xdiff_emit_line_fn in xdiff-interface.h
+ */
+ ret = xdi_diff_outf(one, two, NULL, diffgrep_consume,
+ &ecbdata, &xpp, &xecfg);
+ if (ecbdata.hit)
+ return 1;
+ if (ret)
+ return ret;
+ return 0;
}
-static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws)
+static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws,
+ unsigned int limit)
{
- unsigned int cnt;
- unsigned long sz;
- const char *data;
-
- sz = mf->size;
- data = mf->ptr;
- cnt = 0;
+ unsigned int cnt = 0;
+ unsigned long sz = mf->size;
+ const char *data = mf->ptr;
if (regexp) {
regmatch_t regmatch;
int flags = 0;
- while (sz && *data &&
+ while (sz &&
!regexec_buf(regexp, data, sz, 1, &regmatch, flags)) {
flags |= REG_NOTBOL;
data += regmatch.rm_eo;
sz -= regmatch.rm_eo;
- if (sz && *data && regmatch.rm_so == regmatch.rm_eo) {
+ if (sz && regmatch.rm_so == regmatch.rm_eo) {
data++;
sz--;
}
cnt++;
+
+ if (limit && cnt == limit)
+ return cnt;
}
} else { /* Classic exact string match */
@@ -103,6 +105,9 @@ static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws)
sz -= offset + kwsm.size[0];
data += offset + kwsm.size[0];
cnt++;
+
+ if (limit && cnt == limit)
+ return cnt;
}
}
return cnt;
@@ -112,9 +117,9 @@ static int has_changes(mmfile_t *one, mmfile_t *two,
struct diff_options *o,
regex_t *regexp, kwset_t kws)
{
- unsigned int one_contains = one ? contains(one, regexp, kws) : 0;
- unsigned int two_contains = two ? contains(two, regexp, kws) : 0;
- return one_contains != two_contains;
+ unsigned int c1 = one ? contains(one, regexp, kws, 0) : 0;
+ unsigned int c2 = two ? contains(two, regexp, kws, c1 + 1) : 0;
+ return c1 != c2;
}
static int pickaxe_match(struct diff_filepair *p, struct diff_options *o,
@@ -136,9 +141,6 @@ static int pickaxe_match(struct diff_filepair *p, struct diff_options *o,
oidset_contains(o->objfind, &p->two->oid));
}
- if (!o->pickaxe[0])
- return 0;
-
if (o->flags.allow_textconv) {
textconv_one = get_textconv(o->repo, p->one);
textconv_two = get_textconv(o->repo, p->two);
@@ -163,9 +165,7 @@ static int pickaxe_match(struct diff_filepair *p, struct diff_options *o,
mf1.size = fill_textconv(o->repo, textconv_one, p->one, &mf1.ptr);
mf2.size = fill_textconv(o->repo, textconv_two, p->two, &mf2.ptr);
- ret = fn(DIFF_FILE_VALID(p->one) ? &mf1 : NULL,
- DIFF_FILE_VALID(p->two) ? &mf2 : NULL,
- o, regexp, kws);
+ ret = fn(&mf1, &mf2, o, regexp, kws);
if (textconv_one)
free(mf1.ptr);
@@ -232,13 +232,31 @@ void diffcore_pickaxe(struct diff_options *o)
int opts = o->pickaxe_opts;
regex_t regex, *regexp = NULL;
kwset_t kws = NULL;
+ pickaxe_fn fn;
+ if (opts & ~DIFF_PICKAXE_KIND_OBJFIND &&
+ (!needle || !*needle))
+ BUG("should have needle under -G or -S");
if (opts & (DIFF_PICKAXE_REGEX | DIFF_PICKAXE_KIND_G)) {
int cflags = REG_EXTENDED | REG_NEWLINE;
if (o->pickaxe_opts & DIFF_PICKAXE_IGNORE_CASE)
cflags |= REG_ICASE;
regcomp_or_die(&regex, needle, cflags);
regexp = &regex;
+
+ if (opts & DIFF_PICKAXE_KIND_G)
+ fn = diff_grep;
+ else if (opts & DIFF_PICKAXE_REGEX)
+ fn = has_changes;
+ else
+ /*
+ * We don't need to check the combination of
+ * -G and --pickaxe-regex, by the time we get
+ * here diff.c has already died if they're
+ * combined. See the usage tests in
+ * t4209-log-pickaxe.sh.
+ */
+ BUG("unreachable");
} else if (opts & DIFF_PICKAXE_KIND_S) {
if (o->pickaxe_opts & DIFF_PICKAXE_IGNORE_CASE &&
has_non_ascii(needle)) {
@@ -255,10 +273,14 @@ void diffcore_pickaxe(struct diff_options *o)
kwsincr(kws, needle, strlen(needle));
kwsprep(kws);
}
+ fn = has_changes;
+ } else if (opts & DIFF_PICKAXE_KIND_OBJFIND) {
+ fn = NULL;
+ } else {
+ BUG("unknown pickaxe_opts flag");
}
- pickaxe(&diff_queued_diff, o, regexp, kws,
- (opts & DIFF_PICKAXE_KIND_G) ? diff_grep : has_changes);
+ pickaxe(&diff_queued_diff, o, regexp, kws, fn);
if (regexp)
regfree(regexp);
diff --git a/range-diff.c b/range-diff.c
index 1a4471fe4c..e9479794b4 100644
--- a/range-diff.c
+++ b/range-diff.c
@@ -274,9 +274,10 @@ static void find_exact_matches(struct string_list *a, struct string_list *b)
hashmap_clear(&map);
}
-static void diffsize_consume(void *data, char *line, unsigned long len)
+static int diffsize_consume(void *data, char *line, unsigned long len)
{
(*(int *)data)++;
+ return 0;
}
static void diffsize_hunk(void *data, long ob, long on, long nb, long nn,
diff --git a/t/perf/p4209-pickaxe.sh b/t/perf/p4209-pickaxe.sh
new file mode 100755
index 0000000000..f585a4465a
--- /dev/null
+++ b/t/perf/p4209-pickaxe.sh
@@ -0,0 +1,70 @@
+#!/bin/sh
+
+test_description="Test pickaxe performance"
+
+. ./perf-lib.sh
+
+test_perf_default_repo
+
+# Not --max-count, as that's the number of matching commit, so it's
+# unbounded. We want to limit our revision walk here.
+from_rev_desc=
+from_rev=
+max_count=1000
+if test_have_prereq EXPENSIVE
+then
+ max_count=10000
+fi
+from_rev=" $(git rev-list HEAD | head -n $max_count | tail -n 1).."
+from_rev_desc=" <limit-rev>.."
+
+for icase in \
+ '' \
+ '-i '
+do
+ # -S (no regex)
+ for pattern in \
+ 'int main' \
+ 'æ'
+ do
+ for opts in \
+ '-S'
+ do
+ test_perf "git log $icase$opts'$pattern'$from_rev_desc" "
+ git log --pretty=format:%H $icase$opts'$pattern'$from_rev
+ "
+ done
+ done
+
+ # -S (regex)
+ for pattern in \
+ '(int|void|null)' \
+ 'if *\([^ ]+ & ' \
+ '[àáâãäåæñøùúûüýþ]'
+ do
+ for opts in \
+ '--pickaxe-regex -S'
+ do
+ test_perf "git log $icase$opts'$pattern'$from_rev_desc" "
+ git log --pretty=format:%H $icase$opts'$pattern'$from_rev
+ "
+ done
+ done
+
+ # -G
+ for pattern in \
+ '(int|void|null)' \
+ 'if *\([^ ]+ & ' \
+ '[àáâãäåæñøùúûüýþ]'
+ do
+ for opts in \
+ '-G'
+ do
+ test_perf "git log $icase$opts'$pattern'$from_rev_desc" "
+ git log --pretty=format:%H $icase$opts'$pattern'$from_rev
+ "
+ done
+ done
+done
+
+test_done
diff --git a/t/t4209-log-pickaxe.sh b/t/t4209-log-pickaxe.sh
index 5d06f5f45e..75795d0b49 100755
--- a/t/t4209-log-pickaxe.sh
+++ b/t/t4209-log-pickaxe.sh
@@ -55,6 +55,43 @@ test_expect_success setup '
git rev-parse --verify HEAD >expect_second
'
+test_expect_success 'usage' '
+ test_expect_code 129 git log -S 2>err &&
+ test_i18ngrep "switch.*requires a value" err &&
+
+ test_expect_code 129 git log -G 2>err &&
+ test_i18ngrep "switch.*requires a value" err &&
+
+ test_expect_code 128 git log -Gregex -Sstring 2>err &&
+ grep "mutually exclusive" err &&
+
+ test_expect_code 128 git log -Gregex --find-object=HEAD 2>err &&
+ grep "mutually exclusive" err &&
+
+ test_expect_code 128 git log -Sstring --find-object=HEAD 2>err &&
+ grep "mutually exclusive" err &&
+
+ test_expect_code 128 git log --pickaxe-all --find-object=HEAD 2>err &&
+ grep "mutually exclusive" err
+'
+
+test_expect_success 'usage: --pickaxe-regex' '
+ test_expect_code 128 git log -Gregex --pickaxe-regex 2>err &&
+ grep "mutually exclusive" err
+'
+
+test_expect_success 'usage: --no-pickaxe-regex' '
+ cat >expect <<-\EOF &&
+ fatal: unrecognized argument: --no-pickaxe-regex
+ EOF
+
+ test_expect_code 128 git log -Sstring --no-pickaxe-regex 2>actual &&
+ test_cmp expect actual &&
+
+ test_expect_code 128 git log -Gstring --no-pickaxe-regex 2>err &&
+ test_cmp expect actual
+'
+
test_log expect_initial --grep initial
test_log expect_nomatch --grep InItial
test_log_icase expect_initial --grep InItial
@@ -106,38 +143,83 @@ test_expect_success 'log -S --no-textconv (missing textconv tool)' '
rm .gitattributes
'
+test_expect_success 'setup log -[GS] plain & regex' '
+ test_create_repo GS-plain &&
+ test_commit -C GS-plain --append A data.txt "a" &&
+ test_commit -C GS-plain --append B data.txt "a a" &&
+ test_commit -C GS-plain --append C data.txt "b" &&
+ test_commit -C GS-plain --append D data.txt "[b]" &&
+ test_commit -C GS-plain E data.txt "" &&
+
+ # We also include E, the deletion commit
+ git -C GS-plain log --grep="[ABE]" >A-to-B-then-E-log &&
+ git -C GS-plain log --grep="[CDE]" >C-to-D-then-E-log &&
+ git -C GS-plain log --grep="[DE]" >D-then-E-log &&
+ git -C GS-plain log >full-log
+'
+
+test_expect_success 'log -G trims diff new/old [-+]' '
+ git -C GS-plain log -G"[+-]a" >log &&
+ test_must_be_empty log &&
+ git -C GS-plain log -G"^a" >log &&
+ test_cmp log A-to-B-then-E-log
+'
+
+test_expect_success 'log -S<pat> is not a regex, but -S<pat> --pickaxe-regex is' '
+ git -C GS-plain log -S"a" >log &&
+ test_cmp log A-to-B-then-E-log &&
+
+ git -C GS-plain log -S"[a]" >log &&
+ test_must_be_empty log &&
+
+ git -C GS-plain log -S"[a]" --pickaxe-regex >log &&
+ test_cmp log A-to-B-then-E-log &&
+
+ git -C GS-plain log -S"[b]" >log &&
+ test_cmp log D-then-E-log &&
+
+ git -C GS-plain log -S"[b]" --pickaxe-regex >log &&
+ test_cmp log C-to-D-then-E-log
+'
+
test_expect_success 'setup log -[GS] binary & --text' '
- git checkout --orphan GS-binary-and-text &&
- git read-tree --empty &&
- printf "a\na\0a\n" >data.bin &&
- git add data.bin &&
- git commit -m "create binary file" data.bin &&
- printf "a\na\0a\n" >>data.bin &&
- git commit -m "modify binary file" data.bin &&
- git rm data.bin &&
- git commit -m "delete binary file" data.bin &&
- git log >full-log
+ test_create_repo GS-bin-txt &&
+ test_commit -C GS-bin-txt --printf A data.bin "a\na\0a\n" &&
+ test_commit -C GS-bin-txt --append --printf B data.bin "a\na\0a\n" &&
+ test_commit -C GS-bin-txt C data.bin "" &&
+ git -C GS-bin-txt log >full-log
'
test_expect_success 'log -G ignores binary files' '
- git log -Ga >log &&
+ git -C GS-bin-txt log -Ga >log &&
test_must_be_empty log
'
test_expect_success 'log -G looks into binary files with -a' '
- git log -a -Ga >log &&
+ git -C GS-bin-txt log -a -Ga >log &&
test_cmp log full-log
'
test_expect_success 'log -G looks into binary files with textconv filter' '
- test_when_finished "rm .gitattributes" &&
- echo "* diff=bin" >.gitattributes &&
- git -c diff.bin.textconv=cat log -Ga >log &&
+ test_when_finished "rm GS-bin-txt/.gitattributes" &&
+ (
+ cd GS-bin-txt &&
+ echo "* diff=bin" >.gitattributes &&
+ git -c diff.bin.textconv=cat log -Ga >../log
+ ) &&
test_cmp log full-log
'
test_expect_success 'log -S looks into binary files' '
- git log -Sa >log &&
+ git -C GS-bin-txt log -Sa >log &&
+ test_cmp log full-log
+'
+
+test_expect_success 'log -S --pickaxe-regex looks into binary files' '
+ git -C GS-bin-txt log --pickaxe-regex -Sa >log &&
+ test_cmp log full-log &&
+
+ git -C GS-bin-txt log --pickaxe-regex -S"[a]" >log &&
test_cmp log full-log
'
diff --git a/t/t7816-grep-binary-pattern.sh b/t/t7816-grep-binary-pattern.sh
index 60bab291e4..9d67a5fc4c 100755
--- a/t/t7816-grep-binary-pattern.sh
+++ b/t/t7816-grep-binary-pattern.sh
@@ -59,7 +59,7 @@ test_expect_success 'setup' "
git commit -m.
"
-# Simple fixed-string matching that can use kwset (no -i && non-ASCII)
+# Simple fixed-string matching
nul_match P P P '-F' 'yQf'
nul_match P P P '-F' 'yQx'
nul_match P P P '-Fi' 'YQf'
@@ -78,7 +78,7 @@ nul_match P P P '-Fi' '[Y]QF'
nul_match P P P '-F' 'æQ[ð]'
nul_match P P P '-F' '[æ]Qð'
-# The -F kwset codepath can't handle -i && non-ASCII...
+# Matching pattern and subject case with -i
nul_match P 1 1 '-i' '[æ]Qð'
# ...PCRE v2 only matches non-ASCII with -i casefolding under UTF-8
diff --git a/xdiff-interface.c b/xdiff-interface.c
index 609615db2c..75b32aef51 100644
--- a/xdiff-interface.c
+++ b/xdiff-interface.c
@@ -31,29 +31,36 @@ static int xdiff_out_hunk(void *priv_,
return 0;
}
-static void consume_one(void *priv_, char *s, unsigned long size)
+static int consume_one(void *priv_, char *s, unsigned long size)
{
struct xdiff_emit_state *priv = priv_;
char *ep;
while (size) {
unsigned long this_size;
+ int ret;
ep = memchr(s, '\n', size);
this_size = (ep == NULL) ? size : (ep - s + 1);
- priv->line_fn(priv->consume_callback_data, s, this_size);
+ ret = priv->line_fn(priv->consume_callback_data, s, this_size);
+ if (ret)
+ return ret;
size -= this_size;
s += this_size;
}
+ return 0;
}
static int xdiff_outf(void *priv_, mmbuffer_t *mb, int nbuf)
{
struct xdiff_emit_state *priv = priv_;
int i;
+ int stop = 0;
if (!priv->line_fn)
return 0;
for (i = 0; i < nbuf; i++) {
+ if (stop)
+ return 1;
if (mb[i].ptr[mb[i].size-1] != '\n') {
/* Incomplete line */
strbuf_add(&priv->remainder, mb[i].ptr, mb[i].size);
@@ -62,17 +69,21 @@ static int xdiff_outf(void *priv_, mmbuffer_t *mb, int nbuf)
/* we have a complete line */
if (!priv->remainder.len) {
- consume_one(priv, mb[i].ptr, mb[i].size);
+ stop = consume_one(priv, mb[i].ptr, mb[i].size);
continue;
}
strbuf_add(&priv->remainder, mb[i].ptr, mb[i].size);
- consume_one(priv, priv->remainder.buf, priv->remainder.len);
+ stop = consume_one(priv, priv->remainder.buf, priv->remainder.len);
strbuf_reset(&priv->remainder);
}
+ if (stop)
+ return -1;
if (priv->remainder.len) {
- consume_one(priv, priv->remainder.buf, priv->remainder.len);
+ stop = consume_one(priv, priv->remainder.buf, priv->remainder.len);
strbuf_reset(&priv->remainder);
}
+ if (stop)
+ return -1;
return 0;
}
@@ -115,12 +126,6 @@ int xdi_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, xdemitconf_t co
return xdl_diff(&a, &b, xpp, xecfg, xecb);
}
-void discard_hunk_line(void *priv,
- long ob, long on, long nb, long nn,
- const char *func, long funclen)
-{
-}
-
int xdi_diff_outf(mmfile_t *mf1, mmfile_t *mf2,
xdiff_emit_hunk_fn hunk_fn,
xdiff_emit_line_fn line_fn,
diff --git a/xdiff-interface.h b/xdiff-interface.h
index 93df26900c..4301a7eef2 100644
--- a/xdiff-interface.h
+++ b/xdiff-interface.h
@@ -11,7 +11,28 @@
*/
#define MAX_XDIFF_SIZE (1024UL * 1024 * 1023)
-typedef void (*xdiff_emit_line_fn)(void *, char *, unsigned long);
+/**
+ * The `xdiff_emit_line_fn` function can return 1 to abort early, or 0
+ * to continue processing. Note that doing so is an all-or-nothing
+ * affair, as returning 1 will return all the way to the top-level,
+ * e.g. the xdi_diff_outf() call to generate the diff.
+ *
+ * Thus returning 1 means you won't be getting any more diff lines. If
+ * you need something in-between those two options you'll to use
+ * `xdl_emit_hunk_consume_func_t` and implement your own version of
+ * xdl_emit_diff().
+ *
+ * We may extend the interface in the future to understand other more
+ * granular return values. While you should return 1 to exit early,
+ * doing so will currently make your early return indistinguishable
+ * from an error internal to xdiff, xdiff itself will see that
+ * non-zero return and translate it to -1.
+ *
+ * See "diff_grep" in diffcore-pickaxe.c for a trick to work around
+ * this, i.e. using the "consume_callback_data" to note the desired
+ * early return.
+ */
+typedef int (*xdiff_emit_line_fn)(void *, char *, unsigned long);
typedef void (*xdiff_emit_hunk_fn)(void *data,
long old_begin, long old_nr,
long new_begin, long new_nr,
@@ -33,14 +54,6 @@ int git_xmerge_config(const char *var, const char *value, void *cb);
extern int git_xmerge_style;
/*
- * Can be used as a no-op hunk_fn for xdi_diff_outf(), since a NULL
- * one just sends the hunk line to the line_fn callback).
- */
-void discard_hunk_line(void *priv,
- long ob, long on, long nb, long nn,
- const char *func, long funclen);
-
-/*
* Compare the strings l1 with l2 which are of size s1 and s2 respectively.
* Returns 1 if the strings are deemed equal, 0 otherwise.
* The `flags` given as XDF_WHITESPACE_FLAGS determine how white spaces
diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index 7a04605146..b29deca5de 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -50,6 +50,7 @@ extern "C" {
/* xdemitconf_t.flags */
#define XDL_EMIT_FUNCNAMES (1 << 0)
+#define XDL_EMIT_NO_HUNK_HDR (1 << 1)
#define XDL_EMIT_FUNCCONTEXT (1 << 2)
/* merge simplification levels */
diff --git a/xdiff/xemit.c b/xdiff/xemit.c
index 9d7d6c5087..1cbf2b9829 100644
--- a/xdiff/xemit.c
+++ b/xdiff/xemit.c
@@ -278,7 +278,8 @@ pre_context_calculation:
s1 - 1, funclineprev);
funclineprev = s1 - 1;
}
- if (xdl_emit_hunk_hdr(s1 + 1, e1 - s1, s2 + 1, e2 - s2,
+ if (!(xecfg->flags & XDL_EMIT_NO_HUNK_HDR) &&
+ xdl_emit_hunk_hdr(s1 + 1, e1 - s1, s2 + 1, e2 - s2,
func_line.buf, func_line.len, ecb) < 0)
return -1;