summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Junio C Hamano <gitster@pobox.com>, 2021-11-01 13:48:08 -0700
committer: Junio C Hamano <gitster@pobox.com>, 2021-11-01 13:48:08 -0700
commit: b93d7206919e9eac22b3ca0291bce8da65960595 (patch)
tree: 1fb405da58548ea2246aa833630706a9ab9bf928
parent: Git 2.34-rc0 (diff)
parent: grep/pcre2: fix an edge case concerning ascii patterns and UTF-8 data (diff)
download: tgif-b93d7206919e9eac22b3ca0291bce8da65960595.tar.xz
Merge branch 'hm/paint-hits-in-log-grep'

"git log --grep=string --author=name" learns to highlight hits just
like "git grep string" does.

* hm/paint-hits-in-log-grep:
  grep/pcre2: fix an edge case concerning ascii patterns and UTF-8 data
  pretty: colorize pattern matches in commit messages
  grep: refactor next_match() and match_one_pattern() for external use
-rw-r--r-- Documentation/config/color.txt | 7
-rw-r--r-- grep.c | 85
-rw-r--r-- grep.h | 9
-rw-r--r-- pretty.c | 101
-rwxr-xr-x t/t4202-log.sh | 51
-rwxr-xr-x t/t7812-grep-icase-non-ascii.sh | 48
6 files changed, 255 insertions, 46 deletions
diff --git a/Documentation/config/color.txt b/Documentation/config/color.txt
index 6e817f6047..1795b2d16b 100644
--- a/Documentation/config/color.txt
+++ b/Documentation/config/color.txt
@@ -105,9 +105,12 @@ color.grep.<slot>::
`matchContext`;;
matching text in context lines
`matchSelected`;;
- matching text in selected lines
+ matching text in selected lines. Also, used to customize the following
+ linkgit:git-log[1] subcommands: `--grep`, `--author` and `--committer`.
`selected`;;
- non-matching text in selected lines
+ non-matching text in selected lines. Also, used to customize the
+ following linkgit:git-log[1] subcommands: `--grep`, `--author` and
+ `--committer`.
`separator`;;
separators between fields on a line (`:`, `-`, and `=`)
and between hunks (`--`)
diff --git a/grep.c b/grep.c
index 14fe8a0fd2..f6e113e9f0 100644
--- a/grep.c
+++ b/grep.c
@@ -382,8 +382,10 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
}
options |= PCRE2_CASELESS;
}
- if (!opt->ignore_locale && is_utf8_locale() && has_non_ascii(p->pattern) &&
- !(!opt->ignore_case && (p->fixed || p->is_fixed)))
+ if ((!opt->ignore_locale && !has_non_ascii(p->pattern)) ||
+ (!opt->ignore_locale && is_utf8_locale() &&
+ has_non_ascii(p->pattern) && !(!opt->ignore_case &&
+ (p->fixed || p->is_fixed))))
options |= (PCRE2_UTF | PCRE2_MATCH_INVALID_UTF);
#ifdef GIT_PCRE2_VERSION_10_36_OR_HIGHER
@@ -944,10 +946,10 @@ static struct {
{ "reflog ", 7 },
};
-static int match_one_pattern(struct grep_pat *p,
- const char *bol, const char *eol,
- enum grep_context ctx,
- regmatch_t *pmatch, int eflags)
+static int headerless_match_one_pattern(struct grep_pat *p,
+ const char *bol, const char *eol,
+ enum grep_context ctx,
+ regmatch_t *pmatch, int eflags)
{
int hit = 0;
const char *start = bol;
@@ -956,25 +958,6 @@ static int match_one_pattern(struct grep_pat *p,
((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
return 0;
- if (p->token == GREP_PATTERN_HEAD) {
- const char *field;
- size_t len;
- assert(p->field < ARRAY_SIZE(header_field));
- field = header_field[p->field].field;
- len = header_field[p->field].len;
- if (strncmp(bol, field, len))
- return 0;
- bol += len;
- switch (p->field) {
- case GREP_HEADER_AUTHOR:
- case GREP_HEADER_COMMITTER:
- strip_timestamp(bol, &eol);
- break;
- default:
- break;
- }
- }
-
again:
hit = patmatch(p, bol, eol, pmatch, eflags);
@@ -1025,6 +1008,36 @@ static int match_one_pattern(struct grep_pat *p,
return hit;
}
+static int match_one_pattern(struct grep_pat *p,
+ const char *bol, const char *eol,
+ enum grep_context ctx, regmatch_t *pmatch,
+ int eflags)
+{
+ const char *field;
+ size_t len;
+
+ if (p->token == GREP_PATTERN_HEAD) {
+ assert(p->field < ARRAY_SIZE(header_field));
+ field = header_field[p->field].field;
+ len = header_field[p->field].len;
+ if (strncmp(bol, field, len))
+ return 0;
+ bol += len;
+
+ switch (p->field) {
+ case GREP_HEADER_AUTHOR:
+ case GREP_HEADER_COMMITTER:
+ strip_timestamp(bol, &eol);
+ break;
+ default:
+ break;
+ }
+ }
+
+ return headerless_match_one_pattern(p, bol, eol, ctx, pmatch, eflags);
+}
+
+
static int match_expr_eval(struct grep_opt *opt, struct grep_expr *x,
const char *bol, const char *eol,
enum grep_context ctx, ssize_t *col,
@@ -1143,7 +1156,7 @@ static int match_next_pattern(struct grep_pat *p,
{
regmatch_t match;
- if (!match_one_pattern(p, bol, eol, ctx, &match, eflags))
+ if (!headerless_match_one_pattern(p, bol, eol, ctx, &match, eflags))
return 0;
if (match.rm_so < 0 || match.rm_eo < 0)
return 0;
@@ -1158,19 +1171,26 @@ static int match_next_pattern(struct grep_pat *p,
return 1;
}
-static int next_match(struct grep_opt *opt,
- const char *bol, const char *eol,
- enum grep_context ctx, regmatch_t *pmatch, int eflags)
+int grep_next_match(struct grep_opt *opt,
+ const char *bol, const char *eol,
+ enum grep_context ctx, regmatch_t *pmatch,
+ enum grep_header_field field, int eflags)
{
struct grep_pat *p;
int hit = 0;
pmatch->rm_so = pmatch->rm_eo = -1;
if (bol < eol) {
- for (p = opt->pattern_list; p; p = p->next) {
+ for (p = ((ctx == GREP_CONTEXT_HEAD)
+ ? opt->header_list : opt->pattern_list);
+ p; p = p->next) {
switch (p->token) {
- case GREP_PATTERN: /* atom */
case GREP_PATTERN_HEAD:
+ if ((field != GREP_HEADER_FIELD_MAX) &&
+ (p->field != field))
+ continue;
+ /* fall thru */
+ case GREP_PATTERN: /* atom */
case GREP_PATTERN_BODY:
hit |= match_next_pattern(p, bol, eol, ctx,
pmatch, eflags);
@@ -1261,7 +1281,8 @@ static void show_line(struct grep_opt *opt,
else if (sign == '=')
line_color = opt->colors[GREP_COLOR_FUNCTION];
}
- while (next_match(opt, bol, eol, ctx, &match, eflags)) {
+ while (grep_next_match(opt, bol, eol, ctx, &match,
+ GREP_HEADER_FIELD_MAX, eflags)) {
if (match.rm_so == match.rm_eo)
break;
diff --git a/grep.h b/grep.h
index 3c75ed1fd8..3e8815c347 100644
--- a/grep.h
+++ b/grep.h
@@ -191,6 +191,15 @@ void compile_grep_patterns(struct grep_opt *opt);
void free_grep_patterns(struct grep_opt *opt);
int grep_buffer(struct grep_opt *opt, const char *buf, unsigned long size);
+/* The field parameter is only used to filter header patterns
+ * (where appropriate). If filtering isn't desirable
+ * GREP_HEADER_FIELD_MAX should be supplied.
+ */
+int grep_next_match(struct grep_opt *opt,
+ const char *bol, const char *eol,
+ enum grep_context ctx, regmatch_t *pmatch,
+ enum grep_header_field field, int eflags);
+
struct grep_source {
char *name;
diff --git a/pretty.c b/pretty.c
index be477bd51f..1af5b093ae 100644
--- a/pretty.c
+++ b/pretty.c
@@ -431,6 +431,52 @@ const char *show_ident_date(const struct ident_split *ident,
return show_date(date, tz, mode);
}
+static inline void strbuf_add_with_color(struct strbuf *sb, const char *color,
+ const char *buf, size_t buflen)
+{
+ strbuf_addstr(sb, color);
+ strbuf_add(sb, buf, buflen);
+ if (*color)
+ strbuf_addstr(sb, GIT_COLOR_RESET);
+}
+
+static void append_line_with_color(struct strbuf *sb, struct grep_opt *opt,
+ const char *line, size_t linelen,
+ int color, enum grep_context ctx,
+ enum grep_header_field field)
+{
+ const char *buf, *eol, *line_color, *match_color;
+ regmatch_t match;
+ int eflags = 0;
+
+ buf = line;
+ eol = buf + linelen;
+
+ if (!opt || !want_color(color) || opt->invert)
+ goto end;
+
+ line_color = opt->colors[GREP_COLOR_SELECTED];
+ match_color = opt->colors[GREP_COLOR_MATCH_SELECTED];
+
+ while (grep_next_match(opt, buf, eol, ctx, &match, field, eflags)) {
+ if (match.rm_so == match.rm_eo)
+ break;
+
+ strbuf_add_with_color(sb, line_color, buf, match.rm_so);
+ strbuf_add_with_color(sb, match_color, buf + match.rm_so,
+ match.rm_eo - match.rm_so);
+ buf += match.rm_eo;
+ eflags = REG_NOTBOL;
+ }
+
+ if (eflags)
+ strbuf_add_with_color(sb, line_color, buf, eol - buf);
+ else {
+end:
+ strbuf_add(sb, buf, eol - buf);
+ }
+}
+
void pp_user_info(struct pretty_print_context *pp,
const char *what, struct strbuf *sb,
const char *line, const char *encoding)
@@ -496,9 +542,26 @@ void pp_user_info(struct pretty_print_context *pp,
strbuf_addch(sb, '\n');
strbuf_addf(sb, " <%.*s>\n", (int)maillen, mailbuf);
} else {
- strbuf_addf(sb, "%s: %.*s%.*s <%.*s>\n", what,
- (pp->fmt == CMIT_FMT_FULLER) ? 4 : 0, " ",
- (int)namelen, namebuf, (int)maillen, mailbuf);
+ struct strbuf id = STRBUF_INIT;
+ enum grep_header_field field = GREP_HEADER_FIELD_MAX;
+ struct grep_opt *opt = pp->rev ? &pp->rev->grep_filter : NULL;
+
+ if (!strcmp(what, "Author"))
+ field = GREP_HEADER_AUTHOR;
+ else if (!strcmp(what, "Commit"))
+ field = GREP_HEADER_COMMITTER;
+
+ strbuf_addf(sb, "%s: ", what);
+ if (pp->fmt == CMIT_FMT_FULLER)
+ strbuf_addchars(sb, ' ', 4);
+
+ strbuf_addf(&id, "%.*s <%.*s>", (int)namelen, namebuf,
+ (int)maillen, mailbuf);
+
+ append_line_with_color(sb, opt, id.buf, id.len, pp->color,
+ GREP_CONTEXT_HEAD, field);
+ strbuf_addch(sb, '\n');
+ strbuf_release(&id);
}
switch (pp->fmt) {
@@ -1935,8 +1998,9 @@ static int pp_utf8_width(const char *start, const char *end)
return width;
}
-static void strbuf_add_tabexpand(struct strbuf *sb, int tabwidth,
- const char *line, int linelen)
+static void strbuf_add_tabexpand(struct strbuf *sb, struct grep_opt *opt,
+ int color, int tabwidth, const char *line,
+ int linelen)
{
const char *tab;
@@ -1953,7 +2017,9 @@ static void strbuf_add_tabexpand(struct strbuf *sb, int tabwidth,
break;
/* Output the data .. */
- strbuf_add(sb, line, tab - line);
+ append_line_with_color(sb, opt, line, tab - line, color,
+ GREP_CONTEXT_BODY,
+ GREP_HEADER_FIELD_MAX);
/* .. and the de-tabified tab */
strbuf_addchars(sb, ' ', tabwidth - (width % tabwidth));
@@ -1968,7 +2034,8 @@ static void strbuf_add_tabexpand(struct strbuf *sb, int tabwidth,
* worrying about width - there's nothing more to
* align.
*/
- strbuf_add(sb, line, linelen);
+ append_line_with_color(sb, opt, line, linelen, color, GREP_CONTEXT_BODY,
+ GREP_HEADER_FIELD_MAX);
}
/*
@@ -1980,11 +2047,16 @@ static void pp_handle_indent(struct pretty_print_context *pp,
struct strbuf *sb, int indent,
const char *line, int linelen)
{
+ struct grep_opt *opt = pp->rev ? &pp->rev->grep_filter : NULL;
+
strbuf_addchars(sb, ' ', indent);
if (pp->expand_tabs_in_log)
- strbuf_add_tabexpand(sb, pp->expand_tabs_in_log, line, linelen);
+ strbuf_add_tabexpand(sb, opt, pp->color, pp->expand_tabs_in_log,
+ line, linelen);
else
- strbuf_add(sb, line, linelen);
+ append_line_with_color(sb, opt, line, linelen, pp->color,
+ GREP_CONTEXT_BODY,
+ GREP_HEADER_FIELD_MAX);
}
static int is_mboxrd_from(const char *line, int len)
@@ -2002,7 +2074,9 @@ void pp_remainder(struct pretty_print_context *pp,
struct strbuf *sb,
int indent)
{
+ struct grep_opt *opt = pp->rev ? &pp->rev->grep_filter : NULL;
int first = 1;
+
for (;;) {
const char *line = *msg_p;
int linelen = get_one_line(line);
@@ -2023,14 +2097,17 @@ void pp_remainder(struct pretty_print_context *pp,
if (indent)
pp_handle_indent(pp, sb, indent, line, linelen);
else if (pp->expand_tabs_in_log)
- strbuf_add_tabexpand(sb, pp->expand_tabs_in_log,
- line, linelen);
+ strbuf_add_tabexpand(sb, opt, pp->color,
+ pp->expand_tabs_in_log, line,
+ linelen);
else {
if (pp->fmt == CMIT_FMT_MBOXRD &&
is_mboxrd_from(line, linelen))
strbuf_addch(sb, '>');
- strbuf_add(sb, line, linelen);
+ append_line_with_color(sb, opt, line, linelen,
+ pp->color, GREP_CONTEXT_BODY,
+ GREP_HEADER_FIELD_MAX);
}
strbuf_addch(sb, '\n');
}
diff --git a/t/t4202-log.sh b/t/t4202-log.sh
index 6a650dacd6..7884e3d46b 100755
--- a/t/t4202-log.sh
+++ b/t/t4202-log.sh
@@ -449,6 +449,57 @@ test_expect_success !FAIL_PREREQS 'log with various grep.patternType configurati
)
'
+test_expect_success 'log --author' '
+ cat >expect <<-\EOF &&
+ Author: <BOLD;RED>A U<RESET> Thor <author@example.com>
+ EOF
+ git log -1 --color=always --author="A U" >log &&
+ grep Author log >actual.raw &&
+ test_decode_color <actual.raw >actual &&
+ test_cmp expect actual
+'
+
+test_expect_success 'log --committer' '
+ cat >expect <<-\EOF &&
+ Commit: C O Mitter <committer@<BOLD;RED>example<RESET>.com>
+ EOF
+ git log -1 --color=always --pretty=fuller --committer="example" >log &&
+ grep "Commit:" log >actual.raw &&
+ test_decode_color <actual.raw >actual &&
+ test_cmp expect actual
+'
+
+test_expect_success 'log -i --grep with color' '
+ cat >expect <<-\EOF &&
+ <BOLD;RED>Sec<RESET>ond
+ <BOLD;RED>sec<RESET>ond
+ EOF
+ git log --color=always -i --grep=^sec >log &&
+ grep -i sec log >actual.raw &&
+ test_decode_color <actual.raw >actual &&
+ test_cmp expect actual
+'
+
+test_expect_success '-c color.grep.selected log --grep' '
+ cat >expect <<-\EOF &&
+ <GREEN>th<RESET><BOLD;RED>ir<RESET><GREEN>d<RESET>
+ EOF
+ git -c color.grep.selected="green" log --color=always --grep=ir >log &&
+ grep ir log >actual.raw &&
+ test_decode_color <actual.raw >actual &&
+ test_cmp expect actual
+'
+
+test_expect_success '-c color.grep.matchSelected log --grep' '
+ cat >expect <<-\EOF &&
+ <BLUE>i<RESET>n<BLUE>i<RESET>t<BLUE>i<RESET>al
+ EOF
+ git -c color.grep.matchSelected="blue" log --color=always --grep=i >log &&
+ grep al log >actual.raw &&
+ test_decode_color <actual.raw >actual &&
+ test_cmp expect actual
+'
+
cat > expect <<EOF
* Second
* sixth
diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh
index e5d1e4ea68..22487d90fd 100755
--- a/t/t7812-grep-icase-non-ascii.sh
+++ b/t/t7812-grep-icase-non-ascii.sh
@@ -53,6 +53,54 @@ test_expect_success REGEX_LOCALE 'pickaxe -i on non-ascii' '
test_cmp expected actual
'
+test_expect_success GETTEXT_LOCALE,PCRE 'log --author with an ascii pattern on UTF-8 data' '
+ cat >expected <<-\EOF &&
+ Author: <BOLD;RED>À Ú Thor<RESET> <author@example.com>
+ EOF
+ test_write_lines "forth" >file4 &&
+ git add file4 &&
+ git commit --author="À Ú Thor <author@example.com>" -m sécond &&
+ git log -1 --color=always --perl-regexp --author=".*Thor" >log &&
+ grep Author log >actual.raw &&
+ test_decode_color <actual.raw >actual &&
+ test_cmp expected actual
+'
+
+test_expect_success GETTEXT_LOCALE,PCRE 'log --committer with an ascii pattern on ISO-8859-1 data' '
+ cat >expected <<-\EOF &&
+ Commit: Ç<BOLD;RED> O Mîtter <committer@example.com><RESET>
+ EOF
+ test_write_lines "fifth" >file5 &&
+ git add file5 &&
+ GIT_COMMITTER_NAME="Ç O Mîtter" &&
+ GIT_COMMITTER_EMAIL="committer@example.com" &&
+ git -c i18n.commitEncoding=latin1 commit -m thïrd &&
+ git -c i18n.logOutputEncoding=latin1 log -1 --pretty=fuller --color=always --perl-regexp --committer=" O.*" >log &&
+ grep Commit: log >actual.raw &&
+ test_decode_color <actual.raw >actual &&
+ test_cmp expected actual
+'
+
+test_expect_success GETTEXT_LOCALE,PCRE 'log --grep with an ascii pattern on UTF-8 data' '
+ cat >expected <<-\EOF &&
+ sé<BOLD;RED>con<RESET>d
+ EOF
+ git log -1 --color=always --perl-regexp --grep="con" >log &&
+ grep con log >actual.raw &&
+ test_decode_color <actual.raw >actual &&
+ test_cmp expected actual
+'
+
+test_expect_success GETTEXT_LOCALE,PCRE 'log --grep with an ascii pattern on ISO-8859-1 data' '
+ cat >expected <<-\EOF &&
+ <BOLD;RED>thïrd<RESET>
+ EOF
+ git -c i18n.logOutputEncoding=latin1 log -1 --color=always --perl-regexp --grep="th.*rd" >log &&
+ grep "th.*rd" log >actual.raw &&
+ test_decode_color <actual.raw >actual &&
+ test_cmp expected actual
+'
+
test_expect_success GETTEXT_LOCALE,LIBPCRE2 'PCRE v2: setup invalid UTF-8 data' '
printf "\\200\\n" >invalid-0x80 &&
echo "ævar" >expected &&