diff options
-rw-r--r-- | revision.c | 27 | 20 insertions, 7 deletions
-rwxr-xr-x | t/t4210-log-i18n.sh | 58 | 58 insertions, 0 deletions (new file)
2 files changed, 78 insertions, 7 deletions
diff --git a/revision.c b/revision.c index d7562ee500..ef60205412 100644 --- a/revision.c +++ b/revision.c @@ -2268,7 +2268,10 @@ static int commit_rewrite_person(struct strbuf *buf, const char *what, struct st static int commit_match(struct commit *commit, struct rev_info *opt) { int retval; + const char *encoding; + char *message; struct strbuf buf = STRBUF_INIT; + if (!opt->grep_filter.pattern_list && !opt->grep_filter.header_list) return 1; @@ -2279,13 +2282,23 @@ static int commit_match(struct commit *commit, struct rev_info *opt) strbuf_addch(&buf, '\n'); } + /* + * We grep in the user's output encoding, under the assumption that it + * is the encoding they are most likely to write their grep pattern + * for. In addition, it means we will match the "notes" encoding below, + * so we will not end up with a buffer that has two different encodings + * in it. + */ + encoding = get_log_output_encoding(); + message = logmsg_reencode(commit, encoding); + /* Copy the commit to temporary if we are using "fake" headers */ if (buf.len) - strbuf_addstr(&buf, commit->buffer); + strbuf_addstr(&buf, message); if (opt->grep_filter.header_list && opt->mailmap) { if (!buf.len) - strbuf_addstr(&buf, commit->buffer); + strbuf_addstr(&buf, message); commit_rewrite_person(&buf, "\nauthor ", opt->mailmap); commit_rewrite_person(&buf, "\ncommitter ", opt->mailmap); @@ -2294,18 +2307,18 @@ static int commit_match(struct commit *commit, struct rev_info *opt) /* Append "fake" message parts as needed */ if (opt->show_notes) { if (!buf.len) - strbuf_addstr(&buf, commit->buffer); - format_display_notes(commit->object.sha1, &buf, - get_log_output_encoding(), 1); + strbuf_addstr(&buf, message); + format_display_notes(commit->object.sha1, &buf, encoding, 1); } - /* Find either in the commit object, or in the temporary */ + /* Find either in the original commit message, or in the temporary */ if (buf.len) retval = grep_buffer(&opt->grep_filter, buf.buf, buf.len); else retval = 
grep_buffer(&opt->grep_filter, - commit->buffer, strlen(commit->buffer)); + message, strlen(message)); strbuf_release(&buf); + logmsg_free(message, commit); return retval; } diff --git a/t/t4210-log-i18n.sh b/t/t4210-log-i18n.sh new file mode 100755 index 0000000000..52a74729ba --- /dev/null +++ b/t/t4210-log-i18n.sh @@ -0,0 +1,58 @@ +#!/bin/sh + +test_description='test log with i18n features' +. ./test-lib.sh + +# two forms of é +utf8_e=$(printf '\303\251') +latin1_e=$(printf '\351') + +test_expect_success 'create commits in different encodings' ' + test_tick && + cat >msg <<-EOF && + utf8 + + t${utf8_e}st + EOF + git add msg && + git -c i18n.commitencoding=utf8 commit -F msg && + cat >msg <<-EOF && + latin1 + + t${latin1_e}st + EOF + git add msg && + git -c i18n.commitencoding=ISO-8859-1 commit -F msg +' + +test_expect_success 'log --grep searches in log output encoding (utf8)' ' + cat >expect <<-\EOF && + latin1 + utf8 + EOF + git log --encoding=utf8 --format=%s --grep=$utf8_e >actual && + test_cmp expect actual +' + +test_expect_success 'log --grep searches in log output encoding (latin1)' ' + cat >expect <<-\EOF && + latin1 + utf8 + EOF + git log --encoding=ISO-8859-1 --format=%s --grep=$latin1_e >actual && + test_cmp expect actual +' + +test_expect_success 'log --grep does not find non-reencoded values (utf8)' ' + >expect && + git log --encoding=utf8 --format=%s --grep=$latin1_e >actual && + test_cmp expect actual +' + +test_expect_success 'log --grep does not find non-reencoded values (latin1)' ' + >expect && + git log --encoding=ISO-8859-1 --format=%s --grep=$utf8_e >actual && + test_cmp expect actual +' + +test_done |