diff options
author | Junio C Hamano <gitster@pobox.com> | 2019-12-01 09:04:36 -0800 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2019-12-01 09:04:36 -0800 |
commit | 6511cb33c918e5156cfff32f70b33dc65380760e (patch) | |
tree | 36a09199288758c52434c821fdae2ef8b73d4357 | |
parent | Merge branch 'jk/remove-sha1-to-hex' (diff) | |
parent | sequencer: reencode commit message for am/rebase --show-current-patch (diff) | |
download | tgif-6511cb33c918e5156cfff32f70b33dc65380760e.tar.xz |
Merge branch 'dd/sequencer-utf8'
Handling of commit objects that use non UTF-8 encoding during
"rebase -i" has been improved.
* dd/sequencer-utf8:
  sequencer: reencode commit message for am/rebase --show-current-patch
  sequencer: reencode old merge-commit message
  sequencer: reencode squashing commit's message
  sequencer: reencode revert/cherry-pick's todo list
  sequencer: reencode to utf-8 before arrange rebase's todo list
  t3900: demonstrate git-rebase problem with multi encoding
  configure.ac: define ICONV_OMITS_BOM if necessary
  t0028: eliminate non-standard usage of printf
-rw-r--r-- | configure.ac | 49 | ||||
-rw-r--r-- | sequencer.c | 21 | ||||
-rwxr-xr-x | t/t0028-working-tree-encoding.sh | 4 | ||||
-rwxr-xr-x | t/t3434-rebase-i18n.sh | 84 | ||||
-rw-r--r-- | t/t3434/ISO8859-1.txt | 3 | ||||
-rw-r--r-- | t/t3434/eucJP.txt | 4 | ||||
-rwxr-xr-x | t/t3900-i18n-commit.sh | 37 |
7 files changed, 193 insertions, 9 deletions
diff --git a/configure.ac b/configure.ac index 4d32d5e432..66aedb9288 100644 --- a/configure.ac +++ b/configure.ac @@ -844,12 +844,61 @@ AC_MSG_CHECKING([for old iconv()]) AC_COMPILE_IFELSE([OLDICONVTEST_SRC], [AC_MSG_RESULT([no])], [AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_OLD_ICONV, 1) OLD_ICONV=UnfortunatelyYes]) GIT_UNSTASH_FLAGS($ICONVDIR) GIT_CONF_SUBST([OLD_ICONV]) +# +# Define ICONV_OMITS_BOM if you are on a system which +# iconv omits bom for utf-{16,32} +if test -z "$NO_ICONV"; then +AC_CACHE_CHECK([whether iconv omits bom for utf-16 and utf-32], + [ac_cv_iconv_omits_bom], +[ +old_LIBS="$LIBS" +if test -n "$NEEDS_LIBICONV"; then + LIBS="$LIBS -liconv" +fi + +AC_RUN_IFELSE( + [AC_LANG_PROGRAM([AC_INCLUDES_DEFAULT + #include <iconv.h> + #ifdef HAVE_OLD_ICONV + typedef const char *iconv_ibp; + #else + typedef char *iconv_ibp; + #endif + ], + [[ + int v; + iconv_t conv; + char in[] = "a"; iconv_ibp pin = in; + char out[20] = ""; char *pout = out; + size_t isz = sizeof in; + size_t osz = sizeof out; + + conv = iconv_open("UTF-16", "UTF-8"); + iconv(conv, &pin, &isz, &pout, &osz); + iconv_close(conv); + v = (unsigned char)(out[0]) + (unsigned char)(out[1]); + return v != 0xfe + 0xff; + ]])], + [ac_cv_iconv_omits_bom=no], + [ac_cv_iconv_omits_bom=yes]) + +LIBS="$old_LIBS" +]) +if test "x$ac_cv_iconv_omits_bom" = xyes; then + ICONV_OMITS_BOM=Yes +else + ICONV_OMITS_BOM= +fi +GIT_CONF_SUBST([ICONV_OMITS_BOM]) +fi + ## Checks for typedefs, structures, and compiler characteristics. 
AC_MSG_NOTICE([CHECKS for typedefs, structures, and compiler characteristics]) # diff --git a/sequencer.c b/sequencer.c index 655a99d096..2c6fbcb41a 100644 --- a/sequencer.c +++ b/sequencer.c @@ -1574,6 +1574,7 @@ static int update_squash_messages(struct repository *r, struct strbuf buf = STRBUF_INIT; int res; const char *message, *body; + const char *encoding = get_commit_output_encoding(); if (opts->current_fixup_count > 0) { struct strbuf header = STRBUF_INIT; @@ -1600,7 +1601,7 @@ static int update_squash_messages(struct repository *r, return error(_("need a HEAD to fixup")); if (!(head_commit = lookup_commit_reference(r, &head))) return error(_("could not read HEAD")); - if (!(head_message = get_commit_buffer(head_commit, NULL))) + if (!(head_message = logmsg_reencode(head_commit, NULL, encoding))) return error(_("could not read HEAD's commit message")); find_commit_subject(head_message, &body); @@ -1621,7 +1622,7 @@ static int update_squash_messages(struct repository *r, unuse_commit_buffer(head_commit, head_message); } - if (!(message = get_commit_buffer(commit, NULL))) + if (!(message = logmsg_reencode(commit, NULL, encoding))) return error(_("could not read commit message of %s"), oid_to_hex(&commit->object.oid)); find_commit_subject(message, &body); @@ -2562,14 +2563,17 @@ static int walk_revs_populate_todo(struct todo_list *todo_list, enum todo_command command = opts->action == REPLAY_PICK ? 
TODO_PICK : TODO_REVERT; const char *command_string = todo_command_info[command].str; + const char *encoding; struct commit *commit; if (prepare_revs(opts)) return -1; + encoding = get_log_output_encoding(); + while ((commit = get_revision(opts->revs))) { struct todo_item *item = append_new_todo(todo_list); - const char *commit_buffer = get_commit_buffer(commit, NULL); + const char *commit_buffer = logmsg_reencode(commit, NULL, encoding); const char *subject; int subject_len; @@ -2966,7 +2970,8 @@ static int make_patch(struct repository *r, strbuf_addf(&buf, "%s/message", get_dir(opts)); if (!file_exists(buf.buf)) { - const char *commit_buffer = get_commit_buffer(commit, NULL); + const char *encoding = get_commit_output_encoding(); + const char *commit_buffer = logmsg_reencode(commit, NULL, encoding); find_commit_subject(commit_buffer, &subject); res |= write_message(subject, strlen(subject), buf.buf, 1); unuse_commit_buffer(commit, commit_buffer); @@ -3368,7 +3373,8 @@ static int do_merge(struct repository *r, } if (commit) { - const char *message = get_commit_buffer(commit, NULL); + const char *encoding = get_commit_output_encoding(); + const char *message = logmsg_reencode(commit, NULL, encoding); const char *body; int len; @@ -4149,9 +4155,10 @@ static int commit_staged_changes(struct repository *r, */ struct commit *commit; const char *path = rebase_path_squash_msg(); + const char *encoding = get_commit_output_encoding(); if (parse_head(r, &commit) || - !(p = get_commit_buffer(commit, NULL)) || + !(p = logmsg_reencode(commit, NULL, encoding)) || write_message(p, strlen(p), path, 0)) { unuse_commit_buffer(commit, p); return error(_("could not write file: " @@ -5167,7 +5174,7 @@ int todo_list_rearrange_squash(struct todo_list *todo_list) *commit_todo_item_at(&commit_todo, item->commit) = item; parse_commit(item->commit); - commit_buffer = get_commit_buffer(item->commit, NULL); + commit_buffer = logmsg_reencode(item->commit, NULL, "UTF-8"); 
find_commit_subject(commit_buffer, &subject); format_subject(&buf, subject, " "); subject = subjects[i] = strbuf_detach(&buf, &subject_len); diff --git a/t/t0028-working-tree-encoding.sh b/t/t0028-working-tree-encoding.sh index 7aa0945d8d..bfc4fb9af5 100755 --- a/t/t0028-working-tree-encoding.sh +++ b/t/t0028-working-tree-encoding.sh @@ -17,7 +17,7 @@ test_lazy_prereq NO_UTF32_BOM ' write_utf16 () { if test_have_prereq NO_UTF16_BOM then - printf '\xfe\xff' + printf '\376\377' fi && iconv -f UTF-8 -t UTF-16 } @@ -25,7 +25,7 @@ write_utf16 () { write_utf32 () { if test_have_prereq NO_UTF32_BOM then - printf '\x00\x00\xfe\xff' + printf '\0\0\376\377' fi && iconv -f UTF-8 -t UTF-32 } diff --git a/t/t3434-rebase-i18n.sh b/t/t3434-rebase-i18n.sh new file mode 100755 index 0000000000..4b5b128cd6 --- /dev/null +++ b/t/t3434-rebase-i18n.sh @@ -0,0 +1,84 @@ +#!/bin/sh +# +# Copyright (c) 2019 Doan Tran Cong Danh +# + +test_description='rebase with changing encoding + +Initial setup: + +1 - 2 master + \ + 3 - 4 first + \ + 5 - 6 second +' + +. 
./test-lib.sh + +compare_msg () { + iconv -f "$2" -t "$3" "$TEST_DIRECTORY/t3434/$1" >expect && + git cat-file commit HEAD >raw && + sed "1,/^$/d" raw >actual && + test_cmp expect actual +} + +test_expect_success setup ' + test_commit one && + git branch first && + test_commit two && + git switch first && + test_commit three && + git branch second && + test_commit four && + git switch second && + test_commit five && + test_commit six +' + +test_expect_success 'rebase --rebase-merges update encoding eucJP to UTF-8' ' + git switch -c merge-eucJP-UTF-8 first && + git config i18n.commitencoding eucJP && + git merge -F "$TEST_DIRECTORY/t3434/eucJP.txt" second && + git config i18n.commitencoding UTF-8 && + git rebase --rebase-merges master && + compare_msg eucJP.txt eucJP UTF-8 +' + +test_expect_failure 'rebase --rebase-merges update encoding eucJP to ISO-2022-JP' ' + git switch -c merge-eucJP-ISO-2022-JP first && + git config i18n.commitencoding eucJP && + git merge -F "$TEST_DIRECTORY/t3434/eucJP.txt" second && + git config i18n.commitencoding ISO-2022-JP && + git rebase --rebase-merges master && + compare_msg eucJP.txt eucJP ISO-2022-JP +' + +test_rebase_continue_update_encode () { + old=$1 + new=$2 + msgfile=$3 + test_expect_success "rebase --continue update from $old to $new" ' + (git rebase --abort || : abort current git-rebase failure) && + git switch -c conflict-$old-$new one && + echo for-conflict >two.t && + git add two.t && + git config i18n.commitencoding $old && + git commit -F "$TEST_DIRECTORY/t3434/$msgfile" && + git config i18n.commitencoding $new && + test_must_fail git rebase -m master && + test -f .git/rebase-merge/message && + git stripspace <.git/rebase-merge/message >two.t && + git add two.t && + git rebase --continue && + compare_msg $msgfile $old $new && + : git-commit assume invalid utf-8 is latin1 && + test_cmp expect two.t + ' +} + +test_rebase_continue_update_encode ISO-8859-1 UTF-8 ISO8859-1.txt +test_rebase_continue_update_encode eucJP UTF-8 
eucJP.txt +test_rebase_continue_update_encode eucJP ISO-2022-JP eucJP.txt + +test_done diff --git a/t/t3434/ISO8859-1.txt b/t/t3434/ISO8859-1.txt new file mode 100644 index 0000000000..7cbef0ee6f --- /dev/null +++ b/t/t3434/ISO8859-1.txt @@ -0,0 +1,3 @@ +ÄËÑÏÖ + +Ábçdèfg diff --git a/t/t3434/eucJP.txt b/t/t3434/eucJP.txt new file mode 100644 index 0000000000..546f2aac01 --- /dev/null +++ b/t/t3434/eucJP.txt @@ -0,0 +1,4 @@ +¤Ï¤ì¤Ò¤Û¤Õ + +¤·¤Æ¤¤¤ë¤Î¤¬¡¢¤¤¤ë¤Î¤Ç¡£ +ßÀÉͤۤì¤×¤ê¤Ý¤ì¤Þ¤Ó¤°¤ê¤í¤Ø¡£ diff --git a/t/t3900-i18n-commit.sh b/t/t3900-i18n-commit.sh index b92ff95977..d277a9f4b7 100755 --- a/t/t3900-i18n-commit.sh +++ b/t/t3900-i18n-commit.sh @@ -204,4 +204,41 @@ test_commit_autosquash_flags eucJP fixup test_commit_autosquash_flags ISO-2022-JP squash +test_commit_autosquash_multi_encoding () { + flag=$1 + old=$2 + new=$3 + msg=$4 + test_expect_success "commit --$flag into $old from $new" ' + git checkout -b $flag-$old-$new C0 && + git config i18n.commitencoding $old && + echo $old >>F && + git commit -a -F "$TEST_DIRECTORY"/t3900/$msg && + test_tick && + echo intermediate stuff >>G && + git add G && + git commit -a -m "intermediate commit" && + test_tick && + git config i18n.commitencoding $new && + echo $new-$flag >>F && + git commit -a --$flag HEAD^ && + git rebase --autosquash -i HEAD^^^ && + git rev-list HEAD >actual && + test_line_count = 3 actual && + iconv -f $old -t UTF-8 "$TEST_DIRECTORY"/t3900/$msg >expect && + if test $flag = squash; then + subject="$(head -1 expect)" && + printf "\nsquash! 
%s\n" "$subject" >>expect + fi && + git cat-file commit HEAD^ >raw && + (sed "1,/^$/d" raw | iconv -f $new -t utf-8) >actual && + test_cmp expect actual + ' +} + +test_commit_autosquash_multi_encoding fixup UTF-8 ISO-8859-1 1-UTF-8.txt +test_commit_autosquash_multi_encoding squash ISO-8859-1 UTF-8 ISO8859-1.txt +test_commit_autosquash_multi_encoding squash eucJP ISO-2022-JP eucJP.txt +test_commit_autosquash_multi_encoding fixup ISO-2022-JP UTF-8 ISO-2022-JP.txt + test_done |