summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Junio C Hamano <gitster@pobox.com> 2019-12-01 09:04:36 -0800
committer Junio C Hamano <gitster@pobox.com> 2019-12-01 09:04:36 -0800
commit 6511cb33c918e5156cfff32f70b33dc65380760e (patch)
tree 36a09199288758c52434c821fdae2ef8b73d4357
parent Merge branch 'jk/remove-sha1-to-hex' (diff)
parent sequencer: reencode commit message for am/rebase --show-current-patch (diff)
download tgif-6511cb33c918e5156cfff32f70b33dc65380760e.tar.xz
Merge branch 'dd/sequencer-utf8'
Handling of commit objects that use non-UTF-8 encoding during
"rebase -i" has been improved.

* dd/sequencer-utf8:
  sequencer: reencode commit message for am/rebase --show-current-patch
  sequencer: reencode old merge-commit message
  sequencer: reencode squashing commit's message
  sequencer: reencode revert/cherry-pick's todo list
  sequencer: reencode to utf-8 before arrange rebase's todo list
  t3900: demonstrate git-rebase problem with multi encoding
  configure.ac: define ICONV_OMITS_BOM if necessary
  t0028: eliminate non-standard usage of printf
-rw-r--r-- configure.ac 49
-rw-r--r-- sequencer.c 21
-rwxr-xr-x t/t0028-working-tree-encoding.sh 4
-rwxr-xr-x t/t3434-rebase-i18n.sh 84
-rw-r--r-- t/t3434/ISO8859-1.txt 3
-rw-r--r-- t/t3434/eucJP.txt 4
-rwxr-xr-x t/t3900-i18n-commit.sh 37
7 files changed, 193 insertions, 9 deletions
diff --git a/configure.ac b/configure.ac
index 4d32d5e432..66aedb9288 100644
--- a/configure.ac
+++ b/configure.ac
@@ -844,12 +844,61 @@ AC_MSG_CHECKING([for old iconv()])
AC_COMPILE_IFELSE([OLDICONVTEST_SRC],
[AC_MSG_RESULT([no])],
[AC_MSG_RESULT([yes])
+ AC_DEFINE(HAVE_OLD_ICONV, 1)
OLD_ICONV=UnfortunatelyYes])
GIT_UNSTASH_FLAGS($ICONVDIR)
GIT_CONF_SUBST([OLD_ICONV])
+#
+# Define ICONV_OMITS_BOM if you are on a system whose
+# iconv omits the BOM for UTF-{16,32}
+if test -z "$NO_ICONV"; then
+AC_CACHE_CHECK([whether iconv omits bom for utf-16 and utf-32],
+ [ac_cv_iconv_omits_bom],
+[
+old_LIBS="$LIBS"
+if test -n "$NEEDS_LIBICONV"; then
+ LIBS="$LIBS -liconv"
+fi
+
+AC_RUN_IFELSE(
+ [AC_LANG_PROGRAM([AC_INCLUDES_DEFAULT
+ #include <iconv.h>
+ #ifdef HAVE_OLD_ICONV
+ typedef const char *iconv_ibp;
+ #else
+ typedef char *iconv_ibp;
+ #endif
+ ],
+ [[
+ int v;
+ iconv_t conv;
+ char in[] = "a"; iconv_ibp pin = in;
+ char out[20] = ""; char *pout = out;
+ size_t isz = sizeof in;
+ size_t osz = sizeof out;
+
+ conv = iconv_open("UTF-16", "UTF-8");
+ iconv(conv, &pin, &isz, &pout, &osz);
+ iconv_close(conv);
+ v = (unsigned char)(out[0]) + (unsigned char)(out[1]);
+ return v != 0xfe + 0xff;
+ ]])],
+ [ac_cv_iconv_omits_bom=no],
+ [ac_cv_iconv_omits_bom=yes])
+
+LIBS="$old_LIBS"
+])
+if test "x$ac_cv_iconv_omits_bom" = xyes; then
+ ICONV_OMITS_BOM=Yes
+else
+ ICONV_OMITS_BOM=
+fi
+GIT_CONF_SUBST([ICONV_OMITS_BOM])
+fi
+
## Checks for typedefs, structures, and compiler characteristics.
AC_MSG_NOTICE([CHECKS for typedefs, structures, and compiler characteristics])
#
diff --git a/sequencer.c b/sequencer.c
index 655a99d096..2c6fbcb41a 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -1574,6 +1574,7 @@ static int update_squash_messages(struct repository *r,
struct strbuf buf = STRBUF_INIT;
int res;
const char *message, *body;
+ const char *encoding = get_commit_output_encoding();
if (opts->current_fixup_count > 0) {
struct strbuf header = STRBUF_INIT;
@@ -1600,7 +1601,7 @@ static int update_squash_messages(struct repository *r,
return error(_("need a HEAD to fixup"));
if (!(head_commit = lookup_commit_reference(r, &head)))
return error(_("could not read HEAD"));
- if (!(head_message = get_commit_buffer(head_commit, NULL)))
+ if (!(head_message = logmsg_reencode(head_commit, NULL, encoding)))
return error(_("could not read HEAD's commit message"));
find_commit_subject(head_message, &body);
@@ -1621,7 +1622,7 @@ static int update_squash_messages(struct repository *r,
unuse_commit_buffer(head_commit, head_message);
}
- if (!(message = get_commit_buffer(commit, NULL)))
+ if (!(message = logmsg_reencode(commit, NULL, encoding)))
return error(_("could not read commit message of %s"),
oid_to_hex(&commit->object.oid));
find_commit_subject(message, &body);
@@ -2562,14 +2563,17 @@ static int walk_revs_populate_todo(struct todo_list *todo_list,
enum todo_command command = opts->action == REPLAY_PICK ?
TODO_PICK : TODO_REVERT;
const char *command_string = todo_command_info[command].str;
+ const char *encoding;
struct commit *commit;
if (prepare_revs(opts))
return -1;
+ encoding = get_log_output_encoding();
+
while ((commit = get_revision(opts->revs))) {
struct todo_item *item = append_new_todo(todo_list);
- const char *commit_buffer = get_commit_buffer(commit, NULL);
+ const char *commit_buffer = logmsg_reencode(commit, NULL, encoding);
const char *subject;
int subject_len;
@@ -2966,7 +2970,8 @@ static int make_patch(struct repository *r,
strbuf_addf(&buf, "%s/message", get_dir(opts));
if (!file_exists(buf.buf)) {
- const char *commit_buffer = get_commit_buffer(commit, NULL);
+ const char *encoding = get_commit_output_encoding();
+ const char *commit_buffer = logmsg_reencode(commit, NULL, encoding);
find_commit_subject(commit_buffer, &subject);
res |= write_message(subject, strlen(subject), buf.buf, 1);
unuse_commit_buffer(commit, commit_buffer);
@@ -3368,7 +3373,8 @@ static int do_merge(struct repository *r,
}
if (commit) {
- const char *message = get_commit_buffer(commit, NULL);
+ const char *encoding = get_commit_output_encoding();
+ const char *message = logmsg_reencode(commit, NULL, encoding);
const char *body;
int len;
@@ -4149,9 +4155,10 @@ static int commit_staged_changes(struct repository *r,
*/
struct commit *commit;
const char *path = rebase_path_squash_msg();
+ const char *encoding = get_commit_output_encoding();
if (parse_head(r, &commit) ||
- !(p = get_commit_buffer(commit, NULL)) ||
+ !(p = logmsg_reencode(commit, NULL, encoding)) ||
write_message(p, strlen(p), path, 0)) {
unuse_commit_buffer(commit, p);
return error(_("could not write file: "
@@ -5167,7 +5174,7 @@ int todo_list_rearrange_squash(struct todo_list *todo_list)
*commit_todo_item_at(&commit_todo, item->commit) = item;
parse_commit(item->commit);
- commit_buffer = get_commit_buffer(item->commit, NULL);
+ commit_buffer = logmsg_reencode(item->commit, NULL, "UTF-8");
find_commit_subject(commit_buffer, &subject);
format_subject(&buf, subject, " ");
subject = subjects[i] = strbuf_detach(&buf, &subject_len);
diff --git a/t/t0028-working-tree-encoding.sh b/t/t0028-working-tree-encoding.sh
index 7aa0945d8d..bfc4fb9af5 100755
--- a/t/t0028-working-tree-encoding.sh
+++ b/t/t0028-working-tree-encoding.sh
@@ -17,7 +17,7 @@ test_lazy_prereq NO_UTF32_BOM '
write_utf16 () {
if test_have_prereq NO_UTF16_BOM
then
- printf '\xfe\xff'
+ printf '\376\377'
fi &&
iconv -f UTF-8 -t UTF-16
}
@@ -25,7 +25,7 @@ write_utf16 () {
write_utf32 () {
if test_have_prereq NO_UTF32_BOM
then
- printf '\x00\x00\xfe\xff'
+ printf '\0\0\376\377'
fi &&
iconv -f UTF-8 -t UTF-32
}
diff --git a/t/t3434-rebase-i18n.sh b/t/t3434-rebase-i18n.sh
new file mode 100755
index 0000000000..4b5b128cd6
--- /dev/null
+++ b/t/t3434-rebase-i18n.sh
@@ -0,0 +1,84 @@
+#!/bin/sh
+#
+# Copyright (c) 2019 Doan Tran Cong Danh
+#
+
+test_description='rebase with changing encoding
+
+Initial setup:
+
+1 - 2 master
+ \
+ 3 - 4 first
+ \
+ 5 - 6 second
+'
+
+. ./test-lib.sh
+
+compare_msg () {
+ iconv -f "$2" -t "$3" "$TEST_DIRECTORY/t3434/$1" >expect &&
+ git cat-file commit HEAD >raw &&
+ sed "1,/^$/d" raw >actual &&
+ test_cmp expect actual
+}
+
+test_expect_success setup '
+ test_commit one &&
+ git branch first &&
+ test_commit two &&
+ git switch first &&
+ test_commit three &&
+ git branch second &&
+ test_commit four &&
+ git switch second &&
+ test_commit five &&
+ test_commit six
+'
+
+test_expect_success 'rebase --rebase-merges update encoding eucJP to UTF-8' '
+ git switch -c merge-eucJP-UTF-8 first &&
+ git config i18n.commitencoding eucJP &&
+ git merge -F "$TEST_DIRECTORY/t3434/eucJP.txt" second &&
+ git config i18n.commitencoding UTF-8 &&
+ git rebase --rebase-merges master &&
+ compare_msg eucJP.txt eucJP UTF-8
+'
+
+test_expect_failure 'rebase --rebase-merges update encoding eucJP to ISO-2022-JP' '
+ git switch -c merge-eucJP-ISO-2022-JP first &&
+ git config i18n.commitencoding eucJP &&
+ git merge -F "$TEST_DIRECTORY/t3434/eucJP.txt" second &&
+ git config i18n.commitencoding ISO-2022-JP &&
+ git rebase --rebase-merges master &&
+ compare_msg eucJP.txt eucJP ISO-2022-JP
+'
+
+test_rebase_continue_update_encode () {
+ old=$1
+ new=$2
+ msgfile=$3
+ test_expect_success "rebase --continue update from $old to $new" '
+ (git rebase --abort || : abort current git-rebase failure) &&
+ git switch -c conflict-$old-$new one &&
+ echo for-conflict >two.t &&
+ git add two.t &&
+ git config i18n.commitencoding $old &&
+ git commit -F "$TEST_DIRECTORY/t3434/$msgfile" &&
+ git config i18n.commitencoding $new &&
+ test_must_fail git rebase -m master &&
+ test -f .git/rebase-merge/message &&
+ git stripspace <.git/rebase-merge/message >two.t &&
+ git add two.t &&
+ git rebase --continue &&
+ compare_msg $msgfile $old $new &&
+ : git-commit assume invalid utf-8 is latin1 &&
+ test_cmp expect two.t
+ '
+}
+
+test_rebase_continue_update_encode ISO-8859-1 UTF-8 ISO8859-1.txt
+test_rebase_continue_update_encode eucJP UTF-8 eucJP.txt
+test_rebase_continue_update_encode eucJP ISO-2022-JP eucJP.txt
+
+test_done
diff --git a/t/t3434/ISO8859-1.txt b/t/t3434/ISO8859-1.txt
new file mode 100644
index 0000000000..7cbef0ee6f
--- /dev/null
+++ b/t/t3434/ISO8859-1.txt
@@ -0,0 +1,3 @@
+ÄËÑÏÖ
+
+Ábçdèfg
diff --git a/t/t3434/eucJP.txt b/t/t3434/eucJP.txt
new file mode 100644
index 0000000000..546f2aac01
--- /dev/null
+++ b/t/t3434/eucJP.txt
@@ -0,0 +1,4 @@
+¤Ï¤ì¤Ò¤Û¤Õ
+
+¤·¤Æ¤¤¤ë¤Î¤¬¡¢¤¤¤ë¤Î¤Ç¡£
+ßÀÉͤۤì¤×¤ê¤Ý¤ì¤Þ¤Ó¤°¤ê¤í¤Ø¡£
diff --git a/t/t3900-i18n-commit.sh b/t/t3900-i18n-commit.sh
index b92ff95977..d277a9f4b7 100755
--- a/t/t3900-i18n-commit.sh
+++ b/t/t3900-i18n-commit.sh
@@ -204,4 +204,41 @@ test_commit_autosquash_flags eucJP fixup
test_commit_autosquash_flags ISO-2022-JP squash
+test_commit_autosquash_multi_encoding () {
+ flag=$1
+ old=$2
+ new=$3
+ msg=$4
+ test_expect_success "commit --$flag into $old from $new" '
+ git checkout -b $flag-$old-$new C0 &&
+ git config i18n.commitencoding $old &&
+ echo $old >>F &&
+ git commit -a -F "$TEST_DIRECTORY"/t3900/$msg &&
+ test_tick &&
+ echo intermediate stuff >>G &&
+ git add G &&
+ git commit -a -m "intermediate commit" &&
+ test_tick &&
+ git config i18n.commitencoding $new &&
+ echo $new-$flag >>F &&
+ git commit -a --$flag HEAD^ &&
+ git rebase --autosquash -i HEAD^^^ &&
+ git rev-list HEAD >actual &&
+ test_line_count = 3 actual &&
+ iconv -f $old -t UTF-8 "$TEST_DIRECTORY"/t3900/$msg >expect &&
+ if test $flag = squash; then
+ subject="$(head -1 expect)" &&
+ printf "\nsquash! %s\n" "$subject" >>expect
+ fi &&
+ git cat-file commit HEAD^ >raw &&
+ (sed "1,/^$/d" raw | iconv -f $new -t utf-8) >actual &&
+ test_cmp expect actual
+ '
+}
+
+test_commit_autosquash_multi_encoding fixup UTF-8 ISO-8859-1 1-UTF-8.txt
+test_commit_autosquash_multi_encoding squash ISO-8859-1 UTF-8 ISO8859-1.txt
+test_commit_autosquash_multi_encoding squash eucJP ISO-2022-JP eucJP.txt
+test_commit_autosquash_multi_encoding fixup ISO-2022-JP UTF-8 ISO-2022-JP.txt
+
test_done