From aab57205515d9a74fe20cd51c509f65757b97a66 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 29 Oct 2008 23:49:26 -0700 Subject: git-svn: respect i18n.commitencoding config SVN itself always stores log messages in the repository as UTF-8. git always stores/retrieves everything as raw binary data with no transformations whatsoever. To interact with SVN, we need to encode log messages as UTF-8 before sending them to SVN, as SVN cannot do it for us. When retrieving log messages from SVN, we also need to (attempt to) reencode the UTF-8 log message back to the user-specified commit encoding. Note, handling i18n.logoutputencoding for "git svn log" also needs to be done in a future change. Also, this change only deals with the encoding of commit messages and nothing else (path names, blob content, ...). In-Reply-To: <8b168cfb0810282014r789ac01dnec51824de1078f0@mail.gmail.com> James North wrote: > Hi, > > I'm using git-svn on a system with ISO-8859-1 encoding. The problem is > when I try to use "git svn dcommit" to send changes to a remote svn > (also ISO-8859-1). > > Seems like git-svn is sending commit messages with utf-8 (just a > guessing...) and they look bad on the remote svn log. E.g. "Ca?\241a > de cami?\243n" > > I have tried using i18n.commitencoding=ISO-8859-1 as suggested by the > warning when doing "git svn dcommit" but messages still are sent with > wrong encoding. Signed-off-by: Eric Wong --- t/t9129-git-svn-i18n-commitencoding.sh | 80 ++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100755 t/t9129-git-svn-i18n-commitencoding.sh (limited to 't/t9129-git-svn-i18n-commitencoding.sh') diff --git a/t/t9129-git-svn-i18n-commitencoding.sh b/t/t9129-git-svn-i18n-commitencoding.sh new file mode 100755 index 0000000000..2848e46e38 --- /dev/null +++ b/t/t9129-git-svn-i18n-commitencoding.sh @@ -0,0 +1,80 @@ +#!/bin/sh +# +# Copyright (c) 2008 Eric Wong + +test_description='git svn honors i18n.commitEncoding in config' + +. 
./lib-git-svn.sh + +compare_git_head_with () { + nr=`wc -l < "$1"` + a=7 + b=$(($a + $nr - 1)) + git cat-file commit HEAD | sed -ne "$a,${b}p" >current && + test_cmp current "$1" +} + +compare_svn_head_with () { + LC_ALL=en_US.UTF-8 svn log --limit 1 `git svn info --url` | \ + sed -e 1,3d -e "/^-\+\$/d" >current && + test_cmp current "$1" +} + +for H in ISO-8859-1 EUCJP ISO-2022-JP +do + test_expect_success "$H setup" ' + mkdir $H && + svn import -m "$H test" $H "$svnrepo"/$H && + git svn clone "$svnrepo"/$H $H + ' +done + +for H in ISO-8859-1 EUCJP ISO-2022-JP +do + test_expect_success "$H commit on git side" ' + ( + cd $H && + git config i18n.commitencoding $H && + git checkout -b t refs/remotes/git-svn && + echo $H >F && + git add F && + git commit -a -F "$TEST_DIRECTORY"/t3900/$H.txt && + E=$(git cat-file commit HEAD | sed -ne "s/^encoding //p") && + test "z$E" = "z$H" + compare_git_head_with "$TEST_DIRECTORY"/t3900/$H.txt + ) + ' +done + +for H in ISO-8859-1 EUCJP ISO-2022-JP +do + test_expect_success "$H dcommit to svn" ' + ( + cd $H && + git svn dcommit && + git cat-file commit HEAD | grep git-svn-id: && + E=$(git cat-file commit HEAD | sed -ne "s/^encoding //p") && + test "z$E" = "z$H" && + compare_git_head_with "$TEST_DIRECTORY"/t3900/$H.txt + ) + ' +done + +test_expect_success 'ISO-8859-1 should match UTF-8 in svn' ' +( + cd ISO-8859-1 && + compare_svn_head_with "$TEST_DIRECTORY"/t3900/1-UTF-8.txt +) +' + +for H in EUCJP ISO-2022-JP +do + test_expect_success '$H should match UTF-8 in svn' ' + ( + cd $H && + compare_svn_head_with "$TEST_DIRECTORY"/t3900/2-UTF-8.txt + ) + ' +done + +test_done -- cgit v1.2.3 From 26d6cc555db0144961bcb3537312cc5a7b6d84d1 Mon Sep 17 00:00:00 2001 From: Marcel Koeppen Date: Wed, 26 Nov 2008 17:51:01 +0100 Subject: t9129-git-svn-i18n-commitencoding: Make compare_svn_head_with() compatible with OSX sed The sed call used in compare_svn_head_with() uses the + quantifier, which is not supported in the OSX version of sed. 
It is replaced by the equivalent \{1,\}. Signed-off-by: Marcel Koeppen Signed-off-by: Junio C Hamano --- t/t9129-git-svn-i18n-commitencoding.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 't/t9129-git-svn-i18n-commitencoding.sh') diff --git a/t/t9129-git-svn-i18n-commitencoding.sh b/t/t9129-git-svn-i18n-commitencoding.sh index 2848e46e38..938b7fe4b4 100755 --- a/t/t9129-git-svn-i18n-commitencoding.sh +++ b/t/t9129-git-svn-i18n-commitencoding.sh @@ -16,7 +16,7 @@ compare_git_head_with () { compare_svn_head_with () { LC_ALL=en_US.UTF-8 svn log --limit 1 `git svn info --url` | \ - sed -e 1,3d -e "/^-\+\$/d" >current && + sed -e 1,3d -e "/^-\{1,\}\$/d" >current && test_cmp current "$1" } -- cgit v1.2.3 From c66c0cbc7e55409ac934b7c6e89aa7aafa74ede4 Mon Sep 17 00:00:00 2001 From: Miklos Vajna Date: Tue, 23 Dec 2008 02:09:24 +0100 Subject: t9129: skip the last three tests if UTF-8 locale is not available Signed-off-by: Miklos Vajna Signed-off-by: Junio C Hamano --- t/t9129-git-svn-i18n-commitencoding.sh | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) (limited to 't/t9129-git-svn-i18n-commitencoding.sh') diff --git a/t/t9129-git-svn-i18n-commitencoding.sh b/t/t9129-git-svn-i18n-commitencoding.sh index 938b7fe4b4..8a9dde44d5 100755 --- a/t/t9129-git-svn-i18n-commitencoding.sh +++ b/t/t9129-git-svn-i18n-commitencoding.sh @@ -60,21 +60,25 @@ do ' done -test_expect_success 'ISO-8859-1 should match UTF-8 in svn' ' -( - cd ISO-8859-1 && - compare_svn_head_with "$TEST_DIRECTORY"/t3900/1-UTF-8.txt -) -' - -for H in EUCJP ISO-2022-JP -do - test_expect_success '$H should match UTF-8 in svn' ' +if locale -a |grep -q en_US.utf8; then + test_expect_success 'ISO-8859-1 should match UTF-8 in svn' ' ( - cd $H && - compare_svn_head_with "$TEST_DIRECTORY"/t3900/2-UTF-8.txt + cd ISO-8859-1 && + compare_svn_head_with "$TEST_DIRECTORY"/t3900/1-UTF-8.txt ) ' -done + + for H in EUCJP ISO-2022-JP + do + test_expect_success '$H should 
match UTF-8 in svn' ' + ( + cd $H && + compare_svn_head_with "$TEST_DIRECTORY"/t3900/2-UTF-8.txt + ) + ' + done +else + say "UTF-8 locale not available, test skipped" +fi test_done -- cgit v1.2.3 From 1ef626b4b6c70fc13062faafdccb2f0da7578a29 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 17 Jan 2009 22:11:44 -0800 Subject: git-svn: fix SVN 1.1.x compatibility The get_log() function in the Perl SVN API introduced the limit parameter in 1.2.0. However, this got discarded in our SVN::Ra compatibility layer when used with SVN 1.1.x. We now emulate the limit functionality in older SVN versions by preventing the original callback from being called if the given limit has been reached. This emulation is less bandwidth efficient, but SVN 1.1.x is becoming rarer now. Additionally, the --limit parameter in svn(1) uses the aforementioned get_log() functionality change in SVN 1.2.x. t9129 no longer depends on --limit to work and instead uses Perl to parse out the commit message. Thanks to Tom G. Christensen for the bug report. Signed-off-by: Eric Wong --- t/t9129-git-svn-i18n-commitencoding.sh | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 't/t9129-git-svn-i18n-commitencoding.sh') diff --git a/t/t9129-git-svn-i18n-commitencoding.sh b/t/t9129-git-svn-i18n-commitencoding.sh index 8a9dde44d5..9c7b1ad18b 100755 --- a/t/t9129-git-svn-i18n-commitencoding.sh +++ b/t/t9129-git-svn-i18n-commitencoding.sh @@ -15,8 +15,17 @@ compare_git_head_with () { } compare_svn_head_with () { - LC_ALL=en_US.UTF-8 svn log --limit 1 `git svn info --url` | \ - sed -e 1,3d -e "/^-\{1,\}\$/d" >current && + # extract just the log message and strip out committer info. + # don't use --limit here since svn 1.1.x doesn't have it, + LC_ALL=en_US.UTF-8 svn log `git svn info --url` | perl -w -e ' + use bytes; + $/ = ("-"x72) . 
"\n"; + my @x = <STDIN>; + @x = split(/\n/, $x[1]); + splice(@x, 0, 2); + $x[-1] = ""; + print join("\n", @x); + ' > current && test_cmp current "$1" } -- cgit v1.2.3 From 7b7247b0d7cb6a105d87574642343480707414b3 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Tue, 24 Feb 2009 21:13:39 +0100 Subject: t9100, t9129: Use prerequisite tags for UTF-8 tests Signed-off-by: Johannes Sixt --- t/t9129-git-svn-i18n-commitencoding.sh | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 't/t9129-git-svn-i18n-commitencoding.sh') diff --git a/t/t9129-git-svn-i18n-commitencoding.sh b/t/t9129-git-svn-i18n-commitencoding.sh index 9c7b1ad18b..3200ab38ef 100755 --- a/t/t9129-git-svn-i18n-commitencoding.sh +++ b/t/t9129-git-svn-i18n-commitencoding.sh @@ -70,24 +70,26 @@ do done if locale -a |grep -q en_US.utf8; then - test_expect_success 'ISO-8859-1 should match UTF-8 in svn' ' + test_set_prereq UTF8 +else + say "UTF-8 locale not available, test skipped" +fi + +test_expect_success UTF8 'ISO-8859-1 should match UTF-8 in svn' ' ( cd ISO-8859-1 && compare_svn_head_with "$TEST_DIRECTORY"/t3900/1-UTF-8.txt ) - ' +' - for H in EUCJP ISO-2022-JP - do - test_expect_success '$H should match UTF-8 in svn' ' +for H in EUCJP ISO-2022-JP +do + test_expect_success UTF8 "$H should match UTF-8 in svn" ' ( cd $H && compare_svn_head_with "$TEST_DIRECTORY"/t3900/2-UTF-8.txt ) - ' - done -else - say "UTF-8 locale not available, test skipped" -fi + ' +done test_done -- cgit v1.2.3