summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLibravatar Eric Wong <normalperson@yhbt.net>2009-05-28 00:56:23 -0700
committerLibravatar Eric Wong <normalperson@yhbt.net>2009-05-28 00:57:07 -0700
commitb510df8af2c86d441de87fa4be09f786b5411527 (patch)
tree44ac92ead768529ffefe3f09db23426db2c9a647
parentUpdate draft release notes to 1.6.4 (diff)
downloadtgif-b510df8af2c86d441de87fa4be09f786b5411527.tar.xz
git-svn: refuse to dcommit non-UTF-8 messages
...without i18n.commitencoding set in the config. SVN tries to store all commit messages in UTF-8, however it is up to the job of the clients to enforce this rule. SVN servers themselves do not always enforce this; allowing clients to commit malformed UTF-8 messages and break repositories. So git-svn will enforce this and tell the user to set i18n.commitencoding when a git commit is is not in UTF-8. Signed-off-by: Eric Wong <normalperson@yhbt.net>
-rwxr-xr-xgit-svn.perl17
-rwxr-xr-xt/t9139-git-svn-non-utf8-commitencoding.sh47
2 files changed, 61 insertions, 3 deletions
diff --git a/git-svn.perl b/git-svn.perl
index a70c7d7b2c..33017974d0 100755
--- a/git-svn.perl
+++ b/git-svn.perl
@@ -1178,16 +1178,27 @@ sub get_commit_entry {
}
rename $commit_editmsg, $commit_msg or croak $!;
{
+ require Encode;
# SVN requires messages to be UTF-8 when entering the repo
local $/;
open $log_fh, '<', $commit_msg or croak $!;
binmode $log_fh;
chomp($log_entry{log} = <$log_fh>);
- if (my $enc = Git::config('i18n.commitencoding')) {
- require Encode;
- Encode::from_to($log_entry{log}, $enc, 'UTF-8');
+ my $enc = Git::config('i18n.commitencoding') || 'UTF-8';
+ my $msg = $log_entry{log};
+
+ eval { $msg = Encode::decode($enc, $msg, 1) };
+ if ($@) {
+ die "Could not decode as $enc:\n", $msg,
+ "\nPerhaps you need to set i18n.commitencoding\n";
}
+
+ eval { $msg = Encode::encode('UTF-8', $msg, 1) };
+ die "Could not encode as UTF-8:\n$msg\n" if $@;
+
+ $log_entry{log} = $msg;
+
close $log_fh or croak $!;
}
unlink $commit_msg;
diff --git a/t/t9139-git-svn-non-utf8-commitencoding.sh b/t/t9139-git-svn-non-utf8-commitencoding.sh
new file mode 100755
index 0000000000..2b1db97337
--- /dev/null
+++ b/t/t9139-git-svn-non-utf8-commitencoding.sh
@@ -0,0 +1,47 @@
+#!/bin/sh
+#
+# Copyright (c) 2009 Eric Wong
+
+test_description='git svn refuses to dcommit non-UTF8 messages'
+
+. ./lib-git-svn.sh
+
+# ISO-2022-JP can pass for valid UTF-8, so skipping that in this test
+
+for H in ISO-8859-1 EUCJP
+do
+ test_expect_success "$H setup" '
+ mkdir $H &&
+ svn_cmd import -m "$H test" $H "$svnrepo"/$H &&
+ git svn clone "$svnrepo"/$H $H
+ '
+done
+
+for H in ISO-8859-1 EUCJP
+do
+ test_expect_success "$H commit on git side" '
+ (
+ cd $H &&
+ git config i18n.commitencoding $H &&
+ git checkout -b t refs/remotes/git-svn &&
+ echo $H >F &&
+ git add F &&
+ git commit -a -F "$TEST_DIRECTORY"/t3900/$H.txt &&
+ E=$(git cat-file commit HEAD | sed -ne "s/^encoding //p") &&
+ test "z$E" = "z$H"
+ )
+ '
+done
+
+for H in ISO-8859-1 EUCJP
+do
+ test_expect_success "$H dcommit to svn" '
+ (
+ cd $H &&
+ git config --unset i18n.commitencoding &&
+ ! git svn dcommit
+ )
+ '
+done
+
+test_done