diff options
Diffstat (limited to 'gettext.c')
-rw-r--r-- | gettext.c | 126 |
1 file changed, 34 insertions, 92 deletions
@@ -12,7 +12,25 @@ #ifndef NO_GETTEXT # include <locale.h> # include <libintl.h> -# ifdef HAVE_LIBCHARSET_H +# ifdef GIT_WINDOWS_NATIVE + +static const char *locale_charset(void) +{ + const char *env = getenv("LC_ALL"), *dot; + + if (!env || !*env) + env = getenv("LC_CTYPE"); + if (!env || !*env) + env = getenv("LANG"); + + if (!env) + return "UTF-8"; + + dot = strchr(env, '.'); + return !dot ? env : dot + 1; +} + +# elif defined HAVE_LIBCHARSET_H # include <libcharset.h> # else # include <langinfo.h> @@ -47,16 +65,6 @@ const char *get_preferred_languages(void) return NULL; } -int use_gettext_poison(void) -{ - static int poison_requested = -1; - if (poison_requested == -1) { - const char *v = getenv("GIT_TEST_GETTEXT_POISON"); - poison_requested = v && strlen(v) ? 1 : 0; - } - return poison_requested; -} - #ifndef NO_GETTEXT static int test_vsnprintf(const char *fmt, ...) { @@ -71,88 +79,24 @@ static int test_vsnprintf(const char *fmt, ...) static void init_gettext_charset(const char *domain) { - /* - This trick arranges for messages to be emitted in the user's - requested encoding, but avoids setting LC_CTYPE from the - environment for the whole program. - - This primarily done to avoid a bug in vsnprintf in the GNU C - Library [1]. which triggered a "your vsnprintf is broken" error - on Git's own repository when inspecting v0.99.6~1 under a UTF-8 - locale. - - That commit contains a ISO-8859-1 encoded author name, which - the locale aware vsnprintf(3) won't interpolate in the format - argument, due to mismatch between the data encoding and the - locale. - - Even if it wasn't for that bug we wouldn't want to use LC_CTYPE at - this point, because it'd require auditing all the code that uses C - functions whose semantics are modified by LC_CTYPE. 
- - But only setting LC_MESSAGES as we do creates a problem, since - we declare the encoding of our PO files[2] the gettext - implementation will try to recode it to the user's locale, but - without LC_CTYPE it'll emit something like this on 'git init' - under the Icelandic locale: - - Bj? til t?ma Git lind ? /hlagh/.git/ - - Gettext knows about the encoding of our PO file, but we haven't - told it about the user's encoding, so all the non-US-ASCII - characters get encoded to question marks. - - But we're in luck! We can set LC_CTYPE from the environment - only while we call nl_langinfo and - bind_textdomain_codeset. That suffices to tell gettext what - encoding it should emit in, so it'll now say: - - Bjó til tóma Git lind í /hlagh/.git/ - - And the equivalent ISO-8859-1 string will be emitted under a - ISO-8859-1 locale. - - With this change way we get the advantages of setting LC_CTYPE - (talk to the user in his language/encoding), without the major - drawbacks (changed semantics for C functions we rely on). - - However foreign functions using other message catalogs that - aren't using our neat trick will still have a problem, e.g. if - we have to call perror(3): - - #include <stdio.h> - #include <locale.h> - #include <errno.h> - - int main(void) - { - setlocale(LC_MESSAGES, ""); - setlocale(LC_CTYPE, "C"); - errno = ENODEV; - perror("test"); - return 0; - } - - Running that will give you a message with question marks: - - $ LANGUAGE= LANG=de_DE.utf8 ./test - test: Kein passendes Ger?t gefunden - - The vsnprintf bug has been fixed since glibc 2.17. - - Then we could simply set LC_CTYPE from the environment, which would - make things like the external perror(3) messages work. - - See t/t0203-gettext-setlocale-sanity.sh's "gettext.c" tests for - regression tests. - - 1. http://sourceware.org/bugzilla/show_bug.cgi?id=6530 - 2. E.g. 
"Content-Type: text/plain; charset=UTF-8\n" in po/is.po - */ setlocale(LC_CTYPE, ""); charset = locale_charset(); bind_textdomain_codeset(domain, charset); - /* the string is taken from v0.99.6~1 */ + + /* + * Work around an old bug fixed in glibc 2.17 (released on + * 2012-12-24), at the cost of potentially making translated + * messages from external functions like perror() emitted in + * the wrong encoding. + * + * The bug affected e.g. git.git's own 7eb93c89651 ([PATCH] + * Simplify git script, 2005-09-07), which is the origin of + * the "David_K\345gedal" test string. + * + * See a much longer comment added to this file in 5e9637c6297 + * (i18n: add infrastructure for translating Git with gettext, + * 2011-11-18) for more details. + */ if (test_vsnprintf("%.*s", 13, "David_K\345gedal") < 0) setlocale(LC_CTYPE, "C"); } @@ -165,8 +109,6 @@ void git_setup_gettext(void) if (!podir) podir = p = system_path(GIT_LOCALE_PATH); - use_gettext_poison(); /* getenv() reentrancy paranoia */ - if (!is_directory(podir)) { free(p); return; |