summaryrefslogtreecommitdiff
path: root/gettext.c
diff options
context:
space:
mode:
Diffstat (limited to 'gettext.c')
-rw-r--r--gettext.c126
1 files changed, 34 insertions, 92 deletions
diff --git a/gettext.c b/gettext.c
index d4021d690c..af2413b47e 100644
--- a/gettext.c
+++ b/gettext.c
@@ -12,7 +12,25 @@
#ifndef NO_GETTEXT
# include <locale.h>
# include <libintl.h>
-# ifdef HAVE_LIBCHARSET_H
+# ifdef GIT_WINDOWS_NATIVE
+
+static const char *locale_charset(void)
+{
+ const char *env = getenv("LC_ALL"), *dot;
+
+ if (!env || !*env)
+ env = getenv("LC_CTYPE");
+ if (!env || !*env)
+ env = getenv("LANG");
+
+ if (!env)
+ return "UTF-8";
+
+ dot = strchr(env, '.');
+ return !dot ? env : dot + 1;
+}
+
+# elif defined HAVE_LIBCHARSET_H
# include <libcharset.h>
# else
# include <langinfo.h>
@@ -47,16 +65,6 @@ const char *get_preferred_languages(void)
return NULL;
}
-int use_gettext_poison(void)
-{
- static int poison_requested = -1;
- if (poison_requested == -1) {
- const char *v = getenv("GIT_TEST_GETTEXT_POISON");
- poison_requested = v && strlen(v) ? 1 : 0;
- }
- return poison_requested;
-}
-
#ifndef NO_GETTEXT
static int test_vsnprintf(const char *fmt, ...)
{
@@ -71,88 +79,24 @@ static int test_vsnprintf(const char *fmt, ...)
static void init_gettext_charset(const char *domain)
{
- /*
- This trick arranges for messages to be emitted in the user's
- requested encoding, but avoids setting LC_CTYPE from the
- environment for the whole program.
-
- This primarily done to avoid a bug in vsnprintf in the GNU C
- Library [1]. which triggered a "your vsnprintf is broken" error
- on Git's own repository when inspecting v0.99.6~1 under a UTF-8
- locale.
-
- That commit contains a ISO-8859-1 encoded author name, which
- the locale aware vsnprintf(3) won't interpolate in the format
- argument, due to mismatch between the data encoding and the
- locale.
-
- Even if it wasn't for that bug we wouldn't want to use LC_CTYPE at
- this point, because it'd require auditing all the code that uses C
- functions whose semantics are modified by LC_CTYPE.
-
- But only setting LC_MESSAGES as we do creates a problem, since
- we declare the encoding of our PO files[2] the gettext
- implementation will try to recode it to the user's locale, but
- without LC_CTYPE it'll emit something like this on 'git init'
- under the Icelandic locale:
-
- Bj? til t?ma Git lind ? /hlagh/.git/
-
- Gettext knows about the encoding of our PO file, but we haven't
- told it about the user's encoding, so all the non-US-ASCII
- characters get encoded to question marks.
-
- But we're in luck! We can set LC_CTYPE from the environment
- only while we call nl_langinfo and
- bind_textdomain_codeset. That suffices to tell gettext what
- encoding it should emit in, so it'll now say:
-
- Bjó til tóma Git lind í /hlagh/.git/
-
- And the equivalent ISO-8859-1 string will be emitted under a
- ISO-8859-1 locale.
-
- With this change way we get the advantages of setting LC_CTYPE
- (talk to the user in his language/encoding), without the major
- drawbacks (changed semantics for C functions we rely on).
-
- However foreign functions using other message catalogs that
- aren't using our neat trick will still have a problem, e.g. if
- we have to call perror(3):
-
- #include <stdio.h>
- #include <locale.h>
- #include <errno.h>
-
- int main(void)
- {
- setlocale(LC_MESSAGES, "");
- setlocale(LC_CTYPE, "C");
- errno = ENODEV;
- perror("test");
- return 0;
- }
-
- Running that will give you a message with question marks:
-
- $ LANGUAGE= LANG=de_DE.utf8 ./test
- test: Kein passendes Ger?t gefunden
-
- The vsnprintf bug has been fixed since glibc 2.17.
-
- Then we could simply set LC_CTYPE from the environment, which would
- make things like the external perror(3) messages work.
-
- See t/t0203-gettext-setlocale-sanity.sh's "gettext.c" tests for
- regression tests.
-
- 1. http://sourceware.org/bugzilla/show_bug.cgi?id=6530
- 2. E.g. "Content-Type: text/plain; charset=UTF-8\n" in po/is.po
- */
setlocale(LC_CTYPE, "");
charset = locale_charset();
bind_textdomain_codeset(domain, charset);
- /* the string is taken from v0.99.6~1 */
+
+ /*
+ * Work around an old bug fixed in glibc 2.17 (released on
+ * 2012-12-24), at the cost of potentially making translated
+ * messages from external functions like perror() emitted in
+ * the wrong encoding.
+ *
+ * The bug affected e.g. git.git's own 7eb93c89651 ([PATCH]
+ * Simplify git script, 2005-09-07), which is the origin of
+ * the "David_K\345gedal" test string.
+ *
+ * See a much longer comment added to this file in 5e9637c6297
+ * (i18n: add infrastructure for translating Git with gettext,
+ * 2011-11-18) for more details.
+ */
if (test_vsnprintf("%.*s", 13, "David_K\345gedal") < 0)
setlocale(LC_CTYPE, "C");
}
@@ -165,8 +109,6 @@ void git_setup_gettext(void)
if (!podir)
podir = p = system_path(GIT_LOCALE_PATH);
- use_gettext_poison(); /* getenv() reentrancy paranoia */
-
if (!is_directory(podir)) {
free(p);
return;