From 3270741ea8c2a225183d272bf19ea19d5b3c05d8 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 26 Sep 2016 18:09:48 -0700 Subject: utf8: refactor code to decide fallback encoding The codepath we use to call iconv_open() has a provision to use a fallback encoding when it fails, hoping that "UTF-8" being spelled differently could be the reason why the library function did not like the encoding names we gave it. Essentially, we turn what we have observed to be used as variants of "UTF-8" (e.g. "utf8") into the most official spelling and use that as a fallback. We do the same thing for input and output encoding. Introduce a helper function to do just one side and call that twice. Signed-off-by: Junio C Hamano --- utf8.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/utf8.c b/utf8.c index 00e10c86ad..550e785ecb 100644 --- a/utf8.c +++ b/utf8.c @@ -489,6 +489,21 @@ char *reencode_string_iconv(const char *in, size_t insz, iconv_t conv, int *outs return out; } +static const char *fallback_encoding(const char *name) +{ + /* + * Some platforms do not have the variously spelled variants of + * UTF-8, so let's fall back to trying the most official + * spelling. We do so only as a fallback in case the platform + * does understand the user's spelling, but not our official + * one. + */ + if (is_encoding_utf8(name)) + return "UTF-8"; + + return name; +} + char *reencode_string_len(const char *in, int insz, const char *out_encoding, const char *in_encoding, int *outsz) @@ -501,17 +516,9 @@ char *reencode_string_len(const char *in, int insz, conv = iconv_open(out_encoding, in_encoding); if (conv == (iconv_t) -1) { - /* - * Some platforms do not have the variously spelled variants of - * UTF-8, so let's fall back to trying the most official - * spelling. We do so only as a fallback in case the platform - * does understand the user's spelling, but not our official - * one. - */ - if (is_encoding_utf8(in_encoding)) - in_encoding = "UTF-8"; - if (is_encoding_utf8(out_encoding)) - out_encoding = "UTF-8"; + in_encoding = fallback_encoding(in_encoding); + out_encoding = fallback_encoding(out_encoding); + conv = iconv_open(out_encoding, in_encoding); if (conv == (iconv_t) -1) return NULL; -- cgit v1.2.3 From df3755888b9a54e69ab70881738d431587c57951 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 26 Sep 2016 18:09:48 -0700 Subject: utf8: accept "latin-1" as ISO-8859-1 Even though latin-1 is still seen in e-mail headers, some platforms only install ISO-8859-1. "iconv -f ISO-8859-1" succeeds, while "iconv -f latin-1" fails on such a system. Using the same fallback_encoding() mechanism factored out in the previous step, teach ourselves that "ISO-8859-1" has a better chance of being accepted than "latin-1". Signed-off-by: Junio C Hamano --- utf8.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/utf8.c b/utf8.c index 550e785ecb..0c8e011a58 100644 --- a/utf8.c +++ b/utf8.c @@ -501,6 +501,13 @@ static const char *fallback_encoding(const char *name) if (is_encoding_utf8(name)) return "UTF-8"; + /* + * Even though latin-1 is still seen in e-mail + * headers, some platforms only install ISO-8859-1. + */ + if (!strcasecmp(name, "latin-1")) + return "ISO-8859-1"; + return name; } -- cgit v1.2.3