From 52883fbd767f8a79a6f98a08907d0a9f6ba1ece1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 25 Dec 2006 11:48:35 -0800 Subject: Teach log family --encoding Updated commit objects record the encoding used in their encoding header. This updates the log family to reencode it into the encoding specified in i18n.commitencoding (or the default, which is "utf-8") upon output. To force a specific encoding that is different, log family takes command line flag --encoding=; giving --encoding=none entirely disables the reencoding and lets you view log messges in their original encoding. Signed-off-by: Junio C Hamano --- commit.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 58 insertions(+), 3 deletions(-) (limited to 'commit.c') diff --git a/commit.c b/commit.c index 289ef65eb1..df4bc0775a 100644 --- a/commit.c +++ b/commit.c @@ -1,6 +1,7 @@ #include "cache.h" #include "tag.h" #include "commit.h" +#include "utf8.h" int save_commit_buffer = 1; @@ -563,10 +564,53 @@ static int add_merge_info(enum cmit_fmt fmt, char *buf, const struct commit *com return offset; } -unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit, - unsigned long len, char *buf, unsigned long space, +static char *get_header(const struct commit *commit, const char *key) +{ + int key_len = strlen(key); + const char *line = commit->buffer; + + for (;;) { + const char *eol = strchr(line, '\n'), *next; + + if (line == eol) + return NULL; + if (!eol) { + eol = line + strlen(line); + next = NULL; + } else + next = eol + 1; + if (!strncmp(line, key, key_len) && line[key_len] == ' ') { + int len = eol - line - key_len; + char *ret = xmalloc(len); + memcpy(ret, line + key_len + 1, len - 1); + ret[len - 1] = '\0'; + return ret; + } + line = next; + } +} + +static char *logmsg_reencode(const struct commit *commit) +{ + char *encoding = get_header(commit, "encoding"); + char *out; + + if (!encoding || !strcmp(encoding, git_commit_encoding)) + return NULL; + out = reencode_string(commit->buffer, git_commit_encoding, encoding); + free(encoding); + if (!out) + return NULL; + return out; +} + +unsigned long pretty_print_commit(enum cmit_fmt fmt, + const struct commit *commit, + unsigned long len, + char *buf, unsigned long space, int abbrev, const char *subject, - const char *after_subject, int relative_date) + const char *after_subject, + int relative_date) { int hdr = 1, body = 0; unsigned long offset = 0; @@ -574,6 +618,15 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit int parents_shown = 0; const char *msg = commit->buffer; int plain_non_ascii = 0; + char *reencoded = NULL; + + if (*git_commit_encoding) { + reencoded = logmsg_reencode(commit); + if (reencoded) { + msg = reencoded; + len = strlen(msg); + } + } if (fmt == CMIT_FMT_ONELINE || fmt == CMIT_FMT_EMAIL) indent = 0; @@ -721,6 +774,8 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit if (fmt == CMIT_FMT_EMAIL && !body) buf[offset++] = '\n'; buf[offset] = '\0'; + + free(reencoded); return offset; } -- cgit v1.2.3 From d2c11a38c476bdfa3dd2387a0d933b8c00e4dfe3 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 27 Dec 2006 16:41:33 -0800 Subject: UTF-8: introduce i18n.logoutputencoding. It is plausible for somebody to want to view the commit log in a different encoding from i18n.commitencoding -- the project's policy may be UTF-8 and the user may be using a commit message hook to run iconv to conform to that policy (and either not have i18n.commitencoding to default to UTF-8 or have it explicitly set to UTF-8). Even then, Latin-1 may be more convenient for the usual pager and the terminal the user uses. The new variable i18n.logoutputencoding is used in preference to i18n.commitencoding to decide what encoding to recode the log output in when git-log and friends formats the commit log message. Signed-off-by: Junio C Hamano --- commit.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'commit.c') diff --git a/commit.c b/commit.c index df4bc0775a..6f2839a5cd 100644 --- a/commit.c +++ b/commit.c @@ -592,12 +592,20 @@ static char *get_header(const struct commit *commit, const char *key) static char *logmsg_reencode(const struct commit *commit) { - char *encoding = get_header(commit, "encoding"); + char *encoding; char *out; + char *output_encoding = (git_log_output_encoding + ? git_log_output_encoding + : git_commit_encoding); - if (!encoding || !strcmp(encoding, git_commit_encoding)) + if (!output_encoding) return NULL; - out = reencode_string(commit->buffer, git_commit_encoding, encoding); + encoding = get_header(commit, "encoding"); + if (!encoding || !strcmp(encoding, output_encoding)) { + free(encoding); + return NULL; + } + out = reencode_string(commit->buffer, output_encoding, encoding); free(encoding); if (!out) return NULL; @@ -618,15 +626,10 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, int parents_shown = 0; const char *msg = commit->buffer; int plain_non_ascii = 0; - char *reencoded = NULL; + char *reencoded = logmsg_reencode(commit); - if (*git_commit_encoding) { - reencoded = logmsg_reencode(commit); - if (reencoded) { - msg = reencoded; - len = strlen(msg); - } - } + if (reencoded) + msg = reencoded; if (fmt == CMIT_FMT_ONELINE || fmt == CMIT_FMT_EMAIL) indent = 0; @@ -643,7 +646,7 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, for (in_body = i = 0; (ch = msg[i]) && i < len; i++) { if (!in_body) { /* author could be non 7-bit ASCII but - * the log may so; skip over the + * the log may be so; skip over the * header part first. */ if (ch == '\n' && -- cgit v1.2.3