summary refs log tree commit diff
path: root/pretty.c
diff options
context:
space:
mode:
Diffstat (limited to 'pretty.c')
-rw-r--r-- pretty.c 261
1 file changed, 147 insertions, 114 deletions
diff --git a/pretty.c b/pretty.c
index 8688b8f2d4..91bb2d3ef6 100644
--- a/pretty.c
+++ b/pretty.c
@@ -231,7 +231,7 @@ static int is_rfc822_special(char ch)
}
}
-static int has_rfc822_specials(const char *s, int len)
+static int needs_rfc822_quoting(const char *s, int len)
{
int i;
for (i = 0; i < len; i++)
@@ -240,6 +240,17 @@ static int has_rfc822_specials(const char *s, int len)
return 0;
}
+static int last_line_length(struct strbuf *sb)
+{
+ int i;
+
+ /* How many bytes are already used on the last line? */
+ for (i = sb->len - 1; i >= 0; i--)
+ if (sb->buf[i] == '\n')
+ break;
+ return sb->len - (i + 1);
+}
+
static void add_rfc822_quoted(struct strbuf *out, const char *s, int len)
{
int i;
@@ -261,57 +272,110 @@ static void add_rfc822_quoted(struct strbuf *out, const char *s, int len)
strbuf_addch(out, '"');
}
-static int is_rfc2047_special(char ch)
+enum rfc2047_type {
+ RFC2047_SUBJECT,
+ RFC2047_ADDRESS,
+};
+
+static int is_rfc2047_special(char ch, enum rfc2047_type type)
{
- return (non_ascii(ch) || (ch == '=') || (ch == '?') || (ch == '_'));
+ /*
+ * rfc2047, section 4.2:
+ *
+ * 8-bit values which correspond to printable ASCII characters other
+ * than "=", "?", and "_" (underscore), MAY be represented as those
+ * characters. (But see section 5 for restrictions.) In
+ * particular, SPACE and TAB MUST NOT be represented as themselves
+ * within encoded words.
+ */
+
+ /*
+ * rule out non-ASCII characters and non-printable characters (the
+ * non-ASCII check should be redundant as isprint() is not localized
+ * and only knows about ASCII, but be defensive about that)
+ */
+ if (non_ascii(ch) || !isprint(ch))
+ return 1;
+
+ /*
+ * rule out special printable characters (' ' should be the only
+ * whitespace character considered printable, but be defensive and use
+ * isspace())
+ */
+ if (isspace(ch) || ch == '=' || ch == '?' || ch == '_')
+ return 1;
+
+ /*
+ * rfc2047, section 5.3:
+ *
+ * As a replacement for a 'word' entity within a 'phrase', for example,
+ * one that precedes an address in a From, To, or Cc header. The ABNF
+ * definition for 'phrase' from RFC 822 thus becomes:
+ *
+ * phrase = 1*( encoded-word / word )
+ *
+ * In this case the set of characters that may be used in a "Q"-encoded
+ * 'encoded-word' is restricted to: <upper and lower case ASCII
+ * letters, decimal digits, "!", "*", "+", "-", "/", "=", and "_"
+ * (underscore, ASCII 95.)>. An 'encoded-word' that appears within a
+ * 'phrase' MUST be separated from any adjacent 'word', 'text' or
+ * 'special' by 'linear-white-space'.
+ */
+
+ if (type != RFC2047_ADDRESS)
+ return 0;
+
+ /* '=' and '_' are special cases and have been checked above */
+ return !(isalnum(ch) || ch == '!' || ch == '*' || ch == '+' || ch == '-' || ch == '/');
}
-static void add_rfc2047(struct strbuf *sb, const char *line, int len,
- const char *encoding)
+static int needs_rfc2047_encoding(const char *line, int len,
+ enum rfc2047_type type)
{
- static const int max_length = 78; /* per rfc2822 */
int i;
- int line_len;
-
- /* How many bytes are already used on the current line? */
- for (i = sb->len - 1; i >= 0; i--)
- if (sb->buf[i] == '\n')
- break;
- line_len = sb->len - (i+1);
for (i = 0; i < len; i++) {
int ch = line[i];
if (non_ascii(ch) || ch == '\n')
- goto needquote;
+ return 1;
if ((i + 1 < len) && (ch == '=' && line[i+1] == '?'))
- goto needquote;
+ return 1;
}
- strbuf_add_wrapped_bytes(sb, line, len, 0, 1, max_length - line_len);
- return;
-needquote:
+ return 0;
+}
+
+static void add_rfc2047(struct strbuf *sb, const char *line, int len,
+ const char *encoding, enum rfc2047_type type)
+{
+ static const int max_encoded_length = 76; /* per rfc2047 */
+ int i;
+ int line_len = last_line_length(sb);
+
strbuf_grow(sb, len * 3 + strlen(encoding) + 100);
strbuf_addf(sb, "=?%s?q?", encoding);
line_len += strlen(encoding) + 5; /* 5 for =??q? */
for (i = 0; i < len; i++) {
unsigned ch = line[i] & 0xFF;
+ int is_special = is_rfc2047_special(ch, type);
- if (line_len >= max_length - 2) {
+ /*
+ * According to RFC 2047, we could encode the special character
+ * ' ' (space) with '_' (underscore) for readability. But many
+ * programs do not understand this and just leave the
+ * underscore in place. Thus, we do nothing special here, which
+ * causes ' ' to be encoded as '=20', avoiding this problem.
+ */
+
+ if (line_len + 2 + (is_special ? 3 : 1) > max_encoded_length) {
strbuf_addf(sb, "?=\n =?%s?q?", encoding);
line_len = strlen(encoding) + 5 + 1; /* =??q? plus SP */
}
- /*
- * We encode ' ' using '=20' even though rfc2047
- * allows using '_' for readability. Unfortunately,
- * many programs do not understand this and just
- * leave the underscore in place.
- */
- if (is_rfc2047_special(ch) || ch == ' ' || ch == '\n') {
+ if (is_special) {
strbuf_addf(sb, "=%02X", ch);
line_len += 3;
- }
- else {
+ } else {
strbuf_addch(sb, ch);
line_len++;
}
@@ -323,6 +387,7 @@ void pp_user_info(const struct pretty_print_context *pp,
const char *what, struct strbuf *sb,
const char *line, const char *encoding)
{
+ int max_length = 78; /* per rfc2822 */
char *date;
int namelen;
unsigned long time;
@@ -340,25 +405,27 @@ void pp_user_info(const struct pretty_print_context *pp,
if (pp->fmt == CMIT_FMT_EMAIL) {
char *name_tail = strchr(line, '<');
int display_name_length;
- int final_line;
if (!name_tail)
return;
while (line < name_tail && isspace(name_tail[-1]))
name_tail--;
display_name_length = name_tail - line;
strbuf_addstr(sb, "From: ");
- if (!has_rfc822_specials(line, display_name_length)) {
- add_rfc2047(sb, line, display_name_length, encoding);
- } else {
+ if (needs_rfc2047_encoding(line, display_name_length, RFC2047_ADDRESS)) {
+ add_rfc2047(sb, line, display_name_length,
+ encoding, RFC2047_ADDRESS);
+ max_length = 76; /* per rfc2047 */
+ } else if (needs_rfc822_quoting(line, display_name_length)) {
struct strbuf quoted = STRBUF_INIT;
add_rfc822_quoted(&quoted, line, display_name_length);
- add_rfc2047(sb, quoted.buf, quoted.len, encoding);
+ strbuf_add_wrapped_bytes(sb, quoted.buf, quoted.len,
+ -6, 1, max_length);
strbuf_release(&quoted);
+ } else {
+ strbuf_add_wrapped_bytes(sb, line, display_name_length,
+ -6, 1, max_length);
}
- for (final_line = 0; final_line < sb->len; final_line++)
- if (sb->buf[sb->len - final_line - 1] == '\n')
- break;
- if (namelen - display_name_length + final_line > 78) {
+ if (namelen - display_name_length + last_line_length(sb) > max_length) {
strbuf_addch(sb, '\n');
if (!isspace(name_tail[0]))
strbuf_addch(sb, ' ');
@@ -439,12 +506,14 @@ static char *get_header(const struct commit *commit, const char *key)
int key_len = strlen(key);
const char *line = commit->buffer;
- for (;;) {
+ while (line) {
const char *eol = strchr(line, '\n'), *next;
if (line == eol)
return NULL;
if (!eol) {
+ warning("malformed commit (header is missing newline): %s",
+ sha1_to_hex(commit->object.sha1));
eol = line + strlen(line);
next = NULL;
} else
@@ -456,6 +525,7 @@ static char *get_header(const struct commit *commit, const char *key)
}
line = next;
}
+ return NULL;
}
static char *replace_encoding_header(char *buf, const char *encoding)
@@ -497,11 +567,11 @@ char *logmsg_reencode(const struct commit *commit,
char *encoding;
char *out;
- if (!*output_encoding)
+ if (!output_encoding || !*output_encoding)
return NULL;
encoding = get_header(commit, "encoding");
use_encoding = encoding ? encoding : utf8;
- if (!strcmp(use_encoding, output_encoding))
+ if (same_encoding(use_encoding, output_encoding))
if (encoding) /* we'll strip encoding header later */
out = xstrdup(commit->buffer);
else
@@ -531,41 +601,26 @@ static size_t format_person_part(struct strbuf *sb, char part,
{
/* currently all placeholders have same length */
const int placeholder_len = 2;
- int start, end, tz = 0;
+ int tz;
unsigned long date = 0;
- char *ep;
- const char *name_start, *name_end, *mail_start, *mail_end, *msg_end = msg+len;
char person_name[1024];
char person_mail[1024];
+ struct ident_split s;
+ const char *name_start, *name_end, *mail_start, *mail_end;
- /* advance 'end' to point to email start delimiter */
- for (end = 0; end < len && msg[end] != '<'; end++)
- ; /* do nothing */
-
- /*
- * When end points at the '<' that we found, it should have
- * matching '>' later, which means 'end' must be strictly
- * below len - 1.
- */
- if (end >= len - 2)
+ if (split_ident_line(&s, msg, len) < 0)
goto skip;
- /* Seek for both name and email part */
- name_start = msg;
- name_end = msg+end;
- while (name_end > name_start && isspace(*(name_end-1)))
- name_end--;
- mail_start = msg+end+1;
- mail_end = mail_start;
- while (mail_end < msg_end && *mail_end != '>')
- mail_end++;
- if (mail_end == msg_end)
- goto skip;
- end = mail_end-msg;
+ name_start = s.name_begin;
+ name_end = s.name_end;
+ mail_start = s.mail_begin;
+ mail_end = s.mail_end;
if (part == 'N' || part == 'E') { /* mailmap lookup */
- strlcpy(person_name, name_start, name_end-name_start+1);
- strlcpy(person_mail, mail_start, mail_end-mail_start+1);
+ snprintf(person_name, sizeof(person_name), "%.*s",
+ (int)(name_end - name_start), name_start);
+ snprintf(person_mail, sizeof(person_mail), "%.*s",
+ (int)(mail_end - mail_start), mail_start);
mailmap_name(person_mail, sizeof(person_mail), person_name, sizeof(person_name));
name_start = person_name;
name_end = name_start + strlen(person_name);
@@ -581,28 +636,20 @@ static size_t format_person_part(struct strbuf *sb, char part,
return placeholder_len;
}
- /* advance 'start' to point to date start delimiter */
- for (start = end + 1; start < len && isspace(msg[start]); start++)
- ; /* do nothing */
- if (start >= len)
- goto skip;
- date = strtoul(msg + start, &ep, 10);
- if (msg + start == ep)
+ if (!s.date_begin)
goto skip;
+ date = strtoul(s.date_begin, NULL, 10);
+
if (part == 't') { /* date, UNIX timestamp */
- strbuf_add(sb, msg + start, ep - (msg + start));
+ strbuf_add(sb, s.date_begin, s.date_end - s.date_begin);
return placeholder_len;
}
/* parse tz */
- for (start = ep - msg + 1; start < len && isspace(msg[start]); start++)
- ; /* do nothing */
- if (start + 1 < len) {
- tz = strtoul(msg + start + 1, NULL, 10);
- if (msg[start] == '-')
- tz = -tz;
- }
+ tz = strtoul(s.tz_begin + 1, NULL, 10);
+ if (*s.tz_begin == '-')
+ tz = -tz;
switch (part) {
case 'd': /* date */
@@ -621,8 +668,9 @@ static size_t format_person_part(struct strbuf *sb, char part,
skip:
/*
- * bogus commit, 'sb' cannot be updated, but we still need to
- * compute a valid return value.
+ * reading from either a bogus commit, or a reflog entry with
+ * %gn, %ge, etc.; 'sb' cannot be updated, but we still need
+ * to compute a valid return value.
*/
if (part == 'n' || part == 'e' || part == 't' || part == 'd'
|| part == 'D' || part == 'r' || part == 'i')
@@ -1034,6 +1082,7 @@ static size_t format_commit_one(struct strbuf *sb, const char *placeholder,
get_reflog_selector(sb,
c->pretty_ctx->reflog_info,
c->pretty_ctx->date_mode,
+ c->pretty_ctx->date_mode_explicit,
(placeholder[1] == 'd'));
return 2;
case 's': /* reflog message */
@@ -1051,9 +1100,8 @@ static size_t format_commit_one(struct strbuf *sb, const char *placeholder,
}
return 0; /* unknown %g placeholder */
case 'N':
- if (c->pretty_ctx->show_notes) {
- format_display_notes(commit->object.sha1, sb,
- get_log_output_encoding(), 0);
+ if (c->pretty_ctx->notes_message) {
+ strbuf_addstr(sb, c->pretty_ctx->notes_message);
return 1;
}
return 0;
@@ -1202,23 +1250,15 @@ void format_commit_message(const struct commit *commit,
const struct pretty_print_context *pretty_ctx)
{
struct format_commit_context context;
- static const char utf8[] = "UTF-8";
const char *output_enc = pretty_ctx->output_encoding;
memset(&context, 0, sizeof(context));
context.commit = commit;
context.pretty_ctx = pretty_ctx;
context.wrap_start = sb->len;
- context.message = commit->buffer;
- if (output_enc) {
- char *enc = get_header(commit, "encoding");
- if (strcmp(enc ? enc : utf8, output_enc)) {
- context.message = logmsg_reencode(commit, output_enc);
- if (!context.message)
- context.message = commit->buffer;
- }
- free(enc);
- }
+ context.message = logmsg_reencode(commit, output_enc);
+ if (!context.message)
+ context.message = commit->buffer;
strbuf_expand(sb, format, format_commit_item, &context);
rewrap_message_tail(sb, &context, 0, 0, 0);
@@ -1296,6 +1336,7 @@ void pp_title_line(const struct pretty_print_context *pp,
const char *encoding,
int need_8bit_cte)
{
+ static const int max_length = 78; /* per rfc2047 */
struct strbuf title;
strbuf_init(&title, 80);
@@ -1305,7 +1346,12 @@ void pp_title_line(const struct pretty_print_context *pp,
strbuf_grow(sb, title.len + 1024);
if (pp->subject) {
strbuf_addstr(sb, pp->subject);
- add_rfc2047(sb, title.buf, title.len, encoding);
+ if (needs_rfc2047_encoding(title.buf, title.len, RFC2047_SUBJECT))
+ add_rfc2047(sb, title.buf, title.len,
+ encoding, RFC2047_SUBJECT);
+ else
+ strbuf_add_wrapped_bytes(sb, title.buf, title.len,
+ -last_line_length(sb), 1, max_length);
} else {
strbuf_addbuf(sb, &title);
}
@@ -1359,16 +1405,6 @@ void pp_remainder(const struct pretty_print_context *pp,
}
}
-char *reencode_commit_message(const struct commit *commit, const char **encoding_p)
-{
- const char *encoding;
-
- encoding = get_log_output_encoding();
- if (encoding_p)
- *encoding_p = encoding;
- return logmsg_reencode(commit, encoding);
-}
-
void pretty_print_commit(const struct pretty_print_context *pp,
const struct commit *commit,
struct strbuf *sb)
@@ -1385,7 +1421,8 @@ void pretty_print_commit(const struct pretty_print_context *pp,
return;
}
- reencoded = reencode_commit_message(commit, &encoding);
+ encoding = get_log_output_encoding();
+ reencoded = logmsg_reencode(commit, encoding);
if (reencoded) {
msg = reencoded;
}
@@ -1445,10 +1482,6 @@ void pretty_print_commit(const struct pretty_print_context *pp,
if (pp->fmt == CMIT_FMT_EMAIL && sb->len <= beginning_of_body)
strbuf_addch(sb, '\n');
- if (pp->show_notes)
- format_display_notes(commit->object.sha1, sb, encoding,
- NOTES_SHOW_HEADER | NOTES_INDENT);
-
free(reencoded);
}