diff options
Diffstat (limited to 'pretty.c')
-rw-r--r-- | pretty.c | 261 |
1 files changed, 147 insertions, 114 deletions
@@ -231,7 +231,7 @@ static int is_rfc822_special(char ch) } } -static int has_rfc822_specials(const char *s, int len) +static int needs_rfc822_quoting(const char *s, int len) { int i; for (i = 0; i < len; i++) @@ -240,6 +240,17 @@ static int has_rfc822_specials(const char *s, int len) return 0; } +static int last_line_length(struct strbuf *sb) +{ + int i; + + /* How many bytes are already used on the last line? */ + for (i = sb->len - 1; i >= 0; i--) + if (sb->buf[i] == '\n') + break; + return sb->len - (i + 1); +} + static void add_rfc822_quoted(struct strbuf *out, const char *s, int len) { int i; @@ -261,57 +272,110 @@ static void add_rfc822_quoted(struct strbuf *out, const char *s, int len) strbuf_addch(out, '"'); } -static int is_rfc2047_special(char ch) +enum rfc2047_type { + RFC2047_SUBJECT, + RFC2047_ADDRESS, +}; + +static int is_rfc2047_special(char ch, enum rfc2047_type type) { - return (non_ascii(ch) || (ch == '=') || (ch == '?') || (ch == '_')); + /* + * rfc2047, section 4.2: + * + * 8-bit values which correspond to printable ASCII characters other + * than "=", "?", and "_" (underscore), MAY be represented as those + * characters. (But see section 5 for restrictions.) In + * particular, SPACE and TAB MUST NOT be represented as themselves + * within encoded words. + */ + + /* + * rule out non-ASCII characters and non-printable characters (the + * non-ASCII check should be redundant as isprint() is not localized + * and only knows about ASCII, but be defensive about that) + */ + if (non_ascii(ch) || !isprint(ch)) + return 1; + + /* + * rule out special printable characters (' ' should be the only + * whitespace character considered printable, but be defensive and use + * isspace()) + */ + if (isspace(ch) || ch == '=' || ch == '?' || ch == '_') + return 1; + + /* + * rfc2047, section 5.3: + * + * As a replacement for a 'word' entity within a 'phrase', for example, + * one that precedes an address in a From, To, or Cc header. The ABNF + * definition for 'phrase' from RFC 822 thus becomes: + * + * phrase = 1*( encoded-word / word ) + * + * In this case the set of characters that may be used in a "Q"-encoded + * 'encoded-word' is restricted to: <upper and lower case ASCII + * letters, decimal digits, "!", "*", "+", "-", "/", "=", and "_" + * (underscore, ASCII 95.)>. An 'encoded-word' that appears within a + * 'phrase' MUST be separated from any adjacent 'word', 'text' or + * 'special' by 'linear-white-space'. + */ + + if (type != RFC2047_ADDRESS) + return 0; + + /* '=' and '_' are special cases and have been checked above */ + return !(isalnum(ch) || ch == '!' || ch == '*' || ch == '+' || ch == '-' || ch == '/'); } -static void add_rfc2047(struct strbuf *sb, const char *line, int len, - const char *encoding) +static int needs_rfc2047_encoding(const char *line, int len, + enum rfc2047_type type) { - static const int max_length = 78; /* per rfc2822 */ int i; - int line_len; - - /* How many bytes are already used on the current line? */ - for (i = sb->len - 1; i >= 0; i--) - if (sb->buf[i] == '\n') - break; - line_len = sb->len - (i+1); for (i = 0; i < len; i++) { int ch = line[i]; if (non_ascii(ch) || ch == '\n') - goto needquote; + return 1; if ((i + 1 < len) && (ch == '=' && line[i+1] == '?')) - goto needquote; + return 1; } - strbuf_add_wrapped_bytes(sb, line, len, 0, 1, max_length - line_len); - return; -needquote: + return 0; +} + +static void add_rfc2047(struct strbuf *sb, const char *line, int len, + const char *encoding, enum rfc2047_type type) +{ + static const int max_encoded_length = 76; /* per rfc2047 */ + int i; + int line_len = last_line_length(sb); + strbuf_grow(sb, len * 3 + strlen(encoding) + 100); strbuf_addf(sb, "=?%s?q?", encoding); line_len += strlen(encoding) + 5; /* 5 for =??q? */ for (i = 0; i < len; i++) { unsigned ch = line[i] & 0xFF; + int is_special = is_rfc2047_special(ch, type); - if (line_len >= max_length - 2) { + /* + * According to RFC 2047, we could encode the special character + * ' ' (space) with '_' (underscore) for readability. But many + * programs do not understand this and just leave the + * underscore in place. Thus, we do nothing special here, which + * causes ' ' to be encoded as '=20', avoiding this problem. + */ + + if (line_len + 2 + (is_special ? 3 : 1) > max_encoded_length) { strbuf_addf(sb, "?=\n =?%s?q?", encoding); line_len = strlen(encoding) + 5 + 1; /* =??q? plus SP */ } - /* - * We encode ' ' using '=20' even though rfc2047 - * allows using '_' for readability. Unfortunately, - * many programs do not understand this and just - * leave the underscore in place. - */ - if (is_rfc2047_special(ch) || ch == ' ' || ch == '\n') { + if (is_special) { strbuf_addf(sb, "=%02X", ch); line_len += 3; - } - else { + } else { strbuf_addch(sb, ch); line_len++; } @@ -323,6 +387,7 @@ void pp_user_info(const struct pretty_print_context *pp, const char *what, struct strbuf *sb, const char *line, const char *encoding) { + int max_length = 78; /* per rfc2822 */ char *date; int namelen; unsigned long time; @@ -340,25 +405,27 @@ void pp_user_info(const struct pretty_print_context *pp, if (pp->fmt == CMIT_FMT_EMAIL) { char *name_tail = strchr(line, '<'); int display_name_length; - int final_line; if (!name_tail) return; while (line < name_tail && isspace(name_tail[-1])) name_tail--; display_name_length = name_tail - line; strbuf_addstr(sb, "From: "); - if (!has_rfc822_specials(line, display_name_length)) { - add_rfc2047(sb, line, display_name_length, encoding); - } else { + if (needs_rfc2047_encoding(line, display_name_length, RFC2047_ADDRESS)) { + add_rfc2047(sb, line, display_name_length, + encoding, RFC2047_ADDRESS); + max_length = 76; /* per rfc2047 */ + } else if (needs_rfc822_quoting(line, display_name_length)) { struct strbuf quoted = STRBUF_INIT; add_rfc822_quoted("ed, line, display_name_length); - add_rfc2047(sb, quoted.buf, quoted.len, encoding); + strbuf_add_wrapped_bytes(sb, quoted.buf, quoted.len, + -6, 1, max_length); strbuf_release("ed); + } else { + strbuf_add_wrapped_bytes(sb, line, display_name_length, + -6, 1, max_length); } - for (final_line = 0; final_line < sb->len; final_line++) - if (sb->buf[sb->len - final_line - 1] == '\n') - break; - if (namelen - display_name_length + final_line > 78) { + if (namelen - display_name_length + last_line_length(sb) > max_length) { strbuf_addch(sb, '\n'); if (!isspace(name_tail[0])) strbuf_addch(sb, ' '); @@ -439,12 +506,14 @@ static char *get_header(const struct commit *commit, const char *key) int key_len = strlen(key); const char *line = commit->buffer; - for (;;) { + while (line) { const char *eol = strchr(line, '\n'), *next; if (line == eol) return NULL; if (!eol) { + warning("malformed commit (header is missing newline): %s", + sha1_to_hex(commit->object.sha1)); eol = line + strlen(line); next = NULL; } else @@ -456,6 +525,7 @@ static char *get_header(const struct commit *commit, const char *key) } line = next; } + return NULL; } static char *replace_encoding_header(char *buf, const char *encoding) @@ -497,11 +567,11 @@ char *logmsg_reencode(const struct commit *commit, char *encoding; char *out; - if (!*output_encoding) + if (!output_encoding || !*output_encoding) return NULL; encoding = get_header(commit, "encoding"); use_encoding = encoding ? encoding : utf8; - if (!strcmp(use_encoding, output_encoding)) + if (same_encoding(use_encoding, output_encoding)) if (encoding) /* we'll strip encoding header later */ out = xstrdup(commit->buffer); else @@ -531,41 +601,26 @@ static size_t format_person_part(struct strbuf *sb, char part, { /* currently all placeholders have same length */ const int placeholder_len = 2; - int start, end, tz = 0; + int tz; unsigned long date = 0; - char *ep; - const char *name_start, *name_end, *mail_start, *mail_end, *msg_end = msg+len; char person_name[1024]; char person_mail[1024]; + struct ident_split s; + const char *name_start, *name_end, *mail_start, *mail_end; - /* advance 'end' to point to email start delimiter */ - for (end = 0; end < len && msg[end] != '<'; end++) - ; /* do nothing */ - - /* - * When end points at the '<' that we found, it should have - * matching '>' later, which means 'end' must be strictly - * below len - 1. - */ - if (end >= len - 2) + if (split_ident_line(&s, msg, len) < 0) goto skip; - /* Seek for both name and email part */ - name_start = msg; - name_end = msg+end; - while (name_end > name_start && isspace(*(name_end-1))) - name_end--; - mail_start = msg+end+1; - mail_end = mail_start; - while (mail_end < msg_end && *mail_end != '>') - mail_end++; - if (mail_end == msg_end) - goto skip; - end = mail_end-msg; + name_start = s.name_begin; + name_end = s.name_end; + mail_start = s.mail_begin; + mail_end = s.mail_end; if (part == 'N' || part == 'E') { /* mailmap lookup */ - strlcpy(person_name, name_start, name_end-name_start+1); - strlcpy(person_mail, mail_start, mail_end-mail_start+1); + snprintf(person_name, sizeof(person_name), "%.*s", + (int)(name_end - name_start), name_start); + snprintf(person_mail, sizeof(person_mail), "%.*s", + (int)(mail_end - mail_start), mail_start); mailmap_name(person_mail, sizeof(person_mail), person_name, sizeof(person_name)); name_start = person_name; name_end = name_start + strlen(person_name); @@ -581,28 +636,20 @@ static size_t format_person_part(struct strbuf *sb, char part, return placeholder_len; } - /* advance 'start' to point to date start delimiter */ - for (start = end + 1; start < len && isspace(msg[start]); start++) - ; /* do nothing */ - if (start >= len) - goto skip; - date = strtoul(msg + start, &ep, 10); - if (msg + start == ep) + if (!s.date_begin) goto skip; + date = strtoul(s.date_begin, NULL, 10); + if (part == 't') { /* date, UNIX timestamp */ - strbuf_add(sb, msg + start, ep - (msg + start)); + strbuf_add(sb, s.date_begin, s.date_end - s.date_begin); return placeholder_len; } /* parse tz */ - for (start = ep - msg + 1; start < len && isspace(msg[start]); start++) - ; /* do nothing */ - if (start + 1 < len) { - tz = strtoul(msg + start + 1, NULL, 10); - if (msg[start] == '-') - tz = -tz; - } + tz = strtoul(s.tz_begin + 1, NULL, 10); + if (*s.tz_begin == '-') + tz = -tz; switch (part) { case 'd': /* date */ @@ -621,8 +668,9 @@ static size_t format_person_part(struct strbuf *sb, char part, skip: /* - * bogus commit, 'sb' cannot be updated, but we still need to - * compute a valid return value. + * reading from either a bogus commit, or a reflog entry with + * %gn, %ge, etc.; 'sb' cannot be updated, but we still need + * to compute a valid return value. */ if (part == 'n' || part == 'e' || part == 't' || part == 'd' || part == 'D' || part == 'r' || part == 'i') @@ -1034,6 +1082,7 @@ static size_t format_commit_one(struct strbuf *sb, const char *placeholder, get_reflog_selector(sb, c->pretty_ctx->reflog_info, c->pretty_ctx->date_mode, + c->pretty_ctx->date_mode_explicit, (placeholder[1] == 'd')); return 2; case 's': /* reflog message */ @@ -1051,9 +1100,8 @@ static size_t format_commit_one(struct strbuf *sb, const char *placeholder, } return 0; /* unknown %g placeholder */ case 'N': - if (c->pretty_ctx->show_notes) { - format_display_notes(commit->object.sha1, sb, - get_log_output_encoding(), 0); + if (c->pretty_ctx->notes_message) { + strbuf_addstr(sb, c->pretty_ctx->notes_message); return 1; } return 0; @@ -1202,23 +1250,15 @@ void format_commit_message(const struct commit *commit, const struct pretty_print_context *pretty_ctx) { struct format_commit_context context; - static const char utf8[] = "UTF-8"; const char *output_enc = pretty_ctx->output_encoding; memset(&context, 0, sizeof(context)); context.commit = commit; context.pretty_ctx = pretty_ctx; context.wrap_start = sb->len; - context.message = commit->buffer; - if (output_enc) { - char *enc = get_header(commit, "encoding"); - if (strcmp(enc ? enc : utf8, output_enc)) { - context.message = logmsg_reencode(commit, output_enc); - if (!context.message) - context.message = commit->buffer; - } - free(enc); - } + context.message = logmsg_reencode(commit, output_enc); + if (!context.message) + context.message = commit->buffer; strbuf_expand(sb, format, format_commit_item, &context); rewrap_message_tail(sb, &context, 0, 0, 0); @@ -1296,6 +1336,7 @@ void pp_title_line(const struct pretty_print_context *pp, const char *encoding, int need_8bit_cte) { + static const int max_length = 78; /* per rfc2047 */ struct strbuf title; strbuf_init(&title, 80); @@ -1305,7 +1346,12 @@ void pp_title_line(const struct pretty_print_context *pp, strbuf_grow(sb, title.len + 1024); if (pp->subject) { strbuf_addstr(sb, pp->subject); - add_rfc2047(sb, title.buf, title.len, encoding); + if (needs_rfc2047_encoding(title.buf, title.len, RFC2047_SUBJECT)) + add_rfc2047(sb, title.buf, title.len, + encoding, RFC2047_SUBJECT); + else + strbuf_add_wrapped_bytes(sb, title.buf, title.len, + -last_line_length(sb), 1, max_length); } else { strbuf_addbuf(sb, &title); } @@ -1359,16 +1405,6 @@ void pp_remainder(const struct pretty_print_context *pp, } } -char *reencode_commit_message(const struct commit *commit, const char **encoding_p) -{ - const char *encoding; - - encoding = get_log_output_encoding(); - if (encoding_p) - *encoding_p = encoding; - return logmsg_reencode(commit, encoding); -} - void pretty_print_commit(const struct pretty_print_context *pp, const struct commit *commit, struct strbuf *sb) @@ -1385,7 +1421,8 @@ void pretty_print_commit(const struct pretty_print_context *pp, return; } - reencoded = reencode_commit_message(commit, &encoding); + encoding = get_log_output_encoding(); + reencoded = logmsg_reencode(commit, encoding); if (reencoded) { msg = reencoded; } @@ -1445,10 +1482,6 @@ void pretty_print_commit(const struct pretty_print_context *pp, if (pp->fmt == CMIT_FMT_EMAIL && sb->len <= beginning_of_body) strbuf_addch(sb, '\n'); - if (pp->show_notes) - format_display_notes(commit->object.sha1, sb, encoding, - NOTES_SHOW_HEADER | NOTES_INDENT); - free(reencoded); } |