diff options
Diffstat (limited to 'pretty.c')
-rw-r--r-- | pretty.c | 393 |
1 files changed, 256 insertions, 137 deletions
@@ -231,7 +231,7 @@ static int is_rfc822_special(char ch) } } -static int has_rfc822_specials(const char *s, int len) +static int needs_rfc822_quoting(const char *s, int len) { int i; for (i = 0; i < len; i++) @@ -240,6 +240,17 @@ static int has_rfc822_specials(const char *s, int len) return 0; } +static int last_line_length(struct strbuf *sb) +{ + int i; + + /* How many bytes are already used on the last line? */ + for (i = sb->len - 1; i >= 0; i--) + if (sb->buf[i] == '\n') + break; + return sb->len - (i + 1); +} + static void add_rfc822_quoted(struct strbuf *out, const char *s, int len) { int i; @@ -261,57 +272,110 @@ static void add_rfc822_quoted(struct strbuf *out, const char *s, int len) strbuf_addch(out, '"'); } -static int is_rfc2047_special(char ch) +enum rfc2047_type { + RFC2047_SUBJECT, + RFC2047_ADDRESS, +}; + +static int is_rfc2047_special(char ch, enum rfc2047_type type) { - return (non_ascii(ch) || (ch == '=') || (ch == '?') || (ch == '_')); + /* + * rfc2047, section 4.2: + * + * 8-bit values which correspond to printable ASCII characters other + * than "=", "?", and "_" (underscore), MAY be represented as those + * characters. (But see section 5 for restrictions.) In + * particular, SPACE and TAB MUST NOT be represented as themselves + * within encoded words. + */ + + /* + * rule out non-ASCII characters and non-printable characters (the + * non-ASCII check should be redundant as isprint() is not localized + * and only knows about ASCII, but be defensive about that) + */ + if (non_ascii(ch) || !isprint(ch)) + return 1; + + /* + * rule out special printable characters (' ' should be the only + * whitespace character considered printable, but be defensive and use + * isspace()) + */ + if (isspace(ch) || ch == '=' || ch == '?' || ch == '_') + return 1; + + /* + * rfc2047, section 5.3: + * + * As a replacement for a 'word' entity within a 'phrase', for example, + * one that precedes an address in a From, To, or Cc header. 
The ABNF + * definition for 'phrase' from RFC 822 thus becomes: + * + * phrase = 1*( encoded-word / word ) + * + * In this case the set of characters that may be used in a "Q"-encoded + * 'encoded-word' is restricted to: <upper and lower case ASCII + * letters, decimal digits, "!", "*", "+", "-", "/", "=", and "_" + * (underscore, ASCII 95.)>. An 'encoded-word' that appears within a + * 'phrase' MUST be separated from any adjacent 'word', 'text' or + * 'special' by 'linear-white-space'. + */ + + if (type != RFC2047_ADDRESS) + return 0; + + /* '=' and '_' are special cases and have been checked above */ + return !(isalnum(ch) || ch == '!' || ch == '*' || ch == '+' || ch == '-' || ch == '/'); } -static void add_rfc2047(struct strbuf *sb, const char *line, int len, - const char *encoding) +static int needs_rfc2047_encoding(const char *line, int len, + enum rfc2047_type type) { - static const int max_length = 78; /* per rfc2822 */ int i; - int line_len; - - /* How many bytes are already used on the current line? */ - for (i = sb->len - 1; i >= 0; i--) - if (sb->buf[i] == '\n') - break; - line_len = sb->len - (i+1); for (i = 0; i < len; i++) { int ch = line[i]; if (non_ascii(ch) || ch == '\n') - goto needquote; + return 1; if ((i + 1 < len) && (ch == '=' && line[i+1] == '?')) - goto needquote; + return 1; } - strbuf_add_wrapped_bytes(sb, line, len, 0, 1, max_length - line_len); - return; -needquote: + return 0; +} + +static void add_rfc2047(struct strbuf *sb, const char *line, int len, + const char *encoding, enum rfc2047_type type) +{ + static const int max_encoded_length = 76; /* per rfc2047 */ + int i; + int line_len = last_line_length(sb); + strbuf_grow(sb, len * 3 + strlen(encoding) + 100); strbuf_addf(sb, "=?%s?q?", encoding); line_len += strlen(encoding) + 5; /* 5 for =??q? 
*/ for (i = 0; i < len; i++) { unsigned ch = line[i] & 0xFF; + int is_special = is_rfc2047_special(ch, type); + + /* + * According to RFC 2047, we could encode the special character + * ' ' (space) with '_' (underscore) for readability. But many + * programs do not understand this and just leave the + * underscore in place. Thus, we do nothing special here, which + * causes ' ' to be encoded as '=20', avoiding this problem. + */ - if (line_len >= max_length - 2) { + if (line_len + 2 + (is_special ? 3 : 1) > max_encoded_length) { strbuf_addf(sb, "?=\n =?%s?q?", encoding); line_len = strlen(encoding) + 5 + 1; /* =??q? plus SP */ } - /* - * We encode ' ' using '=20' even though rfc2047 - * allows using '_' for readability. Unfortunately, - * many programs do not understand this and just - * leave the underscore in place. - */ - if (is_rfc2047_special(ch) || ch == ' ' || ch == '\n') { + if (is_special) { strbuf_addf(sb, "=%02X", ch); line_len += 3; - } - else { + } else { strbuf_addch(sb, ch); line_len++; } @@ -323,53 +387,79 @@ void pp_user_info(const struct pretty_print_context *pp, const char *what, struct strbuf *sb, const char *line, const char *encoding) { - char *date; - int namelen; + struct strbuf name; + struct strbuf mail; + struct ident_split ident; + int linelen; + char *line_end, *date; + const char *mailbuf, *namebuf; + size_t namelen, maillen; + int max_length = 78; /* per rfc2822 */ unsigned long time; int tz; if (pp->fmt == CMIT_FMT_ONELINE) return; - date = strchr(line, '>'); - if (!date) + + line_end = strchr(line, '\n'); + if (!line_end) { + line_end = strchr(line, '\0'); + if (!line_end) + return; + } + + linelen = ++line_end - line; + if (split_ident_line(&ident, line, linelen)) return; - namelen = ++date - line; - time = strtoul(date, &date, 10); + + + mailbuf = ident.mail_begin; + maillen = ident.mail_end - ident.mail_begin; + namebuf = ident.name_begin; + namelen = ident.name_end - ident.name_begin; + + if (pp->mailmap) + map_user(pp->mailmap, 
&mailbuf, &maillen, &namebuf, &namelen); + + strbuf_init(&mail, 0); + strbuf_init(&name, 0); + + strbuf_add(&mail, mailbuf, maillen); + strbuf_add(&name, namebuf, namelen); + + namelen = name.len + mail.len + 3; /* ' ' + '<' + '>' */ + time = strtoul(ident.date_begin, &date, 10); tz = strtol(date, NULL, 10); if (pp->fmt == CMIT_FMT_EMAIL) { - char *name_tail = strchr(line, '<'); - int display_name_length; - int final_line; - if (!name_tail) - return; - while (line < name_tail && isspace(name_tail[-1])) - name_tail--; - display_name_length = name_tail - line; strbuf_addstr(sb, "From: "); - if (!has_rfc822_specials(line, display_name_length)) { - add_rfc2047(sb, line, display_name_length, encoding); - } else { + if (needs_rfc2047_encoding(name.buf, name.len, RFC2047_ADDRESS)) { + add_rfc2047(sb, name.buf, name.len, + encoding, RFC2047_ADDRESS); + max_length = 76; /* per rfc2047 */ + } else if (needs_rfc822_quoting(name.buf, name.len)) { struct strbuf quoted = STRBUF_INIT; - add_rfc822_quoted(&quoted, line, display_name_length); - add_rfc2047(sb, quoted.buf, quoted.len, encoding); + add_rfc822_quoted(&quoted, name.buf, name.len); + strbuf_add_wrapped_bytes(sb, quoted.buf, quoted.len, + -6, 1, max_length); strbuf_release(&quoted); + } else { + strbuf_add_wrapped_bytes(sb, name.buf, name.len, + -6, 1, max_length); } - for (final_line = 0; final_line < sb->len; final_line++) - if (sb->buf[sb->len - final_line - 1] == '\n') - break; - if (namelen - display_name_length + final_line > 78) { + if (namelen - name.len + last_line_length(sb) > max_length) strbuf_addch(sb, '\n'); - if (!isspace(name_tail[0])) - strbuf_addch(sb, ' '); - } - strbuf_add(sb, name_tail, namelen - display_name_length); - strbuf_addch(sb, '\n'); + + strbuf_addf(sb, " <%s>\n", mail.buf); } else { - strbuf_addf(sb, "%s: %.*s%.*s\n", what, + strbuf_addf(sb, "%s: %.*s%s <%s>\n", what, (pp->fmt == CMIT_FMT_FULLER) ? 
4 : 0, - " ", namelen, line); + " ", name.buf, mail.buf); } + + strbuf_release(&mail); + strbuf_release(&name); + switch (pp->fmt) { case CMIT_FMT_MEDIUM: strbuf_addf(sb, "Date: %s\n", show_date(time, tz, pp->date_mode)); @@ -434,10 +524,11 @@ static void add_merge_info(const struct pretty_print_context *pp, strbuf_addch(sb, '\n'); } -static char *get_header(const struct commit *commit, const char *key) +static char *get_header(const struct commit *commit, const char *msg, + const char *key) { int key_len = strlen(key); - const char *line = commit->buffer; + const char *line = msg; while (line) { const char *eol = strchr(line, '\n'), *next; @@ -498,28 +589,81 @@ char *logmsg_reencode(const struct commit *commit, static const char *utf8 = "UTF-8"; const char *use_encoding; char *encoding; + char *msg = commit->buffer; char *out; - if (!*output_encoding) - return NULL; - encoding = get_header(commit, "encoding"); + if (!msg) { + enum object_type type; + unsigned long size; + + msg = read_sha1_file(commit->object.sha1, &type, &size); + if (!msg) + die("Cannot read commit object %s", + sha1_to_hex(commit->object.sha1)); + if (type != OBJ_COMMIT) + die("Expected commit for '%s', got %s", + sha1_to_hex(commit->object.sha1), typename(type)); + } + + if (!output_encoding || !*output_encoding) + return msg; + encoding = get_header(commit, msg, "encoding"); use_encoding = encoding ? encoding : utf8; - if (!strcmp(use_encoding, output_encoding)) - if (encoding) /* we'll strip encoding header later */ - out = xstrdup(commit->buffer); - else - return NULL; /* nothing to do */ - else - out = reencode_string(commit->buffer, - output_encoding, use_encoding); + if (same_encoding(use_encoding, output_encoding)) { + /* + * No encoding work to be done. If we have no encoding header + * at all, then there's nothing to do, and we can return the + * message verbatim (whether newly allocated or not). 
+ */ + if (!encoding) + return msg; + + /* + * Otherwise, we still want to munge the encoding header in the + * result, which will be done by modifying the buffer. If we + * are using a fresh copy, we can reuse it. But if we are using + * the cached copy from commit->buffer, we need to duplicate it + * to avoid munging commit->buffer. + */ + out = msg; + if (out == commit->buffer) + out = xstrdup(out); + } + else { + /* + * There's actual encoding work to do. Do the reencoding, which + * still leaves the header to be replaced in the next step. At + * this point, we are done with msg. If we allocated a fresh + * copy, we can free it. + */ + out = reencode_string(msg, output_encoding, use_encoding); + if (out && msg != commit->buffer) + free(msg); + } + + /* + * This replacement actually consumes the buffer we hand it, so we do + * not have to worry about freeing the old "out" here. + */ if (out) out = replace_encoding_header(out, output_encoding); free(encoding); - return out; + /* + * If the re-encoding failed, out might be NULL here; in that + * case we just return the commit message verbatim. + */ + return out ? 
out : msg; } -static int mailmap_name(char *email, int email_len, char *name, int name_len) +void logmsg_free(char *msg, const struct commit *commit) +{ + if (msg != commit->buffer) + free(msg); +} + +static int mailmap_name(const char **email, size_t *email_len, + const char **name, size_t *name_len) { static struct string_list *mail_map; if (!mail_map) { @@ -536,36 +680,26 @@ static size_t format_person_part(struct strbuf *sb, char part, const int placeholder_len = 2; int tz; unsigned long date = 0; - char person_name[1024]; - char person_mail[1024]; struct ident_split s; - const char *name_start, *name_end, *mail_start, *mail_end; + const char *name, *mail; + size_t maillen, namelen; if (split_ident_line(&s, msg, len) < 0) goto skip; - name_start = s.name_begin; - name_end = s.name_end; - mail_start = s.mail_begin; - mail_end = s.mail_end; - - if (part == 'N' || part == 'E') { /* mailmap lookup */ - snprintf(person_name, sizeof(person_name), "%.*s", - (int)(name_end - name_start), name_start); - snprintf(person_mail, sizeof(person_mail), "%.*s", - (int)(mail_end - mail_start), mail_start); - mailmap_name(person_mail, sizeof(person_mail), person_name, sizeof(person_name)); - name_start = person_name; - name_end = name_start + strlen(person_name); - mail_start = person_mail; - mail_end = mail_start + strlen(person_mail); - } + name = s.name_begin; + namelen = s.name_end - s.name_begin; + mail = s.mail_begin; + maillen = s.mail_end - s.mail_begin; + + if (part == 'N' || part == 'E') /* mailmap lookup */ + mailmap_name(&mail, &maillen, &name, &namelen); if (part == 'n' || part == 'N') { /* name */ - strbuf_add(sb, name_start, name_end-name_start); + strbuf_add(sb, name, namelen); return placeholder_len; } if (part == 'e' || part == 'E') { /* email */ - strbuf_add(sb, mail_start, mail_end-mail_start); + strbuf_add(sb, mail, maillen); return placeholder_len; } @@ -893,12 +1027,19 @@ static size_t format_commit_one(struct strbuf *sb, const char *placeholder, switch 
(placeholder[0]) { case 'C': if (placeholder[1] == '(') { - const char *end = strchr(placeholder + 2, ')'); + const char *begin = placeholder + 2; + const char *end = strchr(begin, ')'); char color[COLOR_MAXLEN]; + if (!end) return 0; - color_parse_mem(placeholder + 2, - end - (placeholder + 2), + if (!prefixcmp(begin, "auto,")) { + if (!want_color(c->pretty_ctx->color)) + return end - placeholder + 1; + begin += 5; + } + color_parse_mem(begin, + end - begin, "--pretty format", color); strbuf_addstr(sb, color); return end - placeholder + 1; @@ -1033,9 +1174,8 @@ static size_t format_commit_one(struct strbuf *sb, const char *placeholder, } return 0; /* unknown %g placeholder */ case 'N': - if (c->pretty_ctx->show_notes) { - format_display_notes(commit->object.sha1, sb, - get_log_output_encoding(), 0); + if (c->pretty_ctx->notes_message) { + strbuf_addstr(sb, c->pretty_ctx->notes_message); return 1; } return 0; @@ -1184,29 +1324,18 @@ void format_commit_message(const struct commit *commit, const struct pretty_print_context *pretty_ctx) { struct format_commit_context context; - static const char utf8[] = "UTF-8"; const char *output_enc = pretty_ctx->output_encoding; memset(&context, 0, sizeof(context)); context.commit = commit; context.pretty_ctx = pretty_ctx; context.wrap_start = sb->len; - context.message = commit->buffer; - if (output_enc) { - char *enc = get_header(commit, "encoding"); - if (strcmp(enc ? 
enc : utf8, output_enc)) { - context.message = logmsg_reencode(commit, output_enc); - if (!context.message) - context.message = commit->buffer; - } - free(enc); - } + context.message = logmsg_reencode(commit, output_enc); strbuf_expand(sb, format, format_commit_item, &context); rewrap_message_tail(sb, &context, 0, 0, 0); - if (context.message != commit->buffer) - free(context.message); + logmsg_free(context.message, commit); free(context.signature.gpg_output); free(context.signature.signer); } @@ -1236,7 +1365,7 @@ static void pp_header(const struct pretty_print_context *pp, continue; } - if (!memcmp(line, "parent ", 7)) { + if (!prefixcmp(line, "parent ")) { if (linelen != 48) die("bad parent line in commit"); continue; @@ -1260,11 +1389,11 @@ static void pp_header(const struct pretty_print_context *pp, * FULL shows both authors but not dates. * FULLER shows both authors and dates. */ - if (!memcmp(line, "author ", 7)) { + if (!prefixcmp(line, "author ")) { strbuf_grow(sb, linelen + 80); pp_user_info(pp, "Author", sb, line + 7, encoding); } - if (!memcmp(line, "committer ", 10) && + if (!prefixcmp(line, "committer ") && (pp->fmt == CMIT_FMT_FULL || pp->fmt == CMIT_FMT_FULLER)) { strbuf_grow(sb, linelen + 80); pp_user_info(pp, "Commit", sb, line + 10, encoding); @@ -1278,6 +1407,7 @@ void pp_title_line(const struct pretty_print_context *pp, const char *encoding, int need_8bit_cte) { + static const int max_length = 78; /* per rfc2047 */ struct strbuf title; strbuf_init(&title, 80); @@ -1287,7 +1417,12 @@ void pp_title_line(const struct pretty_print_context *pp, strbuf_grow(sb, title.len + 1024); if (pp->subject) { strbuf_addstr(sb, pp->subject); - add_rfc2047(sb, title.buf, title.len, encoding); + if (needs_rfc2047_encoding(title.buf, title.len, RFC2047_SUBJECT)) + add_rfc2047(sb, title.buf, title.len, + encoding, RFC2047_SUBJECT); + else + strbuf_add_wrapped_bytes(sb, title.buf, title.len, + -last_line_length(sb), 1, max_length); } else { strbuf_addbuf(sb, &title); 
} @@ -1341,23 +1476,13 @@ void pp_remainder(const struct pretty_print_context *pp, } } -char *reencode_commit_message(const struct commit *commit, const char **encoding_p) -{ - const char *encoding; - - encoding = get_log_output_encoding(); - if (encoding_p) - *encoding_p = encoding; - return logmsg_reencode(commit, encoding); -} - void pretty_print_commit(const struct pretty_print_context *pp, const struct commit *commit, struct strbuf *sb) { unsigned long beginning_of_body; int indent = 4; - const char *msg = commit->buffer; + const char *msg; char *reencoded; const char *encoding; int need_8bit_cte = pp->need_8bit_cte; @@ -1367,10 +1492,8 @@ void pretty_print_commit(const struct pretty_print_context *pp, return; } - reencoded = reencode_commit_message(commit, &encoding); - if (reencoded) { - msg = reencoded; - } + encoding = get_log_output_encoding(); + msg = reencoded = logmsg_reencode(commit, encoding); if (pp->fmt == CMIT_FMT_ONELINE || pp->fmt == CMIT_FMT_EMAIL) indent = 0; @@ -1427,11 +1550,7 @@ void pretty_print_commit(const struct pretty_print_context *pp, if (pp->fmt == CMIT_FMT_EMAIL && sb->len <= beginning_of_body) strbuf_addch(sb, '\n'); - if (pp->show_notes) - format_display_notes(commit->object.sha1, sb, encoding, - NOTES_SHOW_HEADER | NOTES_INDENT); - - free(reencoded); + logmsg_free(reencoded, commit); } void pp_commit_easy(enum cmit_fmt fmt, const struct commit *commit, |