diff options
Diffstat (limited to 'builtin/mailinfo.c')
-rw-r--r-- | builtin/mailinfo.c | 124 |
1 files changed, 68 insertions, 56 deletions
diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index 71e6262a87..c8a47c173d 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -15,18 +15,17 @@ static const char *metainfo_charset; static struct strbuf line = STRBUF_INIT; static struct strbuf name = STRBUF_INIT; static struct strbuf email = STRBUF_INIT; +static char *message_id; static enum { TE_DONTCARE, TE_QP, TE_BASE64 } transfer_encoding; -static enum { - TYPE_TEXT, TYPE_OTHER -} message_type; static struct strbuf charset = STRBUF_INIT; static int patch_lines; static struct strbuf **p_hdr_data, **s_hdr_data; static int use_scissors; +static int add_message_id; static int use_inbody_headers = 1; #define MAX_HDR_PARSED 10 @@ -160,10 +159,9 @@ static int slurp_attr(const char *line, const char *name, struct strbuf *attr) const char *ends, *ap = strcasestr(line, name); size_t sz; - if (!ap) { - strbuf_setlen(attr, 0); + strbuf_setlen(attr, 0); + if (!ap) return 0; - } ap += strlen(name); if (*ap == '"') { ap++; @@ -185,8 +183,6 @@ static void handle_content_type(struct strbuf *line) struct strbuf *boundary = xmalloc(sizeof(struct strbuf)); strbuf_init(boundary, line->len); - if (!strcasestr(line->buf, "text/")) - message_type = TYPE_OTHER; if (slurp_attr(line->buf, "boundary=", boundary)) { strbuf_insert(boundary, 0, "--", 2); if (++content_top > &content[MAX_BOUNDARIES]) { @@ -204,6 +200,12 @@ static void handle_content_type(struct strbuf *line) } } +static void handle_message_id(const struct strbuf *line) +{ + if (add_message_id) + message_id = strdup(line->buf); +} + static void handle_content_transfer_encoding(const struct strbuf *line) { if (strcasestr(line->buf, "base64")) @@ -232,7 +234,9 @@ static void cleanup_subject(struct strbuf *subject) case 'r': case 'R': if (subject->len <= at + 3) break; - if (!memcmp(subject->buf + at + 1, "e:", 2)) { + if ((subject->buf[at + 1] == 'e' || + subject->buf[at + 1] == 'E') && + subject->buf[at + 2] == ':') { strbuf_remove(subject, at, 3); continue; } @@ -250,8 +254,17 @@ static void cleanup_subject(struct strbuf *subject) (7 <= remove && memmem(subject->buf + at, remove, "PATCH", 5))) strbuf_remove(subject, at, remove); - else + else { at += remove; + /* + * If the input had a space after the ], keep + * it. We don't bother with finding the end of + * the space, since we later normalize it + * anyway. + */ + if (isspace(subject->buf[at])) + at += 1; + } continue; } break; @@ -283,6 +296,22 @@ static inline int cmp_header(const struct strbuf *line, const char *hdr) line->buf[len] == ':' && isspace(line->buf[len + 1]); } +static int is_format_patch_separator(const char *line, int len) +{ + static const char SAMPLE[] = + "From e6807f3efca28b30decfecb1732a56c7db1137ee Mon Sep 17 00:00:00 2001\n"; + const char *cp; + + if (len != strlen(SAMPLE)) + return 0; + if (!skip_prefix(line, "From ", &cp)) + return 0; + if (strspn(cp, "0123456789abcdef") != 40) + return 0; + cp += 40; + return !memcmp(SAMPLE + (cp - line), cp, strlen(SAMPLE) - (cp - line)); +} + static int check_header(const struct strbuf *line, struct strbuf *hdr_data[], int overwrite) { @@ -321,15 +350,23 @@ static int check_header(const struct strbuf *line, ret = 1; goto check_header_out; } + if (cmp_header(line, "Message-Id")) { + len = strlen("Message-Id: "); + strbuf_add(&sb, line->buf + len, line->len - len); + decode_header(&sb); + handle_message_id(&sb); + ret = 1; + goto check_header_out; + } /* for inbody stuff */ - if (!prefixcmp(line->buf, ">From") && isspace(line->buf[5])) { - ret = 1; /* Should this return 0? */ + if (starts_with(line->buf, ">From") && isspace(line->buf[5])) { + ret = is_format_patch_separator(line->buf + 1, line->len - 1); goto check_header_out; } - if (!prefixcmp(line->buf, "[PATCH]") && isspace(line->buf[7])) { + if (starts_with(line->buf, "[PATCH]") && isspace(line->buf[7])) { for (i = 0; header[i]; i++) { - if (!memcmp("Subject", header[i], 7)) { + if (!strcmp("Subject", header[i])) { handle_header(&hdr_data[i], line); ret = 1; goto check_header_out; @@ -356,7 +393,7 @@ static int is_rfc2822_header(const struct strbuf *line) char *cp = line->buf; /* Count mbox From headers as headers */ - if (!prefixcmp(cp, "From ") || !prefixcmp(cp, ">From ")) + if (starts_with(cp, "From ") || starts_with(cp, ">From ")) return 1; while ((ch = *cp++)) { @@ -400,7 +437,7 @@ static int read_one_header_line(struct strbuf *line, FILE *in) break; if (strbuf_getline(&continuation, in, '\n')) break; - continuation.buf[0] = '\n'; + continuation.buf[0] = ' '; strbuf_rtrim(&continuation); strbuf_addbuf(line, &continuation); } @@ -472,37 +509,14 @@ static struct strbuf *decode_b_segment(const struct strbuf *b_seg) return out; } -/* - * When there is no known charset, guess. - * - * Right now we assume that if the target is UTF-8 (the default), - * and it already looks like UTF-8 (which includes US-ASCII as its - * subset, of course) then that is what it is and there is nothing - * to do. - * - * Otherwise, we default to assuming it is Latin1 for historical - * reasons. - */ -static const char *guess_charset(const struct strbuf *line, const char *target_charset) -{ - if (is_encoding_utf8(target_charset)) { - if (is_utf8(line->buf)) - return NULL; - } - return "ISO8859-1"; -} - static void convert_to_utf8(struct strbuf *line, const char *charset) { char *out; - if (!charset || !*charset) { - charset = guess_charset(line, metainfo_charset); - if (!charset) - return; - } + if (!charset || !*charset) + return; - if (!strcasecmp(metainfo_charset, charset)) + if (same_encoding(metainfo_charset, charset)) return; out = reencode_string(line->buf, metainfo_charset, charset); if (!out) @@ -671,7 +685,6 @@ again: /* set some defaults */ transfer_encoding = TE_DONTCARE; strbuf_reset(&charset); - message_type = TYPE_TEXT; /* slurp in this section's info */ while (read_one_header_line(&line, fin)) @@ -690,11 +703,11 @@ static inline int patchbreak(const struct strbuf *line) size_t i; /* Beginning of a "diff -" header? */ - if (!prefixcmp(line->buf, "diff -")) + if (starts_with(line->buf, "diff -")) return 1; /* CVS "Index: " line? */ - if (!prefixcmp(line->buf, "Index: ")) + if (starts_with(line->buf, "Index: ")) return 1; /* @@ -704,7 +717,7 @@ static inline int patchbreak(const struct strbuf *line) if (line->len < 4) return 0; - if (!prefixcmp(line->buf, "---")) { + if (starts_with(line->buf, "---")) { /* space followed by a filename? */ if (line->buf[3] == ' ' && !isspace(line->buf[4])) return 1; @@ -819,6 +832,8 @@ static int handle_commit_msg(struct strbuf *line) } if (patchbreak(line)) { + if (message_id) + fprintf(cmitmsg, "Message-Id: %s\n", message_id); fclose(cmitmsg); cmitmsg = NULL; return 1; @@ -885,11 +900,6 @@ static void handle_body(void) strbuf_insert(&line, 0, prev.buf, prev.len); strbuf_reset(&prev); - /* binary data most likely doesn't have newlines */ - if (message_type != TYPE_TEXT) { - handle_filter(&line); - break; - } /* * This is a decoded line that may contain * multiple new lines. Pass only one chunk @@ -953,13 +963,13 @@ static void handle_info(void) else continue; - if (!memcmp(header[i], "Subject", 7)) { + if (!strcmp(header[i], "Subject")) { if (!keep_subject) { cleanup_subject(hdr); cleanup_space(hdr); } output_header_lines(fout, "Subject", hdr); - } else if (!memcmp(header[i], "From", 4)) { + } else if (!strcmp(header[i], "From")) { cleanup_space(hdr); handle_from(hdr); fprintf(fout, "Author: %s\n", name.buf); @@ -1010,7 +1020,7 @@ static int mailinfo(FILE *in, FILE *out, const char *msg, const char *patch) static int git_mailinfo_config(const char *var, const char *value, void *unused) { - if (prefixcmp(var, "mailinfo.")) + if (!starts_with(var, "mailinfo.")) return git_default_config(var, value, unused); if (!strcmp(var, "mailinfo.scissors")) { use_scissors = git_config_bool(var, value); @@ -1021,7 +1031,7 @@ static int git_mailinfo_config(const char *var, const char *value, void *unused) } static const char mailinfo_usage[] = - "git mailinfo [-k|-b] [-u | --encoding=<encoding> | -n] [--scissors | --no-scissors] msg patch < mail >info"; + "git mailinfo [-k|-b] [-m | --message-id] [-u | --encoding=<encoding> | -n] [--scissors | --no-scissors] msg patch < mail >info"; int cmd_mailinfo(int argc, const char **argv, const char *prefix) { @@ -1040,11 +1050,13 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix) keep_subject = 1; else if (!strcmp(argv[1], "-b")) keep_non_patch_brackets_in_subject = 1; + else if (!strcmp(argv[1], "-m") || !strcmp(argv[1], "--message-id")) + add_message_id = 1; else if (!strcmp(argv[1], "-u")) metainfo_charset = def_charset; else if (!strcmp(argv[1], "-n")) metainfo_charset = NULL; - else if (!prefixcmp(argv[1], "--encoding=")) + else if (starts_with(argv[1], "--encoding=")) metainfo_charset = argv[1] + 11; else if (!strcmp(argv[1], "--scissors")) use_scissors = 1; |