diff options
Diffstat (limited to 'builtin-mailinfo.c')
-rw-r--r-- | builtin-mailinfo.c | 153 |
1 files changed, 125 insertions, 28 deletions
diff --git a/builtin-mailinfo.c b/builtin-mailinfo.c index dacc8ac2d0..3c4f0753fe 100644 --- a/builtin-mailinfo.c +++ b/builtin-mailinfo.c @@ -25,10 +25,15 @@ static enum { static struct strbuf charset = STRBUF_INIT; static int patch_lines; static struct strbuf **p_hdr_data, **s_hdr_data; +static int use_scissors; +static int use_inbody_headers = 1; #define MAX_HDR_PARSED 10 #define MAX_BOUNDARIES 5 +static void cleanup_space(struct strbuf *sb); + + static void get_sane_name(struct strbuf *out, struct strbuf *name, struct strbuf *email) { struct strbuf *src = name; @@ -109,11 +114,19 @@ static void handle_from(const struct strbuf *from) strbuf_add(&email, at, el); strbuf_remove(&f, at - f.buf, el + (at[el] ? 1 : 0)); - /* The remainder is name. It could be "John Doe <john.doe@xz>" - * or "john.doe@xz (John Doe)", but we have removed the - * email part, so trim from both ends, possibly removing - * the () pair at the end. + /* The remainder is name. It could be + * + * - "John Doe <john.doe@xz>" (a), or + * - "john.doe@xz (John Doe)" (b), or + * - "John (zzz) Doe <john.doe@xz> (Comment)" (c) + * + * but we have removed the email part, so + * + * - remove extra spaces which could stay after email (case 'c'), and + * - trim from both ends, possibly removing the () pair at the end + * (cases 'a' and 'b'). */ + cleanup_space(&f); strbuf_trim(&f); if (f.buf[0] == '(' && f.len && f.buf[f.len - 1] == ')') { strbuf_remove(&f, 0, 1); @@ -182,8 +195,7 @@ static void handle_content_type(struct strbuf *line) *content_top = boundary; boundary = NULL; } - if (slurp_attr(line->buf, "charset=", &charset)) - strbuf_tolower(&charset); + slurp_attr(line->buf, "charset=", &charset); if (boundary) { strbuf_release(boundary); @@ -470,7 +482,7 @@ static const char *guess_charset(const struct strbuf *line, const char *target_c if (is_utf8(line->buf)) return NULL; } - return "latin1"; + return "ISO8859-1"; } static void convert_to_utf8(struct strbuf *line, const char *charset) @@ -483,11 +495,11 @@ static void convert_to_utf8(struct strbuf *line, const char *charset) return; } - if (!strcmp(metainfo_charset, charset)) + if (!strcasecmp(metainfo_charset, charset)) return; out = reencode_string(line->buf, metainfo_charset, charset); if (!out) - die("cannot convert from %s to %s\n", + die("cannot convert from %s to %s", charset, metainfo_charset); strbuf_attach(line, out, strlen(out), strlen(out)); } @@ -526,7 +538,6 @@ static int decode_header_bq(struct strbuf *it) */ strbuf_add(&outbuf, in, ep - in); } - in = ep; } /* E.g. * ep : "=?iso-2022-jp?B?GyR...?= foo" @@ -540,7 +551,6 @@ static int decode_header_bq(struct strbuf *it) if (cp + 3 - it->buf > it->len) goto decode_header_bq_out; strbuf_add(&charset_q, ep, cp - ep); - strbuf_tolower(&charset_q); encoding = cp[1]; if (!encoding || cp[2] != '?') @@ -704,6 +714,56 @@ static inline int patchbreak(const struct strbuf *line) return 0; } +static int is_scissors_line(const struct strbuf *line) +{ + size_t i, len = line->len; + int scissors = 0, gap = 0; + int first_nonblank = -1; + int last_nonblank = 0, visible, perforation = 0, in_perforation = 0; + const char *buf = line->buf; + + for (i = 0; i < len; i++) { + if (isspace(buf[i])) { + if (in_perforation) { + perforation++; + gap++; + } + continue; + } + last_nonblank = i; + if (first_nonblank < 0) + first_nonblank = i; + if (buf[i] == '-') { + in_perforation = 1; + perforation++; + continue; + } + if (i + 1 < len && + (!memcmp(buf + i, ">8", 2) || !memcmp(buf + i, "8<", 2))) { + in_perforation = 1; + perforation += 2; + scissors += 2; + i++; + continue; + } + in_perforation = 0; + } + + /* + * The mark must be at least 8 bytes long (e.g. "-- >8 --"). + * Even though there can be arbitrary cruft on the same line + * (e.g. "cut here"), in order to avoid misidentification, the + * perforation must occupy more than a third of the visible + * width of the line, and dashes and scissors must occupy more + * than half of the perforation. + */ + + visible = last_nonblank - first_nonblank + 1; + return (scissors && 8 <= visible && + visible < perforation * 3 && + gap * 2 < perforation); +} + static int handle_commit_msg(struct strbuf *line) { static int still_looking = 1; @@ -715,14 +775,42 @@ static int handle_commit_msg(struct strbuf *line) strbuf_ltrim(line); if (!line->len) return 0; - if ((still_looking = check_header(line, s_hdr_data, 0)) != 0) - return 0; } + if (use_inbody_headers && still_looking) { + still_looking = check_header(line, s_hdr_data, 0); + if (still_looking) + return 0; + } else + /* Only trim the first (blank) line of the commit message + * when ignoring in-body headers. + */ + still_looking = 0; + /* normalize the log message to UTF-8. */ if (metainfo_charset) convert_to_utf8(line, charset.buf); + if (use_scissors && is_scissors_line(line)) { + int i; + if (fseek(cmitmsg, 0L, SEEK_SET)) + die_errno("Could not rewind output message file"); + if (ftruncate(fileno(cmitmsg), 0)) + die_errno("Could not truncate output message file at scissors"); + still_looking = 1; + + /* + * We may have already read "secondary headers"; purge + * them to give ourselves a clean restart. + */ + for (i = 0; header[i]; i++) { + if (s_hdr_data[i]) + strbuf_release(s_hdr_data[i]); + s_hdr_data[i] = NULL; + } + return 0; + } + if (patchbreak(line)) { fclose(cmitmsg); cmitmsg = NULL; @@ -757,7 +845,6 @@ static void handle_filter(struct strbuf *line) static void handle_body(void) { - int len = 0; struct strbuf prev = STRBUF_INIT; /* Skip up to the first boundary */ @@ -767,8 +854,6 @@ static void handle_body(void) } do { - strbuf_setlen(&line, line.len + len); - /* process any boundary lines */ if (*content_top && is_multipart_boundary(&line)) { /* flush any leftover */ @@ -824,10 +909,7 @@ static void handle_body(void) handle_filter(&line); } - strbuf_reset(&line); - if (strbuf_avail(&line) < 100) - strbuf_grow(&line, 100); - } while ((len = read_line_with_nul(line.buf, strbuf_avail(&line), fin))); + } while (!strbuf_getwholeline(&line, fin, '\n')); handle_body_out: strbuf_release(&prev); @@ -883,12 +965,9 @@ static void handle_info(void) fprintf(fout, "\n"); } -static int mailinfo(FILE *in, FILE *out, int ks, const char *encoding, - const char *msg, const char *patch) +static int mailinfo(FILE *in, FILE *out, const char *msg, const char *patch) { int peek; - keep_subject = ks; - metainfo_charset = encoding; fin = in; fout = out; @@ -922,8 +1001,20 @@ static int mailinfo(FILE *in, FILE *out, int ks, const char *encoding, return 0; } +static int git_mailinfo_config(const char *var, const char *value, void *unused) +{ + if (prefixcmp(var, "mailinfo.")) + return git_default_config(var, value, unused); + if (!strcmp(var, "mailinfo.scissors")) { + use_scissors = git_config_bool(var, value); + return 0; + } + /* perhaps others here */ + return 0; +} + static const char mailinfo_usage[] = - "git mailinfo [-k] [-u | --encoding=<encoding> | -n] msg patch <mail >info"; + "git mailinfo [-k] [-u | --encoding=<encoding> | -n] [--scissors | --no-scissors] msg patch < mail >info"; int cmd_mailinfo(int argc, const char **argv, const char *prefix) { @@ -932,9 +1023,9 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix) /* NEEDSWORK: might want to do the optional .git/ directory * discovery */ - git_config(git_default_config, NULL); + git_config(git_mailinfo_config, NULL); - def_charset = (git_commit_encoding ? git_commit_encoding : "utf-8"); + def_charset = (git_commit_encoding ? git_commit_encoding : "UTF-8"); metainfo_charset = def_charset; while (1 < argc && argv[1][0] == '-') { @@ -946,6 +1037,12 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix) metainfo_charset = NULL; else if (!prefixcmp(argv[1], "--encoding=")) metainfo_charset = argv[1] + 11; + else if (!strcmp(argv[1], "--scissors")) + use_scissors = 1; + else if (!strcmp(argv[1], "--no-scissors")) + use_scissors = 0; + else if (!strcmp(argv[1], "--no-inbody-headers")) + use_inbody_headers = 0; else usage(mailinfo_usage); argc--; argv++; @@ -954,5 +1051,5 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix) if (argc != 3) usage(mailinfo_usage); - return !!mailinfo(stdin, stdout, keep_subject, metainfo_charset, argv[1], argv[2]); + return !!mailinfo(stdin, stdout, argv[1], argv[2]); } |