1 files changed, 68 insertions, 56 deletions
diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c
index 71e6262a87..c8a47c173d 100644
--- a/builtin/mailinfo.c
+++ b/builtin/mailinfo.c
@@ -15,18 +15,17 @@ static const char *metainfo_charset;
 static struct strbuf line = STRBUF_INIT;
 static struct strbuf name = STRBUF_INIT;
 static struct strbuf email = STRBUF_INIT;
+static char *message_id;
 
 static enum  {
 	TE_DONTCARE, TE_QP, TE_BASE64
 } transfer_encoding;
-static enum  {
-	TYPE_TEXT, TYPE_OTHER
-} message_type;
 
 static struct strbuf charset = STRBUF_INIT;
 static int patch_lines;
 static struct strbuf **p_hdr_data, **s_hdr_data;
 static int use_scissors;
+static int add_message_id;
 static int use_inbody_headers = 1;
 
 #define MAX_HDR_PARSED 10
@@ -160,10 +159,9 @@ static int slurp_attr(const char *line, const char *name, struct strbuf *attr)
 	const char *ends, *ap = strcasestr(line, name);
 	size_t sz;
 
-	if (!ap) {
-		strbuf_setlen(attr, 0);
+	strbuf_setlen(attr, 0);
+	if (!ap)
 		return 0;
-	}
 	ap += strlen(name);
 	if (*ap == '"') {
 		ap++;
@@ -185,8 +183,6 @@ static void handle_content_type(struct strbuf *line)
 	struct strbuf *boundary = xmalloc(sizeof(struct strbuf));
 	strbuf_init(boundary, line->len);
 
-	if (!strcasestr(line->buf, "text/"))
-		 message_type = TYPE_OTHER;
 	if (slurp_attr(line->buf, "boundary=", boundary)) {
 		strbuf_insert(boundary, 0, "--", 2);
 		if (++content_top > &content[MAX_BOUNDARIES]) {
@@ -204,6 +200,12 @@ static void handle_content_type(struct strbuf *line)
 	}
 }
 
+static void handle_message_id(const struct strbuf *line)
+{
+	if (add_message_id)	/* only record it when -m / --message-id was given */
+		message_id = xstrdup(line->buf); /* die on OOM rather than store NULL */
+}
+
 static void handle_content_transfer_encoding(const struct strbuf *line)
 {
 	if (strcasestr(line->buf, "base64"))
@@ -232,7 +234,9 @@ static void cleanup_subject(struct strbuf *subject)
 		case 'r': case 'R':
 			if (subject->len <= at + 3)
 				break;
-			if (!memcmp(subject->buf + at + 1, "e:", 2)) {
+			if ((subject->buf[at + 1] == 'e' ||
+			     subject->buf[at + 1] == 'E') &&
+			    subject->buf[at + 2] == ':') {
 				strbuf_remove(subject, at, 3);
 				continue;
 			}
@@ -250,8 +254,17 @@ static void cleanup_subject(struct strbuf *subject)
 			    (7 <= remove &&
 			     memmem(subject->buf + at, remove, "PATCH", 5)))
 				strbuf_remove(subject, at, remove);
-			else
+			else {
 				at += remove;
+				/*
+				 * If the input had a space after the ], keep
+				 * it.  We don't bother with finding the end of
+				 * the space, since we later normalize it
+				 * anyway.
+				 */
+				if (isspace(subject->buf[at]))
+					at += 1;
+			}
 			continue;
 		}
 		break;
@@ -283,6 +296,22 @@ static inline int cmp_header(const struct strbuf *line, const char *hdr)
 			line->buf[len] == ':' && isspace(line->buf[len + 1]);
 }
 
+/* Match SAMPLE, allowing any 40 lowercase hex digits after "From ". */
+static int is_format_patch_separator(const char *line, int len)
+{
+	static const char SAMPLE[] =
+		"From e6807f3efca28b30decfecb1732a56c7db1137ee Mon Sep 17 00:00:00 2001\n";
+	const size_t sample_len = strlen(SAMPLE);
+	const char *hex;
+	size_t skipped;
+
+	if (len != sample_len || !skip_prefix(line, "From ", &hex) ||
+	    strspn(hex, "0123456789abcdef") != 40)
+		return 0;
+	skipped = (hex + 40) - line;	/* bytes before the fixed tail of SAMPLE */
+	return memcmp(SAMPLE + skipped, hex + 40, sample_len - skipped) == 0;
+}
+
 static int check_header(const struct strbuf *line,
 				struct strbuf *hdr_data[], int overwrite)
 {
@@ -321,15 +350,23 @@ static int check_header(const struct strbuf *line,
 		ret = 1;
 		goto check_header_out;
 	}
+	if (cmp_header(line, "Message-Id")) {
+		len = strlen("Message-Id: ");
+		strbuf_add(&sb, line->buf + len, line->len - len);
+		decode_header(&sb);
+		handle_message_id(&sb);
+		ret = 1;
+		goto check_header_out;
+	}
 
 	/* for inbody stuff */
-	if (!prefixcmp(line->buf, ">From") && isspace(line->buf[5])) {
-		ret = 1; /* Should this return 0? */
+	if (starts_with(line->buf, ">From") && isspace(line->buf[5])) {
+		ret = is_format_patch_separator(line->buf + 1, line->len - 1);
 		goto check_header_out;
 	}
-	if (!prefixcmp(line->buf, "[PATCH]") && isspace(line->buf[7])) {
+	if (starts_with(line->buf, "[PATCH]") && isspace(line->buf[7])) {
 		for (i = 0; header[i]; i++) {
-			if (!memcmp("Subject", header[i], 7)) {
+			if (!strcmp("Subject", header[i])) {
 				handle_header(&hdr_data[i], line);
 				ret = 1;
 				goto check_header_out;
@@ -356,7 +393,7 @@ static int is_rfc2822_header(const struct strbuf *line)
 	char *cp = line->buf;
 
 	/* Count mbox From headers as headers */
-	if (!prefixcmp(cp, "From ") || !prefixcmp(cp, ">From "))
+	if (starts_with(cp, "From ") || starts_with(cp, ">From "))
 		return 1;
 
 	while ((ch = *cp++)) {
@@ -400,7 +437,7 @@ static int read_one_header_line(struct strbuf *line, FILE *in)
 			break;
 		if (strbuf_getline(&continuation, in, '\n'))
 			break;
-		continuation.buf[0] = '\n';
+		continuation.buf[0] = ' ';
 		strbuf_rtrim(&continuation);
 		strbuf_addbuf(line, &continuation);
 	}
@@ -472,37 +509,14 @@ static struct strbuf *decode_b_segment(const struct strbuf *b_seg)
 	return out;
 }
 
-/*
- * When there is no known charset, guess.
- *
- * Right now we assume that if the target is UTF-8 (the default),
- * and it already looks like UTF-8 (which includes US-ASCII as its
- * subset, of course) then that is what it is and there is nothing
- * to do.
- *
- * Otherwise, we default to assuming it is Latin1 for historical
- * reasons.
- */
-static const char *guess_charset(const struct strbuf *line, const char *target_charset)
-{
-	if (is_encoding_utf8(target_charset)) {
-		if (is_utf8(line->buf))
-			return NULL;
-	}
-	return "ISO8859-1";
-}
-
 static void convert_to_utf8(struct strbuf *line, const char *charset)
 {
 	char *out;
 
-	if (!charset || !*charset) {
-		charset = guess_charset(line, metainfo_charset);
-		if (!charset)
-			return;
-	}
+	if (!charset || !*charset)
+		return;
 
-	if (!strcasecmp(metainfo_charset, charset))
+	if (same_encoding(metainfo_charset, charset))
 		return;
 	out = reencode_string(line->buf, metainfo_charset, charset);
 	if (!out)
@@ -671,7 +685,6 @@ again:
 	/* set some defaults */
 	transfer_encoding = TE_DONTCARE;
 	strbuf_reset(&charset);
-	message_type = TYPE_TEXT;
 
 	/* slurp in this section's info */
 	while (read_one_header_line(&line, fin))
@@ -690,11 +703,11 @@ static inline int patchbreak(const struct strbuf *line)
 	size_t i;
 
 	/* Beginning of a "diff -" header? */
-	if (!prefixcmp(line->buf, "diff -"))
+	if (starts_with(line->buf, "diff -"))
 		return 1;
 
 	/* CVS "Index: " line? */
-	if (!prefixcmp(line->buf, "Index: "))
+	if (starts_with(line->buf, "Index: "))
 		return 1;
 
 	/*
@@ -704,7 +717,7 @@ static inline int patchbreak(const struct strbuf *line)
 	if (line->len < 4)
 		return 0;
 
-	if (!prefixcmp(line->buf, "---")) {
+	if (starts_with(line->buf, "---")) {
 		/* space followed by a filename? */
 		if (line->buf[3] == ' ' && !isspace(line->buf[4]))
 			return 1;
@@ -819,6 +832,8 @@ static int handle_commit_msg(struct strbuf *line)
 	}
 
 	if (patchbreak(line)) {
+		if (message_id)
+			fprintf(cmitmsg, "Message-Id: %s\n", message_id);
 		fclose(cmitmsg);
 		cmitmsg = NULL;
 		return 1;
@@ -885,11 +900,6 @@ static void handle_body(void)
 			strbuf_insert(&line, 0, prev.buf, prev.len);
 			strbuf_reset(&prev);
 
-			/* binary data most likely doesn't have newlines */
-			if (message_type != TYPE_TEXT) {
-				handle_filter(&line);
-				break;
-			}
 			/*
 			 * This is a decoded line that may contain
 			 * multiple new lines.  Pass only one chunk
@@ -953,13 +963,13 @@ static void handle_info(void)
 		else
 			continue;
 
-		if (!memcmp(header[i], "Subject", 7)) {
+		if (!strcmp(header[i], "Subject")) {
 			if (!keep_subject) {
 				cleanup_subject(hdr);
 				cleanup_space(hdr);
 			}
 			output_header_lines(fout, "Subject", hdr);
-		} else if (!memcmp(header[i], "From", 4)) {
+		} else if (!strcmp(header[i], "From")) {
 			cleanup_space(hdr);
 			handle_from(hdr);
 			fprintf(fout, "Author: %s\n", name.buf);
@@ -1010,7 +1020,7 @@ static int mailinfo(FILE *in, FILE *out, const char *msg, const char *patch)
 
 static int git_mailinfo_config(const char *var, const char *value, void *unused)
 {
-	if (prefixcmp(var, "mailinfo."))
+	if (!starts_with(var, "mailinfo."))
 		return git_default_config(var, value, unused);
 	if (!strcmp(var, "mailinfo.scissors")) {
 		use_scissors = git_config_bool(var, value);
@@ -1021,7 +1031,7 @@ static int git_mailinfo_config(const char *var, const char *value, void *unused)
 }
 
 static const char mailinfo_usage[] =
-	"git mailinfo [-k|-b] [-u | --encoding=<encoding> | -n] [--scissors | --no-scissors] msg patch < mail >info";
+	"git mailinfo [-k|-b] [-m | --message-id] [-u | --encoding=<encoding> | -n] [--scissors | --no-scissors] msg patch < mail >info";
 
 int cmd_mailinfo(int argc, const char **argv, const char *prefix)
 {
@@ -1040,11 +1050,13 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix)
 			keep_subject = 1;
 		else if (!strcmp(argv[1], "-b"))
 			keep_non_patch_brackets_in_subject = 1;
+		else if (!strcmp(argv[1], "-m") || !strcmp(argv[1], "--message-id"))
+			add_message_id = 1;
 		else if (!strcmp(argv[1], "-u"))
 			metainfo_charset = def_charset;
 		else if (!strcmp(argv[1], "-n"))
 			metainfo_charset = NULL;
-		else if (!prefixcmp(argv[1], "--encoding="))
+		else if (starts_with(argv[1], "--encoding="))
 			metainfo_charset = argv[1] + 11;
 		else if (!strcmp(argv[1], "--scissors"))
 			use_scissors = 1;