From 81c5cf786581c82a8834726ffef26b7def96bf35 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 21 May 2006 17:15:06 -0700 Subject: mailinfo: skip bogus UNIX From line inside body Sometimes people just include the whole format-patch output in the commit e-mail. Detect it and skip the bogus ">From " line. Signed-off-by: Junio C Hamano --- mailinfo.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'mailinfo.c') diff --git a/mailinfo.c b/mailinfo.c index b27651935d..a133e6d08a 100644 --- a/mailinfo.c +++ b/mailinfo.c @@ -237,10 +237,17 @@ static int eatspace(char *line) #define SEEN_FROM 01 #define SEEN_DATE 02 #define SEEN_SUBJECT 04 +#define SEEN_BOGUS_UNIX_FROM 010 /* First lines of body can have From:, Date:, and Subject: */ static int handle_inbody_header(int *seen, char *line) { + if (!memcmp(">From", line, 5) && isspace(line[5])) { + if (!(*seen & SEEN_BOGUS_UNIX_FROM)) { + *seen |= SEEN_BOGUS_UNIX_FROM; + return 1; + } + } if (!memcmp("From:", line, 5) && isspace(line[5])) { if (!(*seen & SEEN_FROM) && handle_from(line+6)) { *seen |= SEEN_FROM; -- cgit v1.2.3 From f8128cfb8d5892e76611d024a19c1ecdace9a39e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 23 May 2006 13:44:11 -0600 Subject: Make read_one_header_line return a flag not a length. Currently we only use the return value from read_one_header line to tell if the line we have read is a header or not. So make it a flag. This paves the way for better email detection. Signed-off-by: Eric W. 
Biederman Signed-off-by: Junio C Hamano --- mailinfo.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'mailinfo.c') diff --git a/mailinfo.c b/mailinfo.c index b27651935d..83a2986e7e 100644 --- a/mailinfo.c +++ b/mailinfo.c @@ -331,7 +331,7 @@ struct header_def { int namelen; }; -static void check_header(char *line, int len, struct header_def *header) +static void check_header(char *line, struct header_def *header) { int i; @@ -349,7 +349,7 @@ static void check_header(char *line, int len, struct header_def *header) } } -static void check_subheader_line(char *line, int len) +static void check_subheader_line(char *line) { static struct header_def header[] = { { "Content-Type", handle_subcontent_type }, @@ -357,9 +357,9 @@ static void check_subheader_line(char *line, int len) handle_content_transfer_encoding }, { NULL }, }; - check_header(line, len, header); + check_header(line, header); } -static void check_header_line(char *line, int len) +static void check_header_line(char *line) { static struct header_def header[] = { { "From", handle_from }, @@ -370,7 +370,7 @@ static void check_header_line(char *line, int len) handle_content_transfer_encoding }, { NULL }, }; - check_header(line, len, header); + check_header(line, header); } static int read_one_header_line(char *line, int sz, FILE *in) @@ -709,8 +709,8 @@ static void handle_multipart_body(void) return; /* We are on boundary line. Start slurping the subhead. 
*/ while (1) { - int len = read_one_header_line(line, sizeof(line), stdin); - if (!len) { + int hdr = read_one_header_line(line, sizeof(line), stdin); + if (!hdr) { if (handle_multipart_one_part() < 0) return; /* Reset per part headers */ @@ -718,7 +718,7 @@ static void handle_multipart_body(void) charset[0] = 0; } else - check_subheader_line(line, len); + check_subheader_line(line); } fclose(patchfile); if (!patch_lines) { @@ -787,15 +787,15 @@ int main(int argc, char **argv) exit(1); } while (1) { - int len = read_one_header_line(line, sizeof(line), stdin); - if (!len) { + int hdr = read_one_header_line(line, sizeof(line), stdin); + if (!hdr) { if (multipart_boundary[0]) handle_multipart_body(); else handle_body(); break; } - check_header_line(line, len); + check_header_line(line); } return 0; } -- cgit v1.2.3 From 3350453014324e375cdca722b50e93cdd78894ed Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 23 May 2006 13:45:37 -0600 Subject: Move B and Q decoding into check header. B and Q decoding is not appropriate for in body headers, so move it up to where we explicitly know we have a real email header. Signed-off-by: Eric W. Biederman Signed-off-by: Junio C Hamano --- mailinfo.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'mailinfo.c') diff --git a/mailinfo.c b/mailinfo.c index 83a2986e7e..bee7b202cf 100644 --- a/mailinfo.c +++ b/mailinfo.c @@ -324,6 +324,7 @@ static void cleanup_space(char *buf) } } +static void decode_header_bq(char *it); typedef int (*header_fn_t)(char *); struct header_def { const char *name; @@ -343,6 +344,10 @@ static void check_header(char *line, struct header_def *header) int len = header[i].namelen; if (!strncasecmp(line, header[i].name, len) && line[len] == ':' && isspace(line[len + 1])) { + /* Unwrap inline B and Q encoding, and optionally + * normalize the meta information to utf8. 
+ */ + decode_header_bq(line + len + 2); header[i].func(line + len + 2); break; } @@ -597,13 +602,6 @@ static void handle_info(void) cleanup_space(email); cleanup_space(sub); - /* Unwrap inline B and Q encoding, and optionally - * normalize the meta information to utf8. - */ - decode_header_bq(name); - decode_header_bq(date); - decode_header_bq(email); - decode_header_bq(sub); printf("Author: %s\nEmail: %s\nSubject: %s\nDate: %s\n\n", name, email, sub, date); } -- cgit v1.2.3 From 8b4525fb3c6d79bd3a64b8f441237a4095db4e22 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 23 May 2006 13:47:28 -0600 Subject: Refactor commit message handling. - Move handle_info into main so it is called once after everything has been parsed. This allows the removal of a static variable and removes two duplicate calls. - Move parsing of inbody headers into handle_commit. This means we parse the in-body headers after we have decoded the character set, and it removes code duplication between handle_multipart_one_part and handle_body. - Change the flag indicating that we have seen an in body prefix header into another bit in seen. This is a little more general and allows the possibility of parsing in body headers after the body message has begun. Signed-off-by: Eric W. 
Biederman Signed-off-by: Junio C Hamano --- mailinfo.c | 58 ++++++++++++++++++++++------------------------------------ 1 file changed, 22 insertions(+), 36 deletions(-) (limited to 'mailinfo.c') diff --git a/mailinfo.c b/mailinfo.c index bee7b202cf..3fa9505313 100644 --- a/mailinfo.c +++ b/mailinfo.c @@ -237,38 +237,41 @@ static int eatspace(char *line) #define SEEN_FROM 01 #define SEEN_DATE 02 #define SEEN_SUBJECT 04 +#define SEEN_PREFIX 0x08 /* First lines of body can have From:, Date:, and Subject: */ -static int handle_inbody_header(int *seen, char *line) +static void handle_inbody_header(int *seen, char *line) { + if (*seen & SEEN_PREFIX) + return; if (!memcmp("From:", line, 5) && isspace(line[5])) { if (!(*seen & SEEN_FROM) && handle_from(line+6)) { *seen |= SEEN_FROM; - return 1; + return; } } if (!memcmp("Date:", line, 5) && isspace(line[5])) { if (!(*seen & SEEN_DATE)) { handle_date(line+6); *seen |= SEEN_DATE; - return 1; + return; } } if (!memcmp("Subject:", line, 8) && isspace(line[8])) { if (!(*seen & SEEN_SUBJECT)) { handle_subject(line+9); *seen |= SEEN_SUBJECT; - return 1; + return; } } if (!memcmp("[PATCH]", line, 7) && isspace(line[7])) { if (!(*seen & SEEN_SUBJECT)) { handle_subject(line); *seen |= SEEN_SUBJECT; - return 1; + return; } } - return 0; + *seen |= SEEN_PREFIX; } static char *cleanup_subject(char *subject) @@ -590,12 +593,7 @@ static void decode_transfer_encoding(char *line) static void handle_info(void) { char *sub; - static int done_info = 0; - - if (done_info) - return; - done_info = 1; sub = cleanup_subject(subject); cleanup_space(name); cleanup_space(date); @@ -609,7 +607,7 @@ static void handle_info(void) /* We are inside message body and have read line[] already. * Spit out the commit log. 
*/ -static int handle_commit_msg(void) +static int handle_commit_msg(int *seen) { if (!cmitmsg) return 0; @@ -633,6 +631,11 @@ static int handle_commit_msg(void) decode_transfer_encoding(line); if (metainfo_charset) convert_to_utf8(line, charset); + + handle_inbody_header(seen, line); + if (!(*seen & SEEN_PREFIX)) + continue; + fputs(line, cmitmsg); } while (fgets(line, sizeof(line), stdin) != NULL); fclose(cmitmsg); @@ -664,26 +667,16 @@ static void handle_patch(void) * that the first part to contain commit message and a patch, and * handle other parts as pure patches. */ -static int handle_multipart_one_part(void) +static int handle_multipart_one_part(int *seen) { - int seen = 0; int n = 0; - int len; while (fgets(line, sizeof(line), stdin) != NULL) { again: - len = eatspace(line); n++; - if (!len) - continue; if (is_multipart_boundary(line)) break; - if (0 <= seen && handle_inbody_header(&seen, line)) - continue; - seen = -1; /* no more inbody headers */ - line[len] = '\n'; - handle_info(); - if (handle_commit_msg()) + if (handle_commit_msg(seen)) goto again; handle_patch(); break; @@ -695,6 +688,7 @@ static int handle_multipart_one_part(void) static void handle_multipart_body(void) { + int seen = 0; int part_num = 0; /* Skip up to the first boundary */ @@ -709,7 +703,7 @@ static void handle_multipart_body(void) while (1) { int hdr = read_one_header_line(line, sizeof(line), stdin); if (!hdr) { - if (handle_multipart_one_part() < 0) + if (handle_multipart_one_part(&seen) < 0) return; /* Reset per part headers */ transfer_encoding = TE_DONTCARE; @@ -730,18 +724,9 @@ static void handle_body(void) { int seen = 0; - while (fgets(line, sizeof(line), stdin) != NULL) { - int len = eatspace(line); - if (!len) - continue; - if (0 <= seen && handle_inbody_header(&seen, line)) - continue; - seen = -1; /* no more inbody headers */ - line[len] = '\n'; - handle_info(); - handle_commit_msg(); + if (fgets(line, sizeof(line), stdin) != NULL) { + handle_commit_msg(&seen); 
handle_patch(); - break; } fclose(patchfile); if (!patch_lines) { @@ -791,6 +776,7 @@ int main(int argc, char **argv) handle_multipart_body(); else handle_body(); + handle_info(); break; } check_header_line(line); -- cgit v1.2.3 From 1f36bee67e604735bc48be7fc731a823e6c5807f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 23 May 2006 13:49:00 -0600 Subject: In handle_body only read a line if we don't already have one. This prepares for detecting non-email patches that don't have mail headers. In which case we have already read the first line so handle_body should not ignore it. Signed-off-by: Eric W. Biederman Signed-off-by: Junio C Hamano --- mailinfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mailinfo.c') diff --git a/mailinfo.c b/mailinfo.c index 3fa9505313..99989c25b2 100644 --- a/mailinfo.c +++ b/mailinfo.c @@ -724,7 +724,7 @@ static void handle_body(void) { int seen = 0; - if (fgets(line, sizeof(line), stdin) != NULL) { + if (line[0] || fgets(line, sizeof(line), stdin) != NULL) { handle_commit_msg(&seen); handle_patch(); } -- cgit v1.2.3 From f30b20282babcd77bcadef70b4e36e24cd1f6d59 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 23 May 2006 13:53:20 -0600 Subject: More accurately detect header lines in read_one_header_line Only count lines of the form '^.*: ' and '^From ' as email header lines. Signed-off-by: Eric W. 
Biederman Signed-off-by: Junio C Hamano --- mailinfo.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'mailinfo.c') diff --git a/mailinfo.c b/mailinfo.c index 99989c25b2..a2b15e2624 100644 --- a/mailinfo.c +++ b/mailinfo.c @@ -385,20 +385,29 @@ static int read_one_header_line(char *line, int sz, FILE *in) { int ofs = 0; while (ofs < sz) { + const char *colon; int peek, len; if (fgets(line + ofs, sz - ofs, in) == NULL) - return ofs; + break; len = eatspace(line + ofs); if (len == 0) - return ofs; - peek = fgetc(in); ungetc(peek, in); - if (peek == ' ' || peek == '\t') { - /* Yuck, 2822 header "folding" */ - ofs += len; - continue; + break; + colon = strchr(line, ':'); + if (!colon || !isspace(colon[1])) { + /* Re-add the newline */ + line[ofs + len] = '\n'; + line[ofs + len + 1] = '\0'; + break; } - return ofs + len; + ofs += len; + /* Yuck, 2822 header "folding" */ + peek = fgetc(in); ungetc(peek, in); + if (peek != ' ' && peek != '\t') + break; } + /* Count mbox From headers as headers */ + if (!ofs && !memcmp(line, "From ", 5)) + ofs = 1; return ofs; } -- cgit v1.2.3 From 2dec02b1ecafc47d4031d0a68a94c775a6a9ff9e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 23 May 2006 13:58:36 -0600 Subject: Allow in body headers beyond the in body header prefix. - handle_from is fixed to not mangle its input line. - Then handle_inbody_header is allowed to look in the body of a commit message for additional headers that we haven't already seen. This allows patches with all of the right information in unfortunate places to be imported. Signed-off-by: Eric W. 
Biederman Signed-off-by: Junio C Hamano --- mailinfo.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'mailinfo.c') diff --git a/mailinfo.c b/mailinfo.c index a2b15e2624..241bfb9e25 100644 --- a/mailinfo.c +++ b/mailinfo.c @@ -72,11 +72,14 @@ static int bogus_from(char *line) return 1; } -static int handle_from(char *line) +static int handle_from(char *in_line) { - char *at = strchr(line, '@'); + char line[1000]; + char *at; char *dst; + strcpy(line, in_line); + at = strchr(line, '@'); if (!at) return bogus_from(line); @@ -242,8 +245,6 @@ static int eatspace(char *line) /* First lines of body can have From:, Date:, and Subject: */ static void handle_inbody_header(int *seen, char *line) { - if (*seen & SEEN_PREFIX) - return; if (!memcmp("From:", line, 5) && isspace(line[5])) { if (!(*seen & SEEN_FROM) && handle_from(line+6)) { *seen |= SEEN_FROM; -- cgit v1.2.3 From ef29c11702594e616cf43bea260515d9f14f17b0 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 26 May 2006 00:46:58 -0700 Subject: mailinfo: More carefully parse header lines in read_one_header_line() We exited prematurely from header parsing loop when the header field did not have a space after the colon but we insisted on it, and we got the check wrong because we forgot that we strip the trailing whitespace before we do the check. The space after the colon is not even required by RFC2822, so stop requiring it. While we are at it, the header line is specified to be more strict than "anything with a colon in it" (there must be one or more characters before the colon, and they must not be controls, SP or non US-ASCII), so implement that check as well, lest we mistakenly think something like: Bogus not a header line: this is not. as a header line. 
Signed-off-by: Junio C Hamano --- mailinfo.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) (limited to 'mailinfo.c') diff --git a/mailinfo.c b/mailinfo.c index 241bfb9e25..88f9fbb198 100644 --- a/mailinfo.c +++ b/mailinfo.c @@ -382,19 +382,40 @@ static void check_header_line(char *line) check_header(line, header); } +static int is_rfc2822_header(char *line) +{ + /* + * The section that defines the loosest possible + * field name is "3.6.8 Optional fields". + * + * optional-field = field-name ":" unstructured CRLF + * field-name = 1*ftext + * ftext = %d33-57 / %59-126 + */ + int ch; + char *cp = line; + while ((ch = *cp++)) { + if (ch == ':') + return cp != line; + if ((33 <= ch && ch <= 57) || + (59 <= ch && ch <= 126)) + continue; + break; + } + return 0; +} + static int read_one_header_line(char *line, int sz, FILE *in) { int ofs = 0; while (ofs < sz) { - const char *colon; int peek, len; if (fgets(line + ofs, sz - ofs, in) == NULL) break; len = eatspace(line + ofs); if (len == 0) break; - colon = strchr(line, ':'); - if (!colon || !isspace(colon[1])) { + if (!is_rfc2822_header(line)) { /* Re-add the newline */ line[ofs + len] = '\n'; line[ofs + len + 1] = '\0'; -- cgit v1.2.3