From 7a76e68a177720da65e7d9cfa49d702a55e2d9de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=2E=20Sch=C3=B6nherr?= Date: Thu, 18 Oct 2012 16:43:29 +0200 Subject: format-patch: do not wrap non-rfc2047 headers too early MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do not wrap the second and later lines of non-rfc2047-encoded headers substantially before the 78 character limit. Instead of passing the remaining length of the first line as wrapping width, use the correct maximum length and tell strbuf_add_wrapped_bytes() how many characters of the first line are already used. Signed-off-by: Jan H. Schönherr Signed-off-by: Junio C Hamano --- pretty.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'pretty.c') diff --git a/pretty.c b/pretty.c index 8b1ea9ffad..71e4024663 100644 --- a/pretty.c +++ b/pretty.c @@ -286,7 +286,7 @@ static void add_rfc2047(struct strbuf *sb, const char *line, int len, if ((i + 1 < len) && (ch == '=' && line[i+1] == '?')) goto needquote; } - strbuf_add_wrapped_bytes(sb, line, len, 0, 1, max_length - line_len); + strbuf_add_wrapped_bytes(sb, line, len, -line_len, 1, max_length); return; needquote: -- cgit v1.2.3 From 94f6cdf693c10eaec5a7fcc4f0b5cb1e0ea80f1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=2E=20Sch=C3=B6nherr?= Date: Thu, 18 Oct 2012 16:43:30 +0200 Subject: format-patch: do not wrap rfc2047 encoded headers too late MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Encoded characters add more than one character at once to an encoded header. Include all characters that are about to be added in the length calculation for wrapping. Additionally, RFC 2047 imposes a maximum line length of 76 characters if that line contains an rfc2047 encoded word. Signed-off-by: Jan H. Schönherr Signed-off-by: Junio C Hamano --- pretty.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'pretty.c') diff --git a/pretty.c b/pretty.c index 71e4024663..da75879999 100644 --- a/pretty.c +++ b/pretty.c @@ -263,6 +263,9 @@ static void add_rfc822_quoted(struct strbuf *out, const char *s, int len) static int is_rfc2047_special(char ch) { + if (ch == ' ' || ch == '\n') + return 1; + return (non_ascii(ch) || (ch == '=') || (ch == '?') || (ch == '_')); } @@ -270,6 +273,7 @@ static void add_rfc2047(struct strbuf *sb, const char *line, int len, const char *encoding) { static const int max_length = 78; /* per rfc2822 */ + static const int max_encoded_length = 76; /* per rfc2047 */ int i; int line_len; @@ -295,23 +299,25 @@ needquote: line_len += strlen(encoding) + 5; /* 5 for =??q? */ for (i = 0; i < len; i++) { unsigned ch = line[i] & 0xFF; + int is_special = is_rfc2047_special(ch); + + /* + * According to RFC 2047, we could encode the special character + * ' ' (space) with '_' (underscore) for readability. But many + * programs do not understand this and just leave the + * underscore in place. Thus, we do nothing special here, which + * causes ' ' to be encoded as '=20', avoiding this problem. + */ - if (line_len >= max_length - 2) { + if (line_len + 2 + (is_special ? 3 : 1) > max_encoded_length) { strbuf_addf(sb, "?=\n =?%s?q?", encoding); line_len = strlen(encoding) + 5 + 1; /* =??q? plus SP */ } - /* - * We encode ' ' using '=20' even though rfc2047 - * allows using '_' for readability. Unfortunately, - * many programs do not understand this and just - * leave the underscore in place. - */ - if (is_rfc2047_special(ch) || ch == ' ' || ch == '\n') { + if (is_special) { strbuf_addf(sb, "=%02X", ch); line_len += 3; - } - else { + } else { strbuf_addch(sb, ch); line_len++; } -- cgit v1.2.3 From f9b7204b6d10a1429271fde8cbcdee56beb0e035 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=2E=20Sch=C3=B6nherr?= Date: Thu, 18 Oct 2012 16:43:31 +0200 Subject: format-patch: introduce helper function last_line_length() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, an open-coded loop to calculate the length of the last line of a string buffer is used in multiple places. Move that code into a function of its own. Signed-off-by: Jan H. Schönherr Signed-off-by: Junio C Hamano --- pretty.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'pretty.c') diff --git a/pretty.c b/pretty.c index da75879999..482402d283 100644 --- a/pretty.c +++ b/pretty.c @@ -240,6 +240,17 @@ static int has_rfc822_specials(const char *s, int len) return 0; } +static int last_line_length(struct strbuf *sb) +{ + int i; + + /* How many bytes are already used on the last line? */ + for (i = sb->len - 1; i >= 0; i--) + if (sb->buf[i] == '\n') + break; + return sb->len - (i + 1); +} + static void add_rfc822_quoted(struct strbuf *out, const char *s, int len) { int i; @@ -275,13 +286,7 @@ static void add_rfc2047(struct strbuf *sb, const char *line, int len, static const int max_length = 78; /* per rfc2822 */ static const int max_encoded_length = 76; /* per rfc2047 */ int i; - int line_len; - - /* How many bytes are already used on the current line? */ - for (i = sb->len - 1; i >= 0; i--) - if (sb->buf[i] == '\n') - break; - line_len = sb->len - (i+1); + int line_len = last_line_length(sb); for (i = 0; i < len; i++) { int ch = line[i]; @@ -346,7 +351,6 @@ void pp_user_info(const struct pretty_print_context *pp, if (pp->fmt == CMIT_FMT_EMAIL) { char *name_tail = strchr(line, '<'); int display_name_length; - int final_line; if (!name_tail) return; while (line < name_tail && isspace(name_tail[-1])) @@ -361,10 +365,7 @@ void pp_user_info(const struct pretty_print_context *pp, add_rfc2047(sb, quoted.buf, quoted.len, encoding); strbuf_release("ed); } - for (final_line = 0; final_line < sb->len; final_line++) - if (sb->buf[sb->len - final_line - 1] == '\n') - break; - if (namelen - display_name_length + final_line > 78) { + if (namelen - display_name_length + last_line_length(sb) > 78) { strbuf_addch(sb, '\n'); if (!isspace(name_tail[0])) strbuf_addch(sb, ' '); -- cgit v1.2.3 From 0fcec2ce5467740eb613da6a977deca3e7e866ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=2E=20Sch=C3=B6nherr?= Date: Thu, 18 Oct 2012 16:43:32 +0200 Subject: format-patch: make rfc2047 encoding more strict MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RFC 2047 requires more characters to be encoded than it is currently done. Especially, RFC 2047 distinguishes between allowed remaining characters in encoded words in addresses (From, To, etc.) and other headers, such as Subject. Make add_rfc2047() and is_rfc2047_special() location dependent and include all non-allowed characters to hopefully be RFC 2047 conformant. This especially fixes a problem, where RFC 822 specials (e. g. ".") were left unencoded in addresses, which was solved with a non-standard-conforming workaround in the past (which is going to be removed in a follow-up patch). Signed-off-by: Jan H. Schönherr Signed-off-by: Junio C Hamano --- pretty.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 59 insertions(+), 8 deletions(-) (limited to 'pretty.c') diff --git a/pretty.c b/pretty.c index 482402d283..613e4eab0d 100644 --- a/pretty.c +++ b/pretty.c @@ -272,16 +272,65 @@ static void add_rfc822_quoted(struct strbuf *out, const char *s, int len) strbuf_addch(out, '"'); } -static int is_rfc2047_special(char ch) +enum rfc2047_type { + RFC2047_SUBJECT, + RFC2047_ADDRESS, +}; + +static int is_rfc2047_special(char ch, enum rfc2047_type type) { - if (ch == ' ' || ch == '\n') + /* + * rfc2047, section 4.2: + * + * 8-bit values which correspond to printable ASCII characters other + * than "=", "?", and "_" (underscore), MAY be represented as those + * characters. (But see section 5 for restrictions.) In + * particular, SPACE and TAB MUST NOT be represented as themselves + * within encoded words. + */ + + /* + * rule out non-ASCII characters and non-printable characters (the + * non-ASCII check should be redundant as isprint() is not localized + * and only knows about ASCII, but be defensive about that) + */ + if (non_ascii(ch) || !isprint(ch)) + return 1; + + /* + * rule out special printable characters (' ' should be the only + * whitespace character considered printable, but be defensive and use + * isspace()) + */ + if (isspace(ch) || ch == '=' || ch == '?' || ch == '_') return 1; - return (non_ascii(ch) || (ch == '=') || (ch == '?') || (ch == '_')); + /* + * rfc2047, section 5.3: + * + * As a replacement for a 'word' entity within a 'phrase', for example, + * one that precedes an address in a From, To, or Cc header. The ABNF + * definition for 'phrase' from RFC 822 thus becomes: + * + * phrase = 1*( encoded-word / word ) + * + * In this case the set of characters that may be used in a "Q"-encoded + * 'encoded-word' is restricted to: . An 'encoded-word' that appears within a + * 'phrase' MUST be separated from any adjacent 'word', 'text' or + * 'special' by 'linear-white-space'. + */ + + if (type != RFC2047_ADDRESS) + return 0; + + /* '=' and '_' are special cases and have been checked above */ + return !(isalnum(ch) || ch == '!' || ch == '*' || ch == '+' || ch == '-' || ch == '/'); } static void add_rfc2047(struct strbuf *sb, const char *line, int len, - const char *encoding) + const char *encoding, enum rfc2047_type type) { static const int max_length = 78; /* per rfc2822 */ static const int max_encoded_length = 76; /* per rfc2047 */ @@ -304,7 +353,7 @@ needquote: line_len += strlen(encoding) + 5; /* 5 for =??q? */ for (i = 0; i < len; i++) { unsigned ch = line[i] & 0xFF; - int is_special = is_rfc2047_special(ch); + int is_special = is_rfc2047_special(ch, type); /* * According to RFC 2047, we could encode the special character @@ -358,11 +407,13 @@ void pp_user_info(const struct pretty_print_context *pp, display_name_length = name_tail - line; strbuf_addstr(sb, "From: "); if (!has_rfc822_specials(line, display_name_length)) { - add_rfc2047(sb, line, display_name_length, encoding); + add_rfc2047(sb, line, display_name_length, + encoding, RFC2047_ADDRESS); } else { struct strbuf quoted = STRBUF_INIT; add_rfc822_quoted("ed, line, display_name_length); - add_rfc2047(sb, quoted.buf, quoted.len, encoding); + add_rfc2047(sb, quoted.buf, quoted.len, + encoding, RFC2047_ADDRESS); strbuf_release("ed); } if (namelen - display_name_length + last_line_length(sb) > 78) { @@ -1294,7 +1345,7 @@ void pp_title_line(const struct pretty_print_context *pp, strbuf_grow(sb, title.len + 1024); if (pp->subject) { strbuf_addstr(sb, pp->subject); - add_rfc2047(sb, title.buf, title.len, encoding); + add_rfc2047(sb, title.buf, title.len, encoding, RFC2047_SUBJECT); } else { strbuf_addbuf(sb, &title); } -- cgit v1.2.3 From 41dd00bad3d7614be7e8a41ed3f31878af86758b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=2E=20Sch=C3=B6nherr?= Date: Thu, 18 Oct 2012 16:43:33 +0200 Subject: format-patch: fix rfc2047 address encoding with respect to rfc822 specials MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to RFC 2047 and RFC 822, rfc2047 encoded words and and rfc822 quoted strings do not mix. Since add_rfc2047() no longer leaves RFC 822 specials behind, the quoting is also no longer necessary to create a standard-conforming mail. Remove the quoting, when RFC 2047 encoding takes place. This actually requires to refactor add_rfc2047() a bit, so that the different cases can be distinguished. With this patch, my own name gets correctly decoded as Jan H. Schönherr (without quotes) and not as "Jan H. Schönherr" (with quotes). Signed-off-by: Jan H. Schönherr Signed-off-by: Junio C Hamano --- pretty.c | 49 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 17 deletions(-) (limited to 'pretty.c') diff --git a/pretty.c b/pretty.c index 613e4eab0d..413e7587b6 100644 --- a/pretty.c +++ b/pretty.c @@ -231,7 +231,7 @@ static int is_rfc822_special(char ch) } } -static int has_rfc822_specials(const char *s, int len) +static int needs_rfc822_quoting(const char *s, int len) { int i; for (i = 0; i < len; i++) @@ -329,25 +329,29 @@ static int is_rfc2047_special(char ch, enum rfc2047_type type) return !(isalnum(ch) || ch == '!' || ch == '*' || ch == '+' || ch == '-' || ch == '/'); } -static void add_rfc2047(struct strbuf *sb, const char *line, int len, - const char *encoding, enum rfc2047_type type) +static int needs_rfc2047_encoding(const char *line, int len, + enum rfc2047_type type) { - static const int max_length = 78; /* per rfc2822 */ - static const int max_encoded_length = 76; /* per rfc2047 */ int i; - int line_len = last_line_length(sb); for (i = 0; i < len; i++) { int ch = line[i]; if (non_ascii(ch) || ch == '\n') - goto needquote; + return 1; if ((i + 1 < len) && (ch == '=' && line[i+1] == '?')) - goto needquote; + return 1; } - strbuf_add_wrapped_bytes(sb, line, len, -line_len, 1, max_length); - return; -needquote: + return 0; +} + +static void add_rfc2047(struct strbuf *sb, const char *line, int len, + const char *encoding, enum rfc2047_type type) +{ + static const int max_encoded_length = 76; /* per rfc2047 */ + int i; + int line_len = last_line_length(sb); + strbuf_grow(sb, len * 3 + strlen(encoding) + 100); strbuf_addf(sb, "=?%s?q?", encoding); line_len += strlen(encoding) + 5; /* 5 for =??q? */ @@ -383,6 +387,7 @@ void pp_user_info(const struct pretty_print_context *pp, const char *what, struct strbuf *sb, const char *line, const char *encoding) { + int max_length = 78; /* per rfc2822 */ char *date; int namelen; unsigned long time; @@ -406,17 +411,21 @@ void pp_user_info(const struct pretty_print_context *pp, name_tail--; display_name_length = name_tail - line; strbuf_addstr(sb, "From: "); - if (!has_rfc822_specials(line, display_name_length)) { + if (needs_rfc2047_encoding(line, display_name_length, RFC2047_ADDRESS)) { add_rfc2047(sb, line, display_name_length, encoding, RFC2047_ADDRESS); - } else { + max_length = 76; /* per rfc2047 */ + } else if (needs_rfc822_quoting(line, display_name_length)) { struct strbuf quoted = STRBUF_INIT; add_rfc822_quoted("ed, line, display_name_length); - add_rfc2047(sb, quoted.buf, quoted.len, - encoding, RFC2047_ADDRESS); + strbuf_add_wrapped_bytes(sb, quoted.buf, quoted.len, + -6, 1, max_length); strbuf_release("ed); + } else { + strbuf_add_wrapped_bytes(sb, line, display_name_length, + -6, 1, max_length); } - if (namelen - display_name_length + last_line_length(sb) > 78) { + if (namelen - display_name_length + last_line_length(sb) > max_length) { strbuf_addch(sb, '\n'); if (!isspace(name_tail[0])) strbuf_addch(sb, ' '); @@ -1336,6 +1345,7 @@ void pp_title_line(const struct pretty_print_context *pp, const char *encoding, int need_8bit_cte) { + static const int max_length = 78; /* per rfc2047 */ struct strbuf title; strbuf_init(&title, 80); @@ -1345,7 +1355,12 @@ void pp_title_line(const struct pretty_print_context *pp, strbuf_grow(sb, title.len + 1024); if (pp->subject) { strbuf_addstr(sb, pp->subject); - add_rfc2047(sb, title.buf, title.len, encoding, RFC2047_SUBJECT); + if (needs_rfc2047_encoding(title.buf, title.len, RFC2047_SUBJECT)) + add_rfc2047(sb, title.buf, title.len, + encoding, RFC2047_SUBJECT); + else + strbuf_add_wrapped_bytes(sb, title.buf, title.len, + -last_line_length(sb), 1, max_length); } else { strbuf_addbuf(sb, &title); } -- cgit v1.2.3