/*
* Another stupid program, this one parsing the headers of an
* email to figure out authorship and subject
*/
#include "cache.h"
#include "builtin.h"
#include "utf8.h"
#include "strbuf.h"
/* File-scope stream handles; where they are opened is outside this view. */
static FILE *cmitmsg, *patchfile, *fin, *fout;
/* Presumably option flags set by the command-line parser -- confirm against the caller. */
static int keep_subject;
static int keep_non_patch_brackets_in_subject;
static const char *metainfo_charset;
/* NOTE(review): looks like the buffer for the line currently being processed -- confirm. */
static struct strbuf line = STRBUF_INIT;
/* Author name and e-mail address; written by handle_from() and parse_bogus_from(). */
static struct strbuf name = STRBUF_INIT;
static struct strbuf email = STRBUF_INIT;
/* Presumably the Content-Transfer-Encoding in effect (quoted-printable / base64) -- confirm. */
static enum {
TE_DONTCARE, TE_QP, TE_BASE64
} transfer_encoding;
/* Set to TYPE_OTHER by handle_content_type() when the header does not mention "text/". */
static enum {
TYPE_TEXT, TYPE_OTHER
} message_type;
static struct strbuf charset = STRBUF_INIT;
static int patch_lines;
/*
 * Header storage, as pointers to strbuf pointers; presumably arrays
 * sized by MAX_HDR_PARSED -- allocation is outside this view.
 */
static struct strbuf **p_hdr_data, **s_hdr_data;
static int use_scissors;
/* Initialised to 1 here; where it is consulted or changed is outside this view. */
static int use_inbody_headers = 1;
#define MAX_HDR_PARSED 10
/* Number of slots in the content[] array. */
#define MAX_BOUNDARIES 5
/* Forward declaration: handle_from() calls this before its definition appears. */
static void cleanup_space(struct strbuf *sb);
/*
 * Store a usable author name in "out".
 *
 * "name" is rejected when it is shorter than 3 or longer than 60 bytes,
 * or when it contains any of '@', '<' or '>'; in that case the e-mail
 * address is used as the name instead.  When "name" is acceptable and
 * already is the output buffer, nothing needs to be copied.
 */
static void get_sane_name(struct strbuf *out, struct strbuf *name, struct strbuf *email)
{
	struct strbuf *chosen;
	int unusable;

	unusable = name->len < 3 || name->len > 60 ||
		   strchr(name->buf, '@') ||
		   strchr(name->buf, '<') ||
		   strchr(name->buf, '>');

	if (!unusable && out == name)
		return;	/* "out" already holds an acceptable name */

	chosen = unusable ? email : name;
	strbuf_reset(out);
	strbuf_addbuf(out, chosen);
}
/*
 * Fallback parser for a From line without '@', e.g. "John Doe <johndoe>".
 *
 * Does nothing when an e-mail address has already been recorded, or
 * when the line has no '<' followed by a '>'.  Otherwise the text
 * between the brackets becomes the file-level "email", and the
 * trimmed text before the '<' becomes "name" (after being passed
 * through get_sane_name()).
 */
static void parse_bogus_from(const struct strbuf *line)
{
	const char *start = line->buf;
	char *lt, *gt;
	size_t addr_len, name_len;

	/* This is only a fallback; keep an address we already have. */
	if (email.len)
		return;

	lt = strchr(start, '<');
	if (!lt)
		return;
	gt = strchr(lt, '>');
	if (!gt)
		return;

	addr_len = gt - lt - 1;	/* bytes strictly between '<' and '>' */
	name_len = lt - start;	/* everything before the '<' */

	strbuf_reset(&email);
	strbuf_add(&email, lt + 1, addr_len);

	strbuf_reset(&name);
	strbuf_add(&name, start, name_len);
	strbuf_trim(&name);

	get_sane_name(&name, &name, &email);
}
/*
 * Parse the value of a From-style header and fill in the file-level
 * "name" and "email" buffers.
 *
 * The input is copied into a private working buffer, which is
 * released on every exit path.  If the line has no '@', it is handed
 * to parse_bogus_from() as a fallback.  If an e-mail address is
 * already known and the line contains more than one '@', the line is
 * ignored.
 */
static void handle_from(const struct strbuf *from)
{
	char *at;
	size_t el;
	struct strbuf f;

	strbuf_init(&f, from->len);
	strbuf_addbuf(&f, from);

	at = strchr(f.buf, '@');
	if (!at) {
		parse_bogus_from(from);
		goto out;
	}

	/*
	 * If we already have one email, don't take any confusing lines
	 */
	if (email.len && strchr(at + 1, '@'))
		goto out;

	/* Pick up the string around '@', possibly delimited with <>
	 * pair; that is the email part.
	 */
	while (at > f.buf) {
		char c = at[-1];
		if (isspace(c))
			break;
		if (c == '<') {
			/* blank out the bracket so it does not end up in the name */
			at[-1] = ' ';
			break;
		}
		at--;
	}
	el = strcspn(at, " \n\t\r\v\f>");
	strbuf_reset(&email);
	strbuf_add(&email, at, el);
	/* drop the address and, unless we hit end of string, its terminator */
	strbuf_remove(&f, at - f.buf, el + (at[el] ? 1 : 0));

	/* The remainder is name.  It could be
	 *
	 * - "John Doe <john.doe@xz>"			(a), or
	 * - "john.doe@xz (John Doe)"			(b), or
	 * - "John (zzz) Doe <john.doe@xz> (Comment)"	(c)
	 *
	 * but we have removed the email part, so
	 *
	 * - remove extra spaces which could stay after email (case 'c'), and
	 * - trim from both ends, possibly removing the () pair that
	 *   encloses the whole remainder (case 'b').
	 */
	cleanup_space(&f);
	strbuf_trim(&f);
	if (f.len && f.buf[0] == '(' && f.buf[f.len - 1] == ')') {
		strbuf_remove(&f, 0, 1);
		strbuf_setlen(&f, f.len - 1);
	}

	get_sane_name(&name, &f, &email);
out:
	strbuf_release(&f);
}
/*
 * Replace the contents of the header slot "*out" with a copy of "line".
 * The slot's strbuf is allocated on first use (sized for "line") and
 * reused, after being emptied, on later calls.
 */
static void handle_header(struct strbuf **out, const struct strbuf *line)
{
	struct strbuf *slot = *out;

	if (slot) {
		strbuf_reset(slot);
	} else {
		slot = xmalloc(sizeof(struct strbuf));
		strbuf_init(slot, line->len);
		*out = slot;
	}
	strbuf_addbuf(slot, line);
}
/* NOTE NOTE NOTE. We do not claim we do full MIME. We just attempt
* to have enough heuristics to grok MIME encoded patches often found
* on our mailing lists. For example, we do not even treat header lines
* case insensitively.
*/
/*
 * Extract the value of the attribute "name" (e.g. "boundary=") from
 * "line" into "attr".
 *
 * "name" is searched for case-insensitively anywhere in "line".  If
 * the value starts with a double quote, it extends to the next double
 * quote; otherwise it ends at the first ';', space or tab.  In both
 * cases the end of the string also terminates the value.
 *
 * "attr" is always emptied first, so a buffer that is reused across
 * calls never accumulates values from earlier headers.
 *
 * Returns 1 when the attribute was found, 0 otherwise (with "attr"
 * left empty).
 */
static int slurp_attr(const char *line, const char *name, struct strbuf *attr)
{
	const char *ends, *ap = strcasestr(line, name);
	size_t sz;

	strbuf_setlen(attr, 0);
	if (!ap)
		return 0;

	ap += strlen(name);
	if (*ap == '"') {
		ap++;
		ends = "\"";
	} else {
		ends = "; \t";
	}
	sz = strcspn(ap, ends);
	strbuf_add(attr, ap, sz);
	return 1;
}
/*
 * Fixed-size array of MAX_BOUNDARIES strbuf pointers, presumably the
 * MIME boundary strings currently in effect -- confirm against
 * handle_content_type().  content_top starts at the first slot and is
 * advanced by handle_content_type() when a header carries a
 * "boundary=" attribute.
 */
static struct strbuf *content[MAX_BOUNDARIES];
static struct strbuf **content_top = content;
static void handle_content_type(struct strbuf *line)
{
struct strbuf *boundary = xmalloc(sizeof(struct strbuf));
strbuf_init(boundary, line->len);
if (!strcasestr(line->buf, "text/"))
message_type = TYPE_OTHER;
if (slurp_attr(line->buf, "boundary=", boundary)) {
strbuf_insert(boundary, 0, "--", 2);
if (++content_top > &content[MAX_BOUNDARIES]) {
fprintf(stderr, "Too many boundaries to handle\n");
|