/*
* Another stupid program, this one parsing the headers of an
* email to figure out authorship and subject
*/
#include "cache.h"
#include "builtin.h"
#include "utf8.h"
/*
 * File-scope state shared by the header parsing helpers below.
 */
/* Streams used by the program; they are not referenced in this part of
 * the file (NOTE(review): the names suggest commit message, patch,
 * input and output -- confirm against the code that opens them).
 */
static FILE *cmitmsg, *patchfile, *fin, *fout;
/* NOTE(review): presumably option-controlled settings; neither is used
 * in this part of the file -- confirm against the option parser.
 */
static int keep_subject;
static const char *metainfo_charset;
/* Fixed-size text buffers. name and email receive the author identity
 * extracted by handle_from() / bogus_from().
 */
static char line[1000];
static char name[1000];
static char email[1000];
/* Content-Transfer-Encoding recognised by
 * handle_content_transfer_encoding().
 */
static enum {
TE_DONTCARE, TE_QP, TE_BASE64,
} transfer_encoding;
/* Switched to TYPE_OTHER by handle_content_type() when the Content-Type
 * value does not contain "text/".
 */
static enum {
TYPE_TEXT, TYPE_OTHER,
} message_type;
/* Lower-cased charset= attribute taken from the last Content-Type value
 * handled; empty when that value had no charset= attribute.
 */
static char charset[256];
/* NOTE(review): not used in this part of the file; meaning to be
 * confirmed against the code that updates them.
 */
static int patch_lines;
static char **p_hdr_data, **s_hdr_data;
/* NOTE(review): presumably the capacity of the *_hdr_data arrays --
 * confirm against the code that allocates them.
 */
#define MAX_HDR_PARSED 10
/* Number of slots in the content[] boundary array. */
#define MAX_BOUNDARIES 5
/*
 * Decide whether a candidate author name is usable.
 *
 * A name is accepted when it is between 3 and 60 characters long and
 * contains none of '@', '<' or '>'.  Returns `name` when it is
 * accepted and `email` otherwise, so the caller always gets something
 * to use as the display name.
 */
static char *sanity_check(char *name, char *email)
{
	const int namelen = strlen(name);

	if (namelen >= 3 && namelen <= 60 && strpbrk(name, "@<>") == NULL)
		return name;
	return email;
}
/*
 * Fallback parser for a From: value that has no '@' in it, such as
 *
 *	John Doe <johndoe>
 *
 * The text between the first '<' and the following '>' is stored in
 * the global email buffer.  The text before the '<', with surrounding
 * whitespace removed, is offered to sanity_check(); whatever that
 * returns is stored in the global name buffer.
 *
 * `line` is modified in place (whitespace before the '<' is overwritten
 * with NULs).
 *
 * Returns 1 when name/email were filled in, 0 when an e-mail address is
 * already known or the line has no <...> pair.
 */
static int bogus_from(char *line)
{
	char *bra, *ket, *dst, *cp;

	/* This is fallback, so do not bother if we already have an
	 * e-mail address.
	 */
	if (*email)
		return 0;

	bra = strchr(line, '<');
	if (!bra)
		return 0;
	ket = strchr(bra, '>');
	if (!ket)
		return 0;

	for (dst = email, cp = bra+1; cp < ket; )
		*dst++ = *cp++;
	*dst = 0;

	/* Skip leading whitespace of the name part. */
	for (cp = line; isspace((unsigned char)*cp); cp++)
		;
	/*
	 * Chop whitespace that precedes the '<'.  The walk must stop at
	 * the start of the buffer: when '<' is the first character, or
	 * only whitespace comes before it, there is nothing further to
	 * the left that belongs to us.
	 */
	while (bra > line && isspace((unsigned char)bra[-1]))
		*--bra = 0;

	cp = sanity_check(cp, email);
	strcpy(name, cp);
	return 1;
}
/*
 * Split the value of a From: header into the global email and name
 * buffers.
 *
 * Works on a private copy of in_line.  Without any '@' the copy is
 * handed to bogus_from() and its result is returned.  If an address is
 * already known and this line contains more than one '@', the line is
 * ignored and 0 is returned.  Otherwise the function returns 1 after
 * storing the address and whatever sanity_check() picks as the name.
 */
static int handle_from(char *in_line)
{
	char work[1000];
	char *cur, *out, *tail;

	strcpy(work, in_line);

	cur = strchr(work, '@');
	if (cur == NULL)
		return bogus_from(work);

	/* An address is already recorded; a line with a second '@' would
	 * only confuse matters.
	 */
	if (email[0] != '\0' && strchr(cur + 1, '@') != NULL)
		return 0;

	/* Back up from the '@' to the first character of the address.  An
	 * opening '<' right before it is blanked out.
	 */
	for (; cur > work; cur--) {
		char prev = cur[-1];

		if (isspace(prev))
			break;
		if (prev == '<') {
			cur[-1] = ' ';
			break;
		}
	}

	/* Move the address into email, blanking it (and a closing '>')
	 * in the working copy as we go.
	 */
	out = email;
	for (;;) {
		unsigned char ch = *cur;

		if (ch == '>') {
			*cur = ' ';
			break;
		}
		if (ch == '\0' || isspace(ch))
			break;
		*cur++ = ' ';
		*out++ = ch;
	}
	*out = '\0';

	/* What is left is the name: either "John Doe" (address was in <>)
	 * or "(John Doe)" (address came first).  Cut trailing whitespace,
	 * dropping a final ')' as well.
	 */
	for (tail = work + strlen(work); tail > work; ) {
		unsigned char ch = *--tail;

		if (isspace(ch))
			continue;
		if (ch == ')')
			*tail = '\0';
		else
			tail[1] = '\0';
		break;
	}

	/* Skip leading whitespace and one opening '('. */
	cur = work;
	while (*cur != '\0' && isspace((unsigned char)*cur))
		cur++;
	if (*cur == '(')
		cur++;

	strcpy(name, sanity_check(cur, email));
	return 1;
}
/*
 * Copy the value part of a header line into `data`.
 *
 * `ofs` is the offset in `line` at which the value starts.  Returns 0
 * after copying, or 1 (copying nothing) when either pointer is NULL.
 * `data` must be large enough for the copied string.
 */
static int handle_header(char *line, char *data, int ofs)
{
	if (line != NULL && data != NULL) {
		strcpy(data, &line[ofs]);
		return 0;
	}
	return 1;
}
/* NOTE NOTE NOTE. We do not claim we do full MIME. We just attempt
* to have enough heuristics to grok MIME encoded patches often found
* on our mailing lists. For example, we do not even treat header lines
* case insensitively.
*/
/*
 * Extract the value of an attribute such as `charset=` from a header
 * value.
 *
 * `name` (including the '=') is searched for case-insensitively
 * anywhere in `line`.  A value that starts with a double quote runs up
 * to the next double quote; an unquoted value runs up to the first
 * ';', space or tab.  In both cases the value ends at the end of the
 * string if no terminator is found.
 *
 * The value is copied to `attr` as a NUL-terminated string and 1 is
 * returned.  When the attribute is absent, `attr` is set to the empty
 * string and 0 is returned.
 *
 * No length is passed in: `attr` must have room for the longest value
 * `line` can contain, i.e. up to strlen(line) bytes including the NUL.
 */
static int slurp_attr(const char *line, const char *name, char *attr)
{
	const char *value, *delims;
	size_t len;

	value = strcasestr(line, name);
	if (value == NULL) {
		attr[0] = '\0';
		return 0;
	}
	value += strlen(name);

	delims = "; \t";
	if (*value == '"') {
		delims = "\"";
		value++;
	}

	len = strcspn(value, delims);
	memcpy(attr, value, len);
	attr[len] = '\0';
	return 1;
}
/*
 * One remembered MIME boundary.  handle_content_type() fills an entry
 * each time a Content-Type value carries a boundary= attribute.
 */
struct content_type {
/* heap-allocated copy of "--" followed by the boundary= value */
char *boundary;
/* strlen() of boundary */
int boundary_len;
};
/* Storage for the remembered boundaries: MAX_BOUNDARIES slots. */
static struct content_type content[MAX_BOUNDARIES];
/* The entry filled most recently.  It starts out at content[0] and is
 * advanced before every store, so handle_content_type() never writes
 * content[0] itself.
 */
static struct content_type *content_top = content;
/*
 * Digest the value of a Content-Type header.
 *
 *  - message_type becomes TYPE_OTHER unless the value mentions "text/".
 *  - If a boundary= attribute is present, "--<boundary>" is pushed onto
 *    the content[] array; the program exits with status 1 when no free
 *    slot is left.
 *  - The global charset buffer receives the lower-cased charset=
 *    attribute (truncated to fit), or is emptied when the attribute is
 *    absent.
 *
 * Always returns 0.
 */
static int handle_content_type(char *line)
{
	/*
	 * slurp_attr() copies without a length limit, so hand it a scratch
	 * buffer that can hold the whole line plus the "--" prefix rather
	 * than a fixed-size array that a long attribute could overrun.
	 */
	char *scratch = xmalloc(strlen(line) + 3);

	if (strcasestr(line, "text/") == NULL)
		message_type = TYPE_OTHER;

	if (slurp_attr(line, "boundary=", scratch + 2)) {
		memcpy(scratch, "--", 2);
		/*
		 * Advance first and test the new position: the slot about
		 * to be written must lie inside content[].  Testing the old
		 * position would let the last push land one past the array.
		 */
		if (++content_top >= &content[MAX_BOUNDARIES]) {
			fprintf(stderr, "Too many boundaries to handle\n");
			exit(1);
		}
		content_top->boundary_len = strlen(scratch);
		content_top->boundary = xmalloc(content_top->boundary_len+1);
		strcpy(content_top->boundary, scratch);
	}

	if (slurp_attr(line, "charset=", scratch)) {
		size_t i;

		/* Bounded copy: charset is a fixed-size global. */
		for (i = 0; i < sizeof(charset) - 1 && scratch[i]; i++)
			charset[i] = tolower((unsigned char)scratch[i]);
		charset[i] = 0;
	}
	else
		charset[0] = 0;

	free(scratch);
	return 0;
}
static int handle_content_transfer_encoding(char *line)
{
if (strcasestr(line, "base64"))
transfer_encoding = TE_BASE64;
else if (strcasestr(line, "quoted-printable"))
transfer_encoding = TE_QP;
else
transfer_encoding = TE_DONTCARE;
|