summaryrefslogtreecommitdiff
path: root/fast-import.c
diff options
context:
space:
mode:
authorLibravatar Shawn O. Pearce <spearce@spearce.org>2006-08-14 20:16:28 -0400
committerLibravatar Shawn O. Pearce <spearce@spearce.org>2007-01-14 02:15:04 -0500
commitc44cdc7eef212ec09901eb2e0996476e0468ed88 (patch)
treeab64a49e8719583156fb7a57281313db61cc60e6 /fast-import.c
parentImplement blob ID validation in fast-import. (diff)
downloadtgif-c44cdc7eef212ec09901eb2e0996476e0468ed88.tar.xz
Converted fast-import to a text based protocol.
Frontend clients can now send a text stream to fast-import rather than a binary stream. This should facilitate developing frontend software as the data stream is easier to view, manipulate and debug my hand and Mark-I eyeball. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
Diffstat (limited to 'fast-import.c')
-rw-r--r--fast-import.c486
1 files changed, 326 insertions, 160 deletions
diff --git a/fast-import.c b/fast-import.c
index 95b84f57e5..2953e80cde 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -4,57 +4,93 @@ Format of STDIN stream:
stream ::= cmd*;
cmd ::= new_blob
- | new_commit
| new_branch
+ | new_commit
| new_tag
;
- new_blob ::= 'blob' blob_data;
+ new_blob ::= 'blob' lf
+ mark?
+ file_content;
+ file_content ::= data;
- new_commit ::= 'comt' ref_name author_committer_msg
- file_change*
- '0';
+ new_branch ::= 'branch' sp ref_str lf
+ ('from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf)?
+ lf;
- new_branch ::= 'brch' dst_ref_name src_ref_name;
- dst_ref_name ::= ref_name;
- src_ref_name ::= ref_name | sha1_exp;
-
- new_tag ::= 'tagg' ref_name tag_name tagger_msg;
+ new_commit ::= 'commit' sp ref_str lf
+ mark?
+ ('author' sp name '<' email '>' ts tz lf)?
+ 'committer' sp name '<' email '>' ts tz lf
+ commit_msg
+ file_change*
+ lf;
+ commit_msg ::= data;
- file_change ::= 'M' path_name hexsha1
- | 'D' path_name
+ file_change ::= 'M' sp mode sp (hexsha1 | idnum) sp path_str lf
+ | 'D' sp path_str lf
;
-
- author_committer_msg ::= len32
- 'author' sp name '<' email '>' ts tz lf
- 'committer' sp name '<' email '>' ts tz lf
- lf
- binary_data;
-
- tagger_msg ::= len32
- 'tagger' sp name '<' email '>' ts tz lf
- lf
- binary_data;
-
- blob_data ::= len32 binary_data; # max len is 2^32-1
- path_name ::= len32 path; # max len is PATH_MAX-1
- ref_name ::= len32 ref; # max len is PATH_MAX-1
- tag_name ::= len32 tag; # max len is PATH_MAX-1
- sha1_exp ::= len32 sha1exp; # max len is PATH_MAX-1
-
- len32 ::= # unsigned 32 bit value, native format;
+ mode ::= '644' | '755';
+
+ new_tag ::= 'tag' sp tag_str lf
+ 'from' sp (ref_str | hexsha1 | sha1exp_str | idnum) lf
+ 'tagger' sp name '<' email '>' ts tz lf
+ tag_msg;
+ tag_msg ::= data;
+
+ # note: the first idnum in a stream should be 1 and subsequent
+ # idnums should not have gaps between values as this will cause
+ # the stream parser to reserve space for the gapped values. An
+ # idnum can be updated in the future to a new object by issuing
+ # a new mark directive with the old idnum.
+ #
+ mark ::= 'mark' sp idnum lf;
+
+ # note: declen indicates the length of binary_data in bytes.
+ # declen does not include the lf preceeding or trailing the
+ # binary data.
+ #
+ data ::= 'data' sp declen lf
+ binary_data
+ lf;
+
+ # note: quoted strings are C-style quoting supporting \c for
+ # common escapes of 'c' (e..g \n, \t, \\, \") or \nnn where nnn
+ # is the signed byte value in octal. Note that the only
+ # characters which must actually be escaped to protect the
+ # stream formatting is: \, " and LF. Otherwise these values
+ # are UTF8.
+ #
+ ref_str ::= ref | '"' quoted(ref) '"' ;
+ sha1exp_str ::= sha1exp | '"' quoted(sha1exp) '"' ;
+ tag_str ::= tag | '"' quoted(tag) '"' ;
+ path_str ::= path | '"' quoted(path) '"' ;
+
+ declen ::= # unsigned 32 bit value, ascii base10 notation;
binary_data ::= # file content, not interpreted;
+
sp ::= # ASCII space character;
lf ::= # ASCII newline (LF) character;
- path ::= # GIT style file path, e.g. "a/b/c";
- ref ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT";
- tag ::= # GIT tag name, e.g. "FIREFOX_1_5";
+
+ # note: a colon (':') must precede the numerical value assigned to
+ # an idnum. This is to distinguish it from a ref or tag name as
+ # GIT does not permit ':' in ref or tag strings.
+ #
+ idnum ::= ':' declen;
+ path ::= # GIT style file path, e.g. "a/b/c";
+ ref ::= # GIT ref name, e.g. "refs/heads/MOZ_GECKO_EXPERIMENT";
+ tag ::= # GIT tag name, e.g. "FIREFOX_1_5";
sha1exp ::= # Any valid GIT SHA1 expression;
hexsha1 ::= # SHA1 in hexadecimal format;
- name ::= # valid GIT author/committer name;
+
+ # note: name and email are UTF8 strings, however name must not
+ # contain '<' or lf and email must not contain any of the
+ # following: '<', '>', lf.
+ #
+ name ::= # valid GIT author/committer name;
email ::= # valid GIT author/committer email;
- ts ::= # time since the epoch in seconds, ascii decimal;
- tz ::= # GIT style timezone;
+ ts ::= # time since the epoch in seconds, ascii base10 notation;
+ tz ::= # GIT style timezone;
*/
#include "builtin.h"
@@ -66,6 +102,8 @@ Format of STDIN stream:
#include "pack.h"
#include "refs.h"
#include "csum-file.h"
+#include "strbuf.h"
+#include "quote.h"
struct object_entry
{
@@ -153,7 +191,7 @@ static size_t mem_pool_alloc = 2*1024*1024 - sizeof(struct mem_pool);
static size_t total_allocd;
static struct mem_pool *mem_pool;
-/* atom management */
+/* Atom management */
static unsigned int atom_table_sz = 4451;
static unsigned int atom_cnt;
static struct atom_str **atom_table;
@@ -184,6 +222,10 @@ static unsigned int branch_table_sz = 1039;
static struct branch **branch_table;
static struct branch *active_branches;
+/* Input stream parsing */
+static struct strbuf command_buf;
+static unsigned long command_mark;
+
static void alloc_objects(int cnt)
{
@@ -330,6 +372,8 @@ static struct branch* new_branch(const char *name)
if (b)
die("Invalid attempt to create duplicate branch: %s", name);
+ if (check_ref_format(name))
+ die("Branch name doesn't conform to GIT standards: %s", name);
b = pool_calloc(1, sizeof(struct branch));
b->name = pool_strdup(name);
@@ -433,22 +477,6 @@ static void yread(int fd, void *buffer, size_t length)
}
}
-static int optional_read(int fd, void *buffer, size_t length)
-{
- ssize_t ret = 0;
- while (ret < length) {
- ssize_t size = xread(fd, (char *) buffer + ret, length - ret);
- if (!size && !ret)
- return 1;
- if (!size)
- die("Read from descriptor %i: end of stream", fd);
- if (size < 0)
- die("Read from descriptor %i: %s", fd, strerror(errno));
- ret += size;
- }
- return 0;
-}
-
static void ywrite(int fd, void *buffer, size_t length)
{
ssize_t ret = 0;
@@ -462,24 +490,9 @@ static void ywrite(int fd, void *buffer, size_t length)
}
}
-static const char* read_path()
-{
- static char sn[PATH_MAX];
- unsigned long slen;
-
- yread(0, &slen, 4);
- if (!slen)
- die("Expected string command parameter, didn't find one");
- if (slen > (PATH_MAX - 1))
- die("Can't handle excessive string length %lu", slen);
- yread(0, sn, slen);
- sn[slen] = 0;
- return sn;
-}
-
-static unsigned long encode_header(
+static size_t encode_header(
enum object_type type,
- unsigned long size,
+ size_t size,
unsigned char *hdr)
{
int n = 1;
@@ -503,7 +516,7 @@ static unsigned long encode_header(
static int store_object(
enum object_type type,
void *dat,
- unsigned long datlen,
+ size_t datlen,
struct last_object *last,
unsigned char *sha1out)
{
@@ -896,15 +909,57 @@ static void dump_branches()
}
}
+static void read_next_command()
+{
+ read_line(&command_buf, stdin, '\n');
+}
+
+static void cmd_mark()
+{
+ if (!strncmp("mark :", command_buf.buf, 6)) {
+ command_mark = strtoul(command_buf.buf + 6, NULL, 10);
+ read_next_command();
+ }
+ else
+ command_mark = 0;
+}
+
+static void* cmd_data (size_t *size)
+{
+ size_t n = 0;
+ void *buffer;
+ size_t length;
+
+ if (strncmp("data ", command_buf.buf, 5))
+ die("Expected 'data n' command, found: %s", command_buf.buf);
+
+ length = strtoul(command_buf.buf + 5, NULL, 10);
+ buffer = xmalloc(length);
+
+ while (n < length) {
+ size_t s = fread((char*)buffer + n, 1, length - n, stdin);
+ if (!s && feof(stdin))
+ die("EOF in data (%lu bytes remaining)", length - n);
+ n += s;
+ }
+
+ if (fgetc(stdin) != '\n')
+ die("An lf did not trail the binary data as expected.");
+
+ *size = length;
+ return buffer;
+}
+
static void cmd_new_blob()
{
- unsigned long datlen;
- unsigned char sha1[20];
+ size_t datlen;
void *dat;
+ unsigned char sha1[20];
+
+ read_next_command();
+ cmd_mark();
+ dat = cmd_data(&datlen);
- yread(0, &datlen, 4);
- dat = xmalloc(datlen);
- yread(0, dat, datlen);
if (store_object(OBJ_BLOB, dat, datlen, &last_blob, sha1))
free(dat);
}
@@ -949,122 +1004,231 @@ static void load_branch(struct branch *b)
static void file_change_m(struct branch *b)
{
- const char *path = read_path();
+ const char *p = command_buf.buf + 2;
+ char *p_uq;
+ const char *endp;
struct object_entry *oe;
- char hexsha1[41];
unsigned char sha1[20];
+ unsigned int mode;
char type[20];
- yread(0, hexsha1, 40);
- hexsha1[40] = 0;
+ p = get_mode(p, &mode);
+ if (!p)
+ die("Corrupt mode: %s", command_buf.buf);
+ switch (mode) {
+ case S_IFREG | 0644:
+ case S_IFREG | 0755:
+ case 0644:
+ case 0755:
+ /* ok */
+ break;
+ default:
+ die("Corrupt mode: %s", command_buf.buf);
+ }
+
+ if (get_sha1_hex(p, sha1))
+ die("Invalid SHA1: %s", command_buf.buf);
+ p += 40;
+ if (*p++ != ' ')
+ die("Missing space after SHA1: %s", command_buf.buf);
+
+ p_uq = unquote_c_style(p, &endp);
+ if (p_uq) {
+ if (*endp)
+ die("Garbage after path in: %s", command_buf.buf);
+ p = p_uq;
+ }
- if (get_sha1_hex(hexsha1, sha1))
- die("Invalid sha1 %s for %s", hexsha1, path);
oe = find_object(sha1);
if (oe) {
if (oe->type != OBJ_BLOB)
- die("%s is a %s not a blob (for %s)", hexsha1, type_names[oe->type], path);
+ die("Not a blob (actually a %s): %s",
+ command_buf.buf, type_names[oe->type]);
} else {
if (sha1_object_info(sha1, type, NULL))
- die("No blob %s for %s", hexsha1, path);
+ die("Blob not found: %s", command_buf.buf);
if (strcmp(blob_type, type))
- die("%s is a %s not a blob (for %s)", hexsha1, type, path);
+ die("Not a blob (actually a %s): %s",
+ command_buf.buf, type);
}
- tree_content_set(&b->branch_tree, path, sha1, S_IFREG | 0644);
+ tree_content_set(&b->branch_tree, p, sha1, S_IFREG | mode);
+
+ if (p_uq)
+ free(p_uq);
}
static void file_change_d(struct branch *b)
{
- tree_content_remove(&b->branch_tree, read_path());
+ const char *p = command_buf.buf + 2;
+ char *p_uq;
+ const char *endp;
+
+ p_uq = unquote_c_style(p, &endp);
+ if (p_uq) {
+ if (*endp)
+ die("Garbage after path in: %s", command_buf.buf);
+ p = p_uq;
+ }
+ tree_content_remove(&b->branch_tree, p);
+ if (p_uq)
+ free(p_uq);
}
static void cmd_new_commit()
{
- static const unsigned int max_hdr_len = 94;
- const char *name = read_path();
- struct branch *b = lookup_branch(name);
- unsigned int acmsglen;
- char *body, *c;
-
+ struct branch *b;
+ void *msg;
+ size_t msglen;
+ char *str_uq;
+ const char *endp;
+ char *sp;
+ char *author = NULL;
+ char *committer = NULL;
+ char *body;
+
+ /* Obtain the branch name from the rest of our command */
+ sp = strchr(command_buf.buf, ' ') + 1;
+ str_uq = unquote_c_style(sp, &endp);
+ if (str_uq) {
+ if (*endp)
+ die("Garbage after ref in: %s", command_buf.buf);
+ sp = str_uq;
+ }
+ b = lookup_branch(sp);
if (!b)
- die("Branch not declared: %s", name);
+ die("Branch not declared: %s", sp);
+ if (str_uq)
+ free(str_uq);
+
+ read_next_command();
+ cmd_mark();
+ if (!strncmp("author ", command_buf.buf, 7)) {
+ author = strdup(command_buf.buf);
+ read_next_command();
+ }
+ if (!strncmp("committer ", command_buf.buf, 10)) {
+ committer = strdup(command_buf.buf);
+ read_next_command();
+ }
+ if (!committer)
+ die("Expected committer but didn't get one");
+ msg = cmd_data(&msglen);
+
+ /* ensure the branch is active/loaded */
if (!b->branch_tree.tree) {
unload_one_branch();
load_branch(b);
}
- /* author_committer_msg */
- yread(0, &acmsglen, 4);
- body = xmalloc(acmsglen + max_hdr_len);
- c = body + max_hdr_len;
- yread(0, c, acmsglen);
-
- /* oddly enough this is all that fsck-objects cares about */
- if (memcmp(c, "author ", 7))
- die("Invalid commit format on branch %s", name);
-
/* file_change* */
for (;;) {
- unsigned char cmd;
- yread(0, &cmd, 1);
- if (cmd == '0')
+ read_next_command();
+ if (1 == command_buf.len)
break;
- else if (cmd == 'M')
+ else if (!strncmp("M ", command_buf.buf, 2))
file_change_m(b);
- else if (cmd == 'D')
+ else if (!strncmp("D ", command_buf.buf, 2))
file_change_d(b);
else
- die("Unsupported file_change: %c", cmd);
+ die("Unsupported file_change: %s", command_buf.buf);
}
- if (memcmp(b->sha1, null_sha1, 20)) {
- sprintf(c - 48, "parent %s", sha1_to_hex(b->sha1));
- *(c - 1) = '\n';
- c -= 48;
- }
+ /* build the tree and the commit */
store_tree(&b->branch_tree);
- sprintf(c - 46, "tree %s", sha1_to_hex(b->branch_tree.sha1));
- *(c - 1) = '\n';
- c -= 46;
-
- store_object(OBJ_COMMIT,
- c, (body + max_hdr_len + acmsglen) - c,
- NULL, b->sha1);
+ body = xmalloc(97 + msglen
+ + (author
+ ? strlen(author) + strlen(committer)
+ : 2 * strlen(committer)));
+ sp = body;
+ sp += sprintf(sp, "tree %s\n", sha1_to_hex(b->branch_tree.sha1));
+ if (memcmp(b->sha1, null_sha1, 20))
+ sp += sprintf(sp, "parent %s\n", sha1_to_hex(b->sha1));
+ if (author)
+ sp += sprintf(sp, "%s\n", author);
+ else
+ sp += sprintf(sp, "author %s\n", committer + 10);
+ sp += sprintf(sp, "%s\n\n", committer);
+ memcpy(sp, msg, msglen);
+ sp += msglen;
+ if (author)
+ free(author);
+ free(committer);
+ free(msg);
+
+ store_object(OBJ_COMMIT, body, sp - body, NULL, b->sha1);
free(body);
b->last_commit = object_count_by_type[OBJ_COMMIT];
}
static void cmd_new_branch()
{
- struct branch *b = new_branch(read_path());
- const char *base = read_path();
- struct branch *s = lookup_branch(base);
-
- if (!strcmp(b->name, base))
- die("Can't create a branch from itself: %s", base);
- else if (s) {
- memcpy(b->sha1, s->sha1, 20);
- memcpy(b->branch_tree.sha1, s->branch_tree.sha1, 20);
+ struct branch *b;
+ char *str_uq;
+ const char *endp;
+ char *sp;
+
+ /* Obtain the new branch name from the rest of our command */
+ sp = strchr(command_buf.buf, ' ') + 1;
+ str_uq = unquote_c_style(sp, &endp);
+ if (str_uq) {
+ if (*endp)
+ die("Garbage after ref in: %s", command_buf.buf);
+ sp = str_uq;
}
- else if (!get_sha1(base, b->sha1)) {
- if (!memcmp(b->sha1, null_sha1, 20))
- memcpy(b->branch_tree.sha1, null_sha1, 20);
- else {
- unsigned long size;
- char *buf;
-
- buf = read_object_with_reference(b->sha1,
- type_names[OBJ_COMMIT], &size, b->sha1);
- if (!buf || size < 46)
- die("Not a valid commit: %s", base);
- if (memcmp("tree ", buf, 5)
- || get_sha1_hex(buf + 5, b->branch_tree.sha1))
- die("The commit %s is corrupt", sha1_to_hex(b->sha1));
- free(buf);
+ b = new_branch(sp);
+ if (str_uq)
+ free(str_uq);
+ read_next_command();
+
+ /* from ... */
+ if (!strncmp("from ", command_buf.buf, 5)) {
+ const char *from;
+ struct branch *s;
+
+ from = strchr(command_buf.buf, ' ') + 1;
+ str_uq = unquote_c_style(from, &endp);
+ if (str_uq) {
+ if (*endp)
+ die("Garbage after string in: %s", command_buf.buf);
+ from = str_uq;
}
- } else
- die("Not a SHA1 or branch: %s", base);
+
+ s = lookup_branch(from);
+ if (b == s)
+ die("Can't create a branch from itself: %s", b->name);
+ else if (s) {
+ memcpy(b->sha1, s->sha1, 20);
+ memcpy(b->branch_tree.sha1, s->branch_tree.sha1, 20);
+ } else if (!get_sha1(from, b->sha1)) {
+ if (!memcmp(b->sha1, null_sha1, 20))
+ memcpy(b->branch_tree.sha1, null_sha1, 20);
+ else {
+ unsigned long size;
+ char *buf;
+
+ buf = read_object_with_reference(b->sha1,
+ type_names[OBJ_COMMIT], &size, b->sha1);
+ if (!buf || size < 46)
+ die("Not a valid commit: %s", from);
+ if (memcmp("tree ", buf, 5)
+ || get_sha1_hex(buf + 5, b->branch_tree.sha1))
+ die("The commit %s is corrupt", sha1_to_hex(b->sha1));
+ free(buf);
+ }
+ } else
+ die("Invalid ref name or SHA1 expression: %s", from);
+
+ if (str_uq)
+ free(str_uq);
+ read_next_command();
+ } else {
+ memcpy(b->sha1, null_sha1, 20);
+ memcpy(b->branch_tree.sha1, null_sha1, 20);
+ }
+
+ if (command_buf.eof || command_buf.len > 1)
+ die("An lf did not terminate the branch command as expected.");
}
int main(int argc, const char **argv)
@@ -1087,26 +1251,28 @@ int main(int argc, const char **argv)
if (pack_fd < 0)
die("Can't create %s: %s", pack_name, strerror(errno));
+ init_pack_header();
alloc_objects(est_obj_cnt);
+ strbuf_init(&command_buf);
atom_table = xcalloc(atom_table_sz, sizeof(struct atom_str*));
branch_table = xcalloc(branch_table_sz, sizeof(struct branch*));
avail_tree_table = xcalloc(avail_tree_table_sz, sizeof(struct avail_tree_content*));
- init_pack_header();
for (;;) {
- unsigned long cmd;
- if (optional_read(0, &cmd, 4))
+ read_next_command();
+ if (command_buf.eof)
break;
-
- switch (ntohl(cmd)) {
- case 'blob': cmd_new_blob(); break;
- case 'comt': cmd_new_commit(); break;
- case 'brch': cmd_new_branch(); break;
- default:
- die("Invalid command %lu", cmd);
- }
+ else if (!strcmp("blob", command_buf.buf))
+ cmd_new_blob();
+ else if (!strncmp("branch ", command_buf.buf, 7))
+ cmd_new_branch();
+ else if (!strncmp("commit ", command_buf.buf, 7))
+ cmd_new_commit();
+ else
+ die("Unsupported command: %s", command_buf.buf);
}
+
fixup_header_footer();
close(pack_fd);
write_index(idx_name);