diff options
Diffstat (limited to 'vcs-svn/svndump.c')
-rw-r--r-- | vcs-svn/svndump.c | 577 |
1 files changed, 404 insertions, 173 deletions
diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 2ad2c307dd..08d136b8cc 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -8,11 +8,18 @@ */ #include "cache.h" -#include "repo_tree.h" #include "fast_export.h" #include "line_buffer.h" -#include "obj_pool.h" -#include "string_pool.h" +#include "strbuf.h" +#include "svndump.h" + +/* + * Compare start of string to literal of equal length; + * must be guarded by length test. + */ +#define constcmp(s, ref) memcmp(s, ref, sizeof(ref) - 1) + +#define REPORT_FILENO 3 #define NODEACT_REPLACE 4 #define NODEACT_DELETE 3 @@ -20,221 +27,374 @@ #define NODEACT_CHANGE 1 #define NODEACT_UNKNOWN 0 -#define DUMP_CTX 0 -#define REV_CTX 1 -#define NODE_CTX 2 +/* States: */ +#define DUMP_CTX 0 /* dump metadata */ +#define REV_CTX 1 /* revision metadata */ +#define NODE_CTX 2 /* node metadata */ +#define INTERNODE_CTX 3 /* between nodes */ -#define LENGTH_UNKNOWN (~0) #define DATE_RFC2822_LEN 31 -/* Create memory pool for log messages */ -obj_pool_gen(log, char, 4096) - -static char* log_copy(uint32_t length, char *log) -{ - char *buffer; - log_free(log_pool.size); - buffer = log_pointer(log_alloc(length)); - strncpy(buffer, log, length); - return buffer; -} +static struct line_buffer input = LINE_BUFFER_INIT; static struct { - uint32_t action, propLength, textLength, srcRev, srcMode, mark, type; - uint32_t src[REPO_MAX_PATH_DEPTH], dst[REPO_MAX_PATH_DEPTH]; + uint32_t action, srcRev, type; + off_t prop_length, text_length; + struct strbuf src, dst; + uint32_t text_delta, prop_delta; } node_ctx; static struct { - uint32_t revision, author; - unsigned long timestamp; - char *log; + uint32_t revision; + timestamp_t timestamp; + struct strbuf log, author, note; } rev_ctx; static struct { - uint32_t version, uuid, url; + uint32_t version; + struct strbuf uuid, url; } dump_ctx; -static struct { - uint32_t svn_log, svn_author, svn_date, svn_executable, svn_special, uuid, - revision_number, node_path, node_kind, node_action, - node_copyfrom_path, node_copyfrom_rev, text_content_length, - prop_content_length, content_length, svn_fs_dump_format_version; -} keys; - static void reset_node_ctx(char *fname) { node_ctx.type = 0; node_ctx.action = NODEACT_UNKNOWN; - node_ctx.propLength = LENGTH_UNKNOWN; - node_ctx.textLength = LENGTH_UNKNOWN; - node_ctx.src[0] = ~0; + node_ctx.prop_length = -1; + node_ctx.text_length = -1; + strbuf_reset(&node_ctx.src); node_ctx.srcRev = 0; - node_ctx.srcMode = 0; - pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.dst, "/", fname); - node_ctx.mark = 0; + strbuf_reset(&node_ctx.dst); + if (fname) + strbuf_addstr(&node_ctx.dst, fname); + node_ctx.text_delta = 0; + node_ctx.prop_delta = 0; } static void reset_rev_ctx(uint32_t revision) { rev_ctx.revision = revision; rev_ctx.timestamp = 0; - rev_ctx.log = NULL; - rev_ctx.author = ~0; + strbuf_reset(&rev_ctx.log); + strbuf_reset(&rev_ctx.author); + strbuf_reset(&rev_ctx.note); } -static void reset_dump_ctx(uint32_t url) +static void reset_dump_ctx(const char *url) { - dump_ctx.url = url; + strbuf_reset(&dump_ctx.url); + if (url) + strbuf_addstr(&dump_ctx.url, url); dump_ctx.version = 1; - dump_ctx.uuid = ~0; + strbuf_reset(&dump_ctx.uuid); } -static void init_keys(void) +static void handle_property(const struct strbuf *key_buf, + struct strbuf *val, + uint32_t *type_set) { - keys.svn_log = pool_intern("svn:log"); - keys.svn_author = pool_intern("svn:author"); - keys.svn_date = pool_intern("svn:date"); - keys.svn_executable = pool_intern("svn:executable"); - keys.svn_special = pool_intern("svn:special"); - keys.uuid = pool_intern("UUID"); - keys.revision_number = pool_intern("Revision-number"); - keys.node_path = pool_intern("Node-path"); - keys.node_kind = pool_intern("Node-kind"); - keys.node_action = pool_intern("Node-action"); - keys.node_copyfrom_path = pool_intern("Node-copyfrom-path"); - keys.node_copyfrom_rev = pool_intern("Node-copyfrom-rev"); - keys.text_content_length = pool_intern("Text-content-length"); - keys.prop_content_length = pool_intern("Prop-content-length"); - keys.content_length = pool_intern("Content-length"); - keys.svn_fs_dump_format_version = pool_intern("SVN-fs-dump-format-version"); + const char *key = key_buf->buf; + size_t keylen = key_buf->len; + + switch (keylen + 1) { + case sizeof("svn:log"): + if (constcmp(key, "svn:log")) + break; + if (!val) + die("invalid dump: unsets svn:log"); + strbuf_swap(&rev_ctx.log, val); + break; + case sizeof("svn:author"): + if (constcmp(key, "svn:author")) + break; + if (!val) + strbuf_reset(&rev_ctx.author); + else + strbuf_swap(&rev_ctx.author, val); + break; + case sizeof("svn:date"): + if (constcmp(key, "svn:date")) + break; + if (!val) + die("invalid dump: unsets svn:date"); + if (parse_date_basic(val->buf, &rev_ctx.timestamp, NULL)) + warning("invalid timestamp: %s", val->buf); + break; + case sizeof("svn:executable"): + case sizeof("svn:special"): + if (keylen == strlen("svn:executable") && + constcmp(key, "svn:executable")) + break; + if (keylen == strlen("svn:special") && + constcmp(key, "svn:special")) + break; + if (*type_set) { + if (!val) + return; + die("invalid dump: sets type twice"); + } + if (!val) { + node_ctx.type = S_IFREG | 0644; + return; + } + *type_set = 1; + node_ctx.type = keylen == strlen("svn:executable") ? + (S_IFREG | 0755) : + S_IFLNK; + } +} + +static void die_short_read(void) +{ + if (buffer_ferror(&input)) + die_errno("error reading dump file"); + die("invalid dump: unexpected end of file"); } static void read_props(void) { - uint32_t len; - uint32_t key = ~0; - char *val = NULL; - char *t; - while ((t = buffer_read_line()) && strcmp(t, "PROPS-END")) { - if (!strncmp(t, "K ", 2)) { - len = atoi(&t[2]); - key = pool_intern(buffer_read_string(len)); - buffer_read_line(); - } else if (!strncmp(t, "V ", 2)) { - len = atoi(&t[2]); - val = buffer_read_string(len); - if (key == keys.svn_log) { - /* Value length excludes terminating nul. */ - rev_ctx.log = log_copy(len + 1, val); - } else if (key == keys.svn_author) { - rev_ctx.author = pool_intern(val); - } else if (key == keys.svn_date) { - if (parse_date_basic(val, &rev_ctx.timestamp, NULL)) - fprintf(stderr, "Invalid timestamp: %s\n", val); - } else if (key == keys.svn_executable) { - node_ctx.type = REPO_MODE_EXE; - } else if (key == keys.svn_special) { - node_ctx.type = REPO_MODE_LNK; - } - key = ~0; - buffer_read_line(); + static struct strbuf key = STRBUF_INIT; + static struct strbuf val = STRBUF_INIT; + const char *t; + /* + * NEEDSWORK: to support simple mode changes like + * K 11 + * svn:special + * V 1 + * * + * D 14 + * svn:executable + * we keep track of whether a mode has been set and reset to + * plain file only if not. We should be keeping track of the + * symlink and executable bits separately instead. + */ + uint32_t type_set = 0; + while ((t = buffer_read_line(&input)) && strcmp(t, "PROPS-END")) { + uint32_t len; + const char type = t[0]; + int ch; + + if (!type || t[1] != ' ') + die("invalid property line: %s", t); + len = atoi(&t[2]); + strbuf_reset(&val); + buffer_read_binary(&input, &val, len); + if (val.len < len) + die_short_read(); + + /* Discard trailing newline. */ + ch = buffer_read_char(&input); + if (ch == EOF) + die_short_read(); + if (ch != '\n') + die("invalid dump: expected newline after %s", val.buf); + + switch (type) { + case 'K': + strbuf_swap(&key, &val); + continue; + case 'D': + handle_property(&val, NULL, &type_set); + continue; + case 'V': + handle_property(&key, &val, &type_set); + strbuf_reset(&key); + continue; + default: + die("invalid property line: %s", t); } } } static void handle_node(void) { - if (node_ctx.propLength != LENGTH_UNKNOWN && node_ctx.propLength) - read_props(); - - if (node_ctx.srcRev) - node_ctx.srcMode = repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst); - - if (node_ctx.textLength != LENGTH_UNKNOWN && - node_ctx.type != REPO_MODE_DIR) - node_ctx.mark = next_blob_mark(); + const uint32_t type = node_ctx.type; + const int have_props = node_ctx.prop_length != -1; + const int have_text = node_ctx.text_length != -1; + /* + * Old text for this node: + * NULL - directory or bug + * empty_blob - empty + * "<dataref>" - data retrievable from fast-import + */ + static const char *const empty_blob = "::empty::"; + const char *old_data = NULL; + uint32_t old_mode = S_IFREG | 0644; if (node_ctx.action == NODEACT_DELETE) { - repo_delete(node_ctx.dst); - } else if (node_ctx.action == NODEACT_CHANGE || - node_ctx.action == NODEACT_REPLACE) { - if (node_ctx.action == NODEACT_REPLACE && - node_ctx.type == REPO_MODE_DIR) - repo_replace(node_ctx.dst, node_ctx.mark); - else if (node_ctx.propLength != LENGTH_UNKNOWN) - repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark); - else if (node_ctx.textLength != LENGTH_UNKNOWN) - node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark); + if (have_text || have_props || node_ctx.srcRev) + die("invalid dump: deletion node has " + "copyfrom info, text, or properties"); + fast_export_delete(node_ctx.dst.buf); + return; + } + if (node_ctx.action == NODEACT_REPLACE) { + fast_export_delete(node_ctx.dst.buf); + node_ctx.action = NODEACT_ADD; + } + if (node_ctx.srcRev) { + fast_export_copy(node_ctx.srcRev, node_ctx.src.buf, node_ctx.dst.buf); + if (node_ctx.action == NODEACT_ADD) + node_ctx.action = NODEACT_CHANGE; + } + if (have_text && type == S_IFDIR) + die("invalid dump: directories cannot have text attached"); + + /* + * Find old content (old_data) and decide on the new mode. + */ + if (node_ctx.action == NODEACT_CHANGE && !*node_ctx.dst.buf) { + if (type != S_IFDIR) + die("invalid dump: root of tree is not a regular file"); + old_data = NULL; + } else if (node_ctx.action == NODEACT_CHANGE) { + uint32_t mode; + old_data = fast_export_read_path(node_ctx.dst.buf, &mode); + if (mode == S_IFDIR && type != S_IFDIR) + die("invalid dump: cannot modify a directory into a file"); + if (mode != S_IFDIR && type == S_IFDIR) + die("invalid dump: cannot modify a file into a directory"); + node_ctx.type = mode; + old_mode = mode; } else if (node_ctx.action == NODEACT_ADD) { - if (node_ctx.srcRev && node_ctx.propLength != LENGTH_UNKNOWN) - repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark); - else if (node_ctx.srcRev && node_ctx.textLength != LENGTH_UNKNOWN) - node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark); - else if ((node_ctx.type == REPO_MODE_DIR && !node_ctx.srcRev) || - node_ctx.textLength != LENGTH_UNKNOWN) - repo_add(node_ctx.dst, node_ctx.type, node_ctx.mark); + if (type == S_IFDIR) + old_data = NULL; + else if (have_text) + old_data = empty_blob; + else + die("invalid dump: adds node without text"); + } else { + die("invalid dump: Node-path block lacks Node-action"); + } + + /* + * Adjust mode to reflect properties. + */ + if (have_props) { + if (!node_ctx.prop_delta) + node_ctx.type = type; + if (node_ctx.prop_length) + read_props(); } - if (node_ctx.propLength == LENGTH_UNKNOWN && node_ctx.srcMode) - node_ctx.type = node_ctx.srcMode; + /* + * Save the result. + */ + if (type == S_IFDIR) /* directories are not tracked. */ + return; + assert(old_data); + if (old_data == empty_blob) + /* For the fast_export_* functions, NULL means empty. */ + old_data = NULL; + if (!have_text) { + fast_export_modify(node_ctx.dst.buf, node_ctx.type, old_data); + return; + } + if (!node_ctx.text_delta) { + fast_export_modify(node_ctx.dst.buf, node_ctx.type, "inline"); + fast_export_data(node_ctx.type, node_ctx.text_length, &input); + return; + } + fast_export_modify(node_ctx.dst.buf, node_ctx.type, "inline"); + fast_export_blob_delta(node_ctx.type, old_mode, old_data, + node_ctx.text_length, &input); +} - if (node_ctx.mark) - fast_export_blob(node_ctx.type, node_ctx.mark, node_ctx.textLength); - else if (node_ctx.textLength != LENGTH_UNKNOWN) - buffer_skip_bytes(node_ctx.textLength); +static void begin_revision(const char *remote_ref) +{ + if (!rev_ctx.revision) /* revision 0 gets no git commit. */ + return; + fast_export_begin_commit(rev_ctx.revision, rev_ctx.author.buf, + &rev_ctx.log, dump_ctx.uuid.buf, dump_ctx.url.buf, + rev_ctx.timestamp, remote_ref); } -static void handle_revision(void) +static void end_revision(const char *note_ref) { - if (rev_ctx.revision) - repo_commit(rev_ctx.revision, rev_ctx.author, rev_ctx.log, - dump_ctx.uuid, dump_ctx.url, rev_ctx.timestamp); + struct strbuf mark = STRBUF_INIT; + if (rev_ctx.revision) { + fast_export_end_commit(rev_ctx.revision); + fast_export_begin_note(rev_ctx.revision, "remote-svn", + "Note created by remote-svn.", rev_ctx.timestamp, note_ref); + strbuf_addf(&mark, ":%"PRIu32, rev_ctx.revision); + fast_export_note(mark.buf, "inline"); + fast_export_buf_to_data(&rev_ctx.note); + strbuf_release(&mark); + } } -void svndump_read(const char *url) +void svndump_read(const char *url, const char *local_ref, const char *notes_ref) { char *val; char *t; uint32_t active_ctx = DUMP_CTX; uint32_t len; - uint32_t key; - reset_dump_ctx(pool_intern(url)); - while ((t = buffer_read_line())) { - val = strstr(t, ": "); + reset_dump_ctx(url); + while ((t = buffer_read_line(&input))) { + val = strchr(t, ':'); if (!val) continue; - *val++ = '\0'; - *val++ = '\0'; - key = pool_intern(t); + val++; + if (*val != ' ') + continue; + val++; - if (key == keys.svn_fs_dump_format_version) { + /* strlen(key) + 1 */ + switch (val - t - 1) { + case sizeof("SVN-fs-dump-format-version"): + if (constcmp(t, "SVN-fs-dump-format-version")) + continue; dump_ctx.version = atoi(val); - if (dump_ctx.version > 2) - die("expected svn dump format version <= 2, found %"PRIu32, + if (dump_ctx.version > 3) + die("expected svn dump format version <= 3, found %"PRIu32, dump_ctx.version); - } else if (key == keys.uuid) { - dump_ctx.uuid = pool_intern(val); - } else if (key == keys.revision_number) { + break; + case sizeof("UUID"): + if (constcmp(t, "UUID")) + continue; + strbuf_reset(&dump_ctx.uuid); + strbuf_addstr(&dump_ctx.uuid, val); + break; + case sizeof("Revision-number"): + if (constcmp(t, "Revision-number")) + continue; if (active_ctx == NODE_CTX) handle_node(); + if (active_ctx == REV_CTX) + begin_revision(local_ref); if (active_ctx != DUMP_CTX) - handle_revision(); + end_revision(notes_ref); active_ctx = REV_CTX; reset_rev_ctx(atoi(val)); - } else if (key == keys.node_path) { - if (active_ctx == NODE_CTX) - handle_node(); - active_ctx = NODE_CTX; - reset_node_ctx(val); - } else if (key == keys.node_kind) { + strbuf_addf(&rev_ctx.note, "%s\n", t); + break; + case sizeof("Node-path"): + if (constcmp(t, "Node-")) + continue; + if (!constcmp(t + strlen("Node-"), "path")) { + if (active_ctx == NODE_CTX) + handle_node(); + if (active_ctx == REV_CTX) + begin_revision(local_ref); + active_ctx = NODE_CTX; + reset_node_ctx(val); + strbuf_addf(&rev_ctx.note, "%s\n", t); + break; + } + if (constcmp(t + strlen("Node-"), "kind")) + continue; + strbuf_addf(&rev_ctx.note, "%s\n", t); if (!strcmp(val, "dir")) - node_ctx.type = REPO_MODE_DIR; + node_ctx.type = S_IFDIR; else if (!strcmp(val, "file")) - node_ctx.type = REPO_MODE_BLB; + node_ctx.type = S_IFREG | 0644; else fprintf(stderr, "Unknown node-kind: %s\n", val); - } else if (key == keys.node_action) { + break; + case sizeof("Node-action"): + if (constcmp(t, "Node-action")) + continue; + strbuf_addf(&rev_ctx.note, "%s\n", t); if (!strcmp(val, "delete")) { node_ctx.action = NODEACT_DELETE; } else if (!strcmp(val, "add")) { @@ -247,52 +407,125 @@ void svndump_read(const char *url) fprintf(stderr, "Unknown node-action: %s\n", val); node_ctx.action = NODEACT_UNKNOWN; } - } else if (key == keys.node_copyfrom_path) { - pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.src, "/", val); - } else if (key == keys.node_copyfrom_rev) { + break; + case sizeof("Node-copyfrom-path"): + if (constcmp(t, "Node-copyfrom-path")) + continue; + strbuf_reset(&node_ctx.src); + strbuf_addstr(&node_ctx.src, val); + strbuf_addf(&rev_ctx.note, "%s\n", t); + break; + case sizeof("Node-copyfrom-rev"): + if (constcmp(t, "Node-copyfrom-rev")) + continue; node_ctx.srcRev = atoi(val); - } else if (key == keys.text_content_length) { - node_ctx.textLength = atoi(val); - } else if (key == keys.prop_content_length) { - node_ctx.propLength = atoi(val); - } else if (key == keys.content_length) { + strbuf_addf(&rev_ctx.note, "%s\n", t); + break; + case sizeof("Text-content-length"): + if (constcmp(t, "Text") && constcmp(t, "Prop")) + continue; + if (constcmp(t + 4, "-content-length")) + continue; + { + char *end; + uintmax_t len; + + len = strtoumax(val, &end, 10); + if (!isdigit(*val) || *end) + die("invalid dump: non-numeric length %s", val); + if (len > maximum_signed_value_of_type(off_t)) + die("unrepresentable length in dump: %s", val); + + if (*t == 'T') + node_ctx.text_length = (off_t) len; + else + node_ctx.prop_length = (off_t) len; + break; + } + case sizeof("Text-delta"): + if (!constcmp(t, "Text-delta")) { + node_ctx.text_delta = !strcmp(val, "true"); + break; + } + if (constcmp(t, "Prop-delta")) + continue; + node_ctx.prop_delta = !strcmp(val, "true"); + break; + case sizeof("Content-length"): + if (constcmp(t, "Content-length")) + continue; len = atoi(val); - buffer_read_line(); + t = buffer_read_line(&input); + if (!t) + die_short_read(); + if (*t) + die("invalid dump: expected blank line after content length header"); if (active_ctx == REV_CTX) { read_props(); } else if (active_ctx == NODE_CTX) { handle_node(); - active_ctx = REV_CTX; + active_ctx = INTERNODE_CTX; } else { fprintf(stderr, "Unexpected content length header: %"PRIu32"\n", len); - buffer_skip_bytes(len); + if (buffer_skip_bytes(&input, len) != len) + die_short_read(); } } } + if (buffer_ferror(&input)) + die_short_read(); if (active_ctx == NODE_CTX) handle_node(); + if (active_ctx == REV_CTX) + begin_revision(local_ref); if (active_ctx != DUMP_CTX) - handle_revision(); + end_revision(notes_ref); } -void svndump_init(const char *filename) +static void init(int report_fd) { - buffer_init(filename); - repo_init(); - reset_dump_ctx(~0); + fast_export_init(report_fd); + strbuf_init(&dump_ctx.uuid, 4096); + strbuf_init(&dump_ctx.url, 4096); + strbuf_init(&rev_ctx.log, 4096); + strbuf_init(&rev_ctx.author, 4096); + strbuf_init(&rev_ctx.note, 4096); + strbuf_init(&node_ctx.src, 4096); + strbuf_init(&node_ctx.dst, 4096); + reset_dump_ctx(NULL); reset_rev_ctx(0); reset_node_ctx(NULL); - init_keys(); + return; +} + +int svndump_init(const char *filename) +{ + if (buffer_init(&input, filename)) + return error_errno("cannot open %s", filename ? filename : "NULL"); + init(REPORT_FILENO); + return 0; +} + +int svndump_init_fd(int in_fd, int back_fd) +{ + if(buffer_fdinit(&input, xdup(in_fd))) + return error_errno("cannot open fd %d", in_fd); + init(xdup(back_fd)); + return 0; } void svndump_deinit(void) { - log_reset(); - repo_reset(); - reset_dump_ctx(~0); + fast_export_deinit(); + reset_dump_ctx(NULL); reset_rev_ctx(0); reset_node_ctx(NULL); - if (buffer_deinit()) + strbuf_release(&rev_ctx.log); + strbuf_release(&rev_ctx.author); + strbuf_release(&rev_ctx.note); + strbuf_release(&node_ctx.src); + strbuf_release(&node_ctx.dst); + if (buffer_deinit(&input)) fprintf(stderr, "Input error\n"); if (ferror(stdout)) fprintf(stderr, "Output error\n"); @@ -300,10 +533,8 @@ void svndump_deinit(void) void svndump_reset(void) { - log_reset(); - buffer_reset(); - repo_reset(); - reset_dump_ctx(~0); - reset_rev_ctx(0); - reset_node_ctx(NULL); + strbuf_release(&dump_ctx.uuid); + strbuf_release(&dump_ctx.url); + strbuf_release(&rev_ctx.log); + strbuf_release(&rev_ctx.author); } |