diff options
Diffstat (limited to 'vcs-svn')
-rw-r--r-- | vcs-svn/fast_export.c | 40 | ||||
-rw-r--r-- | vcs-svn/fast_export.h | 11 | ||||
-rw-r--r-- | vcs-svn/line_buffer.c | 121 | ||||
-rw-r--r-- | vcs-svn/line_buffer.h | 33 | ||||
-rw-r--r-- | vcs-svn/line_buffer.txt | 49 | ||||
-rw-r--r-- | vcs-svn/repo_tree.c | 55 | ||||
-rw-r--r-- | vcs-svn/repo_tree.h | 13 | ||||
-rw-r--r-- | vcs-svn/string_pool.c | 2 | ||||
-rw-r--r-- | vcs-svn/svndump.c | 438 | ||||
-rw-r--r-- | vcs-svn/svndump.h | 2 | ||||
-rw-r--r-- | vcs-svn/trp.h | 3 | ||||
-rw-r--r-- | vcs-svn/trp.txt | 12 |
12 files changed, 512 insertions, 267 deletions
diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c index 6cfa256a37..99ed70b88a 100644 --- a/vcs-svn/fast_export.c +++ b/vcs-svn/fast_export.c @@ -31,27 +31,30 @@ void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, } static char gitsvnline[MAX_GITSVN_LINE_LEN]; -void fast_export_commit(uint32_t revision, uint32_t author, char *log, - uint32_t uuid, uint32_t url, +void fast_export_commit(uint32_t revision, const char *author, + const struct strbuf *log, + const char *uuid, const char *url, unsigned long timestamp) { + static const struct strbuf empty = STRBUF_INIT; if (!log) - log = ""; - if (~uuid && ~url) { + log = &empty; + if (*uuid && *url) { snprintf(gitsvnline, MAX_GITSVN_LINE_LEN, "\n\ngit-svn-id: %s@%"PRIu32" %s\n", - pool_fetch(url), revision, pool_fetch(uuid)); + url, revision, uuid); } else { *gitsvnline = '\0'; } printf("commit refs/heads/master\n"); printf("committer %s <%s@%s> %ld +0000\n", - ~author ? pool_fetch(author) : "nobody", - ~author ? pool_fetch(author) : "nobody", - ~uuid ? pool_fetch(uuid) : "local", timestamp); - printf("data %"PRIu32"\n%s%s\n", - (uint32_t) (strlen(log) + strlen(gitsvnline)), - log, gitsvnline); + *author ? author : "nobody", + *author ? author : "nobody", + *uuid ? 
uuid : "local", timestamp); + printf("data %"PRIuMAX"\n", + (uintmax_t) (log->len + strlen(gitsvnline))); + fwrite(log->buf, log->len, 1, stdout); + printf("%s\n", gitsvnline); if (!first_commit_done) { if (revision > 1) printf("from refs/heads/master^0\n"); @@ -63,14 +66,23 @@ void fast_export_commit(uint32_t revision, uint32_t author, char *log, printf("progress Imported commit %"PRIu32".\n\n", revision); } -void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len) +static void die_short_read(struct line_buffer *input) +{ + if (buffer_ferror(input)) + die_errno("error reading dump file"); + die("invalid dump: unexpected end of file"); +} + +void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len, struct line_buffer *input) { if (mode == REPO_MODE_LNK) { /* svn symlink blobs start with "link " */ - buffer_skip_bytes(5); len -= 5; + if (buffer_skip_bytes(input, 5) != 5) + die_short_read(input); } printf("blob\nmark :%"PRIu32"\ndata %"PRIu32"\n", mark, len); - buffer_copy_bytes(len); + if (buffer_copy_bytes(input, len) != len) + die_short_read(input); fputc('\n', stdout); } diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h index 2aaaea53d5..33a8fe996f 100644 --- a/vcs-svn/fast_export.h +++ b/vcs-svn/fast_export.h @@ -1,11 +1,16 @@ #ifndef FAST_EXPORT_H_ #define FAST_EXPORT_H_ +#include "line_buffer.h" +struct strbuf; + void fast_export_delete(uint32_t depth, uint32_t *path); void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, uint32_t mark); -void fast_export_commit(uint32_t revision, uint32_t author, char *log, - uint32_t uuid, uint32_t url, unsigned long timestamp); -void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len); +void fast_export_commit(uint32_t revision, const char *author, + const struct strbuf *log, const char *uuid, + const char *url, unsigned long timestamp); +void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len, + struct line_buffer *input); #endif diff --git 
a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c index 1543567093..c39038723e 100644 --- a/vcs-svn/line_buffer.c +++ b/vcs-svn/line_buffer.c @@ -5,47 +5,81 @@ #include "git-compat-util.h" #include "line_buffer.h" -#include "obj_pool.h" +#include "strbuf.h" -#define LINE_BUFFER_LEN 10000 #define COPY_BUFFER_LEN 4096 -/* Create memory pool for char sequence of known length */ -obj_pool_gen(blob, char, 4096) +int buffer_init(struct line_buffer *buf, const char *filename) +{ + buf->infile = filename ? fopen(filename, "r") : stdin; + if (!buf->infile) + return -1; + return 0; +} -static char line_buffer[LINE_BUFFER_LEN]; -static char byte_buffer[COPY_BUFFER_LEN]; -static FILE *infile; +int buffer_fdinit(struct line_buffer *buf, int fd) +{ + buf->infile = fdopen(fd, "r"); + if (!buf->infile) + return -1; + return 0; +} -int buffer_init(const char *filename) +int buffer_tmpfile_init(struct line_buffer *buf) { - infile = filename ? fopen(filename, "r") : stdin; - if (!infile) + buf->infile = tmpfile(); + if (!buf->infile) return -1; return 0; } -int buffer_deinit(void) +int buffer_deinit(struct line_buffer *buf) { int err; - if (infile == stdin) - return ferror(infile); - err = ferror(infile); - err |= fclose(infile); + if (buf->infile == stdin) + return ferror(buf->infile); + err = ferror(buf->infile); + err |= fclose(buf->infile); return err; } +FILE *buffer_tmpfile_rewind(struct line_buffer *buf) +{ + rewind(buf->infile); + return buf->infile; +} + +long buffer_tmpfile_prepare_to_read(struct line_buffer *buf) +{ + long pos = ftell(buf->infile); + if (pos < 0) + return error("ftell error: %s", strerror(errno)); + if (fseek(buf->infile, 0, SEEK_SET)) + return error("seek error: %s", strerror(errno)); + return pos; +} + +int buffer_ferror(struct line_buffer *buf) +{ + return ferror(buf->infile); +} + +int buffer_read_char(struct line_buffer *buf) +{ + return fgetc(buf->infile); +} + /* Read a line without trailing newline. 
*/ -char *buffer_read_line(void) +char *buffer_read_line(struct line_buffer *buf) { char *end; - if (!fgets(line_buffer, sizeof(line_buffer), infile)) + if (!fgets(buf->line_buffer, sizeof(buf->line_buffer), buf->infile)) /* Error or data exhausted. */ return NULL; - end = line_buffer + strlen(line_buffer); + end = buf->line_buffer + strlen(buf->line_buffer); if (end[-1] == '\n') end[-1] = '\0'; - else if (feof(infile)) + else if (feof(buf->infile)) ; /* No newline at end of file. That's fine. */ else /* @@ -54,44 +88,43 @@ char *buffer_read_line(void) * but for now let's return an error. */ return NULL; - return line_buffer; + return buf->line_buffer; } -char *buffer_read_string(uint32_t len) +void buffer_read_binary(struct line_buffer *buf, + struct strbuf *sb, uint32_t size) { - char *s; - blob_free(blob_pool.size); - s = blob_pointer(blob_alloc(len + 1)); - s[fread(s, 1, len, infile)] = '\0'; - return ferror(infile) ? NULL : s; + strbuf_fread(sb, size, buf->infile); } -void buffer_copy_bytes(uint32_t len) +off_t buffer_copy_bytes(struct line_buffer *buf, off_t nbytes) { - uint32_t in; - while (len > 0 && !feof(infile) && !ferror(infile)) { - in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN; - in = fread(byte_buffer, 1, in, infile); - len -= in; + char byte_buffer[COPY_BUFFER_LEN]; + off_t done = 0; + while (done < nbytes && !feof(buf->infile) && !ferror(buf->infile)) { + off_t len = nbytes - done; + size_t in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN; + in = fread(byte_buffer, 1, in, buf->infile); + done += in; fwrite(byte_buffer, 1, in, stdout); - if (ferror(stdout)) { - buffer_skip_bytes(len); - return; - } + if (ferror(stdout)) + return done + buffer_skip_bytes(buf, nbytes - done); } + return done; } -void buffer_skip_bytes(uint32_t len) +off_t buffer_skip_bytes(struct line_buffer *buf, off_t nbytes) { - uint32_t in; - while (len > 0 && !feof(infile) && !ferror(infile)) { - in = len < COPY_BUFFER_LEN ? 
len : COPY_BUFFER_LEN; - in = fread(byte_buffer, 1, in, infile); - len -= in; + char byte_buffer[COPY_BUFFER_LEN]; + off_t done = 0; + while (done < nbytes && !feof(buf->infile) && !ferror(buf->infile)) { + off_t len = nbytes - done; + size_t in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN; + done += fread(byte_buffer, 1, in, buf->infile); } + return done; } -void buffer_reset(void) +void buffer_reset(struct line_buffer *buf) { - blob_reset(); } diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h index 9c78ae11a1..d0b22dda76 100644 --- a/vcs-svn/line_buffer.h +++ b/vcs-svn/line_buffer.h @@ -1,12 +1,31 @@ #ifndef LINE_BUFFER_H_ #define LINE_BUFFER_H_ -int buffer_init(const char *filename); -int buffer_deinit(void); -char *buffer_read_line(void); -char *buffer_read_string(uint32_t len); -void buffer_copy_bytes(uint32_t len); -void buffer_skip_bytes(uint32_t len); -void buffer_reset(void); +#include "strbuf.h" + +#define LINE_BUFFER_LEN 10000 + +struct line_buffer { + char line_buffer[LINE_BUFFER_LEN]; + FILE *infile; +}; +#define LINE_BUFFER_INIT { "", NULL } + +int buffer_init(struct line_buffer *buf, const char *filename); +int buffer_fdinit(struct line_buffer *buf, int fd); +int buffer_deinit(struct line_buffer *buf); +void buffer_reset(struct line_buffer *buf); + +int buffer_tmpfile_init(struct line_buffer *buf); +FILE *buffer_tmpfile_rewind(struct line_buffer *buf); /* prepare to write. */ +long buffer_tmpfile_prepare_to_read(struct line_buffer *buf); + +int buffer_ferror(struct line_buffer *buf); +char *buffer_read_line(struct line_buffer *buf); +int buffer_read_char(struct line_buffer *buf); +void buffer_read_binary(struct line_buffer *buf, struct strbuf *sb, uint32_t len); +/* Returns number of bytes read (not necessarily written). 
*/ +off_t buffer_copy_bytes(struct line_buffer *buf, off_t len); +off_t buffer_skip_bytes(struct line_buffer *buf, off_t len); #endif diff --git a/vcs-svn/line_buffer.txt b/vcs-svn/line_buffer.txt index 8906fb1f50..8e139eb22d 100644 --- a/vcs-svn/line_buffer.txt +++ b/vcs-svn/line_buffer.txt @@ -14,22 +14,46 @@ Calling sequence The calling program: + - initializes a `struct line_buffer` to LINE_BUFFER_INIT - specifies a file to read with `buffer_init` - - processes input with `buffer_read_line`, `buffer_read_string`, - `buffer_skip_bytes`, and `buffer_copy_bytes` + - processes input with `buffer_read_line`, `buffer_skip_bytes`, + and `buffer_copy_bytes` - closes the file with `buffer_deinit`, perhaps to start over and read another file. -Before exiting, the caller can use `buffer_reset` to deallocate -resources for the benefit of profiling tools. +When finished, the caller can use `buffer_reset` to deallocate +resources. + +Using temporary files +--------------------- + +Temporary files provide a place to store data that should not outlive +the calling program. A program + + - initializes a `struct line_buffer` to LINE_BUFFER_INIT + - requests a temporary file with `buffer_tmpfile_init` + - acquires an output handle by calling `buffer_tmpfile_rewind` + - uses standard I/O functions like `fprintf` and `fwrite` to fill + the temporary file + - declares writing is over with `buffer_tmpfile_prepare_to_read` + - can re-read what was written with `buffer_read_line`, + `buffer_copy_bytes`, and so on + - can reuse the temporary file by calling `buffer_tmpfile_rewind` + again + - removes the temporary file with `buffer_deinit`, perhaps to + reuse the line_buffer for some other file. + +When finished, the calling program can use `buffer_reset` to deallocate +resources. Functions --------- -`buffer_init`:: - Open the named file for input. If filename is NULL, - start reading from stdin. On failure, returns -1 (with - errno indicating the nature of the failure). 
+`buffer_init`, `buffer_fdinit`:: + Open the named file or file descriptor for input. + buffer_init(buf, NULL) prepares to read from stdin. + On failure, returns -1 (with errno indicating the nature + of the failure). `buffer_deinit`:: Stop reading from the current file (closing it unless @@ -40,19 +64,14 @@ Functions Read a line and strip off the trailing newline. On failure or end of file, returns NULL. -`buffer_read_string`:: - Read `len` characters of input or up to the end of the - file, whichever comes first. Returns NULL on error. - Returns whatever characters were read (possibly "") - for end of file. - `buffer_copy_bytes`:: Read `len` bytes of input and dump them to the standard output stream. Returns early for error or end of file. `buffer_skip_bytes`:: Discards `len` bytes from the input stream (stopping early - if necessary because of an error or eof). + if necessary because of an error or eof). Return value is + the number of bytes successfully read. `buffer_reset`:: Deallocates non-static buffers. 
diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c index e94d91d129..a21d89de97 100644 --- a/vcs-svn/repo_tree.c +++ b/vcs-svn/repo_tree.c @@ -38,7 +38,7 @@ static uint32_t mark; static int repo_dirent_name_cmp(const void *a, const void *b); /* Treap for directory entries */ -trp_gen(static, dent_, struct repo_dirent, children, dent, repo_dirent_name_cmp); +trp_gen(static, dent_, struct repo_dirent, children, dent, repo_dirent_name_cmp) uint32_t next_blob_mark(void) { @@ -87,7 +87,8 @@ static struct repo_dir *repo_clone_dir(struct repo_dir *orig_dir) return dir_pointer(new_o); } -static struct repo_dirent *repo_read_dirent(uint32_t revision, uint32_t *path) +static struct repo_dirent *repo_read_dirent(uint32_t revision, + const uint32_t *path) { uint32_t name = 0; struct repo_dirent *key = dent_pointer(dent_alloc(1)); @@ -105,7 +106,7 @@ static struct repo_dirent *repo_read_dirent(uint32_t revision, uint32_t *path) return dent; } -static void repo_write_dirent(uint32_t *path, uint32_t mode, +static void repo_write_dirent(const uint32_t *path, uint32_t mode, uint32_t content_offset, uint32_t del) { uint32_t name, revision, dir_o = ~0, parent_dir_o = ~0; @@ -131,7 +132,7 @@ static void repo_write_dirent(uint32_t *path, uint32_t mode, if (dent == key) { dent->mode = REPO_MODE_DIR; dent->content_offset = 0; - dent_insert(&dir->entries, dent); + dent = dent_insert(&dir->entries, dent); } if (dent_offset(dent) < dent_pool.committed) { @@ -142,7 +143,7 @@ static void repo_write_dirent(uint32_t *path, uint32_t mode, dent->name_offset = name; dent->mode = REPO_MODE_DIR; dent->content_offset = dir_o; - dent_insert(&dir->entries, dent); + dent = dent_insert(&dir->entries, dent); } dir = repo_dir_from_dirent(dent); @@ -157,42 +158,37 @@ static void repo_write_dirent(uint32_t *path, uint32_t mode, dent_remove(&dir_pointer(parent_dir_o)->entries, dent); } -uint32_t repo_copy(uint32_t revision, uint32_t *src, uint32_t *dst) +uint32_t repo_read_path(const uint32_t *path) { - 
uint32_t mode = 0, content_offset = 0; - struct repo_dirent *src_dent; - src_dent = repo_read_dirent(revision, src); - if (src_dent != NULL) { - mode = src_dent->mode; - content_offset = src_dent->content_offset; - repo_write_dirent(dst, mode, content_offset, 0); - } - return mode; + uint32_t content_offset = 0; + struct repo_dirent *dent = repo_read_dirent(active_commit, path); + if (dent != NULL) + content_offset = dent->content_offset; + return content_offset; } -void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark) +uint32_t repo_read_mode(const uint32_t *path) { - repo_write_dirent(path, mode, blob_mark, 0); + struct repo_dirent *dent = repo_read_dirent(active_commit, path); + if (dent == NULL) + die("invalid dump: path to be modified is missing"); + return dent->mode; } -uint32_t repo_replace(uint32_t *path, uint32_t blob_mark) +void repo_copy(uint32_t revision, const uint32_t *src, const uint32_t *dst) { - uint32_t mode = 0; + uint32_t mode = 0, content_offset = 0; struct repo_dirent *src_dent; - src_dent = repo_read_dirent(active_commit, path); + src_dent = repo_read_dirent(revision, src); if (src_dent != NULL) { mode = src_dent->mode; - repo_write_dirent(path, mode, blob_mark, 0); + content_offset = src_dent->content_offset; + repo_write_dirent(dst, mode, content_offset, 0); } - return mode; } -void repo_modify(uint32_t *path, uint32_t mode, uint32_t blob_mark) +void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark) { - struct repo_dirent *src_dent; - src_dent = repo_read_dirent(active_commit, path); - if (src_dent != NULL && blob_mark == 0) - blob_mark = src_dent->content_offset; repo_write_dirent(path, mode, blob_mark, 0); } @@ -282,8 +278,9 @@ void repo_diff(uint32_t r1, uint32_t r2) repo_commit_root_dir(commit_pointer(r2))); } -void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, - uint32_t url, unsigned long timestamp) +void repo_commit(uint32_t revision, const char *author, + const struct strbuf *log, 
const char *uuid, const char *url, + unsigned long timestamp) { fast_export_commit(revision, author, log, uuid, url, timestamp); dent_commit(); diff --git a/vcs-svn/repo_tree.h b/vcs-svn/repo_tree.h index 5476175922..37bde2e374 100644 --- a/vcs-svn/repo_tree.h +++ b/vcs-svn/repo_tree.h @@ -1,7 +1,7 @@ #ifndef REPO_TREE_H_ #define REPO_TREE_H_ -#include "git-compat-util.h" +struct strbuf; #define REPO_MODE_DIR 0040000 #define REPO_MODE_BLB 0100644 @@ -12,13 +12,14 @@ #define REPO_MAX_PATH_DEPTH 1000 uint32_t next_blob_mark(void); -uint32_t repo_copy(uint32_t revision, uint32_t *src, uint32_t *dst); +void repo_copy(uint32_t revision, const uint32_t *src, const uint32_t *dst); void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark); -uint32_t repo_replace(uint32_t *path, uint32_t blob_mark); -void repo_modify(uint32_t *path, uint32_t mode, uint32_t blob_mark); +uint32_t repo_read_path(const uint32_t *path); +uint32_t repo_read_mode(const uint32_t *path); void repo_delete(uint32_t *path); -void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, - uint32_t url, long unsigned timestamp); +void repo_commit(uint32_t revision, const char *author, + const struct strbuf *log, const char *uuid, const char *url, + long unsigned timestamp); void repo_diff(uint32_t r1, uint32_t r2); void repo_init(void); void repo_reset(void); diff --git a/vcs-svn/string_pool.c b/vcs-svn/string_pool.c index f5b1da836e..8af8d54d6e 100644 --- a/vcs-svn/string_pool.c +++ b/vcs-svn/string_pool.c @@ -30,7 +30,7 @@ static int node_cmp(struct node *a, struct node *b) } /* Build a Treap from the node structure (a trp_node w/ offset) */ -trp_gen(static, tree_, struct node, children, node, node_cmp); +trp_gen(static, tree_, struct node, children, node, node_cmp) const char *pool_fetch(uint32_t entry) { diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 53d0215d2d..bc792223b2 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -11,8 +11,15 @@ #include 
"repo_tree.h" #include "fast_export.h" #include "line_buffer.h" -#include "obj_pool.h" #include "string_pool.h" +#include "strbuf.h" +#include "svndump.h" + +/* + * Compare start of string to literal of equal length; + * must be guarded by length test. + */ +#define constcmp(s, ref) memcmp(s, ref, sizeof(ref) - 1) #define NODEACT_REPLACE 4 #define NODEACT_DELETE 3 @@ -27,40 +34,25 @@ #define LENGTH_UNKNOWN (~0) #define DATE_RFC2822_LEN 31 -/* Create memory pool for log messages */ -obj_pool_gen(log, char, 4096) - -static char* log_copy(uint32_t length, char *log) -{ - char *buffer; - log_free(log_pool.size); - buffer = log_pointer(log_alloc(length)); - strncpy(buffer, log, length); - return buffer; -} +static struct line_buffer input = LINE_BUFFER_INIT; static struct { - uint32_t action, propLength, textLength, srcRev, srcMode, mark, type; + uint32_t action, propLength, textLength, srcRev, type; uint32_t src[REPO_MAX_PATH_DEPTH], dst[REPO_MAX_PATH_DEPTH]; + uint32_t text_delta, prop_delta; } node_ctx; static struct { - uint32_t revision, author; + uint32_t revision; unsigned long timestamp; - char *log; + struct strbuf log, author; } rev_ctx; static struct { - uint32_t uuid, url; + uint32_t version; + struct strbuf uuid, url; } dump_ctx; -static struct { - uint32_t svn_log, svn_author, svn_date, svn_executable, svn_special, uuid, - revision_number, node_path, node_kind, node_action, - node_copyfrom_path, node_copyfrom_rev, text_content_length, - prop_content_length, content_length; -} keys; - static void reset_node_ctx(char *fname) { node_ctx.type = 0; @@ -69,124 +61,223 @@ static void reset_node_ctx(char *fname) node_ctx.textLength = LENGTH_UNKNOWN; node_ctx.src[0] = ~0; node_ctx.srcRev = 0; - node_ctx.srcMode = 0; pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.dst, "/", fname); - node_ctx.mark = 0; + node_ctx.text_delta = 0; + node_ctx.prop_delta = 0; } static void reset_rev_ctx(uint32_t revision) { rev_ctx.revision = revision; rev_ctx.timestamp = 0; - rev_ctx.log = 
NULL; - rev_ctx.author = ~0; + strbuf_reset(&rev_ctx.log); + strbuf_reset(&rev_ctx.author); +} + +static void reset_dump_ctx(const char *url) +{ + strbuf_reset(&dump_ctx.url); + if (url) + strbuf_addstr(&dump_ctx.url, url); + dump_ctx.version = 1; + strbuf_reset(&dump_ctx.uuid); } -static void reset_dump_ctx(uint32_t url) +static void handle_property(const struct strbuf *key_buf, + struct strbuf *val, + uint32_t *type_set) { - dump_ctx.url = url; - dump_ctx.uuid = ~0; + const char *key = key_buf->buf; + size_t keylen = key_buf->len; + + switch (keylen + 1) { + case sizeof("svn:log"): + if (constcmp(key, "svn:log")) + break; + if (!val) + die("invalid dump: unsets svn:log"); + strbuf_swap(&rev_ctx.log, val); + break; + case sizeof("svn:author"): + if (constcmp(key, "svn:author")) + break; + if (!val) + strbuf_reset(&rev_ctx.author); + else + strbuf_swap(&rev_ctx.author, val); + break; + case sizeof("svn:date"): + if (constcmp(key, "svn:date")) + break; + if (!val) + die("invalid dump: unsets svn:date"); + if (parse_date_basic(val->buf, &rev_ctx.timestamp, NULL)) + warning("invalid timestamp: %s", val->buf); + break; + case sizeof("svn:executable"): + case sizeof("svn:special"): + if (keylen == strlen("svn:executable") && + constcmp(key, "svn:executable")) + break; + if (keylen == strlen("svn:special") && + constcmp(key, "svn:special")) + break; + if (*type_set) { + if (!val) + return; + die("invalid dump: sets type twice"); + } + if (!val) { + node_ctx.type = REPO_MODE_BLB; + return; + } + *type_set = 1; + node_ctx.type = keylen == strlen("svn:executable") ? 
+ REPO_MODE_EXE : + REPO_MODE_LNK; + } } -static void init_keys(void) +static void die_short_read(void) { - keys.svn_log = pool_intern("svn:log"); - keys.svn_author = pool_intern("svn:author"); - keys.svn_date = pool_intern("svn:date"); - keys.svn_executable = pool_intern("svn:executable"); - keys.svn_special = pool_intern("svn:special"); - keys.uuid = pool_intern("UUID"); - keys.revision_number = pool_intern("Revision-number"); - keys.node_path = pool_intern("Node-path"); - keys.node_kind = pool_intern("Node-kind"); - keys.node_action = pool_intern("Node-action"); - keys.node_copyfrom_path = pool_intern("Node-copyfrom-path"); - keys.node_copyfrom_rev = pool_intern("Node-copyfrom-rev"); - keys.text_content_length = pool_intern("Text-content-length"); - keys.prop_content_length = pool_intern("Prop-content-length"); - keys.content_length = pool_intern("Content-length"); + if (buffer_ferror(&input)) + die_errno("error reading dump file"); + die("invalid dump: unexpected end of file"); } static void read_props(void) { - uint32_t len; - uint32_t key = ~0; - char *val = NULL; - char *t; - while ((t = buffer_read_line()) && strcmp(t, "PROPS-END")) { - if (!strncmp(t, "K ", 2)) { - len = atoi(&t[2]); - key = pool_intern(buffer_read_string(len)); - buffer_read_line(); - } else if (!strncmp(t, "V ", 2)) { - len = atoi(&t[2]); - val = buffer_read_string(len); - if (key == keys.svn_log) { - /* Value length excludes terminating nul. 
*/ - rev_ctx.log = log_copy(len + 1, val); - } else if (key == keys.svn_author) { - rev_ctx.author = pool_intern(val); - } else if (key == keys.svn_date) { - if (parse_date_basic(val, &rev_ctx.timestamp, NULL)) - fprintf(stderr, "Invalid timestamp: %s\n", val); - } else if (key == keys.svn_executable) { - node_ctx.type = REPO_MODE_EXE; - } else if (key == keys.svn_special) { - node_ctx.type = REPO_MODE_LNK; - } - key = ~0; - buffer_read_line(); + static struct strbuf key = STRBUF_INIT; + static struct strbuf val = STRBUF_INIT; + const char *t; + /* + * NEEDSWORK: to support simple mode changes like + * K 11 + * svn:special + * V 1 + * * + * D 14 + * svn:executable + * we keep track of whether a mode has been set and reset to + * plain file only if not. We should be keeping track of the + * symlink and executable bits separately instead. + */ + uint32_t type_set = 0; + while ((t = buffer_read_line(&input)) && strcmp(t, "PROPS-END")) { + uint32_t len; + const char type = t[0]; + int ch; + + if (!type || t[1] != ' ') + die("invalid property line: %s\n", t); + len = atoi(&t[2]); + strbuf_reset(&val); + buffer_read_binary(&input, &val, len); + if (val.len < len) + die_short_read(); + + /* Discard trailing newline. 
*/ + ch = buffer_read_char(&input); + if (ch == EOF) + die_short_read(); + if (ch != '\n') + die("invalid dump: expected newline after %s", val.buf); + + switch (type) { + case 'K': + strbuf_swap(&key, &val); + continue; + case 'D': + handle_property(&val, NULL, &type_set); + continue; + case 'V': + handle_property(&key, &val, &type_set); + strbuf_reset(&key); + continue; + default: + die("invalid property line: %s\n", t); } } } static void handle_node(void) { - if (node_ctx.propLength != LENGTH_UNKNOWN && node_ctx.propLength) - read_props(); - - if (node_ctx.srcRev) - node_ctx.srcMode = repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst); - - if (node_ctx.textLength != LENGTH_UNKNOWN && - node_ctx.type != REPO_MODE_DIR) - node_ctx.mark = next_blob_mark(); + uint32_t mark = 0; + const uint32_t type = node_ctx.type; + const int have_props = node_ctx.propLength != LENGTH_UNKNOWN; + const int have_text = node_ctx.textLength != LENGTH_UNKNOWN; + if (node_ctx.text_delta) + die("text deltas not supported"); + if (have_text) + mark = next_blob_mark(); if (node_ctx.action == NODEACT_DELETE) { + if (have_text || have_props || node_ctx.srcRev) + die("invalid dump: deletion node has " + "copyfrom info, text, or properties"); + repo_delete(node_ctx.dst); + return; + } + if (node_ctx.action == NODEACT_REPLACE) { repo_delete(node_ctx.dst); - } else if (node_ctx.action == NODEACT_CHANGE || - node_ctx.action == NODEACT_REPLACE) { - if (node_ctx.action == NODEACT_REPLACE && - node_ctx.type == REPO_MODE_DIR) - repo_replace(node_ctx.dst, node_ctx.mark); - else if (node_ctx.propLength != LENGTH_UNKNOWN) - repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark); - else if (node_ctx.textLength != LENGTH_UNKNOWN) - node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark); + node_ctx.action = NODEACT_ADD; + } + if (node_ctx.srcRev) { + repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst); + if (node_ctx.action == NODEACT_ADD) + node_ctx.action = NODEACT_CHANGE; + } + if 
(have_text && type == REPO_MODE_DIR) + die("invalid dump: directories cannot have text attached"); + + /* + * Decide on the new content (mark) and mode (node_ctx.type). + */ + if (node_ctx.action == NODEACT_CHANGE && !~*node_ctx.dst) { + if (type != REPO_MODE_DIR) + die("invalid dump: root of tree is not a regular file"); + } else if (node_ctx.action == NODEACT_CHANGE) { + uint32_t mode; + if (!have_text) + mark = repo_read_path(node_ctx.dst); + mode = repo_read_mode(node_ctx.dst); + if (mode == REPO_MODE_DIR && type != REPO_MODE_DIR) + die("invalid dump: cannot modify a directory into a file"); + if (mode != REPO_MODE_DIR && type == REPO_MODE_DIR) + die("invalid dump: cannot modify a file into a directory"); + node_ctx.type = mode; } else if (node_ctx.action == NODEACT_ADD) { - if (node_ctx.srcRev && node_ctx.propLength != LENGTH_UNKNOWN) - repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark); - else if (node_ctx.srcRev && node_ctx.textLength != LENGTH_UNKNOWN) - node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark); - else if ((node_ctx.type == REPO_MODE_DIR && !node_ctx.srcRev) || - node_ctx.textLength != LENGTH_UNKNOWN) - repo_add(node_ctx.dst, node_ctx.type, node_ctx.mark); + if (!have_text && type != REPO_MODE_DIR) + die("invalid dump: adds node without text"); + } else { + die("invalid dump: Node-path block lacks Node-action"); } - if (node_ctx.propLength == LENGTH_UNKNOWN && node_ctx.srcMode) - node_ctx.type = node_ctx.srcMode; + /* + * Adjust mode to reflect properties. + */ + if (have_props) { + if (!node_ctx.prop_delta) + node_ctx.type = type; + if (node_ctx.propLength) + read_props(); + } - if (node_ctx.mark) - fast_export_blob(node_ctx.type, node_ctx.mark, node_ctx.textLength); - else if (node_ctx.textLength != LENGTH_UNKNOWN) - buffer_skip_bytes(node_ctx.textLength); + /* + * Save the result. 
+ */ + repo_add(node_ctx.dst, node_ctx.type, mark); + if (have_text) + fast_export_blob(node_ctx.type, mark, + node_ctx.textLength, &input); } static void handle_revision(void) { if (rev_ctx.revision) - repo_commit(rev_ctx.revision, rev_ctx.author, rev_ctx.log, - dump_ctx.uuid, dump_ctx.url, rev_ctx.timestamp); + repo_commit(rev_ctx.revision, rev_ctx.author.buf, + &rev_ctx.log, dump_ctx.uuid.buf, dump_ctx.url.buf, + rev_ctx.timestamp); } void svndump_read(const char *url) @@ -195,39 +286,65 @@ void svndump_read(const char *url) char *t; uint32_t active_ctx = DUMP_CTX; uint32_t len; - uint32_t key; - reset_dump_ctx(pool_intern(url)); - while ((t = buffer_read_line())) { - val = strstr(t, ": "); + reset_dump_ctx(url); + while ((t = buffer_read_line(&input))) { + val = strchr(t, ':'); if (!val) continue; - *val++ = '\0'; - *val++ = '\0'; - key = pool_intern(t); + val++; + if (*val != ' ') + continue; + val++; - if (key == keys.uuid) { - dump_ctx.uuid = pool_intern(val); - } else if (key == keys.revision_number) { + /* strlen(key) + 1 */ + switch (val - t - 1) { + case sizeof("SVN-fs-dump-format-version"): + if (constcmp(t, "SVN-fs-dump-format-version")) + continue; + dump_ctx.version = atoi(val); + if (dump_ctx.version > 3) + die("expected svn dump format version <= 3, found %"PRIu32, + dump_ctx.version); + break; + case sizeof("UUID"): + if (constcmp(t, "UUID")) + continue; + strbuf_reset(&dump_ctx.uuid); + strbuf_addstr(&dump_ctx.uuid, val); + break; + case sizeof("Revision-number"): + if (constcmp(t, "Revision-number")) + continue; if (active_ctx == NODE_CTX) handle_node(); if (active_ctx != DUMP_CTX) handle_revision(); active_ctx = REV_CTX; reset_rev_ctx(atoi(val)); - } else if (key == keys.node_path) { - if (active_ctx == NODE_CTX) - handle_node(); - active_ctx = NODE_CTX; - reset_node_ctx(val); - } else if (key == keys.node_kind) { + break; + case sizeof("Node-path"): + if (prefixcmp(t, "Node-")) + continue; + if (!constcmp(t + strlen("Node-"), "path")) { + if 
(active_ctx == NODE_CTX) + handle_node(); + active_ctx = NODE_CTX; + reset_node_ctx(val); + break; + } + if (constcmp(t + strlen("Node-"), "kind")) + continue; if (!strcmp(val, "dir")) node_ctx.type = REPO_MODE_DIR; else if (!strcmp(val, "file")) node_ctx.type = REPO_MODE_BLB; else fprintf(stderr, "Unknown node-kind: %s\n", val); - } else if (key == keys.node_action) { + break; + case sizeof("Node-action"): + if (constcmp(t, "Node-action")) + continue; if (!strcmp(val, "delete")) { node_ctx.action = NODEACT_DELETE; } else if (!strcmp(val, "add")) { @@ -240,17 +357,44 @@ void svndump_read(const char *url) fprintf(stderr, "Unknown node-action: %s\n", val); node_ctx.action = NODEACT_UNKNOWN; } - } else if (key == keys.node_copyfrom_path) { + break; + case sizeof("Node-copyfrom-path"): + if (constcmp(t, "Node-copyfrom-path")) + continue; pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.src, "/", val); - } else if (key == keys.node_copyfrom_rev) { + break; + case sizeof("Node-copyfrom-rev"): + if (constcmp(t, "Node-copyfrom-rev")) + continue; node_ctx.srcRev = atoi(val); - } else if (key == keys.text_content_length) { - node_ctx.textLength = atoi(val); - } else if (key == keys.prop_content_length) { + break; + case sizeof("Text-content-length"): + if (!constcmp(t, "Text-content-length")) { + node_ctx.textLength = atoi(val); + break; + } + if (constcmp(t, "Prop-content-length")) + continue; node_ctx.propLength = atoi(val); - } else if (key == keys.content_length) { + break; + case sizeof("Text-delta"): + if (!constcmp(t, "Text-delta")) { + node_ctx.text_delta = !strcmp(val, "true"); + break; + } + if (constcmp(t, "Prop-delta")) + continue; + node_ctx.prop_delta = !strcmp(val, "true"); + break; + case sizeof("Content-length"): + if (constcmp(t, "Content-length")) + continue; len = atoi(val); - buffer_read_line(); + t = buffer_read_line(&input); + if (!t) + die_short_read(); + if (*t) + die("invalid dump: expected blank line after content length header"); if (active_ctx == 
REV_CTX) { read_props(); } else if (active_ctx == NODE_CTX) { @@ -258,34 +402,42 @@ void svndump_read(const char *url) active_ctx = REV_CTX; } else { fprintf(stderr, "Unexpected content length header: %"PRIu32"\n", len); - buffer_skip_bytes(len); + if (buffer_skip_bytes(&input, len) != len) + die_short_read(); } } } + if (buffer_ferror(&input)) + die_short_read(); if (active_ctx == NODE_CTX) handle_node(); if (active_ctx != DUMP_CTX) handle_revision(); } -void svndump_init(const char *filename) +int svndump_init(const char *filename) { - buffer_init(filename); + if (buffer_init(&input, filename)) + return error("cannot open %s: %s", filename, strerror(errno)); repo_init(); - reset_dump_ctx(~0); + strbuf_init(&dump_ctx.uuid, 4096); + strbuf_init(&dump_ctx.url, 4096); + strbuf_init(&rev_ctx.log, 4096); + strbuf_init(&rev_ctx.author, 4096); + reset_dump_ctx(NULL); reset_rev_ctx(0); reset_node_ctx(NULL); - init_keys(); + return 0; } void svndump_deinit(void) { - log_reset(); repo_reset(); - reset_dump_ctx(~0); + reset_dump_ctx(NULL); reset_rev_ctx(0); reset_node_ctx(NULL); - if (buffer_deinit()) + strbuf_release(&rev_ctx.log); + if (buffer_deinit(&input)) fprintf(stderr, "Input error\n"); if (ferror(stdout)) fprintf(stderr, "Output error\n"); @@ -293,10 +445,10 @@ void svndump_deinit(void) void svndump_reset(void) { - log_reset(); - buffer_reset(); + buffer_reset(&input); repo_reset(); - reset_dump_ctx(~0); - reset_rev_ctx(0); - reset_node_ctx(NULL); + strbuf_release(&dump_ctx.uuid); + strbuf_release(&dump_ctx.url); + strbuf_release(&rev_ctx.log); + strbuf_release(&rev_ctx.author); } diff --git a/vcs-svn/svndump.h b/vcs-svn/svndump.h index 93c412f14a..df9ceb0e8d 100644 --- a/vcs-svn/svndump.h +++ b/vcs-svn/svndump.h @@ -1,7 +1,7 @@ #ifndef SVNDUMP_H_ #define SVNDUMP_H_ -void svndump_init(const char *filename); +int svndump_init(const char *filename); void svndump_read(const char *url); void svndump_deinit(void); void svndump_reset(void); diff --git a/vcs-svn/trp.h 
b/vcs-svn/trp.h index ee35c688a0..c32b9184e9 100644 --- a/vcs-svn/trp.h +++ b/vcs-svn/trp.h @@ -188,11 +188,12 @@ a_attr uint32_t MAYBE_UNUSED a_pre##insert_recurse(uint32_t cur_node, uint32_t i return ret; \ } \ } \ -a_attr void MAYBE_UNUSED a_pre##insert(struct trp_root *treap, a_type *node) \ +a_attr a_type *MAYBE_UNUSED a_pre##insert(struct trp_root *treap, a_type *node) \ { \ uint32_t offset = trpn_offset(a_base, node); \ trp_node_new(a_base, a_field, offset); \ treap->trp_root = a_pre##insert_recurse(treap->trp_root, offset); \ + return trpn_pointer(a_base, offset); \ } \ a_attr uint32_t MAYBE_UNUSED a_pre##remove_recurse(uint32_t cur_node, uint32_t rem_node) \ { \ diff --git a/vcs-svn/trp.txt b/vcs-svn/trp.txt index eb4c191875..177ebca335 100644 --- a/vcs-svn/trp.txt +++ b/vcs-svn/trp.txt @@ -21,7 +21,9 @@ The caller: . Allocates a `struct trp_root` variable and sets it to {~0}. -. Adds new nodes to the set using `foo_insert`. +. Adds new nodes to the set using `foo_insert`. Any pointers + to existing nodes cannot be relied upon any more, so the caller + might retrieve them anew with `foo_pointer`. . Can find a specific item in the set using `foo_search`. @@ -73,10 +75,14 @@ int (*cmp)(node_type \*a, node_type \*b) and returning a value less than, equal to, or greater than zero according to the result of comparison. -void foo_insert(struct trp_root *treap, node_type \*node):: +node_type {asterisk}foo_insert(struct trp_root *treap, node_type \*node):: Insert node into treap. If inserted multiple times, a node will appear in the treap multiple times. ++ +The return value is the address of the node within the treap, +which might differ from `node` if `pool_alloc` had to call +`realloc` to expand the pool. 
void foo_remove(struct trp_root *treap, node_type \*node):: @@ -90,7 +96,7 @@ node_type *foo_search(struct trp_root \*treap, node_type \*key):: node_type *foo_nsearch(struct trp_root \*treap, node_type \*key):: - Like `foo_search`, but if if the key is missing return what + Like `foo_search`, but if the key is missing return what would be key's successor, were key in treap (NULL if no successor). |