diff options
Diffstat (limited to 'vcs-svn')
-rw-r--r-- | vcs-svn/LICENSE | 3 | ||||
-rw-r--r-- | vcs-svn/fast_export.c | 292 | ||||
-rw-r--r-- | vcs-svn/fast_export.h | 29 | ||||
-rw-r--r-- | vcs-svn/line_buffer.c | 121 | ||||
-rw-r--r-- | vcs-svn/line_buffer.h | 33 | ||||
-rw-r--r-- | vcs-svn/line_buffer.txt | 49 | ||||
-rw-r--r-- | vcs-svn/obj_pool.h | 61 | ||||
-rw-r--r-- | vcs-svn/repo_tree.c | 333 | ||||
-rw-r--r-- | vcs-svn/repo_tree.h | 19 | ||||
-rw-r--r-- | vcs-svn/sliding_window.c | 79 | ||||
-rw-r--r-- | vcs-svn/sliding_window.h | 18 | ||||
-rw-r--r-- | vcs-svn/string_pool.c | 102 | ||||
-rw-r--r-- | vcs-svn/string_pool.h | 11 | ||||
-rw-r--r-- | vcs-svn/string_pool.txt | 43 | ||||
-rw-r--r-- | vcs-svn/svndiff.c | 308 | ||||
-rw-r--r-- | vcs-svn/svndiff.h | 10 | ||||
-rw-r--r-- | vcs-svn/svndump.c | 526 | ||||
-rw-r--r-- | vcs-svn/svndump.h | 2 | ||||
-rw-r--r-- | vcs-svn/trp.h | 236 | ||||
-rw-r--r-- | vcs-svn/trp.txt | 103 |
20 files changed, 1243 insertions, 1135 deletions
diff --git a/vcs-svn/LICENSE b/vcs-svn/LICENSE index 0a5e3c43a0..eb91858b82 100644 --- a/vcs-svn/LICENSE +++ b/vcs-svn/LICENSE @@ -1,8 +1,7 @@ Copyright (C) 2010 David Barr <david.barr@cordelta.com>. All rights reserved. -Copyright (C) 2008 Jason Evans <jasone@canonware.com>. -All rights reserved. +Copyright (C) 2010 Jonathan Nieder <jrnieder@gmail.com>. Copyright (C) 2005 Stefan Hegny, hydrografix Consulting GmbH, Frankfurt/Main, Germany diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c index 6cfa256a37..b823b8519c 100644 --- a/vcs-svn/fast_export.c +++ b/vcs-svn/fast_export.c @@ -4,73 +4,313 @@ */ #include "git-compat-util.h" +#include "strbuf.h" +#include "quote.h" #include "fast_export.h" -#include "line_buffer.h" #include "repo_tree.h" -#include "string_pool.h" +#include "strbuf.h" +#include "svndiff.h" +#include "sliding_window.h" +#include "line_buffer.h" #define MAX_GITSVN_LINE_LEN 4096 static uint32_t first_commit_done; +static struct line_buffer postimage = LINE_BUFFER_INIT; +static struct line_buffer report_buffer = LINE_BUFFER_INIT; + +/* NEEDSWORK: move to fast_export_init() */ +static int init_postimage(void) +{ + static int postimage_initialized; + if (postimage_initialized) + return 0; + postimage_initialized = 1; + return buffer_tmpfile_init(&postimage); +} -void fast_export_delete(uint32_t depth, uint32_t *path) +void fast_export_init(int fd) +{ + first_commit_done = 0; + if (buffer_fdinit(&report_buffer, fd)) + die_errno("cannot read from file descriptor %d", fd); +} + +void fast_export_deinit(void) +{ + if (buffer_deinit(&report_buffer)) + die_errno("error closing fast-import feedback stream"); +} + +void fast_export_reset(void) +{ + buffer_reset(&report_buffer); +} + +void fast_export_delete(const char *path) { putchar('D'); putchar(' '); - pool_print_seq(depth, path, '/', stdout); + quote_c_style(path, NULL, stdout, 0); putchar('\n'); } -void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, - uint32_t mark) +static 
void fast_export_truncate(const char *path, uint32_t mode) +{ + fast_export_modify(path, mode, "inline"); + printf("data 0\n\n"); +} + +void fast_export_modify(const char *path, uint32_t mode, const char *dataref) { /* Mode must be 100644, 100755, 120000, or 160000. */ - printf("M %06"PRIo32" :%"PRIu32" ", mode, mark); - pool_print_seq(depth, path, '/', stdout); + if (!dataref) { + fast_export_truncate(path, mode); + return; + } + printf("M %06"PRIo32" %s ", mode, dataref); + quote_c_style(path, NULL, stdout, 0); putchar('\n'); } static char gitsvnline[MAX_GITSVN_LINE_LEN]; -void fast_export_commit(uint32_t revision, uint32_t author, char *log, - uint32_t uuid, uint32_t url, +void fast_export_begin_commit(uint32_t revision, const char *author, + const struct strbuf *log, + const char *uuid, const char *url, unsigned long timestamp) { + static const struct strbuf empty = STRBUF_INIT; if (!log) - log = ""; - if (~uuid && ~url) { + log = &empty; + if (*uuid && *url) { snprintf(gitsvnline, MAX_GITSVN_LINE_LEN, "\n\ngit-svn-id: %s@%"PRIu32" %s\n", - pool_fetch(url), revision, pool_fetch(uuid)); + url, revision, uuid); } else { *gitsvnline = '\0'; } printf("commit refs/heads/master\n"); + printf("mark :%"PRIu32"\n", revision); printf("committer %s <%s@%s> %ld +0000\n", - ~author ? pool_fetch(author) : "nobody", - ~author ? pool_fetch(author) : "nobody", - ~uuid ? pool_fetch(uuid) : "local", timestamp); - printf("data %"PRIu32"\n%s%s\n", - (uint32_t) (strlen(log) + strlen(gitsvnline)), - log, gitsvnline); + *author ? author : "nobody", + *author ? author : "nobody", + *uuid ? 
uuid : "local", timestamp); + printf("data %"PRIuMAX"\n", + (uintmax_t) (log->len + strlen(gitsvnline))); + fwrite(log->buf, log->len, 1, stdout); + printf("%s\n", gitsvnline); if (!first_commit_done) { if (revision > 1) - printf("from refs/heads/master^0\n"); + printf("from :%"PRIu32"\n", revision - 1); first_commit_done = 1; } - repo_diff(revision - 1, revision); - fputc('\n', stdout); +} +void fast_export_end_commit(uint32_t revision) +{ printf("progress Imported commit %"PRIu32".\n\n", revision); } -void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len) +static void ls_from_rev(uint32_t rev, const char *path) +{ + /* ls :5 path/to/old/file */ + printf("ls :%"PRIu32" ", rev); + quote_c_style(path, NULL, stdout, 0); + putchar('\n'); + fflush(stdout); +} + +static void ls_from_active_commit(const char *path) +{ + /* ls "path/to/file" */ + printf("ls \""); + quote_c_style(path, NULL, stdout, 1); + printf("\"\n"); + fflush(stdout); +} + +static const char *get_response_line(void) +{ + const char *line = buffer_read_line(&report_buffer); + if (line) + return line; + if (buffer_ferror(&report_buffer)) + die_errno("error reading from fast-import"); + die("unexpected end of fast-import feedback"); +} + +static void die_short_read(struct line_buffer *input) +{ + if (buffer_ferror(input)) + die_errno("error reading dump file"); + die("invalid dump: unexpected end of file"); +} + +static int ends_with(const char *s, size_t len, const char *suffix) +{ + const size_t suffixlen = strlen(suffix); + if (len < suffixlen) + return 0; + return !memcmp(s + len - suffixlen, suffix, suffixlen); +} + +static int parse_cat_response_line(const char *header, off_t *len) { + size_t headerlen = strlen(header); + uintmax_t n; + const char *type; + const char *end; + + if (ends_with(header, headerlen, " missing")) + return error("cat-blob reports missing blob: %s", header); + type = memmem(header, headerlen, " blob ", strlen(" blob ")); + if (!type) + return error("cat-blob header 
has wrong object type: %s", header); + n = strtoumax(type + strlen(" blob "), (char **) &end, 10); + if (end == type + strlen(" blob ")) + return error("cat-blob header does not contain length: %s", header); + if (memchr(type + strlen(" blob "), '-', end - type - strlen(" blob "))) + return error("cat-blob header contains negative length: %s", header); + if (n == UINTMAX_MAX || n > maximum_signed_value_of_type(off_t)) + return error("blob too large for current definition of off_t"); + *len = n; + if (*end) + return error("cat-blob header contains garbage after length: %s", header); + return 0; +} + +static void check_preimage_overflow(off_t a, off_t b) +{ + if (signed_add_overflows(a, b)) + die("blob too large for current definition of off_t"); +} + +static long apply_delta(off_t len, struct line_buffer *input, + const char *old_data, uint32_t old_mode) +{ + long ret; + struct sliding_view preimage = SLIDING_VIEW_INIT(&report_buffer, 0); + FILE *out; + + if (init_postimage() || !(out = buffer_tmpfile_rewind(&postimage))) + die("cannot open temporary file for blob retrieval"); + if (old_data) { + const char *response; + printf("cat-blob %s\n", old_data); + fflush(stdout); + response = get_response_line(); + if (parse_cat_response_line(response, &preimage.max_off)) + die("invalid cat-blob response: %s", response); + check_preimage_overflow(preimage.max_off, 1); + } + if (old_mode == REPO_MODE_LNK) { + strbuf_addstr(&preimage.buf, "link "); + check_preimage_overflow(preimage.max_off, strlen("link ")); + preimage.max_off += strlen("link "); + check_preimage_overflow(preimage.max_off, 1); + } + if (svndiff0_apply(input, len, &preimage, out)) + die("cannot apply delta"); + if (old_data) { + /* Read the remainder of preimage and trailing newline. 
*/ + assert(!signed_add_overflows(preimage.max_off, 1)); + preimage.max_off++; /* room for newline */ + if (move_window(&preimage, preimage.max_off - 1, 1)) + die("cannot seek to end of input"); + if (preimage.buf.buf[0] != '\n') + die("missing newline after cat-blob response"); + } + ret = buffer_tmpfile_prepare_to_read(&postimage); + if (ret < 0) + die("cannot read temporary file for blob retrieval"); + strbuf_release(&preimage.buf); + return ret; +} + +void fast_export_data(uint32_t mode, off_t len, struct line_buffer *input) +{ + assert(len >= 0); if (mode == REPO_MODE_LNK) { /* svn symlink blobs start with "link " */ - buffer_skip_bytes(5); + if (len < 5) + die("invalid dump: symlink too short for \"link\" prefix"); len -= 5; + if (buffer_skip_bytes(input, 5) != 5) + die_short_read(input); + } + printf("data %"PRIuMAX"\n", (uintmax_t) len); + if (buffer_copy_bytes(input, len) != len) + die_short_read(input); + fputc('\n', stdout); +} + +static int parse_ls_response(const char *response, uint32_t *mode, + struct strbuf *dataref) +{ + const char *tab; + const char *response_end; + + assert(response); + response_end = response + strlen(response); + + if (*response == 'm') { /* Missing. */ + errno = ENOENT; + return -1; + } + + /* Mode. */ + if (response_end - response < strlen("100644") || + response[strlen("100644")] != ' ') + die("invalid ls response: missing mode: %s", response); + *mode = 0; + for (; *response != ' '; response++) { + char ch = *response; + if (ch < '0' || ch > '7') + die("invalid ls response: mode is not octal: %s", response); + *mode *= 8; + *mode += ch - '0'; + } + + /* ' blob ' or ' tree ' */ + if (response_end - response < strlen(" blob ") || + (response[1] != 'b' && response[1] != 't')) + die("unexpected ls response: not a tree or blob: %s", response); + response += strlen(" blob "); + + /* Dataref. 
*/ + tab = memchr(response, '\t', response_end - response); + if (!tab) + die("invalid ls response: missing tab: %s", response); + strbuf_add(dataref, response, tab - response); + return 0; +} + +int fast_export_ls_rev(uint32_t rev, const char *path, + uint32_t *mode, struct strbuf *dataref) +{ + ls_from_rev(rev, path); + return parse_ls_response(get_response_line(), mode, dataref); +} + +int fast_export_ls(const char *path, uint32_t *mode, struct strbuf *dataref) +{ + ls_from_active_commit(path); + return parse_ls_response(get_response_line(), mode, dataref); +} + +void fast_export_blob_delta(uint32_t mode, + uint32_t old_mode, const char *old_data, + off_t len, struct line_buffer *input) +{ + long postimage_len; + + assert(len >= 0); + postimage_len = apply_delta(len, input, old_data, old_mode); + if (mode == REPO_MODE_LNK) { + buffer_skip_bytes(&postimage, strlen("link ")); + postimage_len -= strlen("link "); } - printf("blob\nmark :%"PRIu32"\ndata %"PRIu32"\n", mark, len); - buffer_copy_bytes(len); + printf("data %ld\n", postimage_len); + buffer_copy_bytes(&postimage, postimage_len); fputc('\n', stdout); } diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h index 2aaaea53d5..aa629f54ff 100644 --- a/vcs-svn/fast_export.h +++ b/vcs-svn/fast_export.h @@ -1,11 +1,28 @@ #ifndef FAST_EXPORT_H_ #define FAST_EXPORT_H_ -void fast_export_delete(uint32_t depth, uint32_t *path); -void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, - uint32_t mark); -void fast_export_commit(uint32_t revision, uint32_t author, char *log, - uint32_t uuid, uint32_t url, unsigned long timestamp); -void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len); +struct strbuf; +struct line_buffer; + +void fast_export_init(int fd); +void fast_export_deinit(void); +void fast_export_reset(void); + +void fast_export_delete(const char *path); +void fast_export_modify(const char *path, uint32_t mode, const char *dataref); +void fast_export_begin_commit(uint32_t revision, 
const char *author, + const struct strbuf *log, const char *uuid, + const char *url, unsigned long timestamp); +void fast_export_end_commit(uint32_t revision); +void fast_export_data(uint32_t mode, off_t len, struct line_buffer *input); +void fast_export_blob_delta(uint32_t mode, + uint32_t old_mode, const char *old_data, + off_t len, struct line_buffer *input); + +/* If there is no such file at that rev, returns -1, errno == ENOENT. */ +int fast_export_ls_rev(uint32_t rev, const char *path, + uint32_t *mode_out, struct strbuf *dataref_out); +int fast_export_ls(const char *path, + uint32_t *mode_out, struct strbuf *dataref_out); #endif diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c index 1543567093..01fcb842f1 100644 --- a/vcs-svn/line_buffer.c +++ b/vcs-svn/line_buffer.c @@ -5,47 +5,81 @@ #include "git-compat-util.h" #include "line_buffer.h" -#include "obj_pool.h" +#include "strbuf.h" -#define LINE_BUFFER_LEN 10000 #define COPY_BUFFER_LEN 4096 -/* Create memory pool for char sequence of known length */ -obj_pool_gen(blob, char, 4096) +int buffer_init(struct line_buffer *buf, const char *filename) +{ + buf->infile = filename ? fopen(filename, "r") : stdin; + if (!buf->infile) + return -1; + return 0; +} -static char line_buffer[LINE_BUFFER_LEN]; -static char byte_buffer[COPY_BUFFER_LEN]; -static FILE *infile; +int buffer_fdinit(struct line_buffer *buf, int fd) +{ + buf->infile = fdopen(fd, "r"); + if (!buf->infile) + return -1; + return 0; +} -int buffer_init(const char *filename) +int buffer_tmpfile_init(struct line_buffer *buf) { - infile = filename ? 
fopen(filename, "r") : stdin; - if (!infile) + buf->infile = tmpfile(); + if (!buf->infile) return -1; return 0; } -int buffer_deinit(void) +int buffer_deinit(struct line_buffer *buf) { int err; - if (infile == stdin) - return ferror(infile); - err = ferror(infile); - err |= fclose(infile); + if (buf->infile == stdin) + return ferror(buf->infile); + err = ferror(buf->infile); + err |= fclose(buf->infile); return err; } +FILE *buffer_tmpfile_rewind(struct line_buffer *buf) +{ + rewind(buf->infile); + return buf->infile; +} + +long buffer_tmpfile_prepare_to_read(struct line_buffer *buf) +{ + long pos = ftell(buf->infile); + if (pos < 0) + return error("ftell error: %s", strerror(errno)); + if (fseek(buf->infile, 0, SEEK_SET)) + return error("seek error: %s", strerror(errno)); + return pos; +} + +int buffer_ferror(struct line_buffer *buf) +{ + return ferror(buf->infile); +} + +int buffer_read_char(struct line_buffer *buf) +{ + return fgetc(buf->infile); +} + /* Read a line without trailing newline. */ -char *buffer_read_line(void) +char *buffer_read_line(struct line_buffer *buf) { char *end; - if (!fgets(line_buffer, sizeof(line_buffer), infile)) + if (!fgets(buf->line_buffer, sizeof(buf->line_buffer), buf->infile)) /* Error or data exhausted. */ return NULL; - end = line_buffer + strlen(line_buffer); + end = buf->line_buffer + strlen(buf->line_buffer); if (end[-1] == '\n') end[-1] = '\0'; - else if (feof(infile)) + else if (feof(buf->infile)) ; /* No newline at end of file. That's fine. */ else /* @@ -54,44 +88,43 @@ char *buffer_read_line(void) * but for now let's return an error. */ return NULL; - return line_buffer; + return buf->line_buffer; } -char *buffer_read_string(uint32_t len) +size_t buffer_read_binary(struct line_buffer *buf, + struct strbuf *sb, size_t size) { - char *s; - blob_free(blob_pool.size); - s = blob_pointer(blob_alloc(len + 1)); - s[fread(s, 1, len, infile)] = '\0'; - return ferror(infile) ? 
NULL : s; + return strbuf_fread(sb, size, buf->infile); } -void buffer_copy_bytes(uint32_t len) +off_t buffer_copy_bytes(struct line_buffer *buf, off_t nbytes) { - uint32_t in; - while (len > 0 && !feof(infile) && !ferror(infile)) { - in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN; - in = fread(byte_buffer, 1, in, infile); - len -= in; + char byte_buffer[COPY_BUFFER_LEN]; + off_t done = 0; + while (done < nbytes && !feof(buf->infile) && !ferror(buf->infile)) { + off_t len = nbytes - done; + size_t in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN; + in = fread(byte_buffer, 1, in, buf->infile); + done += in; fwrite(byte_buffer, 1, in, stdout); - if (ferror(stdout)) { - buffer_skip_bytes(len); - return; - } + if (ferror(stdout)) + return done + buffer_skip_bytes(buf, nbytes - done); } + return done; } -void buffer_skip_bytes(uint32_t len) +off_t buffer_skip_bytes(struct line_buffer *buf, off_t nbytes) { - uint32_t in; - while (len > 0 && !feof(infile) && !ferror(infile)) { - in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN; - in = fread(byte_buffer, 1, in, infile); - len -= in; + char byte_buffer[COPY_BUFFER_LEN]; + off_t done = 0; + while (done < nbytes && !feof(buf->infile) && !ferror(buf->infile)) { + off_t len = nbytes - done; + size_t in = len < COPY_BUFFER_LEN ? 
len : COPY_BUFFER_LEN; + done += fread(byte_buffer, 1, in, buf->infile); } + return done; } -void buffer_reset(void) +void buffer_reset(struct line_buffer *buf) { - blob_reset(); } diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h index 9c78ae11a1..8901f214ba 100644 --- a/vcs-svn/line_buffer.h +++ b/vcs-svn/line_buffer.h @@ -1,12 +1,31 @@ #ifndef LINE_BUFFER_H_ #define LINE_BUFFER_H_ -int buffer_init(const char *filename); -int buffer_deinit(void); -char *buffer_read_line(void); -char *buffer_read_string(uint32_t len); -void buffer_copy_bytes(uint32_t len); -void buffer_skip_bytes(uint32_t len); -void buffer_reset(void); +#include "strbuf.h" + +#define LINE_BUFFER_LEN 10000 + +struct line_buffer { + char line_buffer[LINE_BUFFER_LEN]; + FILE *infile; +}; +#define LINE_BUFFER_INIT { "", NULL } + +int buffer_init(struct line_buffer *buf, const char *filename); +int buffer_fdinit(struct line_buffer *buf, int fd); +int buffer_deinit(struct line_buffer *buf); +void buffer_reset(struct line_buffer *buf); + +int buffer_tmpfile_init(struct line_buffer *buf); +FILE *buffer_tmpfile_rewind(struct line_buffer *buf); /* prepare to write. */ +long buffer_tmpfile_prepare_to_read(struct line_buffer *buf); + +int buffer_ferror(struct line_buffer *buf); +char *buffer_read_line(struct line_buffer *buf); +int buffer_read_char(struct line_buffer *buf); +size_t buffer_read_binary(struct line_buffer *buf, struct strbuf *sb, size_t len); +/* Returns number of bytes read (not necessarily written). 
*/ +off_t buffer_copy_bytes(struct line_buffer *buf, off_t len); +off_t buffer_skip_bytes(struct line_buffer *buf, off_t len); #endif diff --git a/vcs-svn/line_buffer.txt b/vcs-svn/line_buffer.txt index 8906fb1f50..8e139eb22d 100644 --- a/vcs-svn/line_buffer.txt +++ b/vcs-svn/line_buffer.txt @@ -14,22 +14,46 @@ Calling sequence The calling program: + - initializes a `struct line_buffer` to LINE_BUFFER_INIT - specifies a file to read with `buffer_init` - - processes input with `buffer_read_line`, `buffer_read_string`, - `buffer_skip_bytes`, and `buffer_copy_bytes` + - processes input with `buffer_read_line`, `buffer_skip_bytes`, + and `buffer_copy_bytes` - closes the file with `buffer_deinit`, perhaps to start over and read another file. -Before exiting, the caller can use `buffer_reset` to deallocate -resources for the benefit of profiling tools. +When finished, the caller can use `buffer_reset` to deallocate +resources. + +Using temporary files +--------------------- + +Temporary files provide a place to store data that should not outlive +the calling program. A program + + - initializes a `struct line_buffer` to LINE_BUFFER_INIT + - requests a temporary file with `buffer_tmpfile_init` + - acquires an output handle by calling `buffer_tmpfile_rewind` + - uses standard I/O functions like `fprintf` and `fwrite` to fill + the temporary file + - declares writing is over with `buffer_tmpfile_prepare_to_read` + - can re-read what was written with `buffer_read_line`, + `buffer_copy_bytes`, and so on + - can reuse the temporary file by calling `buffer_tmpfile_rewind` + again + - removes the temporary file with `buffer_deinit`, perhaps to + reuse the line_buffer for some other file. + +When finished, the calling program can use `buffer_reset` to deallocate +resources. Functions --------- -`buffer_init`:: - Open the named file for input. If filename is NULL, - start reading from stdin. On failure, returns -1 (with - errno indicating the nature of the failure). 
+`buffer_init`, `buffer_fdinit`:: + Open the named file or file descriptor for input. + buffer_init(buf, NULL) prepares to read from stdin. + On failure, returns -1 (with errno indicating the nature + of the failure). `buffer_deinit`:: Stop reading from the current file (closing it unless @@ -40,19 +64,14 @@ Functions Read a line and strip off the trailing newline. On failure or end of file, returns NULL. -`buffer_read_string`:: - Read `len` characters of input or up to the end of the - file, whichever comes first. Returns NULL on error. - Returns whatever characters were read (possibly "") - for end of file. - `buffer_copy_bytes`:: Read `len` bytes of input and dump them to the standard output stream. Returns early for error or end of file. `buffer_skip_bytes`:: Discards `len` bytes from the input stream (stopping early - if necessary because of an error or eof). + if necessary because of an error or eof). Return value is + the number of bytes successfully read. `buffer_reset`:: Deallocates non-static buffers. diff --git a/vcs-svn/obj_pool.h b/vcs-svn/obj_pool.h deleted file mode 100644 index deb6eb8135..0000000000 --- a/vcs-svn/obj_pool.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed under a two-clause BSD-style license. - * See LICENSE for details. 
- */ - -#ifndef OBJ_POOL_H_ -#define OBJ_POOL_H_ - -#include "git-compat-util.h" - -#define MAYBE_UNUSED __attribute__((__unused__)) - -#define obj_pool_gen(pre, obj_t, initial_capacity) \ -static struct { \ - uint32_t committed; \ - uint32_t size; \ - uint32_t capacity; \ - obj_t *base; \ -} pre##_pool = {0, 0, 0, NULL}; \ -static MAYBE_UNUSED uint32_t pre##_alloc(uint32_t count) \ -{ \ - uint32_t offset; \ - if (pre##_pool.size + count > pre##_pool.capacity) { \ - while (pre##_pool.size + count > pre##_pool.capacity) \ - if (pre##_pool.capacity) \ - pre##_pool.capacity *= 2; \ - else \ - pre##_pool.capacity = initial_capacity; \ - pre##_pool.base = realloc(pre##_pool.base, \ - pre##_pool.capacity * sizeof(obj_t)); \ - } \ - offset = pre##_pool.size; \ - pre##_pool.size += count; \ - return offset; \ -} \ -static MAYBE_UNUSED void pre##_free(uint32_t count) \ -{ \ - pre##_pool.size -= count; \ -} \ -static MAYBE_UNUSED uint32_t pre##_offset(obj_t *obj) \ -{ \ - return obj == NULL ? ~0 : obj - pre##_pool.base; \ -} \ -static MAYBE_UNUSED obj_t *pre##_pointer(uint32_t offset) \ -{ \ - return offset >= pre##_pool.size ? 
NULL : &pre##_pool.base[offset]; \ -} \ -static MAYBE_UNUSED void pre##_commit(void) \ -{ \ - pre##_pool.committed = pre##_pool.size; \ -} \ -static MAYBE_UNUSED void pre##_reset(void) \ -{ \ - free(pre##_pool.base); \ - pre##_pool.base = NULL; \ - pre##_pool.size = 0; \ - pre##_pool.capacity = 0; \ - pre##_pool.committed = 0; \ -} - -#endif diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c index e94d91d129..67d27f0b6c 100644 --- a/vcs-svn/repo_tree.c +++ b/vcs-svn/repo_tree.c @@ -4,326 +4,45 @@ */ #include "git-compat-util.h" - -#include "string_pool.h" +#include "strbuf.h" #include "repo_tree.h" -#include "obj_pool.h" #include "fast_export.h" -#include "trp.h" - -struct repo_dirent { - uint32_t name_offset; - struct trp_node children; - uint32_t mode; - uint32_t content_offset; -}; - -struct repo_dir { - struct trp_root entries; -}; - -struct repo_commit { - uint32_t root_dir_offset; -}; - -/* Memory pools for commit, dir and dirent */ -obj_pool_gen(commit, struct repo_commit, 4096) -obj_pool_gen(dir, struct repo_dir, 4096) -obj_pool_gen(dent, struct repo_dirent, 4096) - -static uint32_t active_commit; -static uint32_t mark; - -static int repo_dirent_name_cmp(const void *a, const void *b); - -/* Treap for directory entries */ -trp_gen(static, dent_, struct repo_dirent, children, dent, repo_dirent_name_cmp); - -uint32_t next_blob_mark(void) -{ - return mark++; -} - -static struct repo_dir *repo_commit_root_dir(struct repo_commit *commit) -{ - return dir_pointer(commit->root_dir_offset); -} - -static struct repo_dirent *repo_first_dirent(struct repo_dir *dir) -{ - return dent_first(&dir->entries); -} - -static int repo_dirent_name_cmp(const void *a, const void *b) -{ - const struct repo_dirent *dent1 = a, *dent2 = b; - uint32_t a_offset = dent1->name_offset; - uint32_t b_offset = dent2->name_offset; - return (a_offset > b_offset) - (a_offset < b_offset); -} - -static int repo_dirent_is_dir(struct repo_dirent *dent) +const char *repo_read_path(const char *path, 
uint32_t *mode_out) { - return dent != NULL && dent->mode == REPO_MODE_DIR; -} + int err; + static struct strbuf buf = STRBUF_INIT; -static struct repo_dir *repo_dir_from_dirent(struct repo_dirent *dent) -{ - if (!repo_dirent_is_dir(dent)) + strbuf_reset(&buf); + err = fast_export_ls(path, mode_out, &buf); + if (err) { + if (errno != ENOENT) + die_errno("BUG: unexpected fast_export_ls error"); + /* Treat missing paths as directories. */ + *mode_out = REPO_MODE_DIR; return NULL; - return dir_pointer(dent->content_offset); -} - -static struct repo_dir *repo_clone_dir(struct repo_dir *orig_dir) -{ - uint32_t orig_o, new_o; - orig_o = dir_offset(orig_dir); - if (orig_o >= dir_pool.committed) - return orig_dir; - new_o = dir_alloc(1); - orig_dir = dir_pointer(orig_o); - *dir_pointer(new_o) = *orig_dir; - return dir_pointer(new_o); -} - -static struct repo_dirent *repo_read_dirent(uint32_t revision, uint32_t *path) -{ - uint32_t name = 0; - struct repo_dirent *key = dent_pointer(dent_alloc(1)); - struct repo_dir *dir = NULL; - struct repo_dirent *dent = NULL; - dir = repo_commit_root_dir(commit_pointer(revision)); - while (~(name = *path++)) { - key->name_offset = name; - dent = dent_search(&dir->entries, key); - if (dent == NULL || !repo_dirent_is_dir(dent)) - break; - dir = repo_dir_from_dirent(dent); } - dent_free(1); - return dent; + return buf.buf; } -static void repo_write_dirent(uint32_t *path, uint32_t mode, - uint32_t content_offset, uint32_t del) +void repo_copy(uint32_t revision, const char *src, const char *dst) { - uint32_t name, revision, dir_o = ~0, parent_dir_o = ~0; - struct repo_dir *dir; - struct repo_dirent *key; - struct repo_dirent *dent = NULL; - revision = active_commit; - dir = repo_commit_root_dir(commit_pointer(revision)); - dir = repo_clone_dir(dir); - commit_pointer(revision)->root_dir_offset = dir_offset(dir); - while (~(name = *path++)) { - parent_dir_o = dir_offset(dir); - - key = dent_pointer(dent_alloc(1)); - key->name_offset = name; - - 
dent = dent_search(&dir->entries, key); - if (dent == NULL) - dent = key; - else - dent_free(1); - - if (dent == key) { - dent->mode = REPO_MODE_DIR; - dent->content_offset = 0; - dent_insert(&dir->entries, dent); - } - - if (dent_offset(dent) < dent_pool.committed) { - dir_o = repo_dirent_is_dir(dent) ? - dent->content_offset : ~0; - dent_remove(&dir->entries, dent); - dent = dent_pointer(dent_alloc(1)); - dent->name_offset = name; - dent->mode = REPO_MODE_DIR; - dent->content_offset = dir_o; - dent_insert(&dir->entries, dent); - } - - dir = repo_dir_from_dirent(dent); - dir = repo_clone_dir(dir); - dent->content_offset = dir_offset(dir); - } - if (dent == NULL) + int err; + uint32_t mode; + static struct strbuf data = STRBUF_INIT; + + strbuf_reset(&data); + err = fast_export_ls_rev(revision, src, &mode, &data); + if (err) { + if (errno != ENOENT) + die_errno("BUG: unexpected fast_export_ls_rev error"); + fast_export_delete(dst); return; - dent->mode = mode; - dent->content_offset = content_offset; - if (del && ~parent_dir_o) - dent_remove(&dir_pointer(parent_dir_o)->entries, dent); -} - -uint32_t repo_copy(uint32_t revision, uint32_t *src, uint32_t *dst) -{ - uint32_t mode = 0, content_offset = 0; - struct repo_dirent *src_dent; - src_dent = repo_read_dirent(revision, src); - if (src_dent != NULL) { - mode = src_dent->mode; - content_offset = src_dent->content_offset; - repo_write_dirent(dst, mode, content_offset, 0); - } - return mode; -} - -void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark) -{ - repo_write_dirent(path, mode, blob_mark, 0); -} - -uint32_t repo_replace(uint32_t *path, uint32_t blob_mark) -{ - uint32_t mode = 0; - struct repo_dirent *src_dent; - src_dent = repo_read_dirent(active_commit, path); - if (src_dent != NULL) { - mode = src_dent->mode; - repo_write_dirent(path, mode, blob_mark, 0); - } - return mode; -} - -void repo_modify(uint32_t *path, uint32_t mode, uint32_t blob_mark) -{ - struct repo_dirent *src_dent; - src_dent = 
repo_read_dirent(active_commit, path); - if (src_dent != NULL && blob_mark == 0) - blob_mark = src_dent->content_offset; - repo_write_dirent(path, mode, blob_mark, 0); -} - -void repo_delete(uint32_t *path) -{ - repo_write_dirent(path, 0, 0, 1); -} - -static void repo_git_add_r(uint32_t depth, uint32_t *path, struct repo_dir *dir); - -static void repo_git_add(uint32_t depth, uint32_t *path, struct repo_dirent *dent) -{ - if (repo_dirent_is_dir(dent)) - repo_git_add_r(depth, path, repo_dir_from_dirent(dent)); - else - fast_export_modify(depth, path, - dent->mode, dent->content_offset); -} - -static void repo_git_add_r(uint32_t depth, uint32_t *path, struct repo_dir *dir) -{ - struct repo_dirent *de = repo_first_dirent(dir); - while (de) { - path[depth] = de->name_offset; - repo_git_add(depth + 1, path, de); - de = dent_next(&dir->entries, de); - } -} - -static void repo_diff_r(uint32_t depth, uint32_t *path, struct repo_dir *dir1, - struct repo_dir *dir2) -{ - struct repo_dirent *de1, *de2; - de1 = repo_first_dirent(dir1); - de2 = repo_first_dirent(dir2); - - while (de1 && de2) { - if (de1->name_offset < de2->name_offset) { - path[depth] = de1->name_offset; - fast_export_delete(depth + 1, path); - de1 = dent_next(&dir1->entries, de1); - continue; - } - if (de1->name_offset > de2->name_offset) { - path[depth] = de2->name_offset; - repo_git_add(depth + 1, path, de2); - de2 = dent_next(&dir2->entries, de2); - continue; - } - path[depth] = de1->name_offset; - - if (de1->mode == de2->mode && - de1->content_offset == de2->content_offset) { - ; /* No change. 
*/ - } else if (repo_dirent_is_dir(de1) && repo_dirent_is_dir(de2)) { - repo_diff_r(depth + 1, path, - repo_dir_from_dirent(de1), - repo_dir_from_dirent(de2)); - } else if (!repo_dirent_is_dir(de1) && !repo_dirent_is_dir(de2)) { - repo_git_add(depth + 1, path, de2); - } else { - fast_export_delete(depth + 1, path); - repo_git_add(depth + 1, path, de2); - } - de1 = dent_next(&dir1->entries, de1); - de2 = dent_next(&dir2->entries, de2); - } - while (de1) { - path[depth] = de1->name_offset; - fast_export_delete(depth + 1, path); - de1 = dent_next(&dir1->entries, de1); - } - while (de2) { - path[depth] = de2->name_offset; - repo_git_add(depth + 1, path, de2); - de2 = dent_next(&dir2->entries, de2); - } -} - -static uint32_t path_stack[REPO_MAX_PATH_DEPTH]; - -void repo_diff(uint32_t r1, uint32_t r2) -{ - repo_diff_r(0, - path_stack, - repo_commit_root_dir(commit_pointer(r1)), - repo_commit_root_dir(commit_pointer(r2))); -} - -void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, - uint32_t url, unsigned long timestamp) -{ - fast_export_commit(revision, author, log, uuid, url, timestamp); - dent_commit(); - dir_commit(); - active_commit = commit_alloc(1); - commit_pointer(active_commit)->root_dir_offset = - commit_pointer(active_commit - 1)->root_dir_offset; -} - -static void mark_init(void) -{ - uint32_t i; - mark = 0; - for (i = 0; i < dent_pool.size; i++) - if (!repo_dirent_is_dir(dent_pointer(i)) && - dent_pointer(i)->content_offset > mark) - mark = dent_pointer(i)->content_offset; - mark++; -} - -void repo_init(void) -{ - mark_init(); - if (commit_pool.size == 0) { - /* Create empty tree for commit 0. */ - commit_alloc(1); - commit_pointer(0)->root_dir_offset = dir_alloc(1); - dir_pointer(0)->entries.trp_root = ~0; - dir_commit(); } - /* Preallocate next commit, ready for changes. 
*/ - active_commit = commit_alloc(1); - commit_pointer(active_commit)->root_dir_offset = - commit_pointer(active_commit - 1)->root_dir_offset; + fast_export_modify(dst, mode, data.buf); } -void repo_reset(void) +void repo_delete(const char *path) { - pool_reset(); - commit_reset(); - dir_reset(); - dent_reset(); + fast_export_delete(path); } diff --git a/vcs-svn/repo_tree.h b/vcs-svn/repo_tree.h index 5476175922..889c6a3c95 100644 --- a/vcs-svn/repo_tree.h +++ b/vcs-svn/repo_tree.h @@ -1,24 +1,21 @@ #ifndef REPO_TREE_H_ #define REPO_TREE_H_ -#include "git-compat-util.h" +struct strbuf; #define REPO_MODE_DIR 0040000 #define REPO_MODE_BLB 0100644 #define REPO_MODE_EXE 0100755 #define REPO_MODE_LNK 0120000 -#define REPO_MAX_PATH_LEN 4096 -#define REPO_MAX_PATH_DEPTH 1000 - uint32_t next_blob_mark(void); -uint32_t repo_copy(uint32_t revision, uint32_t *src, uint32_t *dst); -void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark); -uint32_t repo_replace(uint32_t *path, uint32_t blob_mark); -void repo_modify(uint32_t *path, uint32_t mode, uint32_t blob_mark); -void repo_delete(uint32_t *path); -void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, - uint32_t url, long unsigned timestamp); +void repo_copy(uint32_t revision, const char *src, const char *dst); +void repo_add(const char *path, uint32_t mode, uint32_t blob_mark); +const char *repo_read_path(const char *path, uint32_t *mode_out); +void repo_delete(const char *path); +void repo_commit(uint32_t revision, const char *author, + const struct strbuf *log, const char *uuid, const char *url, + long unsigned timestamp); void repo_diff(uint32_t r1, uint32_t r2); void repo_init(void); void repo_reset(void); diff --git a/vcs-svn/sliding_window.c b/vcs-svn/sliding_window.c new file mode 100644 index 0000000000..ec2707c9c4 --- /dev/null +++ b/vcs-svn/sliding_window.c @@ -0,0 +1,79 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. 
+ */ + +#include "git-compat-util.h" +#include "sliding_window.h" +#include "line_buffer.h" +#include "strbuf.h" + +static int input_error(struct line_buffer *file) +{ + if (!buffer_ferror(file)) + return error("delta preimage ends early"); + return error("cannot read delta preimage: %s", strerror(errno)); +} + +static int skip_or_whine(struct line_buffer *file, off_t gap) +{ + if (buffer_skip_bytes(file, gap) != gap) + return input_error(file); + return 0; +} + +static int read_to_fill_or_whine(struct line_buffer *file, + struct strbuf *buf, size_t width) +{ + buffer_read_binary(file, buf, width - buf->len); + if (buf->len != width) + return input_error(file); + return 0; +} + +static int check_offset_overflow(off_t offset, uintmax_t len) +{ + if (len > maximum_signed_value_of_type(off_t)) + return error("unrepresentable length in delta: " + "%"PRIuMAX" > OFF_MAX", len); + if (signed_add_overflows(offset, (off_t) len)) + return error("unrepresentable offset in delta: " + "%"PRIuMAX" + %"PRIuMAX" > OFF_MAX", + (uintmax_t) offset, len); + return 0; +} + +int move_window(struct sliding_view *view, off_t off, size_t width) +{ + off_t file_offset; + assert(view); + assert(view->width <= view->buf.len); + assert(!check_offset_overflow(view->off, view->buf.len)); + + if (check_offset_overflow(off, width)) + return -1; + if (off < view->off || off + width < view->off + view->width) + return error("invalid delta: window slides left"); + if (view->max_off >= 0 && view->max_off < off + width) + return error("delta preimage ends early"); + + file_offset = view->off + view->buf.len; + if (off < file_offset) { + /* Move the overlapping region into place. */ + strbuf_remove(&view->buf, 0, off - view->off); + } else { + /* Seek ahead to skip the gap. */ + if (skip_or_whine(view->file, off - file_offset)) + return -1; + strbuf_setlen(&view->buf, 0); + } + + if (view->buf.len > width) + ; /* Already read. 
*/ + else if (read_to_fill_or_whine(view->file, &view->buf, width)) + return -1; + + view->off = off; + view->width = width; + return 0; +} diff --git a/vcs-svn/sliding_window.h b/vcs-svn/sliding_window.h new file mode 100644 index 0000000000..b43a825cba --- /dev/null +++ b/vcs-svn/sliding_window.h @@ -0,0 +1,18 @@ +#ifndef SLIDING_WINDOW_H_ +#define SLIDING_WINDOW_H_ + +#include "strbuf.h" + +struct sliding_view { + struct line_buffer *file; + off_t off; + size_t width; + off_t max_off; /* -1 means unlimited */ + struct strbuf buf; +}; + +#define SLIDING_VIEW_INIT(input, len) { (input), 0, 0, (len), STRBUF_INIT } + +extern int move_window(struct sliding_view *view, off_t off, size_t width); + +#endif diff --git a/vcs-svn/string_pool.c b/vcs-svn/string_pool.c deleted file mode 100644 index f5b1da836e..0000000000 --- a/vcs-svn/string_pool.c +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Licensed under a two-clause BSD-style license. - * See LICENSE for details. - */ - -#include "git-compat-util.h" -#include "trp.h" -#include "obj_pool.h" -#include "string_pool.h" - -static struct trp_root tree = { ~0 }; - -struct node { - uint32_t offset; - struct trp_node children; -}; - -/* Two memory pools: one for struct node, and another for strings */ -obj_pool_gen(node, struct node, 4096) -obj_pool_gen(string, char, 4096) - -static char *node_value(struct node *node) -{ - return node ? 
string_pointer(node->offset) : NULL; -} - -static int node_cmp(struct node *a, struct node *b) -{ - return strcmp(node_value(a), node_value(b)); -} - -/* Build a Treap from the node structure (a trp_node w/ offset) */ -trp_gen(static, tree_, struct node, children, node, node_cmp); - -const char *pool_fetch(uint32_t entry) -{ - return node_value(node_pointer(entry)); -} - -uint32_t pool_intern(const char *key) -{ - /* Canonicalize key */ - struct node *match = NULL, *node; - uint32_t key_len; - if (key == NULL) - return ~0; - key_len = strlen(key) + 1; - node = node_pointer(node_alloc(1)); - node->offset = string_alloc(key_len); - strcpy(node_value(node), key); - match = tree_search(&tree, node); - if (!match) { - tree_insert(&tree, node); - } else { - node_free(1); - string_free(key_len); - node = match; - } - return node_offset(node); -} - -uint32_t pool_tok_r(char *str, const char *delim, char **saveptr) -{ - char *token = strtok_r(str, delim, saveptr); - return token ? pool_intern(token) : ~0; -} - -void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream) -{ - uint32_t i; - for (i = 0; i < len && ~seq[i]; i++) { - fputs(pool_fetch(seq[i]), stream); - if (i < len - 1 && ~seq[i + 1]) - fputc(delim, stream); - } -} - -uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str) -{ - char *context = NULL; - uint32_t token = ~0; - uint32_t length; - - if (sz == 0) - return ~0; - if (str) - token = pool_tok_r(str, delim, &context); - for (length = 0; length < sz; length++) { - seq[length] = token; - if (token == ~0) - return length; - token = pool_tok_r(NULL, delim, &context); - } - seq[sz - 1] = ~0; - return sz; -} - -void pool_reset(void) -{ - node_reset(); - string_reset(); -} diff --git a/vcs-svn/string_pool.h b/vcs-svn/string_pool.h deleted file mode 100644 index 222fb66e68..0000000000 --- a/vcs-svn/string_pool.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef STRING_POOL_H_ -#define STRING_POOL_H_ - -uint32_t pool_intern(const char 
*key); -const char *pool_fetch(uint32_t entry); -uint32_t pool_tok_r(char *str, const char *delim, char **saveptr); -void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream); -uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str); -void pool_reset(void); - -#endif diff --git a/vcs-svn/string_pool.txt b/vcs-svn/string_pool.txt deleted file mode 100644 index 1b41f15628..0000000000 --- a/vcs-svn/string_pool.txt +++ /dev/null @@ -1,43 +0,0 @@ -string_pool API -=============== - -The string_pool API provides facilities for replacing strings -with integer keys that can be more easily compared and stored. -The facilities are designed so that one could teach Git without -too much trouble to store the information needed for these keys to -remain valid over multiple executions. - -Functions ---------- - -pool_intern:: - Include a string in the string pool and get its key. - If that string is already in the pool, retrieves its - existing key. - -pool_fetch:: - Retrieve the string associated to a given key. - -pool_tok_r:: - Extract the key of the next token from a string. - Interface mimics strtok_r. - -pool_print_seq:: - Print a sequence of strings named by key to a file, using the - specified delimiter to separate them. - - If NULL (key ~0) appears in the sequence, the sequence ends - early. - -pool_tok_seq:: - Split a string into tokens, storing the keys of segments - into a caller-provided array. - - Unless sz is 0, the array will always be ~0-terminated. - If there is not enough room for all the tokens, the - array holds as many tokens as fit in the entries before - the terminating ~0. Return value is the index after the - last token, or sz if the tokens did not fit. - -pool_reset:: - Deallocate storage for the string pool. 
diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c new file mode 100644 index 0000000000..1647c1a780 --- /dev/null +++ b/vcs-svn/svndiff.c @@ -0,0 +1,308 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#include "git-compat-util.h" +#include "sliding_window.h" +#include "line_buffer.h" +#include "svndiff.h" + +/* + * svndiff0 applier + * + * See http://svn.apache.org/repos/asf/subversion/trunk/notes/svndiff. + * + * svndiff0 ::= 'SVN\0' window* + * window ::= int int int int int instructions inline_data; + * instructions ::= instruction*; + * instruction ::= view_selector int int + * | copyfrom_data int + * | packed_view_selector int + * | packed_copyfrom_data + * ; + * view_selector ::= copyfrom_source + * | copyfrom_target + * ; + * copyfrom_source ::= # binary 00 000000; + * copyfrom_target ::= # binary 01 000000; + * copyfrom_data ::= # binary 10 000000; + * packed_view_selector ::= # view_selector OR-ed with 6 bit value; + * packed_copyfrom_data ::= # copyfrom_data OR-ed with 6 bit value; + * int ::= highdigit* lowdigit; + * highdigit ::= # binary 1000 0000 OR-ed with 7 bit value; + * lowdigit ::= # 7 bit value; + */ + +#define INSN_MASK 0xc0 +#define INSN_COPYFROM_SOURCE 0x00 +#define INSN_COPYFROM_TARGET 0x40 +#define INSN_COPYFROM_DATA 0x80 +#define OPERAND_MASK 0x3f + +#define VLI_CONTINUE 0x80 +#define VLI_DIGIT_MASK 0x7f +#define VLI_BITS_PER_DIGIT 7 + +struct window { + struct sliding_view *in; + struct strbuf out; + struct strbuf instructions; + struct strbuf data; +}; + +#define WINDOW_INIT(w) { (w), STRBUF_INIT, STRBUF_INIT, STRBUF_INIT } + +static void window_release(struct window *ctx) +{ + strbuf_release(&ctx->out); + strbuf_release(&ctx->instructions); + strbuf_release(&ctx->data); +} + +static int write_strbuf(struct strbuf *sb, FILE *out) +{ + if (fwrite(sb->buf, 1, sb->len, out) == sb->len) /* Success. 
*/ + return 0; + return error("cannot write delta postimage: %s", strerror(errno)); +} + +static int error_short_read(struct line_buffer *input) +{ + if (buffer_ferror(input)) + return error("error reading delta: %s", strerror(errno)); + return error("invalid delta: unexpected end of file"); +} + +static int read_chunk(struct line_buffer *delta, off_t *delta_len, + struct strbuf *buf, size_t len) +{ + strbuf_reset(buf); + if (len > *delta_len || + buffer_read_binary(delta, buf, len) != len) + return error_short_read(delta); + *delta_len -= buf->len; + return 0; +} + +static int read_magic(struct line_buffer *in, off_t *len) +{ + static const char magic[] = {'S', 'V', 'N', '\0'}; + struct strbuf sb = STRBUF_INIT; + + if (read_chunk(in, len, &sb, sizeof(magic))) { + strbuf_release(&sb); + return -1; + } + if (memcmp(sb.buf, magic, sizeof(magic))) { + strbuf_release(&sb); + return error("invalid delta: unrecognized file type"); + } + strbuf_release(&sb); + return 0; +} + +static int read_int(struct line_buffer *in, uintmax_t *result, off_t *len) +{ + uintmax_t rv = 0; + off_t sz; + for (sz = *len; sz; sz--) { + const int ch = buffer_read_char(in); + if (ch == EOF) + break; + + rv <<= VLI_BITS_PER_DIGIT; + rv += (ch & VLI_DIGIT_MASK); + if (ch & VLI_CONTINUE) + continue; + + *result = rv; + *len = sz - 1; + return 0; + } + return error_short_read(in); +} + +static int parse_int(const char **buf, size_t *result, const char *end) +{ + size_t rv = 0; + const char *pos; + for (pos = *buf; pos != end; pos++) { + unsigned char ch = *pos; + + rv <<= VLI_BITS_PER_DIGIT; + rv += (ch & VLI_DIGIT_MASK); + if (ch & VLI_CONTINUE) + continue; + + *result = rv; + *buf = pos + 1; + return 0; + } + return error("invalid delta: unexpected end of instructions section"); +} + +static int read_offset(struct line_buffer *in, off_t *result, off_t *len) +{ + uintmax_t val; + if (read_int(in, &val, len)) + return -1; + if (val > maximum_signed_value_of_type(off_t)) + return 
error("unrepresentable offset in delta: %"PRIuMAX"", val); + *result = val; + return 0; +} + +static int read_length(struct line_buffer *in, size_t *result, off_t *len) +{ + uintmax_t val; + if (read_int(in, &val, len)) + return -1; + if (val > SIZE_MAX) + return error("unrepresentable length in delta: %"PRIuMAX"", val); + *result = val; + return 0; +} + +static int copyfrom_source(struct window *ctx, const char **instructions, + size_t nbytes, const char *insns_end) +{ + size_t offset; + if (parse_int(instructions, &offset, insns_end)) + return -1; + if (unsigned_add_overflows(offset, nbytes) || + offset + nbytes > ctx->in->width) + return error("invalid delta: copies source data outside view"); + strbuf_add(&ctx->out, ctx->in->buf.buf + offset, nbytes); + return 0; +} + +static int copyfrom_target(struct window *ctx, const char **instructions, + size_t nbytes, const char *instructions_end) +{ + size_t offset; + if (parse_int(instructions, &offset, instructions_end)) + return -1; + if (offset >= ctx->out.len) + return error("invalid delta: copies from the future"); + for (; nbytes > 0; nbytes--) + strbuf_addch(&ctx->out, ctx->out.buf[offset++]); + return 0; +} + +static int copyfrom_data(struct window *ctx, size_t *data_pos, size_t nbytes) +{ + const size_t pos = *data_pos; + if (unsigned_add_overflows(pos, nbytes) || + pos + nbytes > ctx->data.len) + return error("invalid delta: copies unavailable inline data"); + strbuf_add(&ctx->out, ctx->data.buf + pos, nbytes); + *data_pos += nbytes; + return 0; +} + +static int parse_first_operand(const char **buf, size_t *out, const char *end) +{ + size_t result = (unsigned char) *(*buf)++ & OPERAND_MASK; + if (result) { /* immediate operand */ + *out = result; + return 0; + } + return parse_int(buf, out, end); +} + +static int execute_one_instruction(struct window *ctx, + const char **instructions, size_t *data_pos) +{ + unsigned int instruction; + const char *insns_end = ctx->instructions.buf + ctx->instructions.len; + 
size_t nbytes; + assert(ctx); + assert(instructions && *instructions); + assert(data_pos); + + instruction = (unsigned char) **instructions; + if (parse_first_operand(instructions, &nbytes, insns_end)) + return -1; + switch (instruction & INSN_MASK) { + case INSN_COPYFROM_SOURCE: + return copyfrom_source(ctx, instructions, nbytes, insns_end); + case INSN_COPYFROM_TARGET: + return copyfrom_target(ctx, instructions, nbytes, insns_end); + case INSN_COPYFROM_DATA: + return copyfrom_data(ctx, data_pos, nbytes); + default: + return error("invalid delta: unrecognized instruction"); + } +} + +static int apply_window_in_core(struct window *ctx) +{ + const char *instructions; + size_t data_pos = 0; + + /* + * Fill ctx->out.buf using data from the source, target, + * and inline data views. + */ + for (instructions = ctx->instructions.buf; + instructions != ctx->instructions.buf + ctx->instructions.len; + ) + if (execute_one_instruction(ctx, &instructions, &data_pos)) + return -1; + if (data_pos != ctx->data.len) + return error("invalid delta: does not copy all inline data"); + return 0; +} + +static int apply_one_window(struct line_buffer *delta, off_t *delta_len, + struct sliding_view *preimage, FILE *out) +{ + struct window ctx = WINDOW_INIT(preimage); + size_t out_len; + size_t instructions_len; + size_t data_len; + assert(delta_len); + + /* "source view" offset and length already handled; */ + if (read_length(delta, &out_len, delta_len) || + read_length(delta, &instructions_len, delta_len) || + read_length(delta, &data_len, delta_len) || + read_chunk(delta, delta_len, &ctx.instructions, instructions_len) || + read_chunk(delta, delta_len, &ctx.data, data_len)) + goto error_out; + strbuf_grow(&ctx.out, out_len); + if (apply_window_in_core(&ctx)) + goto error_out; + if (ctx.out.len != out_len) { + error("invalid delta: incorrect postimage length"); + goto error_out; + } + if (write_strbuf(&ctx.out, out)) + goto error_out; + window_release(&ctx); + return 0; +error_out: + 
window_release(&ctx); + return -1; +} + +int svndiff0_apply(struct line_buffer *delta, off_t delta_len, + struct sliding_view *preimage, FILE *postimage) +{ + assert(delta && preimage && postimage); + + if (read_magic(delta, &delta_len)) + return -1; + while (delta_len) { /* For each window: */ + off_t pre_off = pre_off; /* stupid GCC... */ + size_t pre_len; + + if (read_offset(delta, &pre_off, &delta_len) || + read_length(delta, &pre_len, &delta_len) || + move_window(preimage, pre_off, pre_len) || + apply_one_window(delta, &delta_len, preimage, postimage)) + return -1; + } + return 0; +} diff --git a/vcs-svn/svndiff.h b/vcs-svn/svndiff.h new file mode 100644 index 0000000000..74eb464bab --- /dev/null +++ b/vcs-svn/svndiff.h @@ -0,0 +1,10 @@ +#ifndef SVNDIFF_H_ +#define SVNDIFF_H_ + +struct line_buffer; +struct sliding_view; + +extern int svndiff0_apply(struct line_buffer *delta, off_t delta_len, + struct sliding_view *preimage, FILE *postimage); + +#endif diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 53d0215d2d..0899790a33 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -11,8 +11,16 @@ #include "repo_tree.h" #include "fast_export.h" #include "line_buffer.h" -#include "obj_pool.h" -#include "string_pool.h" +#include "strbuf.h" +#include "svndump.h" + +/* + * Compare start of string to literal of equal length; + * must be guarded by length test. 
+ */ +#define constcmp(s, ref) memcmp(s, ref, sizeof(ref) - 1) + +#define REPORT_FILENO 3 #define NODEACT_REPLACE 4 #define NODEACT_DELETE 3 @@ -20,173 +28,291 @@ #define NODEACT_CHANGE 1 #define NODEACT_UNKNOWN 0 -#define DUMP_CTX 0 -#define REV_CTX 1 -#define NODE_CTX 2 +/* States: */ +#define DUMP_CTX 0 /* dump metadata */ +#define REV_CTX 1 /* revision metadata */ +#define NODE_CTX 2 /* node metadata */ +#define INTERNODE_CTX 3 /* between nodes */ #define LENGTH_UNKNOWN (~0) #define DATE_RFC2822_LEN 31 -/* Create memory pool for log messages */ -obj_pool_gen(log, char, 4096) - -static char* log_copy(uint32_t length, char *log) -{ - char *buffer; - log_free(log_pool.size); - buffer = log_pointer(log_alloc(length)); - strncpy(buffer, log, length); - return buffer; -} +static struct line_buffer input = LINE_BUFFER_INIT; static struct { - uint32_t action, propLength, textLength, srcRev, srcMode, mark, type; - uint32_t src[REPO_MAX_PATH_DEPTH], dst[REPO_MAX_PATH_DEPTH]; + uint32_t action, propLength, srcRev, type; + off_t text_length; + struct strbuf src, dst; + uint32_t text_delta, prop_delta; } node_ctx; static struct { - uint32_t revision, author; + uint32_t revision; unsigned long timestamp; - char *log; + struct strbuf log, author; } rev_ctx; static struct { - uint32_t uuid, url; + uint32_t version; + struct strbuf uuid, url; } dump_ctx; -static struct { - uint32_t svn_log, svn_author, svn_date, svn_executable, svn_special, uuid, - revision_number, node_path, node_kind, node_action, - node_copyfrom_path, node_copyfrom_rev, text_content_length, - prop_content_length, content_length; -} keys; - static void reset_node_ctx(char *fname) { node_ctx.type = 0; node_ctx.action = NODEACT_UNKNOWN; node_ctx.propLength = LENGTH_UNKNOWN; - node_ctx.textLength = LENGTH_UNKNOWN; - node_ctx.src[0] = ~0; + node_ctx.text_length = -1; + strbuf_reset(&node_ctx.src); node_ctx.srcRev = 0; - node_ctx.srcMode = 0; - pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.dst, "/", fname); - 
node_ctx.mark = 0; + strbuf_reset(&node_ctx.dst); + if (fname) + strbuf_addstr(&node_ctx.dst, fname); + node_ctx.text_delta = 0; + node_ctx.prop_delta = 0; } static void reset_rev_ctx(uint32_t revision) { rev_ctx.revision = revision; rev_ctx.timestamp = 0; - rev_ctx.log = NULL; - rev_ctx.author = ~0; + strbuf_reset(&rev_ctx.log); + strbuf_reset(&rev_ctx.author); } -static void reset_dump_ctx(uint32_t url) +static void reset_dump_ctx(const char *url) { - dump_ctx.url = url; - dump_ctx.uuid = ~0; + strbuf_reset(&dump_ctx.url); + if (url) + strbuf_addstr(&dump_ctx.url, url); + dump_ctx.version = 1; + strbuf_reset(&dump_ctx.uuid); } -static void init_keys(void) +static void handle_property(const struct strbuf *key_buf, + struct strbuf *val, + uint32_t *type_set) { - keys.svn_log = pool_intern("svn:log"); - keys.svn_author = pool_intern("svn:author"); - keys.svn_date = pool_intern("svn:date"); - keys.svn_executable = pool_intern("svn:executable"); - keys.svn_special = pool_intern("svn:special"); - keys.uuid = pool_intern("UUID"); - keys.revision_number = pool_intern("Revision-number"); - keys.node_path = pool_intern("Node-path"); - keys.node_kind = pool_intern("Node-kind"); - keys.node_action = pool_intern("Node-action"); - keys.node_copyfrom_path = pool_intern("Node-copyfrom-path"); - keys.node_copyfrom_rev = pool_intern("Node-copyfrom-rev"); - keys.text_content_length = pool_intern("Text-content-length"); - keys.prop_content_length = pool_intern("Prop-content-length"); - keys.content_length = pool_intern("Content-length"); + const char *key = key_buf->buf; + size_t keylen = key_buf->len; + + switch (keylen + 1) { + case sizeof("svn:log"): + if (constcmp(key, "svn:log")) + break; + if (!val) + die("invalid dump: unsets svn:log"); + strbuf_swap(&rev_ctx.log, val); + break; + case sizeof("svn:author"): + if (constcmp(key, "svn:author")) + break; + if (!val) + strbuf_reset(&rev_ctx.author); + else + strbuf_swap(&rev_ctx.author, val); + break; + case sizeof("svn:date"): + 
if (constcmp(key, "svn:date")) + break; + if (!val) + die("invalid dump: unsets svn:date"); + if (parse_date_basic(val->buf, &rev_ctx.timestamp, NULL)) + warning("invalid timestamp: %s", val->buf); + break; + case sizeof("svn:executable"): + case sizeof("svn:special"): + if (keylen == strlen("svn:executable") && + constcmp(key, "svn:executable")) + break; + if (keylen == strlen("svn:special") && + constcmp(key, "svn:special")) + break; + if (*type_set) { + if (!val) + return; + die("invalid dump: sets type twice"); + } + if (!val) { + node_ctx.type = REPO_MODE_BLB; + return; + } + *type_set = 1; + node_ctx.type = keylen == strlen("svn:executable") ? + REPO_MODE_EXE : + REPO_MODE_LNK; + } +} + +static void die_short_read(void) +{ + if (buffer_ferror(&input)) + die_errno("error reading dump file"); + die("invalid dump: unexpected end of file"); } static void read_props(void) { - uint32_t len; - uint32_t key = ~0; - char *val = NULL; - char *t; - while ((t = buffer_read_line()) && strcmp(t, "PROPS-END")) { - if (!strncmp(t, "K ", 2)) { - len = atoi(&t[2]); - key = pool_intern(buffer_read_string(len)); - buffer_read_line(); - } else if (!strncmp(t, "V ", 2)) { - len = atoi(&t[2]); - val = buffer_read_string(len); - if (key == keys.svn_log) { - /* Value length excludes terminating nul. 
*/ - rev_ctx.log = log_copy(len + 1, val); - } else if (key == keys.svn_author) { - rev_ctx.author = pool_intern(val); - } else if (key == keys.svn_date) { - if (parse_date_basic(val, &rev_ctx.timestamp, NULL)) - fprintf(stderr, "Invalid timestamp: %s\n", val); - } else if (key == keys.svn_executable) { - node_ctx.type = REPO_MODE_EXE; - } else if (key == keys.svn_special) { - node_ctx.type = REPO_MODE_LNK; - } - key = ~0; - buffer_read_line(); + static struct strbuf key = STRBUF_INIT; + static struct strbuf val = STRBUF_INIT; + const char *t; + /* + * NEEDSWORK: to support simple mode changes like + * K 11 + * svn:special + * V 1 + * * + * D 14 + * svn:executable + * we keep track of whether a mode has been set and reset to + * plain file only if not. We should be keeping track of the + * symlink and executable bits separately instead. + */ + uint32_t type_set = 0; + while ((t = buffer_read_line(&input)) && strcmp(t, "PROPS-END")) { + uint32_t len; + const char type = t[0]; + int ch; + + if (!type || t[1] != ' ') + die("invalid property line: %s", t); + len = atoi(&t[2]); + strbuf_reset(&val); + buffer_read_binary(&input, &val, len); + if (val.len < len) + die_short_read(); + + /* Discard trailing newline. 
*/ + ch = buffer_read_char(&input); + if (ch == EOF) + die_short_read(); + if (ch != '\n') + die("invalid dump: expected newline after %s", val.buf); + + switch (type) { + case 'K': + strbuf_swap(&key, &val); + continue; + case 'D': + handle_property(&val, NULL, &type_set); + continue; + case 'V': + handle_property(&key, &val, &type_set); + strbuf_reset(&key); + continue; + default: + die("invalid property line: %s", t); } } } static void handle_node(void) { - if (node_ctx.propLength != LENGTH_UNKNOWN && node_ctx.propLength) - read_props(); - - if (node_ctx.srcRev) - node_ctx.srcMode = repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst); - - if (node_ctx.textLength != LENGTH_UNKNOWN && - node_ctx.type != REPO_MODE_DIR) - node_ctx.mark = next_blob_mark(); + const uint32_t type = node_ctx.type; + const int have_props = node_ctx.propLength != LENGTH_UNKNOWN; + const int have_text = node_ctx.text_length != -1; + /* + * Old text for this node: + * NULL - directory or bug + * empty_blob - empty + * "<dataref>" - data retrievable from fast-import + */ + static const char *const empty_blob = "::empty::"; + const char *old_data = NULL; + uint32_t old_mode = REPO_MODE_BLB; if (node_ctx.action == NODEACT_DELETE) { - repo_delete(node_ctx.dst); - } else if (node_ctx.action == NODEACT_CHANGE || - node_ctx.action == NODEACT_REPLACE) { - if (node_ctx.action == NODEACT_REPLACE && - node_ctx.type == REPO_MODE_DIR) - repo_replace(node_ctx.dst, node_ctx.mark); - else if (node_ctx.propLength != LENGTH_UNKNOWN) - repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark); - else if (node_ctx.textLength != LENGTH_UNKNOWN) - node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark); + if (have_text || have_props || node_ctx.srcRev) + die("invalid dump: deletion node has " + "copyfrom info, text, or properties"); + repo_delete(node_ctx.dst.buf); + return; + } + if (node_ctx.action == NODEACT_REPLACE) { + repo_delete(node_ctx.dst.buf); + node_ctx.action = NODEACT_ADD; + } + if 
(node_ctx.srcRev) { + repo_copy(node_ctx.srcRev, node_ctx.src.buf, node_ctx.dst.buf); + if (node_ctx.action == NODEACT_ADD) + node_ctx.action = NODEACT_CHANGE; + } + if (have_text && type == REPO_MODE_DIR) + die("invalid dump: directories cannot have text attached"); + + /* + * Find old content (old_data) and decide on the new mode. + */ + if (node_ctx.action == NODEACT_CHANGE && !*node_ctx.dst.buf) { + if (type != REPO_MODE_DIR) + die("invalid dump: root of tree is not a regular file"); + old_data = NULL; + } else if (node_ctx.action == NODEACT_CHANGE) { + uint32_t mode; + old_data = repo_read_path(node_ctx.dst.buf, &mode); + if (mode == REPO_MODE_DIR && type != REPO_MODE_DIR) + die("invalid dump: cannot modify a directory into a file"); + if (mode != REPO_MODE_DIR && type == REPO_MODE_DIR) + die("invalid dump: cannot modify a file into a directory"); + node_ctx.type = mode; + old_mode = mode; } else if (node_ctx.action == NODEACT_ADD) { - if (node_ctx.srcRev && node_ctx.propLength != LENGTH_UNKNOWN) - repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark); - else if (node_ctx.srcRev && node_ctx.textLength != LENGTH_UNKNOWN) - node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark); - else if ((node_ctx.type == REPO_MODE_DIR && !node_ctx.srcRev) || - node_ctx.textLength != LENGTH_UNKNOWN) - repo_add(node_ctx.dst, node_ctx.type, node_ctx.mark); + if (type == REPO_MODE_DIR) + old_data = NULL; + else if (have_text) + old_data = empty_blob; + else + die("invalid dump: adds node without text"); + } else { + die("invalid dump: Node-path block lacks Node-action"); } - if (node_ctx.propLength == LENGTH_UNKNOWN && node_ctx.srcMode) - node_ctx.type = node_ctx.srcMode; + /* + * Adjust mode to reflect properties. + */ + if (have_props) { + if (!node_ctx.prop_delta) + node_ctx.type = type; + if (node_ctx.propLength) + read_props(); + } + + /* + * Save the result. + */ + if (type == REPO_MODE_DIR) /* directories are not tracked. 
*/ + return; + assert(old_data); + if (old_data == empty_blob) + /* For the fast_export_* functions, NULL means empty. */ + old_data = NULL; + if (!have_text) { + fast_export_modify(node_ctx.dst.buf, node_ctx.type, old_data); + return; + } + if (!node_ctx.text_delta) { + fast_export_modify(node_ctx.dst.buf, node_ctx.type, "inline"); + fast_export_data(node_ctx.type, node_ctx.text_length, &input); + return; + } + fast_export_modify(node_ctx.dst.buf, node_ctx.type, "inline"); + fast_export_blob_delta(node_ctx.type, old_mode, old_data, + node_ctx.text_length, &input); +} - if (node_ctx.mark) - fast_export_blob(node_ctx.type, node_ctx.mark, node_ctx.textLength); - else if (node_ctx.textLength != LENGTH_UNKNOWN) - buffer_skip_bytes(node_ctx.textLength); +static void begin_revision(void) +{ + if (!rev_ctx.revision) /* revision 0 gets no git commit. */ + return; + fast_export_begin_commit(rev_ctx.revision, rev_ctx.author.buf, + &rev_ctx.log, dump_ctx.uuid.buf, dump_ctx.url.buf, + rev_ctx.timestamp); } -static void handle_revision(void) +static void end_revision(void) { if (rev_ctx.revision) - repo_commit(rev_ctx.revision, rev_ctx.author, rev_ctx.log, - dump_ctx.uuid, dump_ctx.url, rev_ctx.timestamp); + fast_export_end_commit(rev_ctx.revision); } void svndump_read(const char *url) @@ -195,39 +321,69 @@ void svndump_read(const char *url) char *t; uint32_t active_ctx = DUMP_CTX; uint32_t len; - uint32_t key; - reset_dump_ctx(pool_intern(url)); - while ((t = buffer_read_line())) { - val = strstr(t, ": "); + reset_dump_ctx(url); + while ((t = buffer_read_line(&input))) { + val = strchr(t, ':'); if (!val) continue; - *val++ = '\0'; - *val++ = '\0'; - key = pool_intern(t); + val++; + if (*val != ' ') + continue; + val++; - if (key == keys.uuid) { - dump_ctx.uuid = pool_intern(val); - } else if (key == keys.revision_number) { + /* strlen(key) + 1 */ + switch (val - t - 1) { + case sizeof("SVN-fs-dump-format-version"): + if (constcmp(t, "SVN-fs-dump-format-version")) + continue; + 
dump_ctx.version = atoi(val); + if (dump_ctx.version > 3) + die("expected svn dump format version <= 3, found %"PRIu32, + dump_ctx.version); + break; + case sizeof("UUID"): + if (constcmp(t, "UUID")) + continue; + strbuf_reset(&dump_ctx.uuid); + strbuf_addstr(&dump_ctx.uuid, val); + break; + case sizeof("Revision-number"): + if (constcmp(t, "Revision-number")) + continue; if (active_ctx == NODE_CTX) handle_node(); + if (active_ctx == REV_CTX) + begin_revision(); if (active_ctx != DUMP_CTX) - handle_revision(); + end_revision(); active_ctx = REV_CTX; reset_rev_ctx(atoi(val)); - } else if (key == keys.node_path) { - if (active_ctx == NODE_CTX) - handle_node(); - active_ctx = NODE_CTX; - reset_node_ctx(val); - } else if (key == keys.node_kind) { + break; + case sizeof("Node-path"): + if (prefixcmp(t, "Node-")) + continue; + if (!constcmp(t + strlen("Node-"), "path")) { + if (active_ctx == NODE_CTX) + handle_node(); + if (active_ctx == REV_CTX) + begin_revision(); + active_ctx = NODE_CTX; + reset_node_ctx(val); + break; + } + if (constcmp(t + strlen("Node-"), "kind")) + continue; if (!strcmp(val, "dir")) node_ctx.type = REPO_MODE_DIR; else if (!strcmp(val, "file")) node_ctx.type = REPO_MODE_BLB; else fprintf(stderr, "Unknown node-kind: %s\n", val); - } else if (key == keys.node_action) { + break; + case sizeof("Node-action"): + if (constcmp(t, "Node-action")) + continue; if (!strcmp(val, "delete")) { node_ctx.action = NODEACT_DELETE; } else if (!strcmp(val, "add")) { @@ -240,52 +396,102 @@ void svndump_read(const char *url) fprintf(stderr, "Unknown node-action: %s\n", val); node_ctx.action = NODEACT_UNKNOWN; } - } else if (key == keys.node_copyfrom_path) { - pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.src, "/", val); - } else if (key == keys.node_copyfrom_rev) { + break; + case sizeof("Node-copyfrom-path"): + if (constcmp(t, "Node-copyfrom-path")) + continue; + strbuf_reset(&node_ctx.src); + strbuf_addstr(&node_ctx.src, val); + break; + case sizeof("Node-copyfrom-rev"): 
+ if (constcmp(t, "Node-copyfrom-rev")) + continue; node_ctx.srcRev = atoi(val); - } else if (key == keys.text_content_length) { - node_ctx.textLength = atoi(val); - } else if (key == keys.prop_content_length) { + break; + case sizeof("Text-content-length"): + if (!constcmp(t, "Text-content-length")) { + char *end; + uintmax_t textlen; + + textlen = strtoumax(val, &end, 10); + if (!isdigit(*val) || *end) + die("invalid dump: non-numeric length %s", val); + if (textlen > maximum_signed_value_of_type(off_t)) + die("unrepresentable length in dump: %s", val); + node_ctx.text_length = (off_t) textlen; + break; + } + if (constcmp(t, "Prop-content-length")) + continue; node_ctx.propLength = atoi(val); - } else if (key == keys.content_length) { + break; + case sizeof("Text-delta"): + if (!constcmp(t, "Text-delta")) { + node_ctx.text_delta = !strcmp(val, "true"); + break; + } + if (constcmp(t, "Prop-delta")) + continue; + node_ctx.prop_delta = !strcmp(val, "true"); + break; + case sizeof("Content-length"): + if (constcmp(t, "Content-length")) + continue; len = atoi(val); - buffer_read_line(); + t = buffer_read_line(&input); + if (!t) + die_short_read(); + if (*t) + die("invalid dump: expected blank line after content length header"); if (active_ctx == REV_CTX) { read_props(); } else if (active_ctx == NODE_CTX) { handle_node(); - active_ctx = REV_CTX; + active_ctx = INTERNODE_CTX; } else { fprintf(stderr, "Unexpected content length header: %"PRIu32"\n", len); - buffer_skip_bytes(len); + if (buffer_skip_bytes(&input, len) != len) + die_short_read(); } } } + if (buffer_ferror(&input)) + die_short_read(); if (active_ctx == NODE_CTX) handle_node(); + if (active_ctx == REV_CTX) + begin_revision(); if (active_ctx != DUMP_CTX) - handle_revision(); + end_revision(); } -void svndump_init(const char *filename) +int svndump_init(const char *filename) { - buffer_init(filename); - repo_init(); - reset_dump_ctx(~0); + if (buffer_init(&input, filename)) + return error("cannot open %s: %s", 
filename, strerror(errno)); + fast_export_init(REPORT_FILENO); + strbuf_init(&dump_ctx.uuid, 4096); + strbuf_init(&dump_ctx.url, 4096); + strbuf_init(&rev_ctx.log, 4096); + strbuf_init(&rev_ctx.author, 4096); + strbuf_init(&node_ctx.src, 4096); + strbuf_init(&node_ctx.dst, 4096); + reset_dump_ctx(NULL); reset_rev_ctx(0); reset_node_ctx(NULL); - init_keys(); + return 0; } void svndump_deinit(void) { - log_reset(); - repo_reset(); - reset_dump_ctx(~0); + fast_export_deinit(); + reset_dump_ctx(NULL); reset_rev_ctx(0); reset_node_ctx(NULL); - if (buffer_deinit()) + strbuf_release(&rev_ctx.log); + strbuf_release(&node_ctx.src); + strbuf_release(&node_ctx.dst); + if (buffer_deinit(&input)) fprintf(stderr, "Input error\n"); if (ferror(stdout)) fprintf(stderr, "Output error\n"); @@ -293,10 +499,10 @@ void svndump_deinit(void) void svndump_reset(void) { - log_reset(); - buffer_reset(); - repo_reset(); - reset_dump_ctx(~0); - reset_rev_ctx(0); - reset_node_ctx(NULL); + fast_export_reset(); + buffer_reset(&input); + strbuf_release(&dump_ctx.uuid); + strbuf_release(&dump_ctx.url); + strbuf_release(&rev_ctx.log); + strbuf_release(&rev_ctx.author); } diff --git a/vcs-svn/svndump.h b/vcs-svn/svndump.h index 93c412f14a..df9ceb0e8d 100644 --- a/vcs-svn/svndump.h +++ b/vcs-svn/svndump.h @@ -1,7 +1,7 @@ #ifndef SVNDUMP_H_ #define SVNDUMP_H_ -void svndump_init(const char *filename); +int svndump_init(const char *filename); void svndump_read(const char *url); void svndump_deinit(void); void svndump_reset(void); diff --git a/vcs-svn/trp.h b/vcs-svn/trp.h deleted file mode 100644 index ee35c688a0..0000000000 --- a/vcs-svn/trp.h +++ /dev/null @@ -1,236 +0,0 @@ -/* - * C macro implementation of treaps. - * - * Usage: - * #include <stdint.h> - * #include "trp.h" - * trp_gen(...) - * - * Licensed under a two-clause BSD-style license. - * See LICENSE for details. - */ - -#ifndef TRP_H_ -#define TRP_H_ - -#define MAYBE_UNUSED __attribute__((__unused__)) - -/* Node structure. 
*/ -struct trp_node { - uint32_t trpn_left; - uint32_t trpn_right; -}; - -/* Root structure. */ -struct trp_root { - uint32_t trp_root; -}; - -/* Pointer/Offset conversion. */ -#define trpn_pointer(a_base, a_offset) (a_base##_pointer(a_offset)) -#define trpn_offset(a_base, a_pointer) (a_base##_offset(a_pointer)) -#define trpn_modify(a_base, a_offset) \ - do { \ - if ((a_offset) < a_base##_pool.committed) { \ - uint32_t old_offset = (a_offset);\ - (a_offset) = a_base##_alloc(1); \ - *trpn_pointer(a_base, a_offset) = \ - *trpn_pointer(a_base, old_offset); \ - } \ - } while (0) - -/* Left accessors. */ -#define trp_left_get(a_base, a_field, a_node) \ - (trpn_pointer(a_base, a_node)->a_field.trpn_left) -#define trp_left_set(a_base, a_field, a_node, a_left) \ - do { \ - trpn_modify(a_base, a_node); \ - trp_left_get(a_base, a_field, a_node) = (a_left); \ - } while (0) - -/* Right accessors. */ -#define trp_right_get(a_base, a_field, a_node) \ - (trpn_pointer(a_base, a_node)->a_field.trpn_right) -#define trp_right_set(a_base, a_field, a_node, a_right) \ - do { \ - trpn_modify(a_base, a_node); \ - trp_right_get(a_base, a_field, a_node) = (a_right); \ - } while (0) - -/* - * Fibonacci hash function. - * The multiplier is the nearest prime to (2^32 times (√5 - 1)/2). - * See Knuth §6.4: volume 3, 3rd ed, p518. - */ -#define trpn_hash(a_node) (uint32_t) (2654435761u * (a_node)) - -/* Priority accessors. */ -#define trp_prio_get(a_node) trpn_hash(a_node) - -/* Node initializer. */ -#define trp_node_new(a_base, a_field, a_node) \ - do { \ - trp_left_set(a_base, a_field, (a_node), ~0); \ - trp_right_set(a_base, a_field, (a_node), ~0); \ - } while (0) - -/* Internal utility macros. 
*/ -#define trpn_first(a_base, a_field, a_root, r_node) \ - do { \ - (r_node) = (a_root); \ - if ((r_node) == ~0) \ - return NULL; \ - while (~trp_left_get(a_base, a_field, (r_node))) \ - (r_node) = trp_left_get(a_base, a_field, (r_node)); \ - } while (0) - -#define trpn_rotate_left(a_base, a_field, a_node, r_node) \ - do { \ - (r_node) = trp_right_get(a_base, a_field, (a_node)); \ - trp_right_set(a_base, a_field, (a_node), \ - trp_left_get(a_base, a_field, (r_node))); \ - trp_left_set(a_base, a_field, (r_node), (a_node)); \ - } while (0) - -#define trpn_rotate_right(a_base, a_field, a_node, r_node) \ - do { \ - (r_node) = trp_left_get(a_base, a_field, (a_node)); \ - trp_left_set(a_base, a_field, (a_node), \ - trp_right_get(a_base, a_field, (r_node))); \ - trp_right_set(a_base, a_field, (r_node), (a_node)); \ - } while (0) - -#define trp_gen(a_attr, a_pre, a_type, a_field, a_base, a_cmp) \ -a_attr a_type MAYBE_UNUSED *a_pre##first(struct trp_root *treap) \ -{ \ - uint32_t ret; \ - trpn_first(a_base, a_field, treap->trp_root, ret); \ - return trpn_pointer(a_base, ret); \ -} \ -a_attr a_type MAYBE_UNUSED *a_pre##next(struct trp_root *treap, a_type *node) \ -{ \ - uint32_t ret; \ - uint32_t offset = trpn_offset(a_base, node); \ - if (~trp_right_get(a_base, a_field, offset)) { \ - trpn_first(a_base, a_field, \ - trp_right_get(a_base, a_field, offset), ret); \ - } else { \ - uint32_t tnode = treap->trp_root; \ - ret = ~0; \ - while (1) { \ - int cmp = (a_cmp)(trpn_pointer(a_base, offset), \ - trpn_pointer(a_base, tnode)); \ - if (cmp < 0) { \ - ret = tnode; \ - tnode = trp_left_get(a_base, a_field, tnode); \ - } else if (cmp > 0) { \ - tnode = trp_right_get(a_base, a_field, tnode); \ - } else { \ - break; \ - } \ - } \ - } \ - return trpn_pointer(a_base, ret); \ -} \ -a_attr a_type MAYBE_UNUSED *a_pre##search(struct trp_root *treap, a_type *key) \ -{ \ - int cmp; \ - uint32_t ret = treap->trp_root; \ - while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base, ret)))) { \ 
- if (cmp < 0) { \ - ret = trp_left_get(a_base, a_field, ret); \ - } else { \ - ret = trp_right_get(a_base, a_field, ret); \ - } \ - } \ - return trpn_pointer(a_base, ret); \ -} \ -a_attr a_type MAYBE_UNUSED *a_pre##nsearch(struct trp_root *treap, a_type *key) \ -{ \ - int cmp; \ - uint32_t ret = treap->trp_root; \ - while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base, ret)))) { \ - if (cmp < 0) { \ - if (!~trp_left_get(a_base, a_field, ret)) \ - break; \ - ret = trp_left_get(a_base, a_field, ret); \ - } else { \ - ret = trp_right_get(a_base, a_field, ret); \ - } \ - } \ - return trpn_pointer(a_base, ret); \ -} \ -a_attr uint32_t MAYBE_UNUSED a_pre##insert_recurse(uint32_t cur_node, uint32_t ins_node) \ -{ \ - if (cur_node == ~0) { \ - return ins_node; \ - } else { \ - uint32_t ret; \ - int cmp = (a_cmp)(trpn_pointer(a_base, ins_node), \ - trpn_pointer(a_base, cur_node)); \ - if (cmp < 0) { \ - uint32_t left = a_pre##insert_recurse( \ - trp_left_get(a_base, a_field, cur_node), ins_node); \ - trp_left_set(a_base, a_field, cur_node, left); \ - if (trp_prio_get(left) < trp_prio_get(cur_node)) \ - trpn_rotate_right(a_base, a_field, cur_node, ret); \ - else \ - ret = cur_node; \ - } else { \ - uint32_t right = a_pre##insert_recurse( \ - trp_right_get(a_base, a_field, cur_node), ins_node); \ - trp_right_set(a_base, a_field, cur_node, right); \ - if (trp_prio_get(right) < trp_prio_get(cur_node)) \ - trpn_rotate_left(a_base, a_field, cur_node, ret); \ - else \ - ret = cur_node; \ - } \ - return ret; \ - } \ -} \ -a_attr void MAYBE_UNUSED a_pre##insert(struct trp_root *treap, a_type *node) \ -{ \ - uint32_t offset = trpn_offset(a_base, node); \ - trp_node_new(a_base, a_field, offset); \ - treap->trp_root = a_pre##insert_recurse(treap->trp_root, offset); \ -} \ -a_attr uint32_t MAYBE_UNUSED a_pre##remove_recurse(uint32_t cur_node, uint32_t rem_node) \ -{ \ - int cmp = a_cmp(trpn_pointer(a_base, rem_node), \ - trpn_pointer(a_base, cur_node)); \ - if (cmp == 0) { \ - 
uint32_t ret; \ - uint32_t left = trp_left_get(a_base, a_field, cur_node); \ - uint32_t right = trp_right_get(a_base, a_field, cur_node); \ - if (left == ~0) { \ - if (right == ~0) \ - return ~0; \ - } else if (right == ~0 || trp_prio_get(left) < trp_prio_get(right)) { \ - trpn_rotate_right(a_base, a_field, cur_node, ret); \ - right = a_pre##remove_recurse(cur_node, rem_node); \ - trp_right_set(a_base, a_field, ret, right); \ - return ret; \ - } \ - trpn_rotate_left(a_base, a_field, cur_node, ret); \ - left = a_pre##remove_recurse(cur_node, rem_node); \ - trp_left_set(a_base, a_field, ret, left); \ - return ret; \ - } else if (cmp < 0) { \ - uint32_t left = a_pre##remove_recurse( \ - trp_left_get(a_base, a_field, cur_node), rem_node); \ - trp_left_set(a_base, a_field, cur_node, left); \ - return cur_node; \ - } else { \ - uint32_t right = a_pre##remove_recurse( \ - trp_right_get(a_base, a_field, cur_node), rem_node); \ - trp_right_set(a_base, a_field, cur_node, right); \ - return cur_node; \ - } \ -} \ -a_attr void MAYBE_UNUSED a_pre##remove(struct trp_root *treap, a_type *node) \ -{ \ - treap->trp_root = a_pre##remove_recurse(treap->trp_root, \ - trpn_offset(a_base, node)); \ -} \ - -#endif diff --git a/vcs-svn/trp.txt b/vcs-svn/trp.txt deleted file mode 100644 index eb4c191875..0000000000 --- a/vcs-svn/trp.txt +++ /dev/null @@ -1,103 +0,0 @@ -Motivation -========== - -Treaps provide a memory-efficient binary search tree structure. -Insertion/deletion/search are about as about as fast in the average -case as red-black trees and the chances of worst-case behavior are -vanishingly small, thanks to (pseudo-)randomness. The bad worst-case -behavior is a small price to pay, given that treaps are much simpler -to implement. - -API -=== - -The trp API generates a data structure and functions to handle a -large growing set of objects stored in a pool. - -The caller: - -. Specifies parameters for the generated functions with the - trp_gen(static, foo_, ...) macro. - -. 
Allocates a `struct trp_root` variable and sets it to {~0}. - -. Adds new nodes to the set using `foo_insert`. - -. Can find a specific item in the set using `foo_search`. - -. Can iterate over items in the set using `foo_first` and `foo_next`. - -. Can remove an item from the set using `foo_remove`. - -Example: - ----- -struct ex_node { - const char *s; - struct trp_node ex_link; -}; -static struct trp_root ex_base = {~0}; -obj_pool_gen(ex, struct ex_node, 4096); -trp_gen(static, ex_, struct ex_node, ex_link, ex, strcmp) -struct ex_node *item; - -item = ex_pointer(ex_alloc(1)); -item->s = "hello"; -ex_insert(&ex_base, item); -item = ex_pointer(ex_alloc(1)); -item->s = "goodbye"; -ex_insert(&ex_base, item); -for (item = ex_first(&ex_base); item; item = ex_next(&ex_base, item)) - printf("%s\n", item->s); ----- - -Functions ---------- - -trp_gen(attr, foo_, node_type, link_field, pool, cmp):: - - Generate a type-specific treap implementation. -+ -. The storage class for generated functions will be 'attr' (e.g., `static`). -. Generated function names are prefixed with 'foo_' (e.g., `treap_`). -. Treap nodes will be of type 'node_type' (e.g., `struct treap_node`). - This type must be a struct with at least one `struct trp_node` field - to point to its children. -. The field used to access child nodes will be 'link_field'. -. All treap nodes must lie in the 'pool' object pool. -. Treap nodes must be totally ordered by the 'cmp' relation, with the - following prototype: -+ -int (*cmp)(node_type \*a, node_type \*b) -+ -and returning a value less than, equal to, or greater than zero -according to the result of comparison. - -void foo_insert(struct trp_root *treap, node_type \*node):: - - Insert node into treap. If inserted multiple times, - a node will appear in the treap multiple times. - -void foo_remove(struct trp_root *treap, node_type \*node):: - - Remove node from treap. Caller must ensure node is - present in treap before using this function. 
- -node_type *foo_search(struct trp_root \*treap, node_type \*key):: - - Search for a node that matches key. If no match is found, - result is NULL. - -node_type *foo_nsearch(struct trp_root \*treap, node_type \*key):: - - Like `foo_search`, but if if the key is missing return what - would be key's successor, were key in treap (NULL if no - successor). - -node_type *foo_first(struct trp_root \*treap):: - - Find the first item from the treap, in sorted order. - -node_type *foo_next(struct trp_root \*treap, node_type \*node):: - - Find the next item. |