diff options
Diffstat (limited to 'vcs-svn')
-rw-r--r-- | vcs-svn/LICENSE | 33 | ||||
-rw-r--r-- | vcs-svn/fast_export.c | 88 | ||||
-rw-r--r-- | vcs-svn/fast_export.h | 16 | ||||
-rw-r--r-- | vcs-svn/line_buffer.c | 130 | ||||
-rw-r--r-- | vcs-svn/line_buffer.h | 31 | ||||
-rw-r--r-- | vcs-svn/line_buffer.txt | 77 | ||||
-rw-r--r-- | vcs-svn/obj_pool.h | 61 | ||||
-rw-r--r-- | vcs-svn/repo_tree.c | 326 | ||||
-rw-r--r-- | vcs-svn/repo_tree.h | 27 | ||||
-rw-r--r-- | vcs-svn/string_pool.c | 102 | ||||
-rw-r--r-- | vcs-svn/string_pool.h | 11 | ||||
-rw-r--r-- | vcs-svn/string_pool.txt | 43 | ||||
-rw-r--r-- | vcs-svn/svndump.c | 454 | ||||
-rw-r--r-- | vcs-svn/svndump.h | 9 | ||||
-rw-r--r-- | vcs-svn/trp.h | 237 | ||||
-rw-r--r-- | vcs-svn/trp.txt | 109 |
16 files changed, 1754 insertions, 0 deletions
diff --git a/vcs-svn/LICENSE b/vcs-svn/LICENSE new file mode 100644 index 0000000000..0a5e3c43a0 --- /dev/null +++ b/vcs-svn/LICENSE @@ -0,0 +1,33 @@ +Copyright (C) 2010 David Barr <david.barr@cordelta.com>. +All rights reserved. + +Copyright (C) 2008 Jason Evans <jasone@canonware.com>. +All rights reserved. + +Copyright (C) 2005 Stefan Hegny, hydrografix Consulting GmbH, +Frankfurt/Main, Germany +and others, see http://svn2cc.sarovar.org + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice(s), this list of conditions and the following disclaimer + unmodified other than the allowable addition of one or more + copyright notices. +2. Redistributions in binary form must reproduce the above copyright + notice(s), this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c new file mode 100644 index 0000000000..99ed70b88a --- /dev/null +++ b/vcs-svn/fast_export.c @@ -0,0 +1,88 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#include "git-compat-util.h" +#include "fast_export.h" +#include "line_buffer.h" +#include "repo_tree.h" +#include "string_pool.h" + +#define MAX_GITSVN_LINE_LEN 4096 + +static uint32_t first_commit_done; + +void fast_export_delete(uint32_t depth, uint32_t *path) +{ + putchar('D'); + putchar(' '); + pool_print_seq(depth, path, '/', stdout); + putchar('\n'); +} + +void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, + uint32_t mark) +{ + /* Mode must be 100644, 100755, 120000, or 160000. */ + printf("M %06"PRIo32" :%"PRIu32" ", mode, mark); + pool_print_seq(depth, path, '/', stdout); + putchar('\n'); +} + +static char gitsvnline[MAX_GITSVN_LINE_LEN]; +void fast_export_commit(uint32_t revision, const char *author, + const struct strbuf *log, + const char *uuid, const char *url, + unsigned long timestamp) +{ + static const struct strbuf empty = STRBUF_INIT; + if (!log) + log = ∅ + if (*uuid && *url) { + snprintf(gitsvnline, MAX_GITSVN_LINE_LEN, + "\n\ngit-svn-id: %s@%"PRIu32" %s\n", + url, revision, uuid); + } else { + *gitsvnline = '\0'; + } + printf("commit refs/heads/master\n"); + printf("committer %s <%s@%s> %ld +0000\n", + *author ? author : "nobody", + *author ? author : "nobody", + *uuid ? uuid : "local", timestamp); + printf("data %"PRIuMAX"\n", + (uintmax_t) (log->len + strlen(gitsvnline))); + fwrite(log->buf, log->len, 1, stdout); + printf("%s\n", gitsvnline); + if (!first_commit_done) { + if (revision > 1) + printf("from refs/heads/master^0\n"); + first_commit_done = 1; + } + repo_diff(revision - 1, revision); + fputc('\n', stdout); + + printf("progress Imported commit %"PRIu32".\n\n", revision); +} + +static void die_short_read(struct line_buffer *input) +{ + if (buffer_ferror(input)) + die_errno("error reading dump file"); + die("invalid dump: unexpected end of file"); +} + +void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len, struct line_buffer *input) +{ + if (mode == REPO_MODE_LNK) { + /* svn symlink blobs start with "link " */ + len -= 5; + if (buffer_skip_bytes(input, 5) != 5) + die_short_read(input); + } + printf("blob\nmark :%"PRIu32"\ndata %"PRIu32"\n", mark, len); + if (buffer_copy_bytes(input, len) != len) + die_short_read(input); + fputc('\n', stdout); +} diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h new file mode 100644 index 0000000000..33a8fe996f --- /dev/null +++ b/vcs-svn/fast_export.h @@ -0,0 +1,16 @@ +#ifndef FAST_EXPORT_H_ +#define FAST_EXPORT_H_ + +#include "line_buffer.h" +struct strbuf; + +void fast_export_delete(uint32_t depth, uint32_t *path); +void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, + uint32_t mark); +void fast_export_commit(uint32_t revision, const char *author, + const struct strbuf *log, const char *uuid, + const char *url, unsigned long timestamp); +void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len, + struct line_buffer *input); + +#endif diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c new file mode 100644 index 0000000000..c39038723e --- /dev/null +++ b/vcs-svn/line_buffer.c @@ -0,0 +1,130 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#include "git-compat-util.h" +#include "line_buffer.h" +#include "strbuf.h" + +#define COPY_BUFFER_LEN 4096 + +int buffer_init(struct line_buffer *buf, const char *filename) +{ + buf->infile = filename ? fopen(filename, "r") : stdin; + if (!buf->infile) + return -1; + return 0; +} + +int buffer_fdinit(struct line_buffer *buf, int fd) +{ + buf->infile = fdopen(fd, "r"); + if (!buf->infile) + return -1; + return 0; +} + +int buffer_tmpfile_init(struct line_buffer *buf) +{ + buf->infile = tmpfile(); + if (!buf->infile) + return -1; + return 0; +} + +int buffer_deinit(struct line_buffer *buf) +{ + int err; + if (buf->infile == stdin) + return ferror(buf->infile); + err = ferror(buf->infile); + err |= fclose(buf->infile); + return err; +} + +FILE *buffer_tmpfile_rewind(struct line_buffer *buf) +{ + rewind(buf->infile); + return buf->infile; +} + +long buffer_tmpfile_prepare_to_read(struct line_buffer *buf) +{ + long pos = ftell(buf->infile); + if (pos < 0) + return error("ftell error: %s", strerror(errno)); + if (fseek(buf->infile, 0, SEEK_SET)) + return error("seek error: %s", strerror(errno)); + return pos; +} + +int buffer_ferror(struct line_buffer *buf) +{ + return ferror(buf->infile); +} + +int buffer_read_char(struct line_buffer *buf) +{ + return fgetc(buf->infile); +} + +/* Read a line without trailing newline. */ +char *buffer_read_line(struct line_buffer *buf) +{ + char *end; + if (!fgets(buf->line_buffer, sizeof(buf->line_buffer), buf->infile)) + /* Error or data exhausted. */ + return NULL; + end = buf->line_buffer + strlen(buf->line_buffer); + if (end[-1] == '\n') + end[-1] = '\0'; + else if (feof(buf->infile)) + ; /* No newline at end of file. That's fine. */ + else + /* + * Line was too long. + * There is probably a saner way to deal with this, + * but for now let's return an error. + */ + return NULL; + return buf->line_buffer; +} + +void buffer_read_binary(struct line_buffer *buf, + struct strbuf *sb, uint32_t size) +{ + strbuf_fread(sb, size, buf->infile); +} + +off_t buffer_copy_bytes(struct line_buffer *buf, off_t nbytes) +{ + char byte_buffer[COPY_BUFFER_LEN]; + off_t done = 0; + while (done < nbytes && !feof(buf->infile) && !ferror(buf->infile)) { + off_t len = nbytes - done; + size_t in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN; + in = fread(byte_buffer, 1, in, buf->infile); + done += in; + fwrite(byte_buffer, 1, in, stdout); + if (ferror(stdout)) + return done + buffer_skip_bytes(buf, nbytes - done); + } + return done; +} + +off_t buffer_skip_bytes(struct line_buffer *buf, off_t nbytes) +{ + char byte_buffer[COPY_BUFFER_LEN]; + off_t done = 0; + while (done < nbytes && !feof(buf->infile) && !ferror(buf->infile)) { + off_t len = nbytes - done; + size_t in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN; + done += fread(byte_buffer, 1, in, buf->infile); + } + return done; +} + +void buffer_reset(struct line_buffer *buf) +{ +} diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h new file mode 100644 index 0000000000..d0b22dda76 --- /dev/null +++ b/vcs-svn/line_buffer.h @@ -0,0 +1,31 @@ +#ifndef LINE_BUFFER_H_ +#define LINE_BUFFER_H_ + +#include "strbuf.h" + +#define LINE_BUFFER_LEN 10000 + +struct line_buffer { + char line_buffer[LINE_BUFFER_LEN]; + FILE *infile; +}; +#define LINE_BUFFER_INIT { "", NULL } + +int buffer_init(struct line_buffer *buf, const char *filename); +int buffer_fdinit(struct line_buffer *buf, int fd); +int buffer_deinit(struct line_buffer *buf); +void buffer_reset(struct line_buffer *buf); + +int buffer_tmpfile_init(struct line_buffer *buf); +FILE *buffer_tmpfile_rewind(struct line_buffer *buf); /* prepare to write. */ +long buffer_tmpfile_prepare_to_read(struct line_buffer *buf); + +int buffer_ferror(struct line_buffer *buf); +char *buffer_read_line(struct line_buffer *buf); +int buffer_read_char(struct line_buffer *buf); +void buffer_read_binary(struct line_buffer *buf, struct strbuf *sb, uint32_t len); +/* Returns number of bytes read (not necessarily written). */ +off_t buffer_copy_bytes(struct line_buffer *buf, off_t len); +off_t buffer_skip_bytes(struct line_buffer *buf, off_t len); + +#endif diff --git a/vcs-svn/line_buffer.txt b/vcs-svn/line_buffer.txt new file mode 100644 index 0000000000..8e139eb22d --- /dev/null +++ b/vcs-svn/line_buffer.txt @@ -0,0 +1,77 @@ +line_buffer API +=============== + +The line_buffer library provides a convenient interface for +mostly-line-oriented input. + +Each line is not permitted to exceed 10000 bytes. The provided +functions are not thread-safe or async-signal-safe, and like +`fgets()`, they generally do not function correctly if interrupted +by a signal without SA_RESTART set. + +Calling sequence +---------------- + +The calling program: + + - initializes a `struct line_buffer` to LINE_BUFFER_INIT + - specifies a file to read with `buffer_init` + - processes input with `buffer_read_line`, `buffer_skip_bytes`, + and `buffer_copy_bytes` + - closes the file with `buffer_deinit`, perhaps to start over and + read another file. + +When finished, the caller can use `buffer_reset` to deallocate +resources. + +Using temporary files +--------------------- + +Temporary files provide a place to store data that should not outlive +the calling program. A program + + - initializes a `struct line_buffer` to LINE_BUFFER_INIT + - requests a temporary file with `buffer_tmpfile_init` + - acquires an output handle by calling `buffer_tmpfile_rewind` + - uses standard I/O functions like `fprintf` and `fwrite` to fill + the temporary file + - declares writing is over with `buffer_tmpfile_prepare_to_read` + - can re-read what was written with `buffer_read_line`, + `buffer_copy_bytes`, and so on + - can reuse the temporary file by calling `buffer_tmpfile_rewind` + again + - removes the temporary file with `buffer_deinit`, perhaps to + reuse the line_buffer for some other file. + +When finished, the calling program can use `buffer_reset` to deallocate +resources. + +Functions +--------- + +`buffer_init`, `buffer_fdinit`:: + Open the named file or file descriptor for input. + buffer_init(buf, NULL) prepares to read from stdin. + On failure, returns -1 (with errno indicating the nature + of the failure). + +`buffer_deinit`:: + Stop reading from the current file (closing it unless + it was stdin). Returns nonzero if `fclose` fails or + the error indicator was set. + +`buffer_read_line`:: + Read a line and strip off the trailing newline. + On failure or end of file, returns NULL. + +`buffer_copy_bytes`:: + Read `len` bytes of input and dump them to the standard output + stream. Returns early for error or end of file. + +`buffer_skip_bytes`:: + Discards `len` bytes from the input stream (stopping early + if necessary because of an error or eof). Return value is + the number of bytes successfully read. + +`buffer_reset`:: + Deallocates non-static buffers. diff --git a/vcs-svn/obj_pool.h b/vcs-svn/obj_pool.h new file mode 100644 index 0000000000..deb6eb8135 --- /dev/null +++ b/vcs-svn/obj_pool.h @@ -0,0 +1,61 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#ifndef OBJ_POOL_H_ +#define OBJ_POOL_H_ + +#include "git-compat-util.h" + +#define MAYBE_UNUSED __attribute__((__unused__)) + +#define obj_pool_gen(pre, obj_t, initial_capacity) \ +static struct { \ + uint32_t committed; \ + uint32_t size; \ + uint32_t capacity; \ + obj_t *base; \ +} pre##_pool = {0, 0, 0, NULL}; \ +static MAYBE_UNUSED uint32_t pre##_alloc(uint32_t count) \ +{ \ + uint32_t offset; \ + if (pre##_pool.size + count > pre##_pool.capacity) { \ + while (pre##_pool.size + count > pre##_pool.capacity) \ + if (pre##_pool.capacity) \ + pre##_pool.capacity *= 2; \ + else \ + pre##_pool.capacity = initial_capacity; \ + pre##_pool.base = realloc(pre##_pool.base, \ + pre##_pool.capacity * sizeof(obj_t)); \ + } \ + offset = pre##_pool.size; \ + pre##_pool.size += count; \ + return offset; \ +} \ +static MAYBE_UNUSED void pre##_free(uint32_t count) \ +{ \ + pre##_pool.size -= count; \ +} \ +static MAYBE_UNUSED uint32_t pre##_offset(obj_t *obj) \ +{ \ + return obj == NULL ? ~0 : obj - pre##_pool.base; \ +} \ +static MAYBE_UNUSED obj_t *pre##_pointer(uint32_t offset) \ +{ \ + return offset >= pre##_pool.size ? NULL : &pre##_pool.base[offset]; \ +} \ +static MAYBE_UNUSED void pre##_commit(void) \ +{ \ + pre##_pool.committed = pre##_pool.size; \ +} \ +static MAYBE_UNUSED void pre##_reset(void) \ +{ \ + free(pre##_pool.base); \ + pre##_pool.base = NULL; \ + pre##_pool.size = 0; \ + pre##_pool.capacity = 0; \ + pre##_pool.committed = 0; \ +} + +#endif diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c new file mode 100644 index 0000000000..a21d89de97 --- /dev/null +++ b/vcs-svn/repo_tree.c @@ -0,0 +1,326 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#include "git-compat-util.h" + +#include "string_pool.h" +#include "repo_tree.h" +#include "obj_pool.h" +#include "fast_export.h" + +#include "trp.h" + +struct repo_dirent { + uint32_t name_offset; + struct trp_node children; + uint32_t mode; + uint32_t content_offset; +}; + +struct repo_dir { + struct trp_root entries; +}; + +struct repo_commit { + uint32_t root_dir_offset; +}; + +/* Memory pools for commit, dir and dirent */ +obj_pool_gen(commit, struct repo_commit, 4096) +obj_pool_gen(dir, struct repo_dir, 4096) +obj_pool_gen(dent, struct repo_dirent, 4096) + +static uint32_t active_commit; +static uint32_t mark; + +static int repo_dirent_name_cmp(const void *a, const void *b); + +/* Treap for directory entries */ +trp_gen(static, dent_, struct repo_dirent, children, dent, repo_dirent_name_cmp) + +uint32_t next_blob_mark(void) +{ + return mark++; +} + +static struct repo_dir *repo_commit_root_dir(struct repo_commit *commit) +{ + return dir_pointer(commit->root_dir_offset); +} + +static struct repo_dirent *repo_first_dirent(struct repo_dir *dir) +{ + return dent_first(&dir->entries); +} + +static int repo_dirent_name_cmp(const void *a, const void *b) +{ + const struct repo_dirent *dent1 = a, *dent2 = b; + uint32_t a_offset = dent1->name_offset; + uint32_t b_offset = dent2->name_offset; + return (a_offset > b_offset) - (a_offset < b_offset); +} + +static int repo_dirent_is_dir(struct repo_dirent *dent) +{ + return dent != NULL && dent->mode == REPO_MODE_DIR; +} + +static struct repo_dir *repo_dir_from_dirent(struct repo_dirent *dent) +{ + if (!repo_dirent_is_dir(dent)) + return NULL; + return dir_pointer(dent->content_offset); +} + +static struct repo_dir *repo_clone_dir(struct repo_dir *orig_dir) +{ + uint32_t orig_o, new_o; + orig_o = dir_offset(orig_dir); + if (orig_o >= dir_pool.committed) + return orig_dir; + new_o = dir_alloc(1); + orig_dir = dir_pointer(orig_o); + *dir_pointer(new_o) = *orig_dir; + return dir_pointer(new_o); +} + +static struct repo_dirent *repo_read_dirent(uint32_t revision, + const uint32_t *path) +{ + uint32_t name = 0; + struct repo_dirent *key = dent_pointer(dent_alloc(1)); + struct repo_dir *dir = NULL; + struct repo_dirent *dent = NULL; + dir = repo_commit_root_dir(commit_pointer(revision)); + while (~(name = *path++)) { + key->name_offset = name; + dent = dent_search(&dir->entries, key); + if (dent == NULL || !repo_dirent_is_dir(dent)) + break; + dir = repo_dir_from_dirent(dent); + } + dent_free(1); + return dent; +} + +static void repo_write_dirent(const uint32_t *path, uint32_t mode, + uint32_t content_offset, uint32_t del) +{ + uint32_t name, revision, dir_o = ~0, parent_dir_o = ~0; + struct repo_dir *dir; + struct repo_dirent *key; + struct repo_dirent *dent = NULL; + revision = active_commit; + dir = repo_commit_root_dir(commit_pointer(revision)); + dir = repo_clone_dir(dir); + commit_pointer(revision)->root_dir_offset = dir_offset(dir); + while (~(name = *path++)) { + parent_dir_o = dir_offset(dir); + + key = dent_pointer(dent_alloc(1)); + key->name_offset = name; + + dent = dent_search(&dir->entries, key); + if (dent == NULL) + dent = key; + else + dent_free(1); + + if (dent == key) { + dent->mode = REPO_MODE_DIR; + dent->content_offset = 0; + dent = dent_insert(&dir->entries, dent); + } + + if (dent_offset(dent) < dent_pool.committed) { + dir_o = repo_dirent_is_dir(dent) ? + dent->content_offset : ~0; + dent_remove(&dir->entries, dent); + dent = dent_pointer(dent_alloc(1)); + dent->name_offset = name; + dent->mode = REPO_MODE_DIR; + dent->content_offset = dir_o; + dent = dent_insert(&dir->entries, dent); + } + + dir = repo_dir_from_dirent(dent); + dir = repo_clone_dir(dir); + dent->content_offset = dir_offset(dir); + } + if (dent == NULL) + return; + dent->mode = mode; + dent->content_offset = content_offset; + if (del && ~parent_dir_o) + dent_remove(&dir_pointer(parent_dir_o)->entries, dent); +} + +uint32_t repo_read_path(const uint32_t *path) +{ + uint32_t content_offset = 0; + struct repo_dirent *dent = repo_read_dirent(active_commit, path); + if (dent != NULL) + content_offset = dent->content_offset; + return content_offset; +} + +uint32_t repo_read_mode(const uint32_t *path) +{ + struct repo_dirent *dent = repo_read_dirent(active_commit, path); + if (dent == NULL) + die("invalid dump: path to be modified is missing"); + return dent->mode; +} + +void repo_copy(uint32_t revision, const uint32_t *src, const uint32_t *dst) +{ + uint32_t mode = 0, content_offset = 0; + struct repo_dirent *src_dent; + src_dent = repo_read_dirent(revision, src); + if (src_dent != NULL) { + mode = src_dent->mode; + content_offset = src_dent->content_offset; + repo_write_dirent(dst, mode, content_offset, 0); + } +} + +void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark) +{ + repo_write_dirent(path, mode, blob_mark, 0); +} + +void repo_delete(uint32_t *path) +{ + repo_write_dirent(path, 0, 0, 1); +} + +static void repo_git_add_r(uint32_t depth, uint32_t *path, struct repo_dir *dir); + +static void repo_git_add(uint32_t depth, uint32_t *path, struct repo_dirent *dent) +{ + if (repo_dirent_is_dir(dent)) + repo_git_add_r(depth, path, repo_dir_from_dirent(dent)); + else + fast_export_modify(depth, path, + dent->mode, dent->content_offset); +} + +static void repo_git_add_r(uint32_t depth, uint32_t *path, struct repo_dir *dir) +{ + struct repo_dirent *de = repo_first_dirent(dir); + while (de) { + path[depth] = de->name_offset; + repo_git_add(depth + 1, path, de); + de = dent_next(&dir->entries, de); + } +} + +static void repo_diff_r(uint32_t depth, uint32_t *path, struct repo_dir *dir1, + struct repo_dir *dir2) +{ + struct repo_dirent *de1, *de2; + de1 = repo_first_dirent(dir1); + de2 = repo_first_dirent(dir2); + + while (de1 && de2) { + if (de1->name_offset < de2->name_offset) { + path[depth] = de1->name_offset; + fast_export_delete(depth + 1, path); + de1 = dent_next(&dir1->entries, de1); + continue; + } + if (de1->name_offset > de2->name_offset) { + path[depth] = de2->name_offset; + repo_git_add(depth + 1, path, de2); + de2 = dent_next(&dir2->entries, de2); + continue; + } + path[depth] = de1->name_offset; + + if (de1->mode == de2->mode && + de1->content_offset == de2->content_offset) { + ; /* No change. */ + } else if (repo_dirent_is_dir(de1) && repo_dirent_is_dir(de2)) { + repo_diff_r(depth + 1, path, + repo_dir_from_dirent(de1), + repo_dir_from_dirent(de2)); + } else if (!repo_dirent_is_dir(de1) && !repo_dirent_is_dir(de2)) { + repo_git_add(depth + 1, path, de2); + } else { + fast_export_delete(depth + 1, path); + repo_git_add(depth + 1, path, de2); + } + de1 = dent_next(&dir1->entries, de1); + de2 = dent_next(&dir2->entries, de2); + } + while (de1) { + path[depth] = de1->name_offset; + fast_export_delete(depth + 1, path); + de1 = dent_next(&dir1->entries, de1); + } + while (de2) { + path[depth] = de2->name_offset; + repo_git_add(depth + 1, path, de2); + de2 = dent_next(&dir2->entries, de2); + } +} + +static uint32_t path_stack[REPO_MAX_PATH_DEPTH]; + +void repo_diff(uint32_t r1, uint32_t r2) +{ + repo_diff_r(0, + path_stack, + repo_commit_root_dir(commit_pointer(r1)), + repo_commit_root_dir(commit_pointer(r2))); +} + +void repo_commit(uint32_t revision, const char *author, + const struct strbuf *log, const char *uuid, const char *url, + unsigned long timestamp) +{ + fast_export_commit(revision, author, log, uuid, url, timestamp); + dent_commit(); + dir_commit(); + active_commit = commit_alloc(1); + commit_pointer(active_commit)->root_dir_offset = + commit_pointer(active_commit - 1)->root_dir_offset; +} + +static void mark_init(void) +{ + uint32_t i; + mark = 0; + for (i = 0; i < dent_pool.size; i++) + if (!repo_dirent_is_dir(dent_pointer(i)) && + dent_pointer(i)->content_offset > mark) + mark = dent_pointer(i)->content_offset; + mark++; +} + +void repo_init(void) +{ + mark_init(); + if (commit_pool.size == 0) { + /* Create empty tree for commit 0. */ + commit_alloc(1); + commit_pointer(0)->root_dir_offset = dir_alloc(1); + dir_pointer(0)->entries.trp_root = ~0; + dir_commit(); + } + /* Preallocate next commit, ready for changes. */ + active_commit = commit_alloc(1); + commit_pointer(active_commit)->root_dir_offset = + commit_pointer(active_commit - 1)->root_dir_offset; +} + +void repo_reset(void) +{ + pool_reset(); + commit_reset(); + dir_reset(); + dent_reset(); +} diff --git a/vcs-svn/repo_tree.h b/vcs-svn/repo_tree.h new file mode 100644 index 0000000000..37bde2e374 --- /dev/null +++ b/vcs-svn/repo_tree.h @@ -0,0 +1,27 @@ +#ifndef REPO_TREE_H_ +#define REPO_TREE_H_ + +struct strbuf; + +#define REPO_MODE_DIR 0040000 +#define REPO_MODE_BLB 0100644 +#define REPO_MODE_EXE 0100755 +#define REPO_MODE_LNK 0120000 + +#define REPO_MAX_PATH_LEN 4096 +#define REPO_MAX_PATH_DEPTH 1000 + +uint32_t next_blob_mark(void); +void repo_copy(uint32_t revision, const uint32_t *src, const uint32_t *dst); +void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark); +uint32_t repo_read_path(const uint32_t *path); +uint32_t repo_read_mode(const uint32_t *path); +void repo_delete(uint32_t *path); +void repo_commit(uint32_t revision, const char *author, + const struct strbuf *log, const char *uuid, const char *url, + long unsigned timestamp); +void repo_diff(uint32_t r1, uint32_t r2); +void repo_init(void); +void repo_reset(void); + +#endif diff --git a/vcs-svn/string_pool.c b/vcs-svn/string_pool.c new file mode 100644 index 0000000000..8af8d54d6e --- /dev/null +++ b/vcs-svn/string_pool.c @@ -0,0 +1,102 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#include "git-compat-util.h" +#include "trp.h" +#include "obj_pool.h" +#include "string_pool.h" + +static struct trp_root tree = { ~0 }; + +struct node { + uint32_t offset; + struct trp_node children; +}; + +/* Two memory pools: one for struct node, and another for strings */ +obj_pool_gen(node, struct node, 4096) +obj_pool_gen(string, char, 4096) + +static char *node_value(struct node *node) +{ + return node ? string_pointer(node->offset) : NULL; +} + +static int node_cmp(struct node *a, struct node *b) +{ + return strcmp(node_value(a), node_value(b)); +} + +/* Build a Treap from the node structure (a trp_node w/ offset) */ +trp_gen(static, tree_, struct node, children, node, node_cmp) + +const char *pool_fetch(uint32_t entry) +{ + return node_value(node_pointer(entry)); +} + +uint32_t pool_intern(const char *key) +{ + /* Canonicalize key */ + struct node *match = NULL, *node; + uint32_t key_len; + if (key == NULL) + return ~0; + key_len = strlen(key) + 1; + node = node_pointer(node_alloc(1)); + node->offset = string_alloc(key_len); + strcpy(node_value(node), key); + match = tree_search(&tree, node); + if (!match) { + tree_insert(&tree, node); + } else { + node_free(1); + string_free(key_len); + node = match; + } + return node_offset(node); +} + +uint32_t pool_tok_r(char *str, const char *delim, char **saveptr) +{ + char *token = strtok_r(str, delim, saveptr); + return token ? pool_intern(token) : ~0; +} + +void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream) +{ + uint32_t i; + for (i = 0; i < len && ~seq[i]; i++) { + fputs(pool_fetch(seq[i]), stream); + if (i < len - 1 && ~seq[i + 1]) + fputc(delim, stream); + } +} + +uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str) +{ + char *context = NULL; + uint32_t token = ~0; + uint32_t length; + + if (sz == 0) + return ~0; + if (str) + token = pool_tok_r(str, delim, &context); + for (length = 0; length < sz; length++) { + seq[length] = token; + if (token == ~0) + return length; + token = pool_tok_r(NULL, delim, &context); + } + seq[sz - 1] = ~0; + return sz; +} + +void pool_reset(void) +{ + node_reset(); + string_reset(); +} diff --git a/vcs-svn/string_pool.h b/vcs-svn/string_pool.h new file mode 100644 index 0000000000..222fb66e68 --- /dev/null +++ b/vcs-svn/string_pool.h @@ -0,0 +1,11 @@ +#ifndef STRING_POOL_H_ +#define STRING_POOL_H_ + +uint32_t pool_intern(const char *key); +const char *pool_fetch(uint32_t entry); +uint32_t pool_tok_r(char *str, const char *delim, char **saveptr); +void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream); +uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str); +void pool_reset(void); + +#endif diff --git a/vcs-svn/string_pool.txt b/vcs-svn/string_pool.txt new file mode 100644 index 0000000000..1b41f15628 --- /dev/null +++ b/vcs-svn/string_pool.txt @@ -0,0 +1,43 @@ +string_pool API +=============== + +The string_pool API provides facilities for replacing strings +with integer keys that can be more easily compared and stored. +The facilities are designed so that one could teach Git without +too much trouble to store the information needed for these keys to +remain valid over multiple executions. + +Functions +--------- + +pool_intern:: + Include a string in the string pool and get its key. + If that string is already in the pool, retrieves its + existing key. + +pool_fetch:: + Retrieve the string associated to a given key. + +pool_tok_r:: + Extract the key of the next token from a string. + Interface mimics strtok_r. + +pool_print_seq:: + Print a sequence of strings named by key to a file, using the + specified delimiter to separate them. + + If NULL (key ~0) appears in the sequence, the sequence ends + early. + +pool_tok_seq:: + Split a string into tokens, storing the keys of segments + into a caller-provided array. + + Unless sz is 0, the array will always be ~0-terminated. + If there is not enough room for all the tokens, the + array holds as many tokens as fit in the entries before + the terminating ~0. Return value is the index after the + last token, or sz if the tokens did not fit. + +pool_reset:: + Deallocate storage for the string pool. diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c new file mode 100644 index 0000000000..bc792223b2 --- /dev/null +++ b/vcs-svn/svndump.c @@ -0,0 +1,454 @@ +/* + * Parse and rearrange a svnadmin dump. + * Create the dump with: + * svnadmin dump --incremental -r<startrev>:<endrev> <repository> >outfile + * + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#include "cache.h" +#include "repo_tree.h" +#include "fast_export.h" +#include "line_buffer.h" +#include "string_pool.h" +#include "strbuf.h" +#include "svndump.h" + +/* + * Compare start of string to literal of equal length; + * must be guarded by length test. + */ +#define constcmp(s, ref) memcmp(s, ref, sizeof(ref) - 1) + +#define NODEACT_REPLACE 4 +#define NODEACT_DELETE 3 +#define NODEACT_ADD 2 +#define NODEACT_CHANGE 1 +#define NODEACT_UNKNOWN 0 + +#define DUMP_CTX 0 +#define REV_CTX 1 +#define NODE_CTX 2 + +#define LENGTH_UNKNOWN (~0) +#define DATE_RFC2822_LEN 31 + +static struct line_buffer input = LINE_BUFFER_INIT; + +static struct { + uint32_t action, propLength, textLength, srcRev, type; + uint32_t src[REPO_MAX_PATH_DEPTH], dst[REPO_MAX_PATH_DEPTH]; + uint32_t text_delta, prop_delta; +} node_ctx; + +static struct { + uint32_t revision; + unsigned long timestamp; + struct strbuf log, author; +} rev_ctx; + +static struct { + uint32_t version; + struct strbuf uuid, url; +} dump_ctx; + +static void reset_node_ctx(char *fname) +{ + node_ctx.type = 0; + node_ctx.action = NODEACT_UNKNOWN; + node_ctx.propLength = LENGTH_UNKNOWN; + node_ctx.textLength = LENGTH_UNKNOWN; + node_ctx.src[0] = ~0; + node_ctx.srcRev = 0; + pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.dst, "/", fname); + node_ctx.text_delta = 0; + node_ctx.prop_delta = 0; +} + +static void reset_rev_ctx(uint32_t revision) +{ + rev_ctx.revision = revision; + rev_ctx.timestamp = 0; + strbuf_reset(&rev_ctx.log); + strbuf_reset(&rev_ctx.author); +} + +static void reset_dump_ctx(const char *url) +{ + strbuf_reset(&dump_ctx.url); + if (url) + strbuf_addstr(&dump_ctx.url, url); + dump_ctx.version = 1; + strbuf_reset(&dump_ctx.uuid); +} + +static void handle_property(const struct strbuf *key_buf, + struct strbuf *val, + uint32_t *type_set) +{ + const char *key = key_buf->buf; + size_t keylen = key_buf->len; + + switch (keylen + 1) { + case sizeof("svn:log"): + if (constcmp(key, "svn:log")) + break; + if (!val) + die("invalid dump: unsets svn:log"); + strbuf_swap(&rev_ctx.log, val); + break; + case sizeof("svn:author"): + if (constcmp(key, "svn:author")) + break; + if (!val) + strbuf_reset(&rev_ctx.author); + else + strbuf_swap(&rev_ctx.author, val); + break; + case sizeof("svn:date"): + if (constcmp(key, "svn:date")) + break; + if (!val) + die("invalid dump: unsets svn:date"); + if (parse_date_basic(val->buf, &rev_ctx.timestamp, NULL)) + warning("invalid timestamp: %s", val->buf); + break; + case sizeof("svn:executable"): + case sizeof("svn:special"): + if (keylen == strlen("svn:executable") && + constcmp(key, "svn:executable")) + break; + if (keylen == strlen("svn:special") && + constcmp(key, "svn:special")) + break; + if (*type_set) { + if (!val) + return; + die("invalid dump: sets type twice"); + } + if (!val) { + node_ctx.type = REPO_MODE_BLB; + return; + } + *type_set = 1; + node_ctx.type = keylen == strlen("svn:executable") ? + REPO_MODE_EXE : + REPO_MODE_LNK; + } +} + +static void die_short_read(void) +{ + if (buffer_ferror(&input)) + die_errno("error reading dump file"); + die("invalid dump: unexpected end of file"); +} + +static void read_props(void) +{ + static struct strbuf key = STRBUF_INIT; + static struct strbuf val = STRBUF_INIT; + const char *t; + /* + * NEEDSWORK: to support simple mode changes like + * K 11 + * svn:special + * V 1 + * * + * D 14 + * svn:executable + * we keep track of whether a mode has been set and reset to + * plain file only if not. We should be keeping track of the + * symlink and executable bits separately instead. + */ + uint32_t type_set = 0; + while ((t = buffer_read_line(&input)) && strcmp(t, "PROPS-END")) { + uint32_t len; + const char type = t[0]; + int ch; + + if (!type || t[1] != ' ') + die("invalid property line: %s\n", t); + len = atoi(&t[2]); + strbuf_reset(&val); + buffer_read_binary(&input, &val, len); + if (val.len < len) + die_short_read(); + + /* Discard trailing newline. */ + ch = buffer_read_char(&input); + if (ch == EOF) + die_short_read(); + if (ch != '\n') + die("invalid dump: expected newline after %s", val.buf); + + switch (type) { + case 'K': + strbuf_swap(&key, &val); + continue; + case 'D': + handle_property(&val, NULL, &type_set); + continue; + case 'V': + handle_property(&key, &val, &type_set); + strbuf_reset(&key); + continue; + default: + die("invalid property line: %s\n", t); + } + } +} + +static void handle_node(void) +{ + uint32_t mark = 0; + const uint32_t type = node_ctx.type; + const int have_props = node_ctx.propLength != LENGTH_UNKNOWN; + const int have_text = node_ctx.textLength != LENGTH_UNKNOWN; + + if (node_ctx.text_delta) + die("text deltas not supported"); + if (have_text) + mark = next_blob_mark(); + if (node_ctx.action == NODEACT_DELETE) { + if (have_text || have_props || node_ctx.srcRev) + die("invalid dump: deletion node has " + "copyfrom info, text, or properties"); + repo_delete(node_ctx.dst); + return; + } + if (node_ctx.action == NODEACT_REPLACE) { + repo_delete(node_ctx.dst); + node_ctx.action = NODEACT_ADD; + } + if (node_ctx.srcRev) { + repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst); + if (node_ctx.action == NODEACT_ADD) + node_ctx.action = NODEACT_CHANGE; + } + if (have_text && type == REPO_MODE_DIR) + die("invalid dump: directories cannot have text attached"); + + /* + * Decide on the new content (mark) and mode (node_ctx.type). + */ + if (node_ctx.action == NODEACT_CHANGE && !~*node_ctx.dst) { + if (type != REPO_MODE_DIR) + die("invalid dump: root of tree is not a regular file"); + } else if (node_ctx.action == NODEACT_CHANGE) { + uint32_t mode; + if (!have_text) + mark = repo_read_path(node_ctx.dst); + mode = repo_read_mode(node_ctx.dst); + if (mode == REPO_MODE_DIR && type != REPO_MODE_DIR) + die("invalid dump: cannot modify a directory into a file"); + if (mode != REPO_MODE_DIR && type == REPO_MODE_DIR) + die("invalid dump: cannot modify a file into a directory"); + node_ctx.type = mode; + } else if (node_ctx.action == NODEACT_ADD) { + if (!have_text && type != REPO_MODE_DIR) + die("invalid dump: adds node without text"); + } else { + die("invalid dump: Node-path block lacks Node-action"); + } + + /* + * Adjust mode to reflect properties. + */ + if (have_props) { + if (!node_ctx.prop_delta) + node_ctx.type = type; + if (node_ctx.propLength) + read_props(); + } + + /* + * Save the result. + */ + repo_add(node_ctx.dst, node_ctx.type, mark); + if (have_text) + fast_export_blob(node_ctx.type, mark, + node_ctx.textLength, &input); +} + +static void handle_revision(void) +{ + if (rev_ctx.revision) + repo_commit(rev_ctx.revision, rev_ctx.author.buf, + &rev_ctx.log, dump_ctx.uuid.buf, dump_ctx.url.buf, + rev_ctx.timestamp); +} + +void svndump_read(const char *url) +{ + char *val; + char *t; + uint32_t active_ctx = DUMP_CTX; + uint32_t len; + + reset_dump_ctx(url); + while ((t = buffer_read_line(&input))) { + val = strchr(t, ':'); + if (!val) + continue; + val++; + if (*val != ' ') + continue; + val++; + + /* strlen(key) + 1 */ + switch (val - t - 1) { + case sizeof("SVN-fs-dump-format-version"): + if (constcmp(t, "SVN-fs-dump-format-version")) + continue; + dump_ctx.version = atoi(val); + if (dump_ctx.version > 3) + die("expected svn dump format version <= 3, found %"PRIu32, + dump_ctx.version); + break; + case sizeof("UUID"): + if (constcmp(t, "UUID")) + continue; + strbuf_reset(&dump_ctx.uuid); + strbuf_addstr(&dump_ctx.uuid, val); + break; + case sizeof("Revision-number"): + if (constcmp(t, "Revision-number")) + continue; + if (active_ctx == NODE_CTX) + handle_node(); + if (active_ctx != DUMP_CTX) + handle_revision(); + active_ctx = REV_CTX; + reset_rev_ctx(atoi(val)); + break; + case sizeof("Node-path"): + if (prefixcmp(t, "Node-")) + continue; + if (!constcmp(t + strlen("Node-"), "path")) { + if (active_ctx == NODE_CTX) + handle_node(); + active_ctx = NODE_CTX; + reset_node_ctx(val); + break; + } + if (constcmp(t + strlen("Node-"), "kind")) + continue; + if (!strcmp(val, "dir")) + node_ctx.type = REPO_MODE_DIR; + else if (!strcmp(val, "file")) + node_ctx.type = REPO_MODE_BLB; + else + fprintf(stderr, "Unknown node-kind: %s\n", val); + break; + case sizeof("Node-action"): + if (constcmp(t, "Node-action")) + continue; + if (!strcmp(val, "delete")) { + node_ctx.action = NODEACT_DELETE; + } else if (!strcmp(val, "add")) { + node_ctx.action = NODEACT_ADD; + } else if (!strcmp(val, "change")) { + node_ctx.action = NODEACT_CHANGE; + } else if (!strcmp(val, "replace")) { + node_ctx.action = NODEACT_REPLACE; + } else { + fprintf(stderr, "Unknown node-action: %s\n", val); + node_ctx.action = NODEACT_UNKNOWN; + } + break; + case sizeof("Node-copyfrom-path"): + if (constcmp(t, "Node-copyfrom-path")) + continue; + pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.src, "/", val); + break; + case sizeof("Node-copyfrom-rev"): + if (constcmp(t, "Node-copyfrom-rev")) + continue; + node_ctx.srcRev = atoi(val); + break; + case sizeof("Text-content-length"): + if (!constcmp(t, "Text-content-length")) { + node_ctx.textLength = atoi(val); + break; + } + if (constcmp(t, "Prop-content-length")) + continue; + node_ctx.propLength = atoi(val); + break; + case sizeof("Text-delta"): + if (!constcmp(t, "Text-delta")) { + node_ctx.text_delta = !strcmp(val, "true"); + break; + } + if (constcmp(t, "Prop-delta")) + continue; + node_ctx.prop_delta = !strcmp(val, "true"); + break; + case sizeof("Content-length"): + if (constcmp(t, "Content-length")) + continue; + len = atoi(val); + t = buffer_read_line(&input); + if (!t) + die_short_read(); + if (*t) + die("invalid dump: expected blank line after content length header"); + if (active_ctx == REV_CTX) { + read_props(); + } else if (active_ctx == NODE_CTX) { + handle_node(); + active_ctx = REV_CTX; + } else { + fprintf(stderr, "Unexpected content length header: %"PRIu32"\n", len); + if (buffer_skip_bytes(&input, len) != len) + die_short_read(); + } + } + } + if (buffer_ferror(&input)) + die_short_read(); + if (active_ctx == NODE_CTX) + handle_node(); + if (active_ctx != DUMP_CTX) + handle_revision(); +} + +int svndump_init(const char *filename) +{ + if (buffer_init(&input, filename)) + return error("cannot open %s: %s", filename, strerror(errno)); + repo_init(); + strbuf_init(&dump_ctx.uuid, 4096); + strbuf_init(&dump_ctx.url, 4096); + strbuf_init(&rev_ctx.log, 4096); + strbuf_init(&rev_ctx.author, 4096); + reset_dump_ctx(NULL); + reset_rev_ctx(0); + reset_node_ctx(NULL); + return 0; +} + +void svndump_deinit(void) +{ + repo_reset(); + reset_dump_ctx(NULL); + reset_rev_ctx(0); + reset_node_ctx(NULL); + strbuf_release(&rev_ctx.log); + if (buffer_deinit(&input)) + fprintf(stderr, "Input error\n"); + if (ferror(stdout)) + fprintf(stderr, "Output error\n"); +} + +void svndump_reset(void) +{ + buffer_reset(&input); + repo_reset(); + strbuf_release(&dump_ctx.uuid); + strbuf_release(&dump_ctx.url); + strbuf_release(&rev_ctx.log); + strbuf_release(&rev_ctx.author); +} diff --git a/vcs-svn/svndump.h b/vcs-svn/svndump.h new file mode 100644 index 0000000000..df9ceb0e8d --- /dev/null +++ b/vcs-svn/svndump.h @@ -0,0 +1,9 @@ +#ifndef SVNDUMP_H_ +#define SVNDUMP_H_ + +int svndump_init(const char *filename); +void svndump_read(const char *url); +void svndump_deinit(void); +void svndump_reset(void); + +#endif diff --git a/vcs-svn/trp.h b/vcs-svn/trp.h new file mode 100644 index 0000000000..c32b9184e9 --- /dev/null +++ b/vcs-svn/trp.h @@ -0,0 +1,237 @@ +/* + * C macro implementation of treaps. + * + * Usage: + * #include <stdint.h> + * #include "trp.h" + * trp_gen(...) + * + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#ifndef TRP_H_ +#define TRP_H_ + +#define MAYBE_UNUSED __attribute__((__unused__)) + +/* Node structure. */ +struct trp_node { + uint32_t trpn_left; + uint32_t trpn_right; +}; + +/* Root structure. */ +struct trp_root { + uint32_t trp_root; +}; + +/* Pointer/Offset conversion. */ +#define trpn_pointer(a_base, a_offset) (a_base##_pointer(a_offset)) +#define trpn_offset(a_base, a_pointer) (a_base##_offset(a_pointer)) +#define trpn_modify(a_base, a_offset) \ + do { \ + if ((a_offset) < a_base##_pool.committed) { \ + uint32_t old_offset = (a_offset);\ + (a_offset) = a_base##_alloc(1); \ + *trpn_pointer(a_base, a_offset) = \ + *trpn_pointer(a_base, old_offset); \ + } \ + } while (0) + +/* Left accessors. */ +#define trp_left_get(a_base, a_field, a_node) \ + (trpn_pointer(a_base, a_node)->a_field.trpn_left) +#define trp_left_set(a_base, a_field, a_node, a_left) \ + do { \ + trpn_modify(a_base, a_node); \ + trp_left_get(a_base, a_field, a_node) = (a_left); \ + } while (0) + +/* Right accessors. */ +#define trp_right_get(a_base, a_field, a_node) \ + (trpn_pointer(a_base, a_node)->a_field.trpn_right) +#define trp_right_set(a_base, a_field, a_node, a_right) \ + do { \ + trpn_modify(a_base, a_node); \ + trp_right_get(a_base, a_field, a_node) = (a_right); \ + } while (0) + +/* + * Fibonacci hash function. + * The multiplier is the nearest prime to (2^32 times (√5 - 1)/2). + * See Knuth §6.4: volume 3, 3rd ed, p518. + */ +#define trpn_hash(a_node) (uint32_t) (2654435761u * (a_node)) + +/* Priority accessors. */ +#define trp_prio_get(a_node) trpn_hash(a_node) + +/* Node initializer. */ +#define trp_node_new(a_base, a_field, a_node) \ + do { \ + trp_left_set(a_base, a_field, (a_node), ~0); \ + trp_right_set(a_base, a_field, (a_node), ~0); \ + } while (0) + +/* Internal utility macros. */ +#define trpn_first(a_base, a_field, a_root, r_node) \ + do { \ + (r_node) = (a_root); \ + if ((r_node) == ~0) \ + return NULL; \ + while (~trp_left_get(a_base, a_field, (r_node))) \ + (r_node) = trp_left_get(a_base, a_field, (r_node)); \ + } while (0) + +#define trpn_rotate_left(a_base, a_field, a_node, r_node) \ + do { \ + (r_node) = trp_right_get(a_base, a_field, (a_node)); \ + trp_right_set(a_base, a_field, (a_node), \ + trp_left_get(a_base, a_field, (r_node))); \ + trp_left_set(a_base, a_field, (r_node), (a_node)); \ + } while (0) + +#define trpn_rotate_right(a_base, a_field, a_node, r_node) \ + do { \ + (r_node) = trp_left_get(a_base, a_field, (a_node)); \ + trp_left_set(a_base, a_field, (a_node), \ + trp_right_get(a_base, a_field, (r_node))); \ + trp_right_set(a_base, a_field, (r_node), (a_node)); \ + } while (0) + +#define trp_gen(a_attr, a_pre, a_type, a_field, a_base, a_cmp) \ +a_attr a_type MAYBE_UNUSED *a_pre##first(struct trp_root *treap) \ +{ \ + uint32_t ret; \ + trpn_first(a_base, a_field, treap->trp_root, ret); \ + return trpn_pointer(a_base, ret); \ +} \ +a_attr a_type MAYBE_UNUSED *a_pre##next(struct trp_root *treap, a_type *node) \ +{ \ + uint32_t ret; \ + uint32_t offset = trpn_offset(a_base, node); \ + if (~trp_right_get(a_base, a_field, offset)) { \ + trpn_first(a_base, a_field, \ + trp_right_get(a_base, a_field, offset), ret); \ + } else { \ + uint32_t tnode = treap->trp_root; \ + ret = ~0; \ + while (1) { \ + int cmp = (a_cmp)(trpn_pointer(a_base, offset), \ + trpn_pointer(a_base, tnode)); \ + if (cmp < 0) { \ + ret = tnode; \ + tnode = trp_left_get(a_base, a_field, tnode); \ + } else if (cmp > 0) { \ + tnode = trp_right_get(a_base, a_field, tnode); \ + } else { \ + break; \ + } \ + } \ + } \ + return trpn_pointer(a_base, ret); \ +} \ +a_attr a_type MAYBE_UNUSED *a_pre##search(struct trp_root *treap, a_type *key) \ +{ \ + int cmp; \ + uint32_t ret = treap->trp_root; \ + while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base, ret)))) { \ + if (cmp < 0) { \ + ret = trp_left_get(a_base, a_field, ret); \ + } else { \ + ret = trp_right_get(a_base, a_field, ret); \ + } \ + } \ + return trpn_pointer(a_base, ret); \ +} \ +a_attr a_type MAYBE_UNUSED *a_pre##nsearch(struct trp_root *treap, a_type *key) \ +{ \ + int cmp; \ + uint32_t ret = treap->trp_root; \ + while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base, ret)))) { \ + if (cmp < 0) { \ + if (!~trp_left_get(a_base, a_field, ret)) \ + break; \ + ret = trp_left_get(a_base, a_field, ret); \ + } else { \ + ret = trp_right_get(a_base, a_field, ret); \ + } \ + } \ + return trpn_pointer(a_base, ret); \ +} \ +a_attr uint32_t MAYBE_UNUSED a_pre##insert_recurse(uint32_t cur_node, uint32_t ins_node) \ +{ \ + if (cur_node == ~0) { \ + return ins_node; \ + } else { \ + uint32_t ret; \ + int cmp = (a_cmp)(trpn_pointer(a_base, ins_node), \ + trpn_pointer(a_base, cur_node)); \ + if (cmp < 0) { \ + uint32_t left = a_pre##insert_recurse( \ + trp_left_get(a_base, a_field, cur_node), ins_node); \ + trp_left_set(a_base, a_field, cur_node, left); \ + if (trp_prio_get(left) < trp_prio_get(cur_node)) \ + trpn_rotate_right(a_base, a_field, cur_node, ret); \ + else \ + ret = cur_node; \ + } else { \ + uint32_t right = a_pre##insert_recurse( \ + trp_right_get(a_base, a_field, cur_node), ins_node); \ + trp_right_set(a_base, a_field, cur_node, right); \ + if (trp_prio_get(right) < trp_prio_get(cur_node)) \ + trpn_rotate_left(a_base, a_field, cur_node, ret); \ + else \ + ret = cur_node; \ + } \ + return ret; \ + } \ +} \ +a_attr a_type *MAYBE_UNUSED a_pre##insert(struct trp_root *treap, a_type *node) \ +{ \ + uint32_t offset = trpn_offset(a_base, node); \ + trp_node_new(a_base, a_field, offset); \ + treap->trp_root = a_pre##insert_recurse(treap->trp_root, offset); \ + return trpn_pointer(a_base, offset); \ +} \ +a_attr uint32_t MAYBE_UNUSED a_pre##remove_recurse(uint32_t cur_node, uint32_t rem_node) \ +{ \ + int cmp = a_cmp(trpn_pointer(a_base, rem_node), \ + trpn_pointer(a_base, cur_node)); \ + if (cmp == 0) { \ + uint32_t ret; \ + uint32_t left = trp_left_get(a_base, a_field, cur_node); \ + uint32_t right = trp_right_get(a_base, a_field, cur_node); \ + if (left == ~0) { \ + if (right == ~0) \ + return ~0; \ + } else if (right == ~0 || trp_prio_get(left) < trp_prio_get(right)) { \ + trpn_rotate_right(a_base, a_field, cur_node, ret); \ + right = a_pre##remove_recurse(cur_node, rem_node); \ + trp_right_set(a_base, a_field, ret, right); \ + return ret; \ + } \ + trpn_rotate_left(a_base, a_field, cur_node, ret); \ + left = a_pre##remove_recurse(cur_node, rem_node); \ + trp_left_set(a_base, a_field, ret, left); \ + return ret; \ + } else if (cmp < 0) { \ + uint32_t left = a_pre##remove_recurse( \ + trp_left_get(a_base, a_field, cur_node), rem_node); \ + trp_left_set(a_base, a_field, cur_node, left); \ + return cur_node; \ + } else { \ + uint32_t right = a_pre##remove_recurse( \ + trp_right_get(a_base, a_field, cur_node), rem_node); \ + trp_right_set(a_base, a_field, cur_node, right); \ + return cur_node; \ + } \ +} \ +a_attr void MAYBE_UNUSED a_pre##remove(struct trp_root *treap, a_type *node) \ +{ \ + treap->trp_root = a_pre##remove_recurse(treap->trp_root, \ + trpn_offset(a_base, node)); \ +} \ + +#endif diff --git a/vcs-svn/trp.txt b/vcs-svn/trp.txt new file mode 100644 index 0000000000..177ebca335 --- /dev/null +++ b/vcs-svn/trp.txt @@ -0,0 +1,109 @@ +Motivation +========== + +Treaps provide a memory-efficient binary search tree structure. +Insertion/deletion/search are about as about as fast in the average +case as red-black trees and the chances of worst-case behavior are +vanishingly small, thanks to (pseudo-)randomness. The bad worst-case +behavior is a small price to pay, given that treaps are much simpler +to implement. + +API +=== + +The trp API generates a data structure and functions to handle a +large growing set of objects stored in a pool. + +The caller: + +. Specifies parameters for the generated functions with the + trp_gen(static, foo_, ...) macro. + +. Allocates a `struct trp_root` variable and sets it to {~0}. + +. Adds new nodes to the set using `foo_insert`. Any pointers + to existing nodes cannot be relied upon any more, so the caller + might retrieve them anew with `foo_pointer`. + +. Can find a specific item in the set using `foo_search`. + +. Can iterate over items in the set using `foo_first` and `foo_next`. + +. Can remove an item from the set using `foo_remove`. + +Example: + +---- +struct ex_node { + const char *s; + struct trp_node ex_link; +}; +static struct trp_root ex_base = {~0}; +obj_pool_gen(ex, struct ex_node, 4096); +trp_gen(static, ex_, struct ex_node, ex_link, ex, strcmp) +struct ex_node *item; + +item = ex_pointer(ex_alloc(1)); +item->s = "hello"; +ex_insert(&ex_base, item); +item = ex_pointer(ex_alloc(1)); +item->s = "goodbye"; +ex_insert(&ex_base, item); +for (item = ex_first(&ex_base); item; item = ex_next(&ex_base, item)) + printf("%s\n", item->s); +---- + +Functions +--------- + +trp_gen(attr, foo_, node_type, link_field, pool, cmp):: + + Generate a type-specific treap implementation. ++ +. The storage class for generated functions will be 'attr' (e.g., `static`). +. Generated function names are prefixed with 'foo_' (e.g., `treap_`). +. Treap nodes will be of type 'node_type' (e.g., `struct treap_node`). + This type must be a struct with at least one `struct trp_node` field + to point to its children. +. The field used to access child nodes will be 'link_field'. +. All treap nodes must lie in the 'pool' object pool. +. Treap nodes must be totally ordered by the 'cmp' relation, with the + following prototype: ++ +int (*cmp)(node_type \*a, node_type \*b) ++ +and returning a value less than, equal to, or greater than zero +according to the result of comparison. + +node_type {asterisk}foo_insert(struct trp_root *treap, node_type \*node):: + + Insert node into treap. If inserted multiple times, + a node will appear in the treap multiple times. ++ +The return value is the address of the node within the treap, +which might differ from `node` if `pool_alloc` had to call +`realloc` to expand the pool. + +void foo_remove(struct trp_root *treap, node_type \*node):: + + Remove node from treap. Caller must ensure node is + present in treap before using this function. + +node_type *foo_search(struct trp_root \*treap, node_type \*key):: + + Search for a node that matches key. If no match is found, + result is NULL. + +node_type *foo_nsearch(struct trp_root \*treap, node_type \*key):: + + Like `foo_search`, but if the key is missing return what + would be key's successor, were key in treap (NULL if no + successor). + +node_type *foo_first(struct trp_root \*treap):: + + Find the first item from the treap, in sorted order. + +node_type *foo_next(struct trp_root \*treap, node_type \*node):: + + Find the next item. |