diff options
author | Junio C Hamano <gitster@pobox.com> | 2010-08-31 16:23:38 -0700 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2010-08-31 16:23:38 -0700 |
commit | aca35505db3706c87d391dd213e856f73edfd42c (patch) | |
tree | 80b2d3878ae3ff21db1b1c21b9eaa6fefc290550 | |
parent | Merge branch 'tr/maint-no-unquote-plus' (diff) | |
parent | t/t9010-svn-fe.sh: add an +x bit to this test (diff) | |
download | tgif-aca35505db3706c87d391dd213e856f73edfd42c.tar.xz |
Merge branch 'jn/svn-fe'
* jn/svn-fe:
t/t9010-svn-fe.sh: add an +x bit to this test
t9010 (svn-fe): avoid symlinks in test
t9010 (svn-fe): use Unix-style path in URI
vcs-svn: Avoid %z in format string
vcs-svn: Rename dirent pool to build on Windows
compat: add strtok_r()
treap: style fix
vcs-svn: remove build artifacts on "make clean"
svn-fe manual: Clarify warning about deltas in dump files
Update svn-fe manual
SVN dump parser
Infrastructure to write revisions in fast-export format
Add stream helper library
Add string-specific memory pool
Add treap implementation
Add memory pool library
Introduce vcs-svn lib
-rw-r--r-- | .gitignore | 5 | ||||
-rw-r--r-- | Makefile | 37 | ||||
-rw-r--r-- | compat/strtok_r.c | 61 | ||||
-rw-r--r-- | config.mak.in | 1 | ||||
-rw-r--r-- | configure.ac | 6 | ||||
-rw-r--r-- | contrib/svn-fe/svn-fe.c | 1 | ||||
-rw-r--r-- | contrib/svn-fe/svn-fe.txt | 19 | ||||
-rw-r--r-- | git-compat-util.h | 5 | ||||
-rwxr-xr-x | t/t0080-vcs-svn.sh | 171 | ||||
-rwxr-xr-x | t/t9010-svn-fe.sh | 32 | ||||
-rw-r--r-- | test-line-buffer.c | 46 | ||||
-rw-r--r-- | test-obj-pool.c | 116 | ||||
-rw-r--r-- | test-string-pool.c | 31 | ||||
-rw-r--r-- | test-svn-fe.c | 17 | ||||
-rw-r--r-- | test-treap.c | 65 | ||||
-rw-r--r-- | vcs-svn/LICENSE | 33 | ||||
-rw-r--r-- | vcs-svn/fast_export.c | 75 | ||||
-rw-r--r-- | vcs-svn/fast_export.h | 11 | ||||
-rw-r--r-- | vcs-svn/line_buffer.c | 97 | ||||
-rw-r--r-- | vcs-svn/line_buffer.h | 12 | ||||
-rw-r--r-- | vcs-svn/line_buffer.txt | 58 | ||||
-rw-r--r-- | vcs-svn/obj_pool.h | 61 | ||||
-rw-r--r-- | vcs-svn/repo_tree.c | 329 | ||||
-rw-r--r-- | vcs-svn/repo_tree.h | 26 | ||||
-rw-r--r-- | vcs-svn/string_pool.c | 102 | ||||
-rw-r--r-- | vcs-svn/string_pool.h | 11 | ||||
-rw-r--r-- | vcs-svn/string_pool.txt | 43 | ||||
-rw-r--r-- | vcs-svn/svndump.c | 302 | ||||
-rw-r--r-- | vcs-svn/svndump.h | 9 | ||||
-rw-r--r-- | vcs-svn/trp.h | 236 | ||||
-rw-r--r-- | vcs-svn/trp.txt | 103 |
31 files changed, 2108 insertions, 13 deletions
diff --git a/.gitignore b/.gitignore index fcdd822d8a..4cb14e0bae 100644 --- a/.gitignore +++ b/.gitignore @@ -166,12 +166,17 @@ /test-dump-cache-tree /test-genrandom /test-index-version +/test-line-buffer /test-match-trees +/test-obj-pool /test-parse-options /test-path-utils /test-run-command /test-sha1 /test-sigchain +/test-string-pool +/test-svn-fe +/test-treap /common-cmds.h *.tar.gz *.dsc @@ -68,6 +68,8 @@ all:: # # Define NO_MKSTEMPS if you don't have mkstemps in the C library. # +# Define NO_STRTOK_R if you don't have strtok_r in the C library. +# # Define NO_LIBGEN_H if you don't have libgen.h. # # Define NEEDS_LIBGEN if your libgen needs -lgen when linking @@ -408,12 +410,17 @@ TEST_PROGRAMS_NEED_X += test-date TEST_PROGRAMS_NEED_X += test-delta TEST_PROGRAMS_NEED_X += test-dump-cache-tree TEST_PROGRAMS_NEED_X += test-genrandom +TEST_PROGRAMS_NEED_X += test-line-buffer TEST_PROGRAMS_NEED_X += test-match-trees +TEST_PROGRAMS_NEED_X += test-obj-pool TEST_PROGRAMS_NEED_X += test-parse-options TEST_PROGRAMS_NEED_X += test-path-utils TEST_PROGRAMS_NEED_X += test-run-command TEST_PROGRAMS_NEED_X += test-sha1 TEST_PROGRAMS_NEED_X += test-sigchain +TEST_PROGRAMS_NEED_X += test-string-pool +TEST_PROGRAMS_NEED_X += test-svn-fe +TEST_PROGRAMS_NEED_X += test-treap TEST_PROGRAMS_NEED_X += test-index-version TEST_PROGRAMS = $(patsubst %,%$X,$(TEST_PROGRAMS_NEED_X)) @@ -468,6 +475,7 @@ export PYTHON_PATH LIB_FILE=libgit.a XDIFF_LIB=xdiff/lib.a +VCSSVN_LIB=vcs-svn/lib.a LIB_H += advice.h LIB_H += archive.h @@ -1035,6 +1043,7 @@ ifeq ($(uname_S),Windows) NO_UNSETENV = YesPlease NO_STRCASESTR = YesPlease NO_STRLCPY = YesPlease + NO_STRTOK_R = YesPlease NO_MEMMEM = YesPlease # NEEDS_LIBICONV = YesPlease NO_ICONV = YesPlease @@ -1089,6 +1098,7 @@ ifneq (,$(findstring MINGW,$(uname_S))) NO_UNSETENV = YesPlease NO_STRCASESTR = YesPlease NO_STRLCPY = YesPlease + NO_STRTOK_R = YesPlease NO_MEMMEM = YesPlease NEEDS_LIBICONV = YesPlease OLD_ICONV = YesPlease @@ -1319,6 +1329,10 @@ endif ifdef NO_STRTOULL COMPAT_CFLAGS += -DNO_STRTOULL endif +ifdef NO_STRTOK_R + COMPAT_CFLAGS += -DNO_STRTOK_R + COMPAT_OBJS += compat/strtok_r.o +endif ifdef NO_SETENV COMPAT_CFLAGS += -DNO_SETENV COMPAT_OBJS += compat/setenv.o @@ -1739,7 +1753,9 @@ ifndef NO_CURL endif XDIFF_OBJS = xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \ xdiff/xmerge.o xdiff/xpatience.o -OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS) +VCSSVN_OBJS = vcs-svn/string_pool.o vcs-svn/line_buffer.o \ + vcs-svn/repo_tree.o vcs-svn/fast_export.o vcs-svn/svndump.o +OBJECTS := $(GIT_OBJS) $(XDIFF_OBJS) $(VCSSVN_OBJS) dep_files := $(foreach f,$(OBJECTS),$(dir $f).depend/$(notdir $f).d) dep_dirs := $(addsuffix .depend,$(sort $(dir $(OBJECTS)))) @@ -1861,6 +1877,11 @@ http.o http-walker.o http-push.o http-fetch.o remote-curl.o: http.h xdiff-interface.o $(XDIFF_OBJS): \ xdiff/xinclude.h xdiff/xmacros.h xdiff/xdiff.h xdiff/xtypes.h \ xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h + +$(VCSSVN_OBJS): \ + vcs-svn/obj_pool.h vcs-svn/trp.h vcs-svn/string_pool.h \ + vcs-svn/line_buffer.h vcs-svn/repo_tree.h vcs-svn/fast_export.h \ + vcs-svn/svndump.h endif exec_cmd.s exec_cmd.o: EXTRA_CPPFLAGS = \ @@ -1909,6 +1930,8 @@ $(LIB_FILE): $(LIB_OBJS) $(XDIFF_LIB): $(XDIFF_OBJS) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(XDIFF_OBJS) +$(VCSSVN_LIB): $(VCSSVN_OBJS) + $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(VCSSVN_OBJS) doc: $(MAKE) -C Documentation all @@ -2007,12 +2030,18 @@ test-date$X: date.o ctype.o test-delta$X: diff-delta.o patch-delta.o +test-line-buffer$X: vcs-svn/lib.a + test-parse-options$X: parse-options.o +test-string-pool$X: vcs-svn/lib.a + +test-svn-fe$X: vcs-svn/lib.a + .PRECIOUS: $(TEST_OBJS) test-%$X: test-%.o $(GITLIBS) - $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) + $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(filter %.a,$^) $(LIBS) check-sha1:: test-sha1$X ./test-sha1.sh @@ -2187,8 +2216,8 @@ distclean: clean $(RM) configure clean: - $(RM) *.o block-sha1/*.o ppc/*.o compat/*.o compat/*/*.o xdiff/*.o \ - builtin/*.o $(LIB_FILE) $(XDIFF_LIB) + $(RM) *.o block-sha1/*.o ppc/*.o compat/*.o compat/*/*.o xdiff/*.o vcs-svn/*.o \ + builtin/*.o $(LIB_FILE) $(XDIFF_LIB) $(VCSSVN_LIB) $(RM) $(ALL_PROGRAMS) $(SCRIPT_LIB) $(BUILT_INS) git$X $(RM) $(TEST_PROGRAMS) $(RM) -r bin-wrappers diff --git a/compat/strtok_r.c b/compat/strtok_r.c new file mode 100644 index 0000000000..7b5d568a96 --- /dev/null +++ b/compat/strtok_r.c @@ -0,0 +1,61 @@ +/* Reentrant string tokenizer. Generic version. + Copyright (C) 1991,1996-1999,2001,2004 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include "../git-compat-util.h" + +/* Parse S into tokens separated by characters in DELIM. + If S is NULL, the saved pointer in SAVE_PTR is used as + the next starting point. For example: + char s[] = "-abc-=-def"; + char *sp; + x = strtok_r(s, "-", &sp); // x = "abc", sp = "=-def" + x = strtok_r(NULL, "-=", &sp); // x = "def", sp = NULL + x = strtok_r(NULL, "=", &sp); // x = NULL + // s = "abc\0-def\0" +*/ +char * +gitstrtok_r (char *s, const char *delim, char **save_ptr) +{ + char *token; + + if (s == NULL) + s = *save_ptr; + + /* Scan leading delimiters. */ + s += strspn (s, delim); + if (*s == '\0') + { + *save_ptr = s; + return NULL; + } + + /* Find the end of the token. */ + token = s; + s = strpbrk (token, delim); + if (s == NULL) + /* This token finishes the string. */ + *save_ptr = token + strlen (token); + else + { + /* Terminate the token and make *SAVE_PTR point past it. */ + *s = '\0'; + *save_ptr = s + 1; + } + return token; +} diff --git a/config.mak.in b/config.mak.in index b4e65c32b2..4ffd77420f 100644 --- a/config.mak.in +++ b/config.mak.in @@ -46,6 +46,7 @@ NO_IPV6=@NO_IPV6@ NO_C99_FORMAT=@NO_C99_FORMAT@ NO_HSTRERROR=@NO_HSTRERROR@ NO_STRCASESTR=@NO_STRCASESTR@ +NO_STRTOK_R=@NO_STRTOK_R@ NO_MEMMEM=@NO_MEMMEM@ NO_STRLCPY=@NO_STRLCPY@ NO_UINTMAX_T=@NO_UINTMAX_T@ diff --git a/configure.ac b/configure.ac index 5601e8bac9..708e7b86ce 100644 --- a/configure.ac +++ b/configure.ac @@ -783,6 +783,12 @@ GIT_CHECK_FUNC(strcasestr, [NO_STRCASESTR=YesPlease]) AC_SUBST(NO_STRCASESTR) # +# Define NO_STRTOK_R if you don't have strtok_r +GIT_CHECK_FUNC(strtok_r, +[NO_STRTOK_R=], +[NO_STRTOK_R=YesPlease]) +AC_SUBST(NO_STRTOK_R) +# # Define NO_MEMMEM if you don't have memmem. GIT_CHECK_FUNC(memmem, [NO_MEMMEM=], diff --git a/contrib/svn-fe/svn-fe.c b/contrib/svn-fe/svn-fe.c index e9b9ba4da4..a2677b03e0 100644 --- a/contrib/svn-fe/svn-fe.c +++ b/contrib/svn-fe/svn-fe.c @@ -10,6 +10,7 @@ int main(int argc, char **argv) { svndump_init(NULL); svndump_read((argc > 1) ? argv[1] : NULL); + svndump_deinit(); svndump_reset(); return 0; } diff --git a/contrib/svn-fe/svn-fe.txt b/contrib/svn-fe/svn-fe.txt index de30f83a1f..35f84bd9e7 100644 --- a/contrib/svn-fe/svn-fe.txt +++ b/contrib/svn-fe/svn-fe.txt @@ -12,7 +12,7 @@ svnadmin dump --incremental REPO | svn-fe [url] | git fast-import DESCRIPTION ----------- -Converts a Subversion dumpfile (version: 2) into input suitable for +Converts a Subversion dumpfile into input suitable for git-fast-import(1) and similar importers. REPO is a path to a Subversion repository mirrored on the local disk. Remote Subversion repositories can be mirrored on local disk using the `svnsync` @@ -25,6 +25,9 @@ Subversion's repository dump format is documented in full in Files in this format can be generated using the 'svnadmin dump' or 'svk admin dump' command. +Dumps produced with 'svnadmin dump --deltas' (dumpfile format v3) +are not supported. + OUTPUT FORMAT ------------- The fast-import format is documented by the git-fast-import(1) @@ -43,11 +46,9 @@ user <user@UUID> as committer, where 'user' is the value of the `svn:author` property and 'UUID' the repository's identifier. -To support incremental imports, 'svn-fe' will put a `git-svn-id` -line at the end of each commit log message if passed an url on the -command line. This line has the form `git-svn-id: URL@REVNO UUID`. - -Empty directories and unknown properties are silently discarded. +To support incremental imports, 'svn-fe' puts a `git-svn-id` line at +the end of each commit log message if passed an url on the command +line. This line has the form `git-svn-id: URL@REVNO UUID`. The resulting repository will generally require further processing to put each project in its own repository and to separate the history @@ -56,9 +57,9 @@ may be useful for this purpose. BUGS ---- -Litters the current working directory with .bin files for -persistence. Will be fixed when the svn-fe infrastructure is aware of -a Git working directory. +Empty directories and unknown properties are silently discarded. + +The exit status does not reflect whether an error was detected. SEE ALSO -------- diff --git a/git-compat-util.h b/git-compat-util.h index fe845ae639..877096ecb0 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -312,6 +312,11 @@ extern size_t gitstrlcpy(char *, const char *, size_t); extern uintmax_t gitstrtoumax(const char *, char **, int); #endif +#ifdef NO_STRTOK_R +#define strtok_r gitstrtok_r +extern char *gitstrtok_r(char *s, const char *delim, char **save_ptr); +#endif + #ifdef NO_HSTRERROR #define hstrerror githstrerror extern const char *githstrerror(int herror); diff --git a/t/t0080-vcs-svn.sh b/t/t0080-vcs-svn.sh new file mode 100755 index 0000000000..d3225ada68 --- /dev/null +++ b/t/t0080-vcs-svn.sh @@ -0,0 +1,171 @@ +#!/bin/sh + +test_description='check infrastructure for svn importer' + +. ./test-lib.sh +uint32_max=4294967295 + +test_expect_success 'obj pool: store data' ' + cat <<-\EOF >expected && + 0 + 1 + EOF + + test-obj-pool <<-\EOF >actual && + alloc one 16 + set one 13 + test one 13 + reset one + EOF + test_cmp expected actual +' + +test_expect_success 'obj pool: NULL is offset ~0' ' + echo "$uint32_max" >expected && + echo null one | test-obj-pool >actual && + test_cmp expected actual +' + +test_expect_success 'obj pool: out-of-bounds access' ' + cat <<-EOF >expected && + 0 + 0 + $uint32_max + $uint32_max + 16 + 20 + $uint32_max + EOF + + test-obj-pool <<-\EOF >actual && + alloc one 16 + alloc two 16 + offset one 20 + offset two 20 + alloc one 5 + offset one 20 + free one 1 + offset one 20 + reset one + reset two + EOF + test_cmp expected actual +' + +test_expect_success 'obj pool: high-water mark' ' + cat <<-\EOF >expected && + 0 + 0 + 10 + 20 + 20 + 20 + EOF + + test-obj-pool <<-\EOF >actual && + alloc one 10 + committed one + alloc one 10 + commit one + committed one + alloc one 10 + free one 20 + committed one + reset one + EOF + test_cmp expected actual +' + +test_expect_success 'line buffer' ' + echo HELLO >expected1 && + printf "%s\n" "" HELLO >expected2 && + echo >expected3 && + printf "%s\n" "" Q | q_to_nul >expected4 && + printf "%s\n" foo "" >expected5 && + printf "%s\n" "" foo >expected6 && + + test-line-buffer <<-\EOF >actual1 && + 5 + HELLO + EOF + + test-line-buffer <<-\EOF >actual2 && + 0 + + 5 + HELLO + EOF + + q_to_nul <<-\EOF | + 1 + Q + EOF + test-line-buffer >actual3 && + + q_to_nul <<-\EOF | + 0 + + 1 + Q + EOF + test-line-buffer >actual4 && + + test-line-buffer <<-\EOF >actual5 && + 5 + foo + EOF + + test-line-buffer <<-\EOF >actual6 && + 0 + + 5 + foo + EOF + + test_cmp expected1 actual1 && + test_cmp expected2 actual2 && + test_cmp expected3 actual3 && + test_cmp expected4 actual4 && + test_cmp expected5 actual5 && + test_cmp expected6 actual6 +' + +test_expect_success 'string pool' ' + echo a does not equal b >expected.differ && + echo a equals a >expected.match && + echo equals equals equals >expected.matchmore && + + test-string-pool "a,--b" >actual.differ && + test-string-pool "a,a" >actual.match && + test-string-pool "equals-equals" >actual.matchmore && + test_must_fail test-string-pool a,a,a && + test_must_fail test-string-pool a && + + test_cmp expected.differ actual.differ && + test_cmp expected.match actual.match && + test_cmp expected.matchmore actual.matchmore +' + +test_expect_success 'treap sort' ' + cat <<-\EOF >unsorted && + 68 + 12 + 13 + 13 + 68 + 13 + 13 + 21 + 10 + 11 + 12 + 13 + 13 + EOF + sort unsorted >expected && + + test-treap <unsorted >actual && + test_cmp expected actual +' + +test_done diff --git a/t/t9010-svn-fe.sh b/t/t9010-svn-fe.sh new file mode 100755 index 0000000000..a713dfc50b --- /dev/null +++ b/t/t9010-svn-fe.sh @@ -0,0 +1,32 @@ +#!/bin/sh + +test_description='check svn dumpfile importer' + +. ./lib-git-svn.sh + +test_dump() { + label=$1 + dump=$2 + test_expect_success "$dump" ' + svnadmin create "$label-svn" && + svnadmin load "$label-svn" < "$TEST_DIRECTORY/$dump" && + svn_cmd export "file://$PWD/$label-svn" "$label-svnco" && + git init "$label-git" && + test-svn-fe "$TEST_DIRECTORY/$dump" >"$label.fe" && + ( + cd "$label-git" && + git fast-import < ../"$label.fe" + ) && + ( + cd "$label-svnco" && + git init && + git add . && + git fetch "../$label-git" master && + git diff --exit-code FETCH_HEAD + ) + ' +} + +test_dump simple t9135/svn.dump + +test_done diff --git a/test-line-buffer.c b/test-line-buffer.c new file mode 100644 index 0000000000..c11bf7f967 --- /dev/null +++ b/test-line-buffer.c @@ -0,0 +1,46 @@ +/* + * test-line-buffer.c: code to exercise the svn importer's input helper + * + * Input format: + * number NL + * (number bytes) NL + * number NL + * ... + */ + +#include "git-compat-util.h" +#include "vcs-svn/line_buffer.h" + +static uint32_t strtouint32(const char *s) +{ + char *end; + uintmax_t n = strtoumax(s, &end, 10); + if (*s == '\0' || *end != '\0') + die("invalid count: %s", s); + return (uint32_t) n; +} + +int main(int argc, char *argv[]) +{ + char *s; + + if (argc != 1) + usage("test-line-buffer < input.txt"); + if (buffer_init(NULL)) + die_errno("open error"); + while ((s = buffer_read_line())) { + s = buffer_read_string(strtouint32(s)); + fputs(s, stdout); + fputc('\n', stdout); + buffer_skip_bytes(1); + if (!(s = buffer_read_line())) + break; + buffer_copy_bytes(strtouint32(s) + 1); + } + if (buffer_deinit()) + die("input error"); + if (ferror(stdout)) + die("output error"); + buffer_reset(); + return 0; +} diff --git a/test-obj-pool.c b/test-obj-pool.c new file mode 100644 index 0000000000..5018863ef5 --- /dev/null +++ b/test-obj-pool.c @@ -0,0 +1,116 @@ +/* + * test-obj-pool.c: code to exercise the svn importer's object pool + */ + +#include "cache.h" +#include "vcs-svn/obj_pool.h" + +enum pool { POOL_ONE, POOL_TWO }; +obj_pool_gen(one, int, 1) +obj_pool_gen(two, int, 4096) + +static uint32_t strtouint32(const char *s) +{ + char *end; + uintmax_t n = strtoumax(s, &end, 10); + if (*s == '\0' || (*end != '\n' && *end != '\0')) + die("invalid offset: %s", s); + return (uint32_t) n; +} + +static void handle_command(const char *command, enum pool pool, const char *arg) +{ + switch (*command) { + case 'a': + if (!prefixcmp(command, "alloc ")) { + uint32_t n = strtouint32(arg); + printf("%"PRIu32"\n", + pool == POOL_ONE ? + one_alloc(n) : two_alloc(n)); + return; + } + case 'c': + if (!prefixcmp(command, "commit ")) { + pool == POOL_ONE ? one_commit() : two_commit(); + return; + } + if (!prefixcmp(command, "committed ")) { + printf("%"PRIu32"\n", + pool == POOL_ONE ? + one_pool.committed : two_pool.committed); + return; + } + case 'f': + if (!prefixcmp(command, "free ")) { + uint32_t n = strtouint32(arg); + pool == POOL_ONE ? one_free(n) : two_free(n); + return; + } + case 'n': + if (!prefixcmp(command, "null ")) { + printf("%"PRIu32"\n", + pool == POOL_ONE ? + one_offset(NULL) : two_offset(NULL)); + return; + } + case 'o': + if (!prefixcmp(command, "offset ")) { + uint32_t n = strtouint32(arg); + printf("%"PRIu32"\n", + pool == POOL_ONE ? + one_offset(one_pointer(n)) : + two_offset(two_pointer(n))); + return; + } + case 'r': + if (!prefixcmp(command, "reset ")) { + pool == POOL_ONE ? one_reset() : two_reset(); + return; + } + case 's': + if (!prefixcmp(command, "set ")) { + uint32_t n = strtouint32(arg); + if (pool == POOL_ONE) + *one_pointer(n) = 1; + else + *two_pointer(n) = 1; + return; + } + case 't': + if (!prefixcmp(command, "test ")) { + uint32_t n = strtouint32(arg); + printf("%d\n", pool == POOL_ONE ? + *one_pointer(n) : *two_pointer(n)); + return; + } + default: + die("unrecognized command: %s", command); + } +} + +static void handle_line(const char *line) +{ + const char *arg = strchr(line, ' '); + enum pool pool; + + if (arg && !prefixcmp(arg + 1, "one")) + pool = POOL_ONE; + else if (arg && !prefixcmp(arg + 1, "two")) + pool = POOL_TWO; + else + die("no pool specified: %s", line); + + handle_command(line, pool, arg + strlen("one ")); +} + +int main(int argc, char *argv[]) +{ + struct strbuf sb = STRBUF_INIT; + if (argc != 1) + usage("test-obj-str < script"); + + while (strbuf_getline(&sb, stdin, '\n') != EOF) + handle_line(sb.buf); + strbuf_release(&sb); + return 0; +} diff --git a/test-string-pool.c b/test-string-pool.c new file mode 100644 index 0000000000..c5782e6bce --- /dev/null +++ b/test-string-pool.c @@ -0,0 +1,31 @@ +/* + * test-string-pool.c: code to exercise the svn importer's string pool + */ + +#include "git-compat-util.h" +#include "vcs-svn/string_pool.h" + +int main(int argc, char *argv[]) +{ + const uint32_t unequal = pool_intern("does not equal"); + const uint32_t equal = pool_intern("equals"); + uint32_t buf[3]; + uint32_t n; + + if (argc != 2) + usage("test-string-pool <string>,<string>"); + + n = pool_tok_seq(3, buf, ",-", argv[1]); + if (n >= 3) + die("too many strings"); + if (n <= 1) + die("too few strings"); + + buf[2] = buf[1]; + buf[1] = (buf[0] == buf[2]) ? equal : unequal; + pool_print_seq(3, buf, ' ', stdout); + fputc('\n', stdout); + + pool_reset(); + return 0; +} diff --git a/test-svn-fe.c b/test-svn-fe.c new file mode 100644 index 0000000000..77cf78abcf --- /dev/null +++ b/test-svn-fe.c @@ -0,0 +1,17 @@ +/* + * test-svn-fe: Code to exercise the svn import lib + */ + +#include "git-compat-util.h" +#include "vcs-svn/svndump.h" + +int main(int argc, char *argv[]) +{ + if (argc != 2) + usage("test-svn-fe <file>"); + svndump_init(argv[1]); + svndump_read(NULL); + svndump_deinit(); + svndump_reset(); + return 0; +} diff --git a/test-treap.c b/test-treap.c new file mode 100644 index 0000000000..cdba5111e1 --- /dev/null +++ b/test-treap.c @@ -0,0 +1,65 @@ +/* + * test-treap.c: code to exercise the svn importer's treap structure + */ + +#include "cache.h" +#include "vcs-svn/obj_pool.h" +#include "vcs-svn/trp.h" + +struct int_node { + uintmax_t n; + struct trp_node children; +}; + +obj_pool_gen(node, struct int_node, 3) + +static int node_cmp(struct int_node *a, struct int_node *b) +{ + return (a->n > b->n) - (a->n < b->n); +} + +trp_gen(static, treap_, struct int_node, children, node, node_cmp) + +static void strtonode(struct int_node *item, const char *s) +{ + char *end; + item->n = strtoumax(s, &end, 10); + if (*s == '\0' || (*end != '\n' && *end != '\0')) + die("invalid integer: %s", s); +} + +int main(int argc, char *argv[]) +{ + struct strbuf sb = STRBUF_INIT; + struct trp_root root = { ~0 }; + uint32_t item; + + if (argc != 1) + usage("test-treap < ints"); + + while (strbuf_getline(&sb, stdin, '\n') != EOF) { + item = node_alloc(1); + strtonode(node_pointer(item), sb.buf); + treap_insert(&root, node_pointer(item)); + } + + item = node_offset(treap_first(&root)); + while (~item) { + uint32_t next; + struct int_node *tmp = node_pointer(node_alloc(1)); + + tmp->n = node_pointer(item)->n; + next = node_offset(treap_next(&root, node_pointer(item))); + + treap_remove(&root, node_pointer(item)); + item = node_offset(treap_nsearch(&root, tmp)); + + if (item != next && (!~item || node_pointer(item)->n != tmp->n)) + die("found %"PRIuMAX" in place of %"PRIuMAX"", + ~item ? node_pointer(item)->n : ~(uintmax_t) 0, + ~next ? node_pointer(next)->n : ~(uintmax_t) 0); + printf("%"PRIuMAX"\n", tmp->n); + } + node_reset(); + return 0; +} diff --git a/vcs-svn/LICENSE b/vcs-svn/LICENSE new file mode 100644 index 0000000000..0a5e3c43a0 --- /dev/null +++ b/vcs-svn/LICENSE @@ -0,0 +1,33 @@ +Copyright (C) 2010 David Barr <david.barr@cordelta.com>. +All rights reserved. + +Copyright (C) 2008 Jason Evans <jasone@canonware.com>. +All rights reserved. + +Copyright (C) 2005 Stefan Hegny, hydrografix Consulting GmbH, +Frankfurt/Main, Germany +and others, see http://svn2cc.sarovar.org + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice(s), this list of conditions and the following disclaimer + unmodified other than the allowable addition of one or more + copyright notices. +2. Redistributions in binary form must reproduce the above copyright + notice(s), this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c new file mode 100644 index 0000000000..256a0522b2 --- /dev/null +++ b/vcs-svn/fast_export.c @@ -0,0 +1,75 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#include "git-compat-util.h" +#include "fast_export.h" +#include "line_buffer.h" +#include "repo_tree.h" +#include "string_pool.h" + +#define MAX_GITSVN_LINE_LEN 4096 + +static uint32_t first_commit_done; + +void fast_export_delete(uint32_t depth, uint32_t *path) +{ + putchar('D'); + putchar(' '); + pool_print_seq(depth, path, '/', stdout); + putchar('\n'); +} + +void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, + uint32_t mark) +{ + /* Mode must be 100644, 100755, 120000, or 160000. */ + printf("M %06o :%d ", mode, mark); + pool_print_seq(depth, path, '/', stdout); + putchar('\n'); +} + +static char gitsvnline[MAX_GITSVN_LINE_LEN]; +void fast_export_commit(uint32_t revision, uint32_t author, char *log, + uint32_t uuid, uint32_t url, + unsigned long timestamp) +{ + if (!log) + log = ""; + if (~uuid && ~url) { + snprintf(gitsvnline, MAX_GITSVN_LINE_LEN, "\n\ngit-svn-id: %s@%d %s\n", + pool_fetch(url), revision, pool_fetch(uuid)); + } else { + *gitsvnline = '\0'; + } + printf("commit refs/heads/master\n"); + printf("committer %s <%s@%s> %ld +0000\n", + ~author ? pool_fetch(author) : "nobody", + ~author ? pool_fetch(author) : "nobody", + ~uuid ? pool_fetch(uuid) : "local", timestamp); + printf("data %"PRIu32"\n%s%s\n", + (uint32_t) (strlen(log) + strlen(gitsvnline)), + log, gitsvnline); + if (!first_commit_done) { + if (revision > 1) + printf("from refs/heads/master^0\n"); + first_commit_done = 1; + } + repo_diff(revision - 1, revision); + fputc('\n', stdout); + + printf("progress Imported commit %d.\n\n", revision); +} + +void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len) +{ + if (mode == REPO_MODE_LNK) { + /* svn symlink blobs start with "link " */ + buffer_skip_bytes(5); + len -= 5; + } + printf("blob\nmark :%d\ndata %d\n", mark, len); + buffer_copy_bytes(len); + fputc('\n', stdout); +} diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h new file mode 100644 index 0000000000..2aaaea53d5 --- /dev/null +++ b/vcs-svn/fast_export.h @@ -0,0 +1,11 @@ +#ifndef FAST_EXPORT_H_ +#define FAST_EXPORT_H_ + +void fast_export_delete(uint32_t depth, uint32_t *path); +void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, + uint32_t mark); +void fast_export_commit(uint32_t revision, uint32_t author, char *log, + uint32_t uuid, uint32_t url, unsigned long timestamp); +void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len); + +#endif diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c new file mode 100644 index 0000000000..1543567093 --- /dev/null +++ b/vcs-svn/line_buffer.c @@ -0,0 +1,97 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#include "git-compat-util.h" +#include "line_buffer.h" +#include "obj_pool.h" + +#define LINE_BUFFER_LEN 10000 +#define COPY_BUFFER_LEN 4096 + +/* Create memory pool for char sequence of known length */ +obj_pool_gen(blob, char, 4096) + +static char line_buffer[LINE_BUFFER_LEN]; +static char byte_buffer[COPY_BUFFER_LEN]; +static FILE *infile; + +int buffer_init(const char *filename) +{ + infile = filename ? fopen(filename, "r") : stdin; + if (!infile) + return -1; + return 0; +} + +int buffer_deinit(void) +{ + int err; + if (infile == stdin) + return ferror(infile); + err = ferror(infile); + err |= fclose(infile); + return err; +} + +/* Read a line without trailing newline. */ +char *buffer_read_line(void) +{ + char *end; + if (!fgets(line_buffer, sizeof(line_buffer), infile)) + /* Error or data exhausted. */ + return NULL; + end = line_buffer + strlen(line_buffer); + if (end[-1] == '\n') + end[-1] = '\0'; + else if (feof(infile)) + ; /* No newline at end of file. That's fine. */ + else + /* + * Line was too long. + * There is probably a saner way to deal with this, + * but for now let's return an error. + */ + return NULL; + return line_buffer; +} + +char *buffer_read_string(uint32_t len) +{ + char *s; + blob_free(blob_pool.size); + s = blob_pointer(blob_alloc(len + 1)); + s[fread(s, 1, len, infile)] = '\0'; + return ferror(infile) ? NULL : s; +} + +void buffer_copy_bytes(uint32_t len) +{ + uint32_t in; + while (len > 0 && !feof(infile) && !ferror(infile)) { + in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN; + in = fread(byte_buffer, 1, in, infile); + len -= in; + fwrite(byte_buffer, 1, in, stdout); + if (ferror(stdout)) { + buffer_skip_bytes(len); + return; + } + } +} + +void buffer_skip_bytes(uint32_t len) +{ + uint32_t in; + while (len > 0 && !feof(infile) && !ferror(infile)) { + in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN; + in = fread(byte_buffer, 1, in, infile); + len -= in; + } +} + +void buffer_reset(void) +{ + blob_reset(); +} diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h new file mode 100644 index 0000000000..9c78ae11a1 --- /dev/null +++ b/vcs-svn/line_buffer.h @@ -0,0 +1,12 @@ +#ifndef LINE_BUFFER_H_ +#define LINE_BUFFER_H_ + +int buffer_init(const char *filename); +int buffer_deinit(void); +char *buffer_read_line(void); +char *buffer_read_string(uint32_t len); +void buffer_copy_bytes(uint32_t len); +void buffer_skip_bytes(uint32_t len); +void buffer_reset(void); + +#endif diff --git a/vcs-svn/line_buffer.txt b/vcs-svn/line_buffer.txt new file mode 100644 index 0000000000..8906fb1f50 --- /dev/null +++ b/vcs-svn/line_buffer.txt @@ -0,0 +1,58 @@ +line_buffer API +=============== + +The line_buffer library provides a convenient interface for +mostly-line-oriented input. + +Each line is not permitted to exceed 10000 bytes. The provided +functions are not thread-safe or async-signal-safe, and like +`fgets()`, they generally do not function correctly if interrupted +by a signal without SA_RESTART set. + +Calling sequence +---------------- + +The calling program: + + - specifies a file to read with `buffer_init` + - processes input with `buffer_read_line`, `buffer_read_string`, + `buffer_skip_bytes`, and `buffer_copy_bytes` + - closes the file with `buffer_deinit`, perhaps to start over and + read another file. + +Before exiting, the caller can use `buffer_reset` to deallocate +resources for the benefit of profiling tools. + +Functions +--------- + +`buffer_init`:: + Open the named file for input. If filename is NULL, + start reading from stdin. On failure, returns -1 (with + errno indicating the nature of the failure). + +`buffer_deinit`:: + Stop reading from the current file (closing it unless + it was stdin). Returns nonzero if `fclose` fails or + the error indicator was set. + +`buffer_read_line`:: + Read a line and strip off the trailing newline. + On failure or end of file, returns NULL. + +`buffer_read_string`:: + Read `len` characters of input or up to the end of the + file, whichever comes first. Returns NULL on error. + Returns whatever characters were read (possibly "") + for end of file. + +`buffer_copy_bytes`:: + Read `len` bytes of input and dump them to the standard output + stream. Returns early for error or end of file. + +`buffer_skip_bytes`:: + Discards `len` bytes from the input stream (stopping early + if necessary because of an error or eof). + +`buffer_reset`:: + Deallocates non-static buffers. diff --git a/vcs-svn/obj_pool.h b/vcs-svn/obj_pool.h new file mode 100644 index 0000000000..deb6eb8135 --- /dev/null +++ b/vcs-svn/obj_pool.h @@ -0,0 +1,61 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#ifndef OBJ_POOL_H_ +#define OBJ_POOL_H_ + +#include "git-compat-util.h" + +#define MAYBE_UNUSED __attribute__((__unused__)) + +#define obj_pool_gen(pre, obj_t, initial_capacity) \ +static struct { \ + uint32_t committed; \ + uint32_t size; \ + uint32_t capacity; \ + obj_t *base; \ +} pre##_pool = {0, 0, 0, NULL}; \ +static MAYBE_UNUSED uint32_t pre##_alloc(uint32_t count) \ +{ \ + uint32_t offset; \ + if (pre##_pool.size + count > pre##_pool.capacity) { \ + while (pre##_pool.size + count > pre##_pool.capacity) \ + if (pre##_pool.capacity) \ + pre##_pool.capacity *= 2; \ + else \ + pre##_pool.capacity = initial_capacity; \ + pre##_pool.base = realloc(pre##_pool.base, \ + pre##_pool.capacity * sizeof(obj_t)); \ + } \ + offset = pre##_pool.size; \ + pre##_pool.size += count; \ + return offset; \ +} \ +static MAYBE_UNUSED void pre##_free(uint32_t count) \ +{ \ + pre##_pool.size -= count; \ +} \ +static MAYBE_UNUSED uint32_t pre##_offset(obj_t *obj) \ +{ \ + return obj == NULL ? ~0 : obj - pre##_pool.base; \ +} \ +static MAYBE_UNUSED obj_t *pre##_pointer(uint32_t offset) \ +{ \ + return offset >= pre##_pool.size ? NULL : &pre##_pool.base[offset]; \ +} \ +static MAYBE_UNUSED void pre##_commit(void) \ +{ \ + pre##_pool.committed = pre##_pool.size; \ +} \ +static MAYBE_UNUSED void pre##_reset(void) \ +{ \ + free(pre##_pool.base); \ + pre##_pool.base = NULL; \ + pre##_pool.size = 0; \ + pre##_pool.capacity = 0; \ + pre##_pool.committed = 0; \ +} + +#endif diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c new file mode 100644 index 0000000000..e94d91d129 --- /dev/null +++ b/vcs-svn/repo_tree.c @@ -0,0 +1,329 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#include "git-compat-util.h" + +#include "string_pool.h" +#include "repo_tree.h" +#include "obj_pool.h" +#include "fast_export.h" + +#include "trp.h" + +struct repo_dirent { + uint32_t name_offset; + struct trp_node children; + uint32_t mode; + uint32_t content_offset; +}; + +struct repo_dir { + struct trp_root entries; +}; + +struct repo_commit { + uint32_t root_dir_offset; +}; + +/* Memory pools for commit, dir and dirent */ +obj_pool_gen(commit, struct repo_commit, 4096) +obj_pool_gen(dir, struct repo_dir, 4096) +obj_pool_gen(dent, struct repo_dirent, 4096) + +static uint32_t active_commit; +static uint32_t mark; + +static int repo_dirent_name_cmp(const void *a, const void *b); + +/* Treap for directory entries */ +trp_gen(static, dent_, struct repo_dirent, children, dent, repo_dirent_name_cmp); + +uint32_t next_blob_mark(void) +{ + return mark++; +} + +static struct repo_dir *repo_commit_root_dir(struct repo_commit *commit) +{ + return dir_pointer(commit->root_dir_offset); +} + +static struct repo_dirent *repo_first_dirent(struct repo_dir *dir) +{ + return dent_first(&dir->entries); +} + +static int repo_dirent_name_cmp(const void *a, const void *b) +{ + const struct repo_dirent *dent1 = a, *dent2 = b; + uint32_t a_offset = dent1->name_offset; + uint32_t b_offset = dent2->name_offset; + return (a_offset > b_offset) - (a_offset < b_offset); +} + +static int repo_dirent_is_dir(struct repo_dirent *dent) +{ + return dent != NULL && dent->mode == REPO_MODE_DIR; +} + +static struct repo_dir *repo_dir_from_dirent(struct repo_dirent *dent) +{ + if (!repo_dirent_is_dir(dent)) + return NULL; + return dir_pointer(dent->content_offset); +} + +static struct repo_dir *repo_clone_dir(struct repo_dir *orig_dir) +{ + uint32_t orig_o, new_o; + orig_o = dir_offset(orig_dir); + if (orig_o >= dir_pool.committed) + return orig_dir; + new_o = dir_alloc(1); + orig_dir = dir_pointer(orig_o); + *dir_pointer(new_o) = *orig_dir; + return dir_pointer(new_o); +} + +static struct repo_dirent *repo_read_dirent(uint32_t revision, uint32_t *path) +{ + uint32_t name = 0; + struct repo_dirent *key = dent_pointer(dent_alloc(1)); + struct repo_dir *dir = NULL; + struct repo_dirent *dent = NULL; + dir = repo_commit_root_dir(commit_pointer(revision)); + while (~(name = *path++)) { + key->name_offset = name; + dent = dent_search(&dir->entries, key); + if (dent == NULL || !repo_dirent_is_dir(dent)) + break; + dir = repo_dir_from_dirent(dent); + } + dent_free(1); + return dent; +} + +static void repo_write_dirent(uint32_t *path, uint32_t mode, + uint32_t content_offset, uint32_t del) +{ + uint32_t name, revision, dir_o = ~0, parent_dir_o = ~0; + struct repo_dir *dir; + struct repo_dirent *key; + struct repo_dirent *dent = NULL; + revision = active_commit; + dir = repo_commit_root_dir(commit_pointer(revision)); + dir = repo_clone_dir(dir); + commit_pointer(revision)->root_dir_offset = dir_offset(dir); + while (~(name = *path++)) { + parent_dir_o = dir_offset(dir); + + key = dent_pointer(dent_alloc(1)); + key->name_offset = name; + + dent = dent_search(&dir->entries, key); + if (dent == NULL) + dent = key; + else + dent_free(1); + + if (dent == key) { + dent->mode = REPO_MODE_DIR; + dent->content_offset = 0; + dent_insert(&dir->entries, dent); + } + + if (dent_offset(dent) < dent_pool.committed) { + dir_o = repo_dirent_is_dir(dent) ? + dent->content_offset : ~0; + dent_remove(&dir->entries, dent); + dent = dent_pointer(dent_alloc(1)); + dent->name_offset = name; + dent->mode = REPO_MODE_DIR; + dent->content_offset = dir_o; + dent_insert(&dir->entries, dent); + } + + dir = repo_dir_from_dirent(dent); + dir = repo_clone_dir(dir); + dent->content_offset = dir_offset(dir); + } + if (dent == NULL) + return; + dent->mode = mode; + dent->content_offset = content_offset; + if (del && ~parent_dir_o) + dent_remove(&dir_pointer(parent_dir_o)->entries, dent); +} + +uint32_t repo_copy(uint32_t revision, uint32_t *src, uint32_t *dst) +{ + uint32_t mode = 0, content_offset = 0; + struct repo_dirent *src_dent; + src_dent = repo_read_dirent(revision, src); + if (src_dent != NULL) { + mode = src_dent->mode; + content_offset = src_dent->content_offset; + repo_write_dirent(dst, mode, content_offset, 0); + } + return mode; +} + +void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark) +{ + repo_write_dirent(path, mode, blob_mark, 0); +} + +uint32_t repo_replace(uint32_t *path, uint32_t blob_mark) +{ + uint32_t mode = 0; + struct repo_dirent *src_dent; + src_dent = repo_read_dirent(active_commit, path); + if (src_dent != NULL) { + mode = src_dent->mode; + repo_write_dirent(path, mode, blob_mark, 0); + } + return mode; +} + +void repo_modify(uint32_t *path, uint32_t mode, uint32_t blob_mark) +{ + struct repo_dirent *src_dent; + src_dent = repo_read_dirent(active_commit, path); + if (src_dent != NULL && blob_mark == 0) + blob_mark = src_dent->content_offset; + repo_write_dirent(path, mode, blob_mark, 0); +} + +void repo_delete(uint32_t *path) +{ + repo_write_dirent(path, 0, 0, 1); +} + +static void repo_git_add_r(uint32_t depth, uint32_t *path, struct repo_dir *dir); + +static void repo_git_add(uint32_t depth, uint32_t *path, struct repo_dirent *dent) +{ + if (repo_dirent_is_dir(dent)) + repo_git_add_r(depth, path, repo_dir_from_dirent(dent)); + else + fast_export_modify(depth, path, + dent->mode, dent->content_offset); +} + +static void repo_git_add_r(uint32_t depth, uint32_t *path, struct repo_dir *dir) +{ + struct repo_dirent *de = repo_first_dirent(dir); + while (de) { + path[depth] = de->name_offset; + repo_git_add(depth + 1, path, de); + de = dent_next(&dir->entries, de); + } +} + +static void repo_diff_r(uint32_t depth, uint32_t *path, struct repo_dir *dir1, + struct repo_dir *dir2) +{ + struct repo_dirent *de1, *de2; + de1 = repo_first_dirent(dir1); + de2 = repo_first_dirent(dir2); + + while (de1 && de2) { + if (de1->name_offset < de2->name_offset) { + path[depth] = de1->name_offset; + fast_export_delete(depth + 1, path); + de1 = dent_next(&dir1->entries, de1); + continue; + } + if (de1->name_offset > de2->name_offset) { + path[depth] = de2->name_offset; + repo_git_add(depth + 1, path, de2); + de2 = dent_next(&dir2->entries, de2); + continue; + } + path[depth] = de1->name_offset; + + if (de1->mode == de2->mode && + de1->content_offset == de2->content_offset) { + ; /* No change. */ + } else if (repo_dirent_is_dir(de1) && repo_dirent_is_dir(de2)) { + repo_diff_r(depth + 1, path, + repo_dir_from_dirent(de1), + repo_dir_from_dirent(de2)); + } else if (!repo_dirent_is_dir(de1) && !repo_dirent_is_dir(de2)) { + repo_git_add(depth + 1, path, de2); + } else { + fast_export_delete(depth + 1, path); + repo_git_add(depth + 1, path, de2); + } + de1 = dent_next(&dir1->entries, de1); + de2 = dent_next(&dir2->entries, de2); + } + while (de1) { + path[depth] = de1->name_offset; + fast_export_delete(depth + 1, path); + de1 = dent_next(&dir1->entries, de1); + } + while (de2) { + path[depth] = de2->name_offset; + repo_git_add(depth + 1, path, de2); + de2 = dent_next(&dir2->entries, de2); + } +} + +static uint32_t path_stack[REPO_MAX_PATH_DEPTH]; + +void repo_diff(uint32_t r1, uint32_t r2) +{ + repo_diff_r(0, + path_stack, + repo_commit_root_dir(commit_pointer(r1)), + repo_commit_root_dir(commit_pointer(r2))); +} + +void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, + uint32_t url, unsigned long timestamp) +{ + fast_export_commit(revision, author, log, uuid, url, timestamp); + dent_commit(); + dir_commit(); + active_commit = commit_alloc(1); + commit_pointer(active_commit)->root_dir_offset = + commit_pointer(active_commit - 1)->root_dir_offset; +} + +static void mark_init(void) +{ + uint32_t i; + mark = 0; + for (i = 0; i < dent_pool.size; i++) + if (!repo_dirent_is_dir(dent_pointer(i)) && + dent_pointer(i)->content_offset > mark) + mark = dent_pointer(i)->content_offset; + mark++; +} + +void repo_init(void) +{ + mark_init(); + if (commit_pool.size == 0) { + /* Create empty tree for commit 0. */ + commit_alloc(1); + commit_pointer(0)->root_dir_offset = dir_alloc(1); + dir_pointer(0)->entries.trp_root = ~0; + dir_commit(); + } + /* Preallocate next commit, ready for changes. */ + active_commit = commit_alloc(1); + commit_pointer(active_commit)->root_dir_offset = + commit_pointer(active_commit - 1)->root_dir_offset; +} + +void repo_reset(void) +{ + pool_reset(); + commit_reset(); + dir_reset(); + dent_reset(); +} diff --git a/vcs-svn/repo_tree.h b/vcs-svn/repo_tree.h new file mode 100644 index 0000000000..5476175922 --- /dev/null +++ b/vcs-svn/repo_tree.h @@ -0,0 +1,26 @@ +#ifndef REPO_TREE_H_ +#define REPO_TREE_H_ + +#include "git-compat-util.h" + +#define REPO_MODE_DIR 0040000 +#define REPO_MODE_BLB 0100644 +#define REPO_MODE_EXE 0100755 +#define REPO_MODE_LNK 0120000 + +#define REPO_MAX_PATH_LEN 4096 +#define REPO_MAX_PATH_DEPTH 1000 + +uint32_t next_blob_mark(void); +uint32_t repo_copy(uint32_t revision, uint32_t *src, uint32_t *dst); +void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark); +uint32_t repo_replace(uint32_t *path, uint32_t blob_mark); +void repo_modify(uint32_t *path, uint32_t mode, uint32_t blob_mark); +void repo_delete(uint32_t *path); +void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, + uint32_t url, long unsigned timestamp); +void repo_diff(uint32_t r1, uint32_t r2); +void repo_init(void); +void repo_reset(void); + +#endif diff --git a/vcs-svn/string_pool.c b/vcs-svn/string_pool.c new file mode 100644 index 0000000000..f5b1da836e --- /dev/null +++ b/vcs-svn/string_pool.c @@ -0,0 +1,102 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#include "git-compat-util.h" +#include "trp.h" +#include "obj_pool.h" +#include "string_pool.h" + +static struct trp_root tree = { ~0 }; + +struct node { + uint32_t offset; + struct trp_node children; +}; + +/* Two memory pools: one for struct node, and another for strings */ +obj_pool_gen(node, struct node, 4096) +obj_pool_gen(string, char, 4096) + +static char *node_value(struct node *node) +{ + return node ? string_pointer(node->offset) : NULL; +} + +static int node_cmp(struct node *a, struct node *b) +{ + return strcmp(node_value(a), node_value(b)); +} + +/* Build a Treap from the node structure (a trp_node w/ offset) */ +trp_gen(static, tree_, struct node, children, node, node_cmp); + +const char *pool_fetch(uint32_t entry) +{ + return node_value(node_pointer(entry)); +} + +uint32_t pool_intern(const char *key) +{ + /* Canonicalize key */ + struct node *match = NULL, *node; + uint32_t key_len; + if (key == NULL) + return ~0; + key_len = strlen(key) + 1; + node = node_pointer(node_alloc(1)); + node->offset = string_alloc(key_len); + strcpy(node_value(node), key); + match = tree_search(&tree, node); + if (!match) { + tree_insert(&tree, node); + } else { + node_free(1); + string_free(key_len); + node = match; + } + return node_offset(node); +} + +uint32_t pool_tok_r(char *str, const char *delim, char **saveptr) +{ + char *token = strtok_r(str, delim, saveptr); + return token ? pool_intern(token) : ~0; +} + +void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream) +{ + uint32_t i; + for (i = 0; i < len && ~seq[i]; i++) { + fputs(pool_fetch(seq[i]), stream); + if (i < len - 1 && ~seq[i + 1]) + fputc(delim, stream); + } +} + +uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str) +{ + char *context = NULL; + uint32_t token = ~0; + uint32_t length; + + if (sz == 0) + return ~0; + if (str) + token = pool_tok_r(str, delim, &context); + for (length = 0; length < sz; length++) { + seq[length] = token; + if (token == ~0) + return length; + token = pool_tok_r(NULL, delim, &context); + } + seq[sz - 1] = ~0; + return sz; +} + +void pool_reset(void) +{ + node_reset(); + string_reset(); +} diff --git a/vcs-svn/string_pool.h b/vcs-svn/string_pool.h new file mode 100644 index 0000000000..222fb66e68 --- /dev/null +++ b/vcs-svn/string_pool.h @@ -0,0 +1,11 @@ +#ifndef STRING_POOL_H_ +#define STRING_POOL_H_ + +uint32_t pool_intern(const char *key); +const char *pool_fetch(uint32_t entry); +uint32_t pool_tok_r(char *str, const char *delim, char **saveptr); +void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream); +uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str); +void pool_reset(void); + +#endif diff --git a/vcs-svn/string_pool.txt b/vcs-svn/string_pool.txt new file mode 100644 index 0000000000..1b41f15628 --- /dev/null +++ b/vcs-svn/string_pool.txt @@ -0,0 +1,43 @@ +string_pool API +=============== + +The string_pool API provides facilities for replacing strings +with integer keys that can be more easily compared and stored. +The facilities are designed so that one could teach Git without +too much trouble to store the information needed for these keys to +remain valid over multiple executions. + +Functions +--------- + +pool_intern:: + Include a string in the string pool and get its key. + If that string is already in the pool, retrieves its + existing key. + +pool_fetch:: + Retrieve the string associated to a given key. + +pool_tok_r:: + Extract the key of the next token from a string. + Interface mimics strtok_r. + +pool_print_seq:: + Print a sequence of strings named by key to a file, using the + specified delimiter to separate them. + + If NULL (key ~0) appears in the sequence, the sequence ends + early. + +pool_tok_seq:: + Split a string into tokens, storing the keys of segments + into a caller-provided array. + + Unless sz is 0, the array will always be ~0-terminated. + If there is not enough room for all the tokens, the + array holds as many tokens as fit in the entries before + the terminating ~0. Return value is the index after the + last token, or sz if the tokens did not fit. + +pool_reset:: + Deallocate storage for the string pool. diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c new file mode 100644 index 0000000000..630eeb53b7 --- /dev/null +++ b/vcs-svn/svndump.c @@ -0,0 +1,302 @@ +/* + * Parse and rearrange a svnadmin dump. + * Create the dump with: + * svnadmin dump --incremental -r<startrev>:<endrev> <repository> >outfile + * + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#include "cache.h" +#include "repo_tree.h" +#include "fast_export.h" +#include "line_buffer.h" +#include "obj_pool.h" +#include "string_pool.h" + +#define NODEACT_REPLACE 4 +#define NODEACT_DELETE 3 +#define NODEACT_ADD 2 +#define NODEACT_CHANGE 1 +#define NODEACT_UNKNOWN 0 + +#define DUMP_CTX 0 +#define REV_CTX 1 +#define NODE_CTX 2 + +#define LENGTH_UNKNOWN (~0) +#define DATE_RFC2822_LEN 31 + +/* Create memory pool for log messages */ +obj_pool_gen(log, char, 4096) + +static char* log_copy(uint32_t length, char *log) +{ + char *buffer; + log_free(log_pool.size); + buffer = log_pointer(log_alloc(length)); + strncpy(buffer, log, length); + return buffer; +} + +static struct { + uint32_t action, propLength, textLength, srcRev, srcMode, mark, type; + uint32_t src[REPO_MAX_PATH_DEPTH], dst[REPO_MAX_PATH_DEPTH]; +} node_ctx; + +static struct { + uint32_t revision, author; + unsigned long timestamp; + char *log; +} rev_ctx; + +static struct { + uint32_t uuid, url; +} dump_ctx; + +static struct { + uint32_t svn_log, svn_author, svn_date, svn_executable, svn_special, uuid, + revision_number, node_path, node_kind, node_action, + node_copyfrom_path, node_copyfrom_rev, text_content_length, + prop_content_length, content_length; +} keys; + +static void reset_node_ctx(char *fname) +{ + node_ctx.type = 0; + node_ctx.action = NODEACT_UNKNOWN; + node_ctx.propLength = LENGTH_UNKNOWN; + node_ctx.textLength = LENGTH_UNKNOWN; + node_ctx.src[0] = ~0; + node_ctx.srcRev = 0; + node_ctx.srcMode = 0; + pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.dst, "/", fname); + node_ctx.mark = 0; +} + +static void reset_rev_ctx(uint32_t revision) +{ + rev_ctx.revision = revision; + rev_ctx.timestamp = 0; + rev_ctx.log = NULL; + rev_ctx.author = ~0; +} + +static void reset_dump_ctx(uint32_t url) +{ + dump_ctx.url = url; + dump_ctx.uuid = ~0; +} + +static void init_keys(void) +{ + keys.svn_log = pool_intern("svn:log"); + keys.svn_author = pool_intern("svn:author"); + keys.svn_date = pool_intern("svn:date"); + keys.svn_executable = pool_intern("svn:executable"); + keys.svn_special = pool_intern("svn:special"); + keys.uuid = pool_intern("UUID"); + keys.revision_number = pool_intern("Revision-number"); + keys.node_path = pool_intern("Node-path"); + keys.node_kind = pool_intern("Node-kind"); + keys.node_action = pool_intern("Node-action"); + keys.node_copyfrom_path = pool_intern("Node-copyfrom-path"); + keys.node_copyfrom_rev = pool_intern("Node-copyfrom-rev"); + keys.text_content_length = pool_intern("Text-content-length"); + keys.prop_content_length = pool_intern("Prop-content-length"); + keys.content_length = pool_intern("Content-length"); +} + +static void read_props(void) +{ + uint32_t len; + uint32_t key = ~0; + char *val = NULL; + char *t; + while ((t = buffer_read_line()) && strcmp(t, "PROPS-END")) { + if (!strncmp(t, "K ", 2)) { + len = atoi(&t[2]); + key = pool_intern(buffer_read_string(len)); + buffer_read_line(); + } else if (!strncmp(t, "V ", 2)) { + len = atoi(&t[2]); + val = buffer_read_string(len); + if (key == keys.svn_log) { + /* Value length excludes terminating nul. */ + rev_ctx.log = log_copy(len + 1, val); + } else if (key == keys.svn_author) { + rev_ctx.author = pool_intern(val); + } else if (key == keys.svn_date) { + if (parse_date_basic(val, &rev_ctx.timestamp, NULL)) + fprintf(stderr, "Invalid timestamp: %s\n", val); + } else if (key == keys.svn_executable) { + node_ctx.type = REPO_MODE_EXE; + } else if (key == keys.svn_special) { + node_ctx.type = REPO_MODE_LNK; + } + key = ~0; + buffer_read_line(); + } + } +} + +static void handle_node(void) +{ + if (node_ctx.propLength != LENGTH_UNKNOWN && node_ctx.propLength) + read_props(); + + if (node_ctx.srcRev) + node_ctx.srcMode = repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst); + + if (node_ctx.textLength != LENGTH_UNKNOWN && + node_ctx.type != REPO_MODE_DIR) + node_ctx.mark = next_blob_mark(); + + if (node_ctx.action == NODEACT_DELETE) { + repo_delete(node_ctx.dst); + } else if (node_ctx.action == NODEACT_CHANGE || + node_ctx.action == NODEACT_REPLACE) { + if (node_ctx.action == NODEACT_REPLACE && + node_ctx.type == REPO_MODE_DIR) + repo_replace(node_ctx.dst, node_ctx.mark); + else if (node_ctx.propLength != LENGTH_UNKNOWN) + repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark); + else if (node_ctx.textLength != LENGTH_UNKNOWN) + node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark); + } else if (node_ctx.action == NODEACT_ADD) { + if (node_ctx.srcRev && node_ctx.propLength != LENGTH_UNKNOWN) + repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark); + else if (node_ctx.srcRev && node_ctx.textLength != LENGTH_UNKNOWN) + node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark); + else if ((node_ctx.type == REPO_MODE_DIR && !node_ctx.srcRev) || + node_ctx.textLength != LENGTH_UNKNOWN) + repo_add(node_ctx.dst, node_ctx.type, node_ctx.mark); + } + + if (node_ctx.propLength == LENGTH_UNKNOWN && node_ctx.srcMode) + node_ctx.type = node_ctx.srcMode; + + if (node_ctx.mark) + fast_export_blob(node_ctx.type, node_ctx.mark, node_ctx.textLength); + else if (node_ctx.textLength != LENGTH_UNKNOWN) + buffer_skip_bytes(node_ctx.textLength); +} + +static void handle_revision(void) +{ + if (rev_ctx.revision) + repo_commit(rev_ctx.revision, rev_ctx.author, rev_ctx.log, + dump_ctx.uuid, dump_ctx.url, rev_ctx.timestamp); +} + +void svndump_read(const char *url) +{ + char *val; + char *t; + uint32_t active_ctx = DUMP_CTX; + uint32_t len; + uint32_t key; + + reset_dump_ctx(pool_intern(url)); + while ((t = buffer_read_line())) { + val = strstr(t, ": "); + if (!val) + continue; + *val++ = '\0'; + *val++ = '\0'; + key = pool_intern(t); + + if (key == keys.uuid) { + dump_ctx.uuid = pool_intern(val); + } else if (key == keys.revision_number) { + if (active_ctx == NODE_CTX) + handle_node(); + if (active_ctx != DUMP_CTX) + handle_revision(); + active_ctx = REV_CTX; + reset_rev_ctx(atoi(val)); + } else if (key == keys.node_path) { + if (active_ctx == NODE_CTX) + handle_node(); + active_ctx = NODE_CTX; + reset_node_ctx(val); + } else if (key == keys.node_kind) { + if (!strcmp(val, "dir")) + node_ctx.type = REPO_MODE_DIR; + else if (!strcmp(val, "file")) + node_ctx.type = REPO_MODE_BLB; + else + fprintf(stderr, "Unknown node-kind: %s\n", val); + } else if (key == keys.node_action) { + if (!strcmp(val, "delete")) { + node_ctx.action = NODEACT_DELETE; + } else if (!strcmp(val, "add")) { + node_ctx.action = NODEACT_ADD; + } else if (!strcmp(val, "change")) { + node_ctx.action = NODEACT_CHANGE; + } else if (!strcmp(val, "replace")) { + node_ctx.action = NODEACT_REPLACE; + } else { + fprintf(stderr, "Unknown node-action: %s\n", val); + node_ctx.action = NODEACT_UNKNOWN; + } + } else if (key == keys.node_copyfrom_path) { + pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.src, "/", val); + } else if (key == keys.node_copyfrom_rev) { + node_ctx.srcRev = atoi(val); + } else if (key == keys.text_content_length) { + node_ctx.textLength = atoi(val); + } else if (key == keys.prop_content_length) { + node_ctx.propLength = atoi(val); + } else if (key == keys.content_length) { + len = atoi(val); + buffer_read_line(); + if (active_ctx == REV_CTX) { + read_props(); + } else if (active_ctx == NODE_CTX) { + handle_node(); + active_ctx = REV_CTX; + } else { + fprintf(stderr, "Unexpected content length header: %d\n", len); + buffer_skip_bytes(len); + } + } + } + if (active_ctx == NODE_CTX) + handle_node(); + if (active_ctx != DUMP_CTX) + handle_revision(); +} + +void svndump_init(const char *filename) +{ + buffer_init(filename); + repo_init(); + reset_dump_ctx(~0); + reset_rev_ctx(0); + reset_node_ctx(NULL); + init_keys(); +} + +void svndump_deinit(void) +{ + log_reset(); + repo_reset(); + reset_dump_ctx(~0); + reset_rev_ctx(0); + reset_node_ctx(NULL); + if (buffer_deinit()) + fprintf(stderr, "Input error\n"); + if (ferror(stdout)) + fprintf(stderr, "Output error\n"); +} + +void svndump_reset(void) +{ + log_reset(); + buffer_reset(); + repo_reset(); + reset_dump_ctx(~0); + reset_rev_ctx(0); + reset_node_ctx(NULL); +} diff --git a/vcs-svn/svndump.h b/vcs-svn/svndump.h new file mode 100644 index 0000000000..93c412f14a --- /dev/null +++ b/vcs-svn/svndump.h @@ -0,0 +1,9 @@ +#ifndef SVNDUMP_H_ +#define SVNDUMP_H_ + +void svndump_init(const char *filename); +void svndump_read(const char *url); +void svndump_deinit(void); +void svndump_reset(void); + +#endif diff --git a/vcs-svn/trp.h b/vcs-svn/trp.h new file mode 100644 index 0000000000..ee35c688a0 --- /dev/null +++ b/vcs-svn/trp.h @@ -0,0 +1,236 @@ +/* + * C macro implementation of treaps. + * + * Usage: + * #include <stdint.h> + * #include "trp.h" + * trp_gen(...) + * + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#ifndef TRP_H_ +#define TRP_H_ + +#define MAYBE_UNUSED __attribute__((__unused__)) + +/* Node structure. */ +struct trp_node { + uint32_t trpn_left; + uint32_t trpn_right; +}; + +/* Root structure. */ +struct trp_root { + uint32_t trp_root; +}; + +/* Pointer/Offset conversion. */ +#define trpn_pointer(a_base, a_offset) (a_base##_pointer(a_offset)) +#define trpn_offset(a_base, a_pointer) (a_base##_offset(a_pointer)) +#define trpn_modify(a_base, a_offset) \ + do { \ + if ((a_offset) < a_base##_pool.committed) { \ + uint32_t old_offset = (a_offset);\ + (a_offset) = a_base##_alloc(1); \ + *trpn_pointer(a_base, a_offset) = \ + *trpn_pointer(a_base, old_offset); \ + } \ + } while (0) + +/* Left accessors. */ +#define trp_left_get(a_base, a_field, a_node) \ + (trpn_pointer(a_base, a_node)->a_field.trpn_left) +#define trp_left_set(a_base, a_field, a_node, a_left) \ + do { \ + trpn_modify(a_base, a_node); \ + trp_left_get(a_base, a_field, a_node) = (a_left); \ + } while (0) + +/* Right accessors. */ +#define trp_right_get(a_base, a_field, a_node) \ + (trpn_pointer(a_base, a_node)->a_field.trpn_right) +#define trp_right_set(a_base, a_field, a_node, a_right) \ + do { \ + trpn_modify(a_base, a_node); \ + trp_right_get(a_base, a_field, a_node) = (a_right); \ + } while (0) + +/* + * Fibonacci hash function. + * The multiplier is the nearest prime to (2^32 times (√5 - 1)/2). + * See Knuth §6.4: volume 3, 3rd ed, p518. + */ +#define trpn_hash(a_node) (uint32_t) (2654435761u * (a_node)) + +/* Priority accessors. */ +#define trp_prio_get(a_node) trpn_hash(a_node) + +/* Node initializer. */ +#define trp_node_new(a_base, a_field, a_node) \ + do { \ + trp_left_set(a_base, a_field, (a_node), ~0); \ + trp_right_set(a_base, a_field, (a_node), ~0); \ + } while (0) + +/* Internal utility macros. */ +#define trpn_first(a_base, a_field, a_root, r_node) \ + do { \ + (r_node) = (a_root); \ + if ((r_node) == ~0) \ + return NULL; \ + while (~trp_left_get(a_base, a_field, (r_node))) \ + (r_node) = trp_left_get(a_base, a_field, (r_node)); \ + } while (0) + +#define trpn_rotate_left(a_base, a_field, a_node, r_node) \ + do { \ + (r_node) = trp_right_get(a_base, a_field, (a_node)); \ + trp_right_set(a_base, a_field, (a_node), \ + trp_left_get(a_base, a_field, (r_node))); \ + trp_left_set(a_base, a_field, (r_node), (a_node)); \ + } while (0) + +#define trpn_rotate_right(a_base, a_field, a_node, r_node) \ + do { \ + (r_node) = trp_left_get(a_base, a_field, (a_node)); \ + trp_left_set(a_base, a_field, (a_node), \ + trp_right_get(a_base, a_field, (r_node))); \ + trp_right_set(a_base, a_field, (r_node), (a_node)); \ + } while (0) + +#define trp_gen(a_attr, a_pre, a_type, a_field, a_base, a_cmp) \ +a_attr a_type MAYBE_UNUSED *a_pre##first(struct trp_root *treap) \ +{ \ + uint32_t ret; \ + trpn_first(a_base, a_field, treap->trp_root, ret); \ + return trpn_pointer(a_base, ret); \ +} \ +a_attr a_type MAYBE_UNUSED *a_pre##next(struct trp_root *treap, a_type *node) \ +{ \ + uint32_t ret; \ + uint32_t offset = trpn_offset(a_base, node); \ + if (~trp_right_get(a_base, a_field, offset)) { \ + trpn_first(a_base, a_field, \ + trp_right_get(a_base, a_field, offset), ret); \ + } else { \ + uint32_t tnode = treap->trp_root; \ + ret = ~0; \ + while (1) { \ + int cmp = (a_cmp)(trpn_pointer(a_base, offset), \ + trpn_pointer(a_base, tnode)); \ + if (cmp < 0) { \ + ret = tnode; \ + tnode = trp_left_get(a_base, a_field, tnode); \ + } else if (cmp > 0) { \ + tnode = trp_right_get(a_base, a_field, tnode); \ + } else { \ + break; \ + } \ + } \ + } \ + return trpn_pointer(a_base, ret); \ +} \ +a_attr a_type MAYBE_UNUSED *a_pre##search(struct trp_root *treap, a_type *key) \ +{ \ + int cmp; \ + uint32_t ret = treap->trp_root; \ + while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base, ret)))) { \ + if (cmp < 0) { \ + ret = trp_left_get(a_base, a_field, ret); \ + } else { \ + ret = trp_right_get(a_base, a_field, ret); \ + } \ + } \ + return trpn_pointer(a_base, ret); \ +} \ +a_attr a_type MAYBE_UNUSED *a_pre##nsearch(struct trp_root *treap, a_type *key) \ +{ \ + int cmp; \ + uint32_t ret = treap->trp_root; \ + while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base, ret)))) { \ + if (cmp < 0) { \ + if (!~trp_left_get(a_base, a_field, ret)) \ + break; \ + ret = trp_left_get(a_base, a_field, ret); \ + } else { \ + ret = trp_right_get(a_base, a_field, ret); \ + } \ + } \ + return trpn_pointer(a_base, ret); \ +} \ +a_attr uint32_t MAYBE_UNUSED a_pre##insert_recurse(uint32_t cur_node, uint32_t ins_node) \ +{ \ + if (cur_node == ~0) { \ + return ins_node; \ + } else { \ + uint32_t ret; \ + int cmp = (a_cmp)(trpn_pointer(a_base, ins_node), \ + trpn_pointer(a_base, cur_node)); \ + if (cmp < 0) { \ + uint32_t left = a_pre##insert_recurse( \ + trp_left_get(a_base, a_field, cur_node), ins_node); \ + trp_left_set(a_base, a_field, cur_node, left); \ + if (trp_prio_get(left) < trp_prio_get(cur_node)) \ + trpn_rotate_right(a_base, a_field, cur_node, ret); \ + else \ + ret = cur_node; \ + } else { \ + uint32_t right = a_pre##insert_recurse( \ + trp_right_get(a_base, a_field, cur_node), ins_node); \ + trp_right_set(a_base, a_field, cur_node, right); \ + if (trp_prio_get(right) < trp_prio_get(cur_node)) \ + trpn_rotate_left(a_base, a_field, cur_node, ret); \ + else \ + ret = cur_node; \ + } \ + return ret; \ + } \ +} \ +a_attr void MAYBE_UNUSED a_pre##insert(struct trp_root *treap, a_type *node) \ +{ \ + uint32_t offset = trpn_offset(a_base, node); \ + trp_node_new(a_base, a_field, offset); \ + treap->trp_root = a_pre##insert_recurse(treap->trp_root, offset); \ +} \ +a_attr uint32_t MAYBE_UNUSED a_pre##remove_recurse(uint32_t cur_node, uint32_t rem_node) \ +{ \ + int cmp = a_cmp(trpn_pointer(a_base, rem_node), \ + trpn_pointer(a_base, cur_node)); \ + if (cmp == 0) { \ + uint32_t ret; \ + uint32_t left = trp_left_get(a_base, a_field, cur_node); \ + uint32_t right = trp_right_get(a_base, a_field, cur_node); \ + if (left == ~0) { \ + if (right == ~0) \ + return ~0; \ + } else if (right == ~0 || trp_prio_get(left) < trp_prio_get(right)) { \ + trpn_rotate_right(a_base, a_field, cur_node, ret); \ + right = a_pre##remove_recurse(cur_node, rem_node); \ + trp_right_set(a_base, a_field, ret, right); \ + return ret; \ + } \ + trpn_rotate_left(a_base, a_field, cur_node, ret); \ + left = a_pre##remove_recurse(cur_node, rem_node); \ + trp_left_set(a_base, a_field, ret, left); \ + return ret; \ + } else if (cmp < 0) { \ + uint32_t left = a_pre##remove_recurse( \ + trp_left_get(a_base, a_field, cur_node), rem_node); \ + trp_left_set(a_base, a_field, cur_node, left); \ + return cur_node; \ + } else { \ + uint32_t right = a_pre##remove_recurse( \ + trp_right_get(a_base, a_field, cur_node), rem_node); \ + trp_right_set(a_base, a_field, cur_node, right); \ + return cur_node; \ + } \ +} \ +a_attr void MAYBE_UNUSED a_pre##remove(struct trp_root *treap, a_type *node) \ +{ \ + treap->trp_root = a_pre##remove_recurse(treap->trp_root, \ + trpn_offset(a_base, node)); \ +} \ + +#endif diff --git a/vcs-svn/trp.txt b/vcs-svn/trp.txt new file mode 100644 index 0000000000..eb4c191875 --- /dev/null +++ b/vcs-svn/trp.txt @@ -0,0 +1,103 @@ +Motivation +========== + +Treaps provide a memory-efficient binary search tree structure. +Insertion/deletion/search are about as about as fast in the average +case as red-black trees and the chances of worst-case behavior are +vanishingly small, thanks to (pseudo-)randomness. The bad worst-case +behavior is a small price to pay, given that treaps are much simpler +to implement. + +API +=== + +The trp API generates a data structure and functions to handle a +large growing set of objects stored in a pool. + +The caller: + +. Specifies parameters for the generated functions with the + trp_gen(static, foo_, ...) macro. + +. Allocates a `struct trp_root` variable and sets it to {~0}. + +. Adds new nodes to the set using `foo_insert`. + +. Can find a specific item in the set using `foo_search`. + +. Can iterate over items in the set using `foo_first` and `foo_next`. + +. Can remove an item from the set using `foo_remove`. + +Example: + +---- +struct ex_node { + const char *s; + struct trp_node ex_link; +}; +static struct trp_root ex_base = {~0}; +obj_pool_gen(ex, struct ex_node, 4096); +trp_gen(static, ex_, struct ex_node, ex_link, ex, strcmp) +struct ex_node *item; + +item = ex_pointer(ex_alloc(1)); +item->s = "hello"; +ex_insert(&ex_base, item); +item = ex_pointer(ex_alloc(1)); +item->s = "goodbye"; +ex_insert(&ex_base, item); +for (item = ex_first(&ex_base); item; item = ex_next(&ex_base, item)) + printf("%s\n", item->s); +---- + +Functions +--------- + +trp_gen(attr, foo_, node_type, link_field, pool, cmp):: + + Generate a type-specific treap implementation. ++ +. The storage class for generated functions will be 'attr' (e.g., `static`). +. Generated function names are prefixed with 'foo_' (e.g., `treap_`). +. Treap nodes will be of type 'node_type' (e.g., `struct treap_node`). + This type must be a struct with at least one `struct trp_node` field + to point to its children. +. The field used to access child nodes will be 'link_field'. +. All treap nodes must lie in the 'pool' object pool. +. Treap nodes must be totally ordered by the 'cmp' relation, with the + following prototype: ++ +int (*cmp)(node_type \*a, node_type \*b) ++ +and returning a value less than, equal to, or greater than zero +according to the result of comparison. + +void foo_insert(struct trp_root *treap, node_type \*node):: + + Insert node into treap. If inserted multiple times, + a node will appear in the treap multiple times. + +void foo_remove(struct trp_root *treap, node_type \*node):: + + Remove node from treap. Caller must ensure node is + present in treap before using this function. + +node_type *foo_search(struct trp_root \*treap, node_type \*key):: + + Search for a node that matches key. If no match is found, + result is NULL. + +node_type *foo_nsearch(struct trp_root \*treap, node_type \*key):: + + Like `foo_search`, but if if the key is missing return what + would be key's successor, were key in treap (NULL if no + successor). + +node_type *foo_first(struct trp_root \*treap):: + + Find the first item from the treap, in sorted order. + +node_type *foo_next(struct trp_root \*treap, node_type \*node):: + + Find the next item. |