From 3f527372d9ec6d7b6890773e41c4b3542d7ad451 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Mon, 9 Aug 2010 17:04:29 -0500 Subject: Introduce vcs-svn lib Teach the build system to build a separate library for the upcoming subversion interop support. The resulting vcs-svn/lib.a does not contain any code, nor is it built during a normal build. This is just scaffolding for later changes. Signed-off-by: Jonathan Nieder Signed-off-by: Ramkumar Ramachandra Signed-off-by: Junio C Hamano Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/LICENSE | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 vcs-svn/LICENSE (limited to 'vcs-svn') diff --git a/vcs-svn/LICENSE b/vcs-svn/LICENSE new file mode 100644 index 0000000000..6e52372f89 --- /dev/null +++ b/vcs-svn/LICENSE @@ -0,0 +1,26 @@ +Copyright (C) 2010 David Barr . +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice(s), this list of conditions and the following disclaimer + unmodified other than the allowable addition of one or more + copyright notices. +2. Redistributions in binary form must reproduce the above copyright + notice(s), this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -- cgit v1.2.3 From 4709455db3891f6cad9a96a574296b4926f70cbe Mon Sep 17 00:00:00 2001 From: David Barr Date: Mon, 9 Aug 2010 17:11:11 -0500 Subject: Add memory pool library Add a memory pool library implemented using C macros. The obj_pool_gen() macro creates a type-specific memory pool. The memory pool library is distinguished from the existing specialized allocators in alloc.c by using a contiguous block for all allocations. This means that on one hand, long-lived pointers have to be written as offsets, since the base address changes as the pool grows, but on the other hand, the entire pool can be easily written to the file system. This could allow the memory pool to persist between runs of an application. For the svn importer, such a facility is useful because each svn revision can copy trees and files from any previous revision. The relevant information for all revisions has to persist somehow to support incremental runs. 
[rr: minor cleanups] [jn: added tests; removed file system backing for now] Signed-off-by: David Barr Signed-off-by: Ramkumar Ramachandra Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/obj_pool.h | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 vcs-svn/obj_pool.h (limited to 'vcs-svn') diff --git a/vcs-svn/obj_pool.h b/vcs-svn/obj_pool.h new file mode 100644 index 0000000000..deb6eb8135 --- /dev/null +++ b/vcs-svn/obj_pool.h @@ -0,0 +1,61 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#ifndef OBJ_POOL_H_ +#define OBJ_POOL_H_ + +#include "git-compat-util.h" + +#define MAYBE_UNUSED __attribute__((__unused__)) + +#define obj_pool_gen(pre, obj_t, initial_capacity) \ +static struct { \ + uint32_t committed; \ + uint32_t size; \ + uint32_t capacity; \ + obj_t *base; \ +} pre##_pool = {0, 0, 0, NULL}; \ +static MAYBE_UNUSED uint32_t pre##_alloc(uint32_t count) \ +{ \ + uint32_t offset; \ + if (pre##_pool.size + count > pre##_pool.capacity) { \ + while (pre##_pool.size + count > pre##_pool.capacity) \ + if (pre##_pool.capacity) \ + pre##_pool.capacity *= 2; \ + else \ + pre##_pool.capacity = initial_capacity; \ + pre##_pool.base = realloc(pre##_pool.base, \ + pre##_pool.capacity * sizeof(obj_t)); \ + } \ + offset = pre##_pool.size; \ + pre##_pool.size += count; \ + return offset; \ +} \ +static MAYBE_UNUSED void pre##_free(uint32_t count) \ +{ \ + pre##_pool.size -= count; \ +} \ +static MAYBE_UNUSED uint32_t pre##_offset(obj_t *obj) \ +{ \ + return obj == NULL ? ~0 : obj - pre##_pool.base; \ +} \ +static MAYBE_UNUSED obj_t *pre##_pointer(uint32_t offset) \ +{ \ + return offset >= pre##_pool.size ? 
NULL : &pre##_pool.base[offset]; \ +} \ +static MAYBE_UNUSED void pre##_commit(void) \ +{ \ + pre##_pool.committed = pre##_pool.size; \ +} \ +static MAYBE_UNUSED void pre##_reset(void) \ +{ \ + free(pre##_pool.base); \ + pre##_pool.base = NULL; \ + pre##_pool.size = 0; \ + pre##_pool.capacity = 0; \ + pre##_pool.committed = 0; \ +} + +#endif -- cgit v1.2.3 From 951f316470acc7c785c460a4e40735b22822349f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 9 Aug 2010 17:17:34 -0500 Subject: Add treap implementation Provide macros to generate a type-specific treap implementation and various functions to operate on it. It uses obj_pool.h to store memory nodes in a treap. Previously committed nodes are never removed from the pool; after any *_commit operation, it is assumed (correctly, in the case of svn-fast-export) that someone else must care about them. Treaps provide a memory-efficient binary search tree structure. Insertion/deletion/search are about as fast in the average case as red-black trees and the chances of worst-case behavior are vanishingly small, thanks to (pseudo-)randomness. The bad worst-case behavior is a small price to pay, given that treaps are much simpler to implement. 
>From http://www.canonware.com/download/trp/trp_hash/trp.h [db: Altered to reference nodes by offset from a common base pointer] [db: Bob Jenkins' hashing implementation dropped for Knuth's] [db: Methods unnecessary for search and insert dropped] [rr: Squelched compiler warnings] [db: Added support for immutable treap nodes] [jn: Reintroduced treap_nsearch(); with tests] Signed-off-by: David Barr Signed-off-by: Ramkumar Ramachandra Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/LICENSE | 3 + vcs-svn/trp.h | 236 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ vcs-svn/trp.txt | 103 +++++++++++++++++++++++++ 3 files changed, 342 insertions(+) create mode 100644 vcs-svn/trp.h create mode 100644 vcs-svn/trp.txt (limited to 'vcs-svn') diff --git a/vcs-svn/LICENSE b/vcs-svn/LICENSE index 6e52372f89..a3d384c4b4 100644 --- a/vcs-svn/LICENSE +++ b/vcs-svn/LICENSE @@ -1,6 +1,9 @@ Copyright (C) 2010 David Barr . All rights reserved. +Copyright (C) 2008 Jason Evans . +All rights reserved. + Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff --git a/vcs-svn/trp.h b/vcs-svn/trp.h new file mode 100644 index 0000000000..1f5f51f143 --- /dev/null +++ b/vcs-svn/trp.h @@ -0,0 +1,236 @@ +/* + * C macro implementation of treaps. + * + * Usage: + * #include <stdint.h> + * #include "trp.h" + * trp_gen(...) + * + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#ifndef TRP_H_ +#define TRP_H_ + +#define MAYBE_UNUSED __attribute__((__unused__)) + +/* Node structure. */ +struct trp_node { + uint32_t trpn_left; + uint32_t trpn_right; +}; + +/* Root structure. */ +struct trp_root { + uint32_t trp_root; +}; + +/* Pointer/Offset conversion. 
*/ +#define trpn_pointer(a_base, a_offset) (a_base##_pointer(a_offset)) +#define trpn_offset(a_base, a_pointer) (a_base##_offset(a_pointer)) +#define trpn_modify(a_base, a_offset) \ + do { \ + if ((a_offset) < a_base##_pool.committed) { \ + uint32_t old_offset = (a_offset);\ + (a_offset) = a_base##_alloc(1); \ + *trpn_pointer(a_base, a_offset) = \ + *trpn_pointer(a_base, old_offset); \ + } \ + } while (0); + +/* Left accessors. */ +#define trp_left_get(a_base, a_field, a_node) \ + (trpn_pointer(a_base, a_node)->a_field.trpn_left) +#define trp_left_set(a_base, a_field, a_node, a_left) \ + do { \ + trpn_modify(a_base, a_node); \ + trp_left_get(a_base, a_field, a_node) = (a_left); \ + } while(0) + +/* Right accessors. */ +#define trp_right_get(a_base, a_field, a_node) \ + (trpn_pointer(a_base, a_node)->a_field.trpn_right) +#define trp_right_set(a_base, a_field, a_node, a_right) \ + do { \ + trpn_modify(a_base, a_node); \ + trp_right_get(a_base, a_field, a_node) = (a_right); \ + } while(0) + +/* + * Fibonacci hash function. + * The multiplier is the nearest prime to (2^32 times (√5 - 1)/2). + * See Knuth §6.4: volume 3, 3rd ed, p518. + */ +#define trpn_hash(a_node) (uint32_t) (2654435761u * (a_node)) + +/* Priority accessors. */ +#define trp_prio_get(a_node) trpn_hash(a_node) + +/* Node initializer. */ +#define trp_node_new(a_base, a_field, a_node) \ + do { \ + trp_left_set(a_base, a_field, (a_node), ~0); \ + trp_right_set(a_base, a_field, (a_node), ~0); \ + } while(0) + +/* Internal utility macros. 
*/ +#define trpn_first(a_base, a_field, a_root, r_node) \ + do { \ + (r_node) = (a_root); \ + if ((r_node) == ~0) \ + return NULL; \ + while (~trp_left_get(a_base, a_field, (r_node))) \ + (r_node) = trp_left_get(a_base, a_field, (r_node)); \ + } while (0) + +#define trpn_rotate_left(a_base, a_field, a_node, r_node) \ + do { \ + (r_node) = trp_right_get(a_base, a_field, (a_node)); \ + trp_right_set(a_base, a_field, (a_node), \ + trp_left_get(a_base, a_field, (r_node))); \ + trp_left_set(a_base, a_field, (r_node), (a_node)); \ + } while(0) + +#define trpn_rotate_right(a_base, a_field, a_node, r_node) \ + do { \ + (r_node) = trp_left_get(a_base, a_field, (a_node)); \ + trp_left_set(a_base, a_field, (a_node), \ + trp_right_get(a_base, a_field, (r_node))); \ + trp_right_set(a_base, a_field, (r_node), (a_node)); \ + } while(0) + +#define trp_gen(a_attr, a_pre, a_type, a_field, a_base, a_cmp) \ +a_attr a_type MAYBE_UNUSED *a_pre##first(struct trp_root *treap) \ +{ \ + uint32_t ret; \ + trpn_first(a_base, a_field, treap->trp_root, ret); \ + return trpn_pointer(a_base, ret); \ +} \ +a_attr a_type MAYBE_UNUSED *a_pre##next(struct trp_root *treap, a_type *node) \ +{ \ + uint32_t ret; \ + uint32_t offset = trpn_offset(a_base, node); \ + if (~trp_right_get(a_base, a_field, offset)) { \ + trpn_first(a_base, a_field, \ + trp_right_get(a_base, a_field, offset), ret); \ + } else { \ + uint32_t tnode = treap->trp_root; \ + ret = ~0; \ + while (1) { \ + int cmp = (a_cmp)(trpn_pointer(a_base, offset), \ + trpn_pointer(a_base, tnode)); \ + if (cmp < 0) { \ + ret = tnode; \ + tnode = trp_left_get(a_base, a_field, tnode); \ + } else if (cmp > 0) { \ + tnode = trp_right_get(a_base, a_field, tnode); \ + } else { \ + break; \ + } \ + } \ + } \ + return trpn_pointer(a_base, ret); \ +} \ +a_attr a_type MAYBE_UNUSED *a_pre##search(struct trp_root *treap, a_type *key) \ +{ \ + int cmp; \ + uint32_t ret = treap->trp_root; \ + while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base,ret)))) { \ + 
if (cmp < 0) { \ + ret = trp_left_get(a_base, a_field, ret); \ + } else { \ + ret = trp_right_get(a_base, a_field, ret); \ + } \ + } \ + return trpn_pointer(a_base, ret); \ +} \ +a_attr a_type MAYBE_UNUSED *a_pre##nsearch(struct trp_root *treap, a_type *key) \ +{ \ + int cmp; \ + uint32_t ret = treap->trp_root; \ + while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base,ret)))) { \ + if (cmp < 0) { \ + if (!~trp_left_get(a_base, a_field, ret)) \ + break; \ + ret = trp_left_get(a_base, a_field, ret); \ + } else { \ + ret = trp_right_get(a_base, a_field, ret); \ + } \ + } \ + return trpn_pointer(a_base, ret); \ +} \ +a_attr uint32_t MAYBE_UNUSED a_pre##insert_recurse(uint32_t cur_node, uint32_t ins_node) \ +{ \ + if (cur_node == ~0) { \ + return (ins_node); \ + } else { \ + uint32_t ret; \ + int cmp = (a_cmp)(trpn_pointer(a_base, ins_node), \ + trpn_pointer(a_base, cur_node)); \ + if (cmp < 0) { \ + uint32_t left = a_pre##insert_recurse( \ + trp_left_get(a_base, a_field, cur_node), ins_node); \ + trp_left_set(a_base, a_field, cur_node, left); \ + if (trp_prio_get(left) < trp_prio_get(cur_node)) \ + trpn_rotate_right(a_base, a_field, cur_node, ret); \ + else \ + ret = cur_node; \ + } else { \ + uint32_t right = a_pre##insert_recurse( \ + trp_right_get(a_base, a_field, cur_node), ins_node); \ + trp_right_set(a_base, a_field, cur_node, right); \ + if (trp_prio_get(right) < trp_prio_get(cur_node)) \ + trpn_rotate_left(a_base, a_field, cur_node, ret); \ + else \ + ret = cur_node; \ + } \ + return (ret); \ + } \ +} \ +a_attr void MAYBE_UNUSED a_pre##insert(struct trp_root *treap, a_type *node) \ +{ \ + uint32_t offset = trpn_offset(a_base, node); \ + trp_node_new(a_base, a_field, offset); \ + treap->trp_root = a_pre##insert_recurse(treap->trp_root, offset); \ +} \ +a_attr uint32_t MAYBE_UNUSED a_pre##remove_recurse(uint32_t cur_node, uint32_t rem_node) \ +{ \ + int cmp = a_cmp(trpn_pointer(a_base, rem_node), \ + trpn_pointer(a_base, cur_node)); \ + if (cmp == 0) { \ + 
uint32_t ret; \ + uint32_t left = trp_left_get(a_base, a_field, cur_node); \ + uint32_t right = trp_right_get(a_base, a_field, cur_node); \ + if (left == ~0) { \ + if (right == ~0) \ + return (~0); \ + } else if (right == ~0 || trp_prio_get(left) < trp_prio_get(right)) { \ + trpn_rotate_right(a_base, a_field, cur_node, ret); \ + right = a_pre##remove_recurse(cur_node, rem_node); \ + trp_right_set(a_base, a_field, ret, right); \ + return (ret); \ + } \ + trpn_rotate_left(a_base, a_field, cur_node, ret); \ + left = a_pre##remove_recurse(cur_node, rem_node); \ + trp_left_set(a_base, a_field, ret, left); \ + return (ret); \ + } else if (cmp < 0) { \ + uint32_t left = a_pre##remove_recurse( \ + trp_left_get(a_base, a_field, cur_node), rem_node); \ + trp_left_set(a_base, a_field, cur_node, left); \ + return (cur_node); \ + } else { \ + uint32_t right = a_pre##remove_recurse( \ + trp_right_get(a_base, a_field, cur_node), rem_node); \ + trp_right_set(a_base, a_field, cur_node, right); \ + return (cur_node); \ + } \ +} \ +a_attr void MAYBE_UNUSED a_pre##remove(struct trp_root *treap, a_type *node) \ +{ \ + treap->trp_root = a_pre##remove_recurse(treap->trp_root, \ + trpn_offset(a_base, node)); \ +} \ + +#endif diff --git a/vcs-svn/trp.txt b/vcs-svn/trp.txt new file mode 100644 index 0000000000..eb4c191875 --- /dev/null +++ b/vcs-svn/trp.txt @@ -0,0 +1,103 @@ +Motivation +========== + +Treaps provide a memory-efficient binary search tree structure. +Insertion/deletion/search are about as fast in the average +case as red-black trees and the chances of worst-case behavior are +vanishingly small, thanks to (pseudo-)randomness. The bad worst-case +behavior is a small price to pay, given that treaps are much simpler +to implement. + +API +=== + +The trp API generates a data structure and functions to handle a +large growing set of objects stored in a pool. + +The caller: + +. Specifies parameters for the generated functions with the + trp_gen(static, foo_, ...) macro. 
+ +. Allocates a `struct trp_root` variable and sets it to {~0}. + +. Adds new nodes to the set using `foo_insert`. + +. Can find a specific item in the set using `foo_search`. + +. Can iterate over items in the set using `foo_first` and `foo_next`. + +. Can remove an item from the set using `foo_remove`. + +Example: + +---- +struct ex_node { + const char *s; + struct trp_node ex_link; +}; +static struct trp_root ex_base = {~0}; +obj_pool_gen(ex, struct ex_node, 4096); +trp_gen(static, ex_, struct ex_node, ex_link, ex, strcmp) +struct ex_node *item; + +item = ex_pointer(ex_alloc(1)); +item->s = "hello"; +ex_insert(&ex_base, item); +item = ex_pointer(ex_alloc(1)); +item->s = "goodbye"; +ex_insert(&ex_base, item); +for (item = ex_first(&ex_base); item; item = ex_next(&ex_base, item)) + printf("%s\n", item->s); +---- + +Functions +--------- + +trp_gen(attr, foo_, node_type, link_field, pool, cmp):: + + Generate a type-specific treap implementation. ++ +. The storage class for generated functions will be 'attr' (e.g., `static`). +. Generated function names are prefixed with 'foo_' (e.g., `treap_`). +. Treap nodes will be of type 'node_type' (e.g., `struct treap_node`). + This type must be a struct with at least one `struct trp_node` field + to point to its children. +. The field used to access child nodes will be 'link_field'. +. All treap nodes must lie in the 'pool' object pool. +. Treap nodes must be totally ordered by the 'cmp' relation, with the + following prototype: ++ +int (*cmp)(node_type \*a, node_type \*b) ++ +and returning a value less than, equal to, or greater than zero +according to the result of comparison. + +void foo_insert(struct trp_root *treap, node_type \*node):: + + Insert node into treap. If inserted multiple times, + a node will appear in the treap multiple times. + +void foo_remove(struct trp_root *treap, node_type \*node):: + + Remove node from treap. Caller must ensure node is + present in treap before using this function. 
+ +node_type *foo_search(struct trp_root \*treap, node_type \*key):: + + Search for a node that matches key. If no match is found, + result is NULL. + +node_type *foo_nsearch(struct trp_root \*treap, node_type \*key):: + + Like `foo_search`, but if the key is missing return what + would be key's successor, were key in treap (NULL if no + successor). + +node_type *foo_first(struct trp_root \*treap):: + + Find the first item from the treap, in sorted order. + +node_type *foo_next(struct trp_root \*treap, node_type \*node):: + + Find the next item. -- cgit v1.2.3 From 1d73b52f5ba4184de6acf474f14668001304a10c Mon Sep 17 00:00:00 2001 From: David Barr Date: Mon, 9 Aug 2010 17:34:42 -0500 Subject: Add string-specific memory pool Intern strings so they can be compared by address and stored without wasting space. This library uses the macros in the obj_pool.h and trp.h to create a memory pool for strings and expose an API for handling them. [rr: added API docs] [jn: with some API simplifications, new documentation and tests] Signed-off-by: David Barr Signed-off-by: Ramkumar Ramachandra Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/string_pool.c | 102 ++++++++++++++++++++++++++++++++++++++++++++++++ vcs-svn/string_pool.h | 11 ++++++ vcs-svn/string_pool.txt | 43 ++++++++++++++++++++ 3 files changed, 156 insertions(+) create mode 100644 vcs-svn/string_pool.c create mode 100644 vcs-svn/string_pool.h create mode 100644 vcs-svn/string_pool.txt (limited to 'vcs-svn') diff --git a/vcs-svn/string_pool.c b/vcs-svn/string_pool.c new file mode 100644 index 0000000000..f5b1da836e --- /dev/null +++ b/vcs-svn/string_pool.c @@ -0,0 +1,102 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. 
+ */ + +#include "git-compat-util.h" +#include "trp.h" +#include "obj_pool.h" +#include "string_pool.h" + +static struct trp_root tree = { ~0 }; + +struct node { + uint32_t offset; + struct trp_node children; +}; + +/* Two memory pools: one for struct node, and another for strings */ +obj_pool_gen(node, struct node, 4096) +obj_pool_gen(string, char, 4096) + +static char *node_value(struct node *node) +{ + return node ? string_pointer(node->offset) : NULL; +} + +static int node_cmp(struct node *a, struct node *b) +{ + return strcmp(node_value(a), node_value(b)); +} + +/* Build a Treap from the node structure (a trp_node w/ offset) */ +trp_gen(static, tree_, struct node, children, node, node_cmp); + +const char *pool_fetch(uint32_t entry) +{ + return node_value(node_pointer(entry)); +} + +uint32_t pool_intern(const char *key) +{ + /* Canonicalize key */ + struct node *match = NULL, *node; + uint32_t key_len; + if (key == NULL) + return ~0; + key_len = strlen(key) + 1; + node = node_pointer(node_alloc(1)); + node->offset = string_alloc(key_len); + strcpy(node_value(node), key); + match = tree_search(&tree, node); + if (!match) { + tree_insert(&tree, node); + } else { + node_free(1); + string_free(key_len); + node = match; + } + return node_offset(node); +} + +uint32_t pool_tok_r(char *str, const char *delim, char **saveptr) +{ + char *token = strtok_r(str, delim, saveptr); + return token ? 
pool_intern(token) : ~0; +} + +void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream) +{ + uint32_t i; + for (i = 0; i < len && ~seq[i]; i++) { + fputs(pool_fetch(seq[i]), stream); + if (i < len - 1 && ~seq[i + 1]) + fputc(delim, stream); + } +} + +uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str) +{ + char *context = NULL; + uint32_t token = ~0; + uint32_t length; + + if (sz == 0) + return ~0; + if (str) + token = pool_tok_r(str, delim, &context); + for (length = 0; length < sz; length++) { + seq[length] = token; + if (token == ~0) + return length; + token = pool_tok_r(NULL, delim, &context); + } + seq[sz - 1] = ~0; + return sz; +} + +void pool_reset(void) +{ + node_reset(); + string_reset(); +} diff --git a/vcs-svn/string_pool.h b/vcs-svn/string_pool.h new file mode 100644 index 0000000000..222fb66e68 --- /dev/null +++ b/vcs-svn/string_pool.h @@ -0,0 +1,11 @@ +#ifndef STRING_POOL_H_ +#define STRING_POOL_H_ + +uint32_t pool_intern(const char *key); +const char *pool_fetch(uint32_t entry); +uint32_t pool_tok_r(char *str, const char *delim, char **saveptr); +void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream); +uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str); +void pool_reset(void); + +#endif diff --git a/vcs-svn/string_pool.txt b/vcs-svn/string_pool.txt new file mode 100644 index 0000000000..1b41f15628 --- /dev/null +++ b/vcs-svn/string_pool.txt @@ -0,0 +1,43 @@ +string_pool API +=============== + +The string_pool API provides facilities for replacing strings +with integer keys that can be more easily compared and stored. +The facilities are designed so that one could teach Git without +too much trouble to store the information needed for these keys to +remain valid over multiple executions. + +Functions +--------- + +pool_intern:: + Include a string in the string pool and get its key. + If that string is already in the pool, retrieves its + existing key. 
+ +pool_fetch:: + Retrieve the string associated to a given key. + +pool_tok_r:: + Extract the key of the next token from a string. + Interface mimics strtok_r. + +pool_print_seq:: + Print a sequence of strings named by key to a file, using the + specified delimiter to separate them. + + If NULL (key ~0) appears in the sequence, the sequence ends + early. + +pool_tok_seq:: + Split a string into tokens, storing the keys of segments + into a caller-provided array. + + Unless sz is 0, the array will always be ~0-terminated. + If there is not enough room for all the tokens, the + array holds as many tokens as fit in the entries before + the terminating ~0. Return value is the index after the + last token, or sz if the tokens did not fit. + +pool_reset:: + Deallocate storage for the string pool. -- cgit v1.2.3 From 3bbaec00a8ffc6ea7e71c3b707851fe663d93a45 Mon Sep 17 00:00:00 2001 From: David Barr Date: Mon, 9 Aug 2010 17:39:43 -0500 Subject: Add stream helper library This library provides thread-unsafe fgets()- and fread()-like functions where the caller does not have to supply a buffer. It maintains a couple of static buffers and provides an API to use them. [rr: allow input from files other than stdin] [jn: with tests, documentation, and error handling improvements] Signed-off-by: David Barr Signed-off-by: Ramkumar Ramachandra Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/line_buffer.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++ vcs-svn/line_buffer.h | 12 ++++++ vcs-svn/line_buffer.txt | 58 +++++++++++++++++++++++++++++ 3 files changed, 167 insertions(+) create mode 100644 vcs-svn/line_buffer.c create mode 100644 vcs-svn/line_buffer.h create mode 100644 vcs-svn/line_buffer.txt (limited to 'vcs-svn') diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c new file mode 100644 index 0000000000..1543567093 --- /dev/null +++ b/vcs-svn/line_buffer.c @@ -0,0 +1,97 @@ +/* + * Licensed under a two-clause BSD-style license. 
+ * See LICENSE for details. + */ + +#include "git-compat-util.h" +#include "line_buffer.h" +#include "obj_pool.h" + +#define LINE_BUFFER_LEN 10000 +#define COPY_BUFFER_LEN 4096 + +/* Create memory pool for char sequence of known length */ +obj_pool_gen(blob, char, 4096) + +static char line_buffer[LINE_BUFFER_LEN]; +static char byte_buffer[COPY_BUFFER_LEN]; +static FILE *infile; + +int buffer_init(const char *filename) +{ + infile = filename ? fopen(filename, "r") : stdin; + if (!infile) + return -1; + return 0; +} + +int buffer_deinit(void) +{ + int err; + if (infile == stdin) + return ferror(infile); + err = ferror(infile); + err |= fclose(infile); + return err; +} + +/* Read a line without trailing newline. */ +char *buffer_read_line(void) +{ + char *end; + if (!fgets(line_buffer, sizeof(line_buffer), infile)) + /* Error or data exhausted. */ + return NULL; + end = line_buffer + strlen(line_buffer); + if (end[-1] == '\n') + end[-1] = '\0'; + else if (feof(infile)) + ; /* No newline at end of file. That's fine. */ + else + /* + * Line was too long. + * There is probably a saner way to deal with this, + * but for now let's return an error. + */ + return NULL; + return line_buffer; +} + +char *buffer_read_string(uint32_t len) +{ + char *s; + blob_free(blob_pool.size); + s = blob_pointer(blob_alloc(len + 1)); + s[fread(s, 1, len, infile)] = '\0'; + return ferror(infile) ? NULL : s; +} + +void buffer_copy_bytes(uint32_t len) +{ + uint32_t in; + while (len > 0 && !feof(infile) && !ferror(infile)) { + in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN; + in = fread(byte_buffer, 1, in, infile); + len -= in; + fwrite(byte_buffer, 1, in, stdout); + if (ferror(stdout)) { + buffer_skip_bytes(len); + return; + } + } +} + +void buffer_skip_bytes(uint32_t len) +{ + uint32_t in; + while (len > 0 && !feof(infile) && !ferror(infile)) { + in = len < COPY_BUFFER_LEN ? 
len : COPY_BUFFER_LEN; + in = fread(byte_buffer, 1, in, infile); + len -= in; + } +} + +void buffer_reset(void) +{ + blob_reset(); +} diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h new file mode 100644 index 0000000000..9c78ae11a1 --- /dev/null +++ b/vcs-svn/line_buffer.h @@ -0,0 +1,12 @@ +#ifndef LINE_BUFFER_H_ +#define LINE_BUFFER_H_ + +int buffer_init(const char *filename); +int buffer_deinit(void); +char *buffer_read_line(void); +char *buffer_read_string(uint32_t len); +void buffer_copy_bytes(uint32_t len); +void buffer_skip_bytes(uint32_t len); +void buffer_reset(void); + +#endif diff --git a/vcs-svn/line_buffer.txt b/vcs-svn/line_buffer.txt new file mode 100644 index 0000000000..8906fb1f50 --- /dev/null +++ b/vcs-svn/line_buffer.txt @@ -0,0 +1,58 @@ +line_buffer API +=============== + +The line_buffer library provides a convenient interface for +mostly-line-oriented input. + +Each line is not permitted to exceed 10000 bytes. The provided +functions are not thread-safe or async-signal-safe, and like +`fgets()`, they generally do not function correctly if interrupted +by a signal without SA_RESTART set. + +Calling sequence +---------------- + +The calling program: + + - specifies a file to read with `buffer_init` + - processes input with `buffer_read_line`, `buffer_read_string`, + `buffer_skip_bytes`, and `buffer_copy_bytes` + - closes the file with `buffer_deinit`, perhaps to start over and + read another file. + +Before exiting, the caller can use `buffer_reset` to deallocate +resources for the benefit of profiling tools. + +Functions +--------- + +`buffer_init`:: + Open the named file for input. If filename is NULL, + start reading from stdin. On failure, returns -1 (with + errno indicating the nature of the failure). + +`buffer_deinit`:: + Stop reading from the current file (closing it unless + it was stdin). Returns nonzero if `fclose` fails or + the error indicator was set. 
+ +`buffer_read_line`:: + Read a line and strip off the trailing newline. + On failure or end of file, returns NULL. + +`buffer_read_string`:: + Read `len` characters of input or up to the end of the + file, whichever comes first. Returns NULL on error. + Returns whatever characters were read (possibly "") + for end of file. + +`buffer_copy_bytes`:: + Read `len` bytes of input and dump them to the standard output + stream. Returns early for error or end of file. + +`buffer_skip_bytes`:: + Discards `len` bytes from the input stream (stopping early + if necessary because of an error or eof). + +`buffer_reset`:: + Deallocates non-static buffers. -- cgit v1.2.3 From c0e6c23dca84227167a6fe1077503ddf32208919 Mon Sep 17 00:00:00 2001 From: David Barr Date: Mon, 9 Aug 2010 17:48:10 -0500 Subject: Infrastructure to write revisions in fast-export format repo_tree maintains the exporter's state and provides a facility to call fast_export, which writes objects to stdout suitable for consumption by fast-import. The exported functions roughly correspond to Subversion FS operations. . repo_add, repo_modify, repo_copy, repo_replace, and repo_delete update the current commit, based roughly on the corresponding Subversion FS operation. . repo_commit calls out to fast_export to write the current commit to the fast-import stream in stdout. . repo_diff is used by the fast_export module to write the changes for a commit. . repo_reset erases the exporter's state, so valgrind can be happy. 
[rr: squelched compiler warnings] [jn: removed support for maintaining state on-disk, though we may want to add it back later] Signed-off-by: David Barr Signed-off-by: Ramkumar Ramachandra Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/fast_export.c | 74 ++++++++++++ vcs-svn/fast_export.h | 11 ++ vcs-svn/repo_tree.c | 329 ++++++++++++++++++++++++++++++++++++++++++++++++++ vcs-svn/repo_tree.h | 26 ++++ 4 files changed, 440 insertions(+) create mode 100644 vcs-svn/fast_export.c create mode 100644 vcs-svn/fast_export.h create mode 100644 vcs-svn/repo_tree.c create mode 100644 vcs-svn/repo_tree.h (limited to 'vcs-svn') diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c new file mode 100644 index 0000000000..3a6156fc11 --- /dev/null +++ b/vcs-svn/fast_export.c @@ -0,0 +1,74 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#include "git-compat-util.h" +#include "fast_export.h" +#include "line_buffer.h" +#include "repo_tree.h" +#include "string_pool.h" + +#define MAX_GITSVN_LINE_LEN 4096 + +static uint32_t first_commit_done; + +void fast_export_delete(uint32_t depth, uint32_t *path) +{ + putchar('D'); + putchar(' '); + pool_print_seq(depth, path, '/', stdout); + putchar('\n'); +} + +void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, + uint32_t mark) +{ + /* Mode must be 100644, 100755, 120000, or 160000. 
*/ + printf("M %06o :%d ", mode, mark); + pool_print_seq(depth, path, '/', stdout); + putchar('\n'); +} + +static char gitsvnline[MAX_GITSVN_LINE_LEN]; +void fast_export_commit(uint32_t revision, uint32_t author, char *log, + uint32_t uuid, uint32_t url, + unsigned long timestamp) +{ + if (!log) + log = ""; + if (~uuid && ~url) { + snprintf(gitsvnline, MAX_GITSVN_LINE_LEN, "\n\ngit-svn-id: %s@%d %s\n", + pool_fetch(url), revision, pool_fetch(uuid)); + } else { + *gitsvnline = '\0'; + } + printf("commit refs/heads/master\n"); + printf("committer %s <%s@%s> %ld +0000\n", + ~author ? pool_fetch(author) : "nobody", + ~author ? pool_fetch(author) : "nobody", + ~uuid ? pool_fetch(uuid) : "local", timestamp); + printf("data %zd\n%s%s\n", + strlen(log) + strlen(gitsvnline), log, gitsvnline); + if (!first_commit_done) { + if (revision > 1) + printf("from refs/heads/master^0\n"); + first_commit_done = 1; + } + repo_diff(revision - 1, revision); + fputc('\n', stdout); + + printf("progress Imported commit %d.\n\n", revision); +} + +void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len) +{ + if (mode == REPO_MODE_LNK) { + /* svn symlink blobs start with "link " */ + buffer_skip_bytes(5); + len -= 5; + } + printf("blob\nmark :%d\ndata %d\n", mark, len); + buffer_copy_bytes(len); + fputc('\n', stdout); +} diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h new file mode 100644 index 0000000000..2aaaea53d5 --- /dev/null +++ b/vcs-svn/fast_export.h @@ -0,0 +1,11 @@ +#ifndef FAST_EXPORT_H_ +#define FAST_EXPORT_H_ + +void fast_export_delete(uint32_t depth, uint32_t *path); +void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, + uint32_t mark); +void fast_export_commit(uint32_t revision, uint32_t author, char *log, + uint32_t uuid, uint32_t url, unsigned long timestamp); +void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len); + +#endif diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c new file mode 100644 index 
0000000000..c3d7ee7d24 --- /dev/null +++ b/vcs-svn/repo_tree.c @@ -0,0 +1,329 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#include "git-compat-util.h" + +#include "string_pool.h" +#include "repo_tree.h" +#include "obj_pool.h" +#include "fast_export.h" + +#include "trp.h" + +struct repo_dirent { + uint32_t name_offset; + struct trp_node children; + uint32_t mode; + uint32_t content_offset; +}; + +struct repo_dir { + struct trp_root entries; +}; + +struct repo_commit { + uint32_t root_dir_offset; +}; + +/* Memory pools for commit, dir and dirent */ +obj_pool_gen(commit, struct repo_commit, 4096) +obj_pool_gen(dir, struct repo_dir, 4096) +obj_pool_gen(dirent, struct repo_dirent, 4096) + +static uint32_t active_commit; +static uint32_t mark; + +static int repo_dirent_name_cmp(const void *a, const void *b); + +/* Treap for directory entries */ +trp_gen(static, dirent_, struct repo_dirent, children, dirent, repo_dirent_name_cmp); + +uint32_t next_blob_mark(void) +{ + return mark++; +} + +static struct repo_dir *repo_commit_root_dir(struct repo_commit *commit) +{ + return dir_pointer(commit->root_dir_offset); +} + +static struct repo_dirent *repo_first_dirent(struct repo_dir *dir) +{ + return dirent_first(&dir->entries); +} + +static int repo_dirent_name_cmp(const void *a, const void *b) +{ + const struct repo_dirent *dirent1 = a, *dirent2 = b; + uint32_t a_offset = dirent1->name_offset; + uint32_t b_offset = dirent2->name_offset; + return (a_offset > b_offset) - (a_offset < b_offset); +} + +static int repo_dirent_is_dir(struct repo_dirent *dirent) +{ + return dirent != NULL && dirent->mode == REPO_MODE_DIR; +} + +static struct repo_dir *repo_dir_from_dirent(struct repo_dirent *dirent) +{ + if (!repo_dirent_is_dir(dirent)) + return NULL; + return dir_pointer(dirent->content_offset); +} + +static struct repo_dir *repo_clone_dir(struct repo_dir *orig_dir) +{ + uint32_t orig_o, new_o; + orig_o = dir_offset(orig_dir); + if 
(orig_o >= dir_pool.committed) + return orig_dir; + new_o = dir_alloc(1); + orig_dir = dir_pointer(orig_o); + *dir_pointer(new_o) = *orig_dir; + return dir_pointer(new_o); +} + +static struct repo_dirent *repo_read_dirent(uint32_t revision, uint32_t *path) +{ + uint32_t name = 0; + struct repo_dirent *key = dirent_pointer(dirent_alloc(1)); + struct repo_dir *dir = NULL; + struct repo_dirent *dirent = NULL; + dir = repo_commit_root_dir(commit_pointer(revision)); + while (~(name = *path++)) { + key->name_offset = name; + dirent = dirent_search(&dir->entries, key); + if (dirent == NULL || !repo_dirent_is_dir(dirent)) + break; + dir = repo_dir_from_dirent(dirent); + } + dirent_free(1); + return dirent; +} + +static void repo_write_dirent(uint32_t *path, uint32_t mode, + uint32_t content_offset, uint32_t del) +{ + uint32_t name, revision, dir_o = ~0, parent_dir_o = ~0; + struct repo_dir *dir; + struct repo_dirent *key; + struct repo_dirent *dirent = NULL; + revision = active_commit; + dir = repo_commit_root_dir(commit_pointer(revision)); + dir = repo_clone_dir(dir); + commit_pointer(revision)->root_dir_offset = dir_offset(dir); + while (~(name = *path++)) { + parent_dir_o = dir_offset(dir); + + key = dirent_pointer(dirent_alloc(1)); + key->name_offset = name; + + dirent = dirent_search(&dir->entries, key); + if (dirent == NULL) + dirent = key; + else + dirent_free(1); + + if (dirent == key) { + dirent->mode = REPO_MODE_DIR; + dirent->content_offset = 0; + dirent_insert(&dir->entries, dirent); + } + + if (dirent_offset(dirent) < dirent_pool.committed) { + dir_o = repo_dirent_is_dir(dirent) ? 
+ dirent->content_offset : ~0; + dirent_remove(&dir->entries, dirent); + dirent = dirent_pointer(dirent_alloc(1)); + dirent->name_offset = name; + dirent->mode = REPO_MODE_DIR; + dirent->content_offset = dir_o; + dirent_insert(&dir->entries, dirent); + } + + dir = repo_dir_from_dirent(dirent); + dir = repo_clone_dir(dir); + dirent->content_offset = dir_offset(dir); + } + if (dirent == NULL) + return; + dirent->mode = mode; + dirent->content_offset = content_offset; + if (del && ~parent_dir_o) + dirent_remove(&dir_pointer(parent_dir_o)->entries, dirent); +} + +uint32_t repo_copy(uint32_t revision, uint32_t *src, uint32_t *dst) +{ + uint32_t mode = 0, content_offset = 0; + struct repo_dirent *src_dirent; + src_dirent = repo_read_dirent(revision, src); + if (src_dirent != NULL) { + mode = src_dirent->mode; + content_offset = src_dirent->content_offset; + repo_write_dirent(dst, mode, content_offset, 0); + } + return mode; +} + +void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark) +{ + repo_write_dirent(path, mode, blob_mark, 0); +} + +uint32_t repo_replace(uint32_t *path, uint32_t blob_mark) +{ + uint32_t mode = 0; + struct repo_dirent *src_dirent; + src_dirent = repo_read_dirent(active_commit, path); + if (src_dirent != NULL) { + mode = src_dirent->mode; + repo_write_dirent(path, mode, blob_mark, 0); + } + return mode; +} + +void repo_modify(uint32_t *path, uint32_t mode, uint32_t blob_mark) +{ + struct repo_dirent *src_dirent; + src_dirent = repo_read_dirent(active_commit, path); + if (src_dirent != NULL && blob_mark == 0) + blob_mark = src_dirent->content_offset; + repo_write_dirent(path, mode, blob_mark, 0); +} + +void repo_delete(uint32_t *path) +{ + repo_write_dirent(path, 0, 0, 1); +} + +static void repo_git_add_r(uint32_t depth, uint32_t *path, struct repo_dir *dir); + +static void repo_git_add(uint32_t depth, uint32_t *path, struct repo_dirent *dirent) +{ + if (repo_dirent_is_dir(dirent)) + repo_git_add_r(depth, path, repo_dir_from_dirent(dirent)); 
+ else + fast_export_modify(depth, path, + dirent->mode, dirent->content_offset); +} + +static void repo_git_add_r(uint32_t depth, uint32_t *path, struct repo_dir *dir) +{ + struct repo_dirent *de = repo_first_dirent(dir); + while (de) { + path[depth] = de->name_offset; + repo_git_add(depth + 1, path, de); + de = dirent_next(&dir->entries, de); + } +} + +static void repo_diff_r(uint32_t depth, uint32_t *path, struct repo_dir *dir1, + struct repo_dir *dir2) +{ + struct repo_dirent *de1, *de2; + de1 = repo_first_dirent(dir1); + de2 = repo_first_dirent(dir2); + + while (de1 && de2) { + if (de1->name_offset < de2->name_offset) { + path[depth] = de1->name_offset; + fast_export_delete(depth + 1, path); + de1 = dirent_next(&dir1->entries, de1); + continue; + } + if (de1->name_offset > de2->name_offset) { + path[depth] = de2->name_offset; + repo_git_add(depth + 1, path, de2); + de2 = dirent_next(&dir2->entries, de2); + continue; + } + path[depth] = de1->name_offset; + + if (de1->mode == de2->mode && + de1->content_offset == de2->content_offset) { + ; /* No change. 
*/ + } else if (repo_dirent_is_dir(de1) && repo_dirent_is_dir(de2)) { + repo_diff_r(depth + 1, path, + repo_dir_from_dirent(de1), + repo_dir_from_dirent(de2)); + } else if (!repo_dirent_is_dir(de1) && !repo_dirent_is_dir(de2)) { + repo_git_add(depth + 1, path, de2); + } else { + fast_export_delete(depth + 1, path); + repo_git_add(depth + 1, path, de2); + } + de1 = dirent_next(&dir1->entries, de1); + de2 = dirent_next(&dir2->entries, de2); + } + while (de1) { + path[depth] = de1->name_offset; + fast_export_delete(depth + 1, path); + de1 = dirent_next(&dir1->entries, de1); + } + while (de2) { + path[depth] = de2->name_offset; + repo_git_add(depth + 1, path, de2); + de2 = dirent_next(&dir2->entries, de2); + } +} + +static uint32_t path_stack[REPO_MAX_PATH_DEPTH]; + +void repo_diff(uint32_t r1, uint32_t r2) +{ + repo_diff_r(0, + path_stack, + repo_commit_root_dir(commit_pointer(r1)), + repo_commit_root_dir(commit_pointer(r2))); +} + +void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, + uint32_t url, unsigned long timestamp) +{ + fast_export_commit(revision, author, log, uuid, url, timestamp); + dirent_commit(); + dir_commit(); + active_commit = commit_alloc(1); + commit_pointer(active_commit)->root_dir_offset = + commit_pointer(active_commit - 1)->root_dir_offset; +} + +static void mark_init(void) +{ + uint32_t i; + mark = 0; + for (i = 0; i < dirent_pool.size; i++) + if (!repo_dirent_is_dir(dirent_pointer(i)) && + dirent_pointer(i)->content_offset > mark) + mark = dirent_pointer(i)->content_offset; + mark++; +} + +void repo_init(void) +{ + mark_init(); + if (commit_pool.size == 0) { + /* Create empty tree for commit 0. */ + commit_alloc(1); + commit_pointer(0)->root_dir_offset = dir_alloc(1); + dir_pointer(0)->entries.trp_root = ~0; + dir_commit(); + } + /* Preallocate next commit, ready for changes. 
 */ + active_commit = commit_alloc(1); + commit_pointer(active_commit)->root_dir_offset = + commit_pointer(active_commit - 1)->root_dir_offset; +} + +void repo_reset(void) +{ + pool_reset(); + commit_reset(); + dir_reset(); + dirent_reset(); +} diff --git a/vcs-svn/repo_tree.h b/vcs-svn/repo_tree.h new file mode 100644 index 0000000000..5476175922 --- /dev/null +++ b/vcs-svn/repo_tree.h @@ -0,0 +1,26 @@ +#ifndef REPO_TREE_H_ +#define REPO_TREE_H_ + +#include "git-compat-util.h" + +#define REPO_MODE_DIR 0040000 +#define REPO_MODE_BLB 0100644 +#define REPO_MODE_EXE 0100755 +#define REPO_MODE_LNK 0120000 + +#define REPO_MAX_PATH_LEN 4096 +#define REPO_MAX_PATH_DEPTH 1000 + +uint32_t next_blob_mark(void); +uint32_t repo_copy(uint32_t revision, uint32_t *src, uint32_t *dst); +void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark); +uint32_t repo_replace(uint32_t *path, uint32_t blob_mark); +void repo_modify(uint32_t *path, uint32_t mode, uint32_t blob_mark); +void repo_delete(uint32_t *path); +void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, + uint32_t url, long unsigned timestamp); +void repo_diff(uint32_t r1, uint32_t r2); +void repo_init(void); +void repo_reset(void); + +#endif -- cgit v1.2.3 From 21746aa34fc99d2c73634bc9829387c27c109dbe Mon Sep 17 00:00:00 2001 From: David Barr Date: Mon, 9 Aug 2010 17:55:00 -0500 Subject: SVN dump parser svndump parses data that is in SVN dumpfile format produced by `svnadmin dump` with the help of line_buffer and uses repo_tree and fast_export to emit a git fast-import stream. Based roughly on com.hydrografix.svndump 0.92 from the SvnToCCase project at <http://svn2cc.sarovar.org/>, by Stefan Hegny and others. 
[rr: allow input from files other than stdin] [jn: with test, more error reporting] Signed-off-by: David Barr Signed-off-by: Ramkumar Ramachandra Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/LICENSE | 4 + vcs-svn/svndump.c | 302 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ vcs-svn/svndump.h | 9 ++ 3 files changed, 315 insertions(+) create mode 100644 vcs-svn/svndump.c create mode 100644 vcs-svn/svndump.h (limited to 'vcs-svn') diff --git a/vcs-svn/LICENSE b/vcs-svn/LICENSE index a3d384c4b4..0a5e3c43a0 100644 --- a/vcs-svn/LICENSE +++ b/vcs-svn/LICENSE @@ -4,6 +4,10 @@ All rights reserved. Copyright (C) 2008 Jason Evans . All rights reserved. +Copyright (C) 2005 Stefan Hegny, hydrografix Consulting GmbH, +Frankfurt/Main, Germany +and others, see http://svn2cc.sarovar.org + Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c new file mode 100644 index 0000000000..630eeb53b7 --- /dev/null +++ b/vcs-svn/svndump.c @@ -0,0 +1,302 @@ +/* + * Parse and rearrange a svnadmin dump. + * Create the dump with: + * svnadmin dump --incremental -r<startrev>:<endrev> <repository> >outfile + * + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. 
+ */ + +#include "cache.h" +#include "repo_tree.h" +#include "fast_export.h" +#include "line_buffer.h" +#include "obj_pool.h" +#include "string_pool.h" + +#define NODEACT_REPLACE 4 +#define NODEACT_DELETE 3 +#define NODEACT_ADD 2 +#define NODEACT_CHANGE 1 +#define NODEACT_UNKNOWN 0 + +#define DUMP_CTX 0 +#define REV_CTX 1 +#define NODE_CTX 2 + +#define LENGTH_UNKNOWN (~0) +#define DATE_RFC2822_LEN 31 + +/* Create memory pool for log messages */ +obj_pool_gen(log, char, 4096) + +static char* log_copy(uint32_t length, char *log) +{ + char *buffer; + log_free(log_pool.size); + buffer = log_pointer(log_alloc(length)); + strncpy(buffer, log, length); + return buffer; +} + +static struct { + uint32_t action, propLength, textLength, srcRev, srcMode, mark, type; + uint32_t src[REPO_MAX_PATH_DEPTH], dst[REPO_MAX_PATH_DEPTH]; +} node_ctx; + +static struct { + uint32_t revision, author; + unsigned long timestamp; + char *log; +} rev_ctx; + +static struct { + uint32_t uuid, url; +} dump_ctx; + +static struct { + uint32_t svn_log, svn_author, svn_date, svn_executable, svn_special, uuid, + revision_number, node_path, node_kind, node_action, + node_copyfrom_path, node_copyfrom_rev, text_content_length, + prop_content_length, content_length; +} keys; + +static void reset_node_ctx(char *fname) +{ + node_ctx.type = 0; + node_ctx.action = NODEACT_UNKNOWN; + node_ctx.propLength = LENGTH_UNKNOWN; + node_ctx.textLength = LENGTH_UNKNOWN; + node_ctx.src[0] = ~0; + node_ctx.srcRev = 0; + node_ctx.srcMode = 0; + pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.dst, "/", fname); + node_ctx.mark = 0; +} + +static void reset_rev_ctx(uint32_t revision) +{ + rev_ctx.revision = revision; + rev_ctx.timestamp = 0; + rev_ctx.log = NULL; + rev_ctx.author = ~0; +} + +static void reset_dump_ctx(uint32_t url) +{ + dump_ctx.url = url; + dump_ctx.uuid = ~0; +} + +static void init_keys(void) +{ + keys.svn_log = pool_intern("svn:log"); + keys.svn_author = pool_intern("svn:author"); + keys.svn_date = 
pool_intern("svn:date"); + keys.svn_executable = pool_intern("svn:executable"); + keys.svn_special = pool_intern("svn:special"); + keys.uuid = pool_intern("UUID"); + keys.revision_number = pool_intern("Revision-number"); + keys.node_path = pool_intern("Node-path"); + keys.node_kind = pool_intern("Node-kind"); + keys.node_action = pool_intern("Node-action"); + keys.node_copyfrom_path = pool_intern("Node-copyfrom-path"); + keys.node_copyfrom_rev = pool_intern("Node-copyfrom-rev"); + keys.text_content_length = pool_intern("Text-content-length"); + keys.prop_content_length = pool_intern("Prop-content-length"); + keys.content_length = pool_intern("Content-length"); +} + +static void read_props(void) +{ + uint32_t len; + uint32_t key = ~0; + char *val = NULL; + char *t; + while ((t = buffer_read_line()) && strcmp(t, "PROPS-END")) { + if (!strncmp(t, "K ", 2)) { + len = atoi(&t[2]); + key = pool_intern(buffer_read_string(len)); + buffer_read_line(); + } else if (!strncmp(t, "V ", 2)) { + len = atoi(&t[2]); + val = buffer_read_string(len); + if (key == keys.svn_log) { + /* Value length excludes terminating nul. 
*/ + rev_ctx.log = log_copy(len + 1, val); + } else if (key == keys.svn_author) { + rev_ctx.author = pool_intern(val); + } else if (key == keys.svn_date) { + if (parse_date_basic(val, &rev_ctx.timestamp, NULL)) + fprintf(stderr, "Invalid timestamp: %s\n", val); + } else if (key == keys.svn_executable) { + node_ctx.type = REPO_MODE_EXE; + } else if (key == keys.svn_special) { + node_ctx.type = REPO_MODE_LNK; + } + key = ~0; + buffer_read_line(); + } + } +} + +static void handle_node(void) +{ + if (node_ctx.propLength != LENGTH_UNKNOWN && node_ctx.propLength) + read_props(); + + if (node_ctx.srcRev) + node_ctx.srcMode = repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst); + + if (node_ctx.textLength != LENGTH_UNKNOWN && + node_ctx.type != REPO_MODE_DIR) + node_ctx.mark = next_blob_mark(); + + if (node_ctx.action == NODEACT_DELETE) { + repo_delete(node_ctx.dst); + } else if (node_ctx.action == NODEACT_CHANGE || + node_ctx.action == NODEACT_REPLACE) { + if (node_ctx.action == NODEACT_REPLACE && + node_ctx.type == REPO_MODE_DIR) + repo_replace(node_ctx.dst, node_ctx.mark); + else if (node_ctx.propLength != LENGTH_UNKNOWN) + repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark); + else if (node_ctx.textLength != LENGTH_UNKNOWN) + node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark); + } else if (node_ctx.action == NODEACT_ADD) { + if (node_ctx.srcRev && node_ctx.propLength != LENGTH_UNKNOWN) + repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark); + else if (node_ctx.srcRev && node_ctx.textLength != LENGTH_UNKNOWN) + node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark); + else if ((node_ctx.type == REPO_MODE_DIR && !node_ctx.srcRev) || + node_ctx.textLength != LENGTH_UNKNOWN) + repo_add(node_ctx.dst, node_ctx.type, node_ctx.mark); + } + + if (node_ctx.propLength == LENGTH_UNKNOWN && node_ctx.srcMode) + node_ctx.type = node_ctx.srcMode; + + if (node_ctx.mark) + fast_export_blob(node_ctx.type, node_ctx.mark, node_ctx.textLength); + else if 
(node_ctx.textLength != LENGTH_UNKNOWN) + buffer_skip_bytes(node_ctx.textLength); +} + +static void handle_revision(void) +{ + if (rev_ctx.revision) + repo_commit(rev_ctx.revision, rev_ctx.author, rev_ctx.log, + dump_ctx.uuid, dump_ctx.url, rev_ctx.timestamp); +} + +void svndump_read(const char *url) +{ + char *val; + char *t; + uint32_t active_ctx = DUMP_CTX; + uint32_t len; + uint32_t key; + + reset_dump_ctx(pool_intern(url)); + while ((t = buffer_read_line())) { + val = strstr(t, ": "); + if (!val) + continue; + *val++ = '\0'; + *val++ = '\0'; + key = pool_intern(t); + + if (key == keys.uuid) { + dump_ctx.uuid = pool_intern(val); + } else if (key == keys.revision_number) { + if (active_ctx == NODE_CTX) + handle_node(); + if (active_ctx != DUMP_CTX) + handle_revision(); + active_ctx = REV_CTX; + reset_rev_ctx(atoi(val)); + } else if (key == keys.node_path) { + if (active_ctx == NODE_CTX) + handle_node(); + active_ctx = NODE_CTX; + reset_node_ctx(val); + } else if (key == keys.node_kind) { + if (!strcmp(val, "dir")) + node_ctx.type = REPO_MODE_DIR; + else if (!strcmp(val, "file")) + node_ctx.type = REPO_MODE_BLB; + else + fprintf(stderr, "Unknown node-kind: %s\n", val); + } else if (key == keys.node_action) { + if (!strcmp(val, "delete")) { + node_ctx.action = NODEACT_DELETE; + } else if (!strcmp(val, "add")) { + node_ctx.action = NODEACT_ADD; + } else if (!strcmp(val, "change")) { + node_ctx.action = NODEACT_CHANGE; + } else if (!strcmp(val, "replace")) { + node_ctx.action = NODEACT_REPLACE; + } else { + fprintf(stderr, "Unknown node-action: %s\n", val); + node_ctx.action = NODEACT_UNKNOWN; + } + } else if (key == keys.node_copyfrom_path) { + pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.src, "/", val); + } else if (key == keys.node_copyfrom_rev) { + node_ctx.srcRev = atoi(val); + } else if (key == keys.text_content_length) { + node_ctx.textLength = atoi(val); + } else if (key == keys.prop_content_length) { + node_ctx.propLength = atoi(val); + } else if (key == 
keys.content_length) { + len = atoi(val); + buffer_read_line(); + if (active_ctx == REV_CTX) { + read_props(); + } else if (active_ctx == NODE_CTX) { + handle_node(); + active_ctx = REV_CTX; + } else { + fprintf(stderr, "Unexpected content length header: %d\n", len); + buffer_skip_bytes(len); + } + } + } + if (active_ctx == NODE_CTX) + handle_node(); + if (active_ctx != DUMP_CTX) + handle_revision(); +} + +void svndump_init(const char *filename) +{ + buffer_init(filename); + repo_init(); + reset_dump_ctx(~0); + reset_rev_ctx(0); + reset_node_ctx(NULL); + init_keys(); +} + +void svndump_deinit(void) +{ + log_reset(); + repo_reset(); + reset_dump_ctx(~0); + reset_rev_ctx(0); + reset_node_ctx(NULL); + if (buffer_deinit()) + fprintf(stderr, "Input error\n"); + if (ferror(stdout)) + fprintf(stderr, "Output error\n"); +} + +void svndump_reset(void) +{ + log_reset(); + buffer_reset(); + repo_reset(); + reset_dump_ctx(~0); + reset_rev_ctx(0); + reset_node_ctx(NULL); +} diff --git a/vcs-svn/svndump.h b/vcs-svn/svndump.h new file mode 100644 index 0000000000..93c412f14a --- /dev/null +++ b/vcs-svn/svndump.h @@ -0,0 +1,9 @@ +#ifndef SVNDUMP_H_ +#define SVNDUMP_H_ + +void svndump_init(const char *filename); +void svndump_read(const char *url); +void svndump_deinit(void); +void svndump_reset(void); + +#endif -- cgit v1.2.3 From 6ad263ce7afc6c21c3ada1691f4772993b8ae46b Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Thu, 12 Aug 2010 17:02:57 -0500 Subject: treap: style fix Missing spaces in while (0) and trpn_pointer(a, b). Remove parentheses around return value. 
Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/trp.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/trp.h b/vcs-svn/trp.h index 1f5f51f143..ee35c688a0 100644 --- a/vcs-svn/trp.h +++ b/vcs-svn/trp.h @@ -37,7 +37,7 @@ struct trp_root { *trpn_pointer(a_base, a_offset) = \ *trpn_pointer(a_base, old_offset); \ } \ - } while (0); + } while (0) /* Left accessors. */ #define trp_left_get(a_base, a_field, a_node) \ @@ -46,7 +46,7 @@ struct trp_root { do { \ trpn_modify(a_base, a_node); \ trp_left_get(a_base, a_field, a_node) = (a_left); \ - } while(0) + } while (0) /* Right accessors. */ #define trp_right_get(a_base, a_field, a_node) \ @@ -55,7 +55,7 @@ struct trp_root { do { \ trpn_modify(a_base, a_node); \ trp_right_get(a_base, a_field, a_node) = (a_right); \ - } while(0) + } while (0) /* * Fibonacci hash function. @@ -72,7 +72,7 @@ struct trp_root { do { \ trp_left_set(a_base, a_field, (a_node), ~0); \ trp_right_set(a_base, a_field, (a_node), ~0); \ - } while(0) + } while (0) /* Internal utility macros. 
*/ #define trpn_first(a_base, a_field, a_root, r_node) \ @@ -90,7 +90,7 @@ struct trp_root { trp_right_set(a_base, a_field, (a_node), \ trp_left_get(a_base, a_field, (r_node))); \ trp_left_set(a_base, a_field, (r_node), (a_node)); \ - } while(0) + } while (0) #define trpn_rotate_right(a_base, a_field, a_node, r_node) \ do { \ @@ -98,7 +98,7 @@ struct trp_root { trp_left_set(a_base, a_field, (a_node), \ trp_right_get(a_base, a_field, (r_node))); \ trp_right_set(a_base, a_field, (r_node), (a_node)); \ - } while(0) + } while (0) #define trp_gen(a_attr, a_pre, a_type, a_field, a_base, a_cmp) \ a_attr a_type MAYBE_UNUSED *a_pre##first(struct trp_root *treap) \ @@ -136,7 +136,7 @@ a_attr a_type MAYBE_UNUSED *a_pre##search(struct trp_root *treap, a_type *key) \ { \ int cmp; \ uint32_t ret = treap->trp_root; \ - while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base,ret)))) { \ + while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base, ret)))) { \ if (cmp < 0) { \ ret = trp_left_get(a_base, a_field, ret); \ } else { \ @@ -149,7 +149,7 @@ a_attr a_type MAYBE_UNUSED *a_pre##nsearch(struct trp_root *treap, a_type *key) { \ int cmp; \ uint32_t ret = treap->trp_root; \ - while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base,ret)))) { \ + while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base, ret)))) { \ if (cmp < 0) { \ if (!~trp_left_get(a_base, a_field, ret)) \ break; \ @@ -163,7 +163,7 @@ a_attr a_type MAYBE_UNUSED *a_pre##nsearch(struct trp_root *treap, a_type *key) a_attr uint32_t MAYBE_UNUSED a_pre##insert_recurse(uint32_t cur_node, uint32_t ins_node) \ { \ if (cur_node == ~0) { \ - return (ins_node); \ + return ins_node; \ } else { \ uint32_t ret; \ int cmp = (a_cmp)(trpn_pointer(a_base, ins_node), \ @@ -185,7 +185,7 @@ a_attr uint32_t MAYBE_UNUSED a_pre##insert_recurse(uint32_t cur_node, uint32_t i else \ ret = cur_node; \ } \ - return (ret); \ + return ret; \ } \ } \ a_attr void MAYBE_UNUSED a_pre##insert(struct trp_root *treap, a_type *node) \ @@ -204,27 +204,27 @@ 
a_attr uint32_t MAYBE_UNUSED a_pre##remove_recurse(uint32_t cur_node, uint32_t r uint32_t right = trp_right_get(a_base, a_field, cur_node); \ if (left == ~0) { \ if (right == ~0) \ - return (~0); \ + return ~0; \ } else if (right == ~0 || trp_prio_get(left) < trp_prio_get(right)) { \ trpn_rotate_right(a_base, a_field, cur_node, ret); \ right = a_pre##remove_recurse(cur_node, rem_node); \ trp_right_set(a_base, a_field, ret, right); \ - return (ret); \ + return ret; \ } \ trpn_rotate_left(a_base, a_field, cur_node, ret); \ left = a_pre##remove_recurse(cur_node, rem_node); \ trp_left_set(a_base, a_field, ret, left); \ - return (ret); \ + return ret; \ } else if (cmp < 0) { \ uint32_t left = a_pre##remove_recurse( \ trp_left_get(a_base, a_field, cur_node), rem_node); \ trp_left_set(a_base, a_field, cur_node, left); \ - return (cur_node); \ + return cur_node; \ } else { \ uint32_t right = a_pre##remove_recurse( \ trp_right_get(a_base, a_field, cur_node), rem_node); \ trp_right_set(a_base, a_field, cur_node, right); \ - return (cur_node); \ + return cur_node; \ } \ } \ a_attr void MAYBE_UNUSED a_pre##remove(struct trp_root *treap, a_type *node) \ -- cgit v1.2.3 From 68b4cfbc91583b43e96d38b8d7efc8e6690589ad Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 13 Aug 2010 19:01:34 -0500 Subject: vcs-svn: Rename dirent pool to build on Windows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dirent is #define’d to mingw_dirent in compat/mingw.h, with the result that obj_pool_gen(dirent, struct repo_dirent, 4096) creates functions with names like mingw_dirent_alloc and references to dirent_alloc go unresolved. Rename the functions to dent_* to avoid this problem. 
Reported-by: Johannes Sixt Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/repo_tree.c | 146 ++++++++++++++++++++++++++-------------------------- 1 file changed, 73 insertions(+), 73 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c index c3d7ee7d24..e94d91d129 100644 --- a/vcs-svn/repo_tree.c +++ b/vcs-svn/repo_tree.c @@ -30,7 +30,7 @@ struct repo_commit { /* Memory pools for commit, dir and dirent */ obj_pool_gen(commit, struct repo_commit, 4096) obj_pool_gen(dir, struct repo_dir, 4096) -obj_pool_gen(dirent, struct repo_dirent, 4096) +obj_pool_gen(dent, struct repo_dirent, 4096) static uint32_t active_commit; static uint32_t mark; @@ -38,7 +38,7 @@ static uint32_t mark; static int repo_dirent_name_cmp(const void *a, const void *b); /* Treap for directory entries */ -trp_gen(static, dirent_, struct repo_dirent, children, dirent, repo_dirent_name_cmp); +trp_gen(static, dent_, struct repo_dirent, children, dent, repo_dirent_name_cmp); uint32_t next_blob_mark(void) { @@ -52,27 +52,27 @@ static struct repo_dir *repo_commit_root_dir(struct repo_commit *commit) static struct repo_dirent *repo_first_dirent(struct repo_dir *dir) { - return dirent_first(&dir->entries); + return dent_first(&dir->entries); } static int repo_dirent_name_cmp(const void *a, const void *b) { - const struct repo_dirent *dirent1 = a, *dirent2 = b; - uint32_t a_offset = dirent1->name_offset; - uint32_t b_offset = dirent2->name_offset; + const struct repo_dirent *dent1 = a, *dent2 = b; + uint32_t a_offset = dent1->name_offset; + uint32_t b_offset = dent2->name_offset; return (a_offset > b_offset) - (a_offset < b_offset); } -static int repo_dirent_is_dir(struct repo_dirent *dirent) +static int repo_dirent_is_dir(struct repo_dirent *dent) { - return dirent != NULL && dirent->mode == REPO_MODE_DIR; + return dent != NULL && dent->mode == REPO_MODE_DIR; } -static struct repo_dir *repo_dir_from_dirent(struct repo_dirent *dirent) +static 
struct repo_dir *repo_dir_from_dirent(struct repo_dirent *dent) { - if (!repo_dirent_is_dir(dirent)) + if (!repo_dirent_is_dir(dent)) return NULL; - return dir_pointer(dirent->content_offset); + return dir_pointer(dent->content_offset); } static struct repo_dir *repo_clone_dir(struct repo_dir *orig_dir) @@ -90,19 +90,19 @@ static struct repo_dir *repo_clone_dir(struct repo_dir *orig_dir) static struct repo_dirent *repo_read_dirent(uint32_t revision, uint32_t *path) { uint32_t name = 0; - struct repo_dirent *key = dirent_pointer(dirent_alloc(1)); + struct repo_dirent *key = dent_pointer(dent_alloc(1)); struct repo_dir *dir = NULL; - struct repo_dirent *dirent = NULL; + struct repo_dirent *dent = NULL; dir = repo_commit_root_dir(commit_pointer(revision)); while (~(name = *path++)) { key->name_offset = name; - dirent = dirent_search(&dir->entries, key); - if (dirent == NULL || !repo_dirent_is_dir(dirent)) + dent = dent_search(&dir->entries, key); + if (dent == NULL || !repo_dirent_is_dir(dent)) break; - dir = repo_dir_from_dirent(dirent); + dir = repo_dir_from_dirent(dent); } - dirent_free(1); - return dirent; + dent_free(1); + return dent; } static void repo_write_dirent(uint32_t *path, uint32_t mode, @@ -111,7 +111,7 @@ static void repo_write_dirent(uint32_t *path, uint32_t mode, uint32_t name, revision, dir_o = ~0, parent_dir_o = ~0; struct repo_dir *dir; struct repo_dirent *key; - struct repo_dirent *dirent = NULL; + struct repo_dirent *dent = NULL; revision = active_commit; dir = repo_commit_root_dir(commit_pointer(revision)); dir = repo_clone_dir(dir); @@ -119,52 +119,52 @@ static void repo_write_dirent(uint32_t *path, uint32_t mode, while (~(name = *path++)) { parent_dir_o = dir_offset(dir); - key = dirent_pointer(dirent_alloc(1)); + key = dent_pointer(dent_alloc(1)); key->name_offset = name; - dirent = dirent_search(&dir->entries, key); - if (dirent == NULL) - dirent = key; + dent = dent_search(&dir->entries, key); + if (dent == NULL) + dent = key; else - 
dirent_free(1); + dent_free(1); - if (dirent == key) { - dirent->mode = REPO_MODE_DIR; - dirent->content_offset = 0; - dirent_insert(&dir->entries, dirent); + if (dent == key) { + dent->mode = REPO_MODE_DIR; + dent->content_offset = 0; + dent_insert(&dir->entries, dent); } - if (dirent_offset(dirent) < dirent_pool.committed) { - dir_o = repo_dirent_is_dir(dirent) ? - dirent->content_offset : ~0; - dirent_remove(&dir->entries, dirent); - dirent = dirent_pointer(dirent_alloc(1)); - dirent->name_offset = name; - dirent->mode = REPO_MODE_DIR; - dirent->content_offset = dir_o; - dirent_insert(&dir->entries, dirent); + if (dent_offset(dent) < dent_pool.committed) { + dir_o = repo_dirent_is_dir(dent) ? + dent->content_offset : ~0; + dent_remove(&dir->entries, dent); + dent = dent_pointer(dent_alloc(1)); + dent->name_offset = name; + dent->mode = REPO_MODE_DIR; + dent->content_offset = dir_o; + dent_insert(&dir->entries, dent); } - dir = repo_dir_from_dirent(dirent); + dir = repo_dir_from_dirent(dent); dir = repo_clone_dir(dir); - dirent->content_offset = dir_offset(dir); + dent->content_offset = dir_offset(dir); } - if (dirent == NULL) + if (dent == NULL) return; - dirent->mode = mode; - dirent->content_offset = content_offset; + dent->mode = mode; + dent->content_offset = content_offset; if (del && ~parent_dir_o) - dirent_remove(&dir_pointer(parent_dir_o)->entries, dirent); + dent_remove(&dir_pointer(parent_dir_o)->entries, dent); } uint32_t repo_copy(uint32_t revision, uint32_t *src, uint32_t *dst) { uint32_t mode = 0, content_offset = 0; - struct repo_dirent *src_dirent; - src_dirent = repo_read_dirent(revision, src); - if (src_dirent != NULL) { - mode = src_dirent->mode; - content_offset = src_dirent->content_offset; + struct repo_dirent *src_dent; + src_dent = repo_read_dirent(revision, src); + if (src_dent != NULL) { + mode = src_dent->mode; + content_offset = src_dent->content_offset; repo_write_dirent(dst, mode, content_offset, 0); } return mode; @@ -178,10 
+178,10 @@ void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark) uint32_t repo_replace(uint32_t *path, uint32_t blob_mark) { uint32_t mode = 0; - struct repo_dirent *src_dirent; - src_dirent = repo_read_dirent(active_commit, path); - if (src_dirent != NULL) { - mode = src_dirent->mode; + struct repo_dirent *src_dent; + src_dent = repo_read_dirent(active_commit, path); + if (src_dent != NULL) { + mode = src_dent->mode; repo_write_dirent(path, mode, blob_mark, 0); } return mode; @@ -189,10 +189,10 @@ uint32_t repo_replace(uint32_t *path, uint32_t blob_mark) void repo_modify(uint32_t *path, uint32_t mode, uint32_t blob_mark) { - struct repo_dirent *src_dirent; - src_dirent = repo_read_dirent(active_commit, path); - if (src_dirent != NULL && blob_mark == 0) - blob_mark = src_dirent->content_offset; + struct repo_dirent *src_dent; + src_dent = repo_read_dirent(active_commit, path); + if (src_dent != NULL && blob_mark == 0) + blob_mark = src_dent->content_offset; repo_write_dirent(path, mode, blob_mark, 0); } @@ -203,13 +203,13 @@ void repo_delete(uint32_t *path) static void repo_git_add_r(uint32_t depth, uint32_t *path, struct repo_dir *dir); -static void repo_git_add(uint32_t depth, uint32_t *path, struct repo_dirent *dirent) +static void repo_git_add(uint32_t depth, uint32_t *path, struct repo_dirent *dent) { - if (repo_dirent_is_dir(dirent)) - repo_git_add_r(depth, path, repo_dir_from_dirent(dirent)); + if (repo_dirent_is_dir(dent)) + repo_git_add_r(depth, path, repo_dir_from_dirent(dent)); else fast_export_modify(depth, path, - dirent->mode, dirent->content_offset); + dent->mode, dent->content_offset); } static void repo_git_add_r(uint32_t depth, uint32_t *path, struct repo_dir *dir) @@ -218,7 +218,7 @@ static void repo_git_add_r(uint32_t depth, uint32_t *path, struct repo_dir *dir) while (de) { path[depth] = de->name_offset; repo_git_add(depth + 1, path, de); - de = dirent_next(&dir->entries, de); + de = dent_next(&dir->entries, de); } } @@ -233,13 
+233,13 @@ static void repo_diff_r(uint32_t depth, uint32_t *path, struct repo_dir *dir1, if (de1->name_offset < de2->name_offset) { path[depth] = de1->name_offset; fast_export_delete(depth + 1, path); - de1 = dirent_next(&dir1->entries, de1); + de1 = dent_next(&dir1->entries, de1); continue; } if (de1->name_offset > de2->name_offset) { path[depth] = de2->name_offset; repo_git_add(depth + 1, path, de2); - de2 = dirent_next(&dir2->entries, de2); + de2 = dent_next(&dir2->entries, de2); continue; } path[depth] = de1->name_offset; @@ -257,18 +257,18 @@ static void repo_diff_r(uint32_t depth, uint32_t *path, struct repo_dir *dir1, fast_export_delete(depth + 1, path); repo_git_add(depth + 1, path, de2); } - de1 = dirent_next(&dir1->entries, de1); - de2 = dirent_next(&dir2->entries, de2); + de1 = dent_next(&dir1->entries, de1); + de2 = dent_next(&dir2->entries, de2); } while (de1) { path[depth] = de1->name_offset; fast_export_delete(depth + 1, path); - de1 = dirent_next(&dir1->entries, de1); + de1 = dent_next(&dir1->entries, de1); } while (de2) { path[depth] = de2->name_offset; repo_git_add(depth + 1, path, de2); - de2 = dirent_next(&dir2->entries, de2); + de2 = dent_next(&dir2->entries, de2); } } @@ -286,7 +286,7 @@ void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, uint32_t url, unsigned long timestamp) { fast_export_commit(revision, author, log, uuid, url, timestamp); - dirent_commit(); + dent_commit(); dir_commit(); active_commit = commit_alloc(1); commit_pointer(active_commit)->root_dir_offset = @@ -297,10 +297,10 @@ static void mark_init(void) { uint32_t i; mark = 0; - for (i = 0; i < dirent_pool.size; i++) - if (!repo_dirent_is_dir(dirent_pointer(i)) && - dirent_pointer(i)->content_offset > mark) - mark = dirent_pointer(i)->content_offset; + for (i = 0; i < dent_pool.size; i++) + if (!repo_dirent_is_dir(dent_pointer(i)) && + dent_pointer(i)->content_offset > mark) + mark = dent_pointer(i)->content_offset; mark++; } @@ -325,5 +325,5 @@ 
void repo_reset(void) pool_reset(); commit_reset(); dir_reset(); - dirent_reset(); + dent_reset(); } -- cgit v1.2.3 From 6117abae569e53485f7a90d2595b135c7beb3c96 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 13 Aug 2010 19:03:17 -0500 Subject: vcs-svn: Avoid %z in format string In the spirit of v1.6.4-rc0~124 (MinGW: Fix compiler warning in merge-recursive, 2009-05-23), use a 32-bit integer instead; the dump file parser does not support any better, anyway. Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/fast_export.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c index 3a6156fc11..256a0522b2 100644 --- a/vcs-svn/fast_export.c +++ b/vcs-svn/fast_export.c @@ -48,8 +48,9 @@ void fast_export_commit(uint32_t revision, uint32_t author, char *log, ~author ? pool_fetch(author) : "nobody", ~author ? pool_fetch(author) : "nobody", ~uuid ? pool_fetch(uuid) : "local", timestamp); - printf("data %zd\n%s%s\n", - strlen(log) + strlen(gitsvnline), log, gitsvnline); + printf("data %"PRIu32"\n%s%s\n", + (uint32_t) (strlen(log) + strlen(gitsvnline)), + log, gitsvnline); if (!first_commit_done) { if (revision > 1) printf("from refs/heads/master^0\n"); -- cgit v1.2.3 From 5418d96ddca8134b5abeb99430f61c062d91f722 Mon Sep 17 00:00:00 2001 From: Ramsay Jones Date: Thu, 9 Sep 2010 18:24:06 +0100 Subject: vcs-svn: Fix some printf format compiler warnings In particular, on systems that define uint32_t as an unsigned long, gcc complains as follows: CC vcs-svn/fast_export.o vcs-svn/fast_export.c: In function `fast_export_modify': vcs-svn/fast_export.c:28: warning: unsigned int format, uint32_t arg (arg 2) vcs-svn/fast_export.c:28: warning: int format, uint32_t arg (arg 3) vcs-svn/fast_export.c: In function `fast_export_commit': vcs-svn/fast_export.c:42: warning: int format, uint32_t arg (arg 5) vcs-svn/fast_export.c:62: warning: int format, uint32_t arg (arg 2) 
vcs-svn/fast_export.c: In function `fast_export_blob': vcs-svn/fast_export.c:72: warning: int format, uint32_t arg (arg 2) vcs-svn/fast_export.c:72: warning: int format, uint32_t arg (arg 3) CC vcs-svn/svndump.o vcs-svn/svndump.c: In function `svndump_read': vcs-svn/svndump.c:260: warning: int format, uint32_t arg (arg 3) In order to suppress the warnings we use the C99 format specifier macros PRIo32 and PRIu32 from <inttypes.h>. Signed-off-by: Ramsay Jones Acked-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/fast_export.c | 9 +++++---- vcs-svn/svndump.c | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c index 256a0522b2..6cfa256a37 100644 --- a/vcs-svn/fast_export.c +++ b/vcs-svn/fast_export.c @@ -25,7 +25,7 @@ void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, uint32_t mark) { /* Mode must be 100644, 100755, 120000, or 160000. */ - printf("M %06o :%d ", mode, mark); + printf("M %06"PRIo32" :%"PRIu32" ", mode, mark); pool_print_seq(depth, path, '/', stdout); putchar('\n'); } @@ -38,7 +38,8 @@ void fast_export_commit(uint32_t revision, uint32_t author, char *log, if (!log) log = ""; if (~uuid && ~url) { - snprintf(gitsvnline, MAX_GITSVN_LINE_LEN, "\n\ngit-svn-id: %s@%d %s\n", + snprintf(gitsvnline, MAX_GITSVN_LINE_LEN, + "\n\ngit-svn-id: %s@%"PRIu32" %s\n", pool_fetch(url), revision, pool_fetch(uuid)); } else { *gitsvnline = '\0'; @@ -59,7 +60,7 @@ void fast_export_commit(uint32_t revision, uint32_t author, char *log, repo_diff(revision - 1, revision); fputc('\n', stdout); - printf("progress Imported commit %d.\n\n", revision); + printf("progress Imported commit %"PRIu32".\n\n", revision); } void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len) @@ -69,7 +70,7 @@ void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len) buffer_skip_bytes(5); len -= 5; } - printf("blob\nmark :%d\ndata %d\n", mark, len); + printf("blob\nmark
:%"PRIu32"\ndata %"PRIu32"\n", mark, len); buffer_copy_bytes(len); fputc('\n', stdout); } diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 630eeb53b7..53d0215d2d 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -257,7 +257,7 @@ void svndump_read(const char *url) handle_node(); active_ctx = REV_CTX; } else { - fprintf(stderr, "Unexpected content length header: %d\n", len); + fprintf(stderr, "Unexpected content length header: %"PRIu32"\n", len); buffer_skip_bytes(len); } } -- cgit v1.2.3 From b3e5bce1aa88721dd4565089960997836ce66add Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 17 Nov 2010 23:02:48 -0600 Subject: vcs-svn: Error out for v3 dumps By ignoring the Text-Delta and Prop-Delta node fields, current svn-fe happily mistakes deltas for full text and instead of cleanly erroring out, it produces a valid but semantically bogus fast-import stream when fed a dump file in the modern "svnadmin dump --deltas" format. Dump file parsers are supposed to ignore header fields they don't understand (to allow for backward-compatible extensions), but they are also supposed to check the SVN-fs-dump-format-version header to prevent misinterpretation of non backward-compatible extensions. Do so. 
Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/svndump.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 53d0215d2d..fa580e62de 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -51,14 +51,14 @@ static struct { } rev_ctx; static struct { - uint32_t uuid, url; + uint32_t version, uuid, url; } dump_ctx; static struct { uint32_t svn_log, svn_author, svn_date, svn_executable, svn_special, uuid, revision_number, node_path, node_kind, node_action, node_copyfrom_path, node_copyfrom_rev, text_content_length, - prop_content_length, content_length; + prop_content_length, content_length, svn_fs_dump_format_version; } keys; static void reset_node_ctx(char *fname) @@ -85,6 +85,7 @@ static void reset_rev_ctx(uint32_t revision) static void reset_dump_ctx(uint32_t url) { dump_ctx.url = url; + dump_ctx.version = 1; dump_ctx.uuid = ~0; } @@ -105,6 +106,7 @@ static void init_keys(void) keys.text_content_length = pool_intern("Text-content-length"); keys.prop_content_length = pool_intern("Prop-content-length"); keys.content_length = pool_intern("Content-length"); + keys.svn_fs_dump_format_version = pool_intern("SVN-fs-dump-format-version"); } static void read_props(void) @@ -206,7 +208,12 @@ void svndump_read(const char *url) *val++ = '\0'; key = pool_intern(t); - if (key == keys.uuid) { + if (key == keys.svn_fs_dump_format_version) { + dump_ctx.version = atoi(val); + if (dump_ctx.version > 2) + die("expected svn dump format version <= 2, found %d", + dump_ctx.version); + } else if (key == keys.uuid) { dump_ctx.uuid = pool_intern(val); } else if (key == keys.revision_number) { if (active_ctx == NODE_CTX) -- cgit v1.2.3 From 1f05d07c456e23c0827efbbb3e738afc9f3152e7 Mon Sep 17 00:00:00 2001 From: David Barr Date: Wed, 17 Nov 2010 23:03:51 -0600 Subject: vcs-svn: Allow simple v3 dumps (no deltas yet) Since the dumpfile version 1 days, the Subversion dump 
format gained some new fields: - a unique identifier for the repository (version 2 format) - whether the text and properties for a node should be interpreted as deltas - checksums for a delta's preimage - SHA-1 sums as alternatives to the existing MD5 checksums for copy source and the payload (delta). For now what is relevant to us is the Text-delta and Prop-delta fields, since not noticing these causes a dump file to be misinterpreted (see the previous commit). [jn: with tests] Signed-off-by: David Barr Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/svndump.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index fa580e62de..6b64c1b857 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -42,6 +42,7 @@ static char* log_copy(uint32_t length, char *log) static struct { uint32_t action, propLength, textLength, srcRev, srcMode, mark, type; uint32_t src[REPO_MAX_PATH_DEPTH], dst[REPO_MAX_PATH_DEPTH]; + uint32_t text_delta, prop_delta; } node_ctx; static struct { @@ -58,7 +59,9 @@ static struct { uint32_t svn_log, svn_author, svn_date, svn_executable, svn_special, uuid, revision_number, node_path, node_kind, node_action, node_copyfrom_path, node_copyfrom_rev, text_content_length, - prop_content_length, content_length, svn_fs_dump_format_version; + prop_content_length, content_length, svn_fs_dump_format_version, + /* version 3 format */ + text_delta, prop_delta; } keys; static void reset_node_ctx(char *fname) @@ -72,6 +75,8 @@ static void reset_node_ctx(char *fname) node_ctx.srcMode = 0; pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.dst, "/", fname); node_ctx.mark = 0; + node_ctx.text_delta = 0; + node_ctx.prop_delta = 0; } static void reset_rev_ctx(uint32_t revision) @@ -107,6 +112,9 @@ static void init_keys(void) keys.prop_content_length = pool_intern("Prop-content-length"); keys.content_length = pool_intern("Content-length"); 
keys.svn_fs_dump_format_version = pool_intern("SVN-fs-dump-format-version"); + /* version 3 format (Subversion 1.1.0) */ + keys.text_delta = pool_intern("Text-delta"); + keys.prop_delta = pool_intern("Prop-delta"); } static void read_props(void) @@ -144,6 +152,9 @@ static void read_props(void) static void handle_node(void) { + if (node_ctx.text_delta || node_ctx.prop_delta) + die("text and property deltas not supported"); + if (node_ctx.propLength != LENGTH_UNKNOWN && node_ctx.propLength) read_props(); @@ -210,8 +221,8 @@ void svndump_read(const char *url) if (key == keys.svn_fs_dump_format_version) { dump_ctx.version = atoi(val); - if (dump_ctx.version > 2) - die("expected svn dump format version <= 2, found %d", + if (dump_ctx.version > 3) + die("expected svn dump format version <= 3, found %d", dump_ctx.version); } else if (key == keys.uuid) { dump_ctx.uuid = pool_intern(val); @@ -255,6 +266,10 @@ void svndump_read(const char *url) node_ctx.textLength = atoi(val); } else if (key == keys.prop_content_length) { node_ctx.propLength = atoi(val); + } else if (key == keys.text_delta) { + node_ctx.text_delta = !strcmp(val, "true"); + } else if (key == keys.prop_delta) { + node_ctx.prop_delta = !strcmp(val, "true"); } else if (key == keys.content_length) { len = atoi(val); buffer_read_line(); -- cgit v1.2.3 From 5c28a8b054cb69a37638b0261fc370422c8fab58 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 19 Nov 2010 18:46:06 -0600 Subject: vcs-svn: Check for errors from open() test-svn-fe segfaults when passed a bogus path. Simplify debugging by exiting with a meaningful error message instead. 
Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/svndump.c | 6 ++++-- vcs-svn/svndump.h | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 6b64c1b857..db11851225 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -290,14 +290,16 @@ void svndump_read(const char *url) handle_revision(); } -void svndump_init(const char *filename) +int svndump_init(const char *filename) { - buffer_init(filename); + if (buffer_init(filename)) + return error("cannot open %s: %s", filename, strerror(errno)); repo_init(); reset_dump_ctx(~0); reset_rev_ctx(0); reset_node_ctx(NULL); init_keys(); + return 0; } void svndump_deinit(void) diff --git a/vcs-svn/svndump.h b/vcs-svn/svndump.h index 93c412f14a..df9ceb0e8d 100644 --- a/vcs-svn/svndump.h +++ b/vcs-svn/svndump.h @@ -1,7 +1,7 @@ #ifndef SVNDUMP_H_ #define SVNDUMP_H_ -void svndump_init(const char *filename); +int svndump_init(const char *filename); void svndump_read(const char *url); void svndump_deinit(void); void svndump_reset(void); -- cgit v1.2.3 From 1d13e9f600986b7ced8db37a9a9c4967ee7ff9d5 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 19 Nov 2010 18:46:22 -0600 Subject: vcs-svn: Eliminate node_ctx.srcRev global The srcRev variable is only used in handle_node(); its purpose is to hold the old mode for a path, to only be used if properties are not being changed. Narrow its scope to make its meaningful lifetime more obvious. No functional change intended. Add some tests as a sanity-check for the simplest case (no renames). 
Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/svndump.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index db11851225..65bd335aa2 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -40,7 +40,7 @@ static char* log_copy(uint32_t length, char *log) } static struct { - uint32_t action, propLength, textLength, srcRev, srcMode, mark, type; + uint32_t action, propLength, textLength, srcRev, mark, type; uint32_t src[REPO_MAX_PATH_DEPTH], dst[REPO_MAX_PATH_DEPTH]; uint32_t text_delta, prop_delta; } node_ctx; @@ -72,7 +72,6 @@ static void reset_node_ctx(char *fname) node_ctx.textLength = LENGTH_UNKNOWN; node_ctx.src[0] = ~0; node_ctx.srcRev = 0; - node_ctx.srcMode = 0; pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.dst, "/", fname); node_ctx.mark = 0; node_ctx.text_delta = 0; @@ -152,6 +151,8 @@ static void read_props(void) static void handle_node(void) { + uint32_t old_mode = 0; + if (node_ctx.text_delta || node_ctx.prop_delta) die("text and property deltas not supported"); @@ -159,7 +160,7 @@ static void handle_node(void) read_props(); if (node_ctx.srcRev) - node_ctx.srcMode = repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst); + old_mode = repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst); if (node_ctx.textLength != LENGTH_UNKNOWN && node_ctx.type != REPO_MODE_DIR) @@ -175,19 +176,19 @@ static void handle_node(void) else if (node_ctx.propLength != LENGTH_UNKNOWN) repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark); else if (node_ctx.textLength != LENGTH_UNKNOWN) - node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark); + old_mode = repo_replace(node_ctx.dst, node_ctx.mark); } else if (node_ctx.action == NODEACT_ADD) { if (node_ctx.srcRev && node_ctx.propLength != LENGTH_UNKNOWN) repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark); else if (node_ctx.srcRev && node_ctx.textLength != LENGTH_UNKNOWN) - node_ctx.srcMode = 
repo_replace(node_ctx.dst, node_ctx.mark); + old_mode = repo_replace(node_ctx.dst, node_ctx.mark); else if ((node_ctx.type == REPO_MODE_DIR && !node_ctx.srcRev) || node_ctx.textLength != LENGTH_UNKNOWN) repo_add(node_ctx.dst, node_ctx.type, node_ctx.mark); } - if (node_ctx.propLength == LENGTH_UNKNOWN && node_ctx.srcMode) - node_ctx.type = node_ctx.srcMode; + if (node_ctx.propLength == LENGTH_UNKNOWN && old_mode) + node_ctx.type = old_mode; if (node_ctx.mark) fast_export_blob(node_ctx.type, node_ctx.mark, node_ctx.textLength); -- cgit v1.2.3 From da3e217447390d52363989474a5e33bd298ff3ad Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 19 Nov 2010 18:46:54 -0600 Subject: vcs-svn: Eliminate node_ctx.mark global The mark variable is only used in handle_node(). Its life is very short and simple: first, a new mark number is allocated if this node has text attached, then that mark is recorded in the in-core tree being built up, and lastly the mark is communicated to fast-import in the stream along with the associated text. A new reader may worry about interaction with other code, especially since mark is not initialized to zero in handle_node() itself. Disperse such worries by making it local. No functional change intended. 
Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/svndump.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 65bd335aa2..1fb7f82bba 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -40,7 +40,7 @@ static char* log_copy(uint32_t length, char *log) } static struct { - uint32_t action, propLength, textLength, srcRev, mark, type; + uint32_t action, propLength, textLength, srcRev, type; uint32_t src[REPO_MAX_PATH_DEPTH], dst[REPO_MAX_PATH_DEPTH]; uint32_t text_delta, prop_delta; } node_ctx; @@ -73,7 +73,6 @@ static void reset_node_ctx(char *fname) node_ctx.src[0] = ~0; node_ctx.srcRev = 0; pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.dst, "/", fname); - node_ctx.mark = 0; node_ctx.text_delta = 0; node_ctx.prop_delta = 0; } @@ -151,7 +150,7 @@ static void read_props(void) static void handle_node(void) { - uint32_t old_mode = 0; + uint32_t old_mode = 0, mark = 0; if (node_ctx.text_delta || node_ctx.prop_delta) die("text and property deltas not supported"); @@ -164,7 +163,7 @@ static void handle_node(void) if (node_ctx.textLength != LENGTH_UNKNOWN && node_ctx.type != REPO_MODE_DIR) - node_ctx.mark = next_blob_mark(); + mark = next_blob_mark(); if (node_ctx.action == NODEACT_DELETE) { repo_delete(node_ctx.dst); @@ -172,26 +171,26 @@ static void handle_node(void) node_ctx.action == NODEACT_REPLACE) { if (node_ctx.action == NODEACT_REPLACE && node_ctx.type == REPO_MODE_DIR) - repo_replace(node_ctx.dst, node_ctx.mark); + repo_replace(node_ctx.dst, mark); else if (node_ctx.propLength != LENGTH_UNKNOWN) - repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark); + repo_modify(node_ctx.dst, node_ctx.type, mark); else if (node_ctx.textLength != LENGTH_UNKNOWN) - old_mode = repo_replace(node_ctx.dst, node_ctx.mark); + old_mode = repo_replace(node_ctx.dst, mark); } else if (node_ctx.action == NODEACT_ADD) { if (node_ctx.srcRev && 
node_ctx.propLength != LENGTH_UNKNOWN) - repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark); + repo_modify(node_ctx.dst, node_ctx.type, mark); else if (node_ctx.srcRev && node_ctx.textLength != LENGTH_UNKNOWN) - old_mode = repo_replace(node_ctx.dst, node_ctx.mark); + old_mode = repo_replace(node_ctx.dst, mark); else if ((node_ctx.type == REPO_MODE_DIR && !node_ctx.srcRev) || node_ctx.textLength != LENGTH_UNKNOWN) - repo_add(node_ctx.dst, node_ctx.type, node_ctx.mark); + repo_add(node_ctx.dst, node_ctx.type, mark); } if (node_ctx.propLength == LENGTH_UNKNOWN && old_mode) node_ctx.type = old_mode; - if (node_ctx.mark) - fast_export_blob(node_ctx.type, node_ctx.mark, node_ctx.textLength); + if (mark) + fast_export_blob(node_ctx.type, mark, node_ctx.textLength); else if (node_ctx.textLength != LENGTH_UNKNOWN) buffer_skip_bytes(node_ctx.textLength); } -- cgit v1.2.3 From d6e81a03153810f122f1b8ec3635fd84c5429f69 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 19 Nov 2010 18:47:41 -0600 Subject: vcs-svn: Unclutter handle_node by introducing have_props var It is possible for a path node in an SVN-format dump file to leave out the properties section. svn-fe handles this by carrying over the properties (in particular, file type) from the old version of that node. To support this, handle_node tests several times whether a Prop-content-length field is present. Ancient Subversion actually leaves out the Prop-content-length field even for nodes with properties, so that's not quite the right check. Besides, this detail of mechanism is distracting when the question at hand is instead what content the new node should have. So introduce a local have_props variable. The semantics are the same as before; the adaptations to support ancient streams that leave out the prop-content-length can wait until someone needs them. 
Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/svndump.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 1fb7f82bba..45f0e477d7 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -151,11 +151,12 @@ static void read_props(void) static void handle_node(void) { uint32_t old_mode = 0, mark = 0; + const int have_props = node_ctx.propLength != LENGTH_UNKNOWN; if (node_ctx.text_delta || node_ctx.prop_delta) die("text and property deltas not supported"); - if (node_ctx.propLength != LENGTH_UNKNOWN && node_ctx.propLength) + if (have_props && node_ctx.propLength) read_props(); if (node_ctx.srcRev) @@ -172,12 +173,12 @@ static void handle_node(void) if (node_ctx.action == NODEACT_REPLACE && node_ctx.type == REPO_MODE_DIR) repo_replace(node_ctx.dst, mark); - else if (node_ctx.propLength != LENGTH_UNKNOWN) + else if (have_props) repo_modify(node_ctx.dst, node_ctx.type, mark); else if (node_ctx.textLength != LENGTH_UNKNOWN) old_mode = repo_replace(node_ctx.dst, mark); } else if (node_ctx.action == NODEACT_ADD) { - if (node_ctx.srcRev && node_ctx.propLength != LENGTH_UNKNOWN) + if (node_ctx.srcRev && have_props) repo_modify(node_ctx.dst, node_ctx.type, mark); else if (node_ctx.srcRev && node_ctx.textLength != LENGTH_UNKNOWN) old_mode = repo_replace(node_ctx.dst, mark); @@ -186,7 +187,7 @@ static void handle_node(void) repo_add(node_ctx.dst, node_ctx.type, mark); } - if (node_ctx.propLength == LENGTH_UNKNOWN && old_mode) + if (!have_props && old_mode) node_ctx.type = old_mode; if (mark) -- cgit v1.2.3 From 462e1f51a5648ce9d7ca26d44ed86327c454889a Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 19 Nov 2010 18:48:51 -0600 Subject: vcs-svn: Use mark to indicate nodes with included text Allocate a mark if needed as soon as possible so later code can use "if (mark)" to check if this node has text attached rather than explicitly checking for 
Text-content-length. While at it, reject directory nodes with text attached; the presence of such a node would indicate a bug in the dump generator or svn-fe's understanding. In the long term, it would be nice to be able to continue parsing and save the error for later, but for now it is simpler to error out right away. Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/svndump.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 45f0e477d7..844076b669 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -156,15 +156,17 @@ static void handle_node(void) if (node_ctx.text_delta || node_ctx.prop_delta) die("text and property deltas not supported"); + if (node_ctx.textLength != LENGTH_UNKNOWN) + mark = next_blob_mark(); + if (have_props && node_ctx.propLength) read_props(); if (node_ctx.srcRev) old_mode = repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst); - if (node_ctx.textLength != LENGTH_UNKNOWN && - node_ctx.type != REPO_MODE_DIR) - mark = next_blob_mark(); + if (mark && node_ctx.type == REPO_MODE_DIR) + die("invalid dump: directories cannot have text attached"); if (node_ctx.action == NODEACT_DELETE) { repo_delete(node_ctx.dst); @@ -175,15 +177,15 @@ static void handle_node(void) repo_replace(node_ctx.dst, mark); else if (have_props) repo_modify(node_ctx.dst, node_ctx.type, mark); - else if (node_ctx.textLength != LENGTH_UNKNOWN) + else if (mark) old_mode = repo_replace(node_ctx.dst, mark); } else if (node_ctx.action == NODEACT_ADD) { if (node_ctx.srcRev && have_props) repo_modify(node_ctx.dst, node_ctx.type, mark); - else if (node_ctx.srcRev && node_ctx.textLength != LENGTH_UNKNOWN) + else if (node_ctx.srcRev && mark) old_mode = repo_replace(node_ctx.dst, mark); else if ((node_ctx.type == REPO_MODE_DIR && !node_ctx.srcRev) || - node_ctx.textLength != LENGTH_UNKNOWN) + mark) repo_add(node_ctx.dst, node_ctx.type, mark); } @@ -192,8 
+194,6 @@ static void handle_node(void) if (mark) fast_export_blob(node_ctx.type, mark, node_ctx.textLength); - else if (node_ctx.textLength != LENGTH_UNKNOWN) - buffer_skip_bytes(node_ctx.textLength); } static void handle_revision(void) -- cgit v1.2.3 From 5af8fae2df03d1888dbf315da29d1cdaa6214f57 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 19 Nov 2010 18:49:17 -0600 Subject: vcs-svn: handle_node: Handle deletion case early Take care of "Node-action: delete" as soon as possible, so we can stop worrying about that case in the rest of the function. Functional change: catch deletion nodes with features that would not apply to them (text, properties, or origin data) and error out for those cases. Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/svndump.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 844076b669..bc70023073 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -159,6 +159,13 @@ static void handle_node(void) if (node_ctx.textLength != LENGTH_UNKNOWN) mark = next_blob_mark(); + if (node_ctx.action == NODEACT_DELETE) { + if (mark || have_props || node_ctx.srcRev) + die("invalid dump: deletion node has " + "copyfrom info, text, or properties"); + return repo_delete(node_ctx.dst); + } + if (have_props && node_ctx.propLength) read_props(); @@ -168,9 +175,7 @@ static void handle_node(void) if (mark && node_ctx.type == REPO_MODE_DIR) die("invalid dump: directories cannot have text attached"); - if (node_ctx.action == NODEACT_DELETE) { - repo_delete(node_ctx.dst); - } else if (node_ctx.action == NODEACT_CHANGE || + if (node_ctx.action == NODEACT_CHANGE || node_ctx.action == NODEACT_REPLACE) { if (node_ctx.action == NODEACT_REPLACE && node_ctx.type == REPO_MODE_DIR) -- cgit v1.2.3 From 6ee4a9be48ee714ddacf313a7073dabdd6c6ee11 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 19 Nov 2010 18:49:55 -0600 Subject: 
vcs-svn: Replace = Delete + Add Simplify by reducing the "Node-action: replace" case to "Node-action: add". This way, the main part of handle_node() only has to deal with "add" and "change" nodes. Functional change: replacing a symlink or executable without setting properties will reset the mode. Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/svndump.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index bc70023073..6a6aaf92b5 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -166,6 +166,11 @@ static void handle_node(void) return repo_delete(node_ctx.dst); } + if (node_ctx.action == NODEACT_REPLACE) { + repo_delete(node_ctx.dst); + node_ctx.action = NODEACT_ADD; + } + if (have_props && node_ctx.propLength) read_props(); @@ -175,12 +180,8 @@ static void handle_node(void) if (mark && node_ctx.type == REPO_MODE_DIR) die("invalid dump: directories cannot have text attached"); - if (node_ctx.action == NODEACT_CHANGE || - node_ctx.action == NODEACT_REPLACE) { - if (node_ctx.action == NODEACT_REPLACE && - node_ctx.type == REPO_MODE_DIR) - repo_replace(node_ctx.dst, mark); - else if (have_props) + if (node_ctx.action == NODEACT_CHANGE) { + if (have_props) repo_modify(node_ctx.dst, node_ctx.type, mark); else if (mark) old_mode = repo_replace(node_ctx.dst, mark); -- cgit v1.2.3 From 08c39b5c44449cb649ac32274e27be8046e373d4 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 19 Nov 2010 18:51:50 -0600 Subject: vcs-svn: Combine repo_replace and repo_modify functions There are two functions to change the staged content for a path in the svn importer's active commit: repo_replace, which changes the text and returns the mode, and repo_modify, which changes the text and mode and returns nothing. Worse, there are more subtle differences: - A mark of 0 passed to repo_modify means "use the existing content". 
repo_replace uses it as mark :0 and produces a corrupt stream. - When passed a path that is not part of the active commit, repo_replace returns without doing anything. repo_modify transparently adds a new directory entry. Get rid of both and introduce a new function with the best features of both: repo_modify_path modifies the mode, content, or both for a path, depending on which arguments are zero. If no such dirent already exists, it does nothing and reports the error by returning 0. Otherwise, the return value is the resulting mode. Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/repo_tree.c | 21 +++++++-------------- vcs-svn/repo_tree.h | 3 +-- vcs-svn/svndump.c | 8 ++++---- 3 files changed, 12 insertions(+), 20 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c index e94d91d129..7214ac8d0f 100644 --- a/vcs-svn/repo_tree.c +++ b/vcs-svn/repo_tree.c @@ -175,25 +175,18 @@ void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark) repo_write_dirent(path, mode, blob_mark, 0); } -uint32_t repo_replace(uint32_t *path, uint32_t blob_mark) +uint32_t repo_modify_path(uint32_t *path, uint32_t mode, uint32_t blob_mark) { - uint32_t mode = 0; struct repo_dirent *src_dent; src_dent = repo_read_dirent(active_commit, path); - if (src_dent != NULL) { - mode = src_dent->mode; - repo_write_dirent(path, mode, blob_mark, 0); - } - return mode; -} - -void repo_modify(uint32_t *path, uint32_t mode, uint32_t blob_mark) -{ - struct repo_dirent *src_dent; - src_dent = repo_read_dirent(active_commit, path); - if (src_dent != NULL && blob_mark == 0) + if (!src_dent) + return 0; + if (!blob_mark) blob_mark = src_dent->content_offset; + if (!mode) + mode = src_dent->mode; repo_write_dirent(path, mode, blob_mark, 0); + return mode; } void repo_delete(uint32_t *path) diff --git a/vcs-svn/repo_tree.h b/vcs-svn/repo_tree.h index 5476175922..68baeb582f 100644 --- a/vcs-svn/repo_tree.h +++ b/vcs-svn/repo_tree.h @@ -14,8 +14,7 
@@ uint32_t next_blob_mark(void); uint32_t repo_copy(uint32_t revision, uint32_t *src, uint32_t *dst); void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark); -uint32_t repo_replace(uint32_t *path, uint32_t blob_mark); -void repo_modify(uint32_t *path, uint32_t mode, uint32_t blob_mark); +uint32_t repo_modify_path(uint32_t *path, uint32_t mode, uint32_t blob_mark); void repo_delete(uint32_t *path); void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, uint32_t url, long unsigned timestamp); diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 6a6aaf92b5..e40be580a7 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -182,14 +182,14 @@ static void handle_node(void) if (node_ctx.action == NODEACT_CHANGE) { if (have_props) - repo_modify(node_ctx.dst, node_ctx.type, mark); + repo_modify_path(node_ctx.dst, node_ctx.type, mark); else if (mark) - old_mode = repo_replace(node_ctx.dst, mark); + old_mode = repo_modify_path(node_ctx.dst, 0, mark); } else if (node_ctx.action == NODEACT_ADD) { if (node_ctx.srcRev && have_props) - repo_modify(node_ctx.dst, node_ctx.type, mark); + repo_modify_path(node_ctx.dst, node_ctx.type, mark); else if (node_ctx.srcRev && mark) - old_mode = repo_replace(node_ctx.dst, mark); + old_mode = repo_modify_path(node_ctx.dst, 0, mark); else if ((node_ctx.type == REPO_MODE_DIR && !node_ctx.srcRev) || mark) repo_add(node_ctx.dst, node_ctx.type, mark); -- cgit v1.2.3 From 1c7bb316169c700df0d1711555564f86c9cb9366 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 19 Nov 2010 18:52:28 -0600 Subject: vcs-svn: Delay read of per-path properties The mode for each file in an svn-format dump is kept in the properties section. The properties section is read as soon as possible to allow the correct mode to be filled in when registering the file with the repo_tree lib. 
To support nodes with a missing properties section, svn-fe determines the mode in three stages: - The kind (directory or file) of the node is read from the dump and used to make an initial estimate (040000 or 100644). - Properties are read in and allowed to override this for symlinks and executables. - If there is no properties section, the mode from the previous content of the path is left alone, overriding the above considerations. This is a bit of a mess, and worse, it would get even more complicated once we start to support property deltas. If we could only register the file with a provisional value for mode and then change it later when properties say so, the procedure would be much simpler. ... oh, right, we can. Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/svndump.c | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index e40be580a7..4fdfcbbc0d 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -150,7 +150,8 @@ static void read_props(void) static void handle_node(void) { - uint32_t old_mode = 0, mark = 0; + uint32_t mark = 0; + const uint32_t type = node_ctx.type; const int have_props = node_ctx.propLength != LENGTH_UNKNOWN; if (node_ctx.text_delta || node_ctx.prop_delta) @@ -171,33 +172,28 @@ static void handle_node(void) node_ctx.action = NODEACT_ADD; } - if (have_props && node_ctx.propLength) - read_props(); - - if (node_ctx.srcRev) - old_mode = repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst); + if (node_ctx.srcRev) { + repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst); + node_ctx.action = NODEACT_CHANGE; + } - if (mark && node_ctx.type == REPO_MODE_DIR) + if (mark && type == REPO_MODE_DIR) die("invalid dump: directories cannot have text attached"); - if (node_ctx.action == NODEACT_CHANGE) { - if (have_props) + if (node_ctx.action == NODEACT_CHANGE) + node_ctx.type = 
repo_modify_path(node_ctx.dst, 0, mark); + else /* Node-action: add */ + repo_add(node_ctx.dst, type, mark); + + if (have_props) { + const uint32_t old_mode = node_ctx.type; + node_ctx.type = type; + if (node_ctx.propLength) + read_props(); + if (node_ctx.type != old_mode) repo_modify_path(node_ctx.dst, node_ctx.type, mark); - else if (mark) - old_mode = repo_modify_path(node_ctx.dst, 0, mark); - } else if (node_ctx.action == NODEACT_ADD) { - if (node_ctx.srcRev && have_props) - repo_modify_path(node_ctx.dst, node_ctx.type, mark); - else if (node_ctx.srcRev && mark) - old_mode = repo_modify_path(node_ctx.dst, 0, mark); - else if ((node_ctx.type == REPO_MODE_DIR && !node_ctx.srcRev) || - mark) - repo_add(node_ctx.dst, node_ctx.type, mark); } - if (!have_props && old_mode) - node_ctx.type = old_mode; - if (mark) fast_export_blob(node_ctx.type, mark, node_ctx.textLength); } -- cgit v1.2.3 From 414e569e453a49171b1f3db613f88378324104e8 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 19 Nov 2010 18:52:59 -0600 Subject: vcs-svn: Reject path nodes without Node-action It would be better to flag such errors and let the import proceed anyway, but for now it is simpler not to worry about recovery from such weird cases. 
Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/svndump.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 4fdfcbbc0d..0af8ac6807 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -174,7 +174,8 @@ static void handle_node(void) if (node_ctx.srcRev) { repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst); - node_ctx.action = NODEACT_CHANGE; + if (node_ctx.action == NODEACT_ADD) + node_ctx.action = NODEACT_CHANGE; } if (mark && type == REPO_MODE_DIR) @@ -182,8 +183,10 @@ static void handle_node(void) if (node_ctx.action == NODEACT_CHANGE) node_ctx.type = repo_modify_path(node_ctx.dst, 0, mark); - else /* Node-action: add */ + else if (node_ctx.action == NODEACT_ADD) repo_add(node_ctx.dst, type, mark); + else + die("invalid dump: Node-path block lacks Node-action"); if (have_props) { const uint32_t old_mode = node_ctx.type; -- cgit v1.2.3 From c7dbf35e91cffbc326078d0c0470662f6422150d Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 19 Nov 2010 18:53:34 -0600 Subject: vcs-svn: More dump format sanity checks Node-action: change is not appropriate when switching between file and directory or adding a new file. Current svn-fe silently accepts such nodes and the resulting tree has missing files in the "changed when meant to add" case. Node-action: add requires some content (text or directory); there is no such thing as an "intent to add" node in svn dumps. Current svn-fe accepts such contentless adds but produces an invalid fast-import stream that refers to nonexistent mark :0 in response. 
Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/svndump.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 0af8ac6807..ab4ccfc55f 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -181,12 +181,22 @@ static void handle_node(void) if (mark && type == REPO_MODE_DIR) die("invalid dump: directories cannot have text attached"); - if (node_ctx.action == NODEACT_CHANGE) - node_ctx.type = repo_modify_path(node_ctx.dst, 0, mark); - else if (node_ctx.action == NODEACT_ADD) + if (node_ctx.action == NODEACT_CHANGE) { + uint32_t mode = repo_modify_path(node_ctx.dst, 0, mark); + if (!mode) + die("invalid dump: path to be modified is missing"); + if (mode == REPO_MODE_DIR && type != REPO_MODE_DIR) + die("invalid dump: cannot modify a directory into a file"); + if (mode != REPO_MODE_DIR && type == REPO_MODE_DIR) + die("invalid dump: cannot modify a file into a directory"); + node_ctx.type = mode; + } else if (node_ctx.action == NODEACT_ADD) { + if (!mark && type != REPO_MODE_DIR) + die("invalid dump: adds node without text"); repo_add(node_ctx.dst, type, mark); - else + } else { die("invalid dump: Node-path block lacks Node-action"); + } if (have_props) { const uint32_t old_mode = node_ctx.type; -- cgit v1.2.3 From 3f3e676d6e6c1d445181107770670368e0ad3160 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 19 Nov 2010 18:53:54 -0600 Subject: vcs-svn: Make source easier to read on small screens Remove some newlines from handle_node() that are not needed for clarity. 
Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/svndump.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index ab4ccfc55f..153b0c337d 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -156,31 +156,25 @@ static void handle_node(void) if (node_ctx.text_delta || node_ctx.prop_delta) die("text and property deltas not supported"); - if (node_ctx.textLength != LENGTH_UNKNOWN) mark = next_blob_mark(); - if (node_ctx.action == NODEACT_DELETE) { if (mark || have_props || node_ctx.srcRev) die("invalid dump: deletion node has " "copyfrom info, text, or properties"); return repo_delete(node_ctx.dst); } - if (node_ctx.action == NODEACT_REPLACE) { repo_delete(node_ctx.dst); node_ctx.action = NODEACT_ADD; } - if (node_ctx.srcRev) { repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst); if (node_ctx.action == NODEACT_ADD) node_ctx.action = NODEACT_CHANGE; } - if (mark && type == REPO_MODE_DIR) die("invalid dump: directories cannot have text attached"); - if (node_ctx.action == NODEACT_CHANGE) { uint32_t mode = repo_modify_path(node_ctx.dst, 0, mark); if (!mode) @@ -197,7 +191,6 @@ static void handle_node(void) } else { die("invalid dump: Node-path block lacks Node-action"); } - if (have_props) { const uint32_t old_mode = node_ctx.type; node_ctx.type = type; @@ -206,7 +199,6 @@ static void handle_node(void) if (node_ctx.type != old_mode) repo_modify_path(node_ctx.dst, node_ctx.type, mark); } - if (mark) fast_export_blob(node_ctx.type, mark, node_ctx.textLength); } -- cgit v1.2.3 From 2a48afe1c256db6273a4ff99eaddc5c18dc46ffd Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 19 Nov 2010 18:54:20 -0600 Subject: vcs-svn: Split off function for handling of individual properties The handle_property function is the part of read_props that would be interesting for most people: semantics of properties rather than the algorithm for parsing them. 
Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/svndump.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 153b0c337d..5de8dadcdd 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -30,7 +30,7 @@ /* Create memory pool for log messages */ obj_pool_gen(log, char, 4096) -static char* log_copy(uint32_t length, char *log) +static char *log_copy(uint32_t length, const char *log) { char *buffer; log_free(log_pool.size); @@ -115,6 +115,23 @@ static void init_keys(void) keys.prop_delta = pool_intern("Prop-delta"); } +static void handle_property(uint32_t key, const char *val, uint32_t len) +{ + if (key == keys.svn_log) { + /* Value length excludes terminating nul. */ + rev_ctx.log = log_copy(len + 1, val); + } else if (key == keys.svn_author) { + rev_ctx.author = pool_intern(val); + } else if (key == keys.svn_date) { + if (parse_date_basic(val, &rev_ctx.timestamp, NULL)) + fprintf(stderr, "Invalid timestamp: %s\n", val); + } else if (key == keys.svn_executable) { + node_ctx.type = REPO_MODE_EXE; + } else if (key == keys.svn_special) { + node_ctx.type = REPO_MODE_LNK; + } +} + static void read_props(void) { uint32_t len; @@ -129,19 +146,7 @@ static void read_props(void) } else if (!strncmp(t, "V ", 2)) { len = atoi(&t[2]); val = buffer_read_string(len); - if (key == keys.svn_log) { - /* Value length excludes terminating nul. 
*/ - rev_ctx.log = log_copy(len + 1, val); - } else if (key == keys.svn_author) { - rev_ctx.author = pool_intern(val); - } else if (key == keys.svn_date) { - if (parse_date_basic(val, &rev_ctx.timestamp, NULL)) - fprintf(stderr, "Invalid timestamp: %s\n", val); - } else if (key == keys.svn_executable) { - node_ctx.type = REPO_MODE_EXE; - } else if (key == keys.svn_special) { - node_ctx.type = REPO_MODE_LNK; - } + handle_property(key, val, len); key = ~0; buffer_read_line(); } -- cgit v1.2.3 From 6263c06d49abdf5e5defdf528c3ff67bf948ac9b Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 19 Nov 2010 18:54:45 -0600 Subject: vcs-svn: Sharpen parsing of property lines Prepare to add a new type of property line (the 'D' line) to handle property deltas. Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/svndump.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 5de8dadcdd..576d148e5e 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -134,21 +134,29 @@ static void handle_property(uint32_t key, const char *val, uint32_t len) static void read_props(void) { - uint32_t len; uint32_t key = ~0; - char *val = NULL; - char *t; + const char *t; while ((t = buffer_read_line()) && strcmp(t, "PROPS-END")) { - if (!strncmp(t, "K ", 2)) { - len = atoi(&t[2]); - key = pool_intern(buffer_read_string(len)); - buffer_read_line(); - } else if (!strncmp(t, "V ", 2)) { - len = atoi(&t[2]); - val = buffer_read_string(len); + uint32_t len; + const char *val; + const char type = t[0]; + + if (!type || t[1] != ' ') + die("invalid property line: %s\n", t); + len = atoi(&t[2]); + val = buffer_read_string(len); + buffer_skip_bytes(1); /* Discard trailing newline. 
*/ + + switch (type) { + case 'K': + key = pool_intern(val); + continue; + case 'V': handle_property(key, val, len); key = ~0; - buffer_read_line(); + continue; + default: + die("invalid property line: %s\n", t); } } } -- cgit v1.2.3 From 6b01b67658e2905b550739f1aee56a00911ca13c Mon Sep 17 00:00:00 2001 From: David Barr Date: Fri, 19 Nov 2010 18:57:46 -0600 Subject: vcs-svn: Implement Prop-delta handling The rules for what file is used as delta source for each file are not documented in dump-load-format.txt. Luckily, the Apache Software Foundation repository has rich enough examples to figure out most of the rules: Node-action: replace implies the empty property set and empty text as preimage for deltas. Otherwise, if a copyfrom source is given, that node is the preimage for deltas. Lastly, if none of the above applies and the node path exists in the current revision, then that version forms the basis. [jn: refactored, with tests] Signed-off-by: David Barr Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/svndump.c | 54 ++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 10 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 576d148e5e..c71a57599e 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -115,20 +115,35 @@ static void init_keys(void) keys.prop_delta = pool_intern("Prop-delta"); } -static void handle_property(uint32_t key, const char *val, uint32_t len) +static void handle_property(uint32_t key, const char *val, uint32_t len, + uint32_t *type_set) { if (key == keys.svn_log) { + if (!val) + die("invalid dump: unsets svn:log"); /* Value length excludes terminating nul. 
*/ rev_ctx.log = log_copy(len + 1, val); } else if (key == keys.svn_author) { rev_ctx.author = pool_intern(val); } else if (key == keys.svn_date) { + if (!val) + die("invalid dump: unsets svn:date"); if (parse_date_basic(val, &rev_ctx.timestamp, NULL)) - fprintf(stderr, "Invalid timestamp: %s\n", val); - } else if (key == keys.svn_executable) { - node_ctx.type = REPO_MODE_EXE; - } else if (key == keys.svn_special) { - node_ctx.type = REPO_MODE_LNK; + warning("invalid timestamp: %s", val); + } else if (key == keys.svn_executable || key == keys.svn_special) { + if (*type_set) { + if (!val) + return; + die("invalid dump: sets type twice"); + } + if (!val) { + node_ctx.type = REPO_MODE_BLB; + return; + } + *type_set = 1; + node_ctx.type = key == keys.svn_executable ? + REPO_MODE_EXE : + REPO_MODE_LNK; } } @@ -136,6 +151,19 @@ static void read_props(void) { uint32_t key = ~0; const char *t; + /* + * NEEDSWORK: to support simple mode changes like + * K 11 + * svn:special + * V 1 + * * + * D 14 + * svn:executable + * we keep track of whether a mode has been set and reset to + * plain file only if not. We should be keeping track of the + * symlink and executable bits separately instead. 
+ */ + uint32_t type_set = 0; while ((t = buffer_read_line()) && strcmp(t, "PROPS-END")) { uint32_t len; const char *val; @@ -151,8 +179,13 @@ static void read_props(void) case 'K': key = pool_intern(val); continue; + case 'D': + key = pool_intern(val); + val = NULL; + len = 0; + /* fall through */ case 'V': - handle_property(key, val, len); + handle_property(key, val, len, &type_set); key = ~0; continue; default: @@ -167,8 +200,8 @@ static void handle_node(void) const uint32_t type = node_ctx.type; const int have_props = node_ctx.propLength != LENGTH_UNKNOWN; - if (node_ctx.text_delta || node_ctx.prop_delta) - die("text and property deltas not supported"); + if (node_ctx.text_delta) + die("text deltas not supported"); if (node_ctx.textLength != LENGTH_UNKNOWN) mark = next_blob_mark(); if (node_ctx.action == NODEACT_DELETE) { @@ -206,7 +239,8 @@ static void handle_node(void) } if (have_props) { const uint32_t old_mode = node_ctx.type; - node_ctx.type = type; + if (!node_ctx.prop_delta) + node_ctx.type = type; if (node_ctx.propLength) read_props(); if (node_ctx.type != old_mode) -- cgit v1.2.3 From 97a5e3453abf63bbf5926979fcd89efb4388e937 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 5 Dec 2010 03:35:17 -0600 Subject: treap: make treap_insert return inserted node Suppose I try the following: struct int_node *node = node_pointer(node_alloc(1)); node->n = 5; treap_insert(&root, node); printf("%d\n", node->n); Usually the result will be 5. But since treap_insert draws memory from the node pool, if the caller is unlucky then (1) the node pool will be full and (2) realloc will be forced to move the node pool to find room, so the node address becomes invalid and the result of dereferencing it is undefined. So we ought to use offsets in preference to pointers for references that would remain valid after a call to treap_insert. 
Tweak the signature to hint at a certain special case: since the inserted node can change address (though not offset), as a convenience teach treap_insert to return its new address. So the motivational example could be fixed by adding "node =". struct int_node *node = node_pointer(node_alloc(1)); node->n = 5; node = treap_insert(&root, node); printf("%d\n", node->n); Based on a true story. Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/trp.h | 3 ++- vcs-svn/trp.txt | 10 ++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/trp.h b/vcs-svn/trp.h index ee35c688a0..c32b9184e9 100644 --- a/vcs-svn/trp.h +++ b/vcs-svn/trp.h @@ -188,11 +188,12 @@ a_attr uint32_t MAYBE_UNUSED a_pre##insert_recurse(uint32_t cur_node, uint32_t i return ret; \ } \ } \ -a_attr void MAYBE_UNUSED a_pre##insert(struct trp_root *treap, a_type *node) \ +a_attr a_type *MAYBE_UNUSED a_pre##insert(struct trp_root *treap, a_type *node) \ { \ uint32_t offset = trpn_offset(a_base, node); \ trp_node_new(a_base, a_field, offset); \ treap->trp_root = a_pre##insert_recurse(treap->trp_root, offset); \ + return trpn_pointer(a_base, offset); \ } \ a_attr uint32_t MAYBE_UNUSED a_pre##remove_recurse(uint32_t cur_node, uint32_t rem_node) \ { \ diff --git a/vcs-svn/trp.txt b/vcs-svn/trp.txt index eb4c191875..5ca6b42edb 100644 --- a/vcs-svn/trp.txt +++ b/vcs-svn/trp.txt @@ -21,7 +21,9 @@ The caller: . Allocates a `struct trp_root` variable and sets it to {~0}. -. Adds new nodes to the set using `foo_insert`. +. Adds new nodes to the set using `foo_insert`. Any pointers + to existing nodes cannot be relied upon any more, so the caller + might retrieve them anew with `foo_pointer`. . Can find a specific item in the set using `foo_search`. @@ -73,10 +75,14 @@ int (*cmp)(node_type \*a, node_type \*b) and returning a value less than, equal to, or greater than zero according to the result of comparison. 
-void foo_insert(struct trp_root *treap, node_type \*node):: +node_type {asterisk}foo_insert(struct trp_root *treap, node_type \*node):: Insert node into treap. If inserted multiple times, a node will appear in the treap multiple times. ++ +The return value is the address of the node within the treap, +which might differ from `node` if `pool_alloc` had to call +`realloc` to expand the pool. void foo_remove(struct trp_root *treap, node_type \*node):: -- cgit v1.2.3 From 3c93983875af53b9f172d7f7a1022d0954cb5689 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 5 Dec 2010 03:32:53 -0600 Subject: vcs-svn: fix intermittent repo_tree corruption Pointers to directory entries do not remain valid after a call to dent_insert. Noticed in the course of importing a small Subversion repository (~1000 revs); after setting up a dirent for a certain path as a placeholder, by luck dent_insert would trigger a realloc that shifted around addresses, resulting in an import with that file replaced by a directory. 
Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/repo_tree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c index e94d91d129..e3d1fa3544 100644 --- a/vcs-svn/repo_tree.c +++ b/vcs-svn/repo_tree.c @@ -131,7 +131,7 @@ static void repo_write_dirent(uint32_t *path, uint32_t mode, if (dent == key) { dent->mode = REPO_MODE_DIR; dent->content_offset = 0; - dent_insert(&dir->entries, dent); + dent = dent_insert(&dir->entries, dent); } if (dent_offset(dent) < dent_pool.committed) { @@ -142,7 +142,7 @@ static void repo_write_dirent(uint32_t *path, uint32_t mode, dent->name_offset = name; dent->mode = REPO_MODE_DIR; dent->content_offset = dir_o; - dent_insert(&dir->entries, dent); + dent = dent_insert(&dir->entries, dent); } dir = repo_dir_from_dirent(dent); -- cgit v1.2.3 From 9e8c532108b9078812f23c53a2df3509e7ce71bf Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Mon, 6 Dec 2010 16:19:32 -0600 Subject: vcs-svn: Allow change nodes for root of tree (/) It is not uncommon for a svn repository to include change records for properties at the top level of the tracked tree: Node-path: Node-kind: dir Node-action: change Prop-delta: true Prop-content-length: 43 Content-length: 43 K 10 svn:ignore V 11 build-area PROPS-END Unfortunately a recent svn-fe change (vcs-svn: More dump format sanity checks, 2010-11-19) causes such nodes to be rejected with the error message fatal: invalid dump: path to be modified is missing The repo_tree module does not keep a dirent for the root of the tree. Add a block to the dump parser to take care of this case. 
Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/svndump.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index c71a57599e..1669d0fa5e 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -221,7 +221,10 @@ static void handle_node(void) } if (mark && type == REPO_MODE_DIR) die("invalid dump: directories cannot have text attached"); - if (node_ctx.action == NODEACT_CHANGE) { + if (node_ctx.action == NODEACT_CHANGE && !~*node_ctx.dst) { + if (type != REPO_MODE_DIR) + die("invalid dump: root of tree is not a regular file"); + } else if (node_ctx.action == NODEACT_CHANGE) { uint32_t mode = repo_modify_path(node_ctx.dst, 0, mark); if (!mode) die("invalid dump: path to be modified is missing"); -- cgit v1.2.3 From 5ee5f5a65d5a60cd327f1a4b92ee0f48508f2362 Mon Sep 17 00:00:00 2001 From: Ramsay Jones Date: Tue, 11 Jan 2011 18:17:21 +0000 Subject: svndump.c: Fix a printf format compiler warning In particular, on systems that define uint32_t as an unsigned long, gcc complains as follows: CC vcs-svn/svndump.o vcs-svn/svndump.c: In function `svndump_read': vcs-svn/svndump.c:215: warning: int format, uint32_t arg (arg 2) In order to suppress the warning we use the C99 format specifier macro PRIu32 from <inttypes.h>.
Signed-off-by: Ramsay Jones Acked-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/svndump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index fa580e62de..2ad2c307dd 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -211,7 +211,7 @@ void svndump_read(const char *url) if (key == keys.svn_fs_dump_format_version) { dump_ctx.version = atoi(val); if (dump_ctx.version > 2) - die("expected svn dump format version <= 2, found %d", + die("expected svn dump format version <= 2, found %"PRIu32, dump_ctx.version); } else if (key == keys.uuid) { dump_ctx.uuid = pool_intern(val); -- cgit v1.2.3 From 4d21bec0d2c2f83e3f1e84edae5a3259cdab862f Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 10 Oct 2010 21:37:10 -0500 Subject: vcs-svn: eliminate global byte_buffer The data stored in byte_buffer[] is always either discarded or written to stdout immediately. No need for it to persist between function calls. Signed-off-by: Jonathan Nieder --- vcs-svn/line_buffer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'vcs-svn') diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c index 1543567093..f22c94f025 100644 --- a/vcs-svn/line_buffer.c +++ b/vcs-svn/line_buffer.c @@ -14,7 +14,6 @@ obj_pool_gen(blob, char, 4096) static char line_buffer[LINE_BUFFER_LEN]; -static char byte_buffer[COPY_BUFFER_LEN]; static FILE *infile; int buffer_init(const char *filename) @@ -68,6 +67,7 @@ char *buffer_read_string(uint32_t len) void buffer_copy_bytes(uint32_t len) { + char byte_buffer[COPY_BUFFER_LEN]; uint32_t in; while (len > 0 && !feof(infile) && !ferror(infile)) { in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN; @@ -83,6 +83,7 @@ void buffer_copy_bytes(uint32_t len) void buffer_skip_bytes(uint32_t len) { + char byte_buffer[COPY_BUFFER_LEN]; uint32_t in; while (len > 0 && !feof(infile) && !ferror(infile)) { in = len < COPY_BUFFER_LEN ? 
len : COPY_BUFFER_LEN; -- cgit v1.2.3 From deadcef4c15d54d0a397180a1783ae8939254188 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sat, 6 Nov 2010 12:01:28 -0500 Subject: vcs-svn: replace buffer_read_string memory pool with a strbuf obj_pool is inherently global and does not use the standard growing factor alloc_nr, which makes it feel out of place in the git codebase. Plus it is overkill for this application: all that is needed is a buffer that can grow between requests to accommodate larger strings. Use a strbuf instead. As a side effect, this improves the error handling: allocation failures will result in a clean exit instead of segfaults. It would be nice to add a test case (using ulimit or failmalloc) but that can wait for another day. Signed-off-by: Jonathan Nieder --- vcs-svn/line_buffer.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c index f22c94f025..6f32f28e54 100644 --- a/vcs-svn/line_buffer.c +++ b/vcs-svn/line_buffer.c @@ -5,15 +5,13 @@ #include "git-compat-util.h" #include "line_buffer.h" -#include "obj_pool.h" +#include "strbuf.h" #define LINE_BUFFER_LEN 10000 #define COPY_BUFFER_LEN 4096 -/* Create memory pool for char sequence of known length */ -obj_pool_gen(blob, char, 4096) - static char line_buffer[LINE_BUFFER_LEN]; +static struct strbuf blob_buffer = STRBUF_INIT; static FILE *infile; int buffer_init(const char *filename) @@ -58,11 +56,9 @@ char *buffer_read_line(void) char *buffer_read_string(uint32_t len) { - char *s; - blob_free(blob_pool.size); - s = blob_pointer(blob_alloc(len + 1)); - s[fread(s, 1, len, infile)] = '\0'; - return ferror(infile) ? NULL : s; + strbuf_reset(&blob_buffer); + strbuf_fread(&blob_buffer, len, infile); + return ferror(infile) ?
NULL : blob_buffer.buf; } void buffer_copy_bytes(uint32_t len) @@ -94,5 +90,5 @@ void buffer_skip_bytes(uint32_t len) void buffer_reset(void) { - blob_reset(); + strbuf_release(&blob_buffer); } -- cgit v1.2.3 From d350822fa7d14052713bea0ec62ff1246d8a2f7a Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 10 Oct 2010 21:39:21 -0500 Subject: vcs-svn: collect line_buffer data in a struct Prepare for the line_buffer lib to support input from multiple files, by collecting global state in a struct that can be easily passed around. No API change yet. Signed-off-by: Jonathan Nieder --- vcs-svn/line_buffer.c | 45 ++++++++++++++++++++++----------------------- vcs-svn/line_buffer.h | 11 +++++++++++ 2 files changed, 33 insertions(+), 23 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c index 6f32f28e54..e7bc230fcb 100644 --- a/vcs-svn/line_buffer.c +++ b/vcs-svn/line_buffer.c @@ -7,17 +7,16 @@ #include "line_buffer.h" #include "strbuf.h" -#define LINE_BUFFER_LEN 10000 #define COPY_BUFFER_LEN 4096 - -static char line_buffer[LINE_BUFFER_LEN]; -static struct strbuf blob_buffer = STRBUF_INIT; -static FILE *infile; +static struct line_buffer buf_ = LINE_BUFFER_INIT; +static struct line_buffer *buf; int buffer_init(const char *filename) { - infile = filename ? fopen(filename, "r") : stdin; - if (!infile) + buf = &buf_; + + buf->infile = filename ? 
fopen(filename, "r") : stdin; + if (!buf->infile) return -1; return 0; } @@ -25,10 +24,10 @@ int buffer_init(const char *filename) int buffer_deinit(void) { int err; - if (infile == stdin) - return ferror(infile); - err = ferror(infile); - err |= fclose(infile); + if (buf->infile == stdin) + return ferror(buf->infile); + err = ferror(buf->infile); + err |= fclose(buf->infile); return err; } @@ -36,13 +35,13 @@ int buffer_deinit(void) char *buffer_read_line(void) { char *end; - if (!fgets(line_buffer, sizeof(line_buffer), infile)) + if (!fgets(buf->line_buffer, sizeof(buf->line_buffer), buf->infile)) /* Error or data exhausted. */ return NULL; - end = line_buffer + strlen(line_buffer); + end = buf->line_buffer + strlen(buf->line_buffer); if (end[-1] == '\n') end[-1] = '\0'; - else if (feof(infile)) + else if (feof(buf->infile)) ; /* No newline at end of file. That's fine. */ else /* @@ -51,23 +50,23 @@ char *buffer_read_line(void) * but for now let's return an error. */ return NULL; - return line_buffer; + return buf->line_buffer; } char *buffer_read_string(uint32_t len) { - strbuf_reset(&blob_buffer); - strbuf_fread(&blob_buffer, len, infile); - return ferror(infile) ? NULL : blob_buffer.buf; + strbuf_reset(&buf->blob_buffer); + strbuf_fread(&buf->blob_buffer, len, buf->infile); + return ferror(buf->infile) ? NULL : buf->blob_buffer.buf; } void buffer_copy_bytes(uint32_t len) { char byte_buffer[COPY_BUFFER_LEN]; uint32_t in; - while (len > 0 && !feof(infile) && !ferror(infile)) { + while (len > 0 && !feof(buf->infile) && !ferror(buf->infile)) { in = len < COPY_BUFFER_LEN ? 
len : COPY_BUFFER_LEN; - in = fread(byte_buffer, 1, in, infile); + in = fread(byte_buffer, 1, in, buf->infile); len -= in; fwrite(byte_buffer, 1, in, stdout); if (ferror(stdout)) { @@ -81,14 +80,14 @@ void buffer_skip_bytes(uint32_t len) { char byte_buffer[COPY_BUFFER_LEN]; uint32_t in; - while (len > 0 && !feof(infile) && !ferror(infile)) { + while (len > 0 && !feof(buf->infile) && !ferror(buf->infile)) { in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN; - in = fread(byte_buffer, 1, in, infile); + in = fread(byte_buffer, 1, in, buf->infile); len -= in; } } void buffer_reset(void) { - strbuf_release(&blob_buffer); + strbuf_release(&buf->blob_buffer); } diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h index 9c78ae11a1..4ae1133a92 100644 --- a/vcs-svn/line_buffer.h +++ b/vcs-svn/line_buffer.h @@ -1,6 +1,17 @@ #ifndef LINE_BUFFER_H_ #define LINE_BUFFER_H_ +#include "strbuf.h" + +#define LINE_BUFFER_LEN 10000 + +struct line_buffer { + char line_buffer[LINE_BUFFER_LEN]; + struct strbuf blob_buffer; + FILE *infile; +}; +#define LINE_BUFFER_INIT {"", STRBUF_INIT, NULL} + int buffer_init(const char *filename); int buffer_deinit(void); char *buffer_read_line(void); -- cgit v1.2.3 From e5e45ca1e35482d120a7ce776cf208369edcc459 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 10 Oct 2010 21:41:06 -0500 Subject: vcs-svn: teach line_buffer to handle multiple input files Collect the line_buffer state in a newly public line_buffer struct. Callers can use multiple line_buffers to manage input from multiple files at a time. svn-fe's delta applier will use this to stream a delta from svnrdump and the preimage it applies to from fast-import at the same time. The tests don't take advantage of the new features, but I think that's okay. It is easier to find lingering examples of nonreentrant code by searching for "static" in line_buffer.c. 
Signed-off-by: Jonathan Nieder --- vcs-svn/fast_export.c | 6 +++--- vcs-svn/fast_export.h | 5 ++++- vcs-svn/line_buffer.c | 20 ++++++++------------ vcs-svn/line_buffer.h | 14 +++++++------- vcs-svn/line_buffer.txt | 5 +++-- vcs-svn/svndump.c | 29 ++++++++++++++++------------- 6 files changed, 41 insertions(+), 38 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c index 6cfa256a37..260cf50e77 100644 --- a/vcs-svn/fast_export.c +++ b/vcs-svn/fast_export.c @@ -63,14 +63,14 @@ void fast_export_commit(uint32_t revision, uint32_t author, char *log, printf("progress Imported commit %"PRIu32".\n\n", revision); } -void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len) +void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len, struct line_buffer *input) { if (mode == REPO_MODE_LNK) { /* svn symlink blobs start with "link " */ - buffer_skip_bytes(5); + buffer_skip_bytes(input, 5); len -= 5; } printf("blob\nmark :%"PRIu32"\ndata %"PRIu32"\n", mark, len); - buffer_copy_bytes(len); + buffer_copy_bytes(input, len); fputc('\n', stdout); } diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h index 2aaaea53d5..054e7d5eb1 100644 --- a/vcs-svn/fast_export.h +++ b/vcs-svn/fast_export.h @@ -1,11 +1,14 @@ #ifndef FAST_EXPORT_H_ #define FAST_EXPORT_H_ +#include "line_buffer.h" + void fast_export_delete(uint32_t depth, uint32_t *path); void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, uint32_t mark); void fast_export_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, uint32_t url, unsigned long timestamp); -void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len); +void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len, + struct line_buffer *input); #endif diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c index e7bc230fcb..806932b321 100644 --- a/vcs-svn/line_buffer.c +++ b/vcs-svn/line_buffer.c @@ -8,20 +8,16 @@ #include "strbuf.h" #define COPY_BUFFER_LEN 
4096 -static struct line_buffer buf_ = LINE_BUFFER_INIT; -static struct line_buffer *buf; -int buffer_init(const char *filename) +int buffer_init(struct line_buffer *buf, const char *filename) { - buf = &buf_; - buf->infile = filename ? fopen(filename, "r") : stdin; if (!buf->infile) return -1; return 0; } -int buffer_deinit(void) +int buffer_deinit(struct line_buffer *buf) { int err; if (buf->infile == stdin) @@ -32,7 +28,7 @@ int buffer_deinit(void) } /* Read a line without trailing newline. */ -char *buffer_read_line(void) +char *buffer_read_line(struct line_buffer *buf) { char *end; if (!fgets(buf->line_buffer, sizeof(buf->line_buffer), buf->infile)) @@ -53,14 +49,14 @@ char *buffer_read_line(void) return buf->line_buffer; } -char *buffer_read_string(uint32_t len) +char *buffer_read_string(struct line_buffer *buf, uint32_t len) { strbuf_reset(&buf->blob_buffer); strbuf_fread(&buf->blob_buffer, len, buf->infile); return ferror(buf->infile) ? NULL : buf->blob_buffer.buf; } -void buffer_copy_bytes(uint32_t len) +void buffer_copy_bytes(struct line_buffer *buf, uint32_t len) { char byte_buffer[COPY_BUFFER_LEN]; uint32_t in; @@ -70,13 +66,13 @@ void buffer_copy_bytes(uint32_t len) len -= in; fwrite(byte_buffer, 1, in, stdout); if (ferror(stdout)) { - buffer_skip_bytes(len); + buffer_skip_bytes(buf, len); return; } } } -void buffer_skip_bytes(uint32_t len) +void buffer_skip_bytes(struct line_buffer *buf, uint32_t len) { char byte_buffer[COPY_BUFFER_LEN]; uint32_t in; @@ -87,7 +83,7 @@ void buffer_skip_bytes(uint32_t len) } } -void buffer_reset(void) +void buffer_reset(struct line_buffer *buf) { strbuf_release(&buf->blob_buffer); } diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h index 4ae1133a92..fb373903d2 100644 --- a/vcs-svn/line_buffer.h +++ b/vcs-svn/line_buffer.h @@ -12,12 +12,12 @@ struct line_buffer { }; #define LINE_BUFFER_INIT {"", STRBUF_INIT, NULL} -int buffer_init(const char *filename); -int buffer_deinit(void); -char *buffer_read_line(void); 
-char *buffer_read_string(uint32_t len); -void buffer_copy_bytes(uint32_t len); -void buffer_skip_bytes(uint32_t len); -void buffer_reset(void); +int buffer_init(struct line_buffer *buf, const char *filename); +int buffer_deinit(struct line_buffer *buf); +char *buffer_read_line(struct line_buffer *buf); +char *buffer_read_string(struct line_buffer *buf, uint32_t len); +void buffer_copy_bytes(struct line_buffer *buf, uint32_t len); +void buffer_skip_bytes(struct line_buffer *buf, uint32_t len); +void buffer_reset(struct line_buffer *buf); #endif diff --git a/vcs-svn/line_buffer.txt b/vcs-svn/line_buffer.txt index 8906fb1f50..f8eaa4dd8c 100644 --- a/vcs-svn/line_buffer.txt +++ b/vcs-svn/line_buffer.txt @@ -14,14 +14,15 @@ Calling sequence The calling program: + - initializes a `struct line_buffer` to LINE_BUFFER_INIT - specifies a file to read with `buffer_init` - processes input with `buffer_read_line`, `buffer_read_string`, `buffer_skip_bytes`, and `buffer_copy_bytes` - closes the file with `buffer_deinit`, perhaps to start over and read another file. -Before exiting, the caller can use `buffer_reset` to deallocate -resources for the benefit of profiling tools. +When finished, the caller can use `buffer_reset` to deallocate +resources. 
Functions --------- diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 2ad2c307dd..4195da9cf8 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -30,6 +30,8 @@ /* Create memory pool for log messages */ obj_pool_gen(log, char, 4096) +static struct line_buffer input = LINE_BUFFER_INIT; + static char* log_copy(uint32_t length, char *log) { char *buffer; @@ -115,14 +117,14 @@ static void read_props(void) uint32_t key = ~0; char *val = NULL; char *t; - while ((t = buffer_read_line()) && strcmp(t, "PROPS-END")) { + while ((t = buffer_read_line(&input)) && strcmp(t, "PROPS-END")) { if (!strncmp(t, "K ", 2)) { len = atoi(&t[2]); - key = pool_intern(buffer_read_string(len)); - buffer_read_line(); + key = pool_intern(buffer_read_string(&input, len)); + buffer_read_line(&input); } else if (!strncmp(t, "V ", 2)) { len = atoi(&t[2]); - val = buffer_read_string(len); + val = buffer_read_string(&input, len); if (key == keys.svn_log) { /* Value length excludes terminating nul. */ rev_ctx.log = log_copy(len + 1, val); @@ -137,7 +139,7 @@ static void read_props(void) node_ctx.type = REPO_MODE_LNK; } key = ~0; - buffer_read_line(); + buffer_read_line(&input); } } } @@ -179,9 +181,10 @@ static void handle_node(void) node_ctx.type = node_ctx.srcMode; if (node_ctx.mark) - fast_export_blob(node_ctx.type, node_ctx.mark, node_ctx.textLength); + fast_export_blob(node_ctx.type, + node_ctx.mark, node_ctx.textLength, &input); else if (node_ctx.textLength != LENGTH_UNKNOWN) - buffer_skip_bytes(node_ctx.textLength); + buffer_skip_bytes(&input, node_ctx.textLength); } static void handle_revision(void) @@ -200,7 +203,7 @@ void svndump_read(const char *url) uint32_t key; reset_dump_ctx(pool_intern(url)); - while ((t = buffer_read_line())) { + while ((t = buffer_read_line(&input))) { val = strstr(t, ": "); if (!val) continue; @@ -257,7 +260,7 @@ void svndump_read(const char *url) node_ctx.propLength = atoi(val); } else if (key == keys.content_length) { len = atoi(val); - 
buffer_read_line(); + buffer_read_line(&input); if (active_ctx == REV_CTX) { read_props(); } else if (active_ctx == NODE_CTX) { @@ -265,7 +268,7 @@ void svndump_read(const char *url) active_ctx = REV_CTX; } else { fprintf(stderr, "Unexpected content length header: %"PRIu32"\n", len); - buffer_skip_bytes(len); + buffer_skip_bytes(&input, len); } } } @@ -277,7 +280,7 @@ void svndump_read(const char *url) void svndump_init(const char *filename) { - buffer_init(filename); + buffer_init(&input, filename); repo_init(); reset_dump_ctx(~0); reset_rev_ctx(0); @@ -292,7 +295,7 @@ void svndump_deinit(void) reset_dump_ctx(~0); reset_rev_ctx(0); reset_node_ctx(NULL); - if (buffer_deinit()) + if (buffer_deinit(&input)) fprintf(stderr, "Input error\n"); if (ferror(stdout)) fprintf(stderr, "Output error\n"); @@ -301,7 +304,7 @@ void svndump_deinit(void) void svndump_reset(void) { log_reset(); - buffer_reset(); + buffer_reset(&input); repo_reset(); reset_dump_ctx(~0); reset_rev_ctx(0); -- cgit v1.2.3 From e832f43c1d26bf70611d98b62d95870a99292add Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 2 Jan 2011 21:05:46 -0600 Subject: vcs-svn: add binary-safe read function buffer_read_string works well for non line-oriented input except for one problem: it does not tell the caller how many bytes were actually written. This means that unless one is very careful about checking for errors (and eof) the calling program cannot tell the difference between the string "foo" followed by an early end of file and the string "foo\0bar\0baz". So introduce a variant that reports the length, too, a thinner wrapper around strbuf_fread. Its result is written to a strbuf so the caller does not need to keep track of the number of bytes read. 
Signed-off-by: Jonathan Nieder --- vcs-svn/line_buffer.c | 6 ++++++ vcs-svn/line_buffer.h | 1 + 2 files changed, 7 insertions(+) (limited to 'vcs-svn') diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c index 806932b321..661b007092 100644 --- a/vcs-svn/line_buffer.c +++ b/vcs-svn/line_buffer.c @@ -56,6 +56,12 @@ char *buffer_read_string(struct line_buffer *buf, uint32_t len) return ferror(buf->infile) ? NULL : buf->blob_buffer.buf; } +void buffer_read_binary(struct line_buffer *buf, + struct strbuf *sb, uint32_t size) +{ + strbuf_fread(sb, size, buf->infile); +} + void buffer_copy_bytes(struct line_buffer *buf, uint32_t len) { char byte_buffer[COPY_BUFFER_LEN]; diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h index fb373903d2..0c2d3d955a 100644 --- a/vcs-svn/line_buffer.h +++ b/vcs-svn/line_buffer.h @@ -16,6 +16,7 @@ int buffer_init(struct line_buffer *buf, const char *filename); int buffer_deinit(struct line_buffer *buf); char *buffer_read_line(struct line_buffer *buf); char *buffer_read_string(struct line_buffer *buf, uint32_t len); +void buffer_read_binary(struct line_buffer *buf, struct strbuf *sb, uint32_t len); void buffer_copy_bytes(struct line_buffer *buf, uint32_t len); void buffer_skip_bytes(struct line_buffer *buf, uint32_t len); void buffer_reset(struct line_buffer *buf); -- cgit v1.2.3 From cc193f1f0b45e4e65f246f1d5e6e8134844aa35b Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 2 Jan 2011 21:06:32 -0600 Subject: vcs-svn: allow character-oriented input buffer_read_char can be used in place of buffer_read_string(1) to avoid consuming valuable static buffer space. The delta applier will use this to read variable-length integers one byte at a time. Underneath, it is fgetc, wrapped so the line_buffer library can maintain its role as gatekeeper of input. Later it might be worth checking if fgetc_unlocked is faster --- most line_buffer functions are not thread-safe anyway. 
Helped-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/line_buffer.c | 5 +++++ vcs-svn/line_buffer.h | 1 + 2 files changed, 6 insertions(+) (limited to 'vcs-svn') diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c index 661b007092..37ec56e5be 100644 --- a/vcs-svn/line_buffer.c +++ b/vcs-svn/line_buffer.c @@ -27,6 +27,11 @@ int buffer_deinit(struct line_buffer *buf) return err; } +int buffer_read_char(struct line_buffer *buf) +{ + return fgetc(buf->infile); +} + /* Read a line without trailing newline. */ char *buffer_read_line(struct line_buffer *buf) { diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h index 0c2d3d955a..0a59c73e8b 100644 --- a/vcs-svn/line_buffer.h +++ b/vcs-svn/line_buffer.h @@ -16,6 +16,7 @@ int buffer_init(struct line_buffer *buf, const char *filename); int buffer_deinit(struct line_buffer *buf); char *buffer_read_line(struct line_buffer *buf); char *buffer_read_string(struct line_buffer *buf, uint32_t len); +int buffer_read_char(struct line_buffer *buf); void buffer_read_binary(struct line_buffer *buf, struct strbuf *sb, uint32_t len); void buffer_copy_bytes(struct line_buffer *buf, uint32_t len); void buffer_skip_bytes(struct line_buffer *buf, uint32_t len); void buffer_reset(struct line_buffer *buf); -- cgit v1.2.3 From cb3f87cf1ba90373fdc240d65a4d65434099d9a3 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 2 Jan 2011 21:09:38 -0600 Subject: vcs-svn: allow input from file descriptor Based-on-patch-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/line_buffer.c | 8 ++++++++ vcs-svn/line_buffer.h | 1 + vcs-svn/line_buffer.txt | 9 +++++---- 3 files changed, 14 insertions(+), 4 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c index 37ec56e5be..e29a81a536 100644 --- a/vcs-svn/line_buffer.c +++ b/vcs-svn/line_buffer.c @@ -17,6 +17,14 @@ int buffer_init(struct line_buffer *buf, const char *filename) return 0; } +int buffer_fdinit(struct line_buffer *buf, int fd) +{ + buf->infile = fdopen(fd, "r"); + if
(!buf->infile) + return -1; + return 0; +} + int buffer_deinit(struct line_buffer *buf) { int err; diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h index 0a59c73e8b..630d83c31a 100644 --- a/vcs-svn/line_buffer.h +++ b/vcs-svn/line_buffer.h @@ -13,6 +13,7 @@ struct line_buffer { #define LINE_BUFFER_INIT {"", STRBUF_INIT, NULL} int buffer_init(struct line_buffer *buf, const char *filename); +int buffer_fdinit(struct line_buffer *buf, int fd); int buffer_deinit(struct line_buffer *buf); char *buffer_read_line(struct line_buffer *buf); char *buffer_read_string(struct line_buffer *buf, uint32_t len); diff --git a/vcs-svn/line_buffer.txt b/vcs-svn/line_buffer.txt index f8eaa4dd8c..4e8fb719c1 100644 --- a/vcs-svn/line_buffer.txt +++ b/vcs-svn/line_buffer.txt @@ -27,10 +27,11 @@ resources. Functions --------- -`buffer_init`:: - Open the named file for input. If filename is NULL, - start reading from stdin. On failure, returns -1 (with - errno indicating the nature of the failure). +`buffer_init`, `buffer_fdinit`:: + Open the named file or file descriptor for input. + buffer_init(buf, NULL) prepares to read from stdin. + On failure, returns -1 (with errno indicating the nature + of the failure). `buffer_deinit`:: Stop reading from the current file (closing it unless -- cgit v1.2.3 From b1c9b798a6dd391aeaea31663a65164815701244 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 2 Jan 2011 21:10:59 -0600 Subject: vcs-svn: teach line_buffer about temporary files It can sometimes be useful to write information temporarily to file, to read back later. These functions allow a program to use the line_buffer facilities when doing so. It works like this: 1. find a unique filename with buffer_tmpfile_init. 2. rewind with buffer_tmpfile_rewind. This returns a stdio handle for writing. 3. when finished writing, declare so with buffer_tmpfile_prepare_to_read. The return value indicates how many bytes were written. 4. read whatever portion of the file is needed. 5. 
if finished, remove the temporary file with buffer_deinit; otherwise, go back to step 2. The svn support would use this to buffer the postimage from delta application until the length is known and fast-import can receive the resulting blob. Based-on-patch-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/line_buffer.c | 24 ++++++++++++++++++++++++ vcs-svn/line_buffer.h | 7 ++++++- vcs-svn/line_buffer.txt | 22 ++++++++++++++++++++++ 3 files changed, 52 insertions(+), 1 deletion(-) (limited to 'vcs-svn') diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c index e29a81a536..aedf105b70 100644 --- a/vcs-svn/line_buffer.c +++ b/vcs-svn/line_buffer.c @@ -25,6 +25,14 @@ int buffer_fdinit(struct line_buffer *buf, int fd) return 0; } +int buffer_tmpfile_init(struct line_buffer *buf) +{ + buf->infile = tmpfile(); + if (!buf->infile) + return -1; + return 0; +} + int buffer_deinit(struct line_buffer *buf) { int err; @@ -35,6 +43,22 @@ int buffer_deinit(struct line_buffer *buf) return err; } +FILE *buffer_tmpfile_rewind(struct line_buffer *buf) +{ + rewind(buf->infile); + return buf->infile; +} + +long buffer_tmpfile_prepare_to_read(struct line_buffer *buf) +{ + long pos = ftell(buf->infile); + if (pos < 0) + return error("ftell error: %s", strerror(errno)); + if (fseek(buf->infile, 0, SEEK_SET)) + return error("seek error: %s", strerror(errno)); + return pos; +} + int buffer_read_char(struct line_buffer *buf) { return fgetc(buf->infile); diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h index 630d83c31a..96ce966a22 100644 --- a/vcs-svn/line_buffer.h +++ b/vcs-svn/line_buffer.h @@ -15,12 +15,17 @@ struct line_buffer { int buffer_init(struct line_buffer *buf, const char *filename); int buffer_fdinit(struct line_buffer *buf, int fd); int buffer_deinit(struct line_buffer *buf); +void buffer_reset(struct line_buffer *buf); + +int buffer_tmpfile_init(struct line_buffer *buf); +FILE *buffer_tmpfile_rewind(struct line_buffer *buf); /* prepare to write.
*/ +long buffer_tmpfile_prepare_to_read(struct line_buffer *buf); + char *buffer_read_line(struct line_buffer *buf); char *buffer_read_string(struct line_buffer *buf, uint32_t len); int buffer_read_char(struct line_buffer *buf); void buffer_read_binary(struct line_buffer *buf, struct strbuf *sb, uint32_t len); void buffer_copy_bytes(struct line_buffer *buf, uint32_t len); void buffer_skip_bytes(struct line_buffer *buf, uint32_t len); -void buffer_reset(struct line_buffer *buf); #endif diff --git a/vcs-svn/line_buffer.txt b/vcs-svn/line_buffer.txt index 4e8fb719c1..e89cc41d56 100644 --- a/vcs-svn/line_buffer.txt +++ b/vcs-svn/line_buffer.txt @@ -24,6 +24,28 @@ The calling program: When finished, the caller can use `buffer_reset` to deallocate resources. +Using temporary files +--------------------- + +Temporary files provide a place to store data that should not outlive +the calling program. A program + + - initializes a `struct line_buffer` to LINE_BUFFER_INIT + - requests a temporary file with `buffer_tmpfile_init` + - acquires an output handle by calling `buffer_tmpfile_rewind` + - uses standard I/O functions like `fprintf` and `fwrite` to fill + the temporary file + - declares writing is over with `buffer_tmpfile_prepare_to_read` + - can re-read what was written with `buffer_read_line`, + `buffer_read_string`, and so on + - can reuse the temporary file by calling `buffer_tmpfile_rewind` + again + - removes the temporary file with `buffer_deinit`, perhaps to + reuse the line_buffer for some other file. + +When finished, the calling program can use `buffer_reset` to deallocate +resources. 
+ Functions --------- -- cgit v1.2.3 From 4f5de755a7931c5e15f4e7fc3d501588aa9ff88d Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sat, 20 Nov 2010 13:25:28 -0600 Subject: vcs-svn: introduce repo_read_path to check the content at a path The repo_tree structure remembers, for each path in each revision, a mode (regular file, executable, symlink, or directory) and content (blob mark or directory structure). Maintaining a second copy of all this information when it's already in the target repository is wasteful, it does not persist between svn-fe invocations, and most importantly, there is no convenient way to transfer it from one machine to another. So it would be nice to get rid of it. As a first step, let's change the repo_tree API to match fast-import's read commands more closely. Currently to read the mode for a path, one uses repo_modify_path(path, new_mode, new_content); which changes the mode and content as a side effect. There is no function to read the content at a path; add one. 
Signed-off-by: Jonathan Nieder Signed-off-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/repo_tree.c | 12 +++++++++++- vcs-svn/repo_tree.h | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'vcs-svn') diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c index 491f0135a7..8763de5c29 100644 --- a/vcs-svn/repo_tree.c +++ b/vcs-svn/repo_tree.c @@ -87,7 +87,8 @@ static struct repo_dir *repo_clone_dir(struct repo_dir *orig_dir) return dir_pointer(new_o); } -static struct repo_dirent *repo_read_dirent(uint32_t revision, uint32_t *path) +static struct repo_dirent *repo_read_dirent(uint32_t revision, + const uint32_t *path) { uint32_t name = 0; struct repo_dirent *key = dent_pointer(dent_alloc(1)); @@ -157,6 +158,15 @@ static void repo_write_dirent(uint32_t *path, uint32_t mode, dent_remove(&dir_pointer(parent_dir_o)->entries, dent); } +uint32_t repo_read_path(const uint32_t *path) +{ + uint32_t content_offset = 0; + struct repo_dirent *dent = repo_read_dirent(active_commit, path); + if (dent != NULL) + content_offset = dent->content_offset; + return content_offset; +} + uint32_t repo_copy(uint32_t revision, uint32_t *src, uint32_t *dst) { uint32_t mode = 0, content_offset = 0; diff --git a/vcs-svn/repo_tree.h b/vcs-svn/repo_tree.h index 68baeb582f..3202bbeffe 100644 --- a/vcs-svn/repo_tree.h +++ b/vcs-svn/repo_tree.h @@ -15,6 +15,7 @@ uint32_t next_blob_mark(void); uint32_t repo_copy(uint32_t revision, uint32_t *src, uint32_t *dst); void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark); uint32_t repo_modify_path(uint32_t *path, uint32_t mode, uint32_t blob_mark); +uint32_t repo_read_path(const uint32_t *path); void repo_delete(uint32_t *path); void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, uint32_t url, long unsigned timestamp); -- cgit v1.2.3 From 5a38b186d3ac5840d6ae78511d6dccab8367f242 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 10 Dec 2010 04:28:06 -0600 Subject: vcs-svn: handle_node: 
use repo_read_path svn-fe processes each commit in two stages: first decide on the correct content for all paths and export the relevant blobs, then export a commit with the result. But we can keep less state and simplify svn-fe a great deal by exporting the commit in one step: use 'inline' blobs for each path and remember nothing. This way, the repo_tree structure could be eliminated, and we would get support for incremental imports 'for free'. Reorganize handle_node along these lines. This is just a code cleanup; the changes in repo_tree and handle_revision will come later. [db: backported to apply without text delta support] Signed-off-by: Jonathan Nieder Signed-off-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/svndump.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index ee7c0bb2ea..f07376f964 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -201,13 +201,14 @@ static void handle_node(void) uint32_t mark = 0; const uint32_t type = node_ctx.type; const int have_props = node_ctx.propLength != LENGTH_UNKNOWN; + const int have_text = node_ctx.textLength != LENGTH_UNKNOWN; if (node_ctx.text_delta) die("text deltas not supported"); - if (node_ctx.textLength != LENGTH_UNKNOWN) + if (have_text) mark = next_blob_mark(); if (node_ctx.action == NODEACT_DELETE) { - if (mark || have_props || node_ctx.srcRev) + if (have_text || have_props || node_ctx.srcRev) die("invalid dump: deletion node has " "copyfrom info, text, or properties"); return repo_delete(node_ctx.dst); @@ -221,13 +222,20 @@ static void handle_node(void) if (node_ctx.action == NODEACT_ADD) node_ctx.action = NODEACT_CHANGE; } - if (mark && type == REPO_MODE_DIR) + if (have_text && type == REPO_MODE_DIR) die("invalid dump: directories cannot have text attached"); + + /* + * Decide on the new content (mark) and mode (node_ctx.type). 
+ */ if (node_ctx.action == NODEACT_CHANGE && !~*node_ctx.dst) { if (type != REPO_MODE_DIR) die("invalid dump: root of tree is not a regular file"); } else if (node_ctx.action == NODEACT_CHANGE) { - uint32_t mode = repo_modify_path(node_ctx.dst, 0, mark); + uint32_t mode; + if (!have_text) + mark = repo_read_path(node_ctx.dst); + mode = repo_modify_path(node_ctx.dst, 0, 0); if (!mode) die("invalid dump: path to be modified is missing"); if (mode == REPO_MODE_DIR && type != REPO_MODE_DIR) @@ -236,22 +244,27 @@ static void handle_node(void) die("invalid dump: cannot modify a file into a directory"); node_ctx.type = mode; } else if (node_ctx.action == NODEACT_ADD) { - if (!mark && type != REPO_MODE_DIR) + if (!have_text && type != REPO_MODE_DIR) die("invalid dump: adds node without text"); - repo_add(node_ctx.dst, type, mark); } else { die("invalid dump: Node-path block lacks Node-action"); } + + /* + * Adjust mode to reflect properties. + */ if (have_props) { - const uint32_t old_mode = node_ctx.type; if (!node_ctx.prop_delta) node_ctx.type = type; if (node_ctx.propLength) read_props(); - if (node_ctx.type != old_mode) - repo_modify_path(node_ctx.dst, node_ctx.type, mark); } - if (mark) + + /* + * Save the result. + */ + repo_add(node_ctx.dst, node_ctx.type, mark); + if (have_text) fast_export_blob(node_ctx.type, mark, node_ctx.textLength, &input); } -- cgit v1.2.3 From e75316de5340e0ba3ac75937c59fa2c9d6ab48d7 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 10 Dec 2010 00:53:54 -0600 Subject: vcs-svn: simplify repo_modify_path and repo_copy Restrict the repo_tree API to functions that are actually needed. - decouple reading the mode and content of dirents from other operations. - remove repo_modify_path. It is only used to read the mode from dirents. - remove the ability to use repo_read_mode on a missing path. The existing code only errors out in that case, anyway. 
Signed-off-by: Jonathan Nieder Signed-off-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/repo_tree.c | 27 ++++++++++----------------- vcs-svn/repo_tree.h | 4 ++-- vcs-svn/svndump.c | 4 +--- 3 files changed, 13 insertions(+), 22 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c index 8763de5c29..14bcc192b6 100644 --- a/vcs-svn/repo_tree.c +++ b/vcs-svn/repo_tree.c @@ -106,7 +106,7 @@ static struct repo_dirent *repo_read_dirent(uint32_t revision, return dent; } -static void repo_write_dirent(uint32_t *path, uint32_t mode, +static void repo_write_dirent(const uint32_t *path, uint32_t mode, uint32_t content_offset, uint32_t del) { uint32_t name, revision, dir_o = ~0, parent_dir_o = ~0; @@ -167,7 +167,15 @@ uint32_t repo_read_path(const uint32_t *path) return content_offset; } -uint32_t repo_copy(uint32_t revision, uint32_t *src, uint32_t *dst) +uint32_t repo_read_mode(const uint32_t *path) +{ + struct repo_dirent *dent = repo_read_dirent(active_commit, path); + if (dent == NULL) + die("invalid dump: path to be modified is missing"); + return dent->mode; +} + +void repo_copy(uint32_t revision, const uint32_t *src, const uint32_t *dst) { uint32_t mode = 0, content_offset = 0; struct repo_dirent *src_dent; @@ -177,7 +185,6 @@ uint32_t repo_copy(uint32_t revision, uint32_t *src, uint32_t *dst) content_offset = src_dent->content_offset; repo_write_dirent(dst, mode, content_offset, 0); } - return mode; } void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark) @@ -185,20 +192,6 @@ void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark) repo_write_dirent(path, mode, blob_mark, 0); } -uint32_t repo_modify_path(uint32_t *path, uint32_t mode, uint32_t blob_mark) -{ - struct repo_dirent *src_dent; - src_dent = repo_read_dirent(active_commit, path); - if (!src_dent) - return 0; - if (!blob_mark) - blob_mark = src_dent->content_offset; - if (!mode) - mode = src_dent->mode; - repo_write_dirent(path, mode, 
blob_mark, 0); - return mode; -} - void repo_delete(uint32_t *path) { repo_write_dirent(path, 0, 0, 1); diff --git a/vcs-svn/repo_tree.h b/vcs-svn/repo_tree.h index 3202bbeffe..11d48c2444 100644 --- a/vcs-svn/repo_tree.h +++ b/vcs-svn/repo_tree.h @@ -12,10 +12,10 @@ #define REPO_MAX_PATH_DEPTH 1000 uint32_t next_blob_mark(void); -uint32_t repo_copy(uint32_t revision, uint32_t *src, uint32_t *dst); +void repo_copy(uint32_t revision, const uint32_t *src, const uint32_t *dst); void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark); -uint32_t repo_modify_path(uint32_t *path, uint32_t mode, uint32_t blob_mark); uint32_t repo_read_path(const uint32_t *path); +uint32_t repo_read_mode(const uint32_t *path); void repo_delete(uint32_t *path); void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, uint32_t url, long unsigned timestamp); diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index f07376f964..e6d84bada5 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -235,9 +235,7 @@ static void handle_node(void) uint32_t mode; if (!have_text) mark = repo_read_path(node_ctx.dst); - mode = repo_modify_path(node_ctx.dst, 0, 0); - if (!mode) - die("invalid dump: path to be modified is missing"); + mode = repo_read_mode(node_ctx.dst); if (mode == REPO_MODE_DIR && type != REPO_MODE_DIR) die("invalid dump: cannot modify a directory into a file"); if (mode != REPO_MODE_DIR && type == REPO_MODE_DIR) -- cgit v1.2.3 From efc749b48f729992d838484d652ba24f5291ee28 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 10 Oct 2010 21:51:21 -0500 Subject: vcs-svn: allow input errors to be detected promptly The line_buffer library silently flags input errors until buffer_deinit time; unfortunately, by that point usually errno is invalid. Expose the error flag so callers can check for and report errors early for easy debugging. 
some_error_prone_operation(...); if (buffer_ferror(buf)) return error("input error: %s", strerror(errno)); Signed-off-by: Jonathan Nieder Signed-off-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/line_buffer.c | 5 +++++ vcs-svn/line_buffer.h | 1 + 2 files changed, 6 insertions(+) (limited to 'vcs-svn') diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c index aedf105b70..eb8a6a7f7b 100644 --- a/vcs-svn/line_buffer.c +++ b/vcs-svn/line_buffer.c @@ -59,6 +59,11 @@ long buffer_tmpfile_prepare_to_read(struct line_buffer *buf) return pos; } +int buffer_ferror(struct line_buffer *buf) +{ + return ferror(buf->infile); +} + int buffer_read_char(struct line_buffer *buf) { return fgetc(buf->infile); diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h index 96ce966a22..3c9629e09d 100644 --- a/vcs-svn/line_buffer.h +++ b/vcs-svn/line_buffer.h @@ -21,6 +21,7 @@ int buffer_tmpfile_init(struct line_buffer *buf); FILE *buffer_tmpfile_rewind(struct line_buffer *buf); /* prepare to write. */ long buffer_tmpfile_prepare_to_read(struct line_buffer *buf); +int buffer_ferror(struct line_buffer *buf); char *buffer_read_line(struct line_buffer *buf); char *buffer_read_string(struct line_buffer *buf, uint32_t len); int buffer_read_char(struct line_buffer *buf); -- cgit v1.2.3 From 06316234accdcb6608506aed6600cd60ff5c5c8e Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 16 Mar 2011 02:02:42 -0500 Subject: vcs-svn: remove spurious semicolons trp_gen is not a statement or function call, so it should not be followed with a semicolon. Noticed by gcc -pedantic. 
vcs-svn/repo_tree.c:41:81: warning: ISO C does not allow extra ';' outside of a function [-pedantic] Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/repo_tree.c | 2 +- vcs-svn/string_pool.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c index 491f0135a7..207ffc3a83 100644 --- a/vcs-svn/repo_tree.c +++ b/vcs-svn/repo_tree.c @@ -38,7 +38,7 @@ static uint32_t mark; static int repo_dirent_name_cmp(const void *a, const void *b); /* Treap for directory entries */ -trp_gen(static, dent_, struct repo_dirent, children, dent, repo_dirent_name_cmp); +trp_gen(static, dent_, struct repo_dirent, children, dent, repo_dirent_name_cmp) uint32_t next_blob_mark(void) { diff --git a/vcs-svn/string_pool.c b/vcs-svn/string_pool.c index f5b1da836e..8af8d54d6e 100644 --- a/vcs-svn/string_pool.c +++ b/vcs-svn/string_pool.c @@ -30,7 +30,7 @@ static int node_cmp(struct node *a, struct node *b) } /* Build a Treap from the node structure (a trp_node w/ offset) */ -trp_gen(static, tree_, struct node, children, node, node_cmp); +trp_gen(static, tree_, struct node, children, node, node_cmp) const char *pool_fetch(uint32_t entry) { -- cgit v1.2.3 From 93b709c79eea6231b7d75a6817245a416b4f8fb5 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 10 Oct 2010 21:46:24 -0500 Subject: vcs-svn: improve support for reading large files Move from uint32_t to off_t as the fundamental unit of length used by the line_buffer library. Performance would get worse if anything but I think it's worth it for support of deltas that need to skip large pieces (> 4 GiB). Exception: buffer_read_string still takes a uint32_t, since it keeps its result in an in-core obj_pool. Callers still have to be updated to take advantage of this. 
Signed-off-by: Jonathan Nieder Signed-off-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/line_buffer.c | 4 ++-- vcs-svn/line_buffer.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c index eb8a6a7f7b..747de07e6b 100644 --- a/vcs-svn/line_buffer.c +++ b/vcs-svn/line_buffer.c @@ -104,7 +104,7 @@ void buffer_read_binary(struct line_buffer *buf, strbuf_fread(sb, size, buf->infile); } -void buffer_copy_bytes(struct line_buffer *buf, uint32_t len) +void buffer_copy_bytes(struct line_buffer *buf, off_t len) { char byte_buffer[COPY_BUFFER_LEN]; uint32_t in; @@ -120,7 +120,7 @@ void buffer_copy_bytes(struct line_buffer *buf, uint32_t len) } } -void buffer_skip_bytes(struct line_buffer *buf, uint32_t len) +void buffer_skip_bytes(struct line_buffer *buf, off_t len) { char byte_buffer[COPY_BUFFER_LEN]; uint32_t in; diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h index 3c9629e09d..a090dd6874 100644 --- a/vcs-svn/line_buffer.h +++ b/vcs-svn/line_buffer.h @@ -26,7 +26,7 @@ char *buffer_read_line(struct line_buffer *buf); char *buffer_read_string(struct line_buffer *buf, uint32_t len); int buffer_read_char(struct line_buffer *buf); void buffer_read_binary(struct line_buffer *buf, struct strbuf *sb, uint32_t len); -void buffer_copy_bytes(struct line_buffer *buf, uint32_t len); -void buffer_skip_bytes(struct line_buffer *buf, uint32_t len); +void buffer_copy_bytes(struct line_buffer *buf, off_t len); +void buffer_skip_bytes(struct line_buffer *buf, off_t len); #endif -- cgit v1.2.3 From d234f54b2f82067699f36593188e687fc7dc321a Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 10 Oct 2010 21:44:21 -0500 Subject: vcs-svn: make buffer_skip_bytes return length read Currently there is no way to detect when input ended if it ended early during buffer_skip_bytes. Tell the calling program how many bytes were actually skipped for easier debugging. 
Existing callers will still ignore early EOF. Signed-off-by: Jonathan Nieder Signed-off-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/line_buffer.c | 13 +++++++------ vcs-svn/line_buffer.h | 2 +- vcs-svn/line_buffer.txt | 3 ++- 3 files changed, 10 insertions(+), 8 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c index 747de07e6b..39d52b88b7 100644 --- a/vcs-svn/line_buffer.c +++ b/vcs-svn/line_buffer.c @@ -120,15 +120,16 @@ void buffer_copy_bytes(struct line_buffer *buf, off_t len) } } -void buffer_skip_bytes(struct line_buffer *buf, off_t len) +off_t buffer_skip_bytes(struct line_buffer *buf, off_t nbytes) { char byte_buffer[COPY_BUFFER_LEN]; - uint32_t in; - while (len > 0 && !feof(buf->infile) && !ferror(buf->infile)) { - in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN; - in = fread(byte_buffer, 1, in, buf->infile); - len -= in; + off_t done = 0; + while (done < nbytes && !feof(buf->infile) && !ferror(buf->infile)) { + off_t len = nbytes - done; + size_t in = len < COPY_BUFFER_LEN ? 
len : COPY_BUFFER_LEN; + done += fread(byte_buffer, 1, in, buf->infile); } + return done; } void buffer_reset(struct line_buffer *buf) diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h index a090dd6874..7d10f9c751 100644 --- a/vcs-svn/line_buffer.h +++ b/vcs-svn/line_buffer.h @@ -27,6 +27,6 @@ char *buffer_read_string(struct line_buffer *buf, uint32_t len); int buffer_read_char(struct line_buffer *buf); void buffer_read_binary(struct line_buffer *buf, struct strbuf *sb, uint32_t len); void buffer_copy_bytes(struct line_buffer *buf, off_t len); -void buffer_skip_bytes(struct line_buffer *buf, off_t len); +off_t buffer_skip_bytes(struct line_buffer *buf, off_t len); #endif diff --git a/vcs-svn/line_buffer.txt b/vcs-svn/line_buffer.txt index e89cc41d56..4ef0755cf5 100644 --- a/vcs-svn/line_buffer.txt +++ b/vcs-svn/line_buffer.txt @@ -76,7 +76,8 @@ Functions `buffer_skip_bytes`:: Discards `len` bytes from the input stream (stopping early - if necessary because of an error or eof). + if necessary because of an error or eof). Return value is + the number of bytes successfully read. `buffer_reset`:: Deallocates non-static buffers. -- cgit v1.2.3 From 26557fc1b37480d184a32de025b060aa1aa231db Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Tue, 28 Dec 2010 04:26:17 -0600 Subject: vcs-svn: make buffer_copy_bytes return length read Currently buffer_copy_bytes does not report to its caller whether it encountered an early end of file. Add a return value representing the number of bytes read (but not the number of bytes copied). This way all three unusual conditions can be distinguished: input error with buffer_ferror, output error with ferror(outfile), early end of input by checking the return value. 
Signed-off-by: Jonathan Nieder Signed-off-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/line_buffer.c | 18 +++++++++--------- vcs-svn/line_buffer.h | 3 ++- 2 files changed, 11 insertions(+), 10 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c index 39d52b88b7..33e733a04c 100644 --- a/vcs-svn/line_buffer.c +++ b/vcs-svn/line_buffer.c @@ -104,20 +104,20 @@ void buffer_read_binary(struct line_buffer *buf, strbuf_fread(sb, size, buf->infile); } -void buffer_copy_bytes(struct line_buffer *buf, off_t len) +off_t buffer_copy_bytes(struct line_buffer *buf, off_t nbytes) { char byte_buffer[COPY_BUFFER_LEN]; - uint32_t in; - while (len > 0 && !feof(buf->infile) && !ferror(buf->infile)) { - in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN; + off_t done = 0; + while (done < nbytes && !feof(buf->infile) && !ferror(buf->infile)) { + off_t len = nbytes - done; + size_t in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN; in = fread(byte_buffer, 1, in, buf->infile); - len -= in; + done += in; fwrite(byte_buffer, 1, in, stdout); - if (ferror(stdout)) { - buffer_skip_bytes(buf, len); - return; - } + if (ferror(stdout)) + return done + buffer_skip_bytes(buf, nbytes - done); } + return done; } off_t buffer_skip_bytes(struct line_buffer *buf, off_t nbytes) diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h index 7d10f9c751..f5c468afa4 100644 --- a/vcs-svn/line_buffer.h +++ b/vcs-svn/line_buffer.h @@ -26,7 +26,8 @@ char *buffer_read_line(struct line_buffer *buf); char *buffer_read_string(struct line_buffer *buf, uint32_t len); int buffer_read_char(struct line_buffer *buf); void buffer_read_binary(struct line_buffer *buf, struct strbuf *sb, uint32_t len); -void buffer_copy_bytes(struct line_buffer *buf, off_t len); +/* Returns number of bytes read (not necessarily written). 
*/ +off_t buffer_copy_bytes(struct line_buffer *buf, off_t len); off_t buffer_skip_bytes(struct line_buffer *buf, off_t len); #endif -- cgit v1.2.3 From c9d1c8ba059577e64fb2213cb0c5f3c4619c7519 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Tue, 28 Dec 2010 04:30:54 -0600 Subject: vcs-svn: improve reporting of input errors Catch input errors and exit early enough to print a reasonable diagnosis based on errno. Signed-off-by: Jonathan Nieder Signed-off-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/fast_export.c | 13 +++++++++++-- vcs-svn/svndump.c | 29 ++++++++++++++++++++++++++--- 2 files changed, 37 insertions(+), 5 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c index 260cf50e77..07a8353c8b 100644 --- a/vcs-svn/fast_export.c +++ b/vcs-svn/fast_export.c @@ -63,14 +63,23 @@ void fast_export_commit(uint32_t revision, uint32_t author, char *log, printf("progress Imported commit %"PRIu32".\n\n", revision); } +static void die_short_read(struct line_buffer *input) +{ + if (buffer_ferror(input)) + die_errno("error reading dump file"); + die("invalid dump: unexpected end of file"); +} + void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len, struct line_buffer *input) { if (mode == REPO_MODE_LNK) { /* svn symlink blobs start with "link " */ - buffer_skip_bytes(input, 5); len -= 5; + if (buffer_skip_bytes(input, 5) != 5) + die_short_read(input); } printf("blob\nmark :%"PRIu32"\ndata %"PRIu32"\n", mark, len); - buffer_copy_bytes(input, len); + if (buffer_copy_bytes(input, len) != len) + die_short_read(input); fputc('\n', stdout); } diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index e6d84bada5..15f822ea84 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -149,6 +149,13 @@ static void handle_property(uint32_t key, const char *val, uint32_t len, } } +static void die_short_read(void) +{ + if (buffer_ferror(&input)) + die_errno("error reading dump file"); + die("invalid dump: unexpected end 
of file"); +} + static void read_props(void) { uint32_t key = ~0; @@ -170,12 +177,21 @@ static void read_props(void) uint32_t len; const char *val; const char type = t[0]; + int ch; if (!type || t[1] != ' ') die("invalid property line: %s\n", t); len = atoi(&t[2]); val = buffer_read_string(&input, len); - buffer_skip_bytes(&input, 1); /* Discard trailing newline. */ + if (!val || strlen(val) != len) + die_short_read(); + + /* Discard trailing newline. */ + ch = buffer_read_char(&input); + if (ch == EOF) + die_short_read(); + if (ch != '\n') + die("invalid dump: expected newline after %s", val); switch (type) { case 'K': @@ -344,7 +360,11 @@ void svndump_read(const char *url) node_ctx.prop_delta = !strcmp(val, "true"); } else if (key == keys.content_length) { len = atoi(val); - buffer_read_line(&input); + t = buffer_read_line(&input); + if (!t) + die_short_read(); + if (*t) + die("invalid dump: expected blank line after content length header"); if (active_ctx == REV_CTX) { read_props(); } else if (active_ctx == NODE_CTX) { @@ -352,10 +372,13 @@ void svndump_read(const char *url) active_ctx = REV_CTX; } else { fprintf(stderr, "Unexpected content length header: %"PRIu32"\n", len); - buffer_skip_bytes(&input, len); + if (buffer_skip_bytes(&input, len) != len) + die_short_read(); } } } + if (buffer_ferror(&input)) + die_short_read(); if (active_ctx == NODE_CTX) handle_node(); if (active_ctx != DUMP_CTX) -- cgit v1.2.3 From dce33c9c18e2987da1fe8ade67d27057bcb80a67 Mon Sep 17 00:00:00 2001 From: David Barr Date: Tue, 22 Mar 2011 10:49:50 +1100 Subject: vcs-svn: use strbuf for revision log obj_pool is overkill for this application: all that is needed is a buffer that can resize from rev to rev to accommodate differently-sized strings. In the spirit of commit deadcef4 (2010-11-06), use a strbuf instead. This is a small step towards removing dependence on obj_pool.h.
Signed-off-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/svndump.c | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 15f822ea84..559a8084ab 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -11,8 +11,8 @@ #include "repo_tree.h" #include "fast_export.h" #include "line_buffer.h" -#include "obj_pool.h" #include "string_pool.h" +#include "strbuf.h" #define NODEACT_REPLACE 4 #define NODEACT_DELETE 3 @@ -27,20 +27,8 @@ #define LENGTH_UNKNOWN (~0) #define DATE_RFC2822_LEN 31 -/* Create memory pool for log messages */ -obj_pool_gen(log, char, 4096) - static struct line_buffer input = LINE_BUFFER_INIT; -static char *log_copy(uint32_t length, const char *log) -{ - char *buffer; - log_free(log_pool.size); - buffer = log_pointer(log_alloc(length)); - strncpy(buffer, log, length); - return buffer; -} - static struct { uint32_t action, propLength, textLength, srcRev, type; uint32_t src[REPO_MAX_PATH_DEPTH], dst[REPO_MAX_PATH_DEPTH]; @@ -50,7 +38,7 @@ static struct { static struct { uint32_t revision, author; unsigned long timestamp; - char *log; + struct strbuf log; } rev_ctx; static struct { @@ -83,7 +71,7 @@ static void reset_rev_ctx(uint32_t revision) { rev_ctx.revision = revision; rev_ctx.timestamp = 0; - rev_ctx.log = NULL; + strbuf_reset(&rev_ctx.log); rev_ctx.author = ~0; } @@ -123,8 +111,8 @@ static void handle_property(uint32_t key, const char *val, uint32_t len, if (key == keys.svn_log) { if (!val) die("invalid dump: unsets svn:log"); - /* Value length excludes terminating nul. 
*/ - rev_ctx.log = log_copy(len + 1, val); + strbuf_reset(&rev_ctx.log); + strbuf_add(&rev_ctx.log, val, len); } else if (key == keys.svn_author) { rev_ctx.author = pool_intern(val); } else if (key == keys.svn_date) { @@ -286,7 +274,7 @@ static void handle_node(void) static void handle_revision(void) { if (rev_ctx.revision) - repo_commit(rev_ctx.revision, rev_ctx.author, rev_ctx.log, + repo_commit(rev_ctx.revision, rev_ctx.author, rev_ctx.log.buf, dump_ctx.uuid, dump_ctx.url, rev_ctx.timestamp); } @@ -390,6 +378,7 @@ int svndump_init(const char *filename) if (buffer_init(&input, filename)) return error("cannot open %s: %s", filename, strerror(errno)); repo_init(); + strbuf_init(&rev_ctx.log, 4096); reset_dump_ctx(~0); reset_rev_ctx(0); reset_node_ctx(NULL); @@ -399,11 +388,11 @@ int svndump_init(const char *filename) void svndump_deinit(void) { - log_reset(); repo_reset(); reset_dump_ctx(~0); reset_rev_ctx(0); reset_node_ctx(NULL); + strbuf_release(&rev_ctx.log); if (buffer_deinit(&input)) fprintf(stderr, "Input error\n"); if (ferror(stdout)) @@ -412,7 +401,6 @@ void svndump_deinit(void) void svndump_reset(void) { - log_reset(); buffer_reset(&input); repo_reset(); reset_dump_ctx(~0); -- cgit v1.2.3 From 7c5817d3ba111bb71a5d7e3c8526e0925f96c92d Mon Sep 17 00:00:00 2001 From: David Barr Date: Tue, 22 Mar 2011 17:52:17 -0500 Subject: vcs-svn: use strbuf for author, UUID, and URL Use strbufs and strings instead of interned strings for values of rev, dump, and node fields that happen to be strings. After this change, the only remaining string_pool use is for paths in the repo_tree API and internals. Functional change: treat an empty author, UUID, or URL as none at all. So for example, in repos where the first revision has an empty svn:author property, the first rev will be treated as by "nobody" rather than by a person with empty name and email address created by prepending an @ sign to the repository UUID. 
Signed-off-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/fast_export.c | 14 +++++++------- vcs-svn/fast_export.h | 5 +++-- vcs-svn/repo_tree.c | 4 ++-- vcs-svn/repo_tree.h | 5 +++-- vcs-svn/svndump.c | 45 ++++++++++++++++++++++++++++----------------- 5 files changed, 43 insertions(+), 30 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c index 07a8353c8b..a4d4d9993d 100644 --- a/vcs-svn/fast_export.c +++ b/vcs-svn/fast_export.c @@ -31,24 +31,24 @@ void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, } static char gitsvnline[MAX_GITSVN_LINE_LEN]; -void fast_export_commit(uint32_t revision, uint32_t author, char *log, - uint32_t uuid, uint32_t url, +void fast_export_commit(uint32_t revision, const char *author, char *log, + const char *uuid, const char *url, unsigned long timestamp) { if (!log) log = ""; - if (~uuid && ~url) { + if (*uuid && *url) { snprintf(gitsvnline, MAX_GITSVN_LINE_LEN, "\n\ngit-svn-id: %s@%"PRIu32" %s\n", - pool_fetch(url), revision, pool_fetch(uuid)); + url, revision, uuid); } else { *gitsvnline = '\0'; } printf("commit refs/heads/master\n"); printf("committer %s <%s@%s> %ld +0000\n", - ~author ? pool_fetch(author) : "nobody", - ~author ? pool_fetch(author) : "nobody", - ~uuid ? pool_fetch(uuid) : "local", timestamp); + *author ? author : "nobody", + *author ? author : "nobody", + *uuid ? 
uuid : "local", timestamp); printf("data %"PRIu32"\n%s%s\n", (uint32_t) (strlen(log) + strlen(gitsvnline)), log, gitsvnline); diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h index 054e7d5eb1..05cf97f3a7 100644 --- a/vcs-svn/fast_export.h +++ b/vcs-svn/fast_export.h @@ -6,8 +6,9 @@ void fast_export_delete(uint32_t depth, uint32_t *path); void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, uint32_t mark); -void fast_export_commit(uint32_t revision, uint32_t author, char *log, - uint32_t uuid, uint32_t url, unsigned long timestamp); +void fast_export_commit(uint32_t revision, const char *author, char *log, + const char *uuid, const char *url, + unsigned long timestamp); void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len, struct line_buffer *input); diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c index 14bcc192b6..d722e3212f 100644 --- a/vcs-svn/repo_tree.c +++ b/vcs-svn/repo_tree.c @@ -278,8 +278,8 @@ void repo_diff(uint32_t r1, uint32_t r2) repo_commit_root_dir(commit_pointer(r2))); } -void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, - uint32_t url, unsigned long timestamp) +void repo_commit(uint32_t revision, const char *author, char *log, + const char *uuid, const char *url, unsigned long timestamp) { fast_export_commit(revision, author, log, uuid, url, timestamp); dent_commit(); diff --git a/vcs-svn/repo_tree.h b/vcs-svn/repo_tree.h index 11d48c2444..a1b0e87651 100644 --- a/vcs-svn/repo_tree.h +++ b/vcs-svn/repo_tree.h @@ -17,8 +17,9 @@ void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark); uint32_t repo_read_path(const uint32_t *path); uint32_t repo_read_mode(const uint32_t *path); void repo_delete(uint32_t *path); -void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, - uint32_t url, long unsigned timestamp); +void repo_commit(uint32_t revision, const char *author, + char *log, const char *uuid, const char *url, + long unsigned timestamp); void 
repo_diff(uint32_t r1, uint32_t r2); void repo_init(void); void repo_reset(void); diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 559a8084ab..7ac74877fa 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -36,13 +36,14 @@ static struct { } node_ctx; static struct { - uint32_t revision, author; + uint32_t revision; unsigned long timestamp; - struct strbuf log; + struct strbuf log, author; } rev_ctx; static struct { - uint32_t version, uuid, url; + uint32_t version; + struct strbuf uuid, url; } dump_ctx; static struct { @@ -72,14 +73,16 @@ static void reset_rev_ctx(uint32_t revision) rev_ctx.revision = revision; rev_ctx.timestamp = 0; strbuf_reset(&rev_ctx.log); - rev_ctx.author = ~0; + strbuf_reset(&rev_ctx.author); } -static void reset_dump_ctx(uint32_t url) +static void reset_dump_ctx(const char *url) { - dump_ctx.url = url; + strbuf_reset(&dump_ctx.url); + if (url) + strbuf_addstr(&dump_ctx.url, url); dump_ctx.version = 1; - dump_ctx.uuid = ~0; + strbuf_reset(&dump_ctx.uuid); } static void init_keys(void) @@ -114,7 +117,9 @@ static void handle_property(uint32_t key, const char *val, uint32_t len, strbuf_reset(&rev_ctx.log); strbuf_add(&rev_ctx.log, val, len); } else if (key == keys.svn_author) { - rev_ctx.author = pool_intern(val); + strbuf_reset(&rev_ctx.author); + if (val) + strbuf_add(&rev_ctx.author, val, len); } else if (key == keys.svn_date) { if (!val) die("invalid dump: unsets svn:date"); @@ -274,8 +279,9 @@ static void handle_node(void) static void handle_revision(void) { if (rev_ctx.revision) - repo_commit(rev_ctx.revision, rev_ctx.author, rev_ctx.log.buf, - dump_ctx.uuid, dump_ctx.url, rev_ctx.timestamp); + repo_commit(rev_ctx.revision, rev_ctx.author.buf, + rev_ctx.log.buf, dump_ctx.uuid.buf, dump_ctx.url.buf, + rev_ctx.timestamp); } void svndump_read(const char *url) @@ -286,7 +292,7 @@ void svndump_read(const char *url) uint32_t len; uint32_t key; - reset_dump_ctx(pool_intern(url)); + reset_dump_ctx(url); while ((t = 
buffer_read_line(&input))) { val = strstr(t, ": "); if (!val) @@ -301,7 +307,8 @@ void svndump_read(const char *url) die("expected svn dump format version <= 3, found %"PRIu32, dump_ctx.version); } else if (key == keys.uuid) { - dump_ctx.uuid = pool_intern(val); + strbuf_reset(&dump_ctx.uuid); + strbuf_addstr(&dump_ctx.uuid, val); } else if (key == keys.revision_number) { if (active_ctx == NODE_CTX) handle_node(); @@ -378,8 +385,11 @@ int svndump_init(const char *filename) if (buffer_init(&input, filename)) return error("cannot open %s: %s", filename, strerror(errno)); repo_init(); + strbuf_init(&dump_ctx.uuid, 4096); + strbuf_init(&dump_ctx.url, 4096); strbuf_init(&rev_ctx.log, 4096); - reset_dump_ctx(~0); + strbuf_init(&rev_ctx.author, 4096); + reset_dump_ctx(NULL); reset_rev_ctx(0); reset_node_ctx(NULL); init_keys(); @@ -389,7 +399,7 @@ int svndump_init(const char *filename) void svndump_deinit(void) { repo_reset(); - reset_dump_ctx(~0); + reset_dump_ctx(NULL); reset_rev_ctx(0); reset_node_ctx(NULL); strbuf_release(&rev_ctx.log); @@ -403,7 +413,8 @@ void svndump_reset(void) { buffer_reset(&input); repo_reset(); - reset_dump_ctx(~0); - reset_rev_ctx(0); - reset_node_ctx(NULL); + strbuf_release(&dump_ctx.uuid); + strbuf_release(&dump_ctx.url); + strbuf_release(&rev_ctx.log); + strbuf_release(&rev_ctx.author); } -- cgit v1.2.3 From 044ad2906a5e4b805bc8c8d121466d8ff94ecbfb Mon Sep 17 00:00:00 2001 From: David Barr Date: Mon, 13 Dec 2010 19:13:24 +1100 Subject: vcs-svn: implement perfect hash for node-prop keys Instead of interning property names and comparing their string_pool keys, look them up in a table by string length, which should be about as fast. This is a small step towards removing dependence on string_pool. 
Signed-off-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/svndump.c | 62 ++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 19 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 15f822ea84..322d1cd305 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -14,6 +14,12 @@ #include "obj_pool.h" #include "string_pool.h" +/* + * Compare start of string to literal of equal length; + * must be guarded by length test. + */ +#define constcmp(s, ref) memcmp(s, ref, sizeof(ref) - 1) + #define NODEACT_REPLACE 4 #define NODEACT_DELETE 3 #define NODEACT_ADD 2 @@ -58,8 +64,7 @@ static struct { } dump_ctx; static struct { - uint32_t svn_log, svn_author, svn_date, svn_executable, svn_special, uuid, - revision_number, node_path, node_kind, node_action, + uint32_t uuid, revision_number, node_path, node_kind, node_action, node_copyfrom_path, node_copyfrom_rev, text_content_length, prop_content_length, content_length, svn_fs_dump_format_version, /* version 3 format */ @@ -96,11 +101,6 @@ static void reset_dump_ctx(uint32_t url) static void init_keys(void) { - keys.svn_log = pool_intern("svn:log"); - keys.svn_author = pool_intern("svn:author"); - keys.svn_date = pool_intern("svn:date"); - keys.svn_executable = pool_intern("svn:executable"); - keys.svn_special = pool_intern("svn:special"); keys.uuid = pool_intern("UUID"); keys.revision_number = pool_intern("Revision-number"); keys.node_path = pool_intern("Node-path"); @@ -117,22 +117,43 @@ static void init_keys(void) keys.prop_delta = pool_intern("Prop-delta"); } -static void handle_property(uint32_t key, const char *val, uint32_t len, +static void handle_property(const struct strbuf *key_buf, + const char *val, uint32_t len, uint32_t *type_set) { - if (key == keys.svn_log) { + const char *key = key_buf->buf; + size_t keylen = key_buf->len; + + switch (keylen + 1) { + case sizeof("svn:log"): + if (constcmp(key, "svn:log")) + break; if (!val) 
die("invalid dump: unsets svn:log"); /* Value length excludes terminating nul. */ rev_ctx.log = log_copy(len + 1, val); - } else if (key == keys.svn_author) { + break; + case sizeof("svn:author"): + if (constcmp(key, "svn:author")) + break; rev_ctx.author = pool_intern(val); - } else if (key == keys.svn_date) { + break; + case sizeof("svn:date"): + if (constcmp(key, "svn:date")) + break; if (!val) die("invalid dump: unsets svn:date"); if (parse_date_basic(val, &rev_ctx.timestamp, NULL)) warning("invalid timestamp: %s", val); - } else if (key == keys.svn_executable || key == keys.svn_special) { + break; + case sizeof("svn:executable"): + case sizeof("svn:special"): + if (keylen == strlen("svn:executable") && + constcmp(key, "svn:executable")) + break; + if (keylen == strlen("svn:special") && + constcmp(key, "svn:special")) + break; if (*type_set) { if (!val) return; @@ -143,7 +164,7 @@ static void handle_property(uint32_t key, const char *val, uint32_t len, return; } *type_set = 1; - node_ctx.type = key == keys.svn_executable ? + node_ctx.type = keylen == strlen("svn:executable") ? 
REPO_MODE_EXE : REPO_MODE_LNK; } @@ -158,7 +179,7 @@ static void die_short_read(void) static void read_props(void) { - uint32_t key = ~0; + static struct strbuf key = STRBUF_INIT; const char *t; /* * NEEDSWORK: to support simple mode changes like @@ -195,16 +216,19 @@ static void read_props(void) switch (type) { case 'K': - key = pool_intern(val); - continue; case 'D': - key = pool_intern(val); + strbuf_reset(&key); + if (val) + strbuf_add(&key, val, len); + if (type == 'K') + continue; + assert(type == 'D'); val = NULL; len = 0; /* fall through */ case 'V': - handle_property(key, val, len, &type_set); - key = ~0; + handle_property(&key, val, len, &type_set); + strbuf_reset(&key); continue; default: die("invalid property line: %s\n", t); -- cgit v1.2.3 From 90c0a3cfe390208c86144bf97ec8fa5610febe0f Mon Sep 17 00:00:00 2001 From: David Barr Date: Mon, 13 Dec 2010 19:56:01 +1100 Subject: vcs-svn: implement perfect hash for top-level keys Instead of interning property names and comparing their string_pool keys, look them up in a table by string length, which should be about as fast. Another small step towards removing dependence on string_pool altogether. 
Signed-off-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/svndump.c | 109 +++++++++++++++++++++++++++++------------------------- 1 file changed, 59 insertions(+), 50 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 322d1cd305..77680a31e8 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -63,14 +63,6 @@ static struct { uint32_t version, uuid, url; } dump_ctx; -static struct { - uint32_t uuid, revision_number, node_path, node_kind, node_action, - node_copyfrom_path, node_copyfrom_rev, text_content_length, - prop_content_length, content_length, svn_fs_dump_format_version, - /* version 3 format */ - text_delta, prop_delta; -} keys; - static void reset_node_ctx(char *fname) { node_ctx.type = 0; @@ -99,24 +91,6 @@ static void reset_dump_ctx(uint32_t url) dump_ctx.uuid = ~0; } -static void init_keys(void) -{ - keys.uuid = pool_intern("UUID"); - keys.revision_number = pool_intern("Revision-number"); - keys.node_path = pool_intern("Node-path"); - keys.node_kind = pool_intern("Node-kind"); - keys.node_action = pool_intern("Node-action"); - keys.node_copyfrom_path = pool_intern("Node-copyfrom-path"); - keys.node_copyfrom_rev = pool_intern("Node-copyfrom-rev"); - keys.text_content_length = pool_intern("Text-content-length"); - keys.prop_content_length = pool_intern("Prop-content-length"); - keys.content_length = pool_intern("Content-length"); - keys.svn_fs_dump_format_version = pool_intern("SVN-fs-dump-format-version"); - /* version 3 format (Subversion 1.1.0) */ - keys.text_delta = pool_intern("Text-delta"); - keys.prop_delta = pool_intern("Prop-delta"); -} - static void handle_property(const struct strbuf *key_buf, const char *val, uint32_t len, uint32_t *type_set) @@ -320,44 +294,61 @@ void svndump_read(const char *url) char *t; uint32_t active_ctx = DUMP_CTX; uint32_t len; - uint32_t key; reset_dump_ctx(pool_intern(url)); while ((t = buffer_read_line(&input))) { val = strstr(t, ": "); if (!val) continue; - 
*val++ = '\0'; - *val++ = '\0'; - key = pool_intern(t); + val += 2; - if (key == keys.svn_fs_dump_format_version) { + /* strlen(key) + 1 */ + switch (val - t - 1) { + case sizeof("SVN-fs-dump-format-version"): + if (constcmp(t, "SVN-fs-dump-format-version")) + continue; dump_ctx.version = atoi(val); if (dump_ctx.version > 3) die("expected svn dump format version <= 3, found %"PRIu32, dump_ctx.version); - } else if (key == keys.uuid) { + break; + case sizeof("UUID"): + if (constcmp(t, "UUID")) + continue; dump_ctx.uuid = pool_intern(val); - } else if (key == keys.revision_number) { + break; + case sizeof("Revision-number"): + if (constcmp(t, "Revision-number")) + continue; if (active_ctx == NODE_CTX) handle_node(); if (active_ctx != DUMP_CTX) handle_revision(); active_ctx = REV_CTX; reset_rev_ctx(atoi(val)); - } else if (key == keys.node_path) { - if (active_ctx == NODE_CTX) - handle_node(); - active_ctx = NODE_CTX; - reset_node_ctx(val); - } else if (key == keys.node_kind) { + break; + case sizeof("Node-path"): + if (prefixcmp(t, "Node-")) + continue; + if (!constcmp(t + strlen("Node-"), "path")) { + if (active_ctx == NODE_CTX) + handle_node(); + active_ctx = NODE_CTX; + reset_node_ctx(val); + break; + } + if (constcmp(t + strlen("Node-"), "kind")) + continue; if (!strcmp(val, "dir")) node_ctx.type = REPO_MODE_DIR; else if (!strcmp(val, "file")) node_ctx.type = REPO_MODE_BLB; else fprintf(stderr, "Unknown node-kind: %s\n", val); - } else if (key == keys.node_action) { + break; + case sizeof("Node-action"): + if (constcmp(t, "Node-action")) + continue; if (!strcmp(val, "delete")) { node_ctx.action = NODEACT_DELETE; } else if (!strcmp(val, "add")) { @@ -370,19 +361,38 @@ void svndump_read(const char *url) fprintf(stderr, "Unknown node-action: %s\n", val); node_ctx.action = NODEACT_UNKNOWN; } - } else if (key == keys.node_copyfrom_path) { + break; + case sizeof("Node-copyfrom-path"): + if (constcmp(t, "Node-copyfrom-path")) + continue; 
pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.src, "/", val); - } else if (key == keys.node_copyfrom_rev) { + break; + case sizeof("Node-copyfrom-rev"): + if (constcmp(t, "Node-copyfrom-rev")) + continue; node_ctx.srcRev = atoi(val); - } else if (key == keys.text_content_length) { - node_ctx.textLength = atoi(val); - } else if (key == keys.prop_content_length) { + break; + case sizeof("Text-content-length"): + if (!constcmp(t, "Text-content-length")) { + node_ctx.textLength = atoi(val); + break; + } + if (constcmp(t, "Prop-content-length")) + continue; node_ctx.propLength = atoi(val); - } else if (key == keys.text_delta) { - node_ctx.text_delta = !strcmp(val, "true"); - } else if (key == keys.prop_delta) { + break; + case sizeof("Text-delta"): + if (!constcmp(t, "Text-delta")) { + node_ctx.text_delta = !strcmp(val, "true"); + break; + } + if (constcmp(t, "Prop-delta")) + continue; node_ctx.prop_delta = !strcmp(val, "true"); - } else if (key == keys.content_length) { + break; + case sizeof("Content-length"): + if (constcmp(t, "Content-length")) + continue; len = atoi(val); t = buffer_read_line(&input); if (!t) @@ -417,7 +427,6 @@ int svndump_init(const char *filename) reset_dump_ctx(~0); reset_rev_ctx(0); reset_node_ctx(NULL); - init_keys(); return 0; } -- cgit v1.2.3 From f1602054e3a45e195edf814681e8f5ba88851623 Mon Sep 17 00:00:00 2001 From: David Barr Date: Tue, 14 Dec 2010 11:06:43 +1100 Subject: vcs-svn: use strchr to find RFC822 delimiter This is a small optimisation (4% reduction in user time) but is the largest artifact within the parsing portion of svndump.c Signed-off-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/svndump.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 77680a31e8..0919a576dc 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -297,10 +297,13 @@ void svndump_read(const char *url) reset_dump_ctx(pool_intern(url)); while ((t = 
buffer_read_line(&input))) { - val = strstr(t, ": "); + val = strchr(t, ':'); if (!val) continue; - val += 2; + val++; + if (*val != ' ') + continue; + val++; /* strlen(key) + 1 */ switch (val - t - 1) { -- cgit v1.2.3 From e7d04ee147dcbe6af1fa1d2147466696e2be31bc Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sat, 26 Mar 2011 00:15:10 -0500 Subject: vcs-svn: make reading of properties binary-safe svn-fe errors out on revision 59151 of the ASF repository: fatal: invalid dump: unexpected end of file The proximate cause is a property with an embedded NUL character. Previously such anomalies were ignored but commit c9d1c8ba (2010-12-28) introduced a check strlen(val) == len to avoid reading uninitialized data when a property list ends early and unfortunately this test does not distinguish between "foo" followed by EOF and the string "foo\0bar\0baz". Fix it by using buffer_read_binary to read to a strbuf and checking the actual length read. Most consumers of properties still use C-style strings, so in practice an author or log message with embedded NULs will be truncated, but at least this way svn-fe won't error out (fixing the regression).
Reported-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/svndump.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index ea5b128e4f..c00f031179 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -147,6 +147,7 @@ static void die_short_read(void) static void read_props(void) { static struct strbuf key = STRBUF_INIT; + static struct strbuf val = STRBUF_INIT; const char *t; /* * NEEDSWORK: to support simple mode changes like @@ -163,15 +164,15 @@ static void read_props(void) uint32_t type_set = 0; while ((t = buffer_read_line(&input)) && strcmp(t, "PROPS-END")) { uint32_t len; - const char *val; const char type = t[0]; int ch; if (!type || t[1] != ' ') die("invalid property line: %s\n", t); len = atoi(&t[2]); - val = buffer_read_string(&input, len); - if (!val || strlen(val) != len) + strbuf_reset(&val); + buffer_read_binary(&input, &val, len); + if (val.len < len) die_short_read(); /* Discard trailing newline. 
*/ @@ -179,22 +180,17 @@ static void read_props(void) if (ch == EOF) die_short_read(); if (ch != '\n') - die("invalid dump: expected newline after %s", val); + die("invalid dump: expected newline after %s", val.buf); switch (type) { case 'K': + strbuf_swap(&key, &val); + continue; case 'D': - strbuf_reset(&key); - if (val) - strbuf_add(&key, val, len); - if (type == 'K') - continue; - assert(type == 'D'); - val = NULL; - len = 0; - /* fall through */ + handle_property(&val, NULL, 0, &type_set); + continue; case 'V': - handle_property(&key, val, len, &type_set); + handle_property(&key, val.buf, len, &type_set); strbuf_reset(&key); continue; default: -- cgit v1.2.3 From 7e2fe3a9fc816391b322ad9b3f2adf9342631db6 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Thu, 24 Mar 2011 23:09:19 -0500 Subject: vcs-svn: remove buffer_read_string All previous users of buffer_read_string have already been converted to use the more intuitive buffer_read_binary, so remove the old API to avoid some confusion. Signed-off-by: Jonathan Nieder --- vcs-svn/line_buffer.c | 8 -------- vcs-svn/line_buffer.h | 4 +--- vcs-svn/line_buffer.txt | 12 +++--------- 3 files changed, 4 insertions(+), 20 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c index 33e733a04c..c39038723e 100644 --- a/vcs-svn/line_buffer.c +++ b/vcs-svn/line_buffer.c @@ -91,13 +91,6 @@ char *buffer_read_line(struct line_buffer *buf) return buf->line_buffer; } -char *buffer_read_string(struct line_buffer *buf, uint32_t len) -{ - strbuf_reset(&buf->blob_buffer); - strbuf_fread(&buf->blob_buffer, len, buf->infile); - return ferror(buf->infile) ? 
NULL : buf->blob_buffer.buf; -} - void buffer_read_binary(struct line_buffer *buf, struct strbuf *sb, uint32_t size) { @@ -134,5 +127,4 @@ off_t buffer_skip_bytes(struct line_buffer *buf, off_t nbytes) void buffer_reset(struct line_buffer *buf) { - strbuf_release(&buf->blob_buffer); } diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h index f5c468afa4..d0b22dda76 100644 --- a/vcs-svn/line_buffer.h +++ b/vcs-svn/line_buffer.h @@ -7,10 +7,9 @@ struct line_buffer { char line_buffer[LINE_BUFFER_LEN]; - struct strbuf blob_buffer; FILE *infile; }; -#define LINE_BUFFER_INIT {"", STRBUF_INIT, NULL} +#define LINE_BUFFER_INIT { "", NULL } int buffer_init(struct line_buffer *buf, const char *filename); int buffer_fdinit(struct line_buffer *buf, int fd); @@ -23,7 +22,6 @@ long buffer_tmpfile_prepare_to_read(struct line_buffer *buf); int buffer_ferror(struct line_buffer *buf); char *buffer_read_line(struct line_buffer *buf); -char *buffer_read_string(struct line_buffer *buf, uint32_t len); int buffer_read_char(struct line_buffer *buf); void buffer_read_binary(struct line_buffer *buf, struct strbuf *sb, uint32_t len); /* Returns number of bytes read (not necessarily written). */ diff --git a/vcs-svn/line_buffer.txt b/vcs-svn/line_buffer.txt index 4ef0755cf5..8e139eb22d 100644 --- a/vcs-svn/line_buffer.txt +++ b/vcs-svn/line_buffer.txt @@ -16,8 +16,8 @@ The calling program: - initializes a `struct line_buffer` to LINE_BUFFER_INIT - specifies a file to read with `buffer_init` - - processes input with `buffer_read_line`, `buffer_read_string`, - `buffer_skip_bytes`, and `buffer_copy_bytes` + - processes input with `buffer_read_line`, `buffer_skip_bytes`, + and `buffer_copy_bytes` - closes the file with `buffer_deinit`, perhaps to start over and read another file. @@ -37,7 +37,7 @@ the calling program. 
A program the temporary file - declares writing is over with `buffer_tmpfile_prepare_to_read` - can re-read what was written with `buffer_read_line`, - `buffer_read_string`, and so on + `buffer_copy_bytes`, and so on - can reuse the temporary file by calling `buffer_tmpfile_rewind` again - removes the temporary file with `buffer_deinit`, perhaps to @@ -64,12 +64,6 @@ Functions Read a line and strip off the trailing newline. On failure or end of file, returns NULL. -`buffer_read_string`:: - Read `len` characters of input or up to the end of the - file, whichever comes first. Returns NULL on error. - Returns whatever characters were read (possibly "") - for end of file. - `buffer_copy_bytes`:: Read `len` bytes of input and dump them to the standard output stream. Returns early for error or end of file. -- cgit v1.2.3 From 4c3169b03ec567ac43edcfc08ffdab119e0ebe94 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Thu, 24 Mar 2011 23:10:00 -0500 Subject: vcs-svn: avoid unnecessary copying of log message and author Use strbuf_swap when storing the svn:log and svn:author properties, so pointers to rather than the contents of buffers get copied. The main effect should be to make the code a little easier to read. 
Signed-off-by: Jonathan Nieder --- vcs-svn/svndump.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index c00f031179..88ecef1066 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -83,7 +83,7 @@ static void reset_dump_ctx(const char *url) } static void handle_property(const struct strbuf *key_buf, - const char *val, uint32_t len, + struct strbuf *val, uint32_t *type_set) { const char *key = key_buf->buf; @@ -95,23 +95,23 @@ static void handle_property(const struct strbuf *key_buf, break; if (!val) die("invalid dump: unsets svn:log"); - strbuf_reset(&rev_ctx.log); - strbuf_add(&rev_ctx.log, val, len); + strbuf_swap(&rev_ctx.log, val); break; case sizeof("svn:author"): if (constcmp(key, "svn:author")) break; - strbuf_reset(&rev_ctx.author); - if (val) - strbuf_add(&rev_ctx.author, val, len); + if (!val) + strbuf_reset(&rev_ctx.author); + else + strbuf_swap(&rev_ctx.author, val); break; case sizeof("svn:date"): if (constcmp(key, "svn:date")) break; if (!val) die("invalid dump: unsets svn:date"); - if (parse_date_basic(val, &rev_ctx.timestamp, NULL)) - warning("invalid timestamp: %s", val); + if (parse_date_basic(val->buf, &rev_ctx.timestamp, NULL)) + warning("invalid timestamp: %s", val->buf); break; case sizeof("svn:executable"): case sizeof("svn:special"): @@ -187,10 +187,10 @@ static void read_props(void) strbuf_swap(&key, &val); continue; case 'D': - handle_property(&val, NULL, 0, &type_set); + handle_property(&val, NULL, &type_set); continue; case 'V': - handle_property(&key, val.buf, len, &type_set); + handle_property(&key, &val, &type_set); strbuf_reset(&key); continue; default: -- cgit v1.2.3 From 195b7ca6f229455da61f9f6b6e56a6558fb0e8ee Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sat, 26 Mar 2011 00:49:37 -0500 Subject: vcs-svn: handle log message with embedded NUL Pass the log message by strbuf instead of as a C-style string and use fwrite 
instead of printf to write it to fast-import so embedded '\0' bytes can be preserved. Currently "git log" doesn't show the embedded NULs but "git cat-file commit" can. While at it, stop including system headers from repo_tree.h. git source files need to include git-compat-util.h (or cache.h or builtin.h) sooner to ensure the appropriate feature test macros are defined. Signed-off-by: Jonathan Nieder --- vcs-svn/fast_export.c | 12 +++++++----- vcs-svn/fast_export.h | 7 ++++--- vcs-svn/repo_tree.c | 5 +++-- vcs-svn/repo_tree.h | 4 ++-- vcs-svn/svndump.c | 2 +- 5 files changed, 17 insertions(+), 13 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c index a4d4d9993d..2e5bb67255 100644 --- a/vcs-svn/fast_export.c +++ b/vcs-svn/fast_export.c @@ -31,12 +31,14 @@ void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, } static char gitsvnline[MAX_GITSVN_LINE_LEN]; -void fast_export_commit(uint32_t revision, const char *author, char *log, +void fast_export_commit(uint32_t revision, const char *author, + const struct strbuf *log, const char *uuid, const char *url, unsigned long timestamp) { + static const struct strbuf empty = STRBUF_INIT; if (!log) - log = ""; + log = &empty; if (*uuid && *url) { snprintf(gitsvnline, MAX_GITSVN_LINE_LEN, "\n\ngit-svn-id: %s@%"PRIu32" %s\n", @@ -49,9 +51,9 @@ void fast_export_commit(uint32_t revision, const char *author, char *log, *author ? author : "nobody", *author ? author : "nobody", *uuid ? 
uuid : "local", timestamp); - printf("data %"PRIu32"\n%s%s\n", - (uint32_t) (strlen(log) + strlen(gitsvnline)), - log, gitsvnline); + printf("data %"PRIuMAX"\n", log->len + strlen(gitsvnline)); + fwrite(log->buf, log->len, 1, stdout); + printf("%s\n", gitsvnline); if (!first_commit_done) { if (revision > 1) printf("from refs/heads/master^0\n"); diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h index 05cf97f3a7..33a8fe996f 100644 --- a/vcs-svn/fast_export.h +++ b/vcs-svn/fast_export.h @@ -2,13 +2,14 @@ #define FAST_EXPORT_H_ #include "line_buffer.h" +struct strbuf; void fast_export_delete(uint32_t depth, uint32_t *path); void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, uint32_t mark); -void fast_export_commit(uint32_t revision, const char *author, char *log, - const char *uuid, const char *url, - unsigned long timestamp); +void fast_export_commit(uint32_t revision, const char *author, + const struct strbuf *log, const char *uuid, + const char *url, unsigned long timestamp); void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len, struct line_buffer *input); diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c index d722e3212f..8caa0159d1 100644 --- a/vcs-svn/repo_tree.c +++ b/vcs-svn/repo_tree.c @@ -278,8 +278,9 @@ void repo_diff(uint32_t r1, uint32_t r2) repo_commit_root_dir(commit_pointer(r2))); } -void repo_commit(uint32_t revision, const char *author, char *log, - const char *uuid, const char *url, unsigned long timestamp) +void repo_commit(uint32_t revision, const char *author, + const struct strbuf *log, const char *uuid, const char *url, + unsigned long timestamp) { fast_export_commit(revision, author, log, uuid, url, timestamp); dent_commit(); diff --git a/vcs-svn/repo_tree.h b/vcs-svn/repo_tree.h index a1b0e87651..37bde2e374 100644 --- a/vcs-svn/repo_tree.h +++ b/vcs-svn/repo_tree.h @@ -1,7 +1,7 @@ #ifndef REPO_TREE_H_ #define REPO_TREE_H_ -#include "git-compat-util.h" +struct strbuf; #define REPO_MODE_DIR 0040000 
#define REPO_MODE_BLB 0100644 @@ -18,7 +18,7 @@ uint32_t repo_read_path(const uint32_t *path); uint32_t repo_read_mode(const uint32_t *path); void repo_delete(uint32_t *path); void repo_commit(uint32_t revision, const char *author, - char *log, const char *uuid, const char *url, + const struct strbuf *log, const char *uuid, const char *url, long unsigned timestamp); void repo_diff(uint32_t r1, uint32_t r2); void repo_init(void); diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 88ecef1066..eef49ca192 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -274,7 +274,7 @@ static void handle_revision(void) { if (rev_ctx.revision) repo_commit(rev_ctx.revision, rev_ctx.author.buf, - rev_ctx.log.buf, dump_ctx.uuid.buf, dump_ctx.url.buf, + &rev_ctx.log, dump_ctx.uuid.buf, dump_ctx.url.buf, rev_ctx.timestamp); } -- cgit v1.2.3 From 41e6b91f01bc9bb7e1679542a8cce9bd4252fd2e Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 27 Mar 2011 12:19:14 -0500 Subject: vcs-svn: add missing cast to printf argument gcc -m32 correctly warns: vcs-svn/fast_export.c: In function 'fast_export_commit': vcs-svn/fast_export.c:54:2: warning: format '%llu' expects argument of type 'long long unsigned int', but argument 2 has type 'unsigned int' [-Wformat] Fix it. Signed-off-by: Jonathan Nieder --- vcs-svn/fast_export.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'vcs-svn') diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c index 2e5bb67255..99ed70b88a 100644 --- a/vcs-svn/fast_export.c +++ b/vcs-svn/fast_export.c @@ -51,7 +51,8 @@ void fast_export_commit(uint32_t revision, const char *author, *author ? author : "nobody", *author ? author : "nobody", *uuid ? 
uuid : "local", timestamp); - printf("data %"PRIuMAX"\n", log->len + strlen(gitsvnline)); + printf("data %"PRIuMAX"\n", + (uintmax_t) (log->len + strlen(gitsvnline))); fwrite(log->buf, log->len, 1, stdout); printf("%s\n", gitsvnline); if (!first_commit_done) { -- cgit v1.2.3 From 8cc299daf29c6726acada3ffad87c3fe2098aa02 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 27 Mar 2011 19:38:15 -0500 Subject: vcs-svn: add missing cast to printf argument gcc -m32 correctly warns: vcs-svn/fast_export.c: In function 'fast_export_commit': vcs-svn/fast_export.c:54:2: warning: format '%llu' expects argument of type 'long long unsigned int', but argument 2 has type 'unsigned int' [-Wformat] Fix it. Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/fast_export.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'vcs-svn') diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c index 2e5bb67255..99ed70b88a 100644 --- a/vcs-svn/fast_export.c +++ b/vcs-svn/fast_export.c @@ -51,7 +51,8 @@ void fast_export_commit(uint32_t revision, const char *author, *author ? author : "nobody", *author ? author : "nobody", *uuid ? uuid : "local", timestamp); - printf("data %"PRIuMAX"\n", log->len + strlen(gitsvnline)); + printf("data %"PRIuMAX"\n", + (uintmax_t) (log->len + strlen(gitsvnline))); fwrite(log->buf, log->len, 1, stdout); printf("%s\n", gitsvnline); if (!first_commit_done) { -- cgit v1.2.3 From 9e113988d3d95f1595e2c33f704defeb2cbcc5d6 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Tue, 29 Mar 2011 17:31:30 +0000 Subject: vcs-svn: a void function shouldn't try to return something MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As v1.7.4-rc0~184 (2010-10-04) and C99 §6.8.6.4.1 remind us, standard C does not permit returning an expression of type void, even for a tail call. 
Noticed with gcc -pedantic: vcs-svn/svndump.c: In function 'handle_node': vcs-svn/svndump.c:213:3: warning: ISO C forbids 'return' with expression, in function returning void [-pedantic] [jn: with simplified log message] Signed-off-by: Michael Witten Signed-off-by: Jonathan Nieder --- vcs-svn/svndump.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index eef49ca192..572a995966 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -214,7 +214,8 @@ static void handle_node(void) if (have_text || have_props || node_ctx.srcRev) die("invalid dump: deletion node has " "copyfrom info, text, or properties"); - return repo_delete(node_ctx.dst); + repo_delete(node_ctx.dst); + return; } if (node_ctx.action == NODEACT_REPLACE) { repo_delete(node_ctx.dst); -- cgit v1.2.3 From 0353a0c4ec91cf2d0a8e209025aa2e1909d05f19 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Wed, 13 Apr 2011 17:39:40 +0200 Subject: remove doubled words, e.g., s/to to/to/, and fix related typos I found that some doubled words had snuck back into projects from which I'd already removed them, so now there's a "syntax-check" makefile rule in gnulib to help prevent recurrence. 
Running the command below spotted a few in git, too: git ls-files | xargs perl -0777 -n \ -e 'while (/\b(then?|[iao]n|i[fst]|but|f?or|at|and|[dt])\s+\1\b/gims)' \ -e '{$n=($` =~ tr/\n/\n/ + 1); ($v=$&)=~s/\n/\\n/g;' \ -e 'print "$ARGV:$n:$v\n"}' Signed-off-by: Jim Meyering Signed-off-by: Junio C Hamano --- vcs-svn/trp.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'vcs-svn') diff --git a/vcs-svn/trp.txt b/vcs-svn/trp.txt index 5ca6b42edb..177ebca335 100644 --- a/vcs-svn/trp.txt +++ b/vcs-svn/trp.txt @@ -96,7 +96,7 @@ node_type *foo_search(struct trp_root \*treap, node_type \*key):: node_type *foo_nsearch(struct trp_root \*treap, node_type \*key):: - Like `foo_search`, but if if the key is missing return what + Like `foo_search`, but if the key is missing return what would be key's successor, were key in treap (NULL if no successor). -- cgit v1.2.3 From c51477229ee4c7846d40a447860b5bf94aa1103d Mon Sep 17 00:00:00 2001 From: Ramsay Jones Date: Thu, 7 Apr 2011 19:49:33 +0100 Subject: sparse: Fix some "symbol not declared" warnings In particular, sparse issues the "symbol 'a_symbol' was not declared. Should it be static?" warnings for the following symbols: attr.c:468:12: 'git_etc_gitattributes' attr.c:476:5: 'git_attr_system' vcs-svn/svndump.c:282:6: 'svndump_read' vcs-svn/svndump.c:417:5: 'svndump_init' vcs-svn/svndump.c:432:6: 'svndump_deinit' vcs-svn/svndump.c:445:6: 'svndump_reset' The symbols in attr.c only require file scope, so we add the static modifier to their declaration. The symbols in vcs-svn/svndump.c are external symbols, and they already have extern declarations in the "svndump.h" header file, so we simply include the header in svndump.c. 
Signed-off-by: Ramsay Jones Signed-off-by: Junio C Hamano --- vcs-svn/svndump.c | 1 + 1 file changed, 1 insertion(+) (limited to 'vcs-svn') diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 572a995966..bc792223b2 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -13,6 +13,7 @@ #include "line_buffer.h" #include "string_pool.h" #include "strbuf.h" +#include "svndump.h" /* * Compare start of string to literal of equal length; -- cgit v1.2.3