From 9d2f5ddfe56fcc228a36dd079f0897e0f474eb4e Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 2 Jan 2011 21:54:58 -0600 Subject: vcs-svn: learn to maintain a sliding view of a file Each section of a Subversion-format delta only requires examining (and keeping in random-access memory) a small portion of the preimage. At any moment, this portion starts at a certain file offset and has a well-defined length, and as the delta is applied, the portion advances from the beginning to the end of the preimage. Add a move_window function to keep track of this view into the preimage. You can use it like this: buffer_init(f, NULL); struct sliding_view window = SLIDING_VIEW_INIT(f); move_window(&window, 3, 7); /* (1) */ move_window(&window, 5, 5); /* (2) */ move_window(&window, 12, 2); /* (3) */ strbuf_release(&window.buf); buffer_deinit(f); The data structure is called sliding_view instead of _window to prevent confusion with svndiff0 Windows. In this example, (1) reads 10 bytes and discards the first 3; (2) discards the first 2, which are not needed any more; and (3) skips 2 bytes and reads 2 new bytes to work with. When move_window returns, the file position indicator is at position window->off + window->width and the data from positions window->off to the current file position are stored in window->buf. This function performs only sequential access from the input file and never seeks, so it can be safely used on pipes and sockets. On end-of-file, move_window silently reads less than the caller requested. On other errors, it prints a message and returns -1. 
Helped-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/LICENSE | 2 ++ vcs-svn/sliding_window.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++ vcs-svn/sliding_window.h | 17 +++++++++++ 3 files changed, 96 insertions(+) create mode 100644 vcs-svn/sliding_window.c create mode 100644 vcs-svn/sliding_window.h (limited to 'vcs-svn') diff --git a/vcs-svn/LICENSE b/vcs-svn/LICENSE index 0a5e3c43a0..805882c838 100644 --- a/vcs-svn/LICENSE +++ b/vcs-svn/LICENSE @@ -1,6 +1,8 @@ Copyright (C) 2010 David Barr . All rights reserved. +Copyright (C) 2010 Jonathan Nieder . + Copyright (C) 2008 Jason Evans . All rights reserved. diff --git a/vcs-svn/sliding_window.c b/vcs-svn/sliding_window.c new file mode 100644 index 0000000000..1b8d9875ed --- /dev/null +++ b/vcs-svn/sliding_window.c @@ -0,0 +1,77 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#include "git-compat-util.h" +#include "sliding_window.h" +#include "line_buffer.h" +#include "strbuf.h" + +static int input_error(struct line_buffer *file) +{ + if (!buffer_ferror(file)) + return error("delta preimage ends early"); + return error("cannot read delta preimage: %s", strerror(errno)); +} + +static int skip_or_whine(struct line_buffer *file, off_t gap) +{ + if (buffer_skip_bytes(file, gap) != gap) + return input_error(file); + return 0; +} + +static int read_to_fill_or_whine(struct line_buffer *file, + struct strbuf *buf, size_t width) +{ + buffer_read_binary(file, buf, width - buf->len); + if (buf->len != width) + return input_error(file); + return 0; +} + +static int check_overflow(off_t a, size_t b) +{ + if (b > maximum_signed_value_of_type(off_t)) + return error("unrepresentable length in delta: " + "%"PRIuMAX" > OFF_MAX", (uintmax_t) b); + if (signed_add_overflows(a, (off_t) b)) + return error("unrepresentable offset in delta: " + "%"PRIuMAX" + %"PRIuMAX" > OFF_MAX", + (uintmax_t) a, (uintmax_t) b); + return 0; +} + +int move_window(struct sliding_view *view, 
off_t off, size_t width) +{ + off_t file_offset; + assert(view); + assert(view->width <= view->buf.len); + assert(!check_overflow(view->off, view->buf.len)); + + if (check_overflow(off, width)) + return -1; + if (off < view->off || off + width < view->off + view->width) + return error("invalid delta: window slides left"); + + file_offset = view->off + view->buf.len; + if (off < file_offset) { + /* Move the overlapping region into place. */ + strbuf_remove(&view->buf, 0, off - view->off); + } else { + /* Seek ahead to skip the gap. */ + if (skip_or_whine(view->file, off - file_offset)) + return -1; + strbuf_setlen(&view->buf, 0); + } + + if (view->buf.len > width) + ; /* Already read. */ + else if (read_to_fill_or_whine(view->file, &view->buf, width)) + return -1; + + view->off = off; + view->width = width; + return 0; +} diff --git a/vcs-svn/sliding_window.h b/vcs-svn/sliding_window.h new file mode 100644 index 0000000000..ed0bfdd65c --- /dev/null +++ b/vcs-svn/sliding_window.h @@ -0,0 +1,17 @@ +#ifndef SLIDING_WINDOW_H_ +#define SLIDING_WINDOW_H_ + +#include "strbuf.h" + +struct sliding_view { + struct line_buffer *file; + off_t off; + size_t width; + struct strbuf buf; +}; + +#define SLIDING_VIEW_INIT(input) { (input), 0, 0, STRBUF_INIT } + +extern int move_window(struct sliding_view *view, off_t off, size_t width); + +#endif -- cgit v1.2.3 From 896e4bfcec4f6b489aba2197f60a59bc7f45a8ac Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 2 Jan 2011 21:37:36 -0600 Subject: vcs-svn: make buffer_read_binary API more convenient buffer_read_binary is a thin wrapper around fread, but its signature is wrong: - fread can fill an arbitrary in-memory buffer. buffer_read_binary is limited to buffers whose size is representable by a 32-bit integer. - The result from fread is the number of bytes actually read. buffer_read_binary only reports the number of bytes read by incrementing sb->len by that amount and returns void. 
Fix both: let buffer_read_binary accept a size_t instead of uint32_t for the number of bytes to read and as a convenience return the number of bytes actually read. Signed-off-by: Jonathan Nieder Signed-off-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/line_buffer.c | 6 +++--- vcs-svn/line_buffer.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c index c39038723e..01fcb842f1 100644 --- a/vcs-svn/line_buffer.c +++ b/vcs-svn/line_buffer.c @@ -91,10 +91,10 @@ char *buffer_read_line(struct line_buffer *buf) return buf->line_buffer; } -void buffer_read_binary(struct line_buffer *buf, - struct strbuf *sb, uint32_t size) +size_t buffer_read_binary(struct line_buffer *buf, + struct strbuf *sb, size_t size) { - strbuf_fread(sb, size, buf->infile); + return strbuf_fread(sb, size, buf->infile); } off_t buffer_copy_bytes(struct line_buffer *buf, off_t nbytes) diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h index d0b22dda76..8901f214ba 100644 --- a/vcs-svn/line_buffer.h +++ b/vcs-svn/line_buffer.h @@ -23,7 +23,7 @@ long buffer_tmpfile_prepare_to_read(struct line_buffer *buf); int buffer_ferror(struct line_buffer *buf); char *buffer_read_line(struct line_buffer *buf); int buffer_read_char(struct line_buffer *buf); -void buffer_read_binary(struct line_buffer *buf, struct strbuf *sb, uint32_t len); +size_t buffer_read_binary(struct line_buffer *buf, struct strbuf *sb, size_t len); /* Returns number of bytes read (not necessarily written). 
*/ off_t buffer_copy_bytes(struct line_buffer *buf, off_t len); off_t buffer_skip_bytes(struct line_buffer *buf, off_t len); -- cgit v1.2.3 From ddcc8c5b469d2564dbacd629a873e7703f2dbd83 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sat, 25 Dec 2010 05:11:32 -0600 Subject: vcs-svn: skeleton of an svn delta parser A delta in the subversion delta (svndiff0) format consists of the magic bytes SVN\0 followed by a sequence of windows of a certain well specified format (starting with five integers). Add an svndiff0_apply function and test-svn-fe -d commandline tool to parse such a delta in the special case of not including any windows. Later patches will add features to turn this into a fully functional delta applier for svn-fe to use to parse the streams produced by "svnrdump dump" and "svnadmin dump --deltas". The content of symlinks starts with the word "link " in Subversion's worldview, so we need to be able to prepend that text to input for the sake of delta application. So initialization of the input state of the delta preimage is left to the calling program, giving callers a chance to seed the buffer with text of their choice. Improved-by: Ramkumar Ramachandra Improved-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/svndiff.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ vcs-svn/svndiff.h | 10 ++++++++++ 2 files changed, 62 insertions(+) create mode 100644 vcs-svn/svndiff.c create mode 100644 vcs-svn/svndiff.h (limited to 'vcs-svn') diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c new file mode 100644 index 0000000000..591603669c --- /dev/null +++ b/vcs-svn/svndiff.c @@ -0,0 +1,52 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#include "git-compat-util.h" +#include "line_buffer.h" +#include "svndiff.h" + +/* + * svndiff0 applier + * + * See http://svn.apache.org/repos/asf/subversion/trunk/notes/svndiff. 
+ * + * svndiff0 ::= 'SVN\0' window* + */ + +static int error_short_read(struct line_buffer *input) +{ + if (buffer_ferror(input)) + return error("error reading delta: %s", strerror(errno)); + return error("invalid delta: unexpected end of file"); +} + +static int read_magic(struct line_buffer *in, off_t *len) +{ + static const char magic[] = {'S', 'V', 'N', '\0'}; + struct strbuf sb = STRBUF_INIT; + + if (*len < sizeof(magic) || + buffer_read_binary(in, &sb, sizeof(magic)) != sizeof(magic)) + return error_short_read(in); + + if (memcmp(sb.buf, magic, sizeof(magic))) + return error("invalid delta: unrecognized file type"); + + *len -= sizeof(magic); + strbuf_release(&sb); + return 0; +} + +int svndiff0_apply(struct line_buffer *delta, off_t delta_len, + struct sliding_view *preimage, FILE *postimage) +{ + assert(delta && preimage && postimage); + + if (read_magic(delta, &delta_len)) + return -1; + if (delta_len) + return error("What do you think I am? A delta applier?"); + return 0; +} diff --git a/vcs-svn/svndiff.h b/vcs-svn/svndiff.h new file mode 100644 index 0000000000..74eb464bab --- /dev/null +++ b/vcs-svn/svndiff.h @@ -0,0 +1,10 @@ +#ifndef SVNDIFF_H_ +#define SVNDIFF_H_ + +struct line_buffer; +struct sliding_view; + +extern int svndiff0_apply(struct line_buffer *delta, off_t delta_len, + struct sliding_view *preimage, FILE *postimage); + +#endif -- cgit v1.2.3 From 252712111fad127db365e3dd764309fe5658679a Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 13 Oct 2010 04:21:43 -0500 Subject: vcs-svn: parse svndiff0 window header Each window in a subversion delta (svndiff0-format file) starts with a window header, consisting of five integers with variable-length representation: source view offset source view length output length instructions length auxiliary data length Parse it. The result is not usable for deltas with nonempty postimage yet; in fact, this only adds support for deltas without any instructions or auxiliary data. 
This is a good place to stop, though, since that little support lets us add some simple passing tests concerning error handling to the test suite. Improved-by: Ramkumar Ramachandra Improved-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/svndiff.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 87 insertions(+), 5 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c index 591603669c..249efb6eed 100644 --- a/vcs-svn/svndiff.c +++ b/vcs-svn/svndiff.c @@ -13,8 +13,16 @@ * See http://svn.apache.org/repos/asf/subversion/trunk/notes/svndiff. * * svndiff0 ::= 'SVN\0' window* + * window ::= int int int int int instructions inline_data; + * int ::= highdigit* lowdigit; + * highdigit ::= # binary 1000 0000 OR-ed with 7 bit value; + * lowdigit ::= # 7 bit value; */ +#define VLI_CONTINUE 0x80 +#define VLI_DIGIT_MASK 0x7f +#define VLI_BITS_PER_DIGIT 7 + static int error_short_read(struct line_buffer *input) { if (buffer_ferror(input)) @@ -28,17 +36,84 @@ static int read_magic(struct line_buffer *in, off_t *len) struct strbuf sb = STRBUF_INIT; if (*len < sizeof(magic) || - buffer_read_binary(in, &sb, sizeof(magic)) != sizeof(magic)) - return error_short_read(in); + buffer_read_binary(in, &sb, sizeof(magic)) != sizeof(magic)) { + error_short_read(in); + strbuf_release(&sb); + return -1; + } - if (memcmp(sb.buf, magic, sizeof(magic))) + if (memcmp(sb.buf, magic, sizeof(magic))) { + strbuf_release(&sb); return error("invalid delta: unrecognized file type"); + } *len -= sizeof(magic); strbuf_release(&sb); return 0; } +static int read_int(struct line_buffer *in, uintmax_t *result, off_t *len) +{ + uintmax_t rv = 0; + off_t sz; + for (sz = *len; sz; sz--) { + const int ch = buffer_read_char(in); + if (ch == EOF) + break; + + rv <<= VLI_BITS_PER_DIGIT; + rv += (ch & VLI_DIGIT_MASK); + if (ch & VLI_CONTINUE) + continue; + + *result = rv; + *len = sz - 1; + return 0; + } + return error_short_read(in); +} + +static 
int read_offset(struct line_buffer *in, off_t *result, off_t *len) +{ + uintmax_t val; + if (read_int(in, &val, len)) + return -1; + if (val > maximum_signed_value_of_type(off_t)) + return error("unrepresentable offset in delta: %"PRIuMAX"", val); + *result = val; + return 0; +} + +static int read_length(struct line_buffer *in, size_t *result, off_t *len) +{ + uintmax_t val; + if (read_int(in, &val, len)) + return -1; + if (val > SIZE_MAX) + return error("unrepresentable length in delta: %"PRIuMAX"", val); + *result = val; + return 0; +} + +static int apply_one_window(struct line_buffer *delta, off_t *delta_len) +{ + size_t out_len; + size_t instructions_len; + size_t data_len; + assert(delta_len); + + /* "source view" offset and length already handled; */ + if (read_length(delta, &out_len, delta_len) || + read_length(delta, &instructions_len, delta_len) || + read_length(delta, &data_len, delta_len)) + return -1; + if (instructions_len) + return error("What do you think I am? A delta applier?"); + if (data_len) + return error("No support for inline data yet"); + return 0; +} + int svndiff0_apply(struct line_buffer *delta, off_t delta_len, struct sliding_view *preimage, FILE *postimage) { @@ -46,7 +121,14 @@ int svndiff0_apply(struct line_buffer *delta, off_t delta_len, if (read_magic(delta, &delta_len)) return -1; - if (delta_len) - return error("What do you think I am? A delta applier?"); + while (delta_len) { /* For each window: */ + off_t pre_off; + size_t pre_len; + + if (read_offset(delta, &pre_off, &delta_len) || + read_length(delta, &pre_len, &delta_len) || + apply_one_window(delta, &delta_len)) + return -1; + } return 0; } -- cgit v1.2.3 From bcd254621f9a98794cdc32906db10af7135824c4 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 13 Oct 2010 04:30:37 -0500 Subject: vcs-svn: read the preimage when applying deltas The source view offset heading each svndiff0 window represents a number of bytes past the beginning of the preimage. 
Together with the source view length, it dictates to the delta applier what portion of the preimage instructions will refer to. Read that portion right away using the sliding window code. Maybe some day we will use mmap to read data more lazily. Subversion's implementation tolerates source view offsets pointing past the end of the preimage file but we do not, for simplicity. This does not teach the delta applier to read instructions or copy data from the source view. Deltas that could produce nonempty output will still be rejected. Improved-by: Ramkumar Ramachandra Improved-by: David Barr Signed-off-by: Jonathan Nieder Acked-by: Ramkumar Ramachandra --- vcs-svn/svndiff.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'vcs-svn') diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c index 249efb6eed..b7c2c8bf53 100644 --- a/vcs-svn/svndiff.c +++ b/vcs-svn/svndiff.c @@ -4,6 +4,7 @@ */ #include "git-compat-util.h" +#include "sliding_window.h" #include "line_buffer.h" #include "svndiff.h" @@ -127,6 +128,7 @@ int svndiff0_apply(struct line_buffer *delta, off_t delta_len, if (read_offset(delta, &pre_off, &delta_len) || read_length(delta, &pre_len, &delta_len) || + move_window(preimage, pre_off, pre_len) || apply_one_window(delta, &delta_len)) return -1; } -- cgit v1.2.3 From fc4ae43b2cbd53da6ac2a0047fb4e53175921696 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 13 Oct 2010 04:35:59 -0500 Subject: vcs-svn: read inline data from deltas Each window of an svndiff0-format delta includes a section for novel text to be copied to the postimage (in the order it appears in the window, possibly interspersed with other data). Slurp in this data when encountering it. It is not actually necessary to do so --- it would be just as easy to copy from delta to output as part of interpreting the relevant instructions --- but this way, the code that interprets svndiff0 instructions can proceed very quickly because it does not require I/O. 
Subversion's svndiff0 parser rejects deltas that do not consume all the novel text that was provided. Omit that check for now so we can test the new functionality right away, rather than waiting to learn instructions that consume data. Do check for truncated data sections. Subversion's parser rejects deltas that end in the middle of a declared novel-text section, so it should be safe for us to reject them, too. Improved-by: Ramkumar Ramachandra Improved-by: David Barr Signed-off-by: Jonathan Nieder Acked-by: Ramkumar Ramachandra --- vcs-svn/svndiff.c | 46 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 11 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c index b7c2c8bf53..175168f599 100644 --- a/vcs-svn/svndiff.c +++ b/vcs-svn/svndiff.c @@ -24,6 +24,17 @@ #define VLI_DIGIT_MASK 0x7f #define VLI_BITS_PER_DIGIT 7 +struct window { + struct strbuf data; +}; + +#define WINDOW_INIT { STRBUF_INIT } + +static void window_release(struct window *ctx) +{ + strbuf_release(&ctx->data); +} + static int error_short_read(struct line_buffer *input) { if (buffer_ferror(input)) @@ -31,24 +42,30 @@ static int error_short_read(struct line_buffer *input) return error("invalid delta: unexpected end of file"); } +static int read_chunk(struct line_buffer *delta, off_t *delta_len, + struct strbuf *buf, size_t len) +{ + strbuf_reset(buf); + if (len > *delta_len || + buffer_read_binary(delta, buf, len) != len) + return error_short_read(delta); + *delta_len -= buf->len; + return 0; +} + static int read_magic(struct line_buffer *in, off_t *len) { static const char magic[] = {'S', 'V', 'N', '\0'}; struct strbuf sb = STRBUF_INIT; - if (*len < sizeof(magic) || - buffer_read_binary(in, &sb, sizeof(magic)) != sizeof(magic)) { - error_short_read(in); + if (read_chunk(in, len, &sb, sizeof(magic))) { strbuf_release(&sb); return -1; } - if (memcmp(sb.buf, magic, sizeof(magic))) { strbuf_release(&sb); return error("invalid delta: 
unrecognized file type"); } - - *len -= sizeof(magic); strbuf_release(&sb); return 0; } @@ -98,6 +115,7 @@ static int read_length(struct line_buffer *in, size_t *result, off_t *len) static int apply_one_window(struct line_buffer *delta, off_t *delta_len) { + struct window ctx = WINDOW_INIT; size_t out_len; size_t instructions_len; size_t data_len; @@ -107,12 +125,18 @@ static int apply_one_window(struct line_buffer *delta, off_t *delta_len) if (read_length(delta, &out_len, delta_len) || read_length(delta, &instructions_len, delta_len) || read_length(delta, &data_len, delta_len)) - return -1; - if (instructions_len) - return error("What do you think I am? A delta applier?"); - if (data_len) - return error("No support for inline data yet"); + goto error_out; + if (instructions_len) { + error("What do you think I am? A delta applier?"); + goto error_out; + } + if (read_chunk(delta, delta_len, &ctx.data, data_len)) + goto error_out; + window_release(&ctx); return 0; +error_out: + window_release(&ctx); + return -1; } int svndiff0_apply(struct line_buffer *delta, off_t delta_len, -- cgit v1.2.3 From ef2ac77e9f8f4819f75cf52721567463e60a805c Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 13 Oct 2010 04:38:01 -0500 Subject: vcs-svn: read instructions from deltas Buffer the instruction section upon encountering it for later interpretation. An alternative design would involve parsing the instructions at this point and buffering them in some processed form. Using the unprocessed form is simpler. 
Signed-off-by: Jonathan Nieder Acked-by: Ramkumar Ramachandra --- vcs-svn/svndiff.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c index 175168f599..8968fdb4eb 100644 --- a/vcs-svn/svndiff.c +++ b/vcs-svn/svndiff.c @@ -25,13 +25,15 @@ #define VLI_BITS_PER_DIGIT 7 struct window { + struct strbuf instructions; struct strbuf data; }; -#define WINDOW_INIT { STRBUF_INIT } +#define WINDOW_INIT { STRBUF_INIT, STRBUF_INIT } static void window_release(struct window *ctx) { + strbuf_release(&ctx->instructions); strbuf_release(&ctx->data); } @@ -124,7 +126,8 @@ static int apply_one_window(struct line_buffer *delta, off_t *delta_len) /* "source view" offset and length already handled; */ if (read_length(delta, &out_len, delta_len) || read_length(delta, &instructions_len, delta_len) || - read_length(delta, &data_len, delta_len)) + read_length(delta, &data_len, delta_len) || + read_chunk(delta, delta_len, &ctx.instructions, instructions_len)) goto error_out; if (instructions_len) { error("What do you think I am? A delta applier?"); -- cgit v1.2.3 From ec71aa2e1f229b90092e6678ac7c2dca3d15b5f3 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 13 Oct 2010 04:39:44 -0500 Subject: vcs-svn: implement copyfrom_data delta instruction The copyfrom_data instruction copies a few bytes verbatim from the novel text section of a window to the postimage. 
[jn: with memory leak fix from David] Improved-by: David Barr Signed-off-by: Jonathan Nieder Acked-by: Ramkumar Ramachandra --- vcs-svn/svndiff.c | 115 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 108 insertions(+), 7 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c index 8968fdb4eb..ed1d4a08be 100644 --- a/vcs-svn/svndiff.c +++ b/vcs-svn/svndiff.c @@ -15,28 +15,49 @@ * * svndiff0 ::= 'SVN\0' window* * window ::= int int int int int instructions inline_data; + * instructions ::= instruction*; + * instruction ::= view_selector int int + * | copyfrom_data int + * | packed_view_selector int + * | packed_copyfrom_data + * ; + * copyfrom_data ::= # binary 10 000000; + * packed_copyfrom_data ::= # copyfrom_data OR-ed with 6 bit value; * int ::= highdigit* lowdigit; * highdigit ::= # binary 1000 0000 OR-ed with 7 bit value; * lowdigit ::= # 7 bit value; */ +#define INSN_MASK 0xc0 +#define INSN_COPYFROM_DATA 0x80 +#define OPERAND_MASK 0x3f + #define VLI_CONTINUE 0x80 #define VLI_DIGIT_MASK 0x7f #define VLI_BITS_PER_DIGIT 7 struct window { + struct strbuf out; struct strbuf instructions; struct strbuf data; }; -#define WINDOW_INIT { STRBUF_INIT, STRBUF_INIT } +#define WINDOW_INIT { STRBUF_INIT, STRBUF_INIT, STRBUF_INIT } static void window_release(struct window *ctx) { + strbuf_release(&ctx->out); strbuf_release(&ctx->instructions); strbuf_release(&ctx->data); } +static int write_strbuf(struct strbuf *sb, FILE *out) +{ + if (fwrite(sb->buf, 1, sb->len, out) == sb->len) /* Success. 
*/ + return 0; + return error("cannot write delta postimage: %s", strerror(errno)); +} + static int error_short_read(struct line_buffer *input) { if (buffer_ferror(input)) @@ -93,6 +114,25 @@ static int read_int(struct line_buffer *in, uintmax_t *result, off_t *len) return error_short_read(in); } +static int parse_int(const char **buf, size_t *result, const char *end) +{ + size_t rv = 0; + const char *pos; + for (pos = *buf; pos != end; pos++) { + unsigned char ch = *pos; + + rv <<= VLI_BITS_PER_DIGIT; + rv += (ch & VLI_DIGIT_MASK); + if (ch & VLI_CONTINUE) + continue; + + *result = rv; + *buf = pos + 1; + return 0; + } + return error("invalid delta: unexpected end of instructions section"); +} + static int read_offset(struct line_buffer *in, off_t *result, off_t *len) { uintmax_t val; @@ -115,7 +155,64 @@ static int read_length(struct line_buffer *in, size_t *result, off_t *len) return 0; } -static int apply_one_window(struct line_buffer *delta, off_t *delta_len) +static int copyfrom_data(struct window *ctx, size_t *data_pos, size_t nbytes) +{ + const size_t pos = *data_pos; + if (unsigned_add_overflows(pos, nbytes) || + pos + nbytes > ctx->data.len) + return error("invalid delta: copies unavailable inline data"); + strbuf_add(&ctx->out, ctx->data.buf + pos, nbytes); + *data_pos += nbytes; + return 0; +} + +static int parse_first_operand(const char **buf, size_t *out, const char *end) +{ + size_t result = (unsigned char) *(*buf)++ & OPERAND_MASK; + if (result) { /* immediate operand */ + *out = result; + return 0; + } + return parse_int(buf, out, end); +} + +static int execute_one_instruction(struct window *ctx, + const char **instructions, size_t *data_pos) +{ + unsigned int instruction; + const char *insns_end = ctx->instructions.buf + ctx->instructions.len; + size_t nbytes; + assert(ctx); + assert(instructions && *instructions); + assert(data_pos); + + instruction = (unsigned char) **instructions; + if (parse_first_operand(instructions, &nbytes, insns_end)) + 
return -1; + if ((instruction & INSN_MASK) != INSN_COPYFROM_DATA) + return error("Unknown instruction %x", instruction); + return copyfrom_data(ctx, data_pos, nbytes); +} + +static int apply_window_in_core(struct window *ctx) +{ + const char *instructions; + size_t data_pos = 0; + + /* + * Fill ctx->out.buf using data from the source, target, + * and inline data views. + */ + for (instructions = ctx->instructions.buf; + instructions != ctx->instructions.buf + ctx->instructions.len; + ) + if (execute_one_instruction(ctx, &instructions, &data_pos)) + return -1; + return 0; +} + +static int apply_one_window(struct line_buffer *delta, off_t *delta_len, + FILE *out) { struct window ctx = WINDOW_INIT; size_t out_len; @@ -127,13 +224,17 @@ static int apply_one_window(struct line_buffer *delta, off_t *delta_len) if (read_length(delta, &out_len, delta_len) || read_length(delta, &instructions_len, delta_len) || read_length(delta, &data_len, delta_len) || - read_chunk(delta, delta_len, &ctx.instructions, instructions_len)) + read_chunk(delta, delta_len, &ctx.instructions, instructions_len) || + read_chunk(delta, delta_len, &ctx.data, data_len)) + goto error_out; + strbuf_grow(&ctx.out, out_len); + if (apply_window_in_core(&ctx)) goto error_out; - if (instructions_len) { - error("What do you think I am? 
A delta applier?"); + if (ctx.out.len != out_len) { + error("invalid delta: incorrect postimage length"); goto error_out; } - if (read_chunk(delta, delta_len, &ctx.data, data_len)) + if (write_strbuf(&ctx.out, out)) goto error_out; window_release(&ctx); return 0; @@ -156,7 +257,7 @@ int svndiff0_apply(struct line_buffer *delta, off_t delta_len, if (read_offset(delta, &pre_off, &delta_len) || read_length(delta, &pre_len, &delta_len) || move_window(preimage, pre_off, pre_len) || - apply_one_window(delta, &delta_len)) + apply_one_window(delta, &delta_len, postimage)) return -1; } return 0; -- cgit v1.2.3 From 4c9b93ed7644a7a7c72bdd8105d88a9ebb8e3e74 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 13 Oct 2010 04:48:07 -0500 Subject: vcs-svn: verify that deltas consume all inline data By constraining the format of deltas, we can more easily detect corruption and other breakage. Requiring deltas not to provide unconsumed data also opens the possibility of ignoring the declared amount of novel data and simply streaming the data as needed to fulfill copyfrom_data requests. Signed-off-by: Jonathan Nieder Acked-by: Ramkumar Ramachandra --- vcs-svn/svndiff.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'vcs-svn') diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c index ed1d4a08be..fb7dc22f92 100644 --- a/vcs-svn/svndiff.c +++ b/vcs-svn/svndiff.c @@ -208,6 +208,8 @@ static int apply_window_in_core(struct window *ctx) ) if (execute_one_instruction(ctx, &instructions, &data_pos)) return -1; + if (data_pos != ctx->data.len) + return error("invalid delta: does not copy all inline data"); return 0; } -- cgit v1.2.3 From d3f131b57ec0e69a37bca882fa6bf39aa4c1c387 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 13 Oct 2010 04:50:07 -0500 Subject: vcs-svn: let deltas use data from postimage The copyfrom_target instruction appends data that is already present in the current output view to the end of output. 
(The offset argument is relative to the beginning of output produced in the current window.) The region copied is allowed to run past the end of the existing output. To support that case, copy one character at a time rather than calling memcpy or memmove. This allows copyfrom_target to be used once to repeat a string many times. For example: COPYFROM_DATA 2 COPYFROM_OUTPUT 10, 0 DATA "ab" would produce the output "abababababab". Signed-off-by: Jonathan Nieder Acked-by: Ramkumar Ramachandra --- vcs-svn/svndiff.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c index fb7dc22f92..a02eee0410 100644 --- a/vcs-svn/svndiff.c +++ b/vcs-svn/svndiff.c @@ -21,7 +21,12 @@ * | packed_view_selector int * | packed_copyfrom_data * ; + * view_selector ::= copyfrom_source + * | copyfrom_target + * ; + * copyfrom_target ::= # binary 01 000000; * copyfrom_data ::= # binary 10 000000; + * packed_view_selector ::= # view_selector OR-ed with 6 bit value; * packed_copyfrom_data ::= # copyfrom_data OR-ed with 6 bit value; * int ::= highdigit* lowdigit; * highdigit ::= # binary 1000 0000 OR-ed with 7 bit value; @@ -29,6 +34,7 @@ */ #define INSN_MASK 0xc0 +#define INSN_COPYFROM_TARGET 0x40 #define INSN_COPYFROM_DATA 0x80 #define OPERAND_MASK 0x3f @@ -155,6 +161,19 @@ static int read_length(struct line_buffer *in, size_t *result, off_t *len) return 0; } +static int copyfrom_target(struct window *ctx, const char **instructions, + size_t nbytes, const char *instructions_end) +{ + size_t offset; + if (parse_int(instructions, &offset, instructions_end)) + return -1; + if (offset >= ctx->out.len) + return error("invalid delta: copies from the future"); + for (; nbytes > 0; nbytes--) + strbuf_addch(&ctx->out, ctx->out.buf[offset++]); + return 0; +} + static int copyfrom_data(struct window *ctx, size_t *data_pos, size_t nbytes) { const size_t pos = *data_pos; @@ -189,9 +208,14 @@ 
static int execute_one_instruction(struct window *ctx, instruction = (unsigned char) **instructions; if (parse_first_operand(instructions, &nbytes, insns_end)) return -1; - if ((instruction & INSN_MASK) != INSN_COPYFROM_DATA) + switch (instruction & INSN_MASK) { + case INSN_COPYFROM_TARGET: + return copyfrom_target(ctx, instructions, nbytes, insns_end); + case INSN_COPYFROM_DATA: + return copyfrom_data(ctx, data_pos, nbytes); + default: return error("Unknown instruction %x", instruction); - return copyfrom_data(ctx, data_pos, nbytes); + } } static int apply_window_in_core(struct window *ctx) -- cgit v1.2.3 From c846e4107876936bed7177a811559bd74a72dcd8 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Wed, 13 Oct 2010 04:58:30 -0500 Subject: vcs-svn: let deltas use data from preimage The copyfrom_source instruction appends data from the preimage buffer to the end of output. Its arguments are a length and an offset relative to the beginning of the source view. With this change, the delta applier is able to reproduce all 5,636,613 blobs in the early history of the ASF repository. Tested with mkfifo backflow svn-fe backflow with svn-asf-public-r0:940166 produced by whatever version of Subversion the dumps in /dump/ on svn.apache.org use (presumably 1.6.something). 
Improved-by: Ramkumar Ramachandra Improved-by: David Barr Signed-off-by: Jonathan Nieder Acked-by: Ramkumar Ramachandra --- vcs-svn/svndiff.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) (limited to 'vcs-svn') diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c index a02eee0410..9ee41bbc90 100644 --- a/vcs-svn/svndiff.c +++ b/vcs-svn/svndiff.c @@ -24,6 +24,7 @@ * view_selector ::= copyfrom_source * | copyfrom_target * ; + * copyfrom_source ::= # binary 00 000000; * copyfrom_target ::= # binary 01 000000; * copyfrom_data ::= # binary 10 000000; * packed_view_selector ::= # view_selector OR-ed with 6 bit value; @@ -34,6 +35,7 @@ */ #define INSN_MASK 0xc0 +#define INSN_COPYFROM_SOURCE 0x00 #define INSN_COPYFROM_TARGET 0x40 #define INSN_COPYFROM_DATA 0x80 #define OPERAND_MASK 0x3f @@ -43,12 +45,13 @@ #define VLI_BITS_PER_DIGIT 7 struct window { + struct sliding_view *in; struct strbuf out; struct strbuf instructions; struct strbuf data; }; -#define WINDOW_INIT { STRBUF_INIT, STRBUF_INIT, STRBUF_INIT } +#define WINDOW_INIT(w) { (w), STRBUF_INIT, STRBUF_INIT, STRBUF_INIT } static void window_release(struct window *ctx) { @@ -161,6 +164,19 @@ static int read_length(struct line_buffer *in, size_t *result, off_t *len) return 0; } +static int copyfrom_source(struct window *ctx, const char **instructions, + size_t nbytes, const char *insns_end) +{ + size_t offset; + if (parse_int(instructions, &offset, insns_end)) + return -1; + if (unsigned_add_overflows(offset, nbytes) || + offset + nbytes > ctx->in->width) + return error("invalid delta: copies source data outside view"); + strbuf_add(&ctx->out, ctx->in->buf.buf + offset, nbytes); + return 0; +} + static int copyfrom_target(struct window *ctx, const char **instructions, size_t nbytes, const char *instructions_end) { @@ -209,12 +225,14 @@ static int execute_one_instruction(struct window *ctx, if (parse_first_operand(instructions, &nbytes, insns_end)) return -1; switch (instruction & 
INSN_MASK) { + case INSN_COPYFROM_SOURCE: + return copyfrom_source(ctx, instructions, nbytes, insns_end); case INSN_COPYFROM_TARGET: return copyfrom_target(ctx, instructions, nbytes, insns_end); case INSN_COPYFROM_DATA: return copyfrom_data(ctx, data_pos, nbytes); default: - return error("Unknown instruction %x", instruction); + return error("invalid delta: unrecognized instruction"); } } @@ -238,9 +256,9 @@ static int apply_window_in_core(struct window *ctx) } static int apply_one_window(struct line_buffer *delta, off_t *delta_len, - FILE *out) + struct sliding_view *preimage, FILE *out) { - struct window ctx = WINDOW_INIT; + struct window ctx = WINDOW_INIT(preimage); size_t out_len; size_t instructions_len; size_t data_len; @@ -283,7 +301,7 @@ int svndiff0_apply(struct line_buffer *delta, off_t delta_len, if (read_offset(delta, &pre_off, &delta_len) || read_length(delta, &pre_len, &delta_len) || move_window(preimage, pre_off, pre_len) || - apply_one_window(delta, &delta_len, postimage)) + apply_one_window(delta, &delta_len, preimage, postimage)) return -1; } return 0; -- cgit v1.2.3 From fbdd4f6fb477885e4bf81658e02c3542a861c695 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 27 May 2011 04:07:44 -0500 Subject: vcs-svn: cap number of bytes read from sliding view Introduce a "max_off" field in struct sliding_view, roughly representing a maximum number of bytes that can be read from "file". If it is set to a nonnegative integer, a call to move_window() attempting to put the right endpoint beyond that offset will return an error instead. The idea is to use this when applying Subversion-format deltas to prevent reads past the end of the preimage (which has known length). Without such a check, corrupt deltas would cause svn-fe to block indefinitely when data in the input pipe is exhausted. 
Inspired-by: Ramkumar Ramachandra Signed-off-by: Jonathan Nieder --- vcs-svn/sliding_window.c | 2 ++ vcs-svn/sliding_window.h | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'vcs-svn') diff --git a/vcs-svn/sliding_window.c b/vcs-svn/sliding_window.c index 1b8d9875ed..1bac7a4c7f 100644 --- a/vcs-svn/sliding_window.c +++ b/vcs-svn/sliding_window.c @@ -54,6 +54,8 @@ int move_window(struct sliding_view *view, off_t off, size_t width) return -1; if (off < view->off || off + width < view->off + view->width) return error("invalid delta: window slides left"); + if (view->max_off >= 0 && view->max_off < off + width) + return error("delta preimage ends early"); file_offset = view->off + view->buf.len; if (off < file_offset) { diff --git a/vcs-svn/sliding_window.h b/vcs-svn/sliding_window.h index ed0bfdd65c..b43a825cba 100644 --- a/vcs-svn/sliding_window.h +++ b/vcs-svn/sliding_window.h @@ -7,10 +7,11 @@ struct sliding_view { struct line_buffer *file; off_t off; size_t width; + off_t max_off; /* -1 means unlimited */ struct strbuf buf; }; -#define SLIDING_VIEW_INIT(input) { (input), 0, 0, STRBUF_INIT } +#define SLIDING_VIEW_INIT(input, len) { (input), 0, 0, (len), STRBUF_INIT } extern int move_window(struct sliding_view *view, off_t off, size_t width); -- cgit v1.2.3