From fa48de62ac00bdd634ea5bcd5b1a6bccfa4d9117 Mon Sep 17 00:00:00 2001 From: Han-Wen Nienhuys Date: Mon, 21 Feb 2022 18:46:04 +0000 Subject: Documentation: object_id_len goes up to 31 The value is stored in a 5-bit field, so we can't support more without a format version upgrade. Signed-off-by: Han-Wen Nienhuys Signed-off-by: Junio C Hamano --- Documentation/technical/reftable.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/technical/reftable.txt b/Documentation/technical/reftable.txt index d7c3b645cf..6a67cc4174 100644 --- a/Documentation/technical/reftable.txt +++ b/Documentation/technical/reftable.txt @@ -443,7 +443,7 @@ Obj block format Object blocks are optional. Writers may choose to omit object blocks, especially if readers will not use the object name to ref mapping. -Object blocks use unique, abbreviated 2-32 object name keys, mapping to +Object blocks use unique, abbreviated 2-31 byte object name keys, mapping to ref blocks containing references pointing to that object directly, or as the peeled value of an annotated tag. Like ref blocks, object blocks use the file's standard block size. The abbreviation length is available in -- cgit v1.2.3 From eff5832ba13b81ebbf68b0c28934fe18ba8601fc Mon Sep 17 00:00:00 2001 From: Han-Wen Nienhuys Date: Mon, 21 Feb 2022 18:46:05 +0000 Subject: reftable: reject 0 object_id_len The spec says 2 <= object_id_len <= 31. We are lenient and allow 1, but we forbid 0, so we can be sure that we never read a 0-length key. Signed-off-by: Han-Wen Nienhuys Signed-off-by: Junio C Hamano --- reftable/reader.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/reftable/reader.c b/reftable/reader.c index 00906e7a2d..54b4025105 100644 --- a/reftable/reader.c +++ b/reftable/reader.c @@ -155,6 +155,11 @@ static int parse_footer(struct reftable_reader *r, uint8_t *footer, r->log_offsets.is_present = (first_block_typ == BLOCK_TYPE_LOG || r->log_offsets.offset > 0); r->obj_offsets.is_present = r->obj_offsets.offset > 0; + if (r->obj_offsets.is_present && !r->object_id_len) { + err = REFTABLE_FORMAT_ERROR; + goto done; + } + err = 0; done: return err; -- cgit v1.2.3 From 140765911050d62a23503972ef6513ef18252ad3 Mon Sep 17 00:00:00 2001 From: Han-Wen Nienhuys Date: Mon, 21 Feb 2022 18:46:06 +0000 Subject: reftable: add a test that verifies that writing empty keys fails Empty keys can only be written as ref records with empty names. The log record has a logical timestamp in the key, so the key is never empty. Signed-off-by: Han-Wen Nienhuys Signed-off-by: Junio C Hamano --- reftable/readwrite_test.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/reftable/readwrite_test.c b/reftable/readwrite_test.c index 605ba0f9fd..fd5922e55f 100644 --- a/reftable/readwrite_test.c +++ b/reftable/readwrite_test.c @@ -667,6 +667,29 @@ static void test_write_empty_table(void) strbuf_release(&buf); } +static void test_write_empty_key(void) +{ + struct reftable_write_options opts = { 0 }; + struct strbuf buf = STRBUF_INIT; + struct reftable_writer *w = + reftable_new_writer(&strbuf_add_void, &buf, &opts); + struct reftable_ref_record ref = { + .refname = "", + .update_index = 1, + .value_type = REFTABLE_REF_DELETION, + }; + int err; + + reftable_writer_set_limits(w, 1, 1); + err = reftable_writer_add_ref(w, &ref); + EXPECT(err == REFTABLE_API_ERROR); + + err = reftable_writer_close(w); + EXPECT(err == REFTABLE_EMPTY_TABLE_ERROR); + reftable_writer_free(w); + strbuf_release(&buf); +} + static void test_write_key_order(void) { struct reftable_write_options opts = { 0 }; @@ -746,6 +769,7 @@ int readwrite_test_main(int argc, const char *argv[]) RUN_TEST(test_table_read_write_seek_index); RUN_TEST(test_table_refs_for_no_index); RUN_TEST(test_table_refs_for_obj_index); + RUN_TEST(test_write_empty_key); RUN_TEST(test_write_empty_table); RUN_TEST(test_log_overflow); return 0; -- cgit v1.2.3 From 45c2fcc2a0659e42c7e4f21f0b97393df261ca51 Mon Sep 17 00:00:00 2001 From: Han-Wen Nienhuys Date: Mon, 21 Feb 2022 18:46:07 +0000 Subject: reftable: avoid writing empty keys at the block layer The public interface (reftable_writer) already ensures that keys are written in strictly increasing order, and an empty key by definition fails this check. However, by also enforcing this at the block layer, it is easier to verify that records (which are written into blocks) never have to consider the possibility of empty keys. Signed-off-by: Han-Wen Nienhuys Signed-off-by: Junio C Hamano --- reftable/block.c | 27 +++++++++++++++++---------- reftable/block_test.c | 5 +++++ reftable/writer.c | 3 +-- 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/reftable/block.c b/reftable/block.c index 2170748c5e..34d4d07369 100644 --- a/reftable/block.c +++ b/reftable/block.c @@ -88,8 +88,9 @@ uint8_t block_writer_type(struct block_writer *bw) return bw->buf[bw->header_off]; } -/* adds the reftable_record to the block. Returns -1 if it does not fit, 0 on - success */ +/* Adds the reftable_record to the block. Returns -1 if it does not fit, 0 on + success. Returns REFTABLE_API_ERROR if attempting to write a record with + empty key. */ int block_writer_add(struct block_writer *w, struct reftable_record *rec) { struct strbuf empty = STRBUF_INIT; @@ -105,8 +106,14 @@ int block_writer_add(struct block_writer *w, struct reftable_record *rec) int is_restart = 0; struct strbuf key = STRBUF_INIT; int n = 0; + int err = -1; reftable_record_key(rec, &key); + if (!key.len) { + err = REFTABLE_API_ERROR; + goto done; + } + n = reftable_encode_key(&is_restart, out, last, key, reftable_record_val_type(rec)); if (n < 0) @@ -118,16 +125,11 @@ int block_writer_add(struct block_writer *w, struct reftable_record *rec) goto done; string_view_consume(&out, n); - if (block_writer_register_restart(w, start.len - out.len, is_restart, - &key) < 0) - goto done; - - strbuf_release(&key); - return 0; - + err = block_writer_register_restart(w, start.len - out.len, is_restart, + &key); done: strbuf_release(&key); - return -1; + return err; } int block_writer_finish(struct block_writer *w) @@ -332,6 +334,9 @@ int block_iter_next(struct block_iter *it, struct reftable_record *rec) if (n < 0) return -1; + if (!key.len) + return REFTABLE_FORMAT_ERROR; + string_view_consume(&in, n); n = reftable_record_decode(rec, key, extra, in, it->br->hash_size); if (n < 0) @@ -358,6 +363,8 @@ int block_reader_first_key(struct block_reader *br, struct strbuf *key) int n = reftable_decode_key(key, &extra, empty, in); if (n < 0) return n; + if (!key->len) + return REFTABLE_FORMAT_ERROR; return 0; } diff --git a/reftable/block_test.c b/reftable/block_test.c index fa2ee092ec..cb88af4a56 100644 --- a/reftable/block_test.c +++ b/reftable/block_test.c @@ -42,6 +42,11 @@ static void test_block_read_write(void) block_writer_init(&bw, BLOCK_TYPE_REF, block.data, block_size, header_off, hash_size(GIT_SHA1_FORMAT_ID)); + rec.u.ref.refname = ""; + rec.u.ref.value_type = REFTABLE_REF_DELETION; + n = block_writer_add(&bw, &rec); + EXPECT(n == REFTABLE_API_ERROR); + for (i = 0; i < N; i++) { char name[100]; uint8_t hash[GIT_SHA1_RAWSZ]; diff --git a/reftable/writer.c b/reftable/writer.c index 944c2329ab..d54215a50d 100644 --- a/reftable/writer.c +++ b/reftable/writer.c @@ -240,14 +240,13 @@ static int writer_add_record(struct reftable_writer *w, writer_reinit_block_writer(w, reftable_record_type(rec)); err = block_writer_add(w->block_writer, rec); - if (err < 0) { + if (err == -1) { /* we are writing into memory, so an error can only mean it * doesn't fit. */ err = REFTABLE_ENTRY_TOO_BIG_ERROR; goto done; } - err = 0; done: strbuf_release(&key); return err; -- cgit v1.2.3 From b4007fcc6fe04deeeb3cdf7b018413f65c9de6e8 Mon Sep 17 00:00:00 2001 From: Han-Wen Nienhuys Date: Mon, 21 Feb 2022 18:46:08 +0000 Subject: reftable: ensure that obj_id_len is >= 2 on writing When writing the same hash many times, we might decide to use a length-1 object ID prefix for the ObjectID => ref table, which is out of spec. Signed-off-by: Han-Wen Nienhuys Signed-off-by: Junio C Hamano --- reftable/readwrite_test.c | 37 +++++++++++++++++++++++++++++++++++++ reftable/writer.c | 4 +++- 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/reftable/readwrite_test.c b/reftable/readwrite_test.c index fd5922e55f..35142eb070 100644 --- a/reftable/readwrite_test.c +++ b/reftable/readwrite_test.c @@ -667,6 +667,42 @@ static void test_write_empty_table(void) strbuf_release(&buf); } +static void test_write_object_id_min_length(void) +{ + struct reftable_write_options opts = { + .block_size = 75, + }; + struct strbuf buf = STRBUF_INIT; + struct reftable_writer *w = + reftable_new_writer(&strbuf_add_void, &buf, &opts); + uint8_t hash[GIT_SHA1_RAWSZ] = {42}; + struct reftable_ref_record ref = { + .update_index = 1, + .value_type = REFTABLE_REF_VAL1, + .value.val1 = hash, + }; + int err; + int i; + + reftable_writer_set_limits(w, 1, 1); + + /* Write the same hash in many refs. If there is only 1 hash, the + * disambiguating prefix is length 0 */ + for (i = 0; i < 256; i++) { + char name[256]; + snprintf(name, sizeof(name), "ref%05d", i); + ref.refname = name; + err = reftable_writer_add_ref(w, &ref); + EXPECT_ERR(err); + } + + err = reftable_writer_close(w); + EXPECT_ERR(err); + EXPECT(writer_stats(w)->object_id_len == 2); + reftable_writer_free(w); + strbuf_release(&buf); +} + static void test_write_empty_key(void) { struct reftable_write_options opts = { 0 }; @@ -772,5 +808,6 @@ int readwrite_test_main(int argc, const char *argv[]) RUN_TEST(test_write_empty_key); RUN_TEST(test_write_empty_table); RUN_TEST(test_log_overflow); + RUN_TEST(test_write_object_id_min_length); return 0; } diff --git a/reftable/writer.c b/reftable/writer.c index d54215a50d..5e4e6e9341 100644 --- a/reftable/writer.c +++ b/reftable/writer.c @@ -515,7 +515,9 @@ static void object_record_free(void *void_arg, void *key) static int writer_dump_object_index(struct reftable_writer *w) { struct write_record_arg closure = { .w = w }; - struct common_prefix_arg common = { NULL }; + struct common_prefix_arg common = { + .max = 1, /* obj_id_len should be >= 2. */ + }; if (w->obj_index_tree) { infix_walk(w->obj_index_tree, &update_common, &common); } -- cgit v1.2.3 From 3c443a02a9f98227fa5c22935d88bcf68cfeaeec Mon Sep 17 00:00:00 2001 From: Han-Wen Nienhuys Date: Mon, 21 Feb 2022 18:46:09 +0000 Subject: reftable: add test for length of disambiguating prefix The ID => ref map is trimming object IDs to a disambiguating prefix. Check that we are computing their length correctly. Signed-off-by: Han-Wen Nienhuys Signed-off-by: Junio C Hamano --- reftable/readwrite_test.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/reftable/readwrite_test.c b/reftable/readwrite_test.c index 35142eb070..a1b835785a 100644 --- a/reftable/readwrite_test.c +++ b/reftable/readwrite_test.c @@ -703,6 +703,43 @@ static void test_write_object_id_min_length(void) strbuf_release(&buf); } +static void test_write_object_id_length(void) +{ + struct reftable_write_options opts = { + .block_size = 75, + }; + struct strbuf buf = STRBUF_INIT; + struct reftable_writer *w = + reftable_new_writer(&strbuf_add_void, &buf, &opts); + uint8_t hash[GIT_SHA1_RAWSZ] = {42}; + struct reftable_ref_record ref = { + .update_index = 1, + .value_type = REFTABLE_REF_VAL1, + .value.val1 = hash, + }; + int err; + int i; + + reftable_writer_set_limits(w, 1, 1); + + /* Write the same hash in many refs. If there is only 1 hash, the + * disambiguating prefix is length 0 */ + for (i = 0; i < 256; i++) { + char name[256]; + snprintf(name, sizeof(name), "ref%05d", i); + ref.refname = name; + ref.value.val1[15] = i; + err = reftable_writer_add_ref(w, &ref); + EXPECT_ERR(err); + } + + err = reftable_writer_close(w); + EXPECT_ERR(err); + EXPECT(writer_stats(w)->object_id_len == 16); + reftable_writer_free(w); + strbuf_release(&buf); +} + static void test_write_empty_key(void) { struct reftable_write_options opts = { 0 }; @@ -808,6 +845,7 @@ int readwrite_test_main(int argc, const char *argv[]) RUN_TEST(test_write_empty_key); RUN_TEST(test_write_empty_table); RUN_TEST(test_log_overflow); + RUN_TEST(test_write_object_id_length); RUN_TEST(test_write_object_id_min_length); return 0; } -- cgit v1.2.3 From 73a4c188b77acbfdf25641e2bb4d1600c45338ec Mon Sep 17 00:00:00 2001 From: Han-Wen Nienhuys Date: Mon, 21 Feb 2022 18:46:10 +0000 Subject: reftable: rename writer_stats to reftable_writer_stats This function is part of the reftable API, so it should use the reftable_ prefix Signed-off-by: Han-Wen Nienhuys Signed-off-by: Junio C Hamano --- reftable/readwrite_test.c | 10 +++++----- reftable/reftable-writer.h | 2 +- reftable/writer.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/reftable/readwrite_test.c b/reftable/readwrite_test.c index a1b835785a..469ab79a5a 100644 --- a/reftable/readwrite_test.c +++ b/reftable/readwrite_test.c @@ -100,7 +100,7 @@ static void write_table(char ***names, struct strbuf *buf, int N, n = reftable_writer_close(w); EXPECT(n == 0); - stats = writer_stats(w); + stats = reftable_writer_stats(w); for (i = 0; i < stats->ref_stats.blocks; i++) { int off = i * opts.block_size; if (off == 0) { @@ -239,7 +239,7 @@ static void test_log_write_read(void) n = reftable_writer_close(w); EXPECT(n == 0); - stats = writer_stats(w); + stats = reftable_writer_stats(w); EXPECT(stats->log_stats.blocks > 0); reftable_writer_free(w); w = NULL; @@ -330,7 +330,7 @@ static void test_log_zlib_corruption(void) n = reftable_writer_close(w); EXPECT(n == 0); - stats = writer_stats(w); + stats = reftable_writer_stats(w); EXPECT(stats->log_stats.blocks > 0); reftable_writer_free(w); w = NULL; @@ -698,7 +698,7 @@ static void test_write_object_id_min_length(void) err = reftable_writer_close(w); EXPECT_ERR(err); - EXPECT(writer_stats(w)->object_id_len == 2); + EXPECT(reftable_writer_stats(w)->object_id_len == 2); reftable_writer_free(w); strbuf_release(&buf); } @@ -735,7 +735,7 @@ static void test_write_object_id_length(void) err = reftable_writer_close(w); EXPECT_ERR(err); - EXPECT(writer_stats(w)->object_id_len == 16); + EXPECT(reftable_writer_stats(w)->object_id_len == 16); reftable_writer_free(w); strbuf_release(&buf); } diff --git a/reftable/reftable-writer.h b/reftable/reftable-writer.h index a560dc1725..db8de197f6 100644 --- a/reftable/reftable-writer.h +++ b/reftable/reftable-writer.h @@ -143,7 +143,7 @@ int reftable_writer_close(struct reftable_writer *w); This struct becomes invalid when the writer is freed. */ -const struct reftable_stats *writer_stats(struct reftable_writer *w); +const struct reftable_stats *reftable_writer_stats(struct reftable_writer *w); /* reftable_writer_free deallocates memory for the writer */ void reftable_writer_free(struct reftable_writer *w); diff --git a/reftable/writer.c b/reftable/writer.c index 5e4e6e9341..6d979e245f 100644 --- a/reftable/writer.c +++ b/reftable/writer.c @@ -695,7 +695,7 @@ static int writer_flush_block(struct reftable_writer *w) return writer_flush_nonempty_block(w); } -const struct reftable_stats *writer_stats(struct reftable_writer *w) +const struct reftable_stats *reftable_writer_stats(struct reftable_writer *w) { return &w->stats; } -- cgit v1.2.3