From 3ba3d0621b6822e974f06123db9394b33e454ed7 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 24 Aug 2021 12:15:54 -0400 Subject: pack-bitmap-write.c: gracefully fail to write non-closed bitmaps The set of objects covered by a bitmap must be closed under reachability, since it must be the case that there is a valid bit position assigned for every possible reachable object (otherwise the bitmaps would be incomplete). Pack bitmaps are never written from 'git repack' unless repacking all-into-one, and so we never write non-closed bitmaps (except in the case of partial clones where we aren't guaranteed to have all objects). But multi-pack bitmaps change this, since it isn't known whether the set of objects in the MIDX is closed under reachability until walking them. Plumb through a bit that is set when a reachable object isn't found. As soon as a reachable object isn't found in the set of objects to include in the bitmap, bitmap_writer_build() knows that the set is not closed, and so it now fails gracefully. A test is added in t0410 to trigger a bitmap write without full reachability closure by removing local copies of some reachable objects from a promisor remote. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'builtin') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index de00adbb9e..8a523624a1 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1256,7 +1256,8 @@ static void write_pack_file(void) bitmap_writer_show_progress(progress); bitmap_writer_select_commits(indexed_commits, indexed_commits_nr, -1); - bitmap_writer_build(&to_pack); + if (bitmap_writer_build(&to_pack) < 0) + die(_("failed to write bitmap index")); bitmap_writer_finish(written_list, nr_written, tmpname.buf, write_bitmap_options); write_bitmap_index = 0; -- cgit v1.2.3 From f57a739691e2efa1d71851cd725154a5a760d8f4 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 1 Sep 2021 16:34:01 -0400 Subject: midx: avoid opening multiple MIDXs when writing Opening multiple instance of the same MIDX can lead to problems like two separate packed_git structures which represent the same pack being added to the repository's object store. The above scenario can happen because prepare_midx_pack() checks if `m->packs[pack_int_id]` is NULL in order to determine if a pack has been opened and installed in the repository before. But a caller can construct two copies of the same MIDX by calling get_multi_pack_index() and load_multi_pack_index() since the former manipulates the object store directly but the latter is a lower-level routine which allocates a new MIDX for each call. So if prepare_midx_pack() is called on multiple MIDXs with the same pack_int_id, then that pack will be installed twice in the object store's packed_git pointer. This can lead to problems in, for e.g., the pack-bitmap code, which does something like the following (in pack-bitmap.c:open_pack_bitmap()): struct bitmap_index *bitmap_git = ...; for (p = get_all_packs(r); p; p = p->next) { if (open_pack_bitmap_1(bitmap_git, p) == 0) ret = 0; } which is a problem if two copies of the same pack exist in the packed_git list because pack-bitmap.c:open_pack_bitmap_1() contains a conditional like the following: if (bitmap_git->pack || bitmap_git->midx) { /* ignore extra bitmap file; we can only handle one */ warning("ignoring extra bitmap file: %s", packfile->pack_name); close(fd); return -1; } Avoid this scenario by not letting write_midx_internal() open a MIDX that isn't also pointed at by the object store. So long as this is the case, other routines should prefer to open MIDXs with get_multi_pack_index() or reprepare_packed_git() instead of creating instances on their own. Because get_multi_pack_index() returns `r->object_store->multi_pack_index` if it is non-NULL, we'll only have one instance of a MIDX open at one time, avoiding these problems. To encourage this, drop the `struct multi_pack_index *` parameter from `write_midx_internal()`, and rely instead on the `object_dir` to find (or initialize) the correct MIDX instance. Likewise, replace the call to `close_midx()` with `close_object_store()`, since we're about to replace the MIDX with a new one and should invalidate the object store's memory of any MIDX that might have existed beforehand. Note that this now forbids passing object directories that don't belong to alternate repositories over `--object-dir`, since before we would have happily opened a MIDX in any directory, but now restrict ourselves to only those reachable by `r->objects->multi_pack_index` (and alternate MIDXs that we can see by walking the `next` pointer). As far as I can tell, supporting arbitrary directories with `--object-dir` was a historical accident, since even the documentation says `` when referring to the value passed to this option. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/commit-graph.c | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'builtin') diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index cd86315221..003eaaac5c 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -43,28 +43,6 @@ static struct opts_commit_graph { int enable_changed_paths; } opts; -static struct object_directory *find_odb(struct repository *r, - const char *obj_dir) -{ - struct object_directory *odb; - char *obj_dir_real = real_pathdup(obj_dir, 1); - struct strbuf odb_path_real = STRBUF_INIT; - - prepare_alt_odb(r); - for (odb = r->objects->odb; odb; odb = odb->next) { - strbuf_realpath(&odb_path_real, odb->path, 1); - if (!strcmp(obj_dir_real, odb_path_real.buf)) - break; - } - - free(obj_dir_real); - strbuf_release(&odb_path_real); - - if (!odb) - die(_("could not find object directory matching %s"), obj_dir); - return odb; -} - static int graph_verify(int argc, const char **argv) { struct commit_graph *graph = NULL; -- cgit v1.2.3 From 0f533c728418fd3ef6ebcae5240e8df566cdaa72 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 31 Aug 2021 16:52:21 -0400 Subject: pack-bitmap: read multi-pack bitmaps This prepares the code in pack-bitmap to interpret the new multi-pack bitmaps described in Documentation/technical/bitmap-format.txt, which mostly involves converting bit positions to accommodate looking them up in a MIDX. Note that there are currently no writers who write multi-pack bitmaps, and that this will be implemented in the subsequent commit. Note also that get_midx_checksum() and get_midx_filename() are made non-static so they can be called from pack-bitmap.c. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'builtin') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 8a523624a1..e11d3ac2e5 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1124,6 +1124,11 @@ static void write_reused_pack(struct hashfile *f) break; offset += ewah_bit_ctz64(word >> offset); + /* + * Can use bit positions directly, even for MIDX + * bitmaps. See comment in try_partial_reuse() + * for why. + */ write_reused_pack_one(pos + offset, f, &w_curs); display_progress(progress_state, ++written); } -- cgit v1.2.3 From c528e179662ce1353c51f65e1b5895d02988c78c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 31 Aug 2021 16:52:24 -0400 Subject: pack-bitmap: write multi-pack bitmaps Write multi-pack bitmaps in the format described by Documentation/technical/bitmap-format.txt, inferring their presence with the absence of '--bitmap'. To write a multi-pack bitmap, this patch attempts to reuse as much of the existing machinery from pack-objects as possible. Specifically, the MIDX code prepares a packing_data struct that pretends as if a single packfile has been generated containing all of the objects contained within the MIDX. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/multi-pack-index.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'builtin') diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index 8ff0dee2ec..73c0113b48 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -68,6 +68,8 @@ static int cmd_multi_pack_index_write(int argc, const char **argv) OPT_STRING(0, "preferred-pack", &opts.preferred_pack, N_("preferred-pack"), N_("pack for reuse when computing a multi-pack bitmap")), + OPT_BIT(0, "bitmap", &opts.flags, N_("write multi-pack bitmap"), + MIDX_WRITE_BITMAP | MIDX_WRITE_REV_INDEX), OPT_END(), }; -- cgit v1.2.3 From ff1e653c8e252f9baf132478e126e1a2398bfee9 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 31 Aug 2021 16:52:43 -0400 Subject: midx: respect 'GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP' Introduce a new 'GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP' environment variable to also write a multi-pack bitmap when 'GIT_TEST_MULTI_PACK_INDEX' is set. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/repack.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index 5f9bc74adc..82ab668272 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -515,6 +515,10 @@ int cmd_repack(int argc, const char **argv, const char *prefix) if (!(pack_everything & ALL_INTO_ONE) || !is_bare_repository()) write_bitmaps = 0; + } else if (write_bitmaps && + git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0) && + git_env_bool(GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP, 0)) { + write_bitmaps = 0; } if (pack_kept_objects < 0) pack_kept_objects = write_bitmaps > 0; @@ -725,8 +729,12 @@ int cmd_repack(int argc, const char **argv, const char *prefix) update_server_info(0); remove_temporary_files(); - if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0)) - write_midx_file(get_object_directory(), NULL, 0); + if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0)) { + unsigned flags = 0; + if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP, 0)) + flags |= MIDX_WRITE_BITMAP | MIDX_WRITE_REV_INDEX; + write_midx_file(get_object_directory(), NULL, flags); + } string_list_clear(&names, 0); string_list_clear(&rollback, 0); -- cgit v1.2.3